summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitattributes1
-rw-r--r--.mailmap10
-rw-r--r--CREDITS4
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst22
-rw-r--r--Documentation/admin-guide/cifs/changes.rst4
-rw-r--r--Documentation/admin-guide/cifs/usage.rst8
-rw-r--r--Documentation/admin-guide/quickly-build-trimmed-linux.rst49
-rw-r--r--Documentation/cdrom/index.rst6
-rw-r--r--Documentation/devicetree/bindings/ata/ahci-common.yaml2
-rw-r--r--Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml2
-rw-r--r--Documentation/devicetree/bindings/cache/qcom,llcc.yaml1
-rw-r--r--Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml2
-rw-r--r--Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml12
-rw-r--r--Documentation/devicetree/bindings/firmware/qcom,scm.yaml2
-rw-r--r--Documentation/devicetree/bindings/fpga/lattice,sysconfig.yaml2
-rw-r--r--Documentation/devicetree/bindings/fpga/microchip,mpf-spi-fpga-mgr.yaml2
-rw-r--r--Documentation/devicetree/bindings/i2c/opencores,i2c-ocores.yaml1
-rw-r--r--Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml2
-rw-r--r--Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml7
-rw-r--r--Documentation/devicetree/bindings/iio/adc/renesas,rcar-gyroadc.yaml2
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml6
-rw-r--r--Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/can/st,stm32-bxcan.yaml19
-rw-r--r--Documentation/devicetree/bindings/net/realtek-bluetooth.yaml4
-rw-r--r--Documentation/devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.yaml5
-rw-r--r--Documentation/devicetree/bindings/power/qcom,rpmpd.yaml1
-rw-r--r--Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml2
-rw-r--r--Documentation/devicetree/bindings/riscv/canaan.yaml2
-rw-r--r--Documentation/devicetree/bindings/serial/8250_omap.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/tas2562.yaml6
-rw-r--r--Documentation/devicetree/bindings/sound/tas2770.yaml6
-rw-r--r--Documentation/devicetree/bindings/sound/tas27xx.yaml6
-rw-r--r--Documentation/devicetree/bindings/sound/tlv320aic32x4.txt2
-rw-r--r--Documentation/devicetree/bindings/usb/cdns,usb3.yaml2
-rw-r--r--Documentation/devicetree/bindings/usb/snps,dwc3.yaml2
-rw-r--r--Documentation/devicetree/usage-model.rst2
-rw-r--r--Documentation/filesystems/directory-locking.rst26
-rw-r--r--Documentation/filesystems/fsverity.rst192
-rw-r--r--Documentation/filesystems/index.rst2
-rw-r--r--Documentation/filesystems/ramfs-rootfs-initramfs.rst3
-rw-r--r--Documentation/filesystems/sharedsubtree.rst4
-rw-r--r--Documentation/filesystems/smb/cifsroot.rst (renamed from Documentation/filesystems/cifs/cifsroot.rst)2
-rw-r--r--Documentation/filesystems/smb/index.rst (renamed from Documentation/filesystems/cifs/index.rst)0
-rw-r--r--Documentation/filesystems/smb/ksmbd.rst (renamed from Documentation/filesystems/cifs/ksmbd.rst)0
-rw-r--r--Documentation/fpga/index.rst2
-rw-r--r--Documentation/locking/index.rst2
-rw-r--r--Documentation/mm/page_table_check.rst19
-rw-r--r--Documentation/netlink/specs/ethtool.yaml64
-rw-r--r--Documentation/netlink/specs/handshake.yaml4
-rw-r--r--Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst60
-rw-r--r--Documentation/networking/ip-sysctl.rst4
-rw-r--r--Documentation/networking/tls-handshake.rst5
-rw-r--r--Documentation/pcmcia/index.rst2
-rw-r--r--Documentation/process/changes.rst2
-rw-r--r--Documentation/process/maintainer-netdev.rst33
-rw-r--r--Documentation/riscv/patch-acceptance.rst18
-rw-r--r--Documentation/rust/quick-start.rst4
-rw-r--r--Documentation/s390/vfio-ap.rst1
-rw-r--r--Documentation/staging/crc32.rst2
-rw-r--r--Documentation/timers/index.rst2
-rw-r--r--Documentation/trace/histogram.rst64
-rw-r--r--Documentation/trace/user_events.rst7
-rw-r--r--Documentation/translations/zh_CN/devicetree/usage-model.rst2
-rw-r--r--Documentation/userspace-api/ioctl/ioctl-number.rst2
-rw-r--r--MAINTAINERS105
-rw-r--r--Makefile2
-rw-r--r--arch/arm/boot/dts/am57xx-cl-som-am57x.dts2
-rw-r--r--arch/arm/boot/dts/at91-sama7g5ek.dts2
-rw-r--r--arch/arm/boot/dts/at91sam9261ek.dts2
-rw-r--r--arch/arm/boot/dts/imx6qdl-mba6.dtsi1
-rw-r--r--arch/arm/boot/dts/imx6ull-dhcor-som.dtsi7
-rw-r--r--arch/arm/boot/dts/imx7d-pico-hobbit.dts2
-rw-r--r--arch/arm/boot/dts/imx7d-sdb.dts2
-rw-r--r--arch/arm/boot/dts/omap3-cm-t3x.dtsi2
-rw-r--r--arch/arm/boot/dts/omap3-devkit8000-lcd-common.dtsi2
-rw-r--r--arch/arm/boot/dts/omap3-lilly-a83x.dtsi2
-rw-r--r--arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi2
-rw-r--r--arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi2
-rw-r--r--arch/arm/boot/dts/omap3-pandora-common.dtsi2
-rw-r--r--arch/arm/boot/dts/omap5-cm-t54.dts2
-rw-r--r--arch/arm/boot/dts/qcom-apq8026-asus-sparrow.dts2
-rw-r--r--arch/arm/boot/dts/qcom-apq8026-huawei-sturgeon.dts1
-rw-r--r--arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts3
-rw-r--r--arch/arm/boot/dts/qcom-apq8064.dtsi1
-rw-r--r--arch/arm/boot/dts/qcom-apq8084.dtsi1
-rw-r--r--arch/arm/boot/dts/qcom-ipq4019.dtsi1
-rw-r--r--arch/arm/boot/dts/qcom-ipq8064.dtsi1
-rw-r--r--arch/arm/boot/dts/qcom-mdm9615-wp8548-mangoh-green.dts1
-rw-r--r--arch/arm/boot/dts/qcom-msm8660.dtsi1
-rw-r--r--arch/arm/boot/dts/qcom-msm8960.dtsi1
-rw-r--r--arch/arm/boot/dts/qcom-msm8974-lge-nexus5-hammerhead.dts2
-rw-r--r--arch/arm/boot/dts/qcom-msm8974-sony-xperia-rhine.dtsi1
-rw-r--r--arch/arm/boot/dts/qcom-msm8974.dtsi1
-rw-r--r--arch/arm/boot/dts/qcom-msm8974pro-oneplus-bacon.dts1
-rw-r--r--arch/arm/boot/dts/qcom-msm8974pro-samsung-klte.dts4
-rw-r--r--arch/arm/boot/dts/qcom-msm8974pro-sony-xperia-shinano-castor.dts1
-rw-r--r--arch/arm/boot/dts/stm32f429.dtsi1
-rw-r--r--arch/arm/boot/dts/stm32f7-pinctrl.dtsi82
-rw-r--r--arch/arm/boot/dts/vexpress-v2p-ca5s.dts1
-rw-r--r--arch/arm/include/asm/arm_pmuv3.h11
-rw-r--r--arch/arm/mach-at91/pm.c20
-rw-r--r--arch/arm64/Kconfig2
-rw-r--r--arch/arm64/boot/dts/arm/foundation-v8.dtsi1
-rw-r--r--arch/arm64/boot/dts/arm/rtsm_ve-aemv8a.dts1
-rw-r--r--arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts1
-rw-r--r--arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi1
-rw-r--r--arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi8
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mn-beacon-baseboard.dtsi4
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi8
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mn.dtsi28
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp.dtsi25
-rw-r--r--arch/arm64/boot/dts/freescale/imx8qm-mek.dts4
-rw-r--r--arch/arm64/boot/dts/freescale/imx8x-colibri-eval-v3.dtsi6
-rw-r--r--arch/arm64/boot/dts/freescale/imx8x-colibri-iris.dtsi3
-rw-r--r--arch/arm64/boot/dts/freescale/imx8x-colibri.dtsi14
-rw-r--r--arch/arm64/boot/dts/qcom/ipq5332.dtsi1
-rw-r--r--arch/arm64/boot/dts/qcom/ipq6018.dtsi3
-rw-r--r--arch/arm64/boot/dts/qcom/ipq8074.dtsi3
-rw-r--r--arch/arm64/boot/dts/qcom/ipq9574.dtsi1
-rw-r--r--arch/arm64/boot/dts/qcom/msm8916.dtsi1
-rw-r--r--arch/arm64/boot/dts/qcom/msm8953.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/msm8976.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/msm8994.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/msm8996.dtsi10
-rw-r--r--arch/arm64/boot/dts/qcom/msm8998.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/qcm2290.dtsi1
-rw-r--r--arch/arm64/boot/dts/qcom/qcs404.dtsi1
-rw-r--r--arch/arm64/boot/dts/qcom/qdu1000.dtsi10
-rw-r--r--arch/arm64/boot/dts/qcom/sa8155p-adp.dts2
-rw-r--r--arch/arm64/boot/dts/qcom/sa8155p.dtsi40
-rw-r--r--arch/arm64/boot/dts/qcom/sa8775p.dtsi20
-rw-r--r--arch/arm64/boot/dts/qcom/sc7180-idp.dts5
-rw-r--r--arch/arm64/boot/dts/qcom/sc7180-lite.dtsi8
-rw-r--r--arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi5
-rw-r--r--arch/arm64/boot/dts/qcom/sc7180.dtsi11
-rw-r--r--arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi5
-rw-r--r--arch/arm64/boot/dts/qcom/sc7280-idp.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/sc7280-qcard.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/sc7280.dtsi11
-rw-r--r--arch/arm64/boot/dts/qcom/sc8280xp.dtsi18
-rw-r--r--arch/arm64/boot/dts/qcom/sdm630.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/sdm670.dtsi20
-rw-r--r--arch/arm64/boot/dts/qcom/sdm845.dtsi13
-rw-r--r--arch/arm64/boot/dts/qcom/sm6115.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/sm6125.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/sm6350.dtsi9
-rw-r--r--arch/arm64/boot/dts/qcom/sm6375-sony-xperia-murray-pdx225.dts4
-rw-r--r--arch/arm64/boot/dts/qcom/sm6375.dtsi52
-rw-r--r--arch/arm64/boot/dts/qcom/sm8150.dtsi13
-rw-r--r--arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-boe.dts2
-rw-r--r--arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-csot.dts2
-rw-r--r--arch/arm64/boot/dts/qcom/sm8350.dtsi61
-rw-r--r--arch/arm64/boot/dts/qcom/sm8450.dtsi61
-rw-r--r--arch/arm64/boot/dts/qcom/sm8550.dtsi26
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3308.dtsi1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3328-rock64.dts14
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3328.dtsi1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-soquartz-cm4.dts18
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi29
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5c.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3568.dtsi14
-rw-r--r--arch/arm64/boot/dts/rockchip/rk356x.dtsi7
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588s.dtsi9
-rw-r--r--arch/arm64/hyperv/mshyperv.c2
-rw-r--r--arch/arm64/include/asm/arm_pmuv3.h6
-rw-r--r--arch/arm64/include/asm/cputype.h8
-rw-r--r--arch/arm64/include/asm/kvm_host.h7
-rw-r--r--arch/arm64/include/asm/kvm_pgtable.h7
-rw-r--r--arch/arm64/include/asm/sysreg.h6
-rw-r--r--arch/arm64/kernel/mte.c7
-rw-r--r--arch/arm64/kernel/vdso.c2
-rw-r--r--arch/arm64/kvm/fpsimd.c26
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h35
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c14
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c2
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c58
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c15
-rw-r--r--arch/arm64/kvm/inject_fault.c2
-rw-r--r--arch/arm64/kvm/pmu-emul.c74
-rw-r--r--arch/arm64/kvm/pmu.c27
-rw-r--r--arch/arm64/kvm/sys_regs.c19
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c34
-rw-r--r--arch/arm64/kvm/vgic/vgic-its.c14
-rw-r--r--arch/arm64/kvm/vgic/vgic-kvm-device.c10
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio-v3.c31
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio.c9
-rw-r--r--arch/arm64/kvm/vgic/vgic-v2.c6
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3.c11
-rw-r--r--arch/arm64/kvm/vgic/vgic-v4.c3
-rw-r--r--arch/arm64/kvm/vmid.c7
-rw-r--r--arch/arm64/mm/copypage.c5
-rw-r--r--arch/arm64/mm/fault.c7
-rw-r--r--arch/loongarch/include/asm/loongarch.h2
-rw-r--r--arch/loongarch/include/asm/pgtable-bits.h2
-rw-r--r--arch/loongarch/include/asm/pgtable.h3
-rw-r--r--arch/loongarch/kernel/hw_breakpoint.c2
-rw-r--r--arch/loongarch/kernel/perf_event.c6
-rw-r--r--arch/loongarch/kernel/unaligned.c2
-rw-r--r--arch/m68k/kernel/signal.c14
-rw-r--r--arch/mips/Kconfig1
-rw-r--r--arch/mips/alchemy/common/dbdma.c27
-rw-r--r--arch/mips/kernel/cpu-probe.c5
-rw-r--r--arch/mips/kernel/setup.c9
-rw-r--r--arch/nios2/boot/dts/10m50_devboard.dts2
-rw-r--r--arch/nios2/boot/dts/3c120_devboard.dts2
-rw-r--r--arch/nios2/include/asm/pgalloc.h8
-rw-r--r--arch/nios2/kernel/cpuinfo.c2
-rw-r--r--arch/nios2/kernel/setup.c6
-rw-r--r--arch/parisc/Kconfig4
-rw-r--r--arch/parisc/Kconfig.debug11
-rw-r--r--arch/parisc/include/asm/assembly.h4
-rw-r--r--arch/parisc/include/asm/cacheflush.h4
-rw-r--r--arch/parisc/include/asm/pgtable.h3
-rw-r--r--arch/parisc/include/asm/spinlock.h39
-rw-r--r--arch/parisc/include/asm/spinlock_types.h8
-rw-r--r--arch/parisc/kernel/alternative.c2
-rw-r--r--arch/parisc/kernel/cache.c5
-rw-r--r--arch/parisc/kernel/pci-dma.c18
-rw-r--r--arch/parisc/kernel/process.c11
-rw-r--r--arch/parisc/kernel/traps.c18
-rw-r--r--arch/powerpc/Kconfig6
-rw-r--r--arch/powerpc/boot/Makefile6
-rw-r--r--arch/powerpc/crypto/Kconfig2
-rw-r--r--arch/powerpc/crypto/Makefile10
-rw-r--r--arch/powerpc/crypto/aes-gcm-p10-glue.c18
-rw-r--r--arch/powerpc/crypto/aesp10-ppc.pl (renamed from arch/powerpc/crypto/aesp8-ppc.pl)2
-rw-r--r--arch/powerpc/crypto/ghashp10-ppc.pl (renamed from arch/powerpc/crypto/ghashp8-ppc.pl)12
-rw-r--r--arch/powerpc/include/asm/iommu.h5
-rw-r--r--arch/powerpc/include/asm/pgtable.h3
-rw-r--r--arch/powerpc/kernel/dma-iommu.c4
-rw-r--r--arch/powerpc/kernel/iommu.c28
-rw-r--r--arch/powerpc/kernel/isa-bridge.c5
-rw-r--r--arch/powerpc/mm/book3s64/radix_pgtable.c4
-rw-r--r--arch/powerpc/mm/book3s64/radix_tlb.c10
-rw-r--r--arch/powerpc/net/bpf_jit_comp.c2
-rw-r--r--arch/powerpc/platforms/Kconfig1
-rw-r--r--arch/powerpc/platforms/powernv/pci.c25
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c51
-rw-r--r--arch/powerpc/purgatory/Makefile5
-rw-r--r--arch/powerpc/xmon/xmon.c2
-rw-r--r--arch/riscv/Kconfig6
-rw-r--r--arch/riscv/errata/Makefile4
-rw-r--r--arch/riscv/include/asm/hugetlb.h3
-rw-r--r--arch/riscv/include/asm/kfence.h33
-rw-r--r--arch/riscv/include/asm/perf_event.h7
-rw-r--r--arch/riscv/include/asm/pgtable.h3
-rw-r--r--arch/riscv/kernel/Makefile4
-rw-r--r--arch/riscv/kernel/probes/Makefile2
-rw-r--r--arch/riscv/mm/hugetlbpage.c30
-rw-r--r--arch/riscv/mm/init.c50
-rw-r--r--arch/riscv/purgatory/Makefile5
-rw-r--r--arch/s390/Kconfig11
-rw-r--r--arch/s390/boot/vmem.c15
-rw-r--r--arch/s390/configs/debug_defconfig4
-rw-r--r--arch/s390/configs/defconfig4
-rw-r--r--arch/s390/configs/zfcpdump_defconfig1
-rw-r--r--arch/s390/crypto/chacha-glue.c2
-rw-r--r--arch/s390/include/asm/compat.h2
-rw-r--r--arch/s390/include/asm/pgtable.h3
-rw-r--r--arch/s390/include/asm/physmem_info.h5
-rw-r--r--arch/s390/include/uapi/asm/statfs.h4
-rw-r--r--arch/s390/kernel/Makefile1
-rw-r--r--arch/s390/kernel/crash_dump.c2
-rw-r--r--arch/s390/kernel/ipl.c3
-rw-r--r--arch/s390/kernel/topology.c32
-rw-r--r--arch/s390/mm/pageattr.c1
-rw-r--r--arch/s390/mm/vmem.c14
-rw-r--r--arch/s390/purgatory/Makefile1
-rw-r--r--arch/um/drivers/Makefile4
-rw-r--r--arch/um/drivers/harddog.h9
-rw-r--r--arch/um/drivers/harddog_kern.c7
-rw-r--r--arch/um/drivers/harddog_user.c1
-rw-r--r--arch/um/drivers/harddog_user_exp.c9
-rw-r--r--arch/x86/Makefile12
-rw-r--r--arch/x86/crypto/aria-aesni-avx-asm_64.S2
-rw-r--r--arch/x86/events/intel/core.c17
-rw-r--r--arch/x86/events/intel/uncore_snbep.c11
-rw-r--r--arch/x86/hyperv/hv_init.c2
-rw-r--r--arch/x86/hyperv/hv_vtl.c2
-rw-r--r--arch/x86/include/asm/Kbuild1
-rw-r--r--arch/x86/include/asm/fpu/sched.h2
-rw-r--r--arch/x86/include/asm/orc_header.h19
-rw-r--r--arch/x86/include/asm/pgtable.h1
-rw-r--r--arch/x86/include/asm/pgtable_types.h3
-rw-r--r--arch/x86/include/asm/vmx.h2
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c5
-rw-r--r--arch/x86/kernel/cpu/topology.c5
-rw-r--r--arch/x86/kernel/dumpstack.c7
-rw-r--r--arch/x86/kernel/fpu/context.h2
-rw-r--r--arch/x86/kernel/fpu/core.c2
-rw-r--r--arch/x86/kernel/head_64.S18
-rw-r--r--arch/x86/kernel/unwind_orc.c3
-rw-r--r--arch/x86/kvm/cpuid.c16
-rw-r--r--arch/x86/kvm/lapic.c20
-rw-r--r--arch/x86/kvm/mmu/mmu.c5
-rw-r--r--arch/x86/kvm/svm/svm.c2
-rw-r--r--arch/x86/kvm/vmx/sgx.c11
-rw-r--r--arch/x86/kvm/x86.c9
-rw-r--r--arch/x86/lib/copy_user_64.S10
-rw-r--r--arch/x86/mm/init.c25
-rw-r--r--arch/x86/mm/kaslr.c8
-rw-r--r--arch/x86/mm/pat/set_memory.c1
-rw-r--r--arch/x86/net/bpf_jit_comp.c2
-rw-r--r--arch/x86/pci/xen.c8
-rw-r--r--arch/x86/purgatory/Makefile5
-rw-r--r--arch/xtensa/kernel/signal.c35
-rw-r--r--arch/xtensa/kernel/xtensa_ksyms.c4
-rw-r--r--arch/xtensa/lib/Makefile2
-rw-r--r--arch/xtensa/lib/bswapdi2.S21
-rw-r--r--arch/xtensa/lib/bswapsi2.S16
-rw-r--r--block/blk-cgroup.c41
-rw-r--r--block/blk-core.c2
-rw-r--r--block/blk-map.c2
-rw-r--r--block/blk-mq-tag.c12
-rw-r--r--block/blk-mq.c8
-rw-r--r--block/blk-settings.c3
-rw-r--r--block/blk-wbt.c12
-rw-r--r--block/fops.c17
-rw-r--r--crypto/asymmetric_keys/public_key.c38
-rw-r--r--drivers/accel/ivpu/Kconfig1
-rw-r--r--drivers/accel/ivpu/ivpu_hw_mtl.c22
-rw-r--r--drivers/accel/ivpu/ivpu_hw_mtl_reg.h1
-rw-r--r--drivers/accel/ivpu/ivpu_ipc.c4
-rw-r--r--drivers/accel/ivpu/ivpu_job.c21
-rw-r--r--drivers/accel/ivpu/ivpu_mmu.c22
-rw-r--r--drivers/accel/qaic/qaic_control.c41
-rw-r--r--drivers/accel/qaic/qaic_data.c97
-rw-r--r--drivers/accel/qaic/qaic_drv.c6
-rw-r--r--drivers/acpi/acpica/achware.h2
-rw-r--r--drivers/acpi/apei/apei-internal.h6
-rw-r--r--drivers/acpi/apei/bert.c1
-rw-r--r--drivers/acpi/resource.c12
-rw-r--r--drivers/acpi/sleep.c16
-rw-r--r--drivers/android/binder.c28
-rw-r--r--drivers/android/binder_alloc.c64
-rw-r--r--drivers/android/binder_alloc.h4
-rw-r--r--drivers/android/binder_alloc_selftest.c2
-rw-r--r--drivers/ata/libata-core.c3
-rw-r--r--drivers/ata/libata-eh.c2
-rw-r--r--drivers/ata/libata-scsi.c56
-rw-r--r--drivers/auxdisplay/ht16k33.c2
-rw-r--r--drivers/auxdisplay/lcd2s.c2
-rw-r--r--drivers/base/cacheinfo.c26
-rw-r--r--drivers/base/class.c2
-rw-r--r--drivers/base/firmware_loader/main.c2
-rw-r--r--drivers/base/regmap/Kconfig13
-rw-r--r--drivers/base/regmap/regcache-maple.c5
-rw-r--r--drivers/base/regmap/regcache.c3
-rw-r--r--drivers/base/regmap/regmap-sdw.c4
-rw-r--r--drivers/base/regmap/regmap-spi-avmm.c2
-rw-r--r--drivers/base/regmap/regmap.c6
-rw-r--r--drivers/block/null_blk/main.c1
-rw-r--r--drivers/block/rbd.c62
-rw-r--r--drivers/block/ublk_drv.c9
-rw-r--r--drivers/block/virtio_blk.c82
-rw-r--r--drivers/block/xen-blkfront.c3
-rw-r--r--drivers/bluetooth/btnxpuart.c6
-rw-r--r--drivers/bluetooth/hci_qca.c6
-rw-r--r--drivers/char/agp/parisc-agp.c15
-rw-r--r--drivers/char/tpm/tpm-chip.c4
-rw-r--r--drivers/char/tpm/tpm-interface.c10
-rw-r--r--drivers/char/tpm/tpm_tis.c23
-rw-r--r--drivers/char/tpm/tpm_tis_core.c43
-rw-r--r--drivers/char/tpm/tpm_tis_core.h8
-rw-r--r--drivers/clk/clk-composite.c5
-rw-r--r--drivers/clk/clk-loongson2.c2
-rw-r--r--drivers/clk/mediatek/clk-mt8365.c18
-rw-r--r--drivers/clk/pxa/clk-pxa3xx.c2
-rw-r--r--drivers/cpufreq/acpi-cpufreq.c2
-rw-r--r--drivers/cpufreq/amd-pstate.c46
-rw-r--r--drivers/cpufreq/pcc-cpufreq.c2
-rw-r--r--drivers/cxl/core/mbox.c12
-rw-r--r--drivers/cxl/core/pci.c112
-rw-r--r--drivers/cxl/core/port.c7
-rw-r--r--drivers/cxl/cxl.h1
-rw-r--r--drivers/cxl/cxlmem.h2
-rw-r--r--drivers/cxl/cxlpci.h2
-rw-r--r--drivers/cxl/mem.c3
-rw-r--r--drivers/cxl/pci.c6
-rw-r--r--drivers/cxl/port.c20
-rw-r--r--drivers/dma-buf/udmabuf.c47
-rw-r--r--drivers/dma/at_hdmac.c17
-rw-r--r--drivers/dma/at_xdmac.c7
-rw-r--r--drivers/dma/idxd/cdev.c1
-rw-r--r--drivers/dma/pl330.c8
-rw-r--r--drivers/dma/ti/k3-udma.c4
-rw-r--r--drivers/edac/qcom_edac.c118
-rw-r--r--drivers/firmware/arm_ffa/bus.c19
-rw-r--r--drivers/firmware/arm_ffa/driver.c10
-rw-r--r--drivers/firmware/arm_scmi/raw_mode.c2
-rw-r--r--drivers/firmware/cirrus/cs_dsp.c5
-rw-r--r--drivers/firmware/efi/efi.c21
-rw-r--r--drivers/firmware/efi/libstub/Makefile.zboot3
-rw-r--r--drivers/firmware/efi/libstub/efistub.h3
-rw-r--r--drivers/gpio/Kconfig2
-rw-r--r--drivers/gpio/gpio-f7188x.c28
-rw-r--r--drivers/gpio/gpio-mockup.c2
-rw-r--r--drivers/gpio/gpio-sifive.c8
-rw-r--r--drivers/gpio/gpio-sim.c18
-rw-r--r--drivers/gpio/gpiolib.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c60
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c145
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v10_0.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c11
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c43
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c16
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c36
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_resource.c20
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c25
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_detection.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_validation.c3
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_pm.c12
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c29
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c18
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c20
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c92
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c10
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c5
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c4
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c33
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c5
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c5
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c5
-rw-r--r--drivers/gpu/drm/ast/ast_dp.c55
-rw-r--r--drivers/gpu/drm/ast/ast_drv.h5
-rw-r--r--drivers/gpu/drm/ast/ast_main.c11
-rw-r--r--drivers/gpu/drm/ast/ast_mode.c15
-rw-r--r--drivers/gpu/drm/ast/ast_post.c3
-rw-r--r--drivers/gpu/drm/bridge/ti-sn65dsi86.c4
-rw-r--r--drivers/gpu/drm/display/drm_dp_mst_topology.c2
-rw-r--r--drivers/gpu/drm/drm_fb_helper.c12
-rw-r--r--drivers/gpu/drm/drm_managed.c22
-rw-r--r--drivers/gpu/drm/drm_panel_orientation_quirks.c2
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_g2d.c2
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_g2d.h4
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_vidi.c2
-rw-r--r--drivers/gpu/drm/i915/display/intel_cdclk.c30
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.c12
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_aux.c2
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp.c10
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c14
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_execlists.c12
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c17
-rw-r--r--drivers/gpu/drm/lima/lima_sched.c2
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_mode.c5
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gmu.c2
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu.c2
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h16
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h4
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h4
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h4
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h4
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h1
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h1
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h12
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h8
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h24
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h16
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h16
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c17
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c23
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c5
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c1
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hwio.h3
-rw-r--r--drivers/gpu/drm/msm/dp/dp_audio.c12
-rw-r--r--drivers/gpu/drm/msm/dp/dp_audio.h2
-rw-r--r--drivers/gpu/drm/msm/dp/dp_catalog.c15
-rw-r--r--drivers/gpu/drm/msm/dp/dp_catalog.h3
-rw-r--r--drivers/gpu/drm/msm/dp/dp_display.c78
-rw-r--r--drivers/gpu/drm/msm/msm_atomic.c2
-rw-r--r--drivers/gpu/drm/msm/msm_drv.c4
-rw-r--r--drivers/gpu/drm/msm/msm_gem.c22
-rw-r--r--drivers/gpu/drm/msm/msm_gem_submit.c25
-rw-r--r--drivers/gpu/drm/msm/msm_iommu.c9
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_acpi.c3
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_connector.c7
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drm.c14
-rw-r--r--drivers/gpu/drm/pl111/pl111_display.c2
-rw-r--r--drivers/gpu/drm/pl111/pl111_drm.h4
-rw-r--r--drivers/gpu/drm/pl111/pl111_drv.c8
-rw-r--r--drivers/gpu/drm/pl111/pl111_versatile.c10
-rw-r--r--drivers/gpu/drm/radeon/radeon_fbdev.c1
-rw-r--r--drivers/gpu/drm/radeon/radeon_gem.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_irq_kms.c10
-rw-r--r--drivers/gpu/drm/scheduler/sched_main.c3
-rw-r--r--drivers/hid/hid-google-hammer.c2
-rw-r--r--drivers/hid/hid-ids.h1
-rw-r--r--drivers/hid/hid-logitech-hidpp.c12
-rw-r--r--drivers/hid/wacom_sys.c21
-rw-r--r--drivers/hid/wacom_wac.c2
-rw-r--r--drivers/hv/channel_mgmt.c18
-rw-r--r--drivers/hv/hv_common.c48
-rw-r--r--drivers/hv/vmbus_drv.c5
-rw-r--r--drivers/hwtracing/coresight/coresight-etm-perf.c1
-rw-r--r--drivers/hwtracing/coresight/coresight-tmc-etr.c2
-rw-r--r--drivers/i2c/busses/i2c-designware-core.h1
-rw-r--r--drivers/i2c/busses/i2c-designware-slave.c4
-rw-r--r--drivers/i2c/busses/i2c-img-scb.c2
-rw-r--r--drivers/i2c/busses/i2c-imx-lpi2c.c4
-rw-r--r--drivers/i2c/busses/i2c-mchp-pci1xxxx.c6
-rw-r--r--drivers/i2c/busses/i2c-mv64xxx.c11
-rw-r--r--drivers/i2c/busses/i2c-qup.c21
-rw-r--r--drivers/i2c/busses/i2c-sprd.c8
-rw-r--r--drivers/iio/accel/kionix-kx022a.c2
-rw-r--r--drivers/iio/accel/st_accel_core.c4
-rw-r--r--drivers/iio/adc/ad4130.c12
-rw-r--r--drivers/iio/adc/ad7192.c8
-rw-r--r--drivers/iio/adc/ad_sigma_delta.c4
-rw-r--r--drivers/iio/adc/imx93_adc.c7
-rw-r--r--drivers/iio/adc/mt6370-adc.c53
-rw-r--r--drivers/iio/adc/mxs-lradc-adc.c10
-rw-r--r--drivers/iio/adc/palmas_gpadc.c10
-rw-r--r--drivers/iio/adc/stm32-adc.c61
-rw-r--r--drivers/iio/addac/ad74413r.c2
-rw-r--r--drivers/iio/dac/Makefile2
-rw-r--r--drivers/iio/dac/mcp4725.c16
-rw-r--r--drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c10
-rw-r--r--drivers/iio/industrialio-gts-helper.c42
-rw-r--r--drivers/iio/light/rohm-bu27034.c26
-rw-r--r--drivers/iio/light/vcnl4035.c3
-rw-r--r--drivers/iio/magnetometer/tmag5273.c5
-rw-r--r--drivers/infiniband/core/cma.c4
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c7
-rw-r--r--drivers/infiniband/core/uverbs_main.c12
-rw-r--r--drivers/infiniband/hw/bnxt_re/bnxt_re.h2
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c11
-rw-r--r--drivers/infiniband/hw/bnxt_re/main.c6
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.c11
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_res.c12
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.c7
-rw-r--r--drivers/infiniband/hw/efa/efa_verbs.c2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c25
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c43
-rw-r--r--drivers/infiniband/hw/irdma/verbs.c12
-rw-r--r--drivers/infiniband/hw/mlx5/counters.c89
-rw-r--r--drivers/infiniband/hw/mlx5/fs.c276
-rw-r--r--drivers/infiniband/hw/mlx5/fs.h16
-rw-r--r--drivers/infiniband/hw/mlx5/main.c3
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h14
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c3
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c26
-rw-r--r--drivers/infiniband/sw/rxe/rxe_cq.c4
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c13
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c44
-rw-r--r--drivers/infiniband/sw/rxe/rxe_recv.c9
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c30
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c17
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c25
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c16
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt.c55
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs.c4
-rw-r--r--drivers/input/input.c2
-rw-r--r--drivers/input/joystick/xpad.c1
-rw-r--r--drivers/input/misc/soc_button_array.c30
-rw-r--r--drivers/input/mouse/elantech.c9
-rw-r--r--drivers/input/touchscreen/cyttsp5.c2
-rw-r--r--drivers/iommu/Kconfig17
-rw-r--r--drivers/iommu/amd/amd_iommu.h4
-rw-r--r--drivers/iommu/amd/init.c24
-rw-r--r--drivers/iommu/amd/iommu.c39
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c10
-rw-r--r--drivers/iommu/mtk_iommu.c3
-rw-r--r--drivers/iommu/rockchip-iommu.c14
-rw-r--r--drivers/irqchip/irq-gic-common.c10
-rw-r--r--drivers/irqchip/irq-gic-common.h1
-rw-r--r--drivers/irqchip/irq-gic-v3.c20
-rw-r--r--drivers/irqchip/irq-mbigen.c31
-rw-r--r--drivers/irqchip/irq-meson-gpio.c2
-rw-r--r--drivers/irqchip/irq-mips-gic.c32
-rw-r--r--drivers/leds/rgb/leds-qcom-lpg.c8
-rw-r--r--drivers/mailbox/mailbox-test.c10
-rw-r--r--drivers/md/dm-cache-metadata.c2
-rw-r--r--drivers/md/dm-ioctl.c5
-rw-r--r--drivers/md/dm-thin-metadata.c22
-rw-r--r--drivers/md/dm-thin.c3
-rw-r--r--drivers/md/dm.c29
-rw-r--r--drivers/md/raid5.c2
-rw-r--r--drivers/media/cec/core/cec-adap.c8
-rw-r--r--drivers/media/cec/core/cec-core.c2
-rw-r--r--drivers/media/cec/core/cec-priv.h1
-rw-r--r--drivers/media/dvb-core/dvb_ca_en50221.c49
-rw-r--r--drivers/media/dvb-core/dvb_demux.c4
-rw-r--r--drivers/media/dvb-core/dvb_frontend.c16
-rw-r--r--drivers/media/dvb-core/dvb_net.c38
-rw-r--r--drivers/media/dvb-core/dvbdev.c84
-rw-r--r--drivers/media/dvb-frontends/mn88443x.c2
-rw-r--r--drivers/media/pci/netup_unidvb/netup_unidvb_core.c19
-rw-r--r--drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec_stateful.c3
-rw-r--r--drivers/media/platform/qcom/camss/camss-video.c1
-rw-r--r--drivers/media/platform/verisilicon/hantro_v4l2.c6
-rw-r--r--drivers/media/usb/dvb-usb-v2/ce6230.c8
-rw-r--r--drivers/media/usb/dvb-usb-v2/ec168.c12
-rw-r--r--drivers/media/usb/dvb-usb-v2/rtl28xxu.c20
-rw-r--r--drivers/media/usb/dvb-usb/az6027.c12
-rw-r--r--drivers/media/usb/dvb-usb/digitv.c4
-rw-r--r--drivers/media/usb/dvb-usb/dw2102.c2
-rw-r--r--drivers/media/usb/pvrusb2/Kconfig1
-rw-r--r--drivers/media/usb/ttusb-dec/ttusb_dec.c3
-rw-r--r--drivers/media/usb/uvc/uvc_driver.c16
-rw-r--r--drivers/media/v4l2-core/v4l2-mc.c3
-rw-r--r--drivers/misc/eeprom/Kconfig1
-rw-r--r--drivers/misc/fastrpc.c31
-rw-r--r--drivers/mmc/core/block.c5
-rw-r--r--drivers/mmc/core/pwrseq_sd8787.c34
-rw-r--r--drivers/mmc/host/bcm2835.c4
-rw-r--r--drivers/mmc/host/litex_mmc.c1
-rw-r--r--drivers/mmc/host/meson-gx-mmc.c14
-rw-r--r--drivers/mmc/host/mmci.c3
-rw-r--r--drivers/mmc/host/mtk-sd.c2
-rw-r--r--drivers/mmc/host/mvsdio.c2
-rw-r--r--drivers/mmc/host/omap.c2
-rw-r--r--drivers/mmc/host/omap_hsmmc.c6
-rw-r--r--drivers/mmc/host/owl-mmc.c2
-rw-r--r--drivers/mmc/host/sdhci-acpi.c2
-rw-r--r--drivers/mmc/host/sdhci-cadence.c8
-rw-r--r--drivers/mmc/host/sdhci-esdhc-imx.c18
-rw-r--r--drivers/mmc/host/sdhci-msm.c3
-rw-r--r--drivers/mmc/host/sdhci-spear.c4
-rw-r--r--drivers/mmc/host/sh_mmcif.c2
-rw-r--r--drivers/mmc/host/sunxi-mmc.c4
-rw-r--r--drivers/mmc/host/usdhi6rol0.c6
-rw-r--r--drivers/mmc/host/vub300.c3
-rw-r--r--drivers/mtd/mtdchar.c8
-rw-r--r--drivers/mtd/nand/raw/ingenic/ingenic_ecc.h8
-rw-r--r--drivers/mtd/nand/raw/marvell_nand.c10
-rw-r--r--drivers/mtd/spi-nor/core.c5
-rw-r--r--drivers/mtd/spi-nor/spansion.c4
-rw-r--r--drivers/net/bonding/bond_main.c8
-rw-r--r--drivers/net/can/Kconfig2
-rw-r--r--drivers/net/can/bxcan.c34
-rw-r--r--drivers/net/can/dev/skb.c3
-rw-r--r--drivers/net/can/kvaser_pciefd.c51
-rw-r--r--drivers/net/dsa/lan9303-core.c4
-rw-r--r--drivers/net/dsa/mt7530.c48
-rw-r--r--drivers/net/dsa/mt7530.h6
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c2
-rw-r--r--drivers/net/dsa/mv88e6xxx/port.h2
-rw-r--r--drivers/net/dsa/ocelot/felix_vsc9959.c2
-rw-r--r--drivers/net/dsa/qca/Kconfig1
-rw-r--r--drivers/net/dsa/rzn1_a5psw.c83
-rw-r--r--drivers/net/dsa/rzn1_a5psw.h3
-rw-r--r--drivers/net/ethernet/3com/3c515.c4
-rw-r--r--drivers/net/ethernet/3com/3c589_cs.c11
-rw-r--r--drivers/net/ethernet/8390/ne.c1
-rw-r--r--drivers/net/ethernet/8390/smc-ultra.c1
-rw-r--r--drivers/net/ethernet/8390/wd.c1
-rw-r--r--drivers/net/ethernet/amd/lance.c1
-rw-r--r--drivers/net/ethernet/amd/pds_core/dev.c10
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-mdio.c12
-rw-r--r--drivers/net/ethernet/broadcom/bcmsysport.c4
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c9
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c42
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c1
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.c30
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.h3
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmmii.c5
-rw-r--r--drivers/net/ethernet/cirrus/cs89x0.c2
-rw-r--r--drivers/net/ethernet/emulex/benet/be_main.c4
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c7
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc.c16
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_qos.c4
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c33
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c25
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h8
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h1
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c15
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c4
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h5
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c5
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf.h2
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_main.c15
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_register.h2
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_virtchnl.c5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.h2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dcb_lib.c5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_gnss.c72
-rw-r--r--drivers/net/ethernet/intel/ice/ice_gnss.h10
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c20
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sriov.c8
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.c10
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.h9
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_lib.c19
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_lib.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl.c1
-rw-r--r--drivers/net/ethernet/intel/igb/e1000_mac.c4
-rw-r--r--drivers/net/ethernet/intel/igb/igb_ethtool.c3
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c8
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c12
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c2
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_main.c7
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c7
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c29
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c4
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c46
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c124
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_common.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c76
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c153
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c50
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c70
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mr.c21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c82
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c13
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_main.c10
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_en.c10
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_ethtool.c2
-rw-r--r--drivers/net/ethernet/netronome/nfp/nic/main.h2
-rw-r--r--drivers/net/ethernet/nvidia/forcedeth.c1
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_l2.c2
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede.h4
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_ethtool.c24
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_main.c34
-rw-r--r--drivers/net/ethernet/qualcomm/qca_spi.c3
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c44
-rw-r--r--drivers/net/ethernet/renesas/rswitch.c38
-rw-r--r--drivers/net/ethernet/sfc/ef10.c25
-rw-r--r--drivers/net/ethernet/sfc/ef100_netdev.c4
-rw-r--r--drivers/net/ethernet/sfc/ef100_nic.c7
-rw-r--r--drivers/net/ethernet/sfc/ef100_tx.c4
-rw-r--r--drivers/net/ethernet/sfc/ef100_tx.h2
-rw-r--r--drivers/net/ethernet/sfc/efx_channels.c2
-rw-r--r--drivers/net/ethernet/sfc/efx_devlink.c95
-rw-r--r--drivers/net/ethernet/sfc/siena/efx_channels.c2
-rw-r--r--drivers/net/ethernet/sfc/tc.c27
-rw-r--r--drivers/net/ethernet/sfc/tx_common.c4
-rw-r--r--drivers/net/ethernet/sfc/tx_common.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c12
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c6
-rw-r--r--drivers/net/ethernet/sun/cassini.c2
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-nuss.c2
-rw-r--r--drivers/net/ieee802154/adf7242.c2
-rw-r--r--drivers/net/ieee802154/mac802154_hwsim.c6
-rw-r--r--drivers/net/ipa/ipa_endpoint.c2
-rw-r--r--drivers/net/ipvlan/ipvlan_l3s.c4
-rw-r--r--drivers/net/macsec.c12
-rw-r--r--drivers/net/mdio/mdio-i2c.c15
-rw-r--r--drivers/net/pcs/pcs-xpcs.c2
-rw-r--r--drivers/net/phy/dp83867.c24
-rw-r--r--drivers/net/phy/mdio_bus.c2
-rw-r--r--drivers/net/phy/mscc/mscc.h2
-rw-r--r--drivers/net/phy/mscc/mscc_main.c82
-rw-r--r--drivers/net/phy/mxl-gpy.c16
-rw-r--r--drivers/net/phy/phy_device.c15
-rw-r--r--drivers/net/phy/phylink.c50
-rw-r--r--drivers/net/team/team.c7
-rw-r--r--drivers/net/tun.c15
-rw-r--r--drivers/net/usb/cdc_ncm.c24
-rw-r--r--drivers/net/usb/qmi_wwan.c4
-rw-r--r--drivers/net/virtio_net.c77
-rw-r--r--drivers/net/wan/lapbether.c3
-rw-r--r--drivers/net/wireless/broadcom/b43/b43.h2
-rw-r--r--drivers/net/wireless/broadcom/b43legacy/b43legacy.h2
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c5
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c11
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c11
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/acpi.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/dbg.c19
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/d3.c8
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c5
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/fw.c5
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/link.c12
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c55
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c9
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c14
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mvm.h1
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/nvm.c10
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rfi.c16
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rs.c11
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c9
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/sta.c13
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/tx.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/drv.c2
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7615/mac.c3
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76_connac2_mac.h2
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c3
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7996/mac.c21
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h1
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c4
-rw-r--r--drivers/net/wireless/realtek/rtw88/mac80211.c16
-rw-r--r--drivers/net/wireless/realtek/rtw88/main.c19
-rw-r--r--drivers/net/wireless/realtek/rtw88/main.h3
-rw-r--r--drivers/net/wireless/realtek/rtw88/ps.c43
-rw-r--r--drivers/net/wireless/realtek/rtw88/ps.h2
-rw-r--r--drivers/net/wireless/realtek/rtw88/sdio.c8
-rw-r--r--drivers/net/wireless/realtek/rtw88/usb.h2
-rw-r--r--drivers/net/wireless/realtek/rtw89/core.c3
-rw-r--r--drivers/net/wireless/realtek/rtw89/mac.c4
-rw-r--r--drivers/net/wireless/realtek/rtw89/mac.h2
-rw-r--r--drivers/net/wireless/realtek/rtw89/mac80211.c15
-rw-r--r--drivers/net/wireless/realtek/rtw89/ps.c26
-rw-r--r--drivers/net/wireless/realtek/rtw89/ps.h1
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8852b.c28
-rw-r--r--drivers/net/wireless/virtual/mac80211_hwsim.c3
-rw-r--r--drivers/net/wwan/iosm/iosm_ipc_imem.c27
-rw-r--r--drivers/net/wwan/iosm/iosm_ipc_imem_ops.c12
-rw-r--r--drivers/net/wwan/iosm/iosm_ipc_imem_ops.h6
-rw-r--r--drivers/net/wwan/iosm/iosm_ipc_mux_codec.c15
-rw-r--r--drivers/net/wwan/iosm/iosm_ipc_mux_codec.h2
-rw-r--r--drivers/net/wwan/t7xx/t7xx_pci.c18
-rw-r--r--drivers/net/wwan/t7xx/t7xx_pci.h1
-rw-r--r--drivers/nfc/fdp/fdp.c3
-rw-r--r--drivers/nfc/nfcsim.c4
-rw-r--r--drivers/nvme/host/constants.c2
-rw-r--r--drivers/nvme/host/core.c55
-rw-r--r--drivers/nvme/host/hwmon.c4
-rw-r--r--drivers/nvme/host/ioctl.c6
-rw-r--r--drivers/nvme/host/multipath.c1
-rw-r--r--drivers/nvme/host/nvme.h8
-rw-r--r--drivers/nvme/host/pci.c10
-rw-r--r--drivers/nvme/target/passthru.c2
-rw-r--r--drivers/of/overlay.c1
-rw-r--r--drivers/pci/controller/pci-hyperv.c139
-rw-r--r--drivers/pci/quirks.c9
-rw-r--r--drivers/perf/arm_pmuv3.c21
-rw-r--r--drivers/phy/amlogic/phy-meson-g12a-mipi-dphy-analog.c2
-rw-r--r--drivers/phy/mediatek/phy-mtk-hdmi-mt8195.c10
-rw-r--r--drivers/phy/qualcomm/phy-qcom-qmp-combo.c5
-rw-r--r--drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c5
-rw-r--r--drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c2
-rw-r--r--drivers/pinctrl/meson/pinctrl-meson-axg.c1
-rw-r--r--drivers/platform/mellanox/mlxbf-pmc.c5
-rw-r--r--drivers/platform/surface/aggregator/controller.c2
-rw-r--r--drivers/platform/surface/surface_aggregator_tabletsw.c10
-rw-r--r--drivers/platform/x86/amd/pmf/core.c42
-rw-r--r--drivers/platform/x86/asus-nb-wmi.c2
-rw-r--r--drivers/platform/x86/intel/ifs/load.c2
-rw-r--r--drivers/platform/x86/intel/int3472/clk_and_regulator.c13
-rw-r--r--drivers/platform/x86/intel/speed_select_if/isst_if_common.c12
-rw-r--r--drivers/power/supply/ab8500_btemp.c6
-rw-r--r--drivers/power/supply/ab8500_fg.c6
-rw-r--r--drivers/power/supply/axp288_fuel_gauge.c2
-rw-r--r--drivers/power/supply/bq24190_charger.c1
-rw-r--r--drivers/power/supply/bq25890_charger.c5
-rw-r--r--drivers/power/supply/bq27xxx_battery.c181
-rw-r--r--drivers/power/supply/bq27xxx_battery_i2c.c3
-rw-r--r--drivers/power/supply/mt6360_charger.c4
-rw-r--r--drivers/power/supply/power_supply_core.c15
-rw-r--r--drivers/power/supply/power_supply_leds.c5
-rw-r--r--drivers/power/supply/power_supply_sysfs.c3
-rw-r--r--drivers/power/supply/rt9467-charger.c2
-rw-r--r--drivers/power/supply/sbs-charger.c2
-rw-r--r--drivers/power/supply/sc27xx_fuel_gauge.c9
-rw-r--r--drivers/regulator/core.c4
-rw-r--r--drivers/regulator/mt6359-regulator.c7
-rw-r--r--drivers/regulator/pca9450-regulator.c4
-rw-r--r--drivers/regulator/qcom-rpmh-regulator.c30
-rw-r--r--drivers/s390/block/dasd_eckd.c33
-rw-r--r--drivers/s390/block/dasd_ioctl.c4
-rw-r--r--drivers/s390/cio/device.c7
-rw-r--r--drivers/s390/cio/qdio.h2
-rw-r--r--drivers/s390/crypto/pkey_api.c3
-rw-r--r--drivers/s390/net/ism_drv.c8
-rw-r--r--drivers/scsi/aacraid/aacraid.h1
-rw-r--r--drivers/scsi/aacraid/commsup.c6
-rw-r--r--drivers/scsi/aacraid/linit.c14
-rw-r--r--drivers/scsi/aacraid/src.c25
-rw-r--r--drivers/scsi/lpfc/lpfc_bsg.c8
-rw-r--r--drivers/scsi/qla2xxx/qla_def.h1
-rw-r--r--drivers/scsi/qla2xxx/qla_init.c3
-rw-r--r--drivers/scsi/qla2xxx/qla_inline.h3
-rw-r--r--drivers/scsi/qla2xxx/qla_isr.c3
-rw-r--r--drivers/scsi/scsi_lib.c5
-rw-r--r--drivers/scsi/stex.c4
-rw-r--r--drivers/scsi/storvsc_drv.c10
-rw-r--r--drivers/soc/fsl/qe/Kconfig4
-rw-r--r--drivers/soc/qcom/Makefile3
-rw-r--r--drivers/soc/qcom/icc-bwmon.c4
-rw-r--r--drivers/soc/qcom/ramp_controller.c2
-rw-r--r--drivers/soc/qcom/rmtfs_mem.c1
-rw-r--r--drivers/soc/qcom/rpmh-rsc.c2
-rw-r--r--drivers/soc/qcom/rpmhpd.c16
-rw-r--r--drivers/soundwire/dmi-quirks.c7
-rw-r--r--drivers/soundwire/qcom.c17
-rw-r--r--drivers/soundwire/stream.c4
-rw-r--r--drivers/spi/spi-cadence-quadspi.c7
-rw-r--r--drivers/spi/spi-cadence.c105
-rw-r--r--drivers/spi/spi-dw-mmio.c8
-rw-r--r--drivers/spi/spi-fsl-dspi.c15
-rw-r--r--drivers/spi/spi-fsl-lpspi.c7
-rw-r--r--drivers/spi/spi-geni-qcom.c4
-rw-r--r--drivers/spi/spi-mt65xx.c3
-rw-r--r--drivers/spi/spi-qup.c37
-rw-r--r--drivers/staging/media/atomisp/i2c/atomisp-ov2680.c4
-rw-r--r--drivers/staging/media/imx/imx8mq-mipi-csi2.c2
-rw-r--r--drivers/staging/octeon/TODO1
-rw-r--r--drivers/target/iscsi/iscsi_target.c2
-rw-r--r--drivers/target/iscsi/iscsi_target_login.c63
-rw-r--r--drivers/target/iscsi/iscsi_target_nego.c74
-rw-r--r--drivers/target/iscsi/iscsi_target_util.c51
-rw-r--r--drivers/target/iscsi/iscsi_target_util.h4
-rw-r--r--drivers/target/target_core_transport.c2
-rw-r--r--drivers/tee/amdtee/amdtee_if.h10
-rw-r--r--drivers/tee/amdtee/call.c30
-rw-r--r--drivers/tee/optee/smc_abi.c4
-rw-r--r--drivers/thermal/intel/int340x_thermal/int3400_thermal.c4
-rw-r--r--drivers/thermal/intel/intel_soc_dts_iosf.c2
-rw-r--r--drivers/thunderbolt/dma_test.c8
-rw-r--r--drivers/thunderbolt/nhi.c34
-rw-r--r--drivers/thunderbolt/nhi_regs.h2
-rw-r--r--drivers/thunderbolt/tb.c17
-rw-r--r--drivers/thunderbolt/tunnel.c2
-rw-r--r--drivers/tty/serial/8250/8250_bcm7271.c7
-rw-r--r--drivers/tty/serial/8250/8250_exar.c17
-rw-r--r--drivers/tty/serial/8250/8250_pci.c5
-rw-r--r--drivers/tty/serial/8250/8250_port.c1
-rw-r--r--drivers/tty/serial/8250/8250_tegra.c4
-rw-r--r--drivers/tty/serial/Kconfig2
-rw-r--r--drivers/tty/serial/arc_uart.c7
-rw-r--r--drivers/tty/serial/cpm_uart/cpm_uart.h2
-rw-r--r--drivers/tty/serial/fsl_lpuart.c46
-rw-r--r--drivers/tty/serial/lantiq.c1
-rw-r--r--drivers/tty/serial/qcom_geni_serial.c9
-rw-r--r--drivers/tty/vt/vc_screen.c11
-rw-r--r--drivers/ufs/core/ufs-mcq.c5
-rw-r--r--drivers/ufs/core/ufshcd.c10
-rw-r--r--drivers/usb/cdns3/cdns3-gadget.c13
-rw-r--r--drivers/usb/class/usbtmc.c2
-rw-r--r--drivers/usb/core/buffer.c41
-rw-r--r--drivers/usb/core/devio.c20
-rw-r--r--drivers/usb/dwc3/core.c7
-rw-r--r--drivers/usb/dwc3/core.h2
-rw-r--r--drivers/usb/dwc3/debugfs.c109
-rw-r--r--drivers/usb/dwc3/dwc3-qcom.c11
-rw-r--r--drivers/usb/dwc3/gadget.c80
-rw-r--r--drivers/usb/gadget/function/f_fs.c2
-rw-r--r--drivers/usb/gadget/function/u_ether.c3
-rw-r--r--drivers/usb/gadget/udc/amd5536udc_pci.c3
-rw-r--r--drivers/usb/gadget/udc/core.c71
-rw-r--r--drivers/usb/gadget/udc/renesas_usb3.c4
-rw-r--r--drivers/usb/host/uhci-pci.c10
-rw-r--r--drivers/usb/host/xhci-pci.c12
-rw-r--r--drivers/usb/host/xhci-ring.c29
-rw-r--r--drivers/usb/host/xhci.h2
-rw-r--r--drivers/usb/serial/option.c16
-rw-r--r--drivers/usb/storage/scsiglue.c28
-rw-r--r--drivers/usb/typec/altmodes/displayport.c4
-rw-r--r--drivers/usb/typec/pd.c2
-rw-r--r--drivers/usb/typec/tipd/core.c5
-rw-r--r--drivers/usb/typec/ucsi/ucsi.c11
-rw-r--r--drivers/vdpa/mlx5/net/mlx5_vnet.c2
-rw-r--r--drivers/vdpa/vdpa_user/vduse_dev.c3
-rw-r--r--drivers/vfio/vfio_iommu_type1.c5
-rw-r--r--drivers/vhost/net.c11
-rw-r--r--drivers/vhost/vdpa.c34
-rw-r--r--drivers/vhost/vhost.c93
-rw-r--r--drivers/vhost/vhost.h10
-rw-r--r--drivers/video/fbdev/Kconfig2
-rw-r--r--drivers/video/fbdev/arcfb.c5
-rw-r--r--drivers/video/fbdev/aty/atyfb_base.c5
-rw-r--r--drivers/video/fbdev/au1100fb.c11
-rw-r--r--drivers/video/fbdev/au1200fb.c6
-rw-r--r--drivers/video/fbdev/broadsheetfb.c5
-rw-r--r--drivers/video/fbdev/bw2.c6
-rw-r--r--drivers/video/fbdev/core/bitblit.c3
-rw-r--r--drivers/video/fbdev/core/fbmem.c2
-rw-r--r--drivers/video/fbdev/i810/i810_dvt.c3
-rw-r--r--drivers/video/fbdev/imsttfb.c12
-rw-r--r--drivers/video/fbdev/matrox/matroxfb_maven.c2
-rw-r--r--drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c3
-rw-r--r--drivers/video/fbdev/ssd1307fb.c2
-rw-r--r--drivers/video/fbdev/stifb.c6
-rw-r--r--drivers/video/fbdev/udlfb.c27
-rw-r--r--drivers/xen/pvcalls-back.c9
-rw-r--r--fs/Kconfig9
-rw-r--r--fs/Makefile4
-rw-r--r--fs/afs/dir.c3
-rw-r--r--fs/afs/vl_probe.c4
-rw-r--r--fs/afs/write.c7
-rw-r--r--fs/aio.c26
-rw-r--r--fs/autofs/root.c6
-rw-r--r--fs/btrfs/async-thread.c44
-rw-r--r--fs/btrfs/async-thread.h3
-rw-r--r--fs/btrfs/bio.c128
-rw-r--r--fs/btrfs/bio.h29
-rw-r--r--fs/btrfs/block-group.c63
-rw-r--r--fs/btrfs/block-group.h9
-rw-r--r--fs/btrfs/block-rsv.c19
-rw-r--r--fs/btrfs/block-rsv.h2
-rw-r--r--fs/btrfs/btrfs_inode.h23
-rw-r--r--fs/btrfs/check-integrity.c21
-rw-r--r--fs/btrfs/compression.c50
-rw-r--r--fs/btrfs/compression.h7
-rw-r--r--fs/btrfs/ctree.c429
-rw-r--r--fs/btrfs/ctree.h6
-rw-r--r--fs/btrfs/defrag.c3
-rw-r--r--fs/btrfs/delayed-ref.c110
-rw-r--r--fs/btrfs/delayed-ref.h25
-rw-r--r--fs/btrfs/dev-replace.c6
-rw-r--r--fs/btrfs/discard.c34
-rw-r--r--fs/btrfs/discard.h1
-rw-r--r--fs/btrfs/disk-io.c483
-rw-r--r--fs/btrfs/disk-io.h7
-rw-r--r--fs/btrfs/extent-io-tree.c37
-rw-r--r--fs/btrfs/extent-io-tree.h62
-rw-r--r--fs/btrfs/extent-tree.c186
-rw-r--r--fs/btrfs/extent-tree.h2
-rw-r--r--fs/btrfs/extent_io.c848
-rw-r--r--fs/btrfs/extent_io.h11
-rw-r--r--fs/btrfs/extent_map.c110
-rw-r--r--fs/btrfs/extent_map.h6
-rw-r--r--fs/btrfs/file-item.c88
-rw-r--r--fs/btrfs/file-item.h1
-rw-r--r--fs/btrfs/file.c12
-rw-r--r--fs/btrfs/free-space-cache.c122
-rw-r--r--fs/btrfs/free-space-cache.h2
-rw-r--r--fs/btrfs/free-space-tree.c3
-rw-r--r--fs/btrfs/fs.h5
-rw-r--r--fs/btrfs/inode-item.h16
-rw-r--r--fs/btrfs/inode.c612
-rw-r--r--fs/btrfs/ioctl.c23
-rw-r--r--fs/btrfs/locking.c5
-rw-r--r--fs/btrfs/lzo.c6
-rw-r--r--fs/btrfs/messages.c8
-rw-r--r--fs/btrfs/messages.h15
-rw-r--r--fs/btrfs/misc.h20
-rw-r--r--fs/btrfs/ordered-data.c364
-rw-r--r--fs/btrfs/ordered-data.h27
-rw-r--r--fs/btrfs/print-tree.c16
-rw-r--r--fs/btrfs/print-tree.h4
-rw-r--r--fs/btrfs/qgroup.c20
-rw-r--r--fs/btrfs/raid56.c49
-rw-r--r--fs/btrfs/raid56.h3
-rw-r--r--fs/btrfs/relocation.c61
-rw-r--r--fs/btrfs/relocation.h3
-rw-r--r--fs/btrfs/scrub.c232
-rw-r--r--fs/btrfs/send.c16
-rw-r--r--fs/btrfs/subpage.c97
-rw-r--r--fs/btrfs/subpage.h12
-rw-r--r--fs/btrfs/super.c7
-rw-r--r--fs/btrfs/tests/extent-io-tests.c16
-rw-r--r--fs/btrfs/transaction.c16
-rw-r--r--fs/btrfs/transaction.h3
-rw-r--r--fs/btrfs/tree-checker.c156
-rw-r--r--fs/btrfs/tree-checker.h29
-rw-r--r--fs/btrfs/tree-log.c60
-rw-r--r--fs/btrfs/tree-log.h2
-rw-r--r--fs/btrfs/tree-mod-log.c257
-rw-r--r--fs/btrfs/volumes.c204
-rw-r--r--fs/btrfs/volumes.h68
-rw-r--r--fs/btrfs/zlib.c2
-rw-r--r--fs/btrfs/zoned.c159
-rw-r--r--fs/btrfs/zoned.h8
-rw-r--r--fs/btrfs/zstd.c2
-rw-r--r--fs/buffer.c1
-rw-r--r--fs/cachefiles/namei.c11
-rw-r--r--fs/ceph/caps.c6
-rw-r--r--fs/ceph/mds_client.c3
-rw-r--r--fs/ceph/snap.c17
-rw-r--r--fs/char_dev.c2
-rw-r--r--fs/coredump.c6
-rw-r--r--fs/crypto/fscrypt_private.h2
-rw-r--r--fs/crypto/hooks.c10
-rw-r--r--fs/d_path.c1
-rw-r--r--fs/erofs/Kconfig1
-rw-r--r--fs/erofs/Makefile4
-rw-r--r--fs/erofs/compress.h3
-rw-r--r--fs/erofs/decompressor.c8
-rw-r--r--fs/erofs/internal.h54
-rw-r--r--fs/erofs/super.c69
-rw-r--r--fs/erofs/utils.c86
-rw-r--r--fs/erofs/xattr.c672
-rw-r--r--fs/erofs/zdata.c271
-rw-r--r--fs/erofs/zmap.c75
-rw-r--r--fs/eventfd.c12
-rw-r--r--fs/eventpoll.c8
-rw-r--r--fs/ext4/balloc.c25
-rw-r--r--fs/ext4/ext4.h5
-rw-r--r--fs/ext4/fsync.c7
-rw-r--r--fs/ext4/inode.c34
-rw-r--r--fs/ext4/mballoc.c16
-rw-r--r--fs/ext4/namei.c17
-rw-r--r--fs/ext4/super.c28
-rw-r--r--fs/ext4/xattr.c47
-rw-r--r--fs/f2fs/namei.c16
-rw-r--r--fs/file_table.c91
-rw-r--r--fs/fs_context.c3
-rw-r--r--fs/gfs2/file.c17
-rw-r--r--fs/inode.c62
-rw-r--r--fs/internal.h48
-rw-r--r--fs/jffs2/build.c5
-rw-r--r--fs/jffs2/xattr.c13
-rw-r--r--fs/jffs2/xattr.h4
-rw-r--r--fs/jfs/namei.c6
-rw-r--r--fs/lockd/svc.c3
-rw-r--r--fs/namei.c50
-rw-r--r--fs/namespace.c476
-rw-r--r--fs/nfs/dir.c4
-rw-r--r--fs/nfs/nfs4proc.c12
-rw-r--r--fs/nfsd/cache.h2
-rw-r--r--fs/nfsd/export.c12
-rw-r--r--fs/nfsd/nfs3proc.c14
-rw-r--r--fs/nfsd/nfs3xdr.c11
-rw-r--r--fs/nfsd/nfs4xdr.c289
-rw-r--r--fs/nfsd/nfscache.c25
-rw-r--r--fs/nfsd/nfsctl.c148
-rw-r--r--fs/nfsd/nfsfh.c26
-rw-r--r--fs/nfsd/nfsproc.c14
-rw-r--r--fs/nfsd/nfssvc.c5
-rw-r--r--fs/nfsd/nfsxdr.c11
-rw-r--r--fs/nfsd/trace.h265
-rw-r--r--fs/nfsd/vfs.c90
-rw-r--r--fs/nfsd/vfs.h9
-rw-r--r--fs/nilfs2/btnode.c12
-rw-r--r--fs/nilfs2/inode.c18
-rw-r--r--fs/nilfs2/page.c10
-rw-r--r--fs/nilfs2/segbuf.c6
-rw-r--r--fs/nilfs2/segment.c7
-rw-r--r--fs/nilfs2/sufile.c9
-rw-r--r--fs/nilfs2/super.c23
-rw-r--r--fs/nilfs2/the_nilfs.c43
-rw-r--r--fs/ntfs/attrib.c2
-rw-r--r--fs/ntfs/compress.c2
-rw-r--r--fs/ntfs/mft.c36
-rw-r--r--fs/ntfs/super.c4
-rw-r--r--fs/ocfs2/file.c8
-rw-r--r--fs/ocfs2/super.c6
-rw-r--r--fs/open.c90
-rw-r--r--fs/overlayfs/file.c8
-rw-r--r--fs/overlayfs/overlayfs.h5
-rw-r--r--fs/pnode.c42
-rw-r--r--fs/pnode.h3
-rw-r--r--fs/readdir.c8
-rw-r--r--fs/remap_range.c5
-rw-r--r--fs/smb/Kconfig11
-rw-r--r--fs/smb/Makefile5
-rw-r--r--fs/smb/client/Kconfig (renamed from fs/cifs/Kconfig)0
-rw-r--r--fs/smb/client/Makefile (renamed from fs/cifs/Makefile)0
-rw-r--r--fs/smb/client/asn1.c (renamed from fs/cifs/asn1.c)0
-rw-r--r--fs/smb/client/cached_dir.c (renamed from fs/cifs/cached_dir.c)0
-rw-r--r--fs/smb/client/cached_dir.h (renamed from fs/cifs/cached_dir.h)0
-rw-r--r--fs/smb/client/cifs_debug.c (renamed from fs/cifs/cifs_debug.c)66
-rw-r--r--fs/smb/client/cifs_debug.h (renamed from fs/cifs/cifs_debug.h)0
-rw-r--r--fs/smb/client/cifs_dfs_ref.c (renamed from fs/cifs/cifs_dfs_ref.c)0
-rw-r--r--fs/smb/client/cifs_fs_sb.h (renamed from fs/cifs/cifs_fs_sb.h)0
-rw-r--r--fs/smb/client/cifs_ioctl.h (renamed from fs/cifs/cifs_ioctl.h)0
-rw-r--r--fs/smb/client/cifs_spnego.c (renamed from fs/cifs/cifs_spnego.c)0
-rw-r--r--fs/smb/client/cifs_spnego.h (renamed from fs/cifs/cifs_spnego.h)0
-rw-r--r--fs/smb/client/cifs_spnego_negtokeninit.asn1 (renamed from fs/cifs/cifs_spnego_negtokeninit.asn1)0
-rw-r--r--fs/smb/client/cifs_swn.c (renamed from fs/cifs/cifs_swn.c)0
-rw-r--r--fs/smb/client/cifs_swn.h (renamed from fs/cifs/cifs_swn.h)0
-rw-r--r--fs/smb/client/cifs_unicode.c (renamed from fs/cifs/cifs_unicode.c)0
-rw-r--r--fs/smb/client/cifs_unicode.h (renamed from fs/cifs/cifs_unicode.h)0
-rw-r--r--fs/smb/client/cifs_uniupr.h (renamed from fs/cifs/cifs_uniupr.h)0
-rw-r--r--fs/smb/client/cifsacl.c (renamed from fs/cifs/cifsacl.c)0
-rw-r--r--fs/smb/client/cifsacl.h (renamed from fs/cifs/cifsacl.h)0
-rw-r--r--fs/smb/client/cifsencrypt.c (renamed from fs/cifs/cifsencrypt.c)2
-rw-r--r--fs/smb/client/cifsfs.c (renamed from fs/cifs/cifsfs.c)0
-rw-r--r--fs/smb/client/cifsfs.h (renamed from fs/cifs/cifsfs.h)0
-rw-r--r--fs/smb/client/cifsglob.h (renamed from fs/cifs/cifsglob.h)43
-rw-r--r--fs/smb/client/cifspdu.h (renamed from fs/cifs/cifspdu.h)2
-rw-r--r--fs/smb/client/cifsproto.h (renamed from fs/cifs/cifsproto.h)1
-rw-r--r--fs/smb/client/cifsroot.c (renamed from fs/cifs/cifsroot.c)0
-rw-r--r--fs/smb/client/cifssmb.c (renamed from fs/cifs/cifssmb.c)0
-rw-r--r--fs/smb/client/connect.c (renamed from fs/cifs/connect.c)59
-rw-r--r--fs/smb/client/dfs.c (renamed from fs/cifs/dfs.c)11
-rw-r--r--fs/smb/client/dfs.h (renamed from fs/cifs/dfs.h)0
-rw-r--r--fs/smb/client/dfs_cache.c (renamed from fs/cifs/dfs_cache.c)0
-rw-r--r--fs/smb/client/dfs_cache.h (renamed from fs/cifs/dfs_cache.h)0
-rw-r--r--fs/smb/client/dir.c (renamed from fs/cifs/dir.c)0
-rw-r--r--fs/smb/client/dns_resolve.c (renamed from fs/cifs/dns_resolve.c)0
-rw-r--r--fs/smb/client/dns_resolve.h (renamed from fs/cifs/dns_resolve.h)0
-rw-r--r--fs/smb/client/export.c (renamed from fs/cifs/export.c)0
-rw-r--r--fs/smb/client/file.c (renamed from fs/cifs/file.c)35
-rw-r--r--fs/smb/client/fs_context.c (renamed from fs/cifs/fs_context.c)8
-rw-r--r--fs/smb/client/fs_context.h (renamed from fs/cifs/fs_context.h)0
-rw-r--r--fs/smb/client/fscache.c (renamed from fs/cifs/fscache.c)0
-rw-r--r--fs/smb/client/fscache.h (renamed from fs/cifs/fscache.h)0
-rw-r--r--fs/smb/client/inode.c (renamed from fs/cifs/inode.c)0
-rw-r--r--fs/smb/client/ioctl.c (renamed from fs/cifs/ioctl.c)6
-rw-r--r--fs/smb/client/link.c (renamed from fs/cifs/link.c)0
-rw-r--r--fs/smb/client/misc.c (renamed from fs/cifs/misc.c)0
-rw-r--r--fs/smb/client/netlink.c (renamed from fs/cifs/netlink.c)0
-rw-r--r--fs/smb/client/netlink.h (renamed from fs/cifs/netlink.h)0
-rw-r--r--fs/smb/client/netmisc.c (renamed from fs/cifs/netmisc.c)0
-rw-r--r--fs/smb/client/nterr.c (renamed from fs/cifs/nterr.c)0
-rw-r--r--fs/smb/client/nterr.h (renamed from fs/cifs/nterr.h)0
-rw-r--r--fs/smb/client/ntlmssp.h (renamed from fs/cifs/ntlmssp.h)0
-rw-r--r--fs/smb/client/readdir.c (renamed from fs/cifs/readdir.c)0
-rw-r--r--fs/smb/client/rfc1002pdu.h (renamed from fs/cifs/rfc1002pdu.h)0
-rw-r--r--fs/smb/client/sess.c (renamed from fs/cifs/sess.c)0
-rw-r--r--fs/smb/client/smb1ops.c (renamed from fs/cifs/smb1ops.c)9
-rw-r--r--fs/smb/client/smb2file.c (renamed from fs/cifs/smb2file.c)0
-rw-r--r--fs/smb/client/smb2glob.h (renamed from fs/cifs/smb2glob.h)0
-rw-r--r--fs/smb/client/smb2inode.c (renamed from fs/cifs/smb2inode.c)0
-rw-r--r--fs/smb/client/smb2maperror.c (renamed from fs/cifs/smb2maperror.c)0
-rw-r--r--fs/smb/client/smb2misc.c (renamed from fs/cifs/smb2misc.c)0
-rw-r--r--fs/smb/client/smb2ops.c (renamed from fs/cifs/smb2ops.c)48
-rw-r--r--fs/smb/client/smb2pdu.c (renamed from fs/cifs/smb2pdu.c)34
-rw-r--r--fs/smb/client/smb2pdu.h (renamed from fs/cifs/smb2pdu.h)0
-rw-r--r--fs/smb/client/smb2proto.h (renamed from fs/cifs/smb2proto.h)0
-rw-r--r--fs/smb/client/smb2status.h (renamed from fs/cifs/smb2status.h)0
-rw-r--r--fs/smb/client/smb2transport.c (renamed from fs/cifs/smb2transport.c)0
-rw-r--r--fs/smb/client/smbdirect.c (renamed from fs/cifs/smbdirect.c)0
-rw-r--r--fs/smb/client/smbdirect.h (renamed from fs/cifs/smbdirect.h)0
-rw-r--r--fs/smb/client/smbencrypt.c (renamed from fs/cifs/smbencrypt.c)2
-rw-r--r--fs/smb/client/smberr.h (renamed from fs/cifs/smberr.h)0
-rw-r--r--fs/smb/client/trace.c (renamed from fs/cifs/trace.c)0
-rw-r--r--fs/smb/client/trace.h (renamed from fs/cifs/trace.h)0
-rw-r--r--fs/smb/client/transport.c (renamed from fs/cifs/transport.c)2
-rw-r--r--fs/smb/client/unc.c (renamed from fs/cifs/unc.c)0
-rw-r--r--fs/smb/client/winucase.c (renamed from fs/cifs/winucase.c)0
-rw-r--r--fs/smb/client/xattr.c (renamed from fs/cifs/xattr.c)0
-rw-r--r--fs/smb/common/Makefile (renamed from fs/smbfs_common/Makefile)4
-rw-r--r--fs/smb/common/arc4.h (renamed from fs/smbfs_common/arc4.h)0
-rw-r--r--fs/smb/common/cifs_arc4.c (renamed from fs/smbfs_common/cifs_arc4.c)0
-rw-r--r--fs/smb/common/cifs_md4.c (renamed from fs/smbfs_common/cifs_md4.c)0
-rw-r--r--fs/smb/common/md4.h (renamed from fs/smbfs_common/md4.h)0
-rw-r--r--fs/smb/common/smb2pdu.h (renamed from fs/smbfs_common/smb2pdu.h)0
-rw-r--r--fs/smb/common/smbfsctl.h (renamed from fs/smbfs_common/smbfsctl.h)0
-rw-r--r--fs/smb/server/Kconfig (renamed from fs/ksmbd/Kconfig)0
-rw-r--r--fs/smb/server/Makefile (renamed from fs/ksmbd/Makefile)0
-rw-r--r--fs/smb/server/asn1.c (renamed from fs/ksmbd/asn1.c)0
-rw-r--r--fs/smb/server/asn1.h (renamed from fs/ksmbd/asn1.h)0
-rw-r--r--fs/smb/server/auth.c (renamed from fs/ksmbd/auth.c)2
-rw-r--r--fs/smb/server/auth.h (renamed from fs/ksmbd/auth.h)0
-rw-r--r--fs/smb/server/connection.c (renamed from fs/ksmbd/connection.c)20
-rw-r--r--fs/smb/server/connection.h (renamed from fs/ksmbd/connection.h)0
-rw-r--r--fs/smb/server/crypto_ctx.c (renamed from fs/ksmbd/crypto_ctx.c)0
-rw-r--r--fs/smb/server/crypto_ctx.h (renamed from fs/ksmbd/crypto_ctx.h)0
-rw-r--r--fs/smb/server/glob.h (renamed from fs/ksmbd/glob.h)0
-rw-r--r--fs/smb/server/ksmbd_netlink.h (renamed from fs/ksmbd/ksmbd_netlink.h)0
-rw-r--r--fs/smb/server/ksmbd_spnego_negtokeninit.asn1 (renamed from fs/ksmbd/ksmbd_spnego_negtokeninit.asn1)0
-rw-r--r--fs/smb/server/ksmbd_spnego_negtokentarg.asn1 (renamed from fs/ksmbd/ksmbd_spnego_negtokentarg.asn1)0
-rw-r--r--fs/smb/server/ksmbd_work.c (renamed from fs/ksmbd/ksmbd_work.c)0
-rw-r--r--fs/smb/server/ksmbd_work.h (renamed from fs/ksmbd/ksmbd_work.h)0
-rw-r--r--fs/smb/server/mgmt/ksmbd_ida.c (renamed from fs/ksmbd/mgmt/ksmbd_ida.c)0
-rw-r--r--fs/smb/server/mgmt/ksmbd_ida.h (renamed from fs/ksmbd/mgmt/ksmbd_ida.h)0
-rw-r--r--fs/smb/server/mgmt/share_config.c (renamed from fs/ksmbd/mgmt/share_config.c)0
-rw-r--r--fs/smb/server/mgmt/share_config.h (renamed from fs/ksmbd/mgmt/share_config.h)0
-rw-r--r--fs/smb/server/mgmt/tree_connect.c (renamed from fs/ksmbd/mgmt/tree_connect.c)0
-rw-r--r--fs/smb/server/mgmt/tree_connect.h (renamed from fs/ksmbd/mgmt/tree_connect.h)0
-rw-r--r--fs/smb/server/mgmt/user_config.c (renamed from fs/ksmbd/mgmt/user_config.c)0
-rw-r--r--fs/smb/server/mgmt/user_config.h (renamed from fs/ksmbd/mgmt/user_config.h)0
-rw-r--r--fs/smb/server/mgmt/user_session.c (renamed from fs/ksmbd/mgmt/user_session.c)0
-rw-r--r--fs/smb/server/mgmt/user_session.h (renamed from fs/ksmbd/mgmt/user_session.h)0
-rw-r--r--fs/smb/server/misc.c (renamed from fs/ksmbd/misc.c)0
-rw-r--r--fs/smb/server/misc.h (renamed from fs/ksmbd/misc.h)0
-rw-r--r--fs/smb/server/ndr.c (renamed from fs/ksmbd/ndr.c)0
-rw-r--r--fs/smb/server/ndr.h (renamed from fs/ksmbd/ndr.h)0
-rw-r--r--fs/smb/server/nterr.h (renamed from fs/ksmbd/nterr.h)0
-rw-r--r--fs/smb/server/ntlmssp.h (renamed from fs/ksmbd/ntlmssp.h)0
-rw-r--r--fs/smb/server/oplock.c (renamed from fs/ksmbd/oplock.c)143
-rw-r--r--fs/smb/server/oplock.h (renamed from fs/ksmbd/oplock.h)2
-rw-r--r--fs/smb/server/server.c (renamed from fs/ksmbd/server.c)33
-rw-r--r--fs/smb/server/server.h (renamed from fs/ksmbd/server.h)0
-rw-r--r--fs/smb/server/smb2misc.c (renamed from fs/ksmbd/smb2misc.c)38
-rw-r--r--fs/smb/server/smb2ops.c (renamed from fs/ksmbd/smb2ops.c)0
-rw-r--r--fs/smb/server/smb2pdu.c (renamed from fs/ksmbd/smb2pdu.c)198
-rw-r--r--fs/smb/server/smb2pdu.h (renamed from fs/ksmbd/smb2pdu.h)0
-rw-r--r--fs/smb/server/smb_common.c (renamed from fs/ksmbd/smb_common.c)14
-rw-r--r--fs/smb/server/smb_common.h (renamed from fs/ksmbd/smb_common.h)2
-rw-r--r--fs/smb/server/smbacl.c (renamed from fs/ksmbd/smbacl.c)14
-rw-r--r--fs/smb/server/smbacl.h (renamed from fs/ksmbd/smbacl.h)0
-rw-r--r--fs/smb/server/smbfsctl.h (renamed from fs/ksmbd/smbfsctl.h)2
-rw-r--r--fs/smb/server/smbstatus.h (renamed from fs/ksmbd/smbstatus.h)2
-rw-r--r--fs/smb/server/transport_ipc.c (renamed from fs/ksmbd/transport_ipc.c)0
-rw-r--r--fs/smb/server/transport_ipc.h (renamed from fs/ksmbd/transport_ipc.h)0
-rw-r--r--fs/smb/server/transport_rdma.c (renamed from fs/ksmbd/transport_rdma.c)0
-rw-r--r--fs/smb/server/transport_rdma.h (renamed from fs/ksmbd/transport_rdma.h)0
-rw-r--r--fs/smb/server/transport_tcp.c (renamed from fs/ksmbd/transport_tcp.c)0
-rw-r--r--fs/smb/server/transport_tcp.h (renamed from fs/ksmbd/transport_tcp.h)0
-rw-r--r--fs/smb/server/unicode.c (renamed from fs/ksmbd/unicode.c)0
-rw-r--r--fs/smb/server/unicode.h (renamed from fs/ksmbd/unicode.h)0
-rw-r--r--fs/smb/server/uniupr.h (renamed from fs/ksmbd/uniupr.h)0
-rw-r--r--fs/smb/server/vfs.c (renamed from fs/ksmbd/vfs.c)130
-rw-r--r--fs/smb/server/vfs.h (renamed from fs/ksmbd/vfs.h)17
-rw-r--r--fs/smb/server/vfs_cache.c (renamed from fs/ksmbd/vfs_cache.c)2
-rw-r--r--fs/smb/server/vfs_cache.h (renamed from fs/ksmbd/vfs_cache.h)0
-rw-r--r--fs/smb/server/xattr.h (renamed from fs/ksmbd/xattr.h)0
-rw-r--r--fs/statfs.c4
-rw-r--r--fs/super.c24
-rw-r--r--fs/sysv/dir.c22
-rw-r--r--fs/sysv/itree.c4
-rw-r--r--fs/sysv/namei.c8
-rw-r--r--fs/udf/namei.c14
-rw-r--r--fs/userfaultfd.c13
-rw-r--r--fs/verity/Kconfig16
-rw-r--r--fs/verity/enable.c21
-rw-r--r--fs/verity/fsverity_private.h23
-rw-r--r--fs/verity/hash_algs.c139
-rw-r--r--fs/verity/measure.c37
-rw-r--r--fs/verity/open.c12
-rw-r--r--fs/verity/read_metadata.c4
-rw-r--r--fs/verity/signature.c8
-rw-r--r--fs/verity/verify.c164
-rw-r--r--fs/xattr.c15
-rw-r--r--fs/xfs/libxfs/xfs_ag.c5
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c91
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h6
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c10
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c7
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c24
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h9
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c13
-rw-r--r--fs/xfs/libxfs/xfs_trans_inode.c113
-rw-r--r--fs/xfs/scrub/bmap.c25
-rw-r--r--fs/xfs/scrub/scrub.h8
-rw-r--r--fs/xfs/xfs_buf_item.c88
-rw-r--r--fs/xfs/xfs_filestream.c1
-rw-r--r--fs/xfs/xfs_icache.c46
-rw-r--r--fs/xfs/xfs_icache.h4
-rw-r--r--fs/xfs/xfs_inode.c20
-rw-r--r--fs/xfs/xfs_inode.h2
-rw-r--r--fs/xfs/xfs_inode_item.c149
-rw-r--r--fs/xfs/xfs_inode_item.h1
-rw-r--r--fs/xfs/xfs_log_recover.c19
-rw-r--r--fs/xfs/xfs_mount.h1
-rw-r--r--fs/xfs/xfs_reflink.c4
-rw-r--r--fs/xfs/xfs_super.c1
-rw-r--r--fs/xfs/xfs_trans.c9
-rw-r--r--fs/zonefs/file.c208
-rw-r--r--fs/zonefs/super.c9
-rw-r--r--fs/zonefs/zonefs.h2
-rw-r--r--include/acpi/acpixf.h1
-rw-r--r--include/asm-generic/vmlinux.lds.h12
-rw-r--r--include/drm/drm_managed.h18
-rw-r--r--include/dt-bindings/power/qcom-rpmpd.h9
-rw-r--r--include/linux/arm_ffa.h1
-rw-r--r--include/linux/blkdev.h2
-rw-r--r--include/linux/compiler.h5
-rw-r--r--include/linux/cper.h6
-rw-r--r--include/linux/cpuhotplug.h1
-rw-r--r--include/linux/device/class.h1
-rw-r--r--include/linux/efi.h2
-rw-r--r--include/linux/eventfd.h8
-rw-r--r--include/linux/firewire.h2
-rw-r--r--include/linux/fs.h104
-rw-r--r--include/linux/fsnotify.h4
-rw-r--r--include/linux/fsverity.h14
-rw-r--r--include/linux/gpio/driver.h8
-rw-r--r--include/linux/highmem.h2
-rw-r--r--include/linux/if_team.h1
-rw-r--r--include/linux/iio/iio-gts-helper.h2
-rw-r--r--include/linux/io_uring.h18
-rw-r--r--include/linux/io_uring_types.h10
-rw-r--r--include/linux/libata.h2
-rw-r--r--include/linux/lockdep.h14
-rw-r--r--include/linux/lockdep_types.h1
-rw-r--r--include/linux/mlx5/driver.h13
-rw-r--r--include/linux/mlx5/mlx5_ifc.h4
-rw-r--r--include/linux/msi.h9
-rw-r--r--include/linux/netdevice.h9
-rw-r--r--include/linux/notifier.h10
-rw-r--r--include/linux/page-flags.h6
-rw-r--r--include/linux/pe.h25
-rw-r--r--include/linux/perf_event.h6
-rw-r--r--include/linux/phy.h2
-rw-r--r--include/linux/pipe_fs_i.h4
-rw-r--r--include/linux/power/bq27xxx_battery.h4
-rw-r--r--include/linux/proc_fs.h2
-rw-r--r--include/linux/regulator/pca9450.h4
-rw-r--r--include/linux/sched/task.h1
-rw-r--r--include/linux/sched/vhost_task.h15
-rw-r--r--include/linux/shrinker.h13
-rw-r--r--include/linux/skbuff.h10
-rw-r--r--include/linux/skmsg.h3
-rw-r--r--include/linux/soc/qcom/llcc-qcom.h6
-rw-r--r--include/linux/sunrpc/svc.h21
-rw-r--r--include/linux/sunrpc/svc_rdma.h7
-rw-r--r--include/linux/sunrpc/svc_xprt.h2
-rw-r--r--include/linux/sunrpc/svcsock.h7
-rw-r--r--include/linux/sunrpc/xdr.h3
-rw-r--r--include/linux/surface_aggregator/device.h6
-rw-r--r--include/linux/tpm.h1
-rw-r--r--include/linux/trace_events.h1
-rw-r--r--include/linux/usb/composite.h2
-rw-r--r--include/linux/usb/hcd.h5
-rw-r--r--include/linux/user_events.h3
-rw-r--r--include/linux/watch_queue.h3
-rw-r--r--include/linux/workqueue.h15
-rw-r--r--include/media/dvb_net.h4
-rw-r--r--include/media/dvbdev.h15
-rw-r--r--include/media/v4l2-subdev.h1
-rw-r--r--include/net/bluetooth/hci.h1
-rw-r--r--include/net/bluetooth/hci_core.h6
-rw-r--r--include/net/bonding.h1
-rw-r--r--include/net/dsa.h8
-rw-r--r--include/net/handshake.h1
-rw-r--r--include/net/ip.h2
-rw-r--r--include/net/mana/mana.h2
-rw-r--r--include/net/neighbour.h2
-rw-r--r--include/net/netfilter/nf_flow_table.h2
-rw-r--r--include/net/netfilter/nf_tables.h35
-rw-r--r--include/net/netns/ipv6.h2
-rw-r--r--include/net/nexthop.h23
-rw-r--r--include/net/page_pool.h18
-rw-r--r--include/net/ping.h6
-rw-r--r--include/net/pkt_sched.h2
-rw-r--r--include/net/rpl.h3
-rw-r--r--include/net/sch_generic.h14
-rw-r--r--include/net/sock.h22
-rw-r--r--include/net/tcp.h11
-rw-r--r--include/net/tls.h1
-rw-r--r--include/net/xfrm.h1
-rw-r--r--include/rdma/ib_addr.h23
-rw-r--r--include/sound/hda-mlink.h14
-rw-r--r--include/sound/soc-acpi.h1
-rw-r--r--include/sound/soc-dpcm.h4
-rw-r--r--include/target/iscsi/iscsi_target_core.h7
-rw-r--r--include/trace/events/btrfs.h39
-rw-r--r--include/trace/events/rpcrdma.h8
-rw-r--r--include/trace/events/sunrpc.h39
-rw-r--r--include/trace/events/writeback.h2
-rw-r--r--include/uapi/linux/bpf.h1
-rw-r--r--include/uapi/linux/ethtool_netlink.h2
-rw-r--r--include/uapi/linux/eventfd.h11
-rw-r--r--include/uapi/linux/handshake.h1
-rw-r--r--include/uapi/linux/in.h1
-rw-r--r--include/uapi/linux/io_uring.h16
-rw-r--r--include/uapi/linux/mount.h3
-rw-r--r--include/uapi/sound/skl-tplg-interface.h3
-rw-r--r--include/uapi/sound/sof/tokens.h3
-rw-r--r--include/ufs/ufshcd.h2
-rw-r--r--init/do_mounts.c6
-rw-r--r--io_uring/cancel.c5
-rw-r--r--io_uring/epoll.c4
-rw-r--r--io_uring/filetable.c11
-rw-r--r--io_uring/filetable.h28
-rw-r--r--io_uring/io-wq.c10
-rw-r--r--io_uring/io_uring.c497
-rw-r--r--io_uring/io_uring.h17
-rw-r--r--io_uring/msg_ring.c4
-rw-r--r--io_uring/net.c79
-rw-r--r--io_uring/poll.c15
-rw-r--r--io_uring/poll.h2
-rw-r--r--io_uring/rsrc.c8
-rw-r--r--io_uring/rw.c6
-rw-r--r--io_uring/rw.h1
-rw-r--r--io_uring/sqpoll.c6
-rw-r--r--io_uring/tctx.c31
-rw-r--r--io_uring/timeout.c6
-rw-r--r--io_uring/uring_cmd.c16
-rw-r--r--kernel/bpf/btf.c20
-rw-r--r--kernel/bpf/hashtab.c6
-rw-r--r--kernel/bpf/map_in_map.c8
-rw-r--r--kernel/bpf/offload.c2
-rw-r--r--kernel/bpf/syscall.c14
-rw-r--r--kernel/bpf/verifier.c12
-rw-r--r--kernel/cgroup/cgroup-v1.c4
-rw-r--r--kernel/cgroup/cgroup.c37
-rw-r--r--kernel/cgroup/legacy_freezer.c8
-rw-r--r--kernel/exit.c5
-rw-r--r--kernel/fork.c15
-rw-r--r--kernel/irq/msi.c4
-rw-r--r--kernel/kexec_file.c14
-rw-r--r--kernel/locking/lockdep.c28
-rw-r--r--kernel/module/decompress.c2
-rw-r--r--kernel/module/main.c4
-rw-r--r--kernel/module/stats.c4
-rw-r--r--kernel/signal.c8
-rw-r--r--kernel/time/tick-common.c13
-rw-r--r--kernel/time/tick-sched.c13
-rw-r--r--kernel/trace/bpf_trace.c12
-rw-r--r--kernel/trace/fprobe.c73
-rw-r--r--kernel/trace/rethook.c4
-rw-r--r--kernel/trace/trace.c44
-rw-r--r--kernel/trace/trace_events.c2
-rw-r--r--kernel/trace/trace_events_hist.c39
-rw-r--r--kernel/trace/trace_events_user.c400
-rw-r--r--kernel/trace/trace_osnoise.c2
-rw-r--r--kernel/trace/trace_output.c2
-rw-r--r--kernel/trace/trace_probe.h2
-rw-r--r--kernel/trace/trace_selftest.c10
-rw-r--r--kernel/vhost_task.c94
-rw-r--r--kernel/watch_queue.c12
-rw-r--r--kernel/workqueue.c13
-rw-r--r--lib/cpu_rmap.c2
-rw-r--r--lib/debugobjects.c17
-rw-r--r--lib/maple_tree.c12
-rw-r--r--lib/radix-tree.c2
-rw-r--r--lib/radix-tree.h8
-rw-r--r--lib/test_firmware.c81
-rw-r--r--lib/test_vmalloc.c2
-rw-r--r--lib/xarray.c6
-rw-r--r--mm/Kconfig.debug1
-rw-r--r--mm/damon/core.c2
-rw-r--r--mm/filemap.c26
-rw-r--r--mm/gup_test.c1
-rw-r--r--mm/kfence/kfence.h2
-rw-r--r--mm/khugepaged.c1
-rw-r--r--mm/memfd.c9
-rw-r--r--mm/mmap.c37
-rw-r--r--mm/mprotect.c2
-rw-r--r--mm/page_table_check.c6
-rw-r--r--mm/shrinker_debug.c54
-rw-r--r--mm/vmalloc.c17
-rw-r--r--mm/vmscan.c130
-rw-r--r--mm/zsmalloc.c36
-rw-r--r--mm/zswap.c27
-rw-r--r--net/8021q/vlan_dev.c4
-rw-r--r--net/atm/resources.c2
-rw-r--r--net/batman-adv/distributed-arp-table.c2
-rw-r--r--net/bluetooth/hci_conn.c99
-rw-r--r--net/bluetooth/hci_core.c10
-rw-r--r--net/bluetooth/hci_event.c44
-rw-r--r--net/bluetooth/hci_sync.c23
-rw-r--r--net/bluetooth/l2cap_core.c13
-rw-r--r--net/bridge/br_private_tunnel.h8
-rw-r--r--net/can/isotp.c2
-rw-r--r--net/can/j1939/main.c24
-rw-r--r--net/can/j1939/socket.c7
-rw-r--r--net/core/dev.c8
-rw-r--r--net/core/page_pool.c28
-rw-r--r--net/core/rtnetlink.c54
-rw-r--r--net/core/skbuff.c4
-rw-r--r--net/core/skmsg.c82
-rw-r--r--net/core/sock.c8
-rw-r--r--net/core/sock_map.c3
-rw-r--r--net/dccp/proto.c3
-rw-r--r--net/devlink/core.c16
-rw-r--r--net/devlink/devl_internal.h1
-rw-r--r--net/devlink/leftover.c5
-rw-r--r--net/dsa/dsa.c24
-rw-r--r--net/handshake/handshake-test.c44
-rw-r--r--net/handshake/netlink.c12
-rw-r--r--net/handshake/tlshd.c8
-rw-r--r--net/ieee802154/trace.h2
-rw-r--r--net/ipv4/af_inet.c2
-rw-r--r--net/ipv4/esp4_offload.c3
-rw-r--r--net/ipv4/inet_connection_sock.c1
-rw-r--r--net/ipv4/ip_sockglue.c12
-rw-r--r--net/ipv4/raw.c5
-rw-r--r--net/ipv4/sysctl_net_ipv4.c8
-rw-r--r--net/ipv4/tcp.c20
-rw-r--r--net/ipv4/tcp_bpf.c79
-rw-r--r--net/ipv4/tcp_input.c2
-rw-r--r--net/ipv4/tcp_ipv4.c5
-rw-r--r--net/ipv4/tcp_offload.c19
-rw-r--r--net/ipv4/tcp_timer.c16
-rw-r--r--net/ipv4/udp.c7
-rw-r--r--net/ipv4/udplite.c4
-rw-r--r--net/ipv4/xfrm4_input.c1
-rw-r--r--net/ipv6/esp6_offload.c3
-rw-r--r--net/ipv6/exthdrs.c29
-rw-r--r--net/ipv6/exthdrs_core.c2
-rw-r--r--net/ipv6/ip6_fib.c16
-rw-r--r--net/ipv6/ip6_gre.c13
-rw-r--r--net/ipv6/ping.c3
-rw-r--r--net/ipv6/raw.c3
-rw-r--r--net/ipv6/route.c4
-rw-r--r--net/ipv6/udplite.c6
-rw-r--r--net/ipv6/xfrm6_input.c3
-rw-r--r--net/key/af_key.c12
-rw-r--r--net/mac80211/cfg.c16
-rw-r--r--net/mac80211/chan.c75
-rw-r--r--net/mac80211/he.c15
-rw-r--r--net/mac80211/ieee80211_i.h5
-rw-r--r--net/mac80211/link.c4
-rw-r--r--net/mac80211/mlme.c13
-rw-r--r--net/mac80211/rx.c6
-rw-r--r--net/mac80211/trace.h2
-rw-r--r--net/mac80211/tx.c13
-rw-r--r--net/mac80211/util.c6
-rw-r--r--net/mac802154/trace.h2
-rw-r--r--net/mptcp/pm.c23
-rw-r--r--net/mptcp/pm_netlink.c19
-rw-r--r--net/mptcp/pm_userspace.c48
-rw-r--r--net/mptcp/protocol.c298
-rw-r--r--net/mptcp/protocol.h21
-rw-r--r--net/mptcp/subflow.c45
-rw-r--r--net/netfilter/ipset/ip_set_core.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c3
-rw-r--r--net/netfilter/nf_conntrack_netlink.c4
-rw-r--r--net/netfilter/nf_flow_table_core.c13
-rw-r--r--net/netfilter/nf_flow_table_ip.c4
-rw-r--r--net/netfilter/nf_tables_api.c431
-rw-r--r--net/netfilter/nfnetlink.c3
-rw-r--r--net/netfilter/nfnetlink_osf.c1
-rw-r--r--net/netfilter/nft_bitwise.c2
-rw-r--r--net/netfilter/nft_immediate.c90
-rw-r--r--net/netfilter/nft_set_bitmap.c5
-rw-r--r--net/netfilter/nft_set_hash.c23
-rw-r--r--net/netfilter/nft_set_pipapo.c75
-rw-r--r--net/netfilter/nft_set_rbtree.c25
-rw-r--r--net/netfilter/xt_osf.c1
-rw-r--r--net/netlabel/netlabel_kapi.c3
-rw-r--r--net/netlink/af_netlink.c2
-rw-r--r--net/netrom/nr_subr.c7
-rw-r--r--net/nsh/nsh.c8
-rw-r--r--net/openvswitch/datapath.c19
-rw-r--r--net/openvswitch/vport.c18
-rw-r--r--net/packet/af_packet.c8
-rw-r--r--net/packet/diag.c2
-rw-r--r--net/rxrpc/af_rxrpc.c1
-rw-r--r--net/rxrpc/ar-internal.h1
-rw-r--r--net/rxrpc/local_event.c11
-rw-r--r--net/sched/act_ct.c9
-rw-r--r--net/sched/act_pedit.c48
-rw-r--r--net/sched/act_police.c10
-rw-r--r--net/sched/cls_api.c15
-rw-r--r--net/sched/cls_flower.c3
-rw-r--r--net/sched/cls_u32.c18
-rw-r--r--net/sched/sch_api.c88
-rw-r--r--net/sched/sch_fq_pie.c10
-rw-r--r--net/sched/sch_generic.c44
-rw-r--r--net/sched/sch_ingress.c16
-rw-r--r--net/sched/sch_mq.c8
-rw-r--r--net/sched/sch_mqprio.c8
-rw-r--r--net/sched/sch_netem.c8
-rw-r--r--net/sched/sch_pie.c5
-rw-r--r--net/sched/sch_red.c5
-rw-r--r--net/sched/sch_sfq.c5
-rw-r--r--net/sched/sch_taprio.c9
-rw-r--r--net/sched/sch_teql.c2
-rw-r--r--net/sctp/sm_sideeffect.c5
-rw-r--r--net/sctp/sm_statefuns.c2
-rw-r--r--net/sctp/transport.c11
-rw-r--r--net/smc/af_smc.c9
-rw-r--r--net/smc/smc_core.c1
-rw-r--r--net/smc/smc_llc.c13
-rw-r--r--net/socket.c1
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c10
-rw-r--r--net/sunrpc/sched.c5
-rw-r--r--net/sunrpc/svc.c60
-rw-r--r--net/sunrpc/svc_xprt.c50
-rw-r--r--net/sunrpc/svcsock.c108
-rw-r--r--net/sunrpc/xdr.c26
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c8
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c47
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c24
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c62
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c20
-rw-r--r--net/tipc/bearer.c21
-rw-r--r--net/tipc/bearer.h3
-rw-r--r--net/tipc/link.c9
-rw-r--r--net/tipc/udp_media.c5
-rw-r--r--net/tls/tls.h5
-rw-r--r--net/tls/tls_device.c22
-rw-r--r--net/tls/tls_strp.c189
-rw-r--r--net/tls/tls_sw.c8
-rw-r--r--net/unix/af_unix.c7
-rw-r--r--net/vmw_vsock/af_vsock.c2
-rw-r--r--net/vmw_vsock/virtio_transport_common.c5
-rw-r--r--net/wireless/core.c4
-rw-r--r--net/wireless/nl80211.c2
-rw-r--r--net/wireless/rdev-ops.h6
-rw-r--r--net/wireless/reg.c7
-rw-r--r--net/wireless/scan.c6
-rw-r--r--net/wireless/util.c9
-rw-r--r--net/xfrm/xfrm_device.c2
-rw-r--r--net/xfrm/xfrm_input.c8
-rw-r--r--net/xfrm/xfrm_policy.c34
-rw-r--r--net/xfrm/xfrm_user.c15
-rw-r--r--rust/alloc/README.md3
-rw-r--r--rust/alloc/alloc.rs55
-rw-r--r--rust/alloc/boxed.rs446
-rw-r--r--rust/alloc/collections/mod.rs5
-rw-r--r--rust/alloc/lib.rs71
-rw-r--r--rust/alloc/raw_vec.rs16
-rw-r--r--rust/alloc/slice.rs445
-rw-r--r--rust/alloc/vec/drain.rs81
-rw-r--r--rust/alloc/vec/drain_filter.rs60
-rw-r--r--rust/alloc/vec/into_iter.rs125
-rw-r--r--rust/alloc/vec/is_zero.rs96
-rw-r--r--rust/alloc/vec/mod.rs464
-rw-r--r--rust/alloc/vec/set_len_on_drop.rs5
-rw-r--r--rust/alloc/vec/spec_extend.rs63
-rw-r--r--rust/bindings/bindings_helper.h1
-rw-r--r--rust/bindings/lib.rs1
-rw-r--r--rust/helpers.c7
-rw-r--r--rust/kernel/build_assert.rs2
-rw-r--r--rust/kernel/error.rs61
-rw-r--r--rust/kernel/init.rs5
-rw-r--r--rust/kernel/init/macros.rs85
-rw-r--r--rust/kernel/lib.rs4
-rw-r--r--rust/kernel/std_vendor.rs2
-rw-r--r--rust/kernel/str.rs22
-rw-r--r--rust/kernel/sync/arc.rs25
-rw-r--r--rust/kernel/task.rs10
-rw-r--r--rust/kernel/types.rs13
-rw-r--r--rust/macros/helpers.rs86
-rw-r--r--rust/macros/pin_data.rs168
-rw-r--r--rust/macros/quote.rs14
-rw-r--r--rust/uapi/lib.rs1
-rw-r--r--samples/bpf/hbm.c1
-rw-r--r--scripts/Makefile.build2
-rw-r--r--scripts/gdb/linux/constants.py.in12
-rwxr-xr-xscripts/gfp-translate6
-rwxr-xr-xscripts/min-tool-version.sh2
-rw-r--r--scripts/mod/modpost.c5
-rw-r--r--scripts/orc_hash.sh16
-rw-r--r--security/integrity/ima/ima_api.c31
-rw-r--r--security/selinux/Makefile6
-rw-r--r--sound/core/oss/pcm_plugin.h16
-rw-r--r--sound/core/seq/oss/seq_oss_midi.c35
-rw-r--r--sound/firewire/digi00x/digi00x-stream.c4
-rw-r--r--sound/hda/hdac_device.c2
-rw-r--r--sound/isa/gus/gus_pcm.c2
-rw-r--r--sound/pci/cmipci.c6
-rw-r--r--sound/pci/cs46xx/cs46xx_lib.c2
-rw-r--r--sound/pci/hda/hda_codec.c6
-rw-r--r--sound/pci/hda/hda_generic.c7
-rw-r--r--sound/pci/hda/patch_ca0132.c1
-rw-r--r--sound/pci/hda/patch_hdmi.c5
-rw-r--r--sound/pci/hda/patch_realtek.c52
-rw-r--r--sound/pci/ice1712/aureon.c7
-rw-r--r--sound/pci/ice1712/ice1712.c14
-rw-r--r--sound/pci/ice1712/ice1724.c16
-rw-r--r--sound/pci/ymfpci/ymfpci_main.c6
-rw-r--r--sound/soc/amd/ps/pci-ps.c3
-rw-r--r--sound/soc/amd/ps/ps-pdm-dma.c10
-rw-r--r--sound/soc/amd/yc/acp6x-mach.c14
-rw-r--r--sound/soc/codecs/cs35l41-lib.c6
-rw-r--r--sound/soc/codecs/cs35l56.c6
-rw-r--r--sound/soc/codecs/lpass-tx-macro.c5
-rw-r--r--sound/soc/codecs/max98363.c4
-rw-r--r--sound/soc/codecs/nau8824.c24
-rw-r--r--sound/soc/codecs/rt5682-i2c.c4
-rw-r--r--sound/soc/codecs/rt5682.c6
-rw-r--r--sound/soc/codecs/rt5682.h1
-rw-r--r--sound/soc/codecs/ssm2602.c15
-rw-r--r--sound/soc/codecs/wcd938x-sdw.c1
-rw-r--r--sound/soc/codecs/wsa881x.c1
-rw-r--r--sound/soc/codecs/wsa883x.c1
-rw-r--r--sound/soc/dwc/dwc-i2s.c45
-rw-r--r--sound/soc/fsl/fsl_micfil.c14
-rw-r--r--sound/soc/fsl/fsl_sai.c11
-rw-r--r--sound/soc/fsl/fsl_sai.h1
-rw-r--r--sound/soc/generic/simple-card-utils.c2
-rw-r--r--sound/soc/generic/simple-card.c1
-rw-r--r--sound/soc/intel/avs/apl.c6
-rw-r--r--sound/soc/intel/avs/avs.h4
-rw-r--r--sound/soc/intel/avs/board_selection.c2
-rw-r--r--sound/soc/intel/avs/control.c22
-rw-r--r--sound/soc/intel/avs/dsp.c4
-rw-r--r--sound/soc/intel/avs/messages.h2
-rw-r--r--sound/soc/intel/avs/path.h2
-rw-r--r--sound/soc/intel/avs/pcm.c23
-rw-r--r--sound/soc/intel/avs/probes.c2
-rw-r--r--sound/soc/intel/boards/sof_sdw.c2
-rw-r--r--sound/soc/jz4740/jz4740-i2s.c54
-rw-r--r--sound/soc/mediatek/mt8186/mt8186-afe-clk.c6
-rw-r--r--sound/soc/mediatek/mt8186/mt8186-afe-clk.h1
-rw-r--r--sound/soc/mediatek/mt8186/mt8186-afe-pcm.c4
-rw-r--r--sound/soc/mediatek/mt8186/mt8186-audsys-clk.c46
-rw-r--r--sound/soc/mediatek/mt8186/mt8186-audsys-clk.h1
-rw-r--r--sound/soc/mediatek/mt8188/mt8188-afe-clk.c7
-rw-r--r--sound/soc/mediatek/mt8188/mt8188-afe-clk.h1
-rw-r--r--sound/soc/mediatek/mt8188/mt8188-afe-pcm.c4
-rw-r--r--sound/soc/mediatek/mt8188/mt8188-audsys-clk.c47
-rw-r--r--sound/soc/mediatek/mt8188/mt8188-audsys-clk.h1
-rw-r--r--sound/soc/mediatek/mt8195/mt8195-afe-clk.c5
-rw-r--r--sound/soc/mediatek/mt8195/mt8195-afe-clk.h1
-rw-r--r--sound/soc/mediatek/mt8195/mt8195-afe-pcm.c4
-rw-r--r--sound/soc/mediatek/mt8195/mt8195-audsys-clk.c47
-rw-r--r--sound/soc/mediatek/mt8195/mt8195-audsys-clk.h1
-rw-r--r--sound/soc/soc-pcm.c20
-rw-r--r--sound/soc/sof/amd/acp-ipc.c7
-rw-r--r--sound/soc/sof/debug.c4
-rw-r--r--sound/soc/sof/intel/hda-mlink.c96
-rw-r--r--sound/soc/sof/ipc3-topology.c7
-rw-r--r--sound/soc/sof/ipc4-topology.c4
-rw-r--r--sound/soc/sof/pcm.c11
-rw-r--r--sound/soc/sof/pm.c14
-rw-r--r--sound/soc/sof/sof-client-probes.c14
-rw-r--r--sound/soc/sof/topology.c6
-rw-r--r--sound/soc/tegra/tegra_pcm.c3
-rw-r--r--sound/usb/format.c1
-rw-r--r--sound/usb/pcm.c4
-rw-r--r--sound/usb/quirks.c2
-rw-r--r--tools/arch/arm64/include/uapi/asm/kvm.h36
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h26
-rw-r--r--tools/arch/x86/include/asm/disabled-features.h8
-rw-r--r--tools/arch/x86/include/asm/msr-index.h2
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h3
-rw-r--r--tools/arch/x86/include/uapi/asm/prctl.h8
-rw-r--r--tools/arch/x86/include/uapi/asm/unistd_32.h3
-rw-r--r--tools/arch/x86/lib/memcpy_64.S34
-rw-r--r--tools/arch/x86/lib/memset_64.S47
-rw-r--r--tools/gpio/lsgpio.c2
-rw-r--r--tools/include/asm/alternative.h3
-rw-r--r--tools/include/linux/coresight-pmu.h13
-rw-r--r--tools/include/uapi/drm/drm.h57
-rw-r--r--tools/include/uapi/drm/i915_drm.h25
-rw-r--r--tools/include/uapi/linux/bpf.h1
-rw-r--r--tools/include/uapi/linux/const.h2
-rw-r--r--tools/include/uapi/linux/in.h2
-rw-r--r--tools/include/uapi/linux/kvm.h12
-rw-r--r--tools/include/uapi/linux/prctl.h2
-rw-r--r--tools/include/uapi/sound/asound.h14
-rw-r--r--tools/lib/bpf/libbpf.c3
-rw-r--r--tools/lib/bpf/libbpf_probes.c2
-rw-r--r--tools/net/ynl/lib/ynl.py5
-rw-r--r--tools/objtool/check.c1
-rw-r--r--tools/perf/Makefile.config7
-rw-r--r--tools/perf/Makefile.perf25
-rw-r--r--tools/perf/arch/arm/util/cs-etm.c9
-rw-r--r--tools/perf/arch/arm/util/pmu.c2
-rw-r--r--tools/perf/arch/arm64/util/header.c4
-rw-r--r--tools/perf/arch/arm64/util/pmu.c2
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/bench/mem-memcpy-x86-64-asm-def.h4
-rw-r--r--tools/perf/bench/mem-memcpy-x86-64-asm.S2
-rw-r--r--tools/perf/bench/mem-memset-x86-64-asm-def.h4
-rw-r--r--tools/perf/bench/mem-memset-x86-64-asm.S2
-rw-r--r--tools/perf/builtin-ftrace.c2
-rw-r--r--tools/perf/builtin-script.c7
-rw-r--r--tools/perf/builtin-stat.c38
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json26
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json14
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json12
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json12
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json12
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json12
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json12
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json12
-rw-r--r--tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json12
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json12
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json12
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json12
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json12
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json12
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json12
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json12
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json12
-rw-r--r--tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json12
-rwxr-xr-xtools/perf/pmu-events/jevents.py4
-rw-r--r--tools/perf/pmu-events/pmu-events.h1
-rw-r--r--tools/perf/tests/attr.py6
-rw-r--r--tools/perf/tests/attr/base-stat2
-rw-r--r--tools/perf/tests/attr/test-stat-default80
-rw-r--r--tools/perf/tests/attr/test-stat-detailed-195
-rw-r--r--tools/perf/tests/attr/test-stat-detailed-2119
-rw-r--r--tools/perf/tests/attr/test-stat-detailed-3127
-rw-r--r--tools/perf/tests/expr.c3
-rw-r--r--tools/perf/tests/parse-metric.c1
-rwxr-xr-xtools/perf/tests/shell/stat.sh13
-rwxr-xr-xtools/perf/tests/shell/test_intel_pt.sh7
-rwxr-xr-xtools/perf/tests/shell/test_java_symbol.sh2
-rw-r--r--tools/perf/trace/beauty/arch_prctl.c2
-rwxr-xr-xtools/perf/trace/beauty/x86_arch_prctl.sh1
-rw-r--r--tools/perf/util/Build2
-rw-r--r--tools/perf/util/bpf_skel/lock_contention.bpf.c2
-rw-r--r--tools/perf/util/bpf_skel/sample_filter.bpf.c4
-rw-r--r--tools/perf/util/bpf_skel/vmlinux.h1
-rw-r--r--tools/perf/util/cs-etm.h13
-rw-r--r--tools/perf/util/evsel.c38
-rw-r--r--tools/perf/util/evsel.h7
-rw-r--r--tools/perf/util/expr.y6
-rw-r--r--tools/perf/util/metricgroup.c10
-rw-r--r--tools/perf/util/parse-events.c23
-rw-r--r--tools/perf/util/stat-display.c2
-rw-r--r--tools/perf/util/stat-shadow.c25
-rw-r--r--tools/perf/util/symbol-elf.c27
-rw-r--r--tools/power/cpupower/lib/powercap.c23
-rw-r--r--tools/power/cpupower/utils/idle_monitor/mperf_monitor.c31
-rw-r--r--tools/testing/cxl/Kbuild1
-rw-r--r--tools/testing/cxl/test/mem.c1
-rw-r--r--tools/testing/cxl/test/mock.c15
-rw-r--r--tools/testing/radix-tree/Makefile5
-rw-r--r--tools/testing/selftests/alsa/pcm-test.c10
-rw-r--r--tools/testing/selftests/bpf/Makefile2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/inner_array_lookup.c31
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c131
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h390
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c370
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_sk.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/subprogs_extable.c29
-rw-r--r--tools/testing/selftests/bpf/progs/inner_array_lookup.c45
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_drop_prog.c32
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_kern.h12
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c32
-rw-r--r--tools/testing/selftests/bpf/progs/test_subprogs_extable.c51
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_spill_fill.c79
-rw-r--r--tools/testing/selftests/ftrace/Makefile3
-rwxr-xr-xtools/testing/selftests/ftrace/ftracetest63
-rwxr-xr-xtools/testing/selftests/ftrace/ftracetest-ktap8
-rw-r--r--tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc45
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack-legacy.tc24
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack.tc5
-rwxr-xr-xtools/testing/selftests/gpio/gpio-sim.sh3
-rw-r--r--tools/testing/selftests/kselftest_harness.h6
-rw-r--r--tools/testing/selftests/kvm/Makefile1
-rw-r--r--tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c74
-rw-r--r--tools/testing/selftests/mm/Makefile13
-rw-r--r--tools/testing/selftests/net/.gitignore3
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh27
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh2
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/hw_stats_l3.sh11
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh4
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh4
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile2
-rw-r--r--tools/testing/selftests/net/mptcp/config1
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh46
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh24
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh541
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh104
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_sockopt.c18
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh24
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh31
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh4
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh17
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh17
-rw-r--r--tools/testing/selftests/net/tls.c24
-rwxr-xr-xtools/testing/selftests/net/vrf-xfrm-tests.sh32
-rw-r--r--tools/testing/selftests/ptp/testptp.c6
-rw-r--r--tools/testing/selftests/sgx/Makefile1
-rw-r--r--tools/testing/selftests/tc-testing/config6
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfb.json4
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.sh1
-rw-r--r--tools/testing/selftests/user_events/dyn_test.c177
-rw-r--r--tools/testing/selftests/user_events/ftrace_test.c88
-rw-r--r--tools/testing/selftests/user_events/perf_test.c82
-rw-r--r--tools/virtio/ringtest/.gitignore7
-rw-r--r--tools/virtio/ringtest/main.h11
-rw-r--r--tools/virtio/virtio-trace/README2
-rw-r--r--tools/virtio/virtio-trace/trace-agent.c12
-rw-r--r--virt/kvm/kvm_main.c79
1981 files changed, 24904 insertions, 14096 deletions
diff --git a/.gitattributes b/.gitattributes
index c9ba5bfc4036..2325c529e185 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -2,3 +2,4 @@
*.[ch] diff=cpp
*.dts diff=dts
*.dts[io] diff=dts
+*.rs diff=rust
diff --git a/.mailmap b/.mailmap
index 71127b2608d2..4d71480e193f 100644
--- a/.mailmap
+++ b/.mailmap
@@ -70,6 +70,8 @@ Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang@unisoc.com>
Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang7@gmail.com>
Bart Van Assche <bvanassche@acm.org> <bart.vanassche@sandisk.com>
Bart Van Assche <bvanassche@acm.org> <bart.vanassche@wdc.com>
+Ben Dooks <ben-linux@fluff.org> <ben.dooks@simtec.co.uk>
+Ben Dooks <ben-linux@fluff.org> <ben.dooks@sifive.com>
Ben Gardner <bgardner@wabtec.com>
Ben M Cahill <ben.m.cahill@intel.com>
Ben Widawsky <bwidawsk@kernel.org> <ben@bwidawsk.net>
@@ -181,6 +183,8 @@ Henrik Rydberg <rydberg@bitmath.org>
Herbert Xu <herbert@gondor.apana.org.au>
Huacai Chen <chenhuacai@kernel.org> <chenhc@lemote.com>
Huacai Chen <chenhuacai@kernel.org> <chenhuacai@loongson.cn>
+J. Bruce Fields <bfields@fieldses.org> <bfields@redhat.com>
+J. Bruce Fields <bfields@fieldses.org> <bfields@citi.umich.edu>
Jacob Shin <Jacob.Shin@amd.com>
Jaegeuk Kim <jaegeuk@kernel.org> <jaegeuk@google.com>
Jaegeuk Kim <jaegeuk@kernel.org> <jaegeuk.kim@samsung.com>
@@ -233,6 +237,7 @@ Jisheng Zhang <jszhang@kernel.org> <Jisheng.Zhang@synaptics.com>
Johan Hovold <johan@kernel.org> <jhovold@gmail.com>
Johan Hovold <johan@kernel.org> <johan@hovoldconsulting.com>
John Crispin <john@phrozen.org> <blogic@openwrt.org>
+John Keeping <john@keeping.me.uk> <john@metanate.com>
John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
John Stultz <johnstul@us.ibm.com>
<jon.toppins+linux@gmail.com> <jtoppins@cumulusnetworks.com>
@@ -364,6 +369,11 @@ Nicolas Pitre <nico@fluxnic.net> <nico@linaro.org>
Nicolas Saenz Julienne <nsaenz@kernel.org> <nsaenzjulienne@suse.de>
Nicolas Saenz Julienne <nsaenz@kernel.org> <nsaenzjulienne@suse.com>
Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
+Nikolay Aleksandrov <razor@blackwall.org> <naleksan@redhat.com>
+Nikolay Aleksandrov <razor@blackwall.org> <nikolay@redhat.com>
+Nikolay Aleksandrov <razor@blackwall.org> <nikolay@cumulusnetworks.com>
+Nikolay Aleksandrov <razor@blackwall.org> <nikolay@nvidia.com>
+Nikolay Aleksandrov <razor@blackwall.org> <nikolay@isovalent.com>
Oleksandr Natalenko <oleksandr@natalenko.name> <oleksandr@redhat.com>
Oleksij Rempel <linux@rempel-privat.de> <bug-track@fisher-privat.net>
Oleksij Rempel <linux@rempel-privat.de> <external.Oleksij.Rempel@de.bosch.com>
diff --git a/CREDITS b/CREDITS
index 2d9da9a7defa..de7e4dbbc599 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1706,6 +1706,10 @@ S: Panoramastrasse 18
S: D-69126 Heidelberg
S: Germany
+N: Neil Horman
+M: nhorman@tuxdriver.com
+D: SCTP protocol maintainer.
+
N: Simon Horman
M: horms@verge.net.au
D: Renesas ARM/ARM64 SoC maintainer
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 7544ce00e0cb..c63358c38a1d 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1213,23 +1213,25 @@ PAGE_SIZE multiple when read back.
A read-write single value file which exists on non-root
cgroups. The default is "max".
- Memory usage throttle limit. This is the main mechanism to
- control memory usage of a cgroup. If a cgroup's usage goes
+ Memory usage throttle limit. If a cgroup's usage goes
over the high boundary, the processes of the cgroup are
throttled and put under heavy reclaim pressure.
Going over the high limit never invokes the OOM killer and
- under extreme conditions the limit may be breached.
+ under extreme conditions the limit may be breached. The high
+ limit should be used in scenarios where an external process
+ monitors the limited cgroup to alleviate heavy reclaim
+ pressure.
memory.max
A read-write single value file which exists on non-root
cgroups. The default is "max".
- Memory usage hard limit. This is the final protection
- mechanism. If a cgroup's memory usage reaches this limit and
- can't be reduced, the OOM killer is invoked in the cgroup.
- Under certain circumstances, the usage may go over the limit
- temporarily.
+ Memory usage hard limit. This is the main mechanism to limit
+ memory usage of a cgroup. If a cgroup's memory usage reaches
+ this limit and can't be reduced, the OOM killer is invoked in
+ the cgroup. Under certain circumstances, the usage may go
+ over the limit temporarily.
In default configuration regular 0-order allocations always
succeed unless OOM killer chooses current task as a victim.
@@ -1238,10 +1240,6 @@ PAGE_SIZE multiple when read back.
Caller could retry them differently, return into userspace
as -ENOMEM or silently ignore in cases like disk readahead.
- This is the ultimate protection mechanism. As long as the
- high limit is used and monitored properly, this limit's
- utility is limited to providing the final safety net.
-
memory.reclaim
A write-only nested-keyed file which exists for all cgroups.
diff --git a/Documentation/admin-guide/cifs/changes.rst b/Documentation/admin-guide/cifs/changes.rst
index 3147bbae9c43..8c42c4de510b 100644
--- a/Documentation/admin-guide/cifs/changes.rst
+++ b/Documentation/admin-guide/cifs/changes.rst
@@ -5,5 +5,5 @@ Changes
See https://wiki.samba.org/index.php/LinuxCIFSKernel for summary
information about fixes/improvements to CIFS/SMB2/SMB3 support (changes
to cifs.ko module) by kernel version (and cifs internal module version).
-This may be easier to read than parsing the output of "git log fs/cifs"
-by release.
+This may be easier to read than parsing the output of
+"git log fs/smb/client" by release.
diff --git a/Documentation/admin-guide/cifs/usage.rst b/Documentation/admin-guide/cifs/usage.rst
index 2e151cd8c2e4..5f936b4b6018 100644
--- a/Documentation/admin-guide/cifs/usage.rst
+++ b/Documentation/admin-guide/cifs/usage.rst
@@ -45,7 +45,7 @@ Installation instructions
If you have built the CIFS vfs as module (successfully) simply
type ``make modules_install`` (or if you prefer, manually copy the file to
-the modules directory e.g. /lib/modules/2.4.10-4GB/kernel/fs/cifs/cifs.ko).
+the modules directory e.g. /lib/modules/6.3.0-060300-generic/kernel/fs/smb/client/cifs.ko).
If you have built the CIFS vfs into the kernel itself, follow the instructions
for your distribution on how to install a new kernel (usually you
@@ -66,15 +66,15 @@ If cifs is built as a module, then the size and number of network buffers
and maximum number of simultaneous requests to one server can be configured.
Changing these from their defaults is not recommended. By executing modinfo::
- modinfo kernel/fs/cifs/cifs.ko
+ modinfo <path to cifs.ko>
-on kernel/fs/cifs/cifs.ko the list of configuration changes that can be made
+on kernel/fs/smb/client/cifs.ko the list of configuration changes that can be made
at module initialization time (by running insmod cifs.ko) can be seen.
Recommendations
===============
-To improve security the SMB2.1 dialect or later (usually will get SMB3) is now
+To improve security the SMB2.1 dialect or later (usually will get SMB3.1.1) is now
the new default. To use old dialects (e.g. to mount Windows XP) use "vers=1.0"
on mount (or vers=2.0 for Windows Vista). Note that the CIFS (vers=1.0) is
much older and less secure than the default dialect SMB3 which includes
diff --git a/Documentation/admin-guide/quickly-build-trimmed-linux.rst b/Documentation/admin-guide/quickly-build-trimmed-linux.rst
index ff4f4cc8522b..f08149bc53f8 100644
--- a/Documentation/admin-guide/quickly-build-trimmed-linux.rst
+++ b/Documentation/admin-guide/quickly-build-trimmed-linux.rst
@@ -215,12 +215,14 @@ again.
reduce the compile time enormously, especially if you are running an
universal kernel from a commodity Linux distribution.
- There is a catch: the make target 'localmodconfig' will disable kernel
- features you have not directly or indirectly through some program utilized
- since you booted the system. You can reduce or nearly eliminate that risk by
- using tricks outlined in the reference section; for quick testing purposes
- that risk is often negligible, but it is an aspect you want to keep in mind
- in case your kernel behaves oddly.
+ There is a catch: 'localmodconfig' is likely to disable kernel features you
+ did not use since you booted your Linux -- like drivers for currently
+ disconnected peripherals or a virtualization software not haven't used yet.
+ You can reduce or nearly eliminate that risk with tricks the reference
+ section outlines; but when building a kernel just for quick testing purposes
+ it is often negligible if such features are missing. But you should keep that
+ aspect in mind when using a kernel built with this make target, as it might
+ be the reason why something you only use occasionally stopped working.
[:ref:`details<configuration>`]
@@ -271,6 +273,9 @@ again.
does nothing at all; in that case you have to manually install your kernel,
as outlined in the reference section.
+ If you are running a immutable Linux distribution, check its documentation
+ and the web to find out how to install your own kernel there.
+
[:ref:`details<install>`]
.. _another_sbs:
@@ -291,29 +296,29 @@ again.
version you care about, as git otherwise might retrieve the entire commit
history::
- git fetch --shallow-exclude=v6.1 origin
-
- If you modified the sources (for example by applying a patch), you now need
- to discard those modifications; that's because git otherwise will not be able
- to switch to the sources of another version due to potential conflicting
- changes::
-
- git reset --hard
+ git fetch --shallow-exclude=v6.0 origin
- Now checkout the version you are interested in, as explained above::
+ Now switch to the version you are interested in -- but be aware the command
+ used here will discard any modifications you performed, as they would
+ conflict with the sources you want to checkout::
- git checkout --detach origin/master
+ git checkout --force --detach origin/master
At this point you might want to patch the sources again or set/modify a build
- tag, as explained earlier; afterwards adjust the build configuration to the
- new codebase and build your next kernel::
+ tag, as explained earlier. Afterwards adjust the build configuration to the
+ new codebase using olddefconfig, which will now adjust the configuration file
+ you prepared earlier using localmodconfig (~/linux/.config) for your next
+ kernel::
# reminder: if you want to apply patches, do it at this point
# reminder: you might want to update your build tag at this point
make olddefconfig
+
+ Now build your kernel::
+
make -j $(nproc --all)
- Install the kernel as outlined above::
+ Afterwards install the kernel as outlined above::
command -v installkernel && sudo make modules_install install
@@ -584,11 +589,11 @@ versions and individual commits at hand at any time::
curl -L \
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/clone.bundle \
-o linux-stable.git.bundle
- git clone clone.bundle ~/linux/
+ git clone linux-stable.git.bundle ~/linux/
rm linux-stable.git.bundle
cd ~/linux/
- git remote set-url origin
- https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
+ git remote set-url origin \
+ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
git fetch origin
git checkout --detach origin/master
diff --git a/Documentation/cdrom/index.rst b/Documentation/cdrom/index.rst
index e87a8785bc1a..e9b022d70939 100644
--- a/Documentation/cdrom/index.rst
+++ b/Documentation/cdrom/index.rst
@@ -1,8 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0
-=====
-cdrom
-=====
+======
+CD-ROM
+======
.. toctree::
:maxdepth: 1
diff --git a/Documentation/devicetree/bindings/ata/ahci-common.yaml b/Documentation/devicetree/bindings/ata/ahci-common.yaml
index 7fdf40954a4c..38770c4c85fd 100644
--- a/Documentation/devicetree/bindings/ata/ahci-common.yaml
+++ b/Documentation/devicetree/bindings/ata/ahci-common.yaml
@@ -8,7 +8,7 @@ title: Common Properties for Serial ATA AHCI controllers
maintainers:
- Hans de Goede <hdegoede@redhat.com>
- - Damien Le Moal <damien.lemoal@opensource.wdc.com>
+ - Damien Le Moal <dlemoal@kernel.org>
description:
This document defines device tree properties for a common AHCI SATA
diff --git a/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml b/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml
index 9b31f864e071..71364c6081ff 100644
--- a/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml
+++ b/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml
@@ -32,7 +32,7 @@ properties:
maxItems: 1
iommus:
- maxItems: 1
+ maxItems: 4
power-domains:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/cache/qcom,llcc.yaml b/Documentation/devicetree/bindings/cache/qcom,llcc.yaml
index d8b91944180a..44892aa589fd 100644
--- a/Documentation/devicetree/bindings/cache/qcom,llcc.yaml
+++ b/Documentation/devicetree/bindings/cache/qcom,llcc.yaml
@@ -129,6 +129,7 @@ allOf:
- qcom,sm8250-llcc
- qcom,sm8350-llcc
- qcom,sm8450-llcc
+ - qcom,sm8550-llcc
then:
properties:
reg:
diff --git a/Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml b/Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml
index 998e5cce652f..380cb6d80025 100644
--- a/Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml
+++ b/Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Canaan Kendryte K210 Clock
maintainers:
- - Damien Le Moal <damien.lemoal@wdc.com>
+ - Damien Le Moal <dlemoal@kernel.org>
description: |
Canaan Kendryte K210 SoC clocks driver bindings. The clock
diff --git a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml
index e6c1ebfe8a32..130e16d025bc 100644
--- a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml
+++ b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml
@@ -82,6 +82,18 @@ properties:
Indicates if the DSI controller is driving a panel which needs
2 DSI links.
+ qcom,master-dsi:
+ type: boolean
+ description: |
+ Indicates if the DSI controller is the master DSI controller when
+ qcom,dual-dsi-mode enabled.
+
+ qcom,sync-dual-dsi:
+ type: boolean
+ description: |
+ Indicates if the DSI controller needs to sync the other DSI controller
+ with MIPI DCS commands when qcom,dual-dsi-mode enabled.
+
assigned-clocks:
minItems: 2
maxItems: 4
diff --git a/Documentation/devicetree/bindings/firmware/qcom,scm.yaml b/Documentation/devicetree/bindings/firmware/qcom,scm.yaml
index 367d04ad1923..83381f3a1341 100644
--- a/Documentation/devicetree/bindings/firmware/qcom,scm.yaml
+++ b/Documentation/devicetree/bindings/firmware/qcom,scm.yaml
@@ -71,6 +71,8 @@ properties:
minItems: 1
maxItems: 3
+ dma-coherent: true
+
interconnects:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/fpga/lattice,sysconfig.yaml b/Documentation/devicetree/bindings/fpga/lattice,sysconfig.yaml
index 4fb05eb84e2a..164331eb6275 100644
--- a/Documentation/devicetree/bindings/fpga/lattice,sysconfig.yaml
+++ b/Documentation/devicetree/bindings/fpga/lattice,sysconfig.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Lattice Slave SPI sysCONFIG FPGA manager
maintainers:
- - Ivan Bornyakov <i.bornyakov@metrotek.ru>
+ - Vladimir Georgiev <v.georgiev@metrotek.ru>
description: |
Lattice sysCONFIG port, which is used for FPGA configuration, among others,
diff --git a/Documentation/devicetree/bindings/fpga/microchip,mpf-spi-fpga-mgr.yaml b/Documentation/devicetree/bindings/fpga/microchip,mpf-spi-fpga-mgr.yaml
index 527532f039ce..a157eecfb5fc 100644
--- a/Documentation/devicetree/bindings/fpga/microchip,mpf-spi-fpga-mgr.yaml
+++ b/Documentation/devicetree/bindings/fpga/microchip,mpf-spi-fpga-mgr.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Microchip Polarfire FPGA manager.
maintainers:
- - Ivan Bornyakov <i.bornyakov@metrotek.ru>
+ - Vladimir Georgiev <v.georgiev@metrotek.ru>
description:
Device Tree Bindings for Microchip Polarfire FPGA Manager using slave SPI to
diff --git a/Documentation/devicetree/bindings/i2c/opencores,i2c-ocores.yaml b/Documentation/devicetree/bindings/i2c/opencores,i2c-ocores.yaml
index 85d9efb743ee..d9ef86729011 100644
--- a/Documentation/devicetree/bindings/i2c/opencores,i2c-ocores.yaml
+++ b/Documentation/devicetree/bindings/i2c/opencores,i2c-ocores.yaml
@@ -60,6 +60,7 @@ properties:
default: 0
regstep:
+ $ref: /schemas/types.yaml#/definitions/uint32
description: |
deprecated, use reg-shift above
deprecated: true
diff --git a/Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml b/Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml
index 62f3ca66274f..32c821f97779 100644
--- a/Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml
+++ b/Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml
@@ -44,7 +44,7 @@ required:
- clock-names
- clocks
-additionalProperties: true
+unevaluatedProperties: false
examples:
- |
diff --git a/Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml b/Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml
index 63369ba388e4..0a192ca192c5 100644
--- a/Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml
@@ -39,6 +39,12 @@ properties:
power-domains:
maxItems: 1
+ vref-supply:
+ description: |
+ External ADC reference voltage supply on VREFH pad. If VERID[MVI] is
+ set, there are additional, internal reference voltages selectable.
+ VREFH1 is always from VREFH pad.
+
"#io-channel-cells":
const: 1
@@ -72,6 +78,7 @@ examples:
assigned-clocks = <&clk IMX_SC_R_ADC_0>;
assigned-clock-rates = <24000000>;
power-domains = <&pd IMX_SC_R_ADC_0>;
+ vref-supply = <&reg_1v8>;
#io-channel-cells = <1>;
};
};
diff --git a/Documentation/devicetree/bindings/iio/adc/renesas,rcar-gyroadc.yaml b/Documentation/devicetree/bindings/iio/adc/renesas,rcar-gyroadc.yaml
index 1c7aee5ed3e0..36dff3250ea7 100644
--- a/Documentation/devicetree/bindings/iio/adc/renesas,rcar-gyroadc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/renesas,rcar-gyroadc.yaml
@@ -90,7 +90,7 @@ patternProperties:
of the MAX chips to the GyroADC, while MISO line of each Maxim
ADC connects to a shared input pin of the GyroADC.
enum:
- - adi,7476
+ - adi,ad7476
- fujitsu,mb88101a
- maxim,max1162
- maxim,max11100
diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml
index 92117261e1e1..39e64c7f6360 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml
@@ -166,6 +166,12 @@ properties:
resets:
maxItems: 1
+ mediatek,broken-save-restore-fw:
+ type: boolean
+ description:
+ Asserts that the firmware on this device has issues saving and restoring
+ GICR registers when the GIC redistributors are powered off.
+
dependencies:
mbi-ranges: [ msi-controller ]
msi-controller: [ mbi-ranges ]
diff --git a/Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml b/Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml
index 8459d3642205..3b3beab9db3f 100644
--- a/Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml
+++ b/Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Canaan Kendryte K210 System Controller
maintainers:
- - Damien Le Moal <damien.lemoal@wdc.com>
+ - Damien Le Moal <dlemoal@kernel.org>
description:
Canaan Inc. Kendryte K210 SoC system controller which provides a
diff --git a/Documentation/devicetree/bindings/net/can/st,stm32-bxcan.yaml b/Documentation/devicetree/bindings/net/can/st,stm32-bxcan.yaml
index 769fa5c27b76..de1d4298893b 100644
--- a/Documentation/devicetree/bindings/net/can/st,stm32-bxcan.yaml
+++ b/Documentation/devicetree/bindings/net/can/st,stm32-bxcan.yaml
@@ -21,11 +21,22 @@ properties:
st,can-primary:
description:
- Primary and secondary mode of the bxCAN peripheral is only relevant
- if the chip has two CAN peripherals. In that case they share some
- of the required logic.
+ Primary mode of the bxCAN peripheral is only relevant if the chip has
+ two CAN peripherals in dual CAN configuration. In that case they share
+ some of the required logic.
+ Not to be used if the peripheral is in single CAN configuration.
To avoid misunderstandings, it should be noted that ST documentation
- uses the terms master/slave instead of primary/secondary.
+ uses the terms master instead of primary.
+ type: boolean
+
+ st,can-secondary:
+ description:
+ Secondary mode of the bxCAN peripheral is only relevant if the chip
+ has two CAN peripherals in dual CAN configuration. In that case they
+ share some of the required logic.
+ Not to be used if the peripheral is in single CAN configuration.
+ To avoid misunderstandings, it should be noted that ST documentation
+ uses the terms slave instead of secondary.
type: boolean
reg:
diff --git a/Documentation/devicetree/bindings/net/realtek-bluetooth.yaml b/Documentation/devicetree/bindings/net/realtek-bluetooth.yaml
index 8cc2b9924680..043e118c605c 100644
--- a/Documentation/devicetree/bindings/net/realtek-bluetooth.yaml
+++ b/Documentation/devicetree/bindings/net/realtek-bluetooth.yaml
@@ -11,7 +11,7 @@ maintainers:
- Alistair Francis <alistair@alistair23.me>
description:
- RTL8723CS/RTL8723CS/RTL8821CS/RTL8822CS is a WiFi + BT chip. WiFi part
+ RTL8723BS/RTL8723CS/RTL8821CS/RTL8822CS is a WiFi + BT chip. WiFi part
is connected over SDIO, while BT is connected over serial. It speaks
H5 protocol with few extra commands to upload firmware and change
module speed.
@@ -27,7 +27,7 @@ properties:
- items:
- enum:
- realtek,rtl8821cs-bt
- - const: realtek,rtl8822cs-bt
+ - const: realtek,rtl8723bs-bt
device-wake-gpios:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml b/Documentation/devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml
index 7f4f36a58e56..739a08f00467 100644
--- a/Documentation/devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Canaan Kendryte K210 FPIOA
maintainers:
- - Damien Le Moal <damien.lemoal@wdc.com>
+ - Damien Le Moal <dlemoal@kernel.org>
description:
The Canaan Kendryte K210 SoC Fully Programmable IO Array (FPIOA)
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.yaml
index c91d3e3a094b..80f960671857 100644
--- a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.yaml
@@ -144,8 +144,9 @@ $defs:
enum: [0, 1, 2, 3, 4, 5, 6, 7]
qcom,paired:
- - description:
- Indicates that the pin should be operating in paired mode.
+ type: boolean
+ description:
+ Indicates that the pin should be operating in paired mode.
required:
- pins
diff --git a/Documentation/devicetree/bindings/power/qcom,rpmpd.yaml b/Documentation/devicetree/bindings/power/qcom,rpmpd.yaml
index afad3135ed67..f9c211a9a938 100644
--- a/Documentation/devicetree/bindings/power/qcom,rpmpd.yaml
+++ b/Documentation/devicetree/bindings/power/qcom,rpmpd.yaml
@@ -29,6 +29,7 @@ properties:
- qcom,qcm2290-rpmpd
- qcom,qcs404-rpmpd
- qcom,qdu1000-rpmhpd
+ - qcom,sa8155p-rpmhpd
- qcom,sa8540p-rpmhpd
- qcom,sa8775p-rpmhpd
- qcom,sdm660-rpmpd
diff --git a/Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml b/Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml
index ee8a2dcf5dfa..0c0135964b91 100644
--- a/Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml
+++ b/Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Canaan Kendryte K210 Reset Controller
maintainers:
- - Damien Le Moal <damien.lemoal@wdc.com>
+ - Damien Le Moal <dlemoal@kernel.org>
description: |
Canaan Kendryte K210 reset controller driver which supports the SoC
diff --git a/Documentation/devicetree/bindings/riscv/canaan.yaml b/Documentation/devicetree/bindings/riscv/canaan.yaml
index f8f3f286bd55..41fd11f70a49 100644
--- a/Documentation/devicetree/bindings/riscv/canaan.yaml
+++ b/Documentation/devicetree/bindings/riscv/canaan.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Canaan SoC-based boards
maintainers:
- - Damien Le Moal <damien.lemoal@wdc.com>
+ - Damien Le Moal <dlemoal@kernel.org>
description:
Canaan Kendryte K210 SoC-based boards
diff --git a/Documentation/devicetree/bindings/serial/8250_omap.yaml b/Documentation/devicetree/bindings/serial/8250_omap.yaml
index eb3488d8f9ee..6a7be42da523 100644
--- a/Documentation/devicetree/bindings/serial/8250_omap.yaml
+++ b/Documentation/devicetree/bindings/serial/8250_omap.yaml
@@ -70,6 +70,7 @@ properties:
dsr-gpios: true
rng-gpios: true
dcd-gpios: true
+ rs485-rts-active-high: true
rts-gpio: true
power-domains: true
clock-frequency: true
diff --git a/Documentation/devicetree/bindings/sound/tas2562.yaml b/Documentation/devicetree/bindings/sound/tas2562.yaml
index a5bb561bfcfb..31a3024ea789 100644
--- a/Documentation/devicetree/bindings/sound/tas2562.yaml
+++ b/Documentation/devicetree/bindings/sound/tas2562.yaml
@@ -55,7 +55,9 @@ properties:
description: TDM TX current sense time slot.
'#sound-dai-cells':
- const: 1
+ # The codec has a single DAI, the #sound-dai-cells=<1>; case is left in for backward
+ # compatibility but is deprecated.
+ enum: [0, 1]
required:
- compatible
@@ -72,7 +74,7 @@ examples:
codec: codec@4c {
compatible = "ti,tas2562";
reg = <0x4c>;
- #sound-dai-cells = <1>;
+ #sound-dai-cells = <0>;
interrupt-parent = <&gpio1>;
interrupts = <14>;
shutdown-gpios = <&gpio1 15 0>;
diff --git a/Documentation/devicetree/bindings/sound/tas2770.yaml b/Documentation/devicetree/bindings/sound/tas2770.yaml
index 26088adb9dc2..8908bf1122e9 100644
--- a/Documentation/devicetree/bindings/sound/tas2770.yaml
+++ b/Documentation/devicetree/bindings/sound/tas2770.yaml
@@ -57,7 +57,9 @@ properties:
- 1 # Falling edge
'#sound-dai-cells':
- const: 1
+ # The codec has a single DAI, the #sound-dai-cells=<1>; case is left in for backward
+ # compatibility but is deprecated.
+ enum: [0, 1]
required:
- compatible
@@ -74,7 +76,7 @@ examples:
codec: codec@41 {
compatible = "ti,tas2770";
reg = <0x41>;
- #sound-dai-cells = <1>;
+ #sound-dai-cells = <0>;
interrupt-parent = <&gpio1>;
interrupts = <14>;
reset-gpio = <&gpio1 15 0>;
diff --git a/Documentation/devicetree/bindings/sound/tas27xx.yaml b/Documentation/devicetree/bindings/sound/tas27xx.yaml
index 8cba01316855..a876545ec87d 100644
--- a/Documentation/devicetree/bindings/sound/tas27xx.yaml
+++ b/Documentation/devicetree/bindings/sound/tas27xx.yaml
@@ -50,7 +50,9 @@ properties:
description: TDM TX voltage sense time slot.
'#sound-dai-cells':
- const: 1
+ # The codec has a single DAI, the #sound-dai-cells=<1>; case is left in for backward
+ # compatibility but is deprecated.
+ enum: [0, 1]
required:
- compatible
@@ -67,7 +69,7 @@ examples:
codec: codec@38 {
compatible = "ti,tas2764";
reg = <0x38>;
- #sound-dai-cells = <1>;
+ #sound-dai-cells = <0>;
interrupt-parent = <&gpio1>;
interrupts = <14>;
reset-gpios = <&gpio1 15 0>;
diff --git a/Documentation/devicetree/bindings/sound/tlv320aic32x4.txt b/Documentation/devicetree/bindings/sound/tlv320aic32x4.txt
index f59125bc79d1..0b4e21bde5bc 100644
--- a/Documentation/devicetree/bindings/sound/tlv320aic32x4.txt
+++ b/Documentation/devicetree/bindings/sound/tlv320aic32x4.txt
@@ -8,7 +8,7 @@ Required properties:
"ti,tlv320aic32x6" TLV320AIC3206, TLV320AIC3256
"ti,tas2505" TAS2505, TAS2521
- reg: I2C slave address
- - supply-*: Required supply regulators are:
+ - *-supply: Required supply regulators are:
"iov" - digital IO power supply
"ldoin" - LDO power supply
"dv" - Digital core power supply
diff --git a/Documentation/devicetree/bindings/usb/cdns,usb3.yaml b/Documentation/devicetree/bindings/usb/cdns,usb3.yaml
index cae46c4982ad..69a93a0722f0 100644
--- a/Documentation/devicetree/bindings/usb/cdns,usb3.yaml
+++ b/Documentation/devicetree/bindings/usb/cdns,usb3.yaml
@@ -64,7 +64,7 @@ properties:
description:
size of memory intended as internal memory for endpoints
buffers expressed in KB
- $ref: /schemas/types.yaml#/definitions/uint32
+ $ref: /schemas/types.yaml#/definitions/uint16
cdns,phyrst-a-enable:
description: Enable resetting of PHY if Rx fail is detected
diff --git a/Documentation/devicetree/bindings/usb/snps,dwc3.yaml b/Documentation/devicetree/bindings/usb/snps,dwc3.yaml
index 50edc4da780e..4f7625955ccc 100644
--- a/Documentation/devicetree/bindings/usb/snps,dwc3.yaml
+++ b/Documentation/devicetree/bindings/usb/snps,dwc3.yaml
@@ -287,7 +287,7 @@ properties:
description:
High-Speed PHY interface selection between UTMI+ and ULPI when the
DWC_USB3_HSPHY_INTERFACE has value 3.
- $ref: /schemas/types.yaml#/definitions/uint8
+ $ref: /schemas/types.yaml#/definitions/string
enum: [utmi, ulpi]
snps,quirk-frame-length-adjustment:
diff --git a/Documentation/devicetree/usage-model.rst b/Documentation/devicetree/usage-model.rst
index b6a287955ee5..0717426856b2 100644
--- a/Documentation/devicetree/usage-model.rst
+++ b/Documentation/devicetree/usage-model.rst
@@ -415,6 +415,6 @@ When using the DT, this creates problems for of_platform_populate()
because it must decide whether to register each node as either a
platform_device or an amba_device. This unfortunately complicates the
device creation model a little bit, but the solution turns out not to
-be too invasive. If a node is compatible with "arm,amba-primecell", then
+be too invasive. If a node is compatible with "arm,primecell", then
of_platform_populate() will register it as an amba_device instead of a
platform_device.
diff --git a/Documentation/filesystems/directory-locking.rst b/Documentation/filesystems/directory-locking.rst
index 504ba940c36c..dccd61c7c5c3 100644
--- a/Documentation/filesystems/directory-locking.rst
+++ b/Documentation/filesystems/directory-locking.rst
@@ -22,12 +22,11 @@ exclusive.
3) object removal. Locking rules: caller locks parent, finds victim,
locks victim and calls the method. Locks are exclusive.
-4) rename() that is _not_ cross-directory. Locking rules: caller locks
-the parent and finds source and target. In case of exchange (with
-RENAME_EXCHANGE in flags argument) lock both. In any case,
-if the target already exists, lock it. If the source is a non-directory,
-lock it. If we need to lock both, lock them in inode pointer order.
-Then call the method. All locks are exclusive.
+4) rename() that is _not_ cross-directory. Locking rules: caller locks the
+parent and finds source and target. We lock both (provided they exist). If we
+need to lock two inodes of different type (dir vs non-dir), we lock directory
+first. If we need to lock two inodes of the same type, lock them in inode
+pointer order. Then call the method. All locks are exclusive.
NB: we might get away with locking the source (and target in exchange
case) shared.
@@ -44,15 +43,17 @@ All locks are exclusive.
rules:
* lock the filesystem
- * lock parents in "ancestors first" order.
+ * lock parents in "ancestors first" order. If one is not ancestor of
+ the other, lock them in inode pointer order.
* find source and target.
* if old parent is equal to or is a descendent of target
fail with -ENOTEMPTY
* if new parent is equal to or is a descendent of source
fail with -ELOOP
- * If it's an exchange, lock both the source and the target.
- * If the target exists, lock it. If the source is a non-directory,
- lock it. If we need to lock both, do so in inode pointer order.
+ * Lock both the source and the target provided they exist. If we
+ need to lock two inodes of different type (dir vs non-dir), we lock
+ the directory first. If we need to lock two inodes of the same type,
+ lock them in inode pointer order.
* call the method.
All ->i_rwsem are taken exclusive. Again, we might get away with locking
@@ -66,8 +67,9 @@ If no directory is its own ancestor, the scheme above is deadlock-free.
Proof:
- First of all, at any moment we have a partial ordering of the
- objects - A < B iff A is an ancestor of B.
+ First of all, at any moment we have a linear ordering of the
+ objects - A < B iff (A is an ancestor of B) or (B is not an ancestor
+ of A and ptr(A) < ptr(B)).
That ordering can change. However, the following is true:
diff --git a/Documentation/filesystems/fsverity.rst b/Documentation/filesystems/fsverity.rst
index ede672dedf11..cb845e8e5435 100644
--- a/Documentation/filesystems/fsverity.rst
+++ b/Documentation/filesystems/fsverity.rst
@@ -38,20 +38,14 @@ fail at runtime.
Use cases
=========
-By itself, the base fs-verity feature only provides integrity
-protection, i.e. detection of accidental (non-malicious) corruption.
+By itself, fs-verity only provides integrity protection, i.e.
+detection of accidental (non-malicious) corruption.
However, because fs-verity makes retrieving the file hash extremely
efficient, it's primarily meant to be used as a tool to support
authentication (detection of malicious modifications) or auditing
(logging file hashes before use).
-Trusted userspace code (e.g. operating system code running on a
-read-only partition that is itself authenticated by dm-verity) can
-authenticate the contents of an fs-verity file by using the
-`FS_IOC_MEASURE_VERITY`_ ioctl to retrieve its hash, then verifying a
-digital signature of it.
-
A standard file hash could be used instead of fs-verity. However,
this is inefficient if the file is large and only a small portion may
be accessed. This is often the case for Android application package
@@ -69,24 +63,31 @@ still be used on read-only filesystems. fs-verity is for files that
must live on a read-write filesystem because they are independently
updated and potentially user-installed, so dm-verity cannot be used.
-The base fs-verity feature is a hashing mechanism only; actually
-authenticating the files may be done by:
-
-* Userspace-only
-
-* Builtin signature verification + userspace policy
-
- fs-verity optionally supports a simple signature verification
- mechanism where users can configure the kernel to require that
- all fs-verity files be signed by a key loaded into a keyring;
- see `Built-in signature verification`_.
-
-* Integrity Measurement Architecture (IMA)
-
- IMA supports including fs-verity file digests and signatures in the
- IMA measurement list and verifying fs-verity based file signatures
- stored as security.ima xattrs, based on policy.
-
+fs-verity does not mandate a particular scheme for authenticating its
+file hashes. (Similarly, dm-verity does not mandate a particular
+scheme for authenticating its block device root hashes.) Options for
+authenticating fs-verity file hashes include:
+
+- Trusted userspace code. Often, the userspace code that accesses
+ files can be trusted to authenticate them. Consider e.g. an
+ application that wants to authenticate data files before using them,
+ or an application loader that is part of the operating system (which
+ is already authenticated in a different way, such as by being loaded
+ from a read-only partition that uses dm-verity) and that wants to
+ authenticate applications before loading them. In these cases, this
+ trusted userspace code can authenticate a file's contents by
+ retrieving its fs-verity digest using `FS_IOC_MEASURE_VERITY`_, then
+ verifying a signature of it using any userspace cryptographic
+ library that supports digital signatures.
+
+- Integrity Measurement Architecture (IMA). IMA supports fs-verity
+ file digests as an alternative to its traditional full file digests.
+ "IMA appraisal" enforces that files contain a valid, matching
+ signature in their "security.ima" extended attribute, as controlled
+ by the IMA policy. For more information, see the IMA documentation.
+
+- Trusted userspace code in combination with `Built-in signature
+ verification`_. This approach should be used only with great care.
User API
========
@@ -111,8 +112,7 @@ follows::
};
This structure contains the parameters of the Merkle tree to build for
-the file, and optionally contains a signature. It must be initialized
-as follows:
+the file. It must be initialized as follows:
- ``version`` must be 1.
- ``hash_algorithm`` must be the identifier for the hash algorithm to
@@ -129,12 +129,14 @@ as follows:
file or device. Currently the maximum salt size is 32 bytes.
- ``salt_ptr`` is the pointer to the salt, or NULL if no salt is
provided.
-- ``sig_size`` is the size of the signature in bytes, or 0 if no
- signature is provided. Currently the signature is (somewhat
- arbitrarily) limited to 16128 bytes. See `Built-in signature
- verification`_ for more information.
-- ``sig_ptr`` is the pointer to the signature, or NULL if no
- signature is provided.
+- ``sig_size`` is the size of the builtin signature in bytes, or 0 if no
+ builtin signature is provided. Currently the builtin signature is
+ (somewhat arbitrarily) limited to 16128 bytes.
+- ``sig_ptr`` is the pointer to the builtin signature, or NULL if no
+ builtin signature is provided. A builtin signature is only needed
+ if the `Built-in signature verification`_ feature is being used. It
+ is not needed for IMA appraisal, and it is not needed if the file
+ signature is being handled entirely in userspace.
- All reserved fields must be zeroed.
FS_IOC_ENABLE_VERITY causes the filesystem to build a Merkle tree for
@@ -158,7 +160,7 @@ fatal signal), no changes are made to the file.
FS_IOC_ENABLE_VERITY can fail with the following errors:
- ``EACCES``: the process does not have write access to the file
-- ``EBADMSG``: the signature is malformed
+- ``EBADMSG``: the builtin signature is malformed
- ``EBUSY``: this ioctl is already running on the file
- ``EEXIST``: the file already has verity enabled
- ``EFAULT``: the caller provided inaccessible memory
@@ -168,10 +170,10 @@ FS_IOC_ENABLE_VERITY can fail with the following errors:
reserved bits are set; or the file descriptor refers to neither a
regular file nor a directory.
- ``EISDIR``: the file descriptor refers to a directory
-- ``EKEYREJECTED``: the signature doesn't match the file
-- ``EMSGSIZE``: the salt or signature is too long
-- ``ENOKEY``: the fs-verity keyring doesn't contain the certificate
- needed to verify the signature
+- ``EKEYREJECTED``: the builtin signature doesn't match the file
+- ``EMSGSIZE``: the salt or builtin signature is too long
+- ``ENOKEY``: the ".fs-verity" keyring doesn't contain the certificate
+ needed to verify the builtin signature
- ``ENOPKG``: fs-verity recognizes the hash algorithm, but it's not
available in the kernel's crypto API as currently configured (e.g.
for SHA-512, missing CONFIG_CRYPTO_SHA512).
@@ -180,8 +182,8 @@ FS_IOC_ENABLE_VERITY can fail with the following errors:
support; or the filesystem superblock has not had the 'verity'
feature enabled on it; or the filesystem does not support fs-verity
on this file. (See `Filesystem support`_.)
-- ``EPERM``: the file is append-only; or, a signature is required and
- one was not provided.
+- ``EPERM``: the file is append-only; or, a builtin signature is
+ required and one was not provided.
- ``EROFS``: the filesystem is read-only
- ``ETXTBSY``: someone has the file open for writing. This can be the
caller's file descriptor, another open file descriptor, or the file
@@ -270,9 +272,9 @@ This ioctl takes in a pointer to the following structure::
- ``FS_VERITY_METADATA_TYPE_DESCRIPTOR`` reads the fs-verity
descriptor. See `fs-verity descriptor`_.
-- ``FS_VERITY_METADATA_TYPE_SIGNATURE`` reads the signature which was
- passed to FS_IOC_ENABLE_VERITY, if any. See `Built-in signature
- verification`_.
+- ``FS_VERITY_METADATA_TYPE_SIGNATURE`` reads the builtin signature
+ which was passed to FS_IOC_ENABLE_VERITY, if any. See `Built-in
+ signature verification`_.
The semantics are similar to those of ``pread()``. ``offset``
specifies the offset in bytes into the metadata item to read from, and
@@ -299,7 +301,7 @@ FS_IOC_READ_VERITY_METADATA can fail with the following errors:
overflowed
- ``ENODATA``: the file is not a verity file, or
FS_VERITY_METADATA_TYPE_SIGNATURE was requested but the file doesn't
- have a built-in signature
+ have a builtin signature
- ``ENOTTY``: this type of filesystem does not implement fs-verity, or
this ioctl is not yet implemented on it
- ``EOPNOTSUPP``: the kernel was not configured with fs-verity
@@ -347,8 +349,8 @@ non-verity one, with the following exceptions:
with EIO (for read()) or SIGBUS (for mmap() reads).
- If the sysctl "fs.verity.require_signatures" is set to 1 and the
- file is not signed by a key in the fs-verity keyring, then opening
- the file will fail. See `Built-in signature verification`_.
+ file is not signed by a key in the ".fs-verity" keyring, then
+ opening the file will fail. See `Built-in signature verification`_.
Direct access to the Merkle tree is not supported. Therefore, if a
verity file is copied, or is backed up and restored, then it will lose
@@ -433,20 +435,25 @@ root hash as well as other fields such as the file size::
Built-in signature verification
===============================
-With CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y, fs-verity supports putting
-a portion of an authentication policy (see `Use cases`_) in the
-kernel. Specifically, it adds support for:
+CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y adds supports for in-kernel
+verification of fs-verity builtin signatures.
+
+**IMPORTANT**! Please take great care before using this feature.
+It is not the only way to do signatures with fs-verity, and the
+alternatives (such as userspace signature verification, and IMA
+appraisal) can be much better. It's also easy to fall into a trap
+of thinking this feature solves more problems than it actually does.
+
+Enabling this option adds the following:
-1. At fs-verity module initialization time, a keyring ".fs-verity" is
- created. The root user can add trusted X.509 certificates to this
- keyring using the add_key() system call, then (when done)
- optionally use keyctl_restrict_keyring() to prevent additional
- certificates from being added.
+1. At boot time, the kernel creates a keyring named ".fs-verity". The
+ root user can add trusted X.509 certificates to this keyring using
+ the add_key() system call.
2. `FS_IOC_ENABLE_VERITY`_ accepts a pointer to a PKCS#7 formatted
detached signature in DER format of the file's fs-verity digest.
- On success, this signature is persisted alongside the Merkle tree.
- Then, any time the file is opened, the kernel will verify the
+ On success, the ioctl persists the signature alongside the Merkle
+ tree. Then, any time the file is opened, the kernel verifies the
file's actual digest against this signature, using the certificates
in the ".fs-verity" keyring.
@@ -454,8 +461,8 @@ kernel. Specifically, it adds support for:
When set to 1, the kernel requires that all verity files have a
correctly signed digest as described in (2).
-fs-verity file digests must be signed in the following format, which
-is similar to the structure used by `FS_IOC_MEASURE_VERITY`_::
+The data that the signature as described in (2) must be a signature of
+is the fs-verity file digest in the following format::
struct fsverity_formatted_digest {
char magic[8]; /* must be "FSVerity" */
@@ -464,13 +471,66 @@ is similar to the structure used by `FS_IOC_MEASURE_VERITY`_::
__u8 digest[];
};
-fs-verity's built-in signature verification support is meant as a
-relatively simple mechanism that can be used to provide some level of
-authenticity protection for verity files, as an alternative to doing
-the signature verification in userspace or using IMA-appraisal.
-However, with this mechanism, userspace programs still need to check
-that the verity bit is set, and there is no protection against verity
-files being swapped around.
+That's it. It should be emphasized again that fs-verity builtin
+signatures are not the only way to do signatures with fs-verity. See
+`Use cases`_ for an overview of ways in which fs-verity can be used.
+fs-verity builtin signatures have some major limitations that should
+be carefully considered before using them:
+
+- Builtin signature verification does *not* make the kernel enforce
+ that any files actually have fs-verity enabled. Thus, it is not a
+ complete authentication policy. Currently, if it is used, the only
+ way to complete the authentication policy is for trusted userspace
+ code to explicitly check whether files have fs-verity enabled with a
+ signature before they are accessed. (With
+ fs.verity.require_signatures=1, just checking whether fs-verity is
+ enabled suffices.) But, in this case the trusted userspace code
+ could just store the signature alongside the file and verify it
+ itself using a cryptographic library, instead of using this feature.
+
+- A file's builtin signature can only be set at the same time that
+ fs-verity is being enabled on the file. Changing or deleting the
+ builtin signature later requires re-creating the file.
+
+- Builtin signature verification uses the same set of public keys for
+ all fs-verity enabled files on the system. Different keys cannot be
+ trusted for different files; each key is all or nothing.
+
+- The sysctl fs.verity.require_signatures applies system-wide.
+ Setting it to 1 only works when all users of fs-verity on the system
+ agree that it should be set to 1. This limitation can prevent
+ fs-verity from being used in cases where it would be helpful.
+
+- Builtin signature verification can only use signature algorithms
+ that are supported by the kernel. For example, the kernel does not
+ yet support Ed25519, even though this is often the signature
+ algorithm that is recommended for new cryptographic designs.
+
+- fs-verity builtin signatures are in PKCS#7 format, and the public
+ keys are in X.509 format. These formats are commonly used,
+ including by some other kernel features (which is why the fs-verity
+ builtin signatures use them), and are very feature rich.
+ Unfortunately, history has shown that code that parses and handles
+ these formats (which are from the 1990s and are based on ASN.1)
+ often has vulnerabilities as a result of their complexity. This
+ complexity is not inherent to the cryptography itself.
+
+ fs-verity users who do not need advanced features of X.509 and
+ PKCS#7 should strongly consider using simpler formats, such as plain
+ Ed25519 keys and signatures, and verifying signatures in userspace.
+
+ fs-verity users who choose to use X.509 and PKCS#7 anyway should
+ still consider that verifying those signatures in userspace is more
+ flexible (for other reasons mentioned earlier in this document) and
+ eliminates the need to enable CONFIG_FS_VERITY_BUILTIN_SIGNATURES
+ and its associated increase in kernel attack surface. In some cases
+ it can even be necessary, since advanced X.509 and PKCS#7 features
+ do not always work as intended with the kernel. For example, the
+ kernel does not check X.509 certificate validity times.
+
+ Note: IMA appraisal, which supports fs-verity, does not use PKCS#7
+ for its signatures, so it partially avoids the issues discussed
+ here. IMA appraisal does use X.509.
Filesystem support
==================
diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst
index fbb2b5ada95b..eb252fc972aa 100644
--- a/Documentation/filesystems/index.rst
+++ b/Documentation/filesystems/index.rst
@@ -72,7 +72,6 @@ Documentation for filesystem implementations.
befs
bfs
btrfs
- cifs/index
ceph
coda
configfs
@@ -111,6 +110,7 @@ Documentation for filesystem implementations.
ramfs-rootfs-initramfs
relay
romfs
+ smb/index
spufs/index
squashfs
sysfs
diff --git a/Documentation/filesystems/ramfs-rootfs-initramfs.rst b/Documentation/filesystems/ramfs-rootfs-initramfs.rst
index 164960631925..447f767c6462 100644
--- a/Documentation/filesystems/ramfs-rootfs-initramfs.rst
+++ b/Documentation/filesystems/ramfs-rootfs-initramfs.rst
@@ -6,8 +6,7 @@ Ramfs, rootfs and initramfs
October 17, 2005
-Rob Landley <rob@landley.net>
-=============================
+:Author: Rob Landley <rob@landley.net>
What is ramfs?
--------------
diff --git a/Documentation/filesystems/sharedsubtree.rst b/Documentation/filesystems/sharedsubtree.rst
index d83395354250..1cf56489ed48 100644
--- a/Documentation/filesystems/sharedsubtree.rst
+++ b/Documentation/filesystems/sharedsubtree.rst
@@ -147,6 +147,7 @@ replicas continue to be exactly same.
3) Setting mount states
+-----------------------
The mount command (util-linux package) can be used to set mount
states::
@@ -612,6 +613,7 @@ replicas continue to be exactly same.
6) Quiz
+-------
A. What is the result of the following command sequence?
@@ -673,6 +675,7 @@ replicas continue to be exactly same.
/mnt/1/test be?
7) FAQ
+------
Q1. Why is bind mount needed? How is it different from symbolic links?
symbolic links can get stale if the destination mount gets
@@ -841,6 +844,7 @@ replicas continue to be exactly same.
tmp usr tmp usr tmp usr
8) Implementation
+-----------------
8A) Datastructure
diff --git a/Documentation/filesystems/cifs/cifsroot.rst b/Documentation/filesystems/smb/cifsroot.rst
index 4930bb443134..bf2d9db3acb9 100644
--- a/Documentation/filesystems/cifs/cifsroot.rst
+++ b/Documentation/filesystems/smb/cifsroot.rst
@@ -59,7 +59,7 @@ the root file system via SMB protocol.
Enables the kernel to mount the root file system via SMB that are
located in the <server-ip> and <share> specified in this option.
-The default mount options are set in fs/cifs/cifsroot.c.
+The default mount options are set in fs/smb/client/cifsroot.c.
server-ip
IPv4 address of the server.
diff --git a/Documentation/filesystems/cifs/index.rst b/Documentation/filesystems/smb/index.rst
index 1c8597a679ab..1c8597a679ab 100644
--- a/Documentation/filesystems/cifs/index.rst
+++ b/Documentation/filesystems/smb/index.rst
diff --git a/Documentation/filesystems/cifs/ksmbd.rst b/Documentation/filesystems/smb/ksmbd.rst
index 7bed96d794fc..7bed96d794fc 100644
--- a/Documentation/filesystems/cifs/ksmbd.rst
+++ b/Documentation/filesystems/smb/ksmbd.rst
diff --git a/Documentation/fpga/index.rst b/Documentation/fpga/index.rst
index f80f95667ca2..43c968871d99 100644
--- a/Documentation/fpga/index.rst
+++ b/Documentation/fpga/index.rst
@@ -1,7 +1,7 @@
.. SPDX-License-Identifier: GPL-2.0
====
-fpga
+FPGA
====
.. toctree::
diff --git a/Documentation/locking/index.rst b/Documentation/locking/index.rst
index 7003bd5aeff4..6a9ea96c8bcb 100644
--- a/Documentation/locking/index.rst
+++ b/Documentation/locking/index.rst
@@ -1,7 +1,7 @@
.. SPDX-License-Identifier: GPL-2.0
=======
-locking
+Locking
=======
.. toctree::
diff --git a/Documentation/mm/page_table_check.rst b/Documentation/mm/page_table_check.rst
index cfd8f4117cf3..c12838ce6b8d 100644
--- a/Documentation/mm/page_table_check.rst
+++ b/Documentation/mm/page_table_check.rst
@@ -52,3 +52,22 @@ Build kernel with:
Optionally, build kernel with PAGE_TABLE_CHECK_ENFORCED in order to have page
table support without extra kernel parameter.
+
+Implementation notes
+====================
+
+We specifically decided not to use VMA information in order to avoid relying on
+MM states (except for limited "struct page" info). The page table check is a
+separate from Linux-MM state machine that verifies that the user accessible
+pages are not falsely shared.
+
+PAGE_TABLE_CHECK depends on EXCLUSIVE_SYSTEM_RAM. The reason is that without
+EXCLUSIVE_SYSTEM_RAM, users are allowed to map arbitrary physical memory
+regions into the userspace via /dev/mem. At the same time, pages may change
+their properties (e.g., from anonymous pages to named pages) while they are
+still being mapped in the userspace, leading to "corruption" detected by the
+page table check.
+
+Even with EXCLUSIVE_SYSTEM_RAM, I/O pages may be still allowed to be mapped via
+/dev/mem. However, these pages are always considered as named pages, so they
+won't break the logic used in the page table check.
diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index 129f413ea349..4846345bade4 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -61,22 +61,6 @@ attribute-sets:
nested-attributes: bitset-bits
-
- name: u64-array
- attributes:
- -
- name: u64
- type: nest
- multi-attr: true
- nested-attributes: u64
- -
- name: s32-array
- attributes:
- -
- name: s32
- type: nest
- multi-attr: true
- nested-attributes: s32
- -
name: string
attributes:
-
@@ -239,7 +223,7 @@ attribute-sets:
name: tx-min-frag-size
type: u32
-
- name: tx-min-frag-size
+ name: rx-min-frag-size
type: u32
-
name: verify-enabled
@@ -310,7 +294,7 @@ attribute-sets:
name: master-slave-state
type: u8
-
- name: master-slave-lanes
+ name: lanes
type: u32
-
name: rate-matching
@@ -338,7 +322,7 @@ attribute-sets:
name: ext-substate
type: u8
-
- name: down-cnt
+ name: ext-down-cnt
type: u32
-
name: debug
@@ -593,7 +577,7 @@ attribute-sets:
name: phc-index
type: u32
-
- name: cable-test-nft-nest-result
+ name: cable-test-ntf-nest-result
attributes:
-
name: pair
@@ -602,7 +586,7 @@ attribute-sets:
name: code
type: u8
-
- name: cable-test-nft-nest-fault-length
+ name: cable-test-ntf-nest-fault-length
attributes:
-
name: pair
@@ -611,16 +595,16 @@ attribute-sets:
name: cm
type: u32
-
- name: cable-test-nft-nest
+ name: cable-test-ntf-nest
attributes:
-
name: result
type: nest
- nested-attributes: cable-test-nft-nest-result
+ nested-attributes: cable-test-ntf-nest-result
-
name: fault-length
type: nest
- nested-attributes: cable-test-nft-nest-fault-length
+ nested-attributes: cable-test-ntf-nest-fault-length
-
name: cable-test
attributes:
@@ -634,7 +618,7 @@ attribute-sets:
-
name: nest
type: nest
- nested-attributes: cable-test-nft-nest
+ nested-attributes: cable-test-ntf-nest
-
name: cable-test-tdr-cfg
attributes:
@@ -705,16 +689,16 @@ attribute-sets:
type: u8
-
name: corrected
- type: nest
- nested-attributes: u64-array
+ type: binary
+ sub-type: u64
-
name: uncorr
- type: nest
- nested-attributes: u64-array
+ type: binary
+ sub-type: u64
-
name: corr-bits
- type: nest
- nested-attributes: u64-array
+ type: binary
+ sub-type: u64
-
name: fec
attributes:
@@ -792,7 +776,7 @@ attribute-sets:
name: hist-bkt-hi
type: u32
-
- name: hist-bkt-val
+ name: hist-val
type: u64
-
name: stats
@@ -827,8 +811,8 @@ attribute-sets:
type: u32
-
name: index
- type: nest
- nested-attributes: s32-array
+ type: binary
+ sub-type: s32
-
name: module
attributes:
@@ -981,7 +965,7 @@ operations:
- duplex
- master-slave-cfg
- master-slave-state
- - master-slave-lanes
+ - lanes
- rate-matching
dump: *linkmodes-get-op
-
@@ -1015,7 +999,7 @@ operations:
- sqi-max
- ext-state
- ext-substate
- - down-cnt
+ - ext-down-cnt
dump: *linkstate-get-op
-
name: debug-get
@@ -1367,7 +1351,7 @@ operations:
reply:
attributes:
- header
- - cable-test-nft-nest
+ - cable-test-ntf-nest
-
name: cable-test-tdr-act
doc: Cable test TDR.
@@ -1555,7 +1539,7 @@ operations:
- hkey
dump: *rss-get-op
-
- name: plca-get
+ name: plca-get-cfg
doc: Get PLCA params.
attribute-set: plca
@@ -1577,7 +1561,7 @@ operations:
- burst-tmr
dump: *plca-get-op
-
- name: plca-set
+ name: plca-set-cfg
doc: Set PLCA params.
attribute-set: plca
@@ -1601,7 +1585,7 @@ operations:
-
name: plca-ntf
doc: Notification for change in PLCA params.
- notify: plca-get
+ notify: plca-get-cfg
-
name: mm-get
doc: Get MAC Merge configuration and state
diff --git a/Documentation/netlink/specs/handshake.yaml b/Documentation/netlink/specs/handshake.yaml
index 614f1a585511..6d89e30f5fd5 100644
--- a/Documentation/netlink/specs/handshake.yaml
+++ b/Documentation/netlink/specs/handshake.yaml
@@ -68,6 +68,9 @@ attribute-sets:
type: nest
nested-attributes: x509
multi-attr: true
+ -
+ name: peername
+ type: string
-
name: done
attributes:
@@ -105,6 +108,7 @@ operations:
- auth-mode
- peer-identity
- certificate
+ - peername
-
name: done
doc: Handler reports handshake completion
diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst
index 3a7a714cc08f..3354ca3608ee 100644
--- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst
@@ -40,6 +40,7 @@ flow_steering_mode: Device flow steering mode
---------------------------------------------
The flow steering mode parameter controls the flow steering mode of the driver.
Two modes are supported:
+
1. 'dmfs' - Device managed flow steering.
2. 'smfs' - Software/Driver managed flow steering.
@@ -99,6 +100,7 @@ between representors and stacked devices.
By default metadata is enabled on the supported devices in E-switch.
Metadata is applicable only for E-switch in switchdev mode and
users may disable it when NONE of the below use cases will be in use:
+
1. HCA is in Dual/multi-port RoCE mode.
2. VF/SF representor bonding (Usually used for Live migration)
3. Stacked devices
@@ -180,7 +182,8 @@ User commands examples:
$ devlink health diagnose pci/0000:82:00.0 reporter tx
-NOTE: This command has valid output only when interface is up, otherwise the command has empty output.
+.. note::
+ This command has valid output only when interface is up, otherwise the command has empty output.
- Show number of tx errors indicated, number of recover flows ended successfully,
is autorecover enabled and graceful period from last recover::
@@ -232,8 +235,9 @@ User commands examples:
$ devlink health dump show pci/0000:82:00.0 reporter fw
-NOTE: This command can run only on the PF which has fw tracer ownership,
-running it on other PF or any VF will return "Operation not permitted".
+.. note::
+ This command can run only on the PF which has fw tracer ownership,
+ running it on other PF or any VF will return "Operation not permitted".
fw fatal reporter
-----------------
@@ -256,7 +260,8 @@ User commands examples:
$ devlink health dump show pci/0000:82:00.1 reporter fw_fatal
-NOTE: This command can run only on PF.
+.. note::
+ This command can run only on PF.
vnic reporter
-------------
@@ -265,28 +270,37 @@ It is responsible for querying the vnic diagnostic counters from fw and displayi
them in realtime.
Description of the vnic counters:
-total_q_under_processor_handle: number of queues in an error state due to
-an async error or errored command.
-send_queue_priority_update_flow: number of QP/SQ priority/SL update
-events.
-cq_overrun: number of times CQ entered an error state due to an
-overflow.
-async_eq_overrun: number of times an EQ mapped to async events was
-overrun.
-comp_eq_overrun: number of times an EQ mapped to completion events was
-overrun.
-quota_exceeded_command: number of commands issued and failed due to quota
-exceeded.
-invalid_command: number of commands issued and failed dues to any reason
-other than quota exceeded.
-nic_receive_steering_discard: number of packets that completed RX flow
-steering but were discarded due to a mismatch in flow table.
+
+- total_q_under_processor_handle
+ number of queues in an error state due to
+ an async error or errored command.
+- send_queue_priority_update_flow
+ number of QP/SQ priority/SL update events.
+- cq_overrun
+ number of times CQ entered an error state due to an overflow.
+- async_eq_overrun
+ number of times an EQ mapped to async events was overrun.
+ comp_eq_overrun number of times an EQ mapped to completion events was
+ overrun.
+- quota_exceeded_command
+ number of commands issued and failed due to quota exceeded.
+- invalid_command
+ number of commands issued and failed dues to any reason other than quota
+ exceeded.
+- nic_receive_steering_discard
+ number of packets that completed RX flow
+ steering but were discarded due to a mismatch in flow table.
User commands examples:
-- Diagnose PF/VF vnic counters
+
+- Diagnose PF/VF vnic counters::
+
$ devlink health diagnose pci/0000:82:00.1 reporter vnic
+
- Diagnose representor vnic counters (performed by supplying devlink port of the
- representor, which can be obtained via devlink port command)
+ representor, which can be obtained via devlink port command)::
+
$ devlink health diagnose pci/0000:82:00.1/65537 reporter vnic
-NOTE: This command can run over all interfaces such as PF/VF and representor ports.
+.. note::
+ This command can run over all interfaces such as PF/VF and representor ports.
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 6ec06a33688a..80b8f73a0244 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -1352,8 +1352,8 @@ ping_group_range - 2 INTEGERS
Restrict ICMP_PROTO datagram sockets to users in the group range.
The default is "1 0", meaning, that nobody (not even root) may
create ping sockets. Setting it to "100 100" would grant permissions
- to the single group. "0 4294967295" would enable it for the world, "100
- 4294967295" would enable it for the users, but not daemons.
+ to the single group. "0 4294967294" would enable it for the world, "100
+ 4294967294" would enable it for the users, but not daemons.
tcp_early_demux - BOOLEAN
Enable early demux for established TCP sockets.
diff --git a/Documentation/networking/tls-handshake.rst b/Documentation/networking/tls-handshake.rst
index a2817a88e905..6f5ea1646a47 100644
--- a/Documentation/networking/tls-handshake.rst
+++ b/Documentation/networking/tls-handshake.rst
@@ -53,6 +53,7 @@ fills in a structure that contains the parameters of the request:
struct socket *ta_sock;
tls_done_func_t ta_done;
void *ta_data;
+ const char *ta_peername;
unsigned int ta_timeout_ms;
key_serial_t ta_keyring;
key_serial_t ta_my_cert;
@@ -71,6 +72,10 @@ instantiated a struct file in sock->file.
has completed. Further explanation of this function is in the "Handshake
Completion" sesction below.
+The consumer can provide a NUL-terminated hostname in the @ta_peername
+field that is sent as part of ClientHello. If no peername is provided,
+the DNS hostname associated with the server's IP address is used instead.
+
The consumer can fill in the @ta_timeout_ms field to force the servicing
handshake agent to exit after a number of milliseconds. This enables the
socket to be fully closed once both the kernel and the handshake agent
diff --git a/Documentation/pcmcia/index.rst b/Documentation/pcmcia/index.rst
index 7ae1f62fca14..8067236c51ab 100644
--- a/Documentation/pcmcia/index.rst
+++ b/Documentation/pcmcia/index.rst
@@ -1,7 +1,7 @@
.. SPDX-License-Identifier: GPL-2.0
======
-pcmcia
+PCMCIA
======
.. toctree::
diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst
index ef540865ad22..5cf6a5f8ca57 100644
--- a/Documentation/process/changes.rst
+++ b/Documentation/process/changes.rst
@@ -31,7 +31,7 @@ you probably needn't concern yourself with pcmciautils.
====================== =============== ========================================
GNU C 5.1 gcc --version
Clang/LLVM (optional) 11.0.0 clang --version
-Rust (optional) 1.62.0 rustc --version
+Rust (optional) 1.68.2 rustc --version
bindgen (optional) 0.56.0 bindgen --version
GNU make 3.82 make --version
bash 4.2 bash --version
diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst
index f73ac9e175a8..83614cec9328 100644
--- a/Documentation/process/maintainer-netdev.rst
+++ b/Documentation/process/maintainer-netdev.rst
@@ -127,13 +127,32 @@ the value of ``Message-ID`` to the URL above.
Updating patch status
~~~~~~~~~~~~~~~~~~~~~
-It may be tempting to help the maintainers and update the state of your
-own patches when you post a new version or spot a bug. Please **do not**
-do that.
-Interfering with the patch status on patchwork will only cause confusion. Leave
-it to the maintainer to figure out what is the most recent and current
-version that should be applied. If there is any doubt, the maintainer
-will reply and ask what should be done.
+Contributors and reviewers do not have the permissions to update patch
+state directly in patchwork. Patchwork doesn't expose much information
+about the history of the state of patches, therefore having multiple
+people update the state leads to confusion.
+
+Instead of delegating patchwork permissions netdev uses a simple mail
+bot which looks for special commands/lines within the emails sent to
+the mailing list. For example to mark a series as Changes Requested
+one needs to send the following line anywhere in the email thread::
+
+ pw-bot: changes-requested
+
+As a result the bot will set the entire series to Changes Requested.
+This may be useful when author discovers a bug in their own series
+and wants to prevent it from getting applied.
+
+The use of the bot is entirely optional, if in doubt ignore its existence
+completely. Maintainers will classify and update the state of the patches
+themselves. No email should ever be sent to the list with the main purpose
+of communicating with the bot, the bot commands should be seen as metadata.
+
+The use of the bot is restricted to authors of the patches (the ``From:``
+header on patch submission and command must match!), maintainers themselves
+and a handful of senior reviewers. Bot records its activity here:
+
+ https://patchwork.hopto.org/pw-bot.html
Review timelines
~~~~~~~~~~~~~~~~
diff --git a/Documentation/riscv/patch-acceptance.rst b/Documentation/riscv/patch-acceptance.rst
index 07d5a5623e2a..634aa222b410 100644
--- a/Documentation/riscv/patch-acceptance.rst
+++ b/Documentation/riscv/patch-acceptance.rst
@@ -16,6 +16,24 @@ tested code over experimental code. We wish to extend these same
principles to the RISC-V-related code that will be accepted for
inclusion in the kernel.
+Patchwork
+---------
+
+RISC-V has a patchwork instance, where the status of patches can be checked:
+
+ https://patchwork.kernel.org/project/linux-riscv/list/
+
+If your patch does not appear in the default view, the RISC-V maintainers have
+likely either requested changes, or expect it to be applied to another tree.
+
+Automation runs against this patchwork instance, building/testing patches as
+they arrive. The automation applies patches against the current HEAD of the
+RISC-V `for-next` and `fixes` branches, depending on whether the patch has been
+detected as a fix. Failing those, it will use the RISC-V `master` branch.
+The exact commit to which a series has been applied will be noted on patchwork.
+Patches for which any of the checks fail are unlikely to be applied and in most
+cases will need to be resubmitted.
+
Submit Checklist Addendum
-------------------------
We'll only accept patches for new modules or extensions if the
diff --git a/Documentation/rust/quick-start.rst b/Documentation/rust/quick-start.rst
index 13b7744b1e27..a8931512ed98 100644
--- a/Documentation/rust/quick-start.rst
+++ b/Documentation/rust/quick-start.rst
@@ -38,9 +38,9 @@ and run::
rustup override set $(scripts/min-tool-version.sh rustc)
-Otherwise, fetch a standalone installer or install ``rustup`` from:
+Otherwise, fetch a standalone installer from:
- https://www.rust-lang.org
+ https://forge.rust-lang.org/infra/other-installation-methods.html#standalone
Rust standard library source
diff --git a/Documentation/s390/vfio-ap.rst b/Documentation/s390/vfio-ap.rst
index d46e98c7c1ec..bb3f4c4e2885 100644
--- a/Documentation/s390/vfio-ap.rst
+++ b/Documentation/s390/vfio-ap.rst
@@ -551,7 +551,6 @@ These are the steps:
* IOMMU_SUPPORT
* S390
* ZCRYPT
- * S390_AP_IOMMU
* VFIO
* KVM
diff --git a/Documentation/staging/crc32.rst b/Documentation/staging/crc32.rst
index 8a6860f33b4e..7542220967cb 100644
--- a/Documentation/staging/crc32.rst
+++ b/Documentation/staging/crc32.rst
@@ -1,5 +1,5 @@
=================================
-brief tutorial on CRC computation
+Brief tutorial on CRC computation
=================================
A CRC is a long-division remainder. You add the CRC to the message,
diff --git a/Documentation/timers/index.rst b/Documentation/timers/index.rst
index df510ad0c989..983f91f8f023 100644
--- a/Documentation/timers/index.rst
+++ b/Documentation/timers/index.rst
@@ -1,7 +1,7 @@
.. SPDX-License-Identifier: GPL-2.0
======
-timers
+Timers
======
.. toctree::
diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst
index 479c9eac6335..3c9b263de9c2 100644
--- a/Documentation/trace/histogram.rst
+++ b/Documentation/trace/histogram.rst
@@ -35,7 +35,7 @@ Documentation written by Tom Zanussi
in place of an explicit value field - this is simply a count of
event hits. If 'values' isn't specified, an implicit 'hitcount'
value will be automatically created and used as the only value.
- Keys can be any field, or the special string 'stacktrace', which
+ Keys can be any field, or the special string 'common_stacktrace', which
will use the event's kernel stacktrace as the key. The keywords
'keys' or 'key' can be used to specify keys, and the keywords
'values', 'vals', or 'val' can be used to specify values. Compound
@@ -54,7 +54,7 @@ Documentation written by Tom Zanussi
'compatible' if the fields named in the trigger share the same
number and type of fields and those fields also have the same names.
Note that any two events always share the compatible 'hitcount' and
- 'stacktrace' fields and can therefore be combined using those
+ 'common_stacktrace' fields and can therefore be combined using those
fields, however pointless that may be.
'hist' triggers add a 'hist' file to each event's subdirectory.
@@ -547,9 +547,9 @@ Extended error information
the hist trigger display symbolic call_sites, we can have the hist
trigger additionally display the complete set of kernel stack traces
that led to each call_site. To do that, we simply use the special
- value 'stacktrace' for the key parameter::
+ value 'common_stacktrace' for the key parameter::
- # echo 'hist:keys=stacktrace:values=bytes_req,bytes_alloc:sort=bytes_alloc' > \
+ # echo 'hist:keys=common_stacktrace:values=bytes_req,bytes_alloc:sort=bytes_alloc' > \
/sys/kernel/tracing/events/kmem/kmalloc/trigger
The above trigger will use the kernel stack trace in effect when an
@@ -561,9 +561,9 @@ Extended error information
every callpath to a kmalloc for a kernel compile)::
# cat /sys/kernel/tracing/events/kmem/kmalloc/hist
- # trigger info: hist:keys=stacktrace:vals=bytes_req,bytes_alloc:sort=bytes_alloc:size=2048 [active]
+ # trigger info: hist:keys=common_stacktrace:vals=bytes_req,bytes_alloc:sort=bytes_alloc:size=2048 [active]
- { stacktrace:
+ { common_stacktrace:
__kmalloc_track_caller+0x10b/0x1a0
kmemdup+0x20/0x50
hidraw_report_event+0x8a/0x120 [hid]
@@ -581,7 +581,7 @@ Extended error information
cpu_startup_entry+0x315/0x3e0
rest_init+0x7c/0x80
} hitcount: 3 bytes_req: 21 bytes_alloc: 24
- { stacktrace:
+ { common_stacktrace:
__kmalloc_track_caller+0x10b/0x1a0
kmemdup+0x20/0x50
hidraw_report_event+0x8a/0x120 [hid]
@@ -596,7 +596,7 @@ Extended error information
do_IRQ+0x5a/0xf0
ret_from_intr+0x0/0x30
} hitcount: 3 bytes_req: 21 bytes_alloc: 24
- { stacktrace:
+ { common_stacktrace:
kmem_cache_alloc_trace+0xeb/0x150
aa_alloc_task_context+0x27/0x40
apparmor_cred_prepare+0x1f/0x50
@@ -608,7 +608,7 @@ Extended error information
.
.
.
- { stacktrace:
+ { common_stacktrace:
__kmalloc+0x11b/0x1b0
i915_gem_execbuffer2+0x6c/0x2c0 [i915]
drm_ioctl+0x349/0x670 [drm]
@@ -616,7 +616,7 @@ Extended error information
SyS_ioctl+0x81/0xa0
system_call_fastpath+0x12/0x6a
} hitcount: 17726 bytes_req: 13944120 bytes_alloc: 19593808
- { stacktrace:
+ { common_stacktrace:
__kmalloc+0x11b/0x1b0
load_elf_phdrs+0x76/0xa0
load_elf_binary+0x102/0x1650
@@ -625,7 +625,7 @@ Extended error information
SyS_execve+0x3a/0x50
return_from_execve+0x0/0x23
} hitcount: 33348 bytes_req: 17152128 bytes_alloc: 20226048
- { stacktrace:
+ { common_stacktrace:
kmem_cache_alloc_trace+0xeb/0x150
apparmor_file_alloc_security+0x27/0x40
security_file_alloc+0x16/0x20
@@ -636,7 +636,7 @@ Extended error information
SyS_open+0x1e/0x20
system_call_fastpath+0x12/0x6a
} hitcount: 4766422 bytes_req: 9532844 bytes_alloc: 38131376
- { stacktrace:
+ { common_stacktrace:
__kmalloc+0x11b/0x1b0
seq_buf_alloc+0x1b/0x50
seq_read+0x2cc/0x370
@@ -1026,7 +1026,7 @@ Extended error information
First we set up an initially paused stacktrace trigger on the
netif_receive_skb event::
- # echo 'hist:key=stacktrace:vals=len:pause' > \
+ # echo 'hist:key=common_stacktrace:vals=len:pause' > \
/sys/kernel/tracing/events/net/netif_receive_skb/trigger
Next, we set up an 'enable_hist' trigger on the sched_process_exec
@@ -1060,9 +1060,9 @@ Extended error information
$ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz
# cat /sys/kernel/tracing/events/net/netif_receive_skb/hist
- # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused]
+ # trigger info: hist:keys=common_stacktrace:vals=len:sort=hitcount:size=2048 [paused]
- { stacktrace:
+ { common_stacktrace:
__netif_receive_skb_core+0x46d/0x990
__netif_receive_skb+0x18/0x60
netif_receive_skb_internal+0x23/0x90
@@ -1079,7 +1079,7 @@ Extended error information
kthread+0xd2/0xf0
ret_from_fork+0x42/0x70
} hitcount: 85 len: 28884
- { stacktrace:
+ { common_stacktrace:
__netif_receive_skb_core+0x46d/0x990
__netif_receive_skb+0x18/0x60
netif_receive_skb_internal+0x23/0x90
@@ -1097,7 +1097,7 @@ Extended error information
irq_thread+0x11f/0x150
kthread+0xd2/0xf0
} hitcount: 98 len: 664329
- { stacktrace:
+ { common_stacktrace:
__netif_receive_skb_core+0x46d/0x990
__netif_receive_skb+0x18/0x60
process_backlog+0xa8/0x150
@@ -1115,7 +1115,7 @@ Extended error information
inet_sendmsg+0x64/0xa0
sock_sendmsg+0x3d/0x50
} hitcount: 115 len: 13030
- { stacktrace:
+ { common_stacktrace:
__netif_receive_skb_core+0x46d/0x990
__netif_receive_skb+0x18/0x60
netif_receive_skb_internal+0x23/0x90
@@ -1142,14 +1142,14 @@ Extended error information
into the histogram. In order to avoid having to set everything up
again, we can just clear the histogram first::
- # echo 'hist:key=stacktrace:vals=len:clear' >> \
+ # echo 'hist:key=common_stacktrace:vals=len:clear' >> \
/sys/kernel/tracing/events/net/netif_receive_skb/trigger
Just to verify that it is in fact cleared, here's what we now see in
the hist file::
# cat /sys/kernel/tracing/events/net/netif_receive_skb/hist
- # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused]
+ # trigger info: hist:keys=common_stacktrace:vals=len:sort=hitcount:size=2048 [paused]
Totals:
Hits: 0
@@ -1485,12 +1485,12 @@ Extended error information
And here's an example that shows how to combine histogram data from
any two events even if they don't share any 'compatible' fields
- other than 'hitcount' and 'stacktrace'. These commands create a
+ other than 'hitcount' and 'common_stacktrace'. These commands create a
couple of triggers named 'bar' using those fields::
- # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \
+ # echo 'hist:name=bar:key=common_stacktrace:val=hitcount' > \
/sys/kernel/tracing/events/sched/sched_process_fork/trigger
- # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \
+ # echo 'hist:name=bar:key=common_stacktrace:val=hitcount' > \
/sys/kernel/tracing/events/net/netif_rx/trigger
And displaying the output of either shows some interesting if
@@ -1501,16 +1501,16 @@ Extended error information
# event histogram
#
- # trigger info: hist:name=bar:keys=stacktrace:vals=hitcount:sort=hitcount:size=2048 [active]
+ # trigger info: hist:name=bar:keys=common_stacktrace:vals=hitcount:sort=hitcount:size=2048 [active]
#
- { stacktrace:
+ { common_stacktrace:
kernel_clone+0x18e/0x330
kernel_thread+0x29/0x30
kthreadd+0x154/0x1b0
ret_from_fork+0x3f/0x70
} hitcount: 1
- { stacktrace:
+ { common_stacktrace:
netif_rx_internal+0xb2/0xd0
netif_rx_ni+0x20/0x70
dev_loopback_xmit+0xaa/0xd0
@@ -1528,7 +1528,7 @@ Extended error information
call_cpuidle+0x3b/0x60
cpu_startup_entry+0x22d/0x310
} hitcount: 1
- { stacktrace:
+ { common_stacktrace:
netif_rx_internal+0xb2/0xd0
netif_rx_ni+0x20/0x70
dev_loopback_xmit+0xaa/0xd0
@@ -1543,7 +1543,7 @@ Extended error information
SyS_sendto+0xe/0x10
entry_SYSCALL_64_fastpath+0x12/0x6a
} hitcount: 2
- { stacktrace:
+ { common_stacktrace:
netif_rx_internal+0xb2/0xd0
netif_rx+0x1c/0x60
loopback_xmit+0x6c/0xb0
@@ -1561,7 +1561,7 @@ Extended error information
sock_sendmsg+0x38/0x50
___sys_sendmsg+0x14e/0x270
} hitcount: 76
- { stacktrace:
+ { common_stacktrace:
netif_rx_internal+0xb2/0xd0
netif_rx+0x1c/0x60
loopback_xmit+0x6c/0xb0
@@ -1579,7 +1579,7 @@ Extended error information
sock_sendmsg+0x38/0x50
___sys_sendmsg+0x269/0x270
} hitcount: 77
- { stacktrace:
+ { common_stacktrace:
netif_rx_internal+0xb2/0xd0
netif_rx+0x1c/0x60
loopback_xmit+0x6c/0xb0
@@ -1597,7 +1597,7 @@ Extended error information
sock_sendmsg+0x38/0x50
SYSC_sendto+0xef/0x170
} hitcount: 88
- { stacktrace:
+ { common_stacktrace:
kernel_clone+0x18e/0x330
SyS_clone+0x19/0x20
entry_SYSCALL_64_fastpath+0x12/0x6a
@@ -1949,7 +1949,7 @@ uninterruptible state::
# cd /sys/kernel/tracing
# echo 's:block_lat pid_t pid; u64 delta; unsigned long[] stack;' > dynamic_events
- # echo 'hist:keys=next_pid:ts=common_timestamp.usecs,st=stacktrace if prev_state == 2' >> events/sched/sched_switch/trigger
+ # echo 'hist:keys=next_pid:ts=common_timestamp.usecs,st=common_stacktrace if prev_state == 2' >> events/sched/sched_switch/trigger
# echo 'hist:keys=prev_pid:delta=common_timestamp.usecs-$ts,s=$st:onmax($delta).trace(block_lat,prev_pid,$delta,$s)' >> events/sched/sched_switch/trigger
# echo 1 > events/synthetic/block_lat/enable
# cat trace
diff --git a/Documentation/trace/user_events.rst b/Documentation/trace/user_events.rst
index f79987e16cf4..e7b07313550a 100644
--- a/Documentation/trace/user_events.rst
+++ b/Documentation/trace/user_events.rst
@@ -14,10 +14,6 @@ Programs can view status of the events via
/sys/kernel/tracing/user_events_status and can both register and write
data out via /sys/kernel/tracing/user_events_data.
-Programs can also use /sys/kernel/tracing/dynamic_events to register and
-delete user based events via the u: prefix. The format of the command to
-dynamic_events is the same as the ioctl with the u: prefix applied.
-
Typically programs will register a set of events that they wish to expose to
tools that can read trace_events (such as ftrace and perf). The registration
process tells the kernel which address and bit to reflect if any tool has
@@ -144,6 +140,9 @@ its name. Delete will only succeed if there are no references left to the
event (in both user and kernel space). User programs should use a separate file
to request deletes than the one used for registration due to this.
+**NOTE:** By default events will auto-delete when there are no references left
+to the event. Flags in the future may change this logic.
+
Unregistering
-------------
If after registering an event it is no longer wanted to be updated then it can
diff --git a/Documentation/translations/zh_CN/devicetree/usage-model.rst b/Documentation/translations/zh_CN/devicetree/usage-model.rst
index c6aee82c7e6e..19ba4ae0cd81 100644
--- a/Documentation/translations/zh_CN/devicetree/usage-model.rst
+++ b/Documentation/translations/zh_CN/devicetree/usage-model.rst
@@ -325,6 +325,6 @@ Primecell设备。然而,棘手的一点是,AMBA总线上的所有设备并é
当使用DT时,这给of_platform_populate()带æ¥äº†é—®é¢˜ï¼Œå› ä¸ºå®ƒå¿…须决定是å¦å°†
æ¯ä¸ªèŠ‚点注册为platform_device或amba_device。ä¸å¹¸çš„是,这使设备创建模型
-å˜å¾—有点å¤æ‚,但解决方案原æ¥å¹¶ä¸æ˜¯å¤ªå…·æœ‰ä¾µç•¥æ€§ã€‚如果一个节点与“arm,amba-primecellâ€
+å˜å¾—有点å¤æ‚,但解决方案原æ¥å¹¶ä¸æ˜¯å¤ªå…·æœ‰ä¾µç•¥æ€§ã€‚如果一个节点与“arm,primecellâ€
兼容,那么of_platform_populate()将把它注册为amba_device而ä¸æ˜¯
platform_device。
diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 176e8fc3f31b..4f7b23faebb9 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -363,7 +363,7 @@ Code Seq# Include File Comments
0xCC 00-0F drivers/misc/ibmvmc.h pseries VMC driver
0xCD 01 linux/reiserfs_fs.h
0xCE 01-02 uapi/linux/cxl_mem.h Compute Express Link Memory Devices
-0xCF 02 fs/cifs/ioctl.c
+0xCF 02 fs/smb/client/cifs_ioctl.h
0xDB 00-0F drivers/char/mwave/mwavepub.h
0xDD 00-3F ZFCP device driver see drivers/s390/scsi/
<mailto:aherrman@de.ibm.com>
diff --git a/MAINTAINERS b/MAINTAINERS
index e0ad886d3163..233b9a3b3193 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -956,7 +956,8 @@ F: Documentation/networking/device_drivers/ethernet/amazon/ena.rst
F: drivers/net/ethernet/amazon/
AMAZON RDMA EFA DRIVER
-M: Gal Pressman <galpress@amazon.com>
+M: Michael Margolin <mrgolin@amazon.com>
+R: Gal Pressman <gal.pressman@linux.dev>
R: Yossi Leybovich <sleybo@amazon.com>
L: linux-rdma@vger.kernel.org
S: Supported
@@ -1600,7 +1601,7 @@ F: drivers/media/i2c/ar0521.c
ARASAN NAND CONTROLLER DRIVER
M: Miquel Raynal <miquel.raynal@bootlin.com>
-M: Naga Sureshkumar Relli <nagasure@xilinx.com>
+R: Michal Simek <michal.simek@amd.com>
L: linux-mtd@lists.infradead.org
S: Maintained
F: Documentation/devicetree/bindings/mtd/arasan,nand-controller.yaml
@@ -1677,10 +1678,7 @@ F: drivers/power/reset/arm-versatile-reboot.c
F: drivers/soc/versatile/
ARM KOMEDA DRM-KMS DRIVER
-M: James (Qian) Wang <james.qian.wang@arm.com>
M: Liviu Dudau <liviu.dudau@arm.com>
-M: Mihail Atanassov <mihail.atanassov@arm.com>
-L: Mali DP Maintainers <malidp@foss.arm.com>
S: Supported
T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/devicetree/bindings/display/arm,komeda.yaml
@@ -1701,8 +1699,6 @@ F: include/uapi/drm/panfrost_drm.h
ARM MALI-DP DRM DRIVER
M: Liviu Dudau <liviu.dudau@arm.com>
-M: Brian Starkey <brian.starkey@arm.com>
-L: Mali DP Maintainers <malidp@foss.arm.com>
S: Supported
T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/devicetree/bindings/display/arm,malidp.yaml
@@ -1768,7 +1764,7 @@ F: include/linux/amba/mmci.h
ARM PRIMECELL PL35X NAND CONTROLLER DRIVER
M: Miquel Raynal <miquel.raynal@bootlin.com>
-M: Naga Sureshkumar Relli <nagasure@xilinx.com>
+R: Michal Simek <michal.simek@amd.com>
L: linux-mtd@lists.infradead.org
S: Maintained
F: Documentation/devicetree/bindings/mtd/arm,pl353-nand-r2p1.yaml
@@ -1776,7 +1772,7 @@ F: drivers/mtd/nand/raw/pl35x-nand-controller.c
ARM PRIMECELL PL35X SMC DRIVER
M: Miquel Raynal <miquel.raynal@bootlin.com>
-M: Naga Sureshkumar Relli <nagasure@xilinx.com>
+R: Michal Simek <michal.simek@amd.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
F: Documentation/devicetree/bindings/memory-controllers/arm,pl35x-smc.yaml
@@ -2434,6 +2430,15 @@ X: drivers/net/wireless/atmel/
N: at91
N: atmel
+ARM/MICROCHIP (ARM64) SoC support
+M: Conor Dooley <conor@kernel.org>
+M: Nicolas Ferre <nicolas.ferre@microchip.com>
+M: Claudiu Beznea <claudiu.beznea@microchip.com>
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S: Supported
+T: git https://git.kernel.org/pub/scm/linux/kernel/git/at91/linux.git
+F: arch/arm64/boot/dts/microchip/
+
ARM/Microchip Sparx5 SoC support
M: Lars Povlsen <lars.povlsen@microchip.com>
M: Steen Hegelund <Steen.Hegelund@microchip.com>
@@ -2441,8 +2446,7 @@ M: Daniel Machon <daniel.machon@microchip.com>
M: UNGLinuxDriver@microchip.com
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Supported
-T: git git://github.com/microchip-ung/linux-upstream.git
-F: arch/arm64/boot/dts/microchip/
+F: arch/arm64/boot/dts/microchip/sparx*
F: drivers/net/ethernet/microchip/vcap/
F: drivers/pinctrl/pinctrl-microchip-sgpio.c
N: sparx5
@@ -3541,7 +3545,7 @@ F: Documentation/filesystems/befs.rst
F: fs/befs/
BFQ I/O SCHEDULER
-M: Paolo Valente <paolo.valente@linaro.org>
+M: Paolo Valente <paolo.valente@unimore.it>
M: Jens Axboe <axboe@kernel.dk>
L: linux-block@vger.kernel.org
S: Maintained
@@ -4914,7 +4918,6 @@ F: drivers/media/cec/i2c/ch7322.c
CIRRUS LOGIC AUDIO CODEC DRIVERS
M: James Schulman <james.schulman@cirrus.com>
M: David Rhodes <david.rhodes@cirrus.com>
-M: Lucas Tanure <tanureal@opensource.cirrus.com>
M: Richard Fitzgerald <rf@opensource.cirrus.com>
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
L: patches@opensource.cirrus.com
@@ -5136,7 +5139,7 @@ X: drivers/clk/clkdev.c
COMMON INTERNET FILE SYSTEM CLIENT (CIFS and SMB3)
M: Steve French <sfrench@samba.org>
-R: Paulo Alcantara <pc@cjr.nz> (DFS, global name space)
+R: Paulo Alcantara <pc@manguebit.com> (DFS, global name space)
R: Ronnie Sahlberg <lsahlber@redhat.com> (directory leases, sparse files)
R: Shyam Prasad N <sprasad@microsoft.com> (multichannel)
R: Tom Talpey <tom@talpey.com> (RDMA, smbdirect)
@@ -5146,8 +5149,8 @@ S: Supported
W: https://wiki.samba.org/index.php/LinuxCIFS
T: git git://git.samba.org/sfrench/cifs-2.6.git
F: Documentation/admin-guide/cifs/
-F: fs/cifs/
-F: fs/smbfs_common/
+F: fs/smb/client/
+F: fs/smb/common/
F: include/uapi/linux/cifs
COMPACTPCI HOTPLUG CORE
@@ -5725,6 +5728,14 @@ F: include/linux/tfrc.h
F: include/uapi/linux/dccp.h
F: net/dccp/
+DEBUGOBJECTS:
+M: Thomas Gleixner <tglx@linutronix.de>
+L: linux-kernel@vger.kernel.org
+S: Maintained
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core/debugobjects
+F: lib/debugobjects.c
+F: include/linux/debugobjects.h
+
DECSTATION PLATFORM SUPPORT
M: "Maciej W. Rozycki" <macro@orcam.me.uk>
L: linux-mips@vger.kernel.org
@@ -6012,7 +6023,7 @@ W: http://www.dialog-semiconductor.com/products
F: Documentation/devicetree/bindings/input/da90??-onkey.txt
F: Documentation/devicetree/bindings/input/dlg,da72??.txt
F: Documentation/devicetree/bindings/mfd/da90*.txt
-F: Documentation/devicetree/bindings/mfd/da90*.yaml
+F: Documentation/devicetree/bindings/mfd/dlg,da90*.yaml
F: Documentation/devicetree/bindings/regulator/da92*.txt
F: Documentation/devicetree/bindings/regulator/dlg,da9*.yaml
F: Documentation/devicetree/bindings/regulator/slg51000.txt
@@ -6211,6 +6222,7 @@ X: Documentation/devicetree/
X: Documentation/driver-api/media/
X: Documentation/firmware-guide/acpi/
X: Documentation/i2c/
+X: Documentation/netlink/
X: Documentation/power/
X: Documentation/spi/
X: Documentation/userspace-api/media/
@@ -8158,6 +8170,7 @@ F: include/linux/spi/spi-fsl-dspi.h
FREESCALE ENETC ETHERNET DRIVERS
M: Claudiu Manoil <claudiu.manoil@nxp.com>
+M: Vladimir Oltean <vladimir.oltean@nxp.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/ethernet/freescale/enetc/
@@ -8786,6 +8799,7 @@ F: include/linux/gpio/regmap.h
GPIO SUBSYSTEM
M: Linus Walleij <linus.walleij@linaro.org>
M: Bartosz Golaszewski <brgl@bgdev.pl>
+R: Andy Shevchenko <andy@kernel.org>
L: linux-gpio@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git
@@ -9339,7 +9353,7 @@ F: include/linux/hisi_acc_qm.h
HISILICON ROCE DRIVER
M: Haoyue Xu <xuhaoyue1@hisilicon.com>
-M: Wenpeng Liang <liangwenpeng@huawei.com>
+M: Junxian Huang <huangjunxian6@hisilicon.com>
L: linux-rdma@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/infiniband/hisilicon-hns-roce.txt
@@ -9682,8 +9696,9 @@ F: include/uapi/linux/i2c-*.h
F: include/uapi/linux/i2c.h
I2C SUBSYSTEM HOST DRIVERS
+M: Andi Shyti <andi.shyti@kernel.org>
L: linux-i2c@vger.kernel.org
-S: Odd Fixes
+S: Maintained
W: https://i2c.wiki.kernel.org/
Q: https://patchwork.ozlabs.org/project/linux-i2c/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git
@@ -9957,8 +9972,9 @@ M: Miquel Raynal <miquel.raynal@bootlin.com>
L: linux-wpan@vger.kernel.org
S: Maintained
W: https://linux-wpan.org/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/sschmidt/wpan.git
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/sschmidt/wpan-next.git
+Q: https://patchwork.kernel.org/project/linux-wpan/list/
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/wpan/wpan.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/wpan/wpan-next.git
F: Documentation/networking/ieee802154.rst
F: drivers/net/ieee802154/
F: include/linux/ieee802154.h
@@ -10110,7 +10126,7 @@ S: Maintained
F: Documentation/process/kernel-docs.rst
INDUSTRY PACK SUBSYSTEM (IPACK)
-M: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+M: Vaibhav Gupta <vaibhavgupta40@gmail.com>
M: Jens Taprogge <jens.taprogge@taprogge.org>
M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
L: industrypack-devel@lists.sourceforge.net
@@ -11259,6 +11275,10 @@ W: http://kernelnewbies.org/KernelJanitors
KERNEL NFSD, SUNRPC, AND LOCKD SERVERS
M: Chuck Lever <chuck.lever@oracle.com>
M: Jeff Layton <jlayton@kernel.org>
+R: Neil Brown <neilb@suse.de>
+R: Olga Kornievskaia <kolga@netapp.com>
+R: Dai Ngo <Dai.Ngo@oracle.com>
+R: Tom Talpey <tom@talpey.com>
L: linux-nfs@vger.kernel.org
S: Supported
W: http://nfs.sourceforge.net/
@@ -11305,9 +11325,9 @@ R: Tom Talpey <tom@talpey.com>
L: linux-cifs@vger.kernel.org
S: Maintained
T: git git://git.samba.org/ksmbd.git
-F: Documentation/filesystems/cifs/ksmbd.rst
-F: fs/ksmbd/
-F: fs/smbfs_common/
+F: Documentation/filesystems/smb/ksmbd.rst
+F: fs/smb/common/
+F: fs/smb/server/
KERNEL UNIT TESTING FRAMEWORK (KUnit)
M: Brendan Higgins <brendanhiggins@google.com>
@@ -13254,10 +13274,11 @@ F: drivers/memory/mtk-smi.c
F: include/soc/mediatek/smi.h
MEDIATEK SWITCH DRIVER
-M: Sean Wang <sean.wang@mediatek.com>
+M: Arınç ÜNAL <arinc.unal@arinc9.com>
+M: Daniel Golle <daniel@makrotopia.org>
M: Landen Chao <Landen.Chao@mediatek.com>
M: DENG Qingfang <dqfext@gmail.com>
-M: Daniel Golle <daniel@makrotopia.org>
+M: Sean Wang <sean.wang@mediatek.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/dsa/mt7530-mdio.c
@@ -13832,7 +13853,7 @@ F: drivers/tty/serial/8250/8250_pci1xxxx.c
MICROCHIP POLARFIRE FPGA DRIVERS
M: Conor Dooley <conor.dooley@microchip.com>
-R: Ivan Bornyakov <i.bornyakov@metrotek.ru>
+R: Vladimir Georgiev <v.georgiev@metrotek.ru>
L: linux-fpga@vger.kernel.org
S: Supported
F: Documentation/devicetree/bindings/fpga/microchip,mpf-spi-fpga-mgr.yaml
@@ -14566,6 +14587,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
F: Documentation/devicetree/bindings/net/
F: drivers/connector/
F: drivers/net/
+X: drivers/net/wireless/
F: include/dt-bindings/net/
F: include/linux/etherdevice.h
F: include/linux/fcdevice.h
@@ -14615,6 +14637,7 @@ B: mailto:netdev@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
F: Documentation/core-api/netlink.rst
+F: Documentation/netlink/
F: Documentation/networking/
F: Documentation/process/maintainer-netdev.rst
F: Documentation/userspace-api/netlink/
@@ -14629,6 +14652,7 @@ F: include/uapi/linux/netdevice.h
F: lib/net_utils.c
F: lib/random32.c
F: net/
+X: net/bluetooth/
F: tools/net/
F: tools/testing/selftests/net/
@@ -14928,6 +14952,7 @@ F: drivers/ntb/hw/intel/
NTFS FILESYSTEM
M: Anton Altaparmakov <anton@tuxera.com>
+R: Namjae Jeon <linkinjeon@kernel.org>
L: linux-ntfs-dev@lists.sourceforge.net
S: Supported
W: http://www.tuxera.com/
@@ -16365,7 +16390,7 @@ F: Documentation/devicetree/bindings/pci/intel,keembay-pcie*
F: drivers/pci/controller/dwc/pcie-keembay.c
PCIE DRIVER FOR INTEL LGM GW SOC
-M: Rahul Tanwar <rtanwar@maxlinear.com>
+M: Chuanhua Lei <lchuanhua@maxlinear.com>
L: linux-pci@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/pci/intel-gw-pcie.yaml
@@ -17808,7 +17833,7 @@ F: tools/testing/selftests/rtc/
Real-time Linux Analysis (RTLA) tools
M: Daniel Bristot de Oliveira <bristot@kernel.org>
M: Steven Rostedt <rostedt@goodmis.org>
-L: linux-trace-devel@vger.kernel.org
+L: linux-trace-kernel@vger.kernel.org
S: Maintained
F: Documentation/tools/rtla/
F: tools/tracing/rtla/
@@ -18029,6 +18054,14 @@ S: Maintained
F: Documentation/devicetree/bindings/usb/renesas,rzn1-usbf.yaml
F: drivers/usb/gadget/udc/renesas_usbf.c
+RENESAS RZ/V2M I2C DRIVER
+M: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
+L: linux-i2c@vger.kernel.org
+L: linux-renesas-soc@vger.kernel.org
+S: Supported
+F: Documentation/devicetree/bindings/i2c/renesas,rzv2m.yaml
+F: drivers/i2c/busses/i2c-rzv2m.c
+
RENESAS USB PHY DRIVER
M: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
L: linux-renesas-soc@vger.kernel.org
@@ -18370,7 +18403,7 @@ F: drivers/infiniband/ulp/rtrs/
RUNTIME VERIFICATION (RV)
M: Daniel Bristot de Oliveira <bristot@kernel.org>
M: Steven Rostedt <rostedt@goodmis.org>
-L: linux-trace-devel@vger.kernel.org
+L: linux-trace-kernel@vger.kernel.org
S: Maintained
F: Documentation/trace/rv/
F: include/linux/rv.h
@@ -18575,10 +18608,9 @@ F: Documentation/admin-guide/LSM/SafeSetID.rst
F: security/safesetid/
SAMSUNG AUDIO (ASoC) DRIVERS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
M: Sylwester Nawrocki <s.nawrocki@samsung.com>
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
-S: Supported
+S: Maintained
B: mailto:linux-samsung-soc@vger.kernel.org
F: Documentation/devicetree/bindings/sound/samsung*
F: sound/soc/samsung/
@@ -18706,7 +18738,6 @@ F: include/dt-bindings/clock/samsung,*.h
F: include/linux/clk/samsung.h
SAMSUNG SPI DRIVERS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
M: Andi Shyti <andi.shyti@kernel.org>
L: linux-spi@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
@@ -18842,12 +18873,11 @@ F: drivers/target/
F: include/target/
SCTP PROTOCOL
-M: Neil Horman <nhorman@tuxdriver.com>
M: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
M: Xin Long <lucien.xin@gmail.com>
L: linux-sctp@vger.kernel.org
S: Maintained
-W: http://lksctp.sourceforge.net
+W: https://github.com/sctp/lksctp-tools/wiki
F: Documentation/networking/sctp.rst
F: include/linux/sctp.h
F: include/net/sctp/
@@ -19116,6 +19146,9 @@ SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS
M: Karsten Graul <kgraul@linux.ibm.com>
M: Wenjia Zhang <wenjia@linux.ibm.com>
M: Jan Karcher <jaka@linux.ibm.com>
+R: D. Wythe <alibuda@linux.alibaba.com>
+R: Tony Lu <tonylu@linux.alibaba.com>
+R: Wen Gu <guwen@linux.alibaba.com>
L: linux-s390@vger.kernel.org
S: Supported
F: net/smc/
diff --git a/Makefile b/Makefile
index f836936fb4d8..e51e4d9174ab 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 4
SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION =
NAME = Hurr durr I'ma ninja sloth
# *DOCUMENTATION*
diff --git a/arch/arm/boot/dts/am57xx-cl-som-am57x.dts b/arch/arm/boot/dts/am57xx-cl-som-am57x.dts
index 2fc9a5d5e0c0..625b9b311b49 100644
--- a/arch/arm/boot/dts/am57xx-cl-som-am57x.dts
+++ b/arch/arm/boot/dts/am57xx-cl-som-am57x.dts
@@ -527,7 +527,7 @@
interrupt-parent = <&gpio1>;
interrupts = <31 0>;
- pendown-gpio = <&gpio1 31 0>;
+ pendown-gpio = <&gpio1 31 GPIO_ACTIVE_LOW>;
ti,x-min = /bits/ 16 <0x0>;
diff --git a/arch/arm/boot/dts/at91-sama7g5ek.dts b/arch/arm/boot/dts/at91-sama7g5ek.dts
index aa5cc0e98bba..217e9b96c61e 100644
--- a/arch/arm/boot/dts/at91-sama7g5ek.dts
+++ b/arch/arm/boot/dts/at91-sama7g5ek.dts
@@ -792,7 +792,7 @@
};
&shdwc {
- atmel,shdwc-debouncer = <976>;
+ debounce-delay-us = <976>;
status = "okay";
input@0 {
diff --git a/arch/arm/boot/dts/at91sam9261ek.dts b/arch/arm/boot/dts/at91sam9261ek.dts
index 88869ca874d1..045cb253f23a 100644
--- a/arch/arm/boot/dts/at91sam9261ek.dts
+++ b/arch/arm/boot/dts/at91sam9261ek.dts
@@ -156,7 +156,7 @@
compatible = "ti,ads7843";
interrupts-extended = <&pioC 2 IRQ_TYPE_EDGE_BOTH>;
spi-max-frequency = <3000000>;
- pendown-gpio = <&pioC 2 GPIO_ACTIVE_HIGH>;
+ pendown-gpio = <&pioC 2 GPIO_ACTIVE_LOW>;
ti,x-min = /bits/ 16 <150>;
ti,x-max = /bits/ 16 <3830>;
diff --git a/arch/arm/boot/dts/imx6qdl-mba6.dtsi b/arch/arm/boot/dts/imx6qdl-mba6.dtsi
index 78555a618851..7b7e6c2ad190 100644
--- a/arch/arm/boot/dts/imx6qdl-mba6.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-mba6.dtsi
@@ -209,6 +209,7 @@
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_pcie>;
reset-gpio = <&gpio6 7 GPIO_ACTIVE_LOW>;
+ vpcie-supply = <&reg_pcie>;
status = "okay";
};
diff --git a/arch/arm/boot/dts/imx6ull-dhcor-som.dtsi b/arch/arm/boot/dts/imx6ull-dhcor-som.dtsi
index 5882c7565f64..32a6022625d9 100644
--- a/arch/arm/boot/dts/imx6ull-dhcor-som.dtsi
+++ b/arch/arm/boot/dts/imx6ull-dhcor-som.dtsi
@@ -8,6 +8,7 @@
#include <dt-bindings/input/input.h>
#include <dt-bindings/leds/common.h>
#include <dt-bindings/pwm/pwm.h>
+#include <dt-bindings/regulator/dlg,da9063-regulator.h>
#include "imx6ull.dtsi"
/ {
@@ -84,16 +85,20 @@
regulators {
vdd_soc_in_1v4: buck1 {
+ regulator-allowed-modes = <DA9063_BUCK_MODE_SLEEP>; /* PFM */
regulator-always-on;
regulator-boot-on;
+ regulator-initial-mode = <DA9063_BUCK_MODE_SLEEP>;
regulator-max-microvolt = <1400000>;
regulator-min-microvolt = <1400000>;
regulator-name = "vdd_soc_in_1v4";
};
vcc_3v3: buck2 {
+ regulator-allowed-modes = <DA9063_BUCK_MODE_SYNC>; /* PWM */
regulator-always-on;
regulator-boot-on;
+ regulator-initial-mode = <DA9063_BUCK_MODE_SYNC>;
regulator-max-microvolt = <3300000>;
regulator-min-microvolt = <3300000>;
regulator-name = "vcc_3v3";
@@ -106,8 +111,10 @@
* the voltage is set to 1.5V.
*/
vcc_ddr_1v35: buck3 {
+ regulator-allowed-modes = <DA9063_BUCK_MODE_SYNC>; /* PWM */
regulator-always-on;
regulator-boot-on;
+ regulator-initial-mode = <DA9063_BUCK_MODE_SYNC>;
regulator-max-microvolt = <1500000>;
regulator-min-microvolt = <1500000>;
regulator-name = "vcc_ddr_1v35";
diff --git a/arch/arm/boot/dts/imx7d-pico-hobbit.dts b/arch/arm/boot/dts/imx7d-pico-hobbit.dts
index d917dc4f2f22..6ad39dca7009 100644
--- a/arch/arm/boot/dts/imx7d-pico-hobbit.dts
+++ b/arch/arm/boot/dts/imx7d-pico-hobbit.dts
@@ -64,7 +64,7 @@
interrupt-parent = <&gpio2>;
interrupts = <7 0>;
spi-max-frequency = <1000000>;
- pendown-gpio = <&gpio2 7 0>;
+ pendown-gpio = <&gpio2 7 GPIO_ACTIVE_LOW>;
vcc-supply = <&reg_3p3v>;
ti,x-min = /bits/ 16 <0>;
ti,x-max = /bits/ 16 <4095>;
diff --git a/arch/arm/boot/dts/imx7d-sdb.dts b/arch/arm/boot/dts/imx7d-sdb.dts
index f483bc0afe5e..234e5fc647b2 100644
--- a/arch/arm/boot/dts/imx7d-sdb.dts
+++ b/arch/arm/boot/dts/imx7d-sdb.dts
@@ -205,7 +205,7 @@
pinctrl-0 = <&pinctrl_tsc2046_pendown>;
interrupt-parent = <&gpio2>;
interrupts = <29 0>;
- pendown-gpio = <&gpio2 29 GPIO_ACTIVE_HIGH>;
+ pendown-gpio = <&gpio2 29 GPIO_ACTIVE_LOW>;
touchscreen-max-pressure = <255>;
wakeup-source;
};
diff --git a/arch/arm/boot/dts/omap3-cm-t3x.dtsi b/arch/arm/boot/dts/omap3-cm-t3x.dtsi
index e61b8a2bfb7d..51baedf1603b 100644
--- a/arch/arm/boot/dts/omap3-cm-t3x.dtsi
+++ b/arch/arm/boot/dts/omap3-cm-t3x.dtsi
@@ -227,7 +227,7 @@
interrupt-parent = <&gpio2>;
interrupts = <25 0>; /* gpio_57 */
- pendown-gpio = <&gpio2 25 GPIO_ACTIVE_HIGH>;
+ pendown-gpio = <&gpio2 25 GPIO_ACTIVE_LOW>;
ti,x-min = /bits/ 16 <0x0>;
ti,x-max = /bits/ 16 <0x0fff>;
diff --git a/arch/arm/boot/dts/omap3-devkit8000-lcd-common.dtsi b/arch/arm/boot/dts/omap3-devkit8000-lcd-common.dtsi
index 3decc2d78a6c..a7f99ae0c1fe 100644
--- a/arch/arm/boot/dts/omap3-devkit8000-lcd-common.dtsi
+++ b/arch/arm/boot/dts/omap3-devkit8000-lcd-common.dtsi
@@ -54,7 +54,7 @@
interrupt-parent = <&gpio1>;
interrupts = <27 0>; /* gpio_27 */
- pendown-gpio = <&gpio1 27 GPIO_ACTIVE_HIGH>;
+ pendown-gpio = <&gpio1 27 GPIO_ACTIVE_LOW>;
ti,x-min = /bits/ 16 <0x0>;
ti,x-max = /bits/ 16 <0x0fff>;
diff --git a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
index c595afe4181d..d310b5c7bac3 100644
--- a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
+++ b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
@@ -311,7 +311,7 @@
interrupt-parent = <&gpio1>;
interrupts = <8 0>; /* boot6 / gpio_8 */
spi-max-frequency = <1000000>;
- pendown-gpio = <&gpio1 8 GPIO_ACTIVE_HIGH>;
+ pendown-gpio = <&gpio1 8 GPIO_ACTIVE_LOW>;
vcc-supply = <&reg_vcc3>;
pinctrl-names = "default";
pinctrl-0 = <&tsc2048_pins>;
diff --git a/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi b/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi
index 1d6e88f99eb3..c3570acc35fa 100644
--- a/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi
+++ b/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi
@@ -149,7 +149,7 @@
interrupt-parent = <&gpio4>;
interrupts = <18 0>; /* gpio_114 */
- pendown-gpio = <&gpio4 18 GPIO_ACTIVE_HIGH>;
+ pendown-gpio = <&gpio4 18 GPIO_ACTIVE_LOW>;
ti,x-min = /bits/ 16 <0x0>;
ti,x-max = /bits/ 16 <0x0fff>;
diff --git a/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi b/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi
index 7e30f9d45790..d95a0e130058 100644
--- a/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi
+++ b/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi
@@ -160,7 +160,7 @@
interrupt-parent = <&gpio4>;
interrupts = <18 0>; /* gpio_114 */
- pendown-gpio = <&gpio4 18 GPIO_ACTIVE_HIGH>;
+ pendown-gpio = <&gpio4 18 GPIO_ACTIVE_LOW>;
ti,x-min = /bits/ 16 <0x0>;
ti,x-max = /bits/ 16 <0x0fff>;
diff --git a/arch/arm/boot/dts/omap3-pandora-common.dtsi b/arch/arm/boot/dts/omap3-pandora-common.dtsi
index 559853764487..4c3b6bab179c 100644
--- a/arch/arm/boot/dts/omap3-pandora-common.dtsi
+++ b/arch/arm/boot/dts/omap3-pandora-common.dtsi
@@ -651,7 +651,7 @@
pinctrl-0 = <&penirq_pins>;
interrupt-parent = <&gpio3>;
interrupts = <30 IRQ_TYPE_NONE>; /* GPIO_94 */
- pendown-gpio = <&gpio3 30 GPIO_ACTIVE_HIGH>;
+ pendown-gpio = <&gpio3 30 GPIO_ACTIVE_LOW>;
vcc-supply = <&vaux4>;
ti,x-min = /bits/ 16 <0>;
diff --git a/arch/arm/boot/dts/omap5-cm-t54.dts b/arch/arm/boot/dts/omap5-cm-t54.dts
index 2d87b9fc230e..af288d63a26a 100644
--- a/arch/arm/boot/dts/omap5-cm-t54.dts
+++ b/arch/arm/boot/dts/omap5-cm-t54.dts
@@ -354,7 +354,7 @@
interrupt-parent = <&gpio1>;
interrupts = <15 0>; /* gpio1_wk15 */
- pendown-gpio = <&gpio1 15 GPIO_ACTIVE_HIGH>;
+ pendown-gpio = <&gpio1 15 GPIO_ACTIVE_LOW>;
ti,x-min = /bits/ 16 <0x0>;
diff --git a/arch/arm/boot/dts/qcom-apq8026-asus-sparrow.dts b/arch/arm/boot/dts/qcom-apq8026-asus-sparrow.dts
index 7a80e1c9f126..aa0e0e8d2a97 100644
--- a/arch/arm/boot/dts/qcom-apq8026-asus-sparrow.dts
+++ b/arch/arm/boot/dts/qcom-apq8026-asus-sparrow.dts
@@ -268,7 +268,6 @@
function = "gpio";
drive-strength = <8>;
bias-disable;
- input-enable;
};
wlan_hostwake_default_state: wlan-hostwake-default-state {
@@ -276,7 +275,6 @@
function = "gpio";
drive-strength = <2>;
bias-disable;
- input-enable;
};
wlan_regulator_default_state: wlan-regulator-default-state {
diff --git a/arch/arm/boot/dts/qcom-apq8026-huawei-sturgeon.dts b/arch/arm/boot/dts/qcom-apq8026-huawei-sturgeon.dts
index d64096028ab1..5593a3a60d6c 100644
--- a/arch/arm/boot/dts/qcom-apq8026-huawei-sturgeon.dts
+++ b/arch/arm/boot/dts/qcom-apq8026-huawei-sturgeon.dts
@@ -352,7 +352,6 @@
function = "gpio";
drive-strength = <2>;
bias-disable;
- input-enable;
};
wlan_regulator_default_state: wlan-regulator-default-state {
diff --git a/arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts b/arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts
index b82381229adf..b887e5361ec3 100644
--- a/arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts
+++ b/arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts
@@ -307,7 +307,6 @@
function = "gpio";
drive-strength = <2>;
bias-disable;
- input-enable;
};
touch_pins: touch-state {
@@ -317,7 +316,6 @@
drive-strength = <8>;
bias-pull-down;
- input-enable;
};
reset-pins {
@@ -335,7 +333,6 @@
function = "gpio";
drive-strength = <2>;
bias-disable;
- input-enable;
};
wlan_regulator_default_state: wlan-regulator-default-state {
diff --git a/arch/arm/boot/dts/qcom-apq8064.dtsi b/arch/arm/boot/dts/qcom-apq8064.dtsi
index 672b246afbba..d2289205ff81 100644
--- a/arch/arm/boot/dts/qcom-apq8064.dtsi
+++ b/arch/arm/boot/dts/qcom-apq8064.dtsi
@@ -83,6 +83,7 @@
L2: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
idle-states {
diff --git a/arch/arm/boot/dts/qcom-apq8084.dtsi b/arch/arm/boot/dts/qcom-apq8084.dtsi
index b653ea40c441..83839e1ec4d1 100644
--- a/arch/arm/boot/dts/qcom-apq8084.dtsi
+++ b/arch/arm/boot/dts/qcom-apq8084.dtsi
@@ -74,6 +74,7 @@
L2: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
qcom,saw = <&saw_l2>;
};
diff --git a/arch/arm/boot/dts/qcom-ipq4019.dtsi b/arch/arm/boot/dts/qcom-ipq4019.dtsi
index dfcfb3339c23..f0ef86fadc9d 100644
--- a/arch/arm/boot/dts/qcom-ipq4019.dtsi
+++ b/arch/arm/boot/dts/qcom-ipq4019.dtsi
@@ -102,6 +102,7 @@
L2: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
qcom,saw = <&saw_l2>;
};
};
diff --git a/arch/arm/boot/dts/qcom-ipq8064.dtsi b/arch/arm/boot/dts/qcom-ipq8064.dtsi
index af6764770fd1..7581845737a8 100644
--- a/arch/arm/boot/dts/qcom-ipq8064.dtsi
+++ b/arch/arm/boot/dts/qcom-ipq8064.dtsi
@@ -45,6 +45,7 @@
L2: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
};
diff --git a/arch/arm/boot/dts/qcom-mdm9615-wp8548-mangoh-green.dts b/arch/arm/boot/dts/qcom-mdm9615-wp8548-mangoh-green.dts
index a8304769b509..b269fdca1460 100644
--- a/arch/arm/boot/dts/qcom-mdm9615-wp8548-mangoh-green.dts
+++ b/arch/arm/boot/dts/qcom-mdm9615-wp8548-mangoh-green.dts
@@ -49,7 +49,6 @@
gpioext1-pins {
pins = "gpio2";
function = "gpio";
- input-enable;
bias-disable;
};
};
diff --git a/arch/arm/boot/dts/qcom-msm8660.dtsi b/arch/arm/boot/dts/qcom-msm8660.dtsi
index f601b40ebcf4..78023ed2fdf7 100644
--- a/arch/arm/boot/dts/qcom-msm8660.dtsi
+++ b/arch/arm/boot/dts/qcom-msm8660.dtsi
@@ -36,6 +36,7 @@
L2: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
};
diff --git a/arch/arm/boot/dts/qcom-msm8960.dtsi b/arch/arm/boot/dts/qcom-msm8960.dtsi
index 2a668cd535cc..616fef2ea682 100644
--- a/arch/arm/boot/dts/qcom-msm8960.dtsi
+++ b/arch/arm/boot/dts/qcom-msm8960.dtsi
@@ -42,6 +42,7 @@
L2: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
};
diff --git a/arch/arm/boot/dts/qcom-msm8974-lge-nexus5-hammerhead.dts b/arch/arm/boot/dts/qcom-msm8974-lge-nexus5-hammerhead.dts
index ab35f2d644c0..861695cecf84 100644
--- a/arch/arm/boot/dts/qcom-msm8974-lge-nexus5-hammerhead.dts
+++ b/arch/arm/boot/dts/qcom-msm8974-lge-nexus5-hammerhead.dts
@@ -592,7 +592,6 @@
pins = "gpio73";
function = "gpio";
bias-disable;
- input-enable;
};
touch_pin: touch-state {
@@ -602,7 +601,6 @@
drive-strength = <2>;
bias-disable;
- input-enable;
};
reset-pins {
diff --git a/arch/arm/boot/dts/qcom-msm8974-sony-xperia-rhine.dtsi b/arch/arm/boot/dts/qcom-msm8974-sony-xperia-rhine.dtsi
index d3bec03b126c..68a2f9094e53 100644
--- a/arch/arm/boot/dts/qcom-msm8974-sony-xperia-rhine.dtsi
+++ b/arch/arm/boot/dts/qcom-msm8974-sony-xperia-rhine.dtsi
@@ -433,7 +433,6 @@
function = "gpio";
drive-strength = <2>;
bias-disable;
- input-enable;
};
sdc1_on: sdc1-on-state {
diff --git a/arch/arm/boot/dts/qcom-msm8974.dtsi b/arch/arm/boot/dts/qcom-msm8974.dtsi
index 8208012684d4..7ed0d925a4e9 100644
--- a/arch/arm/boot/dts/qcom-msm8974.dtsi
+++ b/arch/arm/boot/dts/qcom-msm8974.dtsi
@@ -80,6 +80,7 @@
L2: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
qcom,saw = <&saw_l2>;
};
diff --git a/arch/arm/boot/dts/qcom-msm8974pro-oneplus-bacon.dts b/arch/arm/boot/dts/qcom-msm8974pro-oneplus-bacon.dts
index 8d2a054d8fee..8230d0e1d95d 100644
--- a/arch/arm/boot/dts/qcom-msm8974pro-oneplus-bacon.dts
+++ b/arch/arm/boot/dts/qcom-msm8974pro-oneplus-bacon.dts
@@ -461,7 +461,6 @@
function = "gpio";
drive-strength = <2>;
bias-disable;
- input-enable;
};
reset-pins {
diff --git a/arch/arm/boot/dts/qcom-msm8974pro-samsung-klte.dts b/arch/arm/boot/dts/qcom-msm8974pro-samsung-klte.dts
index b9698ffb66ca..eb505d6d7f31 100644
--- a/arch/arm/boot/dts/qcom-msm8974pro-samsung-klte.dts
+++ b/arch/arm/boot/dts/qcom-msm8974pro-samsung-klte.dts
@@ -704,7 +704,6 @@
pins = "gpio75";
function = "gpio";
drive-strength = <16>;
- input-enable;
};
devwake-pins {
@@ -760,14 +759,12 @@
i2c_touchkey_pins: i2c-touchkey-state {
pins = "gpio95", "gpio96";
function = "gpio";
- input-enable;
bias-pull-up;
};
i2c_led_gpioex_pins: i2c-led-gpioex-state {
pins = "gpio120", "gpio121";
function = "gpio";
- input-enable;
bias-pull-down;
};
@@ -781,7 +778,6 @@
wifi_pin: wifi-state {
pins = "gpio92";
function = "gpio";
- input-enable;
bias-pull-down;
};
diff --git a/arch/arm/boot/dts/qcom-msm8974pro-sony-xperia-shinano-castor.dts b/arch/arm/boot/dts/qcom-msm8974pro-sony-xperia-shinano-castor.dts
index 04bc58d87abf..0f650ed31005 100644
--- a/arch/arm/boot/dts/qcom-msm8974pro-sony-xperia-shinano-castor.dts
+++ b/arch/arm/boot/dts/qcom-msm8974pro-sony-xperia-shinano-castor.dts
@@ -631,7 +631,6 @@
function = "gpio";
drive-strength = <2>;
bias-disable;
- input-enable;
};
bt_host_wake_pin: bt-host-wake-state {
diff --git a/arch/arm/boot/dts/stm32f429.dtsi b/arch/arm/boot/dts/stm32f429.dtsi
index c9e05e3540d6..00bf53f99c29 100644
--- a/arch/arm/boot/dts/stm32f429.dtsi
+++ b/arch/arm/boot/dts/stm32f429.dtsi
@@ -387,6 +387,7 @@
interrupt-names = "tx", "rx0", "rx1", "sce";
resets = <&rcc STM32F4_APB1_RESET(CAN2)>;
clocks = <&rcc 0 STM32F4_APB1_CLOCK(CAN2)>;
+ st,can-secondary;
st,gcan = <&gcan>;
status = "disabled";
};
diff --git a/arch/arm/boot/dts/stm32f7-pinctrl.dtsi b/arch/arm/boot/dts/stm32f7-pinctrl.dtsi
index c8e6c52fb248..9f65403295ca 100644
--- a/arch/arm/boot/dts/stm32f7-pinctrl.dtsi
+++ b/arch/arm/boot/dts/stm32f7-pinctrl.dtsi
@@ -283,6 +283,88 @@
slew-rate = <2>;
};
};
+
+ can1_pins_a: can1-0 {
+ pins1 {
+ pinmux = <STM32_PINMUX('A', 12, AF9)>; /* CAN1_TX */
+ };
+ pins2 {
+ pinmux = <STM32_PINMUX('A', 11, AF9)>; /* CAN1_RX */
+ bias-pull-up;
+ };
+ };
+
+ can1_pins_b: can1-1 {
+ pins1 {
+ pinmux = <STM32_PINMUX('B', 9, AF9)>; /* CAN1_TX */
+ };
+ pins2 {
+ pinmux = <STM32_PINMUX('B', 8, AF9)>; /* CAN1_RX */
+ bias-pull-up;
+ };
+ };
+
+ can1_pins_c: can1-2 {
+ pins1 {
+ pinmux = <STM32_PINMUX('D', 1, AF9)>; /* CAN1_TX */
+ };
+ pins2 {
+ pinmux = <STM32_PINMUX('D', 0, AF9)>; /* CAN1_RX */
+ bias-pull-up;
+
+ };
+ };
+
+ can1_pins_d: can1-3 {
+ pins1 {
+ pinmux = <STM32_PINMUX('H', 13, AF9)>; /* CAN1_TX */
+ };
+ pins2 {
+ pinmux = <STM32_PINMUX('H', 14, AF9)>; /* CAN1_RX */
+ bias-pull-up;
+
+ };
+ };
+
+ can2_pins_a: can2-0 {
+ pins1 {
+ pinmux = <STM32_PINMUX('B', 6, AF9)>; /* CAN2_TX */
+ };
+ pins2 {
+ pinmux = <STM32_PINMUX('B', 5, AF9)>; /* CAN2_RX */
+ bias-pull-up;
+ };
+ };
+
+ can2_pins_b: can2-1 {
+ pins1 {
+ pinmux = <STM32_PINMUX('B', 13, AF9)>; /* CAN2_TX */
+ };
+ pins2 {
+ pinmux = <STM32_PINMUX('B', 12, AF9)>; /* CAN2_RX */
+ bias-pull-up;
+ };
+ };
+
+ can3_pins_a: can3-0 {
+ pins1 {
+ pinmux = <STM32_PINMUX('A', 15, AF11)>; /* CAN3_TX */
+ };
+ pins2 {
+ pinmux = <STM32_PINMUX('A', 8, AF11)>; /* CAN3_RX */
+ bias-pull-up;
+ };
+ };
+
+ can3_pins_b: can3-1 {
+ pins1 {
+ pinmux = <STM32_PINMUX('B', 4, AF11)>; /* CAN3_TX */
+ };
+ pins2 {
+ pinmux = <STM32_PINMUX('B', 3, AF11)>; /* CAN3_RX */
+ bias-pull-up;
+ };
+ };
};
};
};
diff --git a/arch/arm/boot/dts/vexpress-v2p-ca5s.dts b/arch/arm/boot/dts/vexpress-v2p-ca5s.dts
index 3b88209bacea..ff1f9a1bcfcf 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca5s.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca5s.dts
@@ -132,6 +132,7 @@
reg = <0x2c0f0000 0x1000>;
interrupts = <0 84 4>;
cache-level = <2>;
+ cache-unified;
};
pmu {
diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h
index 78d3d4b82c6c..f3cd04ff022d 100644
--- a/arch/arm/include/asm/arm_pmuv3.h
+++ b/arch/arm/include/asm/arm_pmuv3.h
@@ -92,7 +92,7 @@
#define RETURN_READ_PMEVCNTRN(n) \
return read_sysreg(PMEVCNTR##n)
-static unsigned long read_pmevcntrn(int n)
+static inline unsigned long read_pmevcntrn(int n)
{
PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN);
return 0;
@@ -100,14 +100,14 @@ static unsigned long read_pmevcntrn(int n)
#define WRITE_PMEVCNTRN(n) \
write_sysreg(val, PMEVCNTR##n)
-static void write_pmevcntrn(int n, unsigned long val)
+static inline void write_pmevcntrn(int n, unsigned long val)
{
PMEVN_SWITCH(n, WRITE_PMEVCNTRN);
}
#define WRITE_PMEVTYPERN(n) \
write_sysreg(val, PMEVTYPER##n)
-static void write_pmevtypern(int n, unsigned long val)
+static inline void write_pmevtypern(int n, unsigned long val)
{
PMEVN_SWITCH(n, WRITE_PMEVTYPERN);
}
@@ -222,6 +222,11 @@ static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
return false;
}
+static inline bool kvm_set_pmuserenr(u64 val)
+{
+ return false;
+}
+
/* PMU Version in DFR Register */
#define ARMV8_PMU_DFR_VER_NI 0
#define ARMV8_PMU_DFR_VER_V3P4 0x5
diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
index 60dc56d8acfb..437dd0352fd4 100644
--- a/arch/arm/mach-at91/pm.c
+++ b/arch/arm/mach-at91/pm.c
@@ -334,16 +334,14 @@ static bool at91_pm_eth_quirk_is_valid(struct at91_pm_quirk_eth *eth)
pdev = of_find_device_by_node(eth->np);
if (!pdev)
return false;
+ /* put_device(eth->dev) is called at the end of suspend. */
eth->dev = &pdev->dev;
}
/* No quirks if device isn't a wakeup source. */
- if (!device_may_wakeup(eth->dev)) {
- put_device(eth->dev);
+ if (!device_may_wakeup(eth->dev))
return false;
- }
- /* put_device(eth->dev) is called at the end of suspend. */
return true;
}
@@ -439,14 +437,14 @@ clk_unconfigure:
pr_err("AT91: PM: failed to enable %s clocks\n",
j == AT91_PM_G_ETH ? "geth" : "eth");
}
- } else {
- /*
- * Release the reference to eth->dev taken in
- * at91_pm_eth_quirk_is_valid().
- */
- put_device(eth->dev);
- eth->dev = NULL;
}
+
+ /*
+ * Release the reference to eth->dev taken in
+ * at91_pm_eth_quirk_is_valid().
+ */
+ put_device(eth->dev);
+ eth->dev = NULL;
}
return ret;
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b1201d25a8a4..343e1e1cae10 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1516,7 +1516,7 @@ config XEN
# 16K | 27 | 14 | 13 | 11 |
# 64K | 29 | 16 | 13 | 13 |
config ARCH_FORCE_MAX_ORDER
- int "Order of maximal physically contiguous allocations" if EXPERT && (ARM64_4K_PAGES || ARM64_16K_PAGES)
+ int
default "13" if ARM64_64K_PAGES
default "11" if ARM64_16K_PAGES
default "10"
diff --git a/arch/arm64/boot/dts/arm/foundation-v8.dtsi b/arch/arm64/boot/dts/arm/foundation-v8.dtsi
index 029578072d8f..7b41537731a6 100644
--- a/arch/arm64/boot/dts/arm/foundation-v8.dtsi
+++ b/arch/arm64/boot/dts/arm/foundation-v8.dtsi
@@ -59,6 +59,7 @@
L2_0: l2-cache0 {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
};
diff --git a/arch/arm64/boot/dts/arm/rtsm_ve-aemv8a.dts b/arch/arm64/boot/dts/arm/rtsm_ve-aemv8a.dts
index ef68f5aae7dd..afdf954206f1 100644
--- a/arch/arm64/boot/dts/arm/rtsm_ve-aemv8a.dts
+++ b/arch/arm64/boot/dts/arm/rtsm_ve-aemv8a.dts
@@ -72,6 +72,7 @@
L2_0: l2-cache0 {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
};
diff --git a/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts b/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts
index 796cd7d02eb5..7bdeb965f0a9 100644
--- a/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts
+++ b/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts
@@ -58,6 +58,7 @@
L2_0: l2-cache0 {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
index 2209c1ac6e9b..e62a43591361 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
@@ -171,6 +171,7 @@ conn_subsys: bus@5b000000 {
interrupt-names = "host", "peripheral", "otg", "wakeup";
phys = <&usb3_phy>;
phy-names = "cdns3,usb3-phy";
+ cdns,on-chip-buff-size = /bits/ 16 <18>;
status = "disabled";
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
index 2dce8f2ee3ea..adb98a72bdfd 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
@@ -90,6 +90,8 @@ dma_subsys: bus@5a000000 {
clocks = <&uart0_lpcg IMX_LPCG_CLK_4>,
<&uart0_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "baud";
+ assigned-clocks = <&clk IMX_SC_R_UART_0 IMX_SC_PM_CLK_PER>;
+ assigned-clock-rates = <80000000>;
power-domains = <&pd IMX_SC_R_UART_0>;
status = "disabled";
};
@@ -100,6 +102,8 @@ dma_subsys: bus@5a000000 {
clocks = <&uart1_lpcg IMX_LPCG_CLK_4>,
<&uart1_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "baud";
+ assigned-clocks = <&clk IMX_SC_R_UART_1 IMX_SC_PM_CLK_PER>;
+ assigned-clock-rates = <80000000>;
power-domains = <&pd IMX_SC_R_UART_1>;
status = "disabled";
};
@@ -110,6 +114,8 @@ dma_subsys: bus@5a000000 {
clocks = <&uart2_lpcg IMX_LPCG_CLK_4>,
<&uart2_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "baud";
+ assigned-clocks = <&clk IMX_SC_R_UART_2 IMX_SC_PM_CLK_PER>;
+ assigned-clock-rates = <80000000>;
power-domains = <&pd IMX_SC_R_UART_2>;
status = "disabled";
};
@@ -120,6 +126,8 @@ dma_subsys: bus@5a000000 {
clocks = <&uart3_lpcg IMX_LPCG_CLK_4>,
<&uart3_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "baud";
+ assigned-clocks = <&clk IMX_SC_R_UART_3 IMX_SC_PM_CLK_PER>;
+ assigned-clock-rates = <80000000>;
power-domains = <&pd IMX_SC_R_UART_3>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mn-beacon-baseboard.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-beacon-baseboard.dtsi
index 9e82069c941f..5a1f7c30afe5 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn-beacon-baseboard.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mn-beacon-baseboard.dtsi
@@ -81,7 +81,7 @@
&ecspi2 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_espi2>;
- cs-gpios = <&gpio5 9 GPIO_ACTIVE_LOW>;
+ cs-gpios = <&gpio5 13 GPIO_ACTIVE_LOW>;
status = "okay";
eeprom@0 {
@@ -202,7 +202,7 @@
MX8MN_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0x82
MX8MN_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI 0x82
MX8MN_IOMUXC_ECSPI2_MISO_ECSPI2_MISO 0x82
- MX8MN_IOMUXC_ECSPI1_SS0_GPIO5_IO9 0x41
+ MX8MN_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0x41
>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
index 67072e6c77d5..cbd9d124c80d 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
@@ -98,11 +98,17 @@
#address-cells = <1>;
#size-cells = <0>;
- ethphy: ethernet-phy@4 {
+ ethphy: ethernet-phy@4 { /* AR8033 or ADIN1300 */
compatible = "ethernet-phy-ieee802.3-c22";
reg = <4>;
reset-gpios = <&gpio1 9 GPIO_ACTIVE_LOW>;
reset-assert-us = <10000>;
+ /*
+ * Deassert delay:
+ * ADIN1300 requires 5ms.
+ * AR8033 requires 1ms.
+ */
+ reset-deassert-us = <20000>;
};
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi
index bd84db550053..8be8f090e8b8 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi
@@ -1069,13 +1069,6 @@
<&clk IMX8MN_CLK_DISP_APB_ROOT>,
<&clk IMX8MN_CLK_DISP_AXI_ROOT>;
clock-names = "pix", "axi", "disp_axi";
- assigned-clocks = <&clk IMX8MN_CLK_DISP_PIXEL_ROOT>,
- <&clk IMX8MN_CLK_DISP_AXI>,
- <&clk IMX8MN_CLK_DISP_APB>;
- assigned-clock-parents = <&clk IMX8MN_CLK_DISP_PIXEL>,
- <&clk IMX8MN_SYS_PLL2_1000M>,
- <&clk IMX8MN_SYS_PLL1_800M>;
- assigned-clock-rates = <594000000>, <500000000>, <200000000>;
interrupts = <GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>;
power-domains = <&disp_blk_ctrl IMX8MN_DISPBLK_PD_LCDIF>;
status = "disabled";
@@ -1093,12 +1086,6 @@
clocks = <&clk IMX8MN_CLK_DSI_CORE>,
<&clk IMX8MN_CLK_DSI_PHY_REF>;
clock-names = "bus_clk", "sclk_mipi";
- assigned-clocks = <&clk IMX8MN_CLK_DSI_CORE>,
- <&clk IMX8MN_CLK_DSI_PHY_REF>;
- assigned-clock-parents = <&clk IMX8MN_SYS_PLL1_266M>,
- <&clk IMX8MN_CLK_24M>;
- assigned-clock-rates = <266000000>, <24000000>;
- samsung,pll-clock-frequency = <24000000>;
interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>;
power-domains = <&disp_blk_ctrl IMX8MN_DISPBLK_PD_MIPI_DSI>;
status = "disabled";
@@ -1142,6 +1129,21 @@
"lcdif-axi", "lcdif-apb", "lcdif-pix",
"dsi-pclk", "dsi-ref",
"csi-aclk", "csi-pclk";
+ assigned-clocks = <&clk IMX8MN_CLK_DSI_CORE>,
+ <&clk IMX8MN_CLK_DSI_PHY_REF>,
+ <&clk IMX8MN_CLK_DISP_PIXEL>,
+ <&clk IMX8MN_CLK_DISP_AXI>,
+ <&clk IMX8MN_CLK_DISP_APB>;
+ assigned-clock-parents = <&clk IMX8MN_SYS_PLL1_266M>,
+ <&clk IMX8MN_CLK_24M>,
+ <&clk IMX8MN_VIDEO_PLL1_OUT>,
+ <&clk IMX8MN_SYS_PLL2_1000M>,
+ <&clk IMX8MN_SYS_PLL1_800M>;
+ assigned-clock-rates = <266000000>,
+ <24000000>,
+ <594000000>,
+ <500000000>,
+ <200000000>;
#power-domain-cells = <1>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
index f81391993354..428c60462e3d 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
@@ -1211,13 +1211,6 @@
<&clk IMX8MP_CLK_MEDIA_APB_ROOT>,
<&clk IMX8MP_CLK_MEDIA_AXI_ROOT>;
clock-names = "pix", "axi", "disp_axi";
- assigned-clocks = <&clk IMX8MP_CLK_MEDIA_DISP1_PIX_ROOT>,
- <&clk IMX8MP_CLK_MEDIA_AXI>,
- <&clk IMX8MP_CLK_MEDIA_APB>;
- assigned-clock-parents = <&clk IMX8MP_CLK_MEDIA_DISP1_PIX>,
- <&clk IMX8MP_SYS_PLL2_1000M>,
- <&clk IMX8MP_SYS_PLL1_800M>;
- assigned-clock-rates = <594000000>, <500000000>, <200000000>;
interrupts = <GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>;
power-domains = <&media_blk_ctrl IMX8MP_MEDIABLK_PD_LCDIF_1>;
status = "disabled";
@@ -1237,11 +1230,6 @@
<&clk IMX8MP_CLK_MEDIA_APB_ROOT>,
<&clk IMX8MP_CLK_MEDIA_AXI_ROOT>;
clock-names = "pix", "axi", "disp_axi";
- assigned-clocks = <&clk IMX8MP_CLK_MEDIA_DISP2_PIX>,
- <&clk IMX8MP_VIDEO_PLL1>;
- assigned-clock-parents = <&clk IMX8MP_VIDEO_PLL1_OUT>,
- <&clk IMX8MP_VIDEO_PLL1_REF_SEL>;
- assigned-clock-rates = <0>, <1039500000>;
power-domains = <&media_blk_ctrl IMX8MP_MEDIABLK_PD_LCDIF_2>;
status = "disabled";
@@ -1296,11 +1284,16 @@
"disp1", "disp2", "isp", "phy";
assigned-clocks = <&clk IMX8MP_CLK_MEDIA_AXI>,
- <&clk IMX8MP_CLK_MEDIA_APB>;
+ <&clk IMX8MP_CLK_MEDIA_APB>,
+ <&clk IMX8MP_CLK_MEDIA_DISP1_PIX>,
+ <&clk IMX8MP_CLK_MEDIA_DISP2_PIX>,
+ <&clk IMX8MP_VIDEO_PLL1>;
assigned-clock-parents = <&clk IMX8MP_SYS_PLL2_1000M>,
- <&clk IMX8MP_SYS_PLL1_800M>;
- assigned-clock-rates = <500000000>, <200000000>;
-
+ <&clk IMX8MP_SYS_PLL1_800M>,
+ <&clk IMX8MP_VIDEO_PLL1_OUT>,
+ <&clk IMX8MP_VIDEO_PLL1_OUT>;
+ assigned-clock-rates = <500000000>, <200000000>,
+ <0>, <0>, <1039500000>;
#power-domain-cells = <1>;
lvds_bridge: bridge@5c {
diff --git a/arch/arm64/boot/dts/freescale/imx8qm-mek.dts b/arch/arm64/boot/dts/freescale/imx8qm-mek.dts
index ce9d3f0b98fc..607cd6b4e972 100644
--- a/arch/arm64/boot/dts/freescale/imx8qm-mek.dts
+++ b/arch/arm64/boot/dts/freescale/imx8qm-mek.dts
@@ -82,8 +82,8 @@
pinctrl-0 = <&pinctrl_usdhc2>;
bus-width = <4>;
vmmc-supply = <&reg_usdhc2_vmmc>;
- cd-gpios = <&lsio_gpio4 22 GPIO_ACTIVE_LOW>;
- wp-gpios = <&lsio_gpio4 21 GPIO_ACTIVE_HIGH>;
+ cd-gpios = <&lsio_gpio5 22 GPIO_ACTIVE_LOW>;
+ wp-gpios = <&lsio_gpio5 21 GPIO_ACTIVE_HIGH>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8x-colibri-eval-v3.dtsi b/arch/arm64/boot/dts/freescale/imx8x-colibri-eval-v3.dtsi
index 7264d784ae72..9af769ab8ceb 100644
--- a/arch/arm64/boot/dts/freescale/imx8x-colibri-eval-v3.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8x-colibri-eval-v3.dtsi
@@ -33,6 +33,12 @@
};
};
+&iomuxc {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_ext_io0>, <&pinctrl_hog0>, <&pinctrl_hog1>,
+ <&pinctrl_lpspi2_cs2>;
+};
+
/* Colibri SPI */
&lpspi2 {
status = "okay";
diff --git a/arch/arm64/boot/dts/freescale/imx8x-colibri-iris.dtsi b/arch/arm64/boot/dts/freescale/imx8x-colibri-iris.dtsi
index 5f30c88855e7..f8953067bc3b 100644
--- a/arch/arm64/boot/dts/freescale/imx8x-colibri-iris.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8x-colibri-iris.dtsi
@@ -48,8 +48,7 @@
<IMX8QXP_SAI0_TXFS_LSIO_GPIO0_IO28 0x20>, /* SODIMM 101 */
<IMX8QXP_SAI0_RXD_LSIO_GPIO0_IO27 0x20>, /* SODIMM 97 */
<IMX8QXP_ENET0_RGMII_RXC_LSIO_GPIO5_IO03 0x06000020>, /* SODIMM 85 */
- <IMX8QXP_SAI0_TXC_LSIO_GPIO0_IO26 0x20>, /* SODIMM 79 */
- <IMX8QXP_QSPI0A_DATA1_LSIO_GPIO3_IO10 0x06700041>; /* SODIMM 45 */
+ <IMX8QXP_SAI0_TXC_LSIO_GPIO0_IO26 0x20>; /* SODIMM 79 */
};
pinctrl_uart1_forceoff: uart1forceoffgrp {
diff --git a/arch/arm64/boot/dts/freescale/imx8x-colibri.dtsi b/arch/arm64/boot/dts/freescale/imx8x-colibri.dtsi
index 7cad79102e1a..49d105eb4769 100644
--- a/arch/arm64/boot/dts/freescale/imx8x-colibri.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8x-colibri.dtsi
@@ -363,10 +363,6 @@
/* TODO VPU Encoder/Decoder */
&iomuxc {
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_ext_io0>, <&pinctrl_hog0>, <&pinctrl_hog1>,
- <&pinctrl_hog2>, <&pinctrl_lpspi2_cs2>;
-
/* On-module touch pen-down interrupt */
pinctrl_ad7879_int: ad7879intgrp {
fsl,pins = <IMX8QXP_MIPI_CSI0_I2C0_SCL_LSIO_GPIO3_IO05 0x21>;
@@ -499,8 +495,7 @@
};
pinctrl_hog1: hog1grp {
- fsl,pins = <IMX8QXP_CSI_MCLK_LSIO_GPIO3_IO01 0x20>, /* SODIMM 75 */
- <IMX8QXP_QSPI0A_SCLK_LSIO_GPIO3_IO16 0x20>; /* SODIMM 93 */
+ fsl,pins = <IMX8QXP_QSPI0A_SCLK_LSIO_GPIO3_IO16 0x20>; /* SODIMM 93 */
};
pinctrl_hog2: hog2grp {
@@ -774,3 +769,10 @@
fsl,pins = <IMX8QXP_SCU_BOOT_MODE3_SCU_DSC_RTC_CLOCK_OUTPUT_32K 0x20>;
};
};
+
+/* Delete peripherals which are not present on SOC, but are defined in imx8-ss-*.dtsi */
+
+/delete-node/ &adc1;
+/delete-node/ &adc1_lpcg;
+/delete-node/ &dsp;
+/delete-node/ &dsp_lpcg;
diff --git a/arch/arm64/boot/dts/qcom/ipq5332.dtsi b/arch/arm64/boot/dts/qcom/ipq5332.dtsi
index 12e0e179e139..af4d97143bcf 100644
--- a/arch/arm64/boot/dts/qcom/ipq5332.dtsi
+++ b/arch/arm64/boot/dts/qcom/ipq5332.dtsi
@@ -73,6 +73,7 @@
L2_0: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
};
diff --git a/arch/arm64/boot/dts/qcom/ipq6018.dtsi b/arch/arm64/boot/dts/qcom/ipq6018.dtsi
index 9ff4e9d45065..f531797f2619 100644
--- a/arch/arm64/boot/dts/qcom/ipq6018.dtsi
+++ b/arch/arm64/boot/dts/qcom/ipq6018.dtsi
@@ -83,7 +83,8 @@
L2_0: l2-cache {
compatible = "cache";
- cache-level = <0x2>;
+ cache-level = <2>;
+ cache-unified;
};
};
diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi
index 84e715aa4310..5b2c1986c8f4 100644
--- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi
+++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi
@@ -66,7 +66,8 @@
L2_0: l2-cache {
compatible = "cache";
- cache-level = <0x2>;
+ cache-level = <2>;
+ cache-unified;
};
};
diff --git a/arch/arm64/boot/dts/qcom/ipq9574.dtsi b/arch/arm64/boot/dts/qcom/ipq9574.dtsi
index 3bb7435f5e7f..0ed19fbf7d87 100644
--- a/arch/arm64/boot/dts/qcom/ipq9574.dtsi
+++ b/arch/arm64/boot/dts/qcom/ipq9574.dtsi
@@ -72,6 +72,7 @@
L2_0: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
};
diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi
index 7e0fa37a3adf..834e0b66b7f2 100644
--- a/arch/arm64/boot/dts/qcom/msm8916.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi
@@ -180,6 +180,7 @@
L2_0: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
idle-states {
diff --git a/arch/arm64/boot/dts/qcom/msm8953.dtsi b/arch/arm64/boot/dts/qcom/msm8953.dtsi
index 602cb188a635..d44cfa0471e9 100644
--- a/arch/arm64/boot/dts/qcom/msm8953.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8953.dtsi
@@ -153,11 +153,13 @@
L2_0: l2-cache-0 {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
L2_1: l2-cache-1 {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
};
diff --git a/arch/arm64/boot/dts/qcom/msm8976.dtsi b/arch/arm64/boot/dts/qcom/msm8976.dtsi
index 1f0bd24a074a..f47fb8ea71e2 100644
--- a/arch/arm64/boot/dts/qcom/msm8976.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8976.dtsi
@@ -193,11 +193,13 @@
l2_0: l2-cache0 {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
l2_1: l2-cache1 {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
};
diff --git a/arch/arm64/boot/dts/qcom/msm8994.dtsi b/arch/arm64/boot/dts/qcom/msm8994.dtsi
index 2831966be960..bdc3f2ba1755 100644
--- a/arch/arm64/boot/dts/qcom/msm8994.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8994.dtsi
@@ -52,6 +52,7 @@
L2_0: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
};
@@ -88,6 +89,7 @@
L2_1: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
};
diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi
index 2b35cb3f5292..30257c07e127 100644
--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
@@ -53,8 +53,9 @@
#cooling-cells = <2>;
next-level-cache = <&L2_0>;
L2_0: l2-cache {
- compatible = "cache";
- cache-level = <2>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
};
};
@@ -83,8 +84,9 @@
#cooling-cells = <2>;
next-level-cache = <&L2_1>;
L2_1: l2-cache {
- compatible = "cache";
- cache-level = <2>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
};
};
diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi
index b150437a8355..3ec941fed14f 100644
--- a/arch/arm64/boot/dts/qcom/msm8998.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi
@@ -146,6 +146,7 @@
L2_0: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
};
@@ -190,6 +191,7 @@
L2_1: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
};
diff --git a/arch/arm64/boot/dts/qcom/qcm2290.dtsi b/arch/arm64/boot/dts/qcom/qcm2290.dtsi
index ae5abc76bcc7..b29bc4e4b837 100644
--- a/arch/arm64/boot/dts/qcom/qcm2290.dtsi
+++ b/arch/arm64/boot/dts/qcom/qcm2290.dtsi
@@ -51,6 +51,7 @@
L2_0: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
};
diff --git a/arch/arm64/boot/dts/qcom/qcs404.dtsi b/arch/arm64/boot/dts/qcom/qcs404.dtsi
index eefed585738c..972f753847e1 100644
--- a/arch/arm64/boot/dts/qcom/qcs404.dtsi
+++ b/arch/arm64/boot/dts/qcom/qcs404.dtsi
@@ -95,6 +95,7 @@
L2_0: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
idle-states {
diff --git a/arch/arm64/boot/dts/qcom/qdu1000.dtsi b/arch/arm64/boot/dts/qcom/qdu1000.dtsi
index 734438113bba..fb553f0bb17a 100644
--- a/arch/arm64/boot/dts/qcom/qdu1000.dtsi
+++ b/arch/arm64/boot/dts/qcom/qdu1000.dtsi
@@ -35,9 +35,13 @@
next-level-cache = <&L2_0>;
L2_0: l2-cache {
compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
L3_0: l3-cache {
compatible = "cache";
+ cache-level = <3>;
+ cache-unified;
};
};
};
@@ -54,6 +58,8 @@
next-level-cache = <&L2_100>;
L2_100: l2-cache {
compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -70,6 +76,8 @@
next-level-cache = <&L2_200>;
L2_200: l2-cache {
compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -86,6 +94,8 @@
next-level-cache = <&L2_300>;
L2_300: l2-cache {
compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
diff --git a/arch/arm64/boot/dts/qcom/sa8155p-adp.dts b/arch/arm64/boot/dts/qcom/sa8155p-adp.dts
index 339fea522509..15e1ae1c1a97 100644
--- a/arch/arm64/boot/dts/qcom/sa8155p-adp.dts
+++ b/arch/arm64/boot/dts/qcom/sa8155p-adp.dts
@@ -7,7 +7,7 @@
#include <dt-bindings/regulator/qcom,rpmh-regulator.h>
#include <dt-bindings/gpio/gpio.h>
-#include "sm8150.dtsi"
+#include "sa8155p.dtsi"
#include "pmm8155au_1.dtsi"
#include "pmm8155au_2.dtsi"
diff --git a/arch/arm64/boot/dts/qcom/sa8155p.dtsi b/arch/arm64/boot/dts/qcom/sa8155p.dtsi
new file mode 100644
index 000000000000..ffb7ab695213
--- /dev/null
+++ b/arch/arm64/boot/dts/qcom/sa8155p.dtsi
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/*
+ * Copyright (c) 2023, Linaro Limited
+ *
+ * SA8155P is an automotive variant of SM8150, with some minor changes.
+ * Most notably, the RPMhPD setup differs: MMCX and LCX/LMX rails are gone,
+ * though the cmd-db doesn't reflect that and access attemps result in a bite.
+ */
+
+#include "sm8150.dtsi"
+
+&dispcc {
+ power-domains = <&rpmhpd SA8155P_CX>;
+};
+
+&mdss_dsi0 {
+ power-domains = <&rpmhpd SA8155P_CX>;
+};
+
+&mdss_dsi1 {
+ power-domains = <&rpmhpd SA8155P_CX>;
+};
+
+&mdss_mdp {
+ power-domains = <&rpmhpd SA8155P_CX>;
+};
+
+&remoteproc_slpi {
+ power-domains = <&rpmhpd SA8155P_CX>,
+ <&rpmhpd SA8155P_MX>;
+};
+
+&rpmhpd {
+ /*
+ * The bindings were crafted such that SA8155P PDs match their
+ * SM8150 counterparts to make it more maintainable and only
+ * necessitate adjusting entries that actually differ
+ */
+ compatible = "qcom,sa8155p-rpmhpd";
+};
diff --git a/arch/arm64/boot/dts/qcom/sa8775p.dtsi b/arch/arm64/boot/dts/qcom/sa8775p.dtsi
index 2343df7e0ea4..c3310caf9f68 100644
--- a/arch/arm64/boot/dts/qcom/sa8775p.dtsi
+++ b/arch/arm64/boot/dts/qcom/sa8775p.dtsi
@@ -42,9 +42,13 @@
next-level-cache = <&L2_0>;
L2_0: l2-cache {
compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
L3_0: l3-cache {
compatible = "cache";
+ cache-level = <3>;
+ cache-unified;
};
};
};
@@ -58,6 +62,8 @@
next-level-cache = <&L2_1>;
L2_1: l2-cache {
compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -71,6 +77,8 @@
next-level-cache = <&L2_2>;
L2_2: l2-cache {
compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -84,6 +92,8 @@
next-level-cache = <&L2_3>;
L2_3: l2-cache {
compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -97,9 +107,13 @@
next-level-cache = <&L2_4>;
L2_4: l2-cache {
compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_1>;
L3_1: l3-cache {
compatible = "cache";
+ cache-level = <3>;
+ cache-unified;
};
};
@@ -114,6 +128,8 @@
next-level-cache = <&L2_5>;
L2_5: l2-cache {
compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_1>;
};
};
@@ -127,6 +143,8 @@
next-level-cache = <&L2_6>;
L2_6: l2-cache {
compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_1>;
};
};
@@ -140,6 +158,8 @@
next-level-cache = <&L2_7>;
L2_7: l2-cache {
compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_1>;
};
};
diff --git a/arch/arm64/boot/dts/qcom/sc7180-idp.dts b/arch/arm64/boot/dts/qcom/sc7180-idp.dts
index 9f052270e090..299ef5dc225a 100644
--- a/arch/arm64/boot/dts/qcom/sc7180-idp.dts
+++ b/arch/arm64/boot/dts/qcom/sc7180-idp.dts
@@ -393,6 +393,11 @@
qcom,spare-regs = <&tcsr_regs_2 0xb3e4>;
};
+&scm {
+ /* TF-A firmware maps memory cached so mark dma-coherent to match. */
+ dma-coherent;
+};
+
&sdhc_1 {
status = "okay";
diff --git a/arch/arm64/boot/dts/qcom/sc7180-lite.dtsi b/arch/arm64/boot/dts/qcom/sc7180-lite.dtsi
index d8ed1d7b4ec7..4b306a59d9be 100644
--- a/arch/arm64/boot/dts/qcom/sc7180-lite.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180-lite.dtsi
@@ -16,3 +16,11 @@
&cpu6_opp12 {
opp-peak-kBps = <8532000 23347200>;
};
+
+&cpu6_opp13 {
+ opp-peak-kBps = <8532000 23347200>;
+};
+
+&cpu6_opp14 {
+ opp-peak-kBps = <8532000 23347200>;
+};
diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
index ca6920de7ea8..1472e7f10831 100644
--- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
@@ -892,6 +892,11 @@ hp_i2c: &i2c9 {
qcom,spare-regs = <&tcsr_regs_2 0xb3e4>;
};
+&scm {
+ /* TF-A firmware maps memory cached so mark dma-coherent to match. */
+ dma-coherent;
+};
+
&sdhc_1 {
status = "okay";
diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index ea1ffade1aa1..a65be760d1a7 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -92,10 +92,12 @@
L2_0: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
L3_0: l3-cache {
compatible = "cache";
cache-level = <3>;
+ cache-unified;
};
};
};
@@ -120,6 +122,7 @@
L2_100: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -144,6 +147,7 @@
L2_200: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -168,6 +172,7 @@
L2_300: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -192,6 +197,7 @@
L2_400: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -216,6 +222,7 @@
L2_500: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -240,6 +247,7 @@
L2_600: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -264,6 +272,7 @@
L2_700: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -360,7 +369,7 @@
};
firmware {
- scm {
+ scm: scm {
compatible = "qcom,scm-sc7180", "qcom,scm";
};
};
diff --git a/arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi b/arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi
index f562e4d2b655..2e1cd219fc18 100644
--- a/arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi
@@ -79,6 +79,11 @@
firmware-name = "ath11k/WCN6750/hw1.0/wpss.mdt";
};
+&scm {
+ /* TF-A firmware maps memory cached so mark dma-coherent to match. */
+ dma-coherent;
+};
+
&wifi {
status = "okay";
diff --git a/arch/arm64/boot/dts/qcom/sc7280-idp.dtsi b/arch/arm64/boot/dts/qcom/sc7280-idp.dtsi
index c6dc200c00ce..21027042cf13 100644
--- a/arch/arm64/boot/dts/qcom/sc7280-idp.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7280-idp.dtsi
@@ -480,7 +480,6 @@
wcd_rx: codec@0,4 {
compatible = "sdw20217010d00";
reg = <0 4>;
- #sound-dai-cells = <1>;
qcom,rx-port-mapping = <1 2 3 4 5>;
};
};
@@ -491,7 +490,6 @@
wcd_tx: codec@0,3 {
compatible = "sdw20217010d00";
reg = <0 3>;
- #sound-dai-cells = <1>;
qcom,tx-port-mapping = <1 2 3 4>;
};
};
diff --git a/arch/arm64/boot/dts/qcom/sc7280-qcard.dtsi b/arch/arm64/boot/dts/qcom/sc7280-qcard.dtsi
index 88b3586e389f..9137db066d9e 100644
--- a/arch/arm64/boot/dts/qcom/sc7280-qcard.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7280-qcard.dtsi
@@ -414,7 +414,6 @@
wcd_rx: codec@0,4 {
compatible = "sdw20217010d00";
reg = <0 4>;
- #sound-dai-cells = <1>;
qcom,rx-port-mapping = <1 2 3 4 5>;
};
};
@@ -423,7 +422,6 @@
wcd_tx: codec@0,3 {
compatible = "sdw20217010d00";
reg = <0 3>;
- #sound-dai-cells = <1>;
qcom,tx-port-mapping = <1 2 3 4>;
};
};
diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi
index 31728f461422..36f0bb9b3cbb 100644
--- a/arch/arm64/boot/dts/qcom/sc7280.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi
@@ -182,10 +182,12 @@
L2_0: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
L3_0: l3-cache {
compatible = "cache";
cache-level = <3>;
+ cache-unified;
};
};
};
@@ -208,6 +210,7 @@
L2_100: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -230,6 +233,7 @@
L2_200: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -252,6 +256,7 @@
L2_300: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -274,6 +279,7 @@
L2_400: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -296,6 +302,7 @@
L2_500: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -318,6 +325,7 @@
L2_600: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -340,6 +348,7 @@
L2_700: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -647,7 +656,7 @@
};
firmware {
- scm {
+ scm: scm {
compatible = "qcom,scm-sc7280", "qcom,scm";
};
};
diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi
index 8fa9fbfe5d00..cc4aef21e617 100644
--- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi
@@ -58,10 +58,12 @@
L2_0: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
L3_0: l3-cache {
- compatible = "cache";
- cache-level = <3>;
+ compatible = "cache";
+ cache-level = <3>;
+ cache-unified;
};
};
};
@@ -83,6 +85,7 @@
L2_100: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -104,6 +107,7 @@
L2_200: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -125,6 +129,7 @@
L2_300: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -146,6 +151,7 @@
L2_400: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -167,6 +173,7 @@
L2_500: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -188,6 +195,7 @@
L2_600: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -209,6 +217,7 @@
L2_700: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -2726,6 +2735,7 @@
pins = "gpio7";
function = "dmic1_data";
drive-strength = <8>;
+ input-enable;
};
};
@@ -2743,6 +2753,7 @@
function = "dmic1_data";
drive-strength = <2>;
bias-pull-down;
+ input-enable;
};
};
@@ -2758,6 +2769,7 @@
pins = "gpio9";
function = "dmic2_data";
drive-strength = <8>;
+ input-enable;
};
};
@@ -2775,6 +2787,7 @@
function = "dmic2_data";
drive-strength = <2>;
bias-pull-down;
+ input-enable;
};
};
@@ -3982,6 +3995,7 @@
qcom,tcs-config = <ACTIVE_TCS 2>, <SLEEP_TCS 3>,
<WAKE_TCS 3>, <CONTROL_TCS 1>;
label = "apps_rsc";
+ power-domains = <&CLUSTER_PD>;
apps_bcm_voter: bcm-voter {
compatible = "qcom,bcm-voter";
diff --git a/arch/arm64/boot/dts/qcom/sdm630.dtsi b/arch/arm64/boot/dts/qcom/sdm630.dtsi
index 37e72b1c56dc..eaead2f7beb4 100644
--- a/arch/arm64/boot/dts/qcom/sdm630.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm630.dtsi
@@ -63,6 +63,7 @@
L2_1: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
};
@@ -127,6 +128,7 @@
L2_0: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
};
diff --git a/arch/arm64/boot/dts/qcom/sdm670.dtsi b/arch/arm64/boot/dts/qcom/sdm670.dtsi
index c5f839dd1c6e..b61e13db89bd 100644
--- a/arch/arm64/boot/dts/qcom/sdm670.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm670.dtsi
@@ -41,8 +41,12 @@
L2_0: l2-cache {
compatible = "cache";
next-level-cache = <&L3_0>;
+ cache-level = <2>;
+ cache-unified;
L3_0: l3-cache {
- compatible = "cache";
+ compatible = "cache";
+ cache-level = <3>;
+ cache-unified;
};
};
};
@@ -57,6 +61,8 @@
next-level-cache = <&L2_100>;
L2_100: l2-cache {
compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -71,6 +77,8 @@
next-level-cache = <&L2_200>;
L2_200: l2-cache {
compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -85,6 +93,8 @@
next-level-cache = <&L2_300>;
L2_300: l2-cache {
compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -99,6 +109,8 @@
next-level-cache = <&L2_400>;
L2_400: l2-cache {
compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -113,6 +125,8 @@
next-level-cache = <&L2_500>;
L2_500: l2-cache {
compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -127,6 +141,8 @@
next-level-cache = <&L2_600>;
L2_600: l2-cache {
compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -141,6 +157,8 @@
next-level-cache = <&L2_700>;
L2_700: l2-cache {
compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi
index 90424442bb4a..cdeb05e95674 100644
--- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
@@ -108,10 +108,12 @@
L2_0: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
L3_0: l3-cache {
- compatible = "cache";
- cache-level = <3>;
+ compatible = "cache";
+ cache-level = <3>;
+ cache-unified;
};
};
};
@@ -135,6 +137,7 @@
L2_100: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -158,6 +161,7 @@
L2_200: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -181,6 +185,7 @@
L2_300: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -204,6 +209,7 @@
L2_400: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -227,6 +233,7 @@
L2_500: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -250,6 +257,7 @@
L2_600: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -273,6 +281,7 @@
L2_700: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
diff --git a/arch/arm64/boot/dts/qcom/sm6115.dtsi b/arch/arm64/boot/dts/qcom/sm6115.dtsi
index 631ca327e064..43f31c1b9d5a 100644
--- a/arch/arm64/boot/dts/qcom/sm6115.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm6115.dtsi
@@ -50,6 +50,7 @@
L2_0: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
};
@@ -102,6 +103,7 @@
L2_1: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
};
diff --git a/arch/arm64/boot/dts/qcom/sm6125.dtsi b/arch/arm64/boot/dts/qcom/sm6125.dtsi
index 9484752fb850..2aa093d16858 100644
--- a/arch/arm64/boot/dts/qcom/sm6125.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm6125.dtsi
@@ -47,6 +47,7 @@
L2_0: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
};
@@ -87,6 +88,7 @@
L2_1: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
};
diff --git a/arch/arm64/boot/dts/qcom/sm6350.dtsi b/arch/arm64/boot/dts/qcom/sm6350.dtsi
index 18c4616848ce..ad34301f6cdd 100644
--- a/arch/arm64/boot/dts/qcom/sm6350.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm6350.dtsi
@@ -60,10 +60,12 @@
L2_0: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
L3_0: l3-cache {
compatible = "cache";
cache-level = <3>;
+ cache-unified;
};
};
};
@@ -86,6 +88,7 @@
L2_100: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -108,6 +111,7 @@
L2_200: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -130,6 +134,7 @@
L2_300: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -152,6 +157,7 @@
L2_400: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -174,6 +180,7 @@
L2_500: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -196,6 +203,7 @@
L2_600: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -218,6 +226,7 @@
L2_700: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
diff --git a/arch/arm64/boot/dts/qcom/sm6375-sony-xperia-murray-pdx225.dts b/arch/arm64/boot/dts/qcom/sm6375-sony-xperia-murray-pdx225.dts
index 8220e6f44117..b2f1bb1d58e9 100644
--- a/arch/arm64/boot/dts/qcom/sm6375-sony-xperia-murray-pdx225.dts
+++ b/arch/arm64/boot/dts/qcom/sm6375-sony-xperia-murray-pdx225.dts
@@ -178,12 +178,12 @@
};
&remoteproc_adsp {
- firmware-name = "qcom/Sony/murray/adsp.mbn";
+ firmware-name = "qcom/sm6375/Sony/murray/adsp.mbn";
status = "okay";
};
&remoteproc_cdsp {
- firmware-name = "qcom/Sony/murray/cdsp.mbn";
+ firmware-name = "qcom/sm6375/Sony/murray/cdsp.mbn";
status = "okay";
};
diff --git a/arch/arm64/boot/dts/qcom/sm6375.dtsi b/arch/arm64/boot/dts/qcom/sm6375.dtsi
index ae9b6bc446cb..f8d9c34d3b2f 100644
--- a/arch/arm64/boot/dts/qcom/sm6375.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm6375.dtsi
@@ -48,10 +48,14 @@
power-domain-names = "psci";
#cooling-cells = <2>;
L2_0: l2-cache {
- compatible = "cache";
- next-level-cache = <&L3_0>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ next-level-cache = <&L3_0>;
L3_0: l3-cache {
- compatible = "cache";
+ compatible = "cache";
+ cache-level = <3>;
+ cache-unified;
};
};
};
@@ -68,8 +72,10 @@
power-domain-names = "psci";
#cooling-cells = <2>;
L2_100: l2-cache {
- compatible = "cache";
- next-level-cache = <&L3_0>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ next-level-cache = <&L3_0>;
};
};
@@ -85,8 +91,10 @@
power-domain-names = "psci";
#cooling-cells = <2>;
L2_200: l2-cache {
- compatible = "cache";
- next-level-cache = <&L3_0>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ next-level-cache = <&L3_0>;
};
};
@@ -102,8 +110,10 @@
power-domain-names = "psci";
#cooling-cells = <2>;
L2_300: l2-cache {
- compatible = "cache";
- next-level-cache = <&L3_0>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ next-level-cache = <&L3_0>;
};
};
@@ -119,8 +129,10 @@
power-domain-names = "psci";
#cooling-cells = <2>;
L2_400: l2-cache {
- compatible = "cache";
- next-level-cache = <&L3_0>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ next-level-cache = <&L3_0>;
};
};
@@ -136,8 +148,10 @@
power-domain-names = "psci";
#cooling-cells = <2>;
L2_500: l2-cache {
- compatible = "cache";
- next-level-cache = <&L3_0>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ next-level-cache = <&L3_0>;
};
};
@@ -153,8 +167,10 @@
power-domain-names = "psci";
#cooling-cells = <2>;
L2_600: l2-cache {
- compatible = "cache";
- next-level-cache = <&L3_0>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ next-level-cache = <&L3_0>;
};
};
@@ -170,8 +186,10 @@
power-domain-names = "psci";
#cooling-cells = <2>;
L2_700: l2-cache {
- compatible = "cache";
- next-level-cache = <&L3_0>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ next-level-cache = <&L3_0>;
};
};
diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi
index 2273fa571988..27dcda0d4288 100644
--- a/arch/arm64/boot/dts/qcom/sm8150.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi
@@ -63,10 +63,12 @@
L2_0: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
L3_0: l3-cache {
- compatible = "cache";
- cache-level = <3>;
+ compatible = "cache";
+ cache-level = <3>;
+ cache-unified;
};
};
};
@@ -90,6 +92,7 @@
L2_100: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -113,6 +116,7 @@
L2_200: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -136,6 +140,7 @@
L2_300: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -159,6 +164,7 @@
L2_400: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -182,6 +188,7 @@
L2_500: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -205,6 +212,7 @@
L2_600: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -228,6 +236,7 @@
L2_700: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
diff --git a/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-boe.dts b/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-boe.dts
index 8b2ae39950ff..de6101ddebe7 100644
--- a/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-boe.dts
+++ b/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-boe.dts
@@ -13,6 +13,6 @@
};
&display_panel {
- compatible = "xiaomi,elish-boe-nt36523";
+ compatible = "xiaomi,elish-boe-nt36523", "novatek,nt36523";
status = "okay";
};
diff --git a/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-csot.dts b/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-csot.dts
index a4d5341495cf..4cffe9c703df 100644
--- a/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-csot.dts
+++ b/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-csot.dts
@@ -13,6 +13,6 @@
};
&display_panel {
- compatible = "xiaomi,elish-csot-nt36523";
+ compatible = "xiaomi,elish-csot-nt36523", "novatek,nt36523";
status = "okay";
};
diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi
index ebcb481571c2..3efdc03ed0f1 100644
--- a/arch/arm64/boot/dts/qcom/sm8350.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi
@@ -58,12 +58,14 @@
power-domain-names = "psci";
#cooling-cells = <2>;
L2_0: l2-cache {
- compatible = "cache";
- cache-level = <2>;
- next-level-cache = <&L3_0>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ next-level-cache = <&L3_0>;
L3_0: l3-cache {
- compatible = "cache";
- cache-level = <3>;
+ compatible = "cache";
+ cache-level = <3>;
+ cache-unified;
};
};
};
@@ -80,9 +82,10 @@
power-domain-names = "psci";
#cooling-cells = <2>;
L2_100: l2-cache {
- compatible = "cache";
- cache-level = <2>;
- next-level-cache = <&L3_0>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ next-level-cache = <&L3_0>;
};
};
@@ -98,9 +101,10 @@
power-domain-names = "psci";
#cooling-cells = <2>;
L2_200: l2-cache {
- compatible = "cache";
- cache-level = <2>;
- next-level-cache = <&L3_0>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ next-level-cache = <&L3_0>;
};
};
@@ -116,9 +120,10 @@
power-domain-names = "psci";
#cooling-cells = <2>;
L2_300: l2-cache {
- compatible = "cache";
- cache-level = <2>;
- next-level-cache = <&L3_0>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ next-level-cache = <&L3_0>;
};
};
@@ -134,9 +139,10 @@
power-domain-names = "psci";
#cooling-cells = <2>;
L2_400: l2-cache {
- compatible = "cache";
- cache-level = <2>;
- next-level-cache = <&L3_0>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ next-level-cache = <&L3_0>;
};
};
@@ -152,9 +158,10 @@
power-domain-names = "psci";
#cooling-cells = <2>;
L2_500: l2-cache {
- compatible = "cache";
- cache-level = <2>;
- next-level-cache = <&L3_0>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ next-level-cache = <&L3_0>;
};
};
@@ -170,9 +177,10 @@
power-domain-names = "psci";
#cooling-cells = <2>;
L2_600: l2-cache {
- compatible = "cache";
- cache-level = <2>;
- next-level-cache = <&L3_0>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ next-level-cache = <&L3_0>;
};
};
@@ -188,9 +196,10 @@
power-domain-names = "psci";
#cooling-cells = <2>;
L2_700: l2-cache {
- compatible = "cache";
- cache-level = <2>;
- next-level-cache = <&L3_0>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ next-level-cache = <&L3_0>;
};
};
diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi
index 595533aeafc4..d59ea8ee7111 100644
--- a/arch/arm64/boot/dts/qcom/sm8450.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi
@@ -57,12 +57,14 @@
#cooling-cells = <2>;
clocks = <&cpufreq_hw 0>;
L2_0: l2-cache {
- compatible = "cache";
- cache-level = <2>;
- next-level-cache = <&L3_0>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ next-level-cache = <&L3_0>;
L3_0: l3-cache {
- compatible = "cache";
- cache-level = <3>;
+ compatible = "cache";
+ cache-level = <3>;
+ cache-unified;
};
};
};
@@ -79,9 +81,10 @@
#cooling-cells = <2>;
clocks = <&cpufreq_hw 0>;
L2_100: l2-cache {
- compatible = "cache";
- cache-level = <2>;
- next-level-cache = <&L3_0>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ next-level-cache = <&L3_0>;
};
};
@@ -97,9 +100,10 @@
#cooling-cells = <2>;
clocks = <&cpufreq_hw 0>;
L2_200: l2-cache {
- compatible = "cache";
- cache-level = <2>;
- next-level-cache = <&L3_0>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ next-level-cache = <&L3_0>;
};
};
@@ -115,9 +119,10 @@
#cooling-cells = <2>;
clocks = <&cpufreq_hw 0>;
L2_300: l2-cache {
- compatible = "cache";
- cache-level = <2>;
- next-level-cache = <&L3_0>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ next-level-cache = <&L3_0>;
};
};
@@ -133,9 +138,10 @@
#cooling-cells = <2>;
clocks = <&cpufreq_hw 1>;
L2_400: l2-cache {
- compatible = "cache";
- cache-level = <2>;
- next-level-cache = <&L3_0>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ next-level-cache = <&L3_0>;
};
};
@@ -151,9 +157,10 @@
#cooling-cells = <2>;
clocks = <&cpufreq_hw 1>;
L2_500: l2-cache {
- compatible = "cache";
- cache-level = <2>;
- next-level-cache = <&L3_0>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ next-level-cache = <&L3_0>;
};
};
@@ -169,9 +176,10 @@
#cooling-cells = <2>;
clocks = <&cpufreq_hw 1>;
L2_600: l2-cache {
- compatible = "cache";
- cache-level = <2>;
- next-level-cache = <&L3_0>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ next-level-cache = <&L3_0>;
};
};
@@ -187,9 +195,10 @@
#cooling-cells = <2>;
clocks = <&cpufreq_hw 2>;
L2_700: l2-cache {
- compatible = "cache";
- cache-level = <2>;
- next-level-cache = <&L3_0>;
+ compatible = "cache";
+ cache-level = <2>;
+ cache-unified;
+ next-level-cache = <&L3_0>;
};
};
diff --git a/arch/arm64/boot/dts/qcom/sm8550.dtsi b/arch/arm64/boot/dts/qcom/sm8550.dtsi
index 6e9bad8f6f33..558cbc430708 100644
--- a/arch/arm64/boot/dts/qcom/sm8550.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8550.dtsi
@@ -80,10 +80,12 @@
L2_0: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
L3_0: l3-cache {
compatible = "cache";
cache-level = <3>;
+ cache-unified;
};
};
};
@@ -104,6 +106,7 @@
L2_100: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -124,6 +127,7 @@
L2_200: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -144,6 +148,7 @@
L2_300: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -164,6 +169,7 @@
L2_400: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -184,6 +190,7 @@
L2_500: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -204,6 +211,7 @@
L2_600: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -224,6 +232,7 @@
L2_700: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
next-level-cache = <&L3_0>;
};
};
@@ -2022,7 +2031,7 @@
qcom,din-ports = <4>;
qcom,dout-ports = <9>;
- qcom,ports-sinterval = <0x07 0x1f 0x3f 0x07 0x1f 0x3f 0x18f 0xff 0xff 0x0f 0x0f 0xff 0x31f>;
+ qcom,ports-sinterval = /bits/ 16 <0x07 0x1f 0x3f 0x07 0x1f 0x3f 0x18f 0xff 0xff 0x0f 0x0f 0xff 0x31f>;
qcom,ports-offset1 = /bits/ 8 <0x01 0x03 0x05 0x02 0x04 0x15 0x00 0xff 0xff 0x06 0x0d 0xff 0x00>;
qcom,ports-offset2 = /bits/ 8 <0xff 0x07 0x1f 0xff 0x07 0x1f 0xff 0xff 0xff 0xff 0xff 0xff 0xff>;
qcom,ports-hstart = /bits/ 8 <0xff 0xff 0xff 0xff 0xff 0xff 0x08 0xff 0xff 0xff 0xff 0xff 0x0f>;
@@ -2068,7 +2077,7 @@
qcom,din-ports = <0>;
qcom,dout-ports = <10>;
- qcom,ports-sinterval = <0x03 0x3f 0x1f 0x07 0x00 0x18f 0xff 0xff 0xff 0xff>;
+ qcom,ports-sinterval = /bits/ 16 <0x03 0x3f 0x1f 0x07 0x00 0x18f 0xff 0xff 0xff 0xff>;
qcom,ports-offset1 = /bits/ 8 <0x00 0x00 0x0b 0x01 0x00 0x00 0xff 0xff 0xff 0xff>;
qcom,ports-offset2 = /bits/ 8 <0x00 0x00 0x0b 0x00 0x00 0x00 0xff 0xff 0xff 0xff>;
qcom,ports-hstart = /bits/ 8 <0xff 0x03 0xff 0xff 0xff 0x08 0xff 0xff 0xff 0xff>;
@@ -2133,7 +2142,7 @@
qcom,din-ports = <4>;
qcom,dout-ports = <9>;
- qcom,ports-sinterval = <0x07 0x1f 0x3f 0x07 0x1f 0x3f 0x18f 0xff 0xff 0x0f 0x0f 0xff 0x31f>;
+ qcom,ports-sinterval = /bits/ 16 <0x07 0x1f 0x3f 0x07 0x1f 0x3f 0x18f 0xff 0xff 0x0f 0x0f 0xff 0x31f>;
qcom,ports-offset1 = /bits/ 8 <0x01 0x03 0x05 0x02 0x04 0x15 0x00 0xff 0xff 0x06 0x0d 0xff 0x00>;
qcom,ports-offset2 = /bits/ 8 <0xff 0x07 0x1f 0xff 0x07 0x1f 0xff 0xff 0xff 0xff 0xff 0xff 0xff>;
qcom,ports-hstart = /bits/ 8 <0xff 0xff 0xff 0xff 0xff 0xff 0x08 0xff 0xff 0xff 0xff 0xff 0x0f>;
@@ -3762,9 +3771,16 @@
system-cache-controller@25000000 {
compatible = "qcom,sm8550-llcc";
- reg = <0 0x25000000 0 0x800000>,
+ reg = <0 0x25000000 0 0x200000>,
+ <0 0x25200000 0 0x200000>,
+ <0 0x25400000 0 0x200000>,
+ <0 0x25600000 0 0x200000>,
<0 0x25800000 0 0x200000>;
- reg-names = "llcc_base", "llcc_broadcast_base";
+ reg-names = "llcc0_base",
+ "llcc1_base",
+ "llcc2_base",
+ "llcc3_base",
+ "llcc_broadcast_base";
interrupts = <GIC_SPI 266 IRQ_TYPE_LEVEL_HIGH>;
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3308.dtsi b/arch/arm64/boot/dts/rockchip/rk3308.dtsi
index dd228a256a32..2ae4bb7d5e62 100644
--- a/arch/arm64/boot/dts/rockchip/rk3308.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3308.dtsi
@@ -97,6 +97,7 @@
l2: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
index f69a38f42d2d..0a27fa5271f5 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
@@ -37,7 +37,8 @@
vin-supply = <&vcc_io>;
};
- vcc_host_5v: vcc-host-5v-regulator {
+ /* Common enable line for all of the rails mentioned in the labels */
+ vcc_host_5v: vcc_host1_5v: vcc_otg_5v: vcc-host-5v-regulator {
compatible = "regulator-fixed";
gpio = <&gpio0 RK_PA2 GPIO_ACTIVE_LOW>;
pinctrl-names = "default";
@@ -48,17 +49,6 @@
vin-supply = <&vcc_sys>;
};
- vcc_host1_5v: vcc_otg_5v: vcc-host1-5v-regulator {
- compatible = "regulator-fixed";
- gpio = <&gpio0 RK_PA2 GPIO_ACTIVE_LOW>;
- pinctrl-names = "default";
- pinctrl-0 = <&usb20_host_drv>;
- regulator-name = "vcc_host1_5v";
- regulator-always-on;
- regulator-boot-on;
- vin-supply = <&vcc_sys>;
- };
-
vcc_sys: vcc-sys {
compatible = "regulator-fixed";
regulator-name = "vcc_sys";
diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
index 6d7a7bf72ac7..e729e7a22b23 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
@@ -103,6 +103,7 @@
l2: l2-cache0 {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-soquartz-cm4.dts b/arch/arm64/boot/dts/rockchip/rk3566-soquartz-cm4.dts
index 263ce40770dd..cddf6cd2fecb 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-soquartz-cm4.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3566-soquartz-cm4.dts
@@ -28,6 +28,16 @@
regulator-max-microvolt = <5000000>;
vin-supply = <&vcc12v_dcin>;
};
+
+ vcc_sd_pwr: vcc-sd-pwr-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc_sd_pwr";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ vin-supply = <&vcc3v3_sys>;
+ };
};
/* phy for pcie */
@@ -130,13 +140,7 @@
};
&sdmmc0 {
- vmmc-supply = <&sdmmc_pwr>;
- status = "okay";
-};
-
-&sdmmc_pwr {
- regulator-min-microvolt = <3300000>;
- regulator-max-microvolt = <3300000>;
+ vmmc-supply = <&vcc_sd_pwr>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi
index 102e448bc026..31aa2b8efe39 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi
@@ -104,16 +104,6 @@
regulator-max-microvolt = <3300000>;
vin-supply = <&vcc5v0_sys>;
};
-
- sdmmc_pwr: sdmmc-pwr-regulator {
- compatible = "regulator-fixed";
- enable-active-high;
- gpio = <&gpio0 RK_PA5 GPIO_ACTIVE_HIGH>;
- pinctrl-names = "default";
- pinctrl-0 = <&sdmmc_pwr_h>;
- regulator-name = "sdmmc_pwr";
- status = "disabled";
- };
};
&cpu0 {
@@ -155,6 +145,19 @@
status = "disabled";
};
+&gpio0 {
+ nextrst-hog {
+ gpio-hog;
+ /*
+ * GPIO_ACTIVE_LOW + output-low here means that the pin is set
+ * to high, because output-low decides the value pre-inversion.
+ */
+ gpios = <RK_PA5 GPIO_ACTIVE_LOW>;
+ line-name = "nEXTRST";
+ output-low;
+ };
+};
+
&gpu {
mali-supply = <&vdd_gpu>;
status = "okay";
@@ -538,12 +541,6 @@
rockchip,pins = <2 RK_PC2 RK_FUNC_GPIO &pcfg_pull_none>;
};
};
-
- sdmmc-pwr {
- sdmmc_pwr_h: sdmmc-pwr-h {
- rockchip,pins = <0 RK_PA5 RK_FUNC_GPIO &pcfg_pull_none>;
- };
- };
};
&pmu_io_domains {
diff --git a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5c.dts b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5c.dts
index f70ca9f0470a..c718b8dbb9c6 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5c.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5c.dts
@@ -106,7 +106,7 @@
rockchip-key {
reset_button_pin: reset-button-pin {
- rockchip,pins = <4 RK_PA0 RK_FUNC_GPIO &pcfg_pull_up>;
+ rockchip,pins = <0 RK_PB7 RK_FUNC_GPIO &pcfg_pull_up>;
};
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts
index 2a1118f15c29..b6ad8328c7eb 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts
@@ -134,4 +134,3 @@
};
};
};
-
diff --git a/arch/arm64/boot/dts/rockchip/rk3568.dtsi b/arch/arm64/boot/dts/rockchip/rk3568.dtsi
index ba67b58f05b7..f1be76a54ceb 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3568.dtsi
@@ -94,9 +94,10 @@
power-domains = <&power RK3568_PD_PIPE>;
reg = <0x3 0xc0400000 0x0 0x00400000>,
<0x0 0xfe270000 0x0 0x00010000>,
- <0x3 0x7f000000 0x0 0x01000000>;
- ranges = <0x01000000 0x0 0x3ef00000 0x3 0x7ef00000 0x0 0x00100000>,
- <0x02000000 0x0 0x00000000 0x3 0x40000000 0x0 0x3ef00000>;
+ <0x0 0xf2000000 0x0 0x00100000>;
+ ranges = <0x01000000 0x0 0xf2100000 0x0 0xf2100000 0x0 0x00100000>,
+ <0x02000000 0x0 0xf2200000 0x0 0xf2200000 0x0 0x01e00000>,
+ <0x03000000 0x0 0x40000000 0x3 0x40000000 0x0 0x40000000>;
reg-names = "dbi", "apb", "config";
resets = <&cru SRST_PCIE30X1_POWERUP>;
reset-names = "pipe";
@@ -146,9 +147,10 @@
power-domains = <&power RK3568_PD_PIPE>;
reg = <0x3 0xc0800000 0x0 0x00400000>,
<0x0 0xfe280000 0x0 0x00010000>,
- <0x3 0xbf000000 0x0 0x01000000>;
- ranges = <0x01000000 0x0 0x3ef00000 0x3 0xbef00000 0x0 0x00100000>,
- <0x02000000 0x0 0x00000000 0x3 0x80000000 0x0 0x3ef00000>;
+ <0x0 0xf0000000 0x0 0x00100000>;
+ ranges = <0x01000000 0x0 0xf0100000 0x0 0xf0100000 0x0 0x00100000>,
+ <0x02000000 0x0 0xf0200000 0x0 0xf0200000 0x0 0x01e00000>,
+ <0x03000000 0x0 0x40000000 0x3 0x80000000 0x0 0x40000000>;
reg-names = "dbi", "apb", "config";
resets = <&cru SRST_PCIE30X2_POWERUP>;
reset-names = "pipe";
diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi
index f62e0fd881a9..61680c7ac489 100644
--- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi
@@ -952,7 +952,7 @@
compatible = "rockchip,rk3568-pcie";
reg = <0x3 0xc0000000 0x0 0x00400000>,
<0x0 0xfe260000 0x0 0x00010000>,
- <0x3 0x3f000000 0x0 0x01000000>;
+ <0x0 0xf4000000 0x0 0x00100000>;
reg-names = "dbi", "apb", "config";
interrupts = <GIC_SPI 75 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>,
@@ -982,8 +982,9 @@
phys = <&combphy2 PHY_TYPE_PCIE>;
phy-names = "pcie-phy";
power-domains = <&power RK3568_PD_PIPE>;
- ranges = <0x01000000 0x0 0x3ef00000 0x3 0x3ef00000 0x0 0x00100000
- 0x02000000 0x0 0x00000000 0x3 0x00000000 0x0 0x3ef00000>;
+ ranges = <0x01000000 0x0 0xf4100000 0x0 0xf4100000 0x0 0x00100000>,
+ <0x02000000 0x0 0xf4200000 0x0 0xf4200000 0x0 0x01e00000>,
+ <0x03000000 0x0 0x40000000 0x3 0x00000000 0x0 0x40000000>;
resets = <&cru SRST_PCIE20_POWERUP>;
reset-names = "pipe";
#address-cells = <3>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
index 657c019d27fa..a3124bd2e092 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
@@ -229,6 +229,7 @@
cache-line-size = <64>;
cache-sets = <512>;
cache-level = <2>;
+ cache-unified;
next-level-cache = <&l3_cache>;
};
@@ -238,6 +239,7 @@
cache-line-size = <64>;
cache-sets = <512>;
cache-level = <2>;
+ cache-unified;
next-level-cache = <&l3_cache>;
};
@@ -247,6 +249,7 @@
cache-line-size = <64>;
cache-sets = <512>;
cache-level = <2>;
+ cache-unified;
next-level-cache = <&l3_cache>;
};
@@ -256,6 +259,7 @@
cache-line-size = <64>;
cache-sets = <512>;
cache-level = <2>;
+ cache-unified;
next-level-cache = <&l3_cache>;
};
@@ -265,6 +269,7 @@
cache-line-size = <64>;
cache-sets = <1024>;
cache-level = <2>;
+ cache-unified;
next-level-cache = <&l3_cache>;
};
@@ -274,6 +279,7 @@
cache-line-size = <64>;
cache-sets = <1024>;
cache-level = <2>;
+ cache-unified;
next-level-cache = <&l3_cache>;
};
@@ -283,6 +289,7 @@
cache-line-size = <64>;
cache-sets = <1024>;
cache-level = <2>;
+ cache-unified;
next-level-cache = <&l3_cache>;
};
@@ -292,6 +299,7 @@
cache-line-size = <64>;
cache-sets = <1024>;
cache-level = <2>;
+ cache-unified;
next-level-cache = <&l3_cache>;
};
@@ -301,6 +309,7 @@
cache-line-size = <64>;
cache-sets = <4096>;
cache-level = <3>;
+ cache-unified;
};
};
diff --git a/arch/arm64/hyperv/mshyperv.c b/arch/arm64/hyperv/mshyperv.c
index a406454578f0..f1b8a04ee9f2 100644
--- a/arch/arm64/hyperv/mshyperv.c
+++ b/arch/arm64/hyperv/mshyperv.c
@@ -67,7 +67,7 @@ static int __init hyperv_init(void)
if (ret)
return ret;
- ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "arm64/hyperv_init:online",
+ ret = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "arm64/hyperv_init:online",
hv_common_cpu_init, hv_common_cpu_die);
if (ret < 0) {
hv_common_free();
diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h
index d6b51deb7bf0..18dc2fb3d7b7 100644
--- a/arch/arm64/include/asm/arm_pmuv3.h
+++ b/arch/arm64/include/asm/arm_pmuv3.h
@@ -13,7 +13,7 @@
#define RETURN_READ_PMEVCNTRN(n) \
return read_sysreg(pmevcntr##n##_el0)
-static unsigned long read_pmevcntrn(int n)
+static inline unsigned long read_pmevcntrn(int n)
{
PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN);
return 0;
@@ -21,14 +21,14 @@ static unsigned long read_pmevcntrn(int n)
#define WRITE_PMEVCNTRN(n) \
write_sysreg(val, pmevcntr##n##_el0)
-static void write_pmevcntrn(int n, unsigned long val)
+static inline void write_pmevcntrn(int n, unsigned long val)
{
PMEVN_SWITCH(n, WRITE_PMEVCNTRN);
}
#define WRITE_PMEVTYPERN(n) \
write_sysreg(val, pmevtyper##n##_el0)
-static void write_pmevtypern(int n, unsigned long val)
+static inline void write_pmevtypern(int n, unsigned long val)
{
PMEVN_SWITCH(n, WRITE_PMEVTYPERN);
}
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 683ca3af4084..5f6f84837a49 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -126,6 +126,10 @@
#define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029
#define APPLE_CPU_PART_M2_BLIZZARD 0x032
#define APPLE_CPU_PART_M2_AVALANCHE 0x033
+#define APPLE_CPU_PART_M2_BLIZZARD_PRO 0x034
+#define APPLE_CPU_PART_M2_AVALANCHE_PRO 0x035
+#define APPLE_CPU_PART_M2_BLIZZARD_MAX 0x038
+#define APPLE_CPU_PART_M2_AVALANCHE_MAX 0x039
#define AMPERE_CPU_PART_AMPERE1 0xAC3
@@ -181,6 +185,10 @@
#define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX)
#define MIDR_APPLE_M2_BLIZZARD MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD)
#define MIDR_APPLE_M2_AVALANCHE MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE)
+#define MIDR_APPLE_M2_BLIZZARD_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_PRO)
+#define MIDR_APPLE_M2_AVALANCHE_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_PRO)
+#define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX)
+#define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX)
#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)
/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 7e7e19ef6993..9787503ff43f 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -699,6 +699,8 @@ struct kvm_vcpu_arch {
#define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(4))
/* Software step state is Active-pending */
#define DBG_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(5))
+/* PMUSERENR for the guest EL0 is on physical CPU */
+#define PMUSERENR_ON_CPU __vcpu_single_flag(sflags, BIT(6))
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
@@ -1065,9 +1067,14 @@ void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu);
#ifdef CONFIG_KVM
void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr);
void kvm_clr_pmu_events(u32 clr);
+bool kvm_set_pmuserenr(u64 val);
#else
static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
static inline void kvm_clr_pmu_events(u32 clr) {}
+static inline bool kvm_set_pmuserenr(u64 val)
+{
+ return false;
+}
#endif
void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 4cd6762bda80..93bd0975b15f 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -209,6 +209,7 @@ struct kvm_pgtable_visit_ctx {
kvm_pte_t old;
void *arg;
struct kvm_pgtable_mm_ops *mm_ops;
+ u64 start;
u64 addr;
u64 end;
u32 level;
@@ -631,9 +632,9 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
*
* The walker will walk the page-table entries corresponding to the input
* address range specified, visiting entries according to the walker flags.
- * Invalid entries are treated as leaf entries. Leaf entries are reloaded
- * after invoking the walker callback, allowing the walker to descend into
- * a newly installed table.
+ * Invalid entries are treated as leaf entries. The visited page table entry is
+ * reloaded after invoking the walker callback, allowing the walker to descend
+ * into a newly installed table.
*
* Returning a negative error code from the walker callback function will
* terminate the walk immediately with the same error code.
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index e72d9aaab6b1..eefd712f2430 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -115,8 +115,14 @@
#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 7, 31)
#define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2)
+#define SYS_DC_IGSW sys_insn(1, 0, 7, 6, 4)
+#define SYS_DC_IGDSW sys_insn(1, 0, 7, 6, 6)
#define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2)
+#define SYS_DC_CGSW sys_insn(1, 0, 7, 10, 4)
+#define SYS_DC_CGDSW sys_insn(1, 0, 7, 10, 6)
#define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2)
+#define SYS_DC_CIGSW sys_insn(1, 0, 7, 14, 4)
+#define SYS_DC_CIGDSW sys_insn(1, 0, 7, 14, 6)
/*
* Automatically generated definitions for system registers, the
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index f5bcb0dc6267..7e89968bd282 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -66,13 +66,10 @@ void mte_sync_tags(pte_t old_pte, pte_t pte)
return;
/* if PG_mte_tagged is set, tags have already been initialised */
- for (i = 0; i < nr_pages; i++, page++) {
- if (!page_mte_tagged(page)) {
+ for (i = 0; i < nr_pages; i++, page++)
+ if (!page_mte_tagged(page))
mte_sync_page_tags(page, old_pte, check_swap,
pte_is_tagged);
- set_page_mte_tagged(page);
- }
- }
/* ensure the tags are visible before the PTE is set */
smp_wmb();
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 0119dc91abb5..d9e1355730ef 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -288,7 +288,7 @@ static int aarch32_alloc_kuser_vdso_page(void)
memcpy((void *)(vdso_page + 0x1000 - kuser_sz), __kuser_helper_start,
kuser_sz);
- aarch32_vectors_page = virt_to_page(vdso_page);
+ aarch32_vectors_page = virt_to_page((void *)vdso_page);
return 0;
}
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 1279949599b5..4c9dcd8fc939 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -81,26 +81,34 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
fpsimd_kvm_prepare();
+ /*
+ * We will check TIF_FOREIGN_FPSTATE just before entering the
+ * guest in kvm_arch_vcpu_ctxflush_fp() and override this to
+ * FP_STATE_FREE if the flag set.
+ */
vcpu->arch.fp_state = FP_STATE_HOST_OWNED;
vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
vcpu_set_flag(vcpu, HOST_SVE_ENABLED);
- /*
- * We don't currently support SME guests but if we leave
- * things in streaming mode then when the guest starts running
- * FPSIMD or SVE code it may generate SME traps so as a
- * special case if we are in streaming mode we force the host
- * state to be saved now and exit streaming mode so that we
- * don't have to handle any SME traps for valid guest
- * operations. Do this for ZA as well for now for simplicity.
- */
if (system_supports_sme()) {
vcpu_clear_flag(vcpu, HOST_SME_ENABLED);
if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
vcpu_set_flag(vcpu, HOST_SME_ENABLED);
+ /*
+ * If PSTATE.SM is enabled then save any pending FP
+ * state and disable PSTATE.SM. If we leave PSTATE.SM
+ * enabled and the guest does not enable SME via
+ * CPACR_EL1.SMEN then operations that should be valid
+ * may generate SME traps from EL1 to EL1 which we
+ * can't intercept and which would confuse the guest.
+ *
+ * Do the same for PSTATE.ZA in the case where there
+ * is state in the registers which has not already
+ * been saved, this is very unlikely to happen.
+ */
if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) {
vcpu->arch.fp_state = FP_STATE_FREE;
fpsimd_save_and_flush_cpu_state();
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index c41166f1a1dd..4fe217efa218 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -82,8 +82,14 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
* EL1 instead of being trapped to EL2.
*/
if (kvm_arm_support_pmu_v3()) {
+ struct kvm_cpu_context *hctxt;
+
write_sysreg(0, pmselr_el0);
+
+ hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0);
write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
+ vcpu_set_flag(vcpu, PMUSERENR_ON_CPU);
}
vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2);
@@ -106,8 +112,13 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2);
write_sysreg(0, hstr_el2);
- if (kvm_arm_support_pmu_v3())
- write_sysreg(0, pmuserenr_el0);
+ if (kvm_arm_support_pmu_v3()) {
+ struct kvm_cpu_context *hctxt;
+
+ hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0);
+ vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU);
+ }
if (cpus_have_final_cap(ARM64_SME)) {
sysreg_clear_set_s(SYS_HFGRTR_EL2, 0,
@@ -177,9 +188,17 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
sve_guest = vcpu_has_sve(vcpu);
esr_ec = kvm_vcpu_trap_get_class(vcpu);
- /* Don't handle SVE traps for non-SVE vcpus here: */
- if (!sve_guest && esr_ec != ESR_ELx_EC_FP_ASIMD)
+ /* Only handle traps the vCPU can support here: */
+ switch (esr_ec) {
+ case ESR_ELx_EC_FP_ASIMD:
+ break;
+ case ESR_ELx_EC_SVE:
+ if (!sve_guest)
+ return false;
+ break;
+ default:
return false;
+ }
/* Valid trap. Switch the context: */
@@ -404,17 +423,21 @@ static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
return false;
}
-static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
+static bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (!__populate_fault_info(vcpu))
return true;
return false;
}
+static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
+ __alias(kvm_hyp_handle_memory_fault);
+static bool kvm_hyp_handle_watchpt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
+ __alias(kvm_hyp_handle_memory_fault);
static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
{
- if (!__populate_fault_info(vcpu))
+ if (kvm_hyp_handle_memory_fault(vcpu, exit_code))
return true;
if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 2e9ec4a2a4a3..a8813b212996 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -575,7 +575,7 @@ struct pkvm_mem_donation {
struct check_walk_data {
enum pkvm_page_state desired;
- enum pkvm_page_state (*get_page_state)(kvm_pte_t pte);
+ enum pkvm_page_state (*get_page_state)(kvm_pte_t pte, u64 addr);
};
static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
@@ -583,10 +583,7 @@ static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
{
struct check_walk_data *d = ctx->arg;
- if (kvm_pte_valid(ctx->old) && !addr_is_allowed_memory(kvm_pte_to_phys(ctx->old)))
- return -EINVAL;
-
- return d->get_page_state(ctx->old) == d->desired ? 0 : -EPERM;
+ return d->get_page_state(ctx->old, ctx->addr) == d->desired ? 0 : -EPERM;
}
static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
@@ -601,8 +598,11 @@ static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
return kvm_pgtable_walk(pgt, addr, size, &walker);
}
-static enum pkvm_page_state host_get_page_state(kvm_pte_t pte)
+static enum pkvm_page_state host_get_page_state(kvm_pte_t pte, u64 addr)
{
+ if (!addr_is_allowed_memory(addr))
+ return PKVM_NOPAGE;
+
if (!kvm_pte_valid(pte) && pte)
return PKVM_NOPAGE;
@@ -709,7 +709,7 @@ static int host_complete_donation(u64 addr, const struct pkvm_mem_transition *tx
return host_stage2_set_owner_locked(addr, size, host_id);
}
-static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte)
+static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr)
{
if (!kvm_pte_valid(pte))
return PKVM_NOPAGE;
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 71fa16a0dc77..77791495c995 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -186,6 +186,7 @@ static const exit_handler_fn hyp_exit_handlers[] = {
[ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
+ [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
};
@@ -196,6 +197,7 @@ static const exit_handler_fn pvm_exit_handlers[] = {
[ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
+ [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
};
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 3d61bd3e591d..95dae02ccc2e 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -58,8 +58,9 @@
struct kvm_pgtable_walk_data {
struct kvm_pgtable_walker *walker;
+ const u64 start;
u64 addr;
- u64 end;
+ const u64 end;
};
static bool kvm_phys_is_valid(u64 phys)
@@ -201,20 +202,33 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
.old = READ_ONCE(*ptep),
.arg = data->walker->arg,
.mm_ops = mm_ops,
+ .start = data->start,
.addr = data->addr,
.end = data->end,
.level = level,
.flags = flags,
};
int ret = 0;
+ bool reload = false;
kvm_pteref_t childp;
bool table = kvm_pte_table(ctx.old, level);
- if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE))
+ if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE)) {
ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_PRE);
+ reload = true;
+ }
if (!table && (ctx.flags & KVM_PGTABLE_WALK_LEAF)) {
ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_LEAF);
+ reload = true;
+ }
+
+ /*
+ * Reload the page table after invoking the walker callback for leaf
+ * entries or after pre-order traversal, to allow the walker to descend
+ * into a newly installed or replaced table.
+ */
+ if (reload) {
ctx.old = READ_ONCE(*ptep);
table = kvm_pte_table(ctx.old, level);
}
@@ -293,6 +307,7 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
struct kvm_pgtable_walker *walker)
{
struct kvm_pgtable_walk_data walk_data = {
+ .start = ALIGN_DOWN(addr, PAGE_SIZE),
.addr = ALIGN_DOWN(addr, PAGE_SIZE),
.end = PAGE_ALIGN(walk_data.addr + size),
.walker = walker,
@@ -349,7 +364,7 @@ int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
}
struct hyp_map_data {
- u64 phys;
+ const u64 phys;
kvm_pte_t attr;
};
@@ -407,13 +422,12 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
static bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
struct hyp_map_data *data)
{
+ u64 phys = data->phys + (ctx->addr - ctx->start);
kvm_pte_t new;
- u64 granule = kvm_granule_size(ctx->level), phys = data->phys;
if (!kvm_block_mapping_supported(ctx, phys))
return false;
- data->phys += granule;
new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level);
if (ctx->old == new)
return true;
@@ -576,7 +590,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
}
struct stage2_map_data {
- u64 phys;
+ const u64 phys;
kvm_pte_t attr;
u8 owner_id;
@@ -794,20 +808,43 @@ static bool stage2_pte_executable(kvm_pte_t pte)
return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
}
+static u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx,
+ const struct stage2_map_data *data)
+{
+ u64 phys = data->phys;
+
+ /*
+ * Stage-2 walks to update ownership data are communicated to the map
+ * walker using an invalid PA. Avoid offsetting an already invalid PA,
+ * which could overflow and make the address valid again.
+ */
+ if (!kvm_phys_is_valid(phys))
+ return phys;
+
+ /*
+ * Otherwise, work out the correct PA based on how far the walk has
+ * gotten.
+ */
+ return phys + (ctx->addr - ctx->start);
+}
+
static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx,
struct stage2_map_data *data)
{
+ u64 phys = stage2_map_walker_phys_addr(ctx, data);
+
if (data->force_pte && (ctx->level < (KVM_PGTABLE_MAX_LEVELS - 1)))
return false;
- return kvm_block_mapping_supported(ctx, data->phys);
+ return kvm_block_mapping_supported(ctx, phys);
}
static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
struct stage2_map_data *data)
{
kvm_pte_t new;
- u64 granule = kvm_granule_size(ctx->level), phys = data->phys;
+ u64 phys = stage2_map_walker_phys_addr(ctx, data);
+ u64 granule = kvm_granule_size(ctx->level);
struct kvm_pgtable *pgt = data->mmu->pgt;
struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
@@ -841,8 +878,6 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
stage2_make_pte(ctx, new);
- if (kvm_phys_is_valid(phys))
- data->phys += granule;
return 0;
}
@@ -1297,4 +1332,7 @@ void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pg
};
WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level + 1));
+
+ WARN_ON(mm_ops->page_count(pgtable) != 1);
+ mm_ops->put_page(pgtable);
}
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 3d868e84c7a0..b37e7c96efea 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -92,14 +92,28 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
}
NOKPROBE_SYMBOL(__deactivate_traps);
+/*
+ * Disable IRQs in {activate,deactivate}_traps_vhe_{load,put}() to
+ * prevent a race condition between context switching of PMUSERENR_EL0
+ * in __{activate,deactivate}_traps_common() and IPIs that attempts to
+ * update PMUSERENR_EL0. See also kvm_set_pmuserenr().
+ */
void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
{
+ unsigned long flags;
+
+ local_irq_save(flags);
__activate_traps_common(vcpu);
+ local_irq_restore(flags);
}
void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu)
{
+ unsigned long flags;
+
+ local_irq_save(flags);
__deactivate_traps_common(vcpu);
+ local_irq_restore(flags);
}
static const exit_handler_fn hyp_exit_handlers[] = {
@@ -110,6 +124,7 @@ static const exit_handler_fn hyp_exit_handlers[] = {
[ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
+ [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
};
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index 64c3aec0d937..0bd93a5f21ce 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -204,7 +204,7 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu)
* Size Fault at level 0, as if exceeding PARange.
*
* Non-LPAE guests will only get the external abort, as there
- * is no way to to describe the ASF.
+ * is no way to describe the ASF.
*/
if (vcpu_el1_is_32bit(vcpu) &&
!(vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE))
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 45727d50d18d..560650972478 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -694,45 +694,41 @@ out_unlock:
static struct arm_pmu *kvm_pmu_probe_armpmu(void)
{
- struct perf_event_attr attr = { };
- struct perf_event *event;
- struct arm_pmu *pmu = NULL;
+ struct arm_pmu *tmp, *pmu = NULL;
+ struct arm_pmu_entry *entry;
+ int cpu;
+
+ mutex_lock(&arm_pmus_lock);
/*
- * Create a dummy event that only counts user cycles. As we'll never
- * leave this function with the event being live, it will never
- * count anything. But it allows us to probe some of the PMU
- * details. Yes, this is terrible.
+ * It is safe to use a stale cpu to iterate the list of PMUs so long as
+ * the same value is used for the entirety of the loop. Given this, and
+ * the fact that no percpu data is used for the lookup there is no need
+ * to disable preemption.
+ *
+ * It is still necessary to get a valid cpu, though, to probe for the
+ * default PMU instance as userspace is not required to specify a PMU
+ * type. In order to uphold the preexisting behavior KVM selects the
+ * PMU instance for the core where the first call to the
+ * KVM_ARM_VCPU_PMU_V3_CTRL attribute group occurs. A dependent use case
+ * would be a user with disdain of all things big.LITTLE that affines
+ * the VMM to a particular cluster of cores.
+ *
+ * In any case, userspace should just do the sane thing and use the UAPI
+ * to select a PMU type directly. But, be wary of the baggage being
+ * carried here.
*/
- attr.type = PERF_TYPE_RAW;
- attr.size = sizeof(attr);
- attr.pinned = 1;
- attr.disabled = 0;
- attr.exclude_user = 0;
- attr.exclude_kernel = 1;
- attr.exclude_hv = 1;
- attr.exclude_host = 1;
- attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
- attr.sample_period = GENMASK(63, 0);
-
- event = perf_event_create_kernel_counter(&attr, -1, current,
- kvm_pmu_perf_overflow, &attr);
-
- if (IS_ERR(event)) {
- pr_err_once("kvm: pmu event creation failed %ld\n",
- PTR_ERR(event));
- return NULL;
- }
+ cpu = raw_smp_processor_id();
+ list_for_each_entry(entry, &arm_pmus, entry) {
+ tmp = entry->arm_pmu;
- if (event->pmu) {
- pmu = to_arm_pmu(event->pmu);
- if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
- pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
- pmu = NULL;
+ if (cpumask_test_cpu(cpu, &tmp->supported_cpus)) {
+ pmu = tmp;
+ break;
+ }
}
- perf_event_disable(event);
- perf_event_release_kernel(event);
+ mutex_unlock(&arm_pmus_lock);
return pmu;
}
@@ -912,7 +908,17 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
return -EBUSY;
if (!kvm->arch.arm_pmu) {
- /* No PMU set, get the default one */
+ /*
+ * No PMU set, get the default one.
+ *
+ * The observant among you will notice that the supported_cpus
+ * mask does not get updated for the default PMU even though it
+ * is quite possible the selected instance supports only a
+ * subset of cores in the system. This is intentional, and
+ * upholds the preexisting behavior on heterogeneous systems
+ * where vCPUs can be scheduled on any core but the guest
+ * counters could stop working.
+ */
kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
if (!kvm->arch.arm_pmu)
return -ENODEV;
diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c
index 7887133d15f0..121f1a14c829 100644
--- a/arch/arm64/kvm/pmu.c
+++ b/arch/arm64/kvm/pmu.c
@@ -209,3 +209,30 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu)
kvm_vcpu_pmu_enable_el0(events_host);
kvm_vcpu_pmu_disable_el0(events_guest);
}
+
+/*
+ * With VHE, keep track of the PMUSERENR_EL0 value for the host EL0 on the pCPU
+ * where PMUSERENR_EL0 for the guest is loaded, since PMUSERENR_EL0 is switched
+ * to the value for the guest on vcpu_load(). The value for the host EL0
+ * will be restored on vcpu_put(), before returning to userspace.
+ * This isn't necessary for nVHE, as the register is context switched for
+ * every guest enter/exit.
+ *
+ * Return true if KVM takes care of the register. Otherwise return false.
+ */
+bool kvm_set_pmuserenr(u64 val)
+{
+ struct kvm_cpu_context *hctxt;
+ struct kvm_vcpu *vcpu;
+
+ if (!kvm_arm_support_pmu_v3() || !has_vhe())
+ return false;
+
+ vcpu = kvm_get_running_vcpu();
+ if (!vcpu || !vcpu_get_flag(vcpu, PMUSERENR_ON_CPU))
+ return false;
+
+ hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ ctxt_sys_reg(hctxt, PMUSERENR_EL0) = val;
+ return true;
+}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 71b12094d613..753aa7418149 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -211,6 +211,19 @@ static bool access_dcsw(struct kvm_vcpu *vcpu,
return true;
}
+static bool access_dcgsw(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (!kvm_has_mte(vcpu->kvm)) {
+ kvm_inject_undefined(vcpu);
+ return false;
+ }
+
+ /* Treat MTE S/W ops as we treat the classic ones: with contempt */
+ return access_dcsw(vcpu, p, r);
+}
+
static void get_access_mask(const struct sys_reg_desc *r, u64 *mask, u64 *shift)
{
switch (r->aarch32_map) {
@@ -1756,8 +1769,14 @@ static bool access_spsr(struct kvm_vcpu *vcpu,
*/
static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_DC_ISW), access_dcsw },
+ { SYS_DESC(SYS_DC_IGSW), access_dcgsw },
+ { SYS_DESC(SYS_DC_IGDSW), access_dcgsw },
{ SYS_DESC(SYS_DC_CSW), access_dcsw },
+ { SYS_DESC(SYS_DC_CGSW), access_dcgsw },
+ { SYS_DESC(SYS_DC_CGDSW), access_dcgsw },
{ SYS_DESC(SYS_DC_CISW), access_dcsw },
+ { SYS_DESC(SYS_DC_CIGSW), access_dcgsw },
+ { SYS_DESC(SYS_DC_CIGDSW), access_dcgsw },
DBG_BCR_BVR_WCR_WVR_EL1(0),
DBG_BCR_BVR_WCR_WVR_EL1(1),
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 9d42c7cb2b58..c8c3cb812783 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -235,9 +235,9 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
* KVM io device for the redistributor that belongs to this VCPU.
*/
if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
- mutex_lock(&vcpu->kvm->arch.config_lock);
+ mutex_lock(&vcpu->kvm->slots_lock);
ret = vgic_register_redist_iodev(vcpu);
- mutex_unlock(&vcpu->kvm->arch.config_lock);
+ mutex_unlock(&vcpu->kvm->slots_lock);
}
return ret;
}
@@ -406,7 +406,7 @@ void kvm_vgic_destroy(struct kvm *kvm)
/**
* vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest
- * is a GICv2. A GICv3 must be explicitly initialized by the guest using the
+ * is a GICv2. A GICv3 must be explicitly initialized by userspace using the
* KVM_DEV_ARM_VGIC_GRP_CTRL KVM_DEVICE group.
* @kvm: kvm struct pointer
*/
@@ -446,11 +446,14 @@ int vgic_lazy_init(struct kvm *kvm)
int kvm_vgic_map_resources(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
+ enum vgic_type type;
+ gpa_t dist_base;
int ret = 0;
if (likely(vgic_ready(kvm)))
return 0;
+ mutex_lock(&kvm->slots_lock);
mutex_lock(&kvm->arch.config_lock);
if (vgic_ready(kvm))
goto out;
@@ -458,18 +461,33 @@ int kvm_vgic_map_resources(struct kvm *kvm)
if (!irqchip_in_kernel(kvm))
goto out;
- if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
+ if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) {
ret = vgic_v2_map_resources(kvm);
- else
+ type = VGIC_V2;
+ } else {
ret = vgic_v3_map_resources(kvm);
+ type = VGIC_V3;
+ }
- if (ret)
+ if (ret) {
__kvm_vgic_destroy(kvm);
- else
- dist->ready = true;
+ goto out;
+ }
+ dist->ready = true;
+ dist_base = dist->vgic_dist_base;
+ mutex_unlock(&kvm->arch.config_lock);
+
+ ret = vgic_register_dist_iodev(kvm, dist_base, type);
+ if (ret) {
+ kvm_err("Unable to register VGIC dist MMIO regions\n");
+ kvm_vgic_destroy(kvm);
+ }
+ mutex_unlock(&kvm->slots_lock);
+ return ret;
out:
mutex_unlock(&kvm->arch.config_lock);
+ mutex_unlock(&kvm->slots_lock);
return ret;
}
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 750e51e3779a..5fe2365a629f 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -1936,6 +1936,7 @@ void vgic_lpi_translation_cache_destroy(struct kvm *kvm)
static int vgic_its_create(struct kvm_device *dev, u32 type)
{
+ int ret;
struct vgic_its *its;
if (type != KVM_DEV_TYPE_ARM_VGIC_ITS)
@@ -1945,9 +1946,12 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
if (!its)
return -ENOMEM;
+ mutex_lock(&dev->kvm->arch.config_lock);
+
if (vgic_initialized(dev->kvm)) {
- int ret = vgic_v4_init(dev->kvm);
+ ret = vgic_v4_init(dev->kvm);
if (ret < 0) {
+ mutex_unlock(&dev->kvm->arch.config_lock);
kfree(its);
return ret;
}
@@ -1960,12 +1964,10 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
/* Yep, even more trickery for lock ordering... */
#ifdef CONFIG_LOCKDEP
- mutex_lock(&dev->kvm->arch.config_lock);
mutex_lock(&its->cmd_lock);
mutex_lock(&its->its_lock);
mutex_unlock(&its->its_lock);
mutex_unlock(&its->cmd_lock);
- mutex_unlock(&dev->kvm->arch.config_lock);
#endif
its->vgic_its_base = VGIC_ADDR_UNDEF;
@@ -1986,7 +1988,11 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
dev->private = its;
- return vgic_its_set_abi(its, NR_ITS_ABIS - 1);
+ ret = vgic_its_set_abi(its, NR_ITS_ABIS - 1);
+
+ mutex_unlock(&dev->kvm->arch.config_lock);
+
+ return ret;
}
static void vgic_its_destroy(struct kvm_device *kvm_dev)
diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c
index 35cfa268fd5d..212b73a715c1 100644
--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
+++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
@@ -102,7 +102,11 @@ static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool wri
if (get_user(addr, uaddr))
return -EFAULT;
- mutex_lock(&kvm->arch.config_lock);
+ /*
+ * Since we can't hold config_lock while registering the redistributor
+ * iodevs, take the slots_lock immediately.
+ */
+ mutex_lock(&kvm->slots_lock);
switch (attr->attr) {
case KVM_VGIC_V2_ADDR_TYPE_DIST:
r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
@@ -182,6 +186,7 @@ static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool wri
if (r)
goto out;
+ mutex_lock(&kvm->arch.config_lock);
if (write) {
r = vgic_check_iorange(kvm, *addr_ptr, addr, alignment, size);
if (!r)
@@ -189,9 +194,10 @@ static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool wri
} else {
addr = *addr_ptr;
}
+ mutex_unlock(&kvm->arch.config_lock);
out:
- mutex_unlock(&kvm->arch.config_lock);
+ mutex_unlock(&kvm->slots_lock);
if (!r && !write)
r = put_user(addr, uaddr);
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index 472b18ac92a2..188d2187eede 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -769,10 +769,13 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
struct vgic_redist_region *rdreg;
gpa_t rd_base;
- int ret;
+ int ret = 0;
+
+ lockdep_assert_held(&kvm->slots_lock);
+ mutex_lock(&kvm->arch.config_lock);
if (!IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr))
- return 0;
+ goto out_unlock;
/*
* We may be creating VCPUs before having set the base address for the
@@ -782,10 +785,12 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
*/
rdreg = vgic_v3_rdist_free_slot(&vgic->rd_regions);
if (!rdreg)
- return 0;
+ goto out_unlock;
- if (!vgic_v3_check_base(kvm))
- return -EINVAL;
+ if (!vgic_v3_check_base(kvm)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
vgic_cpu->rdreg = rdreg;
vgic_cpu->rdreg_index = rdreg->free_index;
@@ -799,16 +804,20 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rd_registers);
rd_dev->redist_vcpu = vcpu;
- mutex_lock(&kvm->slots_lock);
+ mutex_unlock(&kvm->arch.config_lock);
+
ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base,
2 * SZ_64K, &rd_dev->dev);
- mutex_unlock(&kvm->slots_lock);
-
if (ret)
return ret;
+ /* Protected by slots_lock */
rdreg->free_index++;
return 0;
+
+out_unlock:
+ mutex_unlock(&kvm->arch.config_lock);
+ return ret;
}
static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu)
@@ -834,12 +843,10 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm)
/* The current c failed, so iterate over the previous ones. */
int i;
- mutex_lock(&kvm->slots_lock);
for (i = 0; i < c; i++) {
vcpu = kvm_get_vcpu(kvm, i);
vgic_unregister_redist_iodev(vcpu);
}
- mutex_unlock(&kvm->slots_lock);
}
return ret;
@@ -938,7 +945,9 @@ int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count)
{
int ret;
+ mutex_lock(&kvm->arch.config_lock);
ret = vgic_v3_alloc_redist_region(kvm, index, addr, count);
+ mutex_unlock(&kvm->arch.config_lock);
if (ret)
return ret;
@@ -950,8 +959,10 @@ int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count)
if (ret) {
struct vgic_redist_region *rdreg;
+ mutex_lock(&kvm->arch.config_lock);
rdreg = vgic_v3_rdist_region_from_index(kvm, index);
vgic_v3_free_redist_region(rdreg);
+ mutex_unlock(&kvm->arch.config_lock);
return ret;
}
diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c
index 1939c94e0b24..ff558c05e990 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio.c
@@ -1096,7 +1096,6 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
enum vgic_type type)
{
struct vgic_io_device *io_device = &kvm->arch.vgic.dist_iodev;
- int ret = 0;
unsigned int len;
switch (type) {
@@ -1114,10 +1113,6 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
io_device->iodev_type = IODEV_DIST;
io_device->redist_vcpu = NULL;
- mutex_lock(&kvm->slots_lock);
- ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address,
- len, &io_device->dev);
- mutex_unlock(&kvm->slots_lock);
-
- return ret;
+ return kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address,
+ len, &io_device->dev);
}
diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c
index 645648349c99..7e9cdb78f7ce 100644
--- a/arch/arm64/kvm/vgic/vgic-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-v2.c
@@ -312,12 +312,6 @@ int vgic_v2_map_resources(struct kvm *kvm)
return ret;
}
- ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V2);
- if (ret) {
- kvm_err("Unable to register VGIC MMIO regions\n");
- return ret;
- }
-
if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) {
ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base,
kvm_vgic_global_state.vcpu_base,
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 469d816f356f..c3b8e132d599 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -539,7 +539,6 @@ int vgic_v3_map_resources(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
- int ret = 0;
unsigned long c;
kvm_for_each_vcpu(c, vcpu, kvm) {
@@ -569,12 +568,6 @@ int vgic_v3_map_resources(struct kvm *kvm)
return -EBUSY;
}
- ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V3);
- if (ret) {
- kvm_err("Unable to register VGICv3 dist MMIO regions\n");
- return ret;
- }
-
if (kvm_vgic_global_state.has_gicv4_1)
vgic_v4_configure_vsgis(kvm);
@@ -616,6 +609,10 @@ static const struct midr_range broken_seis[] = {
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX),
{},
};
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index 3bb003478060..c1c28fe680ba 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -184,13 +184,14 @@ static void vgic_v4_disable_vsgis(struct kvm_vcpu *vcpu)
}
}
-/* Must be called with the kvm lock held */
void vgic_v4_configure_vsgis(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
unsigned long i;
+ lockdep_assert_held(&kvm->arch.config_lock);
+
kvm_arm_halt_guest(kvm);
kvm_for_each_vcpu(i, vcpu, kvm) {
diff --git a/arch/arm64/kvm/vmid.c b/arch/arm64/kvm/vmid.c
index 08978d0672e7..7fe8ba1a2851 100644
--- a/arch/arm64/kvm/vmid.c
+++ b/arch/arm64/kvm/vmid.c
@@ -47,7 +47,7 @@ static void flush_context(void)
int cpu;
u64 vmid;
- bitmap_clear(vmid_map, 0, NUM_USER_VMIDS);
+ bitmap_zero(vmid_map, NUM_USER_VMIDS);
for_each_possible_cpu(cpu) {
vmid = atomic64_xchg_relaxed(&per_cpu(active_vmids, cpu), 0);
@@ -182,8 +182,7 @@ int __init kvm_arm_vmid_alloc_init(void)
*/
WARN_ON(NUM_USER_VMIDS - 1 <= num_possible_cpus());
atomic64_set(&vmid_generation, VMID_FIRST_VERSION);
- vmid_map = kcalloc(BITS_TO_LONGS(NUM_USER_VMIDS),
- sizeof(*vmid_map), GFP_KERNEL);
+ vmid_map = bitmap_zalloc(NUM_USER_VMIDS, GFP_KERNEL);
if (!vmid_map)
return -ENOMEM;
@@ -192,5 +191,5 @@ int __init kvm_arm_vmid_alloc_init(void)
void __init kvm_arm_vmid_alloc_free(void)
{
- kfree(vmid_map);
+ bitmap_free(vmid_map);
}
diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c
index 4aadcfb01754..a7bb20055ce0 100644
--- a/arch/arm64/mm/copypage.c
+++ b/arch/arm64/mm/copypage.c
@@ -21,9 +21,10 @@ void copy_highpage(struct page *to, struct page *from)
copy_page(kto, kfrom);
+ if (kasan_hw_tags_enabled())
+ page_kasan_tag_reset(to);
+
if (system_supports_mte() && page_mte_tagged(from)) {
- if (kasan_hw_tags_enabled())
- page_kasan_tag_reset(to);
/* It's a new page, shouldn't have been tagged yet */
WARN_ON_ONCE(!try_page_mte_tagging(to));
mte_copy_page_tags(kto, kfrom);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 9e0db5c387e3..6045a5117ac1 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -480,8 +480,8 @@ static void do_bad_area(unsigned long far, unsigned long esr,
}
}
-#define VM_FAULT_BADMAP 0x010000
-#define VM_FAULT_BADACCESS 0x020000
+#define VM_FAULT_BADMAP ((__force vm_fault_t)0x010000)
+#define VM_FAULT_BADACCESS ((__force vm_fault_t)0x020000)
static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
unsigned int mm_flags, unsigned long vm_flags,
@@ -600,8 +600,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
vma_end_read(vma);
goto lock_mmap;
}
- fault = handle_mm_fault(vma, addr & PAGE_MASK,
- mm_flags | FAULT_FLAG_VMA_LOCK, regs);
+ fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs);
vma_end_read(vma);
if (!(fault & VM_FAULT_RETRY)) {
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index b3323ab5b78d..35e8a52fea11 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -1496,7 +1496,7 @@ __BUILD_CSR_OP(tlbidx)
#define write_fcsr(dest, val) \
do { \
__asm__ __volatile__( \
- " movgr2fcsr %0, "__stringify(dest)" \n" \
+ " movgr2fcsr "__stringify(dest)", %0 \n" \
: : "r" (val)); \
} while (0)
diff --git a/arch/loongarch/include/asm/pgtable-bits.h b/arch/loongarch/include/asm/pgtable-bits.h
index 8b98d22a145b..de46a6b1e9f1 100644
--- a/arch/loongarch/include/asm/pgtable-bits.h
+++ b/arch/loongarch/include/asm/pgtable-bits.h
@@ -22,12 +22,14 @@
#define _PAGE_PFN_SHIFT 12
#define _PAGE_SWP_EXCLUSIVE_SHIFT 23
#define _PAGE_PFN_END_SHIFT 48
+#define _PAGE_PRESENT_INVALID_SHIFT 60
#define _PAGE_NO_READ_SHIFT 61
#define _PAGE_NO_EXEC_SHIFT 62
#define _PAGE_RPLV_SHIFT 63
/* Used by software */
#define _PAGE_PRESENT (_ULCAST_(1) << _PAGE_PRESENT_SHIFT)
+#define _PAGE_PRESENT_INVALID (_ULCAST_(1) << _PAGE_PRESENT_INVALID_SHIFT)
#define _PAGE_WRITE (_ULCAST_(1) << _PAGE_WRITE_SHIFT)
#define _PAGE_ACCESSED (_ULCAST_(1) << _PAGE_ACCESSED_SHIFT)
#define _PAGE_MODIFIED (_ULCAST_(1) << _PAGE_MODIFIED_SHIFT)
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index d28fb9dbec59..9a9f9ff9b709 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -213,7 +213,7 @@ static inline int pmd_bad(pmd_t pmd)
static inline int pmd_present(pmd_t pmd)
{
if (unlikely(pmd_val(pmd) & _PAGE_HUGE))
- return !!(pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE));
+ return !!(pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PRESENT_INVALID));
return pmd_val(pmd) != (unsigned long)invalid_pte_table;
}
@@ -558,6 +558,7 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
static inline pmd_t pmd_mkinvalid(pmd_t pmd)
{
+ pmd_val(pmd) |= _PAGE_PRESENT_INVALID;
pmd_val(pmd) &= ~(_PAGE_PRESENT | _PAGE_VALID | _PAGE_DIRTY | _PAGE_PROTNONE);
return pmd;
diff --git a/arch/loongarch/kernel/hw_breakpoint.c b/arch/loongarch/kernel/hw_breakpoint.c
index 2406c95b34cc..021b59c248fa 100644
--- a/arch/loongarch/kernel/hw_breakpoint.c
+++ b/arch/loongarch/kernel/hw_breakpoint.c
@@ -396,6 +396,8 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
if (hw->ctrl.type != LOONGARCH_BREAKPOINT_EXECUTE)
alignment_mask = 0x7;
+ else
+ alignment_mask = 0x3;
offset = hw->address & alignment_mask;
hw->address &= ~alignment_mask;
diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c
index ff28f99b47d7..0491bf453cd4 100644
--- a/arch/loongarch/kernel/perf_event.c
+++ b/arch/loongarch/kernel/perf_event.c
@@ -271,7 +271,7 @@ static void loongarch_pmu_enable_event(struct hw_perf_event *evt, int idx)
WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters);
/* Make sure interrupt enabled. */
- cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) |
+ cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base) |
(evt->config_base & M_PERFCTL_CONFIG_MASK) | CSR_PERFCTRL_IE;
cpu = (event->cpu >= 0) ? event->cpu : smp_processor_id();
@@ -594,7 +594,7 @@ static struct pmu pmu = {
static unsigned int loongarch_pmu_perf_event_encode(const struct loongarch_perf_event *pev)
{
- return (pev->event_id & 0xff);
+ return M_PERFCTL_EVENT(pev->event_id);
}
static const struct loongarch_perf_event *loongarch_pmu_map_general_event(int idx)
@@ -849,7 +849,7 @@ static void resume_local_counters(void)
static const struct loongarch_perf_event *loongarch_pmu_map_raw_event(u64 config)
{
- raw_event.event_id = config & 0xff;
+ raw_event.event_id = M_PERFCTL_EVENT(config);
return &raw_event;
}
diff --git a/arch/loongarch/kernel/unaligned.c b/arch/loongarch/kernel/unaligned.c
index bdff825d29ef..85fae3d2d71a 100644
--- a/arch/loongarch/kernel/unaligned.c
+++ b/arch/loongarch/kernel/unaligned.c
@@ -485,7 +485,7 @@ static int __init debugfs_unaligned(void)
struct dentry *d;
d = debugfs_create_dir("loongarch", NULL);
- if (!d)
+ if (IS_ERR_OR_NULL(d))
return -ENOMEM;
debugfs_create_u32("unaligned_instructions_user",
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index b9f6908a31bc..ba468b5f3f0b 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -858,11 +858,17 @@ static inline int rt_setup_ucontext(struct ucontext __user *uc, struct pt_regs *
}
static inline void __user *
-get_sigframe(struct ksignal *ksig, size_t frame_size)
+get_sigframe(struct ksignal *ksig, struct pt_regs *tregs, size_t frame_size)
{
unsigned long usp = sigsp(rdusp(), ksig);
+ unsigned long gap = 0;
- return (void __user *)((usp - frame_size) & -8UL);
+ if (CPU_IS_020_OR_030 && tregs->format == 0xb) {
+ /* USP is unreliable so use worst-case value */
+ gap = 256;
+ }
+
+ return (void __user *)((usp - gap - frame_size) & -8UL);
}
static int setup_frame(struct ksignal *ksig, sigset_t *set,
@@ -880,7 +886,7 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
return -EFAULT;
}
- frame = get_sigframe(ksig, sizeof(*frame) + fsize);
+ frame = get_sigframe(ksig, tregs, sizeof(*frame) + fsize);
if (fsize)
err |= copy_to_user (frame + 1, regs + 1, fsize);
@@ -952,7 +958,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
return -EFAULT;
}
- frame = get_sigframe(ksig, sizeof(*frame));
+ frame = get_sigframe(ksig, tregs, sizeof(*frame));
if (fsize)
err |= copy_to_user (&frame->uc.uc_extra, regs + 1, fsize);
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index c2f5498d207f..675a8660cb85 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -79,6 +79,7 @@ config MIPS
select HAVE_LD_DEAD_CODE_DATA_ELIMINATION
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI
+ select HAVE_PATA_PLATFORM
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
diff --git a/arch/mips/alchemy/common/dbdma.c b/arch/mips/alchemy/common/dbdma.c
index 5ab043000409..6a3c890f7bbf 100644
--- a/arch/mips/alchemy/common/dbdma.c
+++ b/arch/mips/alchemy/common/dbdma.c
@@ -30,6 +30,7 @@
*
*/
+#include <linux/dma-map-ops.h> /* for dma_default_coherent */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/slab.h>
@@ -623,17 +624,18 @@ u32 au1xxx_dbdma_put_source(u32 chanid, dma_addr_t buf, int nbytes, u32 flags)
dp->dscr_cmd0 &= ~DSCR_CMD0_IE;
/*
- * There is an errata on the Au1200/Au1550 parts that could result
- * in "stale" data being DMA'ed. It has to do with the snoop logic on
- * the cache eviction buffer. DMA_NONCOHERENT is on by default for
- * these parts. If it is fixed in the future, these dma_cache_inv will
- * just be nothing more than empty macros. See io.h.
+ * There is an erratum on certain Au1200/Au1550 revisions that could
+ * result in "stale" data being DMA'ed. It has to do with the snoop
+ * logic on the cache eviction buffer. dma_default_coherent is set
+ * to false on these parts.
*/
- dma_cache_wback_inv((unsigned long)buf, nbytes);
+ if (!dma_default_coherent)
+ dma_cache_wback_inv(KSEG0ADDR(buf), nbytes);
dp->dscr_cmd0 |= DSCR_CMD0_V; /* Let it rip */
wmb(); /* drain writebuffer */
dma_cache_wback_inv((unsigned long)dp, sizeof(*dp));
ctp->chan_ptr->ddma_dbell = 0;
+ wmb(); /* force doorbell write out to dma engine */
/* Get next descriptor pointer. */
ctp->put_ptr = phys_to_virt(DSCR_GET_NXTPTR(dp->dscr_nxtptr));
@@ -685,17 +687,18 @@ u32 au1xxx_dbdma_put_dest(u32 chanid, dma_addr_t buf, int nbytes, u32 flags)
dp->dscr_source1, dp->dscr_dest0, dp->dscr_dest1);
#endif
/*
- * There is an errata on the Au1200/Au1550 parts that could result in
- * "stale" data being DMA'ed. It has to do with the snoop logic on the
- * cache eviction buffer. DMA_NONCOHERENT is on by default for these
- * parts. If it is fixed in the future, these dma_cache_inv will just
- * be nothing more than empty macros. See io.h.
+ * There is an erratum on certain Au1200/Au1550 revisions that could
+ * result in "stale" data being DMA'ed. It has to do with the snoop
+ * logic on the cache eviction buffer. dma_default_coherent is set
+ * to false on these parts.
*/
- dma_cache_inv((unsigned long)buf, nbytes);
+ if (!dma_default_coherent)
+ dma_cache_inv(KSEG0ADDR(buf), nbytes);
dp->dscr_cmd0 |= DSCR_CMD0_V; /* Let it rip */
wmb(); /* drain writebuffer */
dma_cache_wback_inv((unsigned long)dp, sizeof(*dp));
ctp->chan_ptr->ddma_dbell = 0;
+ wmb(); /* force doorbell write out to dma engine */
/* Get next descriptor pointer. */
ctp->put_ptr = phys_to_virt(DSCR_GET_NXTPTR(dp->dscr_nxtptr));
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 6d15a398d389..e79adcb128e6 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -1502,6 +1502,10 @@ static inline void cpu_probe_alchemy(struct cpuinfo_mips *c, unsigned int cpu)
break;
}
break;
+ case PRID_IMP_NETLOGIC_AU13XX:
+ c->cputype = CPU_ALCHEMY;
+ __cpu_name[cpu] = "Au1300";
+ break;
}
}
@@ -1863,6 +1867,7 @@ void cpu_probe(void)
cpu_probe_mips(c, cpu);
break;
case PRID_COMP_ALCHEMY:
+ case PRID_COMP_NETLOGIC:
cpu_probe_alchemy(c, cpu);
break;
case PRID_COMP_SIBYTE:
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index febdc5564638..c0e65135481b 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -158,10 +158,6 @@ static unsigned long __init init_initrd(void)
pr_err("initrd start must be page aligned\n");
goto disable;
}
- if (initrd_start < PAGE_OFFSET) {
- pr_err("initrd start < PAGE_OFFSET\n");
- goto disable;
- }
/*
* Sanitize initrd addresses. For example firmware
@@ -174,6 +170,11 @@ static unsigned long __init init_initrd(void)
initrd_end = (unsigned long)__va(end);
initrd_start = (unsigned long)__va(__pa(initrd_start));
+ if (initrd_start < PAGE_OFFSET) {
+ pr_err("initrd start < PAGE_OFFSET\n");
+ goto disable;
+ }
+
ROOT_DEV = Root_RAM0;
return PFN_UP(end);
disable:
diff --git a/arch/nios2/boot/dts/10m50_devboard.dts b/arch/nios2/boot/dts/10m50_devboard.dts
index 56339bef3247..0e7e5b0dd685 100644
--- a/arch/nios2/boot/dts/10m50_devboard.dts
+++ b/arch/nios2/boot/dts/10m50_devboard.dts
@@ -97,7 +97,7 @@
rx-fifo-depth = <8192>;
tx-fifo-depth = <8192>;
address-bits = <48>;
- max-frame-size = <1518>;
+ max-frame-size = <1500>;
local-mac-address = [00 00 00 00 00 00];
altr,has-supplementary-unicast;
altr,enable-sup-addr = <1>;
diff --git a/arch/nios2/boot/dts/3c120_devboard.dts b/arch/nios2/boot/dts/3c120_devboard.dts
index d10fb81686c7..3ee316906379 100644
--- a/arch/nios2/boot/dts/3c120_devboard.dts
+++ b/arch/nios2/boot/dts/3c120_devboard.dts
@@ -106,7 +106,7 @@
interrupt-names = "rx_irq", "tx_irq";
rx-fifo-depth = <8192>;
tx-fifo-depth = <8192>;
- max-frame-size = <1518>;
+ max-frame-size = <1500>;
local-mac-address = [ 00 00 00 00 00 00 ];
phy-mode = "rgmii-id";
phy-handle = <&phy0>;
diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
index ecd1657bb2ce..ce6bb8e74271 100644
--- a/arch/nios2/include/asm/pgalloc.h
+++ b/arch/nios2/include/asm/pgalloc.h
@@ -28,10 +28,10 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
extern pgd_t *pgd_alloc(struct mm_struct *mm);
-#define __pte_free_tlb(tlb, pte, addr) \
- do { \
- pgtable_pte_page_dtor(pte); \
- tlb_remove_page((tlb), (pte)); \
+#define __pte_free_tlb(tlb, pte, addr) \
+ do { \
+ pagetable_pte_dtor(page_ptdesc(pte)); \
+ tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \
} while (0)
#endif /* _ASM_NIOS2_PGALLOC_H */
diff --git a/arch/nios2/kernel/cpuinfo.c b/arch/nios2/kernel/cpuinfo.c
index 203870c4b86d..338849c430a5 100644
--- a/arch/nios2/kernel/cpuinfo.c
+++ b/arch/nios2/kernel/cpuinfo.c
@@ -47,7 +47,7 @@ void __init setup_cpuinfo(void)
str = of_get_property(cpu, "altr,implementation", &len);
if (str)
- strlcpy(cpuinfo.cpu_impl, str, sizeof(cpuinfo.cpu_impl));
+ strscpy(cpuinfo.cpu_impl, str, sizeof(cpuinfo.cpu_impl));
else
strcpy(cpuinfo.cpu_impl, "<unknown>");
diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c
index 40bc8fb75e0b..8582ed965844 100644
--- a/arch/nios2/kernel/setup.c
+++ b/arch/nios2/kernel/setup.c
@@ -121,7 +121,7 @@ asmlinkage void __init nios2_boot_init(unsigned r4, unsigned r5, unsigned r6,
dtb_passed = r6;
if (r7)
- strlcpy(cmdline_passed, (char *)r7, COMMAND_LINE_SIZE);
+ strscpy(cmdline_passed, (char *)r7, COMMAND_LINE_SIZE);
}
#endif
@@ -129,10 +129,10 @@ asmlinkage void __init nios2_boot_init(unsigned r4, unsigned r5, unsigned r6,
#ifndef CONFIG_CMDLINE_FORCE
if (cmdline_passed[0])
- strlcpy(boot_command_line, cmdline_passed, COMMAND_LINE_SIZE);
+ strscpy(boot_command_line, cmdline_passed, COMMAND_LINE_SIZE);
#ifdef CONFIG_NIOS2_CMDLINE_IGNORE_DTB
else
- strlcpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+ strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
#endif
#endif
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 466a25525364..967bde65dd0e 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -130,6 +130,10 @@ config PM
config STACKTRACE_SUPPORT
def_bool y
+config LOCKDEP_SUPPORT
+ bool
+ default y
+
config ISA_DMA_API
bool
diff --git a/arch/parisc/Kconfig.debug b/arch/parisc/Kconfig.debug
index f66554cd5c45..3a059cb5e112 100644
--- a/arch/parisc/Kconfig.debug
+++ b/arch/parisc/Kconfig.debug
@@ -1 +1,12 @@
# SPDX-License-Identifier: GPL-2.0
+#
+config LIGHTWEIGHT_SPINLOCK_CHECK
+ bool "Enable lightweight spinlock checks"
+ depends on SMP && !DEBUG_SPINLOCK
+ default y
+ help
+ Add checks with low performance impact to the spinlock functions
+ to catch memory overwrites at runtime. For more advanced
+ spinlock debugging you should choose the DEBUG_SPINLOCK option
+ which will detect unitialized spinlocks too.
+ If unsure say Y here.
diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h
index 0f0d4a496fef..75677b526b2b 100644
--- a/arch/parisc/include/asm/assembly.h
+++ b/arch/parisc/include/asm/assembly.h
@@ -90,10 +90,6 @@
#include <asm/asmregs.h>
#include <asm/psw.h>
- sp = 30
- gp = 27
- ipsw = 22
-
/*
* We provide two versions of each macro to convert from physical
* to virtual and vice versa. The "_r1" versions take one argument
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index 0bdee6724132..c8b6928cee1e 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -48,6 +48,10 @@ void flush_dcache_page(struct page *page);
#define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages)
#define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages)
+#define flush_dcache_mmap_lock_irqsave(mapping, flags) \
+ xa_lock_irqsave(&mapping->i_pages, flags)
+#define flush_dcache_mmap_unlock_irqrestore(mapping, flags) \
+ xa_unlock_irqrestore(&mapping->i_pages, flags)
#define flush_icache_page(vma,page) do { \
flush_kernel_dcache_page_addr(page_address(page)); \
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index e715df5385d6..5656395c95ee 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -472,9 +472,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
#define pte_same(A,B) (pte_val(A) == pte_val(B))
-struct seq_file;
-extern void arch_report_meminfo(struct seq_file *m);
-
#endif /* !__ASSEMBLY__ */
diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
index a6e5d66a7656..edfcb9858bcb 100644
--- a/arch/parisc/include/asm/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
@@ -7,10 +7,26 @@
#include <asm/processor.h>
#include <asm/spinlock_types.h>
+#define SPINLOCK_BREAK_INSN 0x0000c006 /* break 6,6 */
+
+static inline void arch_spin_val_check(int lock_val)
+{
+ if (IS_ENABLED(CONFIG_LIGHTWEIGHT_SPINLOCK_CHECK))
+ asm volatile( "andcm,= %0,%1,%%r0\n"
+ ".word %2\n"
+ : : "r" (lock_val), "r" (__ARCH_SPIN_LOCK_UNLOCKED_VAL),
+ "i" (SPINLOCK_BREAK_INSN));
+}
+
static inline int arch_spin_is_locked(arch_spinlock_t *x)
{
- volatile unsigned int *a = __ldcw_align(x);
- return READ_ONCE(*a) == 0;
+ volatile unsigned int *a;
+ int lock_val;
+
+ a = __ldcw_align(x);
+ lock_val = READ_ONCE(*a);
+ arch_spin_val_check(lock_val);
+ return (lock_val == 0);
}
static inline void arch_spin_lock(arch_spinlock_t *x)
@@ -18,9 +34,18 @@ static inline void arch_spin_lock(arch_spinlock_t *x)
volatile unsigned int *a;
a = __ldcw_align(x);
- while (__ldcw(a) == 0)
+ do {
+ int lock_val_old;
+
+ lock_val_old = __ldcw(a);
+ arch_spin_val_check(lock_val_old);
+ if (lock_val_old)
+ return; /* got lock */
+
+ /* wait until we should try to get lock again */
while (*a == 0)
continue;
+ } while (1);
}
static inline void arch_spin_unlock(arch_spinlock_t *x)
@@ -29,15 +54,19 @@ static inline void arch_spin_unlock(arch_spinlock_t *x)
a = __ldcw_align(x);
/* Release with ordered store. */
- __asm__ __volatile__("stw,ma %0,0(%1)" : : "r"(1), "r"(a) : "memory");
+ __asm__ __volatile__("stw,ma %0,0(%1)"
+ : : "r"(__ARCH_SPIN_LOCK_UNLOCKED_VAL), "r"(a) : "memory");
}
static inline int arch_spin_trylock(arch_spinlock_t *x)
{
volatile unsigned int *a;
+ int lock_val;
a = __ldcw_align(x);
- return __ldcw(a) != 0;
+ lock_val = __ldcw(a);
+ arch_spin_val_check(lock_val);
+ return lock_val != 0;
}
/*
diff --git a/arch/parisc/include/asm/spinlock_types.h b/arch/parisc/include/asm/spinlock_types.h
index ca39ee350c3f..d65934079ebd 100644
--- a/arch/parisc/include/asm/spinlock_types.h
+++ b/arch/parisc/include/asm/spinlock_types.h
@@ -2,13 +2,17 @@
#ifndef __ASM_SPINLOCK_TYPES_H
#define __ASM_SPINLOCK_TYPES_H
+#define __ARCH_SPIN_LOCK_UNLOCKED_VAL 0x1a46
+
typedef struct {
#ifdef CONFIG_PA20
volatile unsigned int slock;
-# define __ARCH_SPIN_LOCK_UNLOCKED { 1 }
+# define __ARCH_SPIN_LOCK_UNLOCKED { __ARCH_SPIN_LOCK_UNLOCKED_VAL }
#else
volatile unsigned int lock[4];
-# define __ARCH_SPIN_LOCK_UNLOCKED { { 1, 1, 1, 1 } }
+# define __ARCH_SPIN_LOCK_UNLOCKED \
+ { { __ARCH_SPIN_LOCK_UNLOCKED_VAL, __ARCH_SPIN_LOCK_UNLOCKED_VAL, \
+ __ARCH_SPIN_LOCK_UNLOCKED_VAL, __ARCH_SPIN_LOCK_UNLOCKED_VAL } }
#endif
} arch_spinlock_t;
diff --git a/arch/parisc/kernel/alternative.c b/arch/parisc/kernel/alternative.c
index 66f5672c70bd..25c4d6c3375d 100644
--- a/arch/parisc/kernel/alternative.c
+++ b/arch/parisc/kernel/alternative.c
@@ -25,7 +25,7 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
{
struct alt_instr *entry;
int index = 0, applied = 0;
- int num_cpus = num_online_cpus();
+ int num_cpus = num_present_cpus();
u16 cond_check;
cond_check = ALT_COND_ALWAYS |
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 1d3b8bc8a623..ca4a302d4365 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -399,6 +399,7 @@ void flush_dcache_page(struct page *page)
unsigned long offset;
unsigned long addr, old_addr = 0;
unsigned long count = 0;
+ unsigned long flags;
pgoff_t pgoff;
if (mapping && !mapping_mapped(mapping)) {
@@ -420,7 +421,7 @@ void flush_dcache_page(struct page *page)
* to flush one address here for them all to become coherent
* on machines that support equivalent aliasing
*/
- flush_dcache_mmap_lock(mapping);
+ flush_dcache_mmap_lock_irqsave(mapping, flags);
vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
addr = mpnt->vm_start + offset;
@@ -460,7 +461,7 @@ void flush_dcache_page(struct page *page)
}
WARN_ON(++count == 4096);
}
- flush_dcache_mmap_unlock(mapping);
+ flush_dcache_mmap_unlock_irqrestore(mapping, flags);
}
EXPORT_SYMBOL(flush_dcache_page);
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index ba87f791323b..71ed5391f29d 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -446,11 +446,27 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
enum dma_data_direction dir)
{
+ /*
+ * fdc: The data cache line is written back to memory, if and only if
+ * it is dirty, and then invalidated from the data cache.
+ */
flush_kernel_dcache_range((unsigned long)phys_to_virt(paddr), size);
}
void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
enum dma_data_direction dir)
{
- flush_kernel_dcache_range((unsigned long)phys_to_virt(paddr), size);
+ unsigned long addr = (unsigned long) phys_to_virt(paddr);
+
+ switch (dir) {
+ case DMA_TO_DEVICE:
+ case DMA_BIDIRECTIONAL:
+ flush_kernel_dcache_range(addr, size);
+ return;
+ case DMA_FROM_DEVICE:
+ purge_kernel_dcache_range_asm(addr, addr + size);
+ return;
+ default:
+ BUG();
+ }
}
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 97c6f875bd0e..24411ab79c30 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -122,13 +122,18 @@ void machine_power_off(void)
/* It seems we have no way to power the system off via
* software. The user has to press the button himself. */
- printk(KERN_EMERG "System shut down completed.\n"
- "Please power this system off now.");
+ printk("Power off or press RETURN to reboot.\n");
/* prevent soft lockup/stalled CPU messages for endless loop. */
rcu_sysrq_start();
lockup_detector_soft_poweroff();
- for (;;);
+ while (1) {
+ /* reboot if user presses RETURN key */
+ if (pdc_iodc_getc() == 13) {
+ printk("Rebooting...\n");
+ machine_restart(NULL);
+ }
+ }
}
void (*pm_power_off)(void);
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index f9696fbf646c..304eebd1c83e 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -47,6 +47,10 @@
#include <linux/kgdb.h>
#include <linux/kprobes.h>
+#if defined(CONFIG_LIGHTWEIGHT_SPINLOCK_CHECK)
+#include <asm/spinlock.h>
+#endif
+
#include "../math-emu/math-emu.h" /* for handle_fpe() */
static void parisc_show_stack(struct task_struct *task,
@@ -291,24 +295,30 @@ static void handle_break(struct pt_regs *regs)
}
#ifdef CONFIG_KPROBES
- if (unlikely(iir == PARISC_KPROBES_BREAK_INSN)) {
+ if (unlikely(iir == PARISC_KPROBES_BREAK_INSN && !user_mode(regs))) {
parisc_kprobe_break_handler(regs);
return;
}
- if (unlikely(iir == PARISC_KPROBES_BREAK_INSN2)) {
+ if (unlikely(iir == PARISC_KPROBES_BREAK_INSN2 && !user_mode(regs))) {
parisc_kprobe_ss_handler(regs);
return;
}
#endif
#ifdef CONFIG_KGDB
- if (unlikely(iir == PARISC_KGDB_COMPILED_BREAK_INSN ||
- iir == PARISC_KGDB_BREAK_INSN)) {
+ if (unlikely((iir == PARISC_KGDB_COMPILED_BREAK_INSN ||
+ iir == PARISC_KGDB_BREAK_INSN)) && !user_mode(regs)) {
kgdb_handle_exception(9, SIGTRAP, 0, regs);
return;
}
#endif
+#ifdef CONFIG_LIGHTWEIGHT_SPINLOCK_CHECK
+ if ((iir == SPINLOCK_BREAK_INSN) && !user_mode(regs)) {
+ die_if_kernel("Spinlock was trashed", regs, 1);
+ }
+#endif
+
if (unlikely(iir != GDB_BREAK_INSN))
parisc_printk_ratelimited(0, regs,
KERN_DEBUG "break %d,%d: pid=%d command='%s'\n",
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 539d1f03ff42..bff5820b7cda 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -906,11 +906,17 @@ config DATA_SHIFT
config ARCH_FORCE_MAX_ORDER
int "Order of maximal physically contiguous allocations"
+ range 7 8 if PPC64 && PPC_64K_PAGES
default "8" if PPC64 && PPC_64K_PAGES
+ range 12 12 if PPC64 && !PPC_64K_PAGES
default "12" if PPC64 && !PPC_64K_PAGES
+ range 8 10 if PPC32 && PPC_16K_PAGES
default "8" if PPC32 && PPC_16K_PAGES
+ range 6 10 if PPC32 && PPC_64K_PAGES
default "6" if PPC32 && PPC_64K_PAGES
+ range 4 10 if PPC32 && PPC_256K_PAGES
default "4" if PPC32 && PPC_256K_PAGES
+ range 10 10
default "10"
help
The kernel page allocator limits the size of maximal physically
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 85cde5bf04b7..771b79423bbc 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -34,8 +34,6 @@ endif
BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
-fno-strict-aliasing -O2 -msoft-float -mno-altivec -mno-vsx \
- $(call cc-option,-mno-prefixed) $(call cc-option,-mno-pcrel) \
- $(call cc-option,-mno-mma) \
$(call cc-option,-mno-spe) $(call cc-option,-mspe=no) \
-pipe -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
$(LINUXINCLUDE)
@@ -71,6 +69,10 @@ BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -nostdinc
BOOTARFLAGS := -crD
+BOOTCFLAGS += $(call cc-option,-mno-prefixed) \
+ $(call cc-option,-mno-pcrel) \
+ $(call cc-option,-mno-mma)
+
ifdef CONFIG_CC_IS_CLANG
BOOTCFLAGS += $(CLANG_FLAGS)
BOOTAFLAGS += $(CLANG_FLAGS)
diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig
index 7113f9355165..ad1872518992 100644
--- a/arch/powerpc/crypto/Kconfig
+++ b/arch/powerpc/crypto/Kconfig
@@ -96,7 +96,7 @@ config CRYPTO_AES_PPC_SPE
config CRYPTO_AES_GCM_P10
tristate "Stitched AES/GCM acceleration support on P10 or later CPU (PPC)"
- depends on PPC64 && CPU_LITTLE_ENDIAN
+ depends on PPC64 && CPU_LITTLE_ENDIAN && VSX
select CRYPTO_LIB_AES
select CRYPTO_ALGAPI
select CRYPTO_AEAD
diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile
index 05c7486f42c5..7b4f516abec1 100644
--- a/arch/powerpc/crypto/Makefile
+++ b/arch/powerpc/crypto/Makefile
@@ -22,15 +22,15 @@ sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o
sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o
crc32c-vpmsum-y := crc32c-vpmsum_asm.o crc32c-vpmsum_glue.o
crct10dif-vpmsum-y := crct10dif-vpmsum_asm.o crct10dif-vpmsum_glue.o
-aes-gcm-p10-crypto-y := aes-gcm-p10-glue.o aes-gcm-p10.o ghashp8-ppc.o aesp8-ppc.o
+aes-gcm-p10-crypto-y := aes-gcm-p10-glue.o aes-gcm-p10.o ghashp10-ppc.o aesp10-ppc.o
quiet_cmd_perl = PERL $@
cmd_perl = $(PERL) $< $(if $(CONFIG_CPU_LITTLE_ENDIAN), linux-ppc64le, linux-ppc64) > $@
-targets += aesp8-ppc.S ghashp8-ppc.S
+targets += aesp10-ppc.S ghashp10-ppc.S
-$(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE
+$(obj)/aesp10-ppc.S $(obj)/ghashp10-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE
$(call if_changed,perl)
-OBJECT_FILES_NON_STANDARD_aesp8-ppc.o := y
-OBJECT_FILES_NON_STANDARD_ghashp8-ppc.o := y
+OBJECT_FILES_NON_STANDARD_aesp10-ppc.o := y
+OBJECT_FILES_NON_STANDARD_ghashp10-ppc.o := y
diff --git a/arch/powerpc/crypto/aes-gcm-p10-glue.c b/arch/powerpc/crypto/aes-gcm-p10-glue.c
index bd3475f5348d..4b6e899895e7 100644
--- a/arch/powerpc/crypto/aes-gcm-p10-glue.c
+++ b/arch/powerpc/crypto/aes-gcm-p10-glue.c
@@ -30,15 +30,15 @@ MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("aes");
-asmlinkage int aes_p8_set_encrypt_key(const u8 *userKey, const int bits,
+asmlinkage int aes_p10_set_encrypt_key(const u8 *userKey, const int bits,
void *key);
-asmlinkage void aes_p8_encrypt(const u8 *in, u8 *out, const void *key);
+asmlinkage void aes_p10_encrypt(const u8 *in, u8 *out, const void *key);
asmlinkage void aes_p10_gcm_encrypt(u8 *in, u8 *out, size_t len,
void *rkey, u8 *iv, void *Xi);
asmlinkage void aes_p10_gcm_decrypt(u8 *in, u8 *out, size_t len,
void *rkey, u8 *iv, void *Xi);
asmlinkage void gcm_init_htable(unsigned char htable[256], unsigned char Xi[16]);
-asmlinkage void gcm_ghash_p8(unsigned char *Xi, unsigned char *Htable,
+asmlinkage void gcm_ghash_p10(unsigned char *Xi, unsigned char *Htable,
unsigned char *aad, unsigned int alen);
struct aes_key {
@@ -93,7 +93,7 @@ static void set_aad(struct gcm_ctx *gctx, struct Hash_ctx *hash,
gctx->aadLen = alen;
i = alen & ~0xf;
if (i) {
- gcm_ghash_p8(nXi, hash->Htable+32, aad, i);
+ gcm_ghash_p10(nXi, hash->Htable+32, aad, i);
aad += i;
alen -= i;
}
@@ -102,7 +102,7 @@ static void set_aad(struct gcm_ctx *gctx, struct Hash_ctx *hash,
nXi[i] ^= aad[i];
memset(gctx->aad_hash, 0, 16);
- gcm_ghash_p8(gctx->aad_hash, hash->Htable+32, nXi, 16);
+ gcm_ghash_p10(gctx->aad_hash, hash->Htable+32, nXi, 16);
} else {
memcpy(gctx->aad_hash, nXi, 16);
}
@@ -115,7 +115,7 @@ static void gcmp10_init(struct gcm_ctx *gctx, u8 *iv, unsigned char *rdkey,
{
__be32 counter = cpu_to_be32(1);
- aes_p8_encrypt(hash->H, hash->H, rdkey);
+ aes_p10_encrypt(hash->H, hash->H, rdkey);
set_subkey(hash->H);
gcm_init_htable(hash->Htable+32, hash->H);
@@ -126,7 +126,7 @@ static void gcmp10_init(struct gcm_ctx *gctx, u8 *iv, unsigned char *rdkey,
/*
* Encrypt counter vector as iv tag and increment counter.
*/
- aes_p8_encrypt(iv, gctx->ivtag, rdkey);
+ aes_p10_encrypt(iv, gctx->ivtag, rdkey);
counter = cpu_to_be32(2);
*((__be32 *)(iv+12)) = counter;
@@ -160,7 +160,7 @@ static void finish_tag(struct gcm_ctx *gctx, struct Hash_ctx *hash, int len)
/*
* hash (AAD len and len)
*/
- gcm_ghash_p8(hash->Htable, hash->Htable+32, aclen, 16);
+ gcm_ghash_p10(hash->Htable, hash->Htable+32, aclen, 16);
for (i = 0; i < 16; i++)
hash->Htable[i] ^= gctx->ivtag[i];
@@ -192,7 +192,7 @@ static int p10_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key,
int ret;
vsx_begin();
- ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
+ ret = aes_p10_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
vsx_end();
return ret ? -EINVAL : 0;
diff --git a/arch/powerpc/crypto/aesp8-ppc.pl b/arch/powerpc/crypto/aesp10-ppc.pl
index 1f22aec27d79..2c06ce2a2c7c 100644
--- a/arch/powerpc/crypto/aesp8-ppc.pl
+++ b/arch/powerpc/crypto/aesp10-ppc.pl
@@ -110,7 +110,7 @@ die "can't locate ppc-xlate.pl";
open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
$FRAME=8*$SIZE_T;
-$prefix="aes_p8";
+$prefix="aes_p10";
$sp="r1";
$vrsave="r12";
diff --git a/arch/powerpc/crypto/ghashp8-ppc.pl b/arch/powerpc/crypto/ghashp10-ppc.pl
index b56603b4a893..27a6b0bec645 100644
--- a/arch/powerpc/crypto/ghashp8-ppc.pl
+++ b/arch/powerpc/crypto/ghashp10-ppc.pl
@@ -64,7 +64,7 @@ $code=<<___;
.text
-.globl .gcm_init_p8
+.globl .gcm_init_p10
lis r0,0xfff0
li r8,0x10
mfspr $vrsave,256
@@ -110,7 +110,7 @@ $code=<<___;
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
-.size .gcm_init_p8,.-.gcm_init_p8
+.size .gcm_init_p10,.-.gcm_init_p10
.globl .gcm_init_htable
lis r0,0xfff0
@@ -237,7 +237,7 @@ $code=<<___;
.long 0
.size .gcm_init_htable,.-.gcm_init_htable
-.globl .gcm_gmult_p8
+.globl .gcm_gmult_p10
lis r0,0xfff8
li r8,0x10
mfspr $vrsave,256
@@ -283,9 +283,9 @@ $code=<<___;
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
-.size .gcm_gmult_p8,.-.gcm_gmult_p8
+.size .gcm_gmult_p10,.-.gcm_gmult_p10
-.globl .gcm_ghash_p8
+.globl .gcm_ghash_p10
lis r0,0xfff8
li r8,0x10
mfspr $vrsave,256
@@ -350,7 +350,7 @@ Loop:
.long 0
.byte 0,12,0x14,0,0,0,4,0
.long 0
-.size .gcm_ghash_p8,.-.gcm_ghash_p8
+.size .gcm_ghash_p10,.-.gcm_ghash_p10
.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 678b5bdc79b1..34e14dfd8e04 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -205,7 +205,6 @@ extern void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number, unsigned long pe_num);
extern int iommu_add_device(struct iommu_table_group *table_group,
struct device *dev);
-extern void iommu_del_device(struct device *dev);
extern long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction);
@@ -229,10 +228,6 @@ static inline int iommu_add_device(struct iommu_table_group *table_group,
{
return 0;
}
-
-static inline void iommu_del_device(struct device *dev)
-{
-}
#endif /* !CONFIG_IOMMU_API */
u64 dma_iommu_get_required_mask(struct device *dev);
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 9972626ddaf6..6a88bfdaa69b 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -165,9 +165,6 @@ static inline bool is_ioremap_addr(const void *x)
return addr >= IOREMAP_BASE && addr < IOREMAP_END;
}
-
-struct seq_file;
-void arch_report_meminfo(struct seq_file *m);
#endif /* CONFIG_PPC64 */
#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index 038ce8d9061d..8920862ffd79 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -144,7 +144,7 @@ static bool dma_iommu_bypass_supported(struct device *dev, u64 mask)
/* We support DMA to/from any memory page via the iommu */
int dma_iommu_dma_supported(struct device *dev, u64 mask)
{
- struct iommu_table *tbl = get_iommu_table_base(dev);
+ struct iommu_table *tbl;
if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) {
/*
@@ -162,6 +162,8 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask)
return 1;
}
+ tbl = get_iommu_table_base(dev);
+
if (!tbl) {
dev_err(dev, "Warning: IOMMU dma not supported: mask 0x%08llx, table unavailable\n", mask);
return 0;
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 0089dd49b4cb..67f0b01e6ff5 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -518,7 +518,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
/* Convert entry to a dma_addr_t */
entry += tbl->it_offset;
dma_addr = entry << tbl->it_page_shift;
- dma_addr |= (s->offset & ~IOMMU_PAGE_MASK(tbl));
+ dma_addr |= (vaddr & ~IOMMU_PAGE_MASK(tbl));
DBG(" - %lu pages, entry: %lx, dma_addr: %lx\n",
npages, entry, dma_addr);
@@ -905,6 +905,7 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
unsigned int order;
unsigned int nio_pages, io_order;
struct page *page;
+ int tcesize = (1 << tbl->it_page_shift);
size = PAGE_ALIGN(size);
order = get_order(size);
@@ -931,7 +932,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
memset(ret, 0, size);
/* Set up tces to cover the allocated range */
- nio_pages = size >> tbl->it_page_shift;
+ nio_pages = IOMMU_PAGE_ALIGN(size, tbl) >> tbl->it_page_shift;
+
io_order = get_iommu_order(size, tbl);
mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
mask >> tbl->it_page_shift, io_order, 0);
@@ -939,7 +941,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
free_pages((unsigned long)ret, order);
return NULL;
}
- *dma_handle = mapping;
+
+ *dma_handle = mapping | ((u64)ret & (tcesize - 1));
return ret;
}
@@ -950,7 +953,7 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
unsigned int nio_pages;
size = PAGE_ALIGN(size);
- nio_pages = size >> tbl->it_page_shift;
+ nio_pages = IOMMU_PAGE_ALIGN(size, tbl) >> tbl->it_page_shift;
iommu_free(tbl, dma_handle, nio_pages);
size = PAGE_ALIGN(size);
free_pages((unsigned long)vaddr, get_order(size));
@@ -1168,23 +1171,6 @@ int iommu_add_device(struct iommu_table_group *table_group, struct device *dev)
}
EXPORT_SYMBOL_GPL(iommu_add_device);
-void iommu_del_device(struct device *dev)
-{
- /*
- * Some devices might not have IOMMU table and group
- * and we needn't detach them from the associated
- * IOMMU groups
- */
- if (!device_iommu_mapped(dev)) {
- pr_debug("iommu_tce: skipping device %s with no tbl\n",
- dev_name(dev));
- return;
- }
-
- iommu_group_remove_device(dev);
-}
-EXPORT_SYMBOL_GPL(iommu_del_device);
-
/*
* A simple iommu_table_group_ops which only allows reusing the existing
* iommu_table. This handles VFIO for POWER7 or the nested KVM.
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c
index 85bdd7d3652f..48e0eaf1ad61 100644
--- a/arch/powerpc/kernel/isa-bridge.c
+++ b/arch/powerpc/kernel/isa-bridge.c
@@ -93,11 +93,12 @@ static int process_ISA_OF_ranges(struct device_node *isa_node,
}
inval_range:
- if (!phb_io_base_phys) {
+ if (phb_io_base_phys) {
pr_err("no ISA IO ranges or unexpected isa range, mapping 64k\n");
remap_isa_base(phb_io_base_phys, 0x10000);
+ return 0;
}
- return 0;
+ return -EINVAL;
}
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 26245aaf12b8..2297aa764ecd 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -1040,8 +1040,8 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
pte_t entry, unsigned long address, int psize)
{
struct mm_struct *mm = vma->vm_mm;
- unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
- _PAGE_RW | _PAGE_EXEC);
+ unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_SOFT_DIRTY |
+ _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
unsigned long change = pte_val(entry) ^ pte_val(*ptep);
/*
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index ce804b7bf84e..0bd4866d9824 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -795,12 +795,20 @@ void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
goto out;
if (current->active_mm == mm) {
+ unsigned long flags;
+
WARN_ON_ONCE(current->mm != NULL);
- /* Is a kernel thread and is using mm as the lazy tlb */
+ /*
+ * It is a kernel thread and is using mm as the lazy tlb, so
+ * switch it to init_mm. This is not always called from IPI
+ * (e.g., flush_type_needed), so must disable irqs.
+ */
+ local_irq_save(flags);
mmgrab_lazy_tlb(&init_mm);
current->active_mm = &init_mm;
switch_mm_irqs_off(mm, &init_mm, current);
mmdrop_lazy_tlb(mm);
+ local_irq_restore(flags);
}
/*
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index e93aefcfb83f..37043dfc1add 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -101,6 +101,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
bpf_hdr = jit_data->header;
proglen = jit_data->proglen;
extra_pass = true;
+ /* During extra pass, ensure index is reset before repopulating extable entries */
+ cgctx.exentry_idx = 0;
goto skip_init_ctx;
}
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 0d9b7609c7d5..3e2e252016f7 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -265,6 +265,7 @@ config CPM2
config FSL_ULI1575
bool "ULI1575 PCIe south bridge support"
depends on FSL_SOC_BOOKE || PPC_86xx
+ depends on PCI
select FSL_PCI
select GENERIC_ISA_DMA
help
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 233a50e65fce..7725492097b6 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -865,28 +865,3 @@ void __init pnv_pci_init(void)
/* Configure IOMMU DMA hooks */
set_pci_dma_ops(&dma_iommu_ops);
}
-
-static int pnv_tce_iommu_bus_notifier(struct notifier_block *nb,
- unsigned long action, void *data)
-{
- struct device *dev = data;
-
- switch (action) {
- case BUS_NOTIFY_DEL_DEVICE:
- iommu_del_device(dev);
- return 0;
- default:
- return 0;
- }
-}
-
-static struct notifier_block pnv_tce_iommu_bus_nb = {
- .notifier_call = pnv_tce_iommu_bus_notifier,
-};
-
-static int __init pnv_tce_iommu_bus_notifier_init(void)
-{
- bus_register_notifier(&pci_bus_type, &pnv_tce_iommu_bus_nb);
- return 0;
-}
-machine_subsys_initcall_sync(powernv, pnv_tce_iommu_bus_notifier_init);
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 7464fa6e4145..d59e8a98a200 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -91,19 +91,24 @@ static struct iommu_table_group *iommu_pseries_alloc_group(int node)
static void iommu_pseries_free_group(struct iommu_table_group *table_group,
const char *node_name)
{
- struct iommu_table *tbl;
-
if (!table_group)
return;
- tbl = table_group->tables[0];
#ifdef CONFIG_IOMMU_API
if (table_group->group) {
iommu_group_put(table_group->group);
BUG_ON(table_group->group);
}
#endif
- iommu_tce_table_put(tbl);
+
+ /* Default DMA window table is at index 0, while DDW at 1. SR-IOV
+ * adapters only have table on index 1.
+ */
+ if (table_group->tables[0])
+ iommu_tce_table_put(table_group->tables[0]);
+
+ if (table_group->tables[1])
+ iommu_tce_table_put(table_group->tables[1]);
kfree(table_group);
}
@@ -312,13 +317,22 @@ static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
{
u64 rc;
+ long rpages = npages;
+ unsigned long limit;
if (!firmware_has_feature(FW_FEATURE_STUFF_TCE))
return tce_free_pSeriesLP(tbl->it_index, tcenum,
tbl->it_page_shift, npages);
- rc = plpar_tce_stuff((u64)tbl->it_index,
- (u64)tcenum << tbl->it_page_shift, 0, npages);
+ do {
+ limit = min_t(unsigned long, rpages, 512);
+
+ rc = plpar_tce_stuff((u64)tbl->it_index,
+ (u64)tcenum << tbl->it_page_shift, 0, limit);
+
+ rpages -= limit;
+ tcenum += limit;
+ } while (rpages > 0 && !rc);
if (rc && printk_ratelimit()) {
printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n");
@@ -1695,31 +1709,6 @@ static int __init disable_multitce(char *str)
__setup("multitce=", disable_multitce);
-static int tce_iommu_bus_notifier(struct notifier_block *nb,
- unsigned long action, void *data)
-{
- struct device *dev = data;
-
- switch (action) {
- case BUS_NOTIFY_DEL_DEVICE:
- iommu_del_device(dev);
- return 0;
- default:
- return 0;
- }
-}
-
-static struct notifier_block tce_iommu_bus_nb = {
- .notifier_call = tce_iommu_bus_notifier,
-};
-
-static int __init tce_iommu_bus_notifier_init(void)
-{
- bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
- return 0;
-}
-machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init);
-
#ifdef CONFIG_SPAPR_TCE_IOMMU
struct iommu_group *pSeries_pci_device_group(struct pci_controller *hose,
struct pci_dev *pdev)
diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile
index 6f5e2727963c..78473d69cd2b 100644
--- a/arch/powerpc/purgatory/Makefile
+++ b/arch/powerpc/purgatory/Makefile
@@ -5,6 +5,11 @@ KCSAN_SANITIZE := n
targets += trampoline_$(BITS).o purgatory.ro
+# When profile-guided optimization is enabled, llvm emits two different
+# overlapping text sections, which is not supported by kexec. Remove profile
+# optimization flags.
+KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS))
+
LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined
$(obj)/purgatory.ro: $(obj)/trampoline_$(BITS).o FORCE
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 728d3c257e4a..70c4c59a1a8f 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -88,7 +88,7 @@ static unsigned long ndump = 64;
static unsigned long nidump = 16;
static unsigned long ncsum = 4096;
static int termch;
-static char tmpstr[128];
+static char tmpstr[KSYM_NAME_LEN];
static int tracing_enabled;
static long bus_error_jmp[JMP_BUF_LEN];
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 348c0fa1fc8c..5966ad97c30c 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -26,6 +26,7 @@ config RISCV
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_HAS_KCOV
select ARCH_HAS_MMIOWB
+ select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PMEM_API
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SET_DIRECT_MAP if MMU
@@ -799,8 +800,11 @@ menu "Power management options"
source "kernel/power/Kconfig"
+# Hibernation is only possible on systems where the SBI implementation has
+# marked its reserved memory as not accessible from, or does not run
+# from the same memory as, Linux
config ARCH_HIBERNATION_POSSIBLE
- def_bool y
+ def_bool NONPORTABLE
config ARCH_HIBERNATION_HEADER
def_bool HIBERNATION
diff --git a/arch/riscv/errata/Makefile b/arch/riscv/errata/Makefile
index a1055965fbee..7b2637c8c332 100644
--- a/arch/riscv/errata/Makefile
+++ b/arch/riscv/errata/Makefile
@@ -1,2 +1,6 @@
+ifdef CONFIG_RELOCATABLE
+KBUILD_CFLAGS += -fno-pie
+endif
+
obj-$(CONFIG_ERRATA_SIFIVE) += sifive/
obj-$(CONFIG_ERRATA_THEAD) += thead/
diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h
index fe6f23006641..ce1ebda1a49a 100644
--- a/arch/riscv/include/asm/hugetlb.h
+++ b/arch/riscv/include/asm/hugetlb.h
@@ -36,6 +36,9 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t pte, int dirty);
+#define __HAVE_ARCH_HUGE_PTEP_GET
+pte_t huge_ptep_get(pte_t *ptep);
+
pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags);
#define arch_make_huge_pte arch_make_huge_pte
diff --git a/arch/riscv/include/asm/kfence.h b/arch/riscv/include/asm/kfence.h
index d887a54042aa..0bbffd528096 100644
--- a/arch/riscv/include/asm/kfence.h
+++ b/arch/riscv/include/asm/kfence.h
@@ -8,41 +8,8 @@
#include <asm-generic/pgalloc.h>
#include <asm/pgtable.h>
-static inline int split_pmd_page(unsigned long addr)
-{
- int i;
- unsigned long pfn = PFN_DOWN(__pa((addr & PMD_MASK)));
- pmd_t *pmd = pmd_off_k(addr);
- pte_t *pte = pte_alloc_one_kernel(&init_mm);
-
- if (!pte)
- return -ENOMEM;
-
- for (i = 0; i < PTRS_PER_PTE; i++)
- set_pte(pte + i, pfn_pte(pfn + i, PAGE_KERNEL));
- set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(pte)), PAGE_TABLE));
-
- flush_tlb_kernel_range(addr, addr + PMD_SIZE);
- return 0;
-}
-
static inline bool arch_kfence_init_pool(void)
{
- int ret;
- unsigned long addr;
- pmd_t *pmd;
-
- for (addr = (unsigned long)__kfence_pool; is_kfence_address((void *)addr);
- addr += PAGE_SIZE) {
- pmd = pmd_off_k(addr);
-
- if (pmd_leaf(*pmd)) {
- ret = split_pmd_page(addr);
- if (ret)
- return false;
- }
- }
-
return true;
}
diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h
index d42c901f9a97..665bbc9b2f84 100644
--- a/arch/riscv/include/asm/perf_event.h
+++ b/arch/riscv/include/asm/perf_event.h
@@ -10,4 +10,11 @@
#include <linux/perf_event.h>
#define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs
+
+#define perf_arch_fetch_caller_regs(regs, __ip) { \
+ (regs)->epc = (__ip); \
+ (regs)->s0 = (unsigned long) __builtin_frame_address(0); \
+ (regs)->sp = current_stack_pointer; \
+ (regs)->status = SR_PP; \
+}
#endif /* _ASM_RISCV_PERF_EVENT_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 2258b27173b0..75970ee2bda2 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -165,8 +165,7 @@ extern struct pt_alloc_ops pt_ops __initdata;
_PAGE_EXEC | _PAGE_WRITE)
#define PAGE_COPY PAGE_READ
-#define PAGE_COPY_EXEC PAGE_EXEC
-#define PAGE_COPY_READ_EXEC PAGE_READ_EXEC
+#define PAGE_COPY_EXEC PAGE_READ_EXEC
#define PAGE_SHARED PAGE_WRITE
#define PAGE_SHARED_EXEC PAGE_WRITE_EXEC
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index fbdccc21418a..153864e4f399 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -23,6 +23,10 @@ ifdef CONFIG_FTRACE
CFLAGS_REMOVE_alternative.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_cpufeature.o = $(CC_FLAGS_FTRACE)
endif
+ifdef CONFIG_RELOCATABLE
+CFLAGS_alternative.o += -fno-pie
+CFLAGS_cpufeature.o += -fno-pie
+endif
ifdef CONFIG_KASAN
KASAN_SANITIZE_alternative.o := n
KASAN_SANITIZE_cpufeature.o := n
diff --git a/arch/riscv/kernel/probes/Makefile b/arch/riscv/kernel/probes/Makefile
index c40139e9ca47..8265ff497977 100644
--- a/arch/riscv/kernel/probes/Makefile
+++ b/arch/riscv/kernel/probes/Makefile
@@ -4,3 +4,5 @@ obj-$(CONFIG_RETHOOK) += rethook.o rethook_trampoline.o
obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o
obj-$(CONFIG_UPROBES) += uprobes.o decode-insn.o simulate-insn.o
CFLAGS_REMOVE_simulate-insn.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_rethook.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_rethook_trampoline.o = $(CC_FLAGS_FTRACE)
diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c
index a163a3e0f0d4..e0ef56dc57b9 100644
--- a/arch/riscv/mm/hugetlbpage.c
+++ b/arch/riscv/mm/hugetlbpage.c
@@ -3,6 +3,30 @@
#include <linux/err.h>
#ifdef CONFIG_RISCV_ISA_SVNAPOT
+pte_t huge_ptep_get(pte_t *ptep)
+{
+ unsigned long pte_num;
+ int i;
+ pte_t orig_pte = ptep_get(ptep);
+
+ if (!pte_present(orig_pte) || !pte_napot(orig_pte))
+ return orig_pte;
+
+ pte_num = napot_pte_num(napot_cont_order(orig_pte));
+
+ for (i = 0; i < pte_num; i++, ptep++) {
+ pte_t pte = ptep_get(ptep);
+
+ if (pte_dirty(pte))
+ orig_pte = pte_mkdirty(orig_pte);
+
+ if (pte_young(pte))
+ orig_pte = pte_mkyoung(orig_pte);
+ }
+
+ return orig_pte;
+}
+
pte_t *huge_pte_alloc(struct mm_struct *mm,
struct vm_area_struct *vma,
unsigned long addr,
@@ -218,6 +242,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
{
pte_t pte = ptep_get(ptep);
unsigned long order;
+ pte_t orig_pte;
int i, pte_num;
if (!pte_napot(pte)) {
@@ -228,9 +253,12 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
order = napot_cont_order(pte);
pte_num = napot_pte_num(order);
ptep = huge_pte_offset(mm, addr, napot_cont_size(order));
+ orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num);
+
+ orig_pte = pte_wrprotect(orig_pte);
for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
- ptep_set_wrprotect(mm, addr, ptep);
+ set_pte_at(mm, addr, ptep, orig_pte);
}
pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 747e5b1ef02d..4fa420faa780 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -23,6 +23,7 @@
#ifdef CONFIG_RELOCATABLE
#include <linux/elf.h>
#endif
+#include <linux/kfence.h>
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
@@ -293,7 +294,7 @@ static const pgprot_t protection_map[16] = {
[VM_EXEC] = PAGE_EXEC,
[VM_EXEC | VM_READ] = PAGE_READ_EXEC,
[VM_EXEC | VM_WRITE] = PAGE_COPY_EXEC,
- [VM_EXEC | VM_WRITE | VM_READ] = PAGE_COPY_READ_EXEC,
+ [VM_EXEC | VM_WRITE | VM_READ] = PAGE_COPY_EXEC,
[VM_SHARED] = PAGE_NONE,
[VM_SHARED | VM_READ] = PAGE_READ,
[VM_SHARED | VM_WRITE] = PAGE_SHARED,
@@ -659,18 +660,19 @@ void __init create_pgd_mapping(pgd_t *pgdp,
create_pgd_next_mapping(nextp, va, pa, sz, prot);
}
-static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
+static uintptr_t __init best_map_size(phys_addr_t pa, uintptr_t va,
+ phys_addr_t size)
{
- if (!(base & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE)
+ if (!(pa & (PGDIR_SIZE - 1)) && !(va & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE)
return PGDIR_SIZE;
- if (!(base & (P4D_SIZE - 1)) && size >= P4D_SIZE)
+ if (!(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE)
return P4D_SIZE;
- if (!(base & (PUD_SIZE - 1)) && size >= PUD_SIZE)
+ if (!(pa & (PUD_SIZE - 1)) && !(va & (PUD_SIZE - 1)) && size >= PUD_SIZE)
return PUD_SIZE;
- if (!(base & (PMD_SIZE - 1)) && size >= PMD_SIZE)
+ if (!(pa & (PMD_SIZE - 1)) && !(va & (PMD_SIZE - 1)) && size >= PMD_SIZE)
return PMD_SIZE;
return PAGE_SIZE;
@@ -922,9 +924,9 @@ static void __init create_kernel_page_table(pgd_t *pgdir, bool early)
static void __init create_fdt_early_page_table(uintptr_t fix_fdt_va,
uintptr_t dtb_pa)
{
+#ifndef CONFIG_BUILTIN_DTB
uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1);
-#ifndef CONFIG_BUILTIN_DTB
/* Make sure the fdt fixmap address is always aligned on PMD size */
BUILD_BUG_ON(FIX_FDT % (PMD_SIZE / PAGE_SIZE));
@@ -1167,14 +1169,16 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
}
static void __init create_linear_mapping_range(phys_addr_t start,
- phys_addr_t end)
+ phys_addr_t end,
+ uintptr_t fixed_map_size)
{
phys_addr_t pa;
uintptr_t va, map_size;
for (pa = start; pa < end; pa += map_size) {
va = (uintptr_t)__va(pa);
- map_size = best_map_size(pa, end - pa);
+ map_size = fixed_map_size ? fixed_map_size :
+ best_map_size(pa, va, end - pa);
create_pgd_mapping(swapper_pg_dir, va, pa, map_size,
pgprot_from_va(va));
@@ -1184,6 +1188,7 @@ static void __init create_linear_mapping_range(phys_addr_t start,
static void __init create_linear_mapping_page_table(void)
{
phys_addr_t start, end;
+ phys_addr_t kfence_pool __maybe_unused;
u64 i;
#ifdef CONFIG_STRICT_KERNEL_RWX
@@ -1197,6 +1202,19 @@ static void __init create_linear_mapping_page_table(void)
memblock_mark_nomap(krodata_start, krodata_size);
#endif
+#ifdef CONFIG_KFENCE
+ /*
+ * kfence pool must be backed by PAGE_SIZE mappings, so allocate it
+ * before we setup the linear mapping so that we avoid using hugepages
+ * for this region.
+ */
+ kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
+ BUG_ON(!kfence_pool);
+
+ memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
+ __kfence_pool = __va(kfence_pool);
+#endif
+
/* Map all memory banks in the linear mapping */
for_each_mem_range(i, &start, &end) {
if (start >= end)
@@ -1207,17 +1225,25 @@ static void __init create_linear_mapping_page_table(void)
if (end >= __pa(PAGE_OFFSET) + memory_limit)
end = __pa(PAGE_OFFSET) + memory_limit;
- create_linear_mapping_range(start, end);
+ create_linear_mapping_range(start, end, 0);
}
#ifdef CONFIG_STRICT_KERNEL_RWX
- create_linear_mapping_range(ktext_start, ktext_start + ktext_size);
+ create_linear_mapping_range(ktext_start, ktext_start + ktext_size, 0);
create_linear_mapping_range(krodata_start,
- krodata_start + krodata_size);
+ krodata_start + krodata_size, 0);
memblock_clear_nomap(ktext_start, ktext_size);
memblock_clear_nomap(krodata_start, krodata_size);
#endif
+
+#ifdef CONFIG_KFENCE
+ create_linear_mapping_range(kfence_pool,
+ kfence_pool + KFENCE_POOL_SIZE,
+ PAGE_SIZE);
+
+ memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
+#endif
}
static void __init setup_vm_final(void)
diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
index 5730797a6b40..bd2e27f82532 100644
--- a/arch/riscv/purgatory/Makefile
+++ b/arch/riscv/purgatory/Makefile
@@ -35,6 +35,11 @@ CFLAGS_sha256.o := -D__DISABLE_EXPORTS
CFLAGS_string.o := -D__DISABLE_EXPORTS
CFLAGS_ctype.o := -D__DISABLE_EXPORTS
+# When profile-guided optimization is enabled, llvm emits two different
+# overlapping text sections, which is not supported by kexec. Remove profile
+# optimization flags.
+KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS))
+
# When linking purgatory.ro with -r unresolved symbols are not checked,
# also link a purgatory.chk binary without -r to check for unresolved symbols.
PURGATORY_LDFLAGS := -e purgatory_start -z nodefaultlib
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index db20c1589a98..6dab9c1be508 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -469,19 +469,11 @@ config SCHED_SMT
config SCHED_MC
def_bool n
-config SCHED_BOOK
- def_bool n
-
-config SCHED_DRAWER
- def_bool n
-
config SCHED_TOPOLOGY
def_bool y
prompt "Topology scheduler support"
select SCHED_SMT
select SCHED_MC
- select SCHED_BOOK
- select SCHED_DRAWER
help
Topology scheduler support improves the CPU scheduler's decision
making when dealing with machines that have multi-threading,
@@ -716,7 +708,6 @@ config EADM_SCH
config VFIO_CCW
def_tristate n
prompt "Support for VFIO-CCW subchannels"
- depends on S390_CCW_IOMMU
depends on VFIO
select VFIO_MDEV
help
@@ -728,7 +719,7 @@ config VFIO_CCW
config VFIO_AP
def_tristate n
prompt "VFIO support for AP devices"
- depends on S390_AP_IOMMU && KVM
+ depends on KVM
depends on VFIO
depends on ZCRYPT
select VFIO_MDEV
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index acb1f8b53105..c67f59db7a51 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -45,6 +45,13 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat
static pte_t pte_z;
+static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode)
+{
+ start = PAGE_ALIGN_DOWN(__sha(start));
+ end = PAGE_ALIGN(__sha(end));
+ pgtable_populate(start, end, mode);
+}
+
static void kasan_populate_shadow(void)
{
pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
@@ -95,17 +102,17 @@ static void kasan_populate_shadow(void)
*/
for_each_physmem_usable_range(i, &start, &end)
- pgtable_populate(__sha(start), __sha(end), POPULATE_KASAN_MAP_SHADOW);
+ kasan_populate(start, end, POPULATE_KASAN_MAP_SHADOW);
if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
untracked_end = VMALLOC_START;
/* shallowly populate kasan shadow for vmalloc and modules */
- pgtable_populate(__sha(VMALLOC_START), __sha(MODULES_END), POPULATE_KASAN_SHALLOW);
+ kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW);
} else {
untracked_end = MODULES_VADDR;
}
/* populate kasan shadow for untracked memory */
- pgtable_populate(__sha(ident_map_size), __sha(untracked_end), POPULATE_KASAN_ZERO_SHADOW);
- pgtable_populate(__sha(MODULES_END), __sha(_REGION1_SIZE), POPULATE_KASAN_ZERO_SHADOW);
+ kasan_populate(ident_map_size, untracked_end, POPULATE_KASAN_ZERO_SHADOW);
+ kasan_populate(MODULES_END, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW);
}
static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 4ccf66d29fc2..aa95cf6dfabb 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -116,6 +116,7 @@ CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
+CONFIG_NET_TC_SKB_EXT=y
CONFIG_SMC=m
CONFIG_SMC_DIAG=m
CONFIG_INET=y
@@ -591,8 +592,6 @@ CONFIG_VIRTIO_BALLOON=m
CONFIG_VIRTIO_INPUT=y
CONFIG_VHOST_NET=m
CONFIG_VHOST_VSOCK=m
-CONFIG_S390_CCW_IOMMU=y
-CONFIG_S390_AP_IOMMU=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
@@ -703,6 +702,7 @@ CONFIG_IMA_DEFAULT_HASH_SHA256=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA_APPRAISE=y
CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
+CONFIG_INIT_STACK_NONE=y
CONFIG_CRYPTO_USER=m
# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
CONFIG_CRYPTO_PCRYPT=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 693297a2e897..f041945f9148 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -107,6 +107,7 @@ CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
+CONFIG_NET_TC_SKB_EXT=y
CONFIG_SMC=m
CONFIG_SMC_DIAG=m
CONFIG_INET=y
@@ -580,8 +581,6 @@ CONFIG_VIRTIO_BALLOON=m
CONFIG_VIRTIO_INPUT=y
CONFIG_VHOST_NET=m
CONFIG_VHOST_VSOCK=m
-CONFIG_S390_CCW_IOMMU=y
-CONFIG_S390_AP_IOMMU=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
@@ -686,6 +685,7 @@ CONFIG_IMA_DEFAULT_HASH_SHA256=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA_APPRAISE=y
CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
+CONFIG_INIT_STACK_NONE=y
CONFIG_CRYPTO_FIPS=y
CONFIG_CRYPTO_USER=m
# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index 33a232bb68af..6f68b39817ef 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -67,6 +67,7 @@ CONFIG_ZFCP=y
# CONFIG_MISC_FILESYSTEMS is not set
# CONFIG_NETWORK_FILESYSTEMS is not set
CONFIG_LSM="yama,loadpin,safesetid,integrity"
+CONFIG_INIT_STACK_NONE=y
# CONFIG_ZLIB_DFLTCC is not set
CONFIG_XZ_DEC_MICROLZMA=y
CONFIG_PRINTK_TIME=y
diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c
index 7752bd314558..5fae187f947a 100644
--- a/arch/s390/crypto/chacha-glue.c
+++ b/arch/s390/crypto/chacha-glue.c
@@ -82,7 +82,7 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
* it cannot handle a block of data or less, but otherwise
* it can handle data of arbitrary size
*/
- if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20)
+ if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20 || !MACHINE_HAS_VX)
chacha_crypt_generic(state, dst, src, bytes, nrounds);
else
chacha20_crypt_s390(state, dst, src, bytes,
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index a386070f1d56..3cb9d813f022 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -112,7 +112,7 @@ struct compat_statfs64 {
u32 f_namelen;
u32 f_frsize;
u32 f_flags;
- u32 f_spare[4];
+ u32 f_spare[5];
};
/*
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 6822a11c2c8a..c55f3c3365af 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -42,9 +42,6 @@ static inline void update_page_count(int level, long count)
atomic_long_add(count, &direct_pages_count[level]);
}
-struct seq_file;
-void arch_report_meminfo(struct seq_file *m);
-
/*
* The S390 doesn't have any external MMU info: the kernel page
* tables contain all the necessary information.
diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h
index 8e9c582592b3..9e41a74fce9a 100644
--- a/arch/s390/include/asm/physmem_info.h
+++ b/arch/s390/include/asm/physmem_info.h
@@ -3,6 +3,7 @@
#define _ASM_S390_MEM_DETECT_H
#include <linux/types.h>
+#include <asm/page.h>
enum physmem_info_source {
MEM_DETECT_NONE = 0,
@@ -133,7 +134,7 @@ static inline const char *get_rr_type_name(enum reserved_range_type t)
#define for_each_physmem_reserved_type_range(t, range, p_start, p_end) \
for (range = &physmem_info.reserved[t], *p_start = range->start, *p_end = range->end; \
- range && range->end; range = range->chain, \
+ range && range->end; range = range->chain ? __va(range->chain) : NULL, \
*p_start = range ? range->start : 0, *p_end = range ? range->end : 0)
static inline struct reserved_range *__physmem_reserved_next(enum reserved_range_type *t,
@@ -145,7 +146,7 @@ static inline struct reserved_range *__physmem_reserved_next(enum reserved_range
return range;
}
if (range->chain)
- return range->chain;
+ return __va(range->chain);
while (++*t < RR_MAX) {
range = &physmem_info.reserved[*t];
if (range->end)
diff --git a/arch/s390/include/uapi/asm/statfs.h b/arch/s390/include/uapi/asm/statfs.h
index 72604f7792c3..f85b50723dd3 100644
--- a/arch/s390/include/uapi/asm/statfs.h
+++ b/arch/s390/include/uapi/asm/statfs.h
@@ -30,7 +30,7 @@ struct statfs {
unsigned int f_namelen;
unsigned int f_frsize;
unsigned int f_flags;
- unsigned int f_spare[4];
+ unsigned int f_spare[5];
};
struct statfs64 {
@@ -45,7 +45,7 @@ struct statfs64 {
unsigned int f_namelen;
unsigned int f_frsize;
unsigned int f_flags;
- unsigned int f_spare[4];
+ unsigned int f_spare[5];
};
#endif
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 8983837b3565..6b2a051e1f8a 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -10,6 +10,7 @@ CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
# Do not trace early setup code
CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_rethook.o = $(CC_FLAGS_FTRACE)
endif
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 8a617be28bb4..7af69948b290 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -568,9 +568,9 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt)
int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
{
Elf64_Phdr *phdr_notes, *phdr_loads;
+ size_t alloc_size;
int mem_chunk_cnt;
void *ptr, *hdr;
- u32 alloc_size;
u64 hdr_off;
/* If we are not in kdump or zfcp/nvme dump mode return */
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 43de939b7af1..f44f70de9661 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -1935,14 +1935,13 @@ static struct shutdown_action __refdata dump_action = {
static void dump_reipl_run(struct shutdown_trigger *trigger)
{
- unsigned long ipib = (unsigned long) reipl_block_actual;
struct lowcore *abs_lc;
unsigned int csum;
csum = (__force unsigned int)
csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
abs_lc = get_abs_lowcore();
- abs_lc->ipib = ipib;
+ abs_lc->ipib = __pa(reipl_block_actual);
abs_lc->ipib_checksum = csum;
put_abs_lowcore(abs_lc);
dump_run(trigger);
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 9fd19530c9a5..68adf1de8888 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -95,7 +95,7 @@ out:
static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
{
static cpumask_t mask;
- int i;
+ unsigned int max_cpu;
cpumask_clear(&mask);
if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
@@ -104,9 +104,10 @@ static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
if (topology_mode != TOPOLOGY_MODE_HW)
goto out;
cpu -= cpu % (smp_cpu_mtid + 1);
- for (i = 0; i <= smp_cpu_mtid; i++) {
- if (cpumask_test_cpu(cpu + i, &cpu_setup_mask))
- cpumask_set_cpu(cpu + i, &mask);
+ max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1);
+ for (; cpu <= max_cpu; cpu++) {
+ if (cpumask_test_cpu(cpu, &cpu_setup_mask))
+ cpumask_set_cpu(cpu, &mask);
}
out:
cpumask_copy(dst, &mask);
@@ -123,25 +124,26 @@ static void add_cpus_to_mask(struct topology_core *tl_core,
unsigned int core;
for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) {
- unsigned int rcore;
- int lcpu, i;
+ unsigned int max_cpu, rcore;
+ int cpu;
rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin;
- lcpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
- if (lcpu < 0)
+ cpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
+ if (cpu < 0)
continue;
- for (i = 0; i <= smp_cpu_mtid; i++) {
- topo = &cpu_topology[lcpu + i];
+ max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1);
+ for (; cpu <= max_cpu; cpu++) {
+ topo = &cpu_topology[cpu];
topo->drawer_id = drawer->id;
topo->book_id = book->id;
topo->socket_id = socket->id;
topo->core_id = rcore;
- topo->thread_id = lcpu + i;
+ topo->thread_id = cpu;
topo->dedicated = tl_core->d;
- cpumask_set_cpu(lcpu + i, &drawer->mask);
- cpumask_set_cpu(lcpu + i, &book->mask);
- cpumask_set_cpu(lcpu + i, &socket->mask);
- smp_cpu_set_polarization(lcpu + i, tl_core->pp);
+ cpumask_set_cpu(cpu, &drawer->mask);
+ cpumask_set_cpu(cpu, &book->mask);
+ cpumask_set_cpu(cpu, &socket->mask);
+ smp_cpu_set_polarization(cpu, tl_core->pp);
}
}
}
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 5ba3bd8a7b12..ca5a418c58a8 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -4,6 +4,7 @@
* Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
*/
#include <linux/hugetlb.h>
+#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <asm/cacheflush.h>
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 5b22c6e24528..b9dcb4ae6c59 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -667,7 +667,15 @@ static void __init memblock_region_swap(void *a, void *b, int size)
#ifdef CONFIG_KASAN
#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x))
+
+static inline int set_memory_kasan(unsigned long start, unsigned long end)
+{
+ start = PAGE_ALIGN_DOWN(__sha(start));
+ end = PAGE_ALIGN(__sha(end));
+ return set_memory_rwnx(start, (end - start) >> PAGE_SHIFT);
+}
#endif
+
/*
* map whole physical memory to virtual memory (identity mapping)
* we reserve enough space in the vmalloc area for vmemmap to hotplug
@@ -737,10 +745,8 @@ void __init vmem_map_init(void)
}
#ifdef CONFIG_KASAN
- for_each_mem_range(i, &base, &end) {
- set_memory_rwnx(__sha(base),
- (__sha(end) - __sha(base)) >> PAGE_SHIFT);
- }
+ for_each_mem_range(i, &base, &end)
+ set_memory_kasan(base, end);
#endif
set_memory_rox((unsigned long)_stext,
(unsigned long)(_etext - _stext) >> PAGE_SHIFT);
diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
index 32573b4f9bd2..cc8cf5abea15 100644
--- a/arch/s390/purgatory/Makefile
+++ b/arch/s390/purgatory/Makefile
@@ -26,6 +26,7 @@ KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare
KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding
KBUILD_CFLAGS += -Os -m64 -msoft-float -fno-common
KBUILD_CFLAGS += -fno-stack-protector
+KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
KBUILD_CFLAGS += $(CLANG_FLAGS)
KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS))
diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
index dee6f66353b3..a461a950f051 100644
--- a/arch/um/drivers/Makefile
+++ b/arch/um/drivers/Makefile
@@ -16,7 +16,8 @@ mconsole-objs := mconsole_kern.o mconsole_user.o
hostaudio-objs := hostaudio_kern.o
ubd-objs := ubd_kern.o ubd_user.o
port-objs := port_kern.o port_user.o
-harddog-objs := harddog_kern.o harddog_user.o
+harddog-objs := harddog_kern.o
+harddog-builtin-$(CONFIG_UML_WATCHDOG) := harddog_user.o harddog_user_exp.o
rtc-objs := rtc_kern.o rtc_user.o
LDFLAGS_pcap.o = $(shell $(CC) $(KBUILD_CFLAGS) -print-file-name=libpcap.a)
@@ -60,6 +61,7 @@ obj-$(CONFIG_PTY_CHAN) += pty.o
obj-$(CONFIG_TTY_CHAN) += tty.o
obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o
obj-$(CONFIG_UML_WATCHDOG) += harddog.o
+obj-y += $(harddog-builtin-y) $(harddog-builtin-m)
obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
obj-$(CONFIG_UML_RANDOM) += random.o
obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o
diff --git a/arch/um/drivers/harddog.h b/arch/um/drivers/harddog.h
new file mode 100644
index 000000000000..6d9ea60e7133
--- /dev/null
+++ b/arch/um/drivers/harddog.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef UM_WATCHDOG_H
+#define UM_WATCHDOG_H
+
+int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock);
+void stop_watchdog(int in_fd, int out_fd);
+int ping_watchdog(int fd);
+
+#endif /* UM_WATCHDOG_H */
diff --git a/arch/um/drivers/harddog_kern.c b/arch/um/drivers/harddog_kern.c
index e6d4f43deba8..60d1c6cab8a9 100644
--- a/arch/um/drivers/harddog_kern.c
+++ b/arch/um/drivers/harddog_kern.c
@@ -47,6 +47,7 @@
#include <linux/spinlock.h>
#include <linux/uaccess.h>
#include "mconsole.h"
+#include "harddog.h"
MODULE_LICENSE("GPL");
@@ -60,8 +61,6 @@ static int harddog_out_fd = -1;
* Allow only one person to hold it open
*/
-extern int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock);
-
static int harddog_open(struct inode *inode, struct file *file)
{
int err = -EBUSY;
@@ -92,8 +91,6 @@ err:
return err;
}
-extern void stop_watchdog(int in_fd, int out_fd);
-
static int harddog_release(struct inode *inode, struct file *file)
{
/*
@@ -112,8 +109,6 @@ static int harddog_release(struct inode *inode, struct file *file)
return 0;
}
-extern int ping_watchdog(int fd);
-
static ssize_t harddog_write(struct file *file, const char __user *data, size_t len,
loff_t *ppos)
{
diff --git a/arch/um/drivers/harddog_user.c b/arch/um/drivers/harddog_user.c
index 070468d22e39..9ed89304975e 100644
--- a/arch/um/drivers/harddog_user.c
+++ b/arch/um/drivers/harddog_user.c
@@ -7,6 +7,7 @@
#include <unistd.h>
#include <errno.h>
#include <os.h>
+#include "harddog.h"
struct dog_data {
int stdin_fd;
diff --git a/arch/um/drivers/harddog_user_exp.c b/arch/um/drivers/harddog_user_exp.c
new file mode 100644
index 000000000000..c74d4b815d14
--- /dev/null
+++ b/arch/um/drivers/harddog_user_exp.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/export.h>
+#include "harddog.h"
+
+#if IS_MODULE(CONFIG_UML_WATCHDOG)
+EXPORT_SYMBOL(start_watchdog);
+EXPORT_SYMBOL(stop_watchdog);
+EXPORT_SYMBOL(ping_watchdog);
+#endif
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index b39975977c03..fdc2e3abd615 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -305,6 +305,18 @@ ifeq ($(RETPOLINE_CFLAGS),)
endif
endif
+ifdef CONFIG_UNWINDER_ORC
+orc_hash_h := arch/$(SRCARCH)/include/generated/asm/orc_hash.h
+orc_hash_sh := $(srctree)/scripts/orc_hash.sh
+targets += $(orc_hash_h)
+quiet_cmd_orc_hash = GEN $@
+ cmd_orc_hash = mkdir -p $(dir $@); \
+ $(CONFIG_SHELL) $(orc_hash_sh) < $< > $@
+$(orc_hash_h): $(srctree)/arch/x86/include/asm/orc_types.h $(orc_hash_sh) FORCE
+ $(call if_changed,orc_hash)
+archprepare: $(orc_hash_h)
+endif
+
archclean:
$(Q)rm -rf $(objtree)/arch/i386
$(Q)rm -rf $(objtree)/arch/x86_64
diff --git a/arch/x86/crypto/aria-aesni-avx-asm_64.S b/arch/x86/crypto/aria-aesni-avx-asm_64.S
index 7c1abc513f34..9556dacd9841 100644
--- a/arch/x86/crypto/aria-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/aria-aesni-avx-asm_64.S
@@ -773,8 +773,6 @@
.octa 0x3F893781E95FE1576CDA64D2BA0CB204
#ifdef CONFIG_AS_GFNI
-.section .rodata.cst8, "aM", @progbits, 8
-.align 8
/* AES affine: */
#define tf_aff_const BV8(1, 1, 0, 0, 0, 1, 1, 0)
.Ltf_aff_bitmatrix:
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 070cc4ef2672..27f3a7b34bd5 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -349,6 +349,16 @@ static struct event_constraint intel_spr_event_constraints[] = {
EVENT_CONSTRAINT_END
};
+static struct extra_reg intel_gnr_extra_regs[] __read_mostly = {
+ INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
+ INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+ INTEL_UEVENT_EXTRA_REG(0x02c6, MSR_PEBS_FRONTEND, 0x9, FE),
+ INTEL_UEVENT_EXTRA_REG(0x03c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE),
+ INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE),
+ INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE),
+ EVENT_EXTRA_END
+};
EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
@@ -4074,7 +4084,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
if (x86_pmu.intel_cap.pebs_baseline) {
arr[(*nr)++] = (struct perf_guest_switch_msr){
.msr = MSR_PEBS_DATA_CFG,
- .host = cpuc->pebs_data_cfg,
+ .host = cpuc->active_pebs_data_cfg,
.guest = kvm_pmu->pebs_data_cfg,
};
}
@@ -6496,6 +6506,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_SAPPHIRERAPIDS_X:
case INTEL_FAM6_EMERALDRAPIDS_X:
x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
+ x86_pmu.extra_regs = intel_spr_extra_regs;
fallthrough;
case INTEL_FAM6_GRANITERAPIDS_X:
case INTEL_FAM6_GRANITERAPIDS_D:
@@ -6506,7 +6517,8 @@ __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_spr_event_constraints;
x86_pmu.pebs_constraints = intel_spr_pebs_event_constraints;
- x86_pmu.extra_regs = intel_spr_extra_regs;
+ if (!x86_pmu.extra_regs)
+ x86_pmu.extra_regs = intel_gnr_extra_regs;
x86_pmu.limit_period = spr_limit_period;
x86_pmu.pebs_ept = 1;
x86_pmu.pebs_aliases = NULL;
@@ -6650,6 +6662,7 @@ __init int intel_pmu_init(void)
pmu->pebs_constraints = intel_grt_pebs_event_constraints;
pmu->extra_regs = intel_grt_extra_regs;
if (is_mtl(boot_cpu_data.x86_model)) {
+ x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].extra_regs = intel_gnr_extra_regs;
x86_pmu.pebs_latency_data = mtl_latency_data_small;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr;
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index fa9b209a11fa..d49e90dc04a4 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -6150,6 +6150,7 @@ static struct intel_uncore_type spr_uncore_mdf = {
};
#define UNCORE_SPR_NUM_UNCORE_TYPES 12
+#define UNCORE_SPR_CHA 0
#define UNCORE_SPR_IIO 1
#define UNCORE_SPR_IMC 6
#define UNCORE_SPR_UPI 8
@@ -6460,12 +6461,22 @@ static int uncore_type_max_boxes(struct intel_uncore_type **types,
return max + 1;
}
+#define SPR_MSR_UNC_CBO_CONFIG 0x2FFE
+
void spr_uncore_cpu_init(void)
{
+ struct intel_uncore_type *type;
+ u64 num_cbo;
+
uncore_msr_uncores = uncore_get_uncores(UNCORE_ACCESS_MSR,
UNCORE_SPR_MSR_EXTRA_UNCORES,
spr_msr_uncores);
+ type = uncore_find_type_by_id(uncore_msr_uncores, UNCORE_SPR_CHA);
+ if (type) {
+ rdmsrl(SPR_MSR_UNC_CBO_CONFIG, num_cbo);
+ type->num_boxes = num_cbo;
+ }
spr_uncore_iio_free_running.num_boxes = uncore_type_max_boxes(uncore_msr_uncores, UNCORE_SPR_IIO);
}
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index a5f9474f08e1..6c04b52f139b 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -416,7 +416,7 @@ void __init hyperv_init(void)
goto free_vp_assist_page;
}
- cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online",
+ cpuhp = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "x86/hyperv_init:online",
hv_cpu_init, hv_cpu_die);
if (cpuhp < 0)
goto free_ghcb_page;
diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c
index 1ba5d3b99b16..85d38b9f3586 100644
--- a/arch/x86/hyperv/hv_vtl.c
+++ b/arch/x86/hyperv/hv_vtl.c
@@ -20,6 +20,8 @@ void __init hv_vtl_init_platform(void)
{
pr_info("Linux runs in Hyper-V Virtual Trust Level\n");
+ x86_platform.realmode_reserve = x86_init_noop;
+ x86_platform.realmode_init = x86_init_noop;
x86_init.irqs.pre_vector_init = x86_init_noop;
x86_init.timers.timer_init = x86_init_noop;
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 1e51650b79d7..4f1ce5fc4e19 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
+generated-y += orc_hash.h
generated-y += syscalls_32.h
generated-y += syscalls_64.h
generated-y += syscalls_x32.h
diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h
index c2d6cd78ed0c..78fcde7b1f07 100644
--- a/arch/x86/include/asm/fpu/sched.h
+++ b/arch/x86/include/asm/fpu/sched.h
@@ -39,7 +39,7 @@ extern void fpu_flush_thread(void);
static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
{
if (cpu_feature_enabled(X86_FEATURE_FPU) &&
- !(current->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ !(current->flags & (PF_KTHREAD | PF_USER_WORKER))) {
save_fpregs_to_fpstate(old_fpu);
/*
* The save operation preserved register state, so the
diff --git a/arch/x86/include/asm/orc_header.h b/arch/x86/include/asm/orc_header.h
new file mode 100644
index 000000000000..07bacf3e160e
--- /dev/null
+++ b/arch/x86/include/asm/orc_header.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Copyright (c) Meta Platforms, Inc. and affiliates. */
+
+#ifndef _ORC_HEADER_H
+#define _ORC_HEADER_H
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <asm/orc_hash.h>
+
+/*
+ * The header is currently a 20-byte hash of the ORC entry definition; see
+ * scripts/orc_hash.sh.
+ */
+#define ORC_HEADER \
+ __used __section(".orc_header") __aligned(4) \
+ static const u8 orc_header[] = { ORC_HASH }
+
+#endif /* _ORC_HEADER_H */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 15ae4d6ba476..5700bb337987 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -27,6 +27,7 @@
extern pgd_t early_top_pgt[PTRS_PER_PGD];
bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
+struct seq_file;
void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm);
void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm,
bool user);
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 447d4bee25c4..ba3e2554799a 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -513,9 +513,6 @@ extern void native_pagetable_init(void);
#define native_pagetable_init paging_init
#endif
-struct seq_file;
-extern void arch_report_meminfo(struct seq_file *m);
-
enum pg_level {
PG_LEVEL_NONE,
PG_LEVEL_4K,
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 498dc600bd5c..0d02c4aafa6f 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -13,7 +13,9 @@
#include <linux/bitops.h>
+#include <linux/bug.h>
#include <linux/types.h>
+
#include <uapi/asm/vmx.h>
#include <asm/vmxfeatures.h>
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index dd61752f4c96..4070a01c11b7 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -17,6 +17,7 @@ CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_early_printk.o = -pg
CFLAGS_REMOVE_head64.o = -pg
CFLAGS_REMOVE_sev.o = -pg
+CFLAGS_REMOVE_rethook.o = -pg
endif
KASAN_SANITIZE_head$(BITS).o := n
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 6bde05a86b4e..896bc41cb2ba 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -97,7 +97,10 @@ static void init_x2apic_ldr(void)
static int x2apic_phys_probe(void)
{
- if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys()))
+ if (!x2apic_mode)
+ return 0;
+
+ if (x2apic_phys || x2apic_fadt_phys())
return 1;
return apic == &apic_x2apic_phys;
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 5e868b62a7c4..0270925fe013 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -79,7 +79,7 @@ int detect_extended_topology_early(struct cpuinfo_x86 *c)
* initial apic id, which also represents 32-bit extended x2apic id.
*/
c->initial_apicid = edx;
- smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
+ smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx));
#endif
return 0;
}
@@ -109,7 +109,8 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
*/
cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
c->initial_apicid = edx;
- core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
+ core_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
+ smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx));
core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
die_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
pkg_mask_width = die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 0bf6779187dd..f18ca44c904b 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -195,7 +195,6 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
printk("%sCall Trace:\n", log_lvl);
unwind_start(&state, task, regs, stack);
- stack = stack ? : get_stack_pointer(task, regs);
regs = unwind_get_entry_regs(&state, &partial);
/*
@@ -214,9 +213,13 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
* - hardirq stack
* - entry stack
*/
- for ( ; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
+ for (stack = stack ?: get_stack_pointer(task, regs);
+ stack;
+ stack = stack_info.next_sp) {
const char *stack_name;
+ stack = PTR_ALIGN(stack, sizeof(long));
+
if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
/*
* We weren't on a valid stack. It's possible that
diff --git a/arch/x86/kernel/fpu/context.h b/arch/x86/kernel/fpu/context.h
index 9fcfa5c4dad7..af5cbdd9bd29 100644
--- a/arch/x86/kernel/fpu/context.h
+++ b/arch/x86/kernel/fpu/context.h
@@ -57,7 +57,7 @@ static inline void fpregs_restore_userregs(void)
struct fpu *fpu = &current->thread.fpu;
int cpu = smp_processor_id();
- if (WARN_ON_ONCE(current->flags & (PF_KTHREAD | PF_IO_WORKER)))
+ if (WARN_ON_ONCE(current->flags & (PF_KTHREAD | PF_USER_WORKER)))
return;
if (!fpregs_state_valid(fpu, cpu)) {
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index caf33486dc5e..1015af1ae562 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -426,7 +426,7 @@ void kernel_fpu_begin_mask(unsigned int kfpu_mask)
this_cpu_write(in_kernel_fpu, true);
- if (!(current->flags & (PF_KTHREAD | PF_IO_WORKER)) &&
+ if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER)) &&
!test_thread_flag(TIF_NEED_FPU_LOAD)) {
set_thread_flag(TIF_NEED_FPU_LOAD);
save_fpregs_to_fpstate(&current->thread.fpu);
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index a5df3e994f04..113c13376e51 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -77,6 +77,15 @@ SYM_CODE_START_NOALIGN(startup_64)
call startup_64_setup_env
popq %rsi
+ /* Now switch to __KERNEL_CS so IRET works reliably */
+ pushq $__KERNEL_CS
+ leaq .Lon_kernel_cs(%rip), %rax
+ pushq %rax
+ lretq
+
+.Lon_kernel_cs:
+ UNWIND_HINT_END_OF_STACK
+
#ifdef CONFIG_AMD_MEM_ENCRYPT
/*
* Activate SEV/SME memory encryption if supported/enabled. This needs to
@@ -90,15 +99,6 @@ SYM_CODE_START_NOALIGN(startup_64)
popq %rsi
#endif
- /* Now switch to __KERNEL_CS so IRET works reliably */
- pushq $__KERNEL_CS
- leaq .Lon_kernel_cs(%rip), %rax
- pushq %rax
- lretq
-
-.Lon_kernel_cs:
- UNWIND_HINT_END_OF_STACK
-
/* Sanitize CPU configuration */
call verify_cpu
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 3ac50b7298d1..4d8e518365f4 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -7,6 +7,9 @@
#include <asm/unwind.h>
#include <asm/orc_types.h>
#include <asm/orc_lookup.h>
+#include <asm/orc_header.h>
+
+ORC_HEADER;
#define orc_warn(fmt, ...) \
printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 123bf8b97a4b..0c9660a07b23 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -253,7 +253,6 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
int nent)
{
struct kvm_cpuid_entry2 *best;
- u64 guest_supported_xcr0 = cpuid_get_supported_xcr0(entries, nent);
best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
if (best) {
@@ -292,21 +291,6 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
vcpu->arch.ia32_misc_enable_msr &
MSR_IA32_MISC_ENABLE_MWAIT);
}
-
- /*
- * Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate
- * the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's
- * requested XCR0 value. The enclave's XFRM must be a subset of XCRO
- * at the time of EENTER, thus adjust the allowed XFRM by the guest's
- * supported XCR0. Similar to XCR0 handling, FP and SSE are forced to
- * '1' even on CPUs that don't support XSAVE.
- */
- best = cpuid_entry2_find(entries, nent, 0x12, 0x1);
- if (best) {
- best->ecx &= guest_supported_xcr0 & 0xffffffff;
- best->edx &= guest_supported_xcr0 >> 32;
- best->ecx |= XFEATURE_MASK_FPSSE;
- }
}
void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index e542cf285b51..3c300a196bdf 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -229,6 +229,23 @@ static int kvm_recalculate_phys_map(struct kvm_apic_map *new,
u32 physical_id;
/*
+ * For simplicity, KVM always allocates enough space for all possible
+ * xAPIC IDs. Yell, but don't kill the VM, as KVM can continue on
+ * without the optimized map.
+ */
+ if (WARN_ON_ONCE(xapic_id > new->max_apic_id))
+ return -EINVAL;
+
+ /*
+ * Bail if a vCPU was added and/or enabled its APIC between allocating
+ * the map and doing the actual calculations for the map. Note, KVM
+ * hardcodes the x2APIC ID to vcpu_id, i.e. there's no TOCTOU bug if
+ * the compiler decides to reload x2apic_id after this check.
+ */
+ if (x2apic_id > new->max_apic_id)
+ return -E2BIG;
+
+ /*
* Deliberately truncate the vCPU ID when detecting a mismatched APIC
* ID to avoid false positives if the vCPU ID, i.e. x2APIC ID, is a
* 32-bit value. Any unwanted aliasing due to truncation results will
@@ -253,8 +270,7 @@ static int kvm_recalculate_phys_map(struct kvm_apic_map *new,
*/
if (vcpu->kvm->arch.x2apic_format) {
/* See also kvm_apic_match_physical_addr(). */
- if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
- x2apic_id <= new->max_apic_id)
+ if (apic_x2apic_mode(apic) || x2apic_id > 0xff)
new->phys_map[x2apic_id] = apic;
if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index c8961f45e3b1..6eaa3d6994ae 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -7091,7 +7091,10 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
*/
slot = NULL;
if (atomic_read(&kvm->nr_memslots_dirty_logging)) {
- slot = gfn_to_memslot(kvm, sp->gfn);
+ struct kvm_memslots *slots;
+
+ slots = kvm_memslots_for_spte_role(kvm, sp->role);
+ slot = __gfn_to_memslot(slots, sp->gfn);
WARN_ON_ONCE(!slot);
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index ca32389f3c36..54089f990c8f 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3510,7 +3510,7 @@ static bool svm_is_vnmi_pending(struct kvm_vcpu *vcpu)
if (!is_vnmi_enabled(svm))
return false;
- return !!(svm->vmcb->control.int_ctl & V_NMI_BLOCKING_MASK);
+ return !!(svm->vmcb->control.int_ctl & V_NMI_PENDING_MASK);
}
static bool svm_set_vnmi_pending(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c
index 0574030b071f..2261b684a7d4 100644
--- a/arch/x86/kvm/vmx/sgx.c
+++ b/arch/x86/kvm/vmx/sgx.c
@@ -170,12 +170,19 @@ static int __handle_encls_ecreate(struct kvm_vcpu *vcpu,
return 1;
}
- /* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. */
+ /*
+ * Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. Note
+ * that the allowed XFRM (XFeature Request Mask) isn't strictly bound
+ * by the supported XCR0. FP+SSE *must* be set in XFRM, even if XSAVE
+ * is unsupported, i.e. even if XCR0 itself is completely unsupported.
+ */
if ((u32)miscselect & ~sgx_12_0->ebx ||
(u32)attributes & ~sgx_12_1->eax ||
(u32)(attributes >> 32) & ~sgx_12_1->ebx ||
(u32)xfrm & ~sgx_12_1->ecx ||
- (u32)(xfrm >> 32) & ~sgx_12_1->edx) {
+ (u32)(xfrm >> 32) & ~sgx_12_1->edx ||
+ xfrm & ~(vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE) ||
+ (xfrm & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
kvm_inject_gp(vcpu, 0);
return 1;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ceb7c5e9cf9e..04b57a336b34 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1446,7 +1446,7 @@ static const u32 msrs_to_save_base[] = {
#endif
MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
- MSR_IA32_SPEC_CTRL,
+ MSR_IA32_SPEC_CTRL, MSR_IA32_TSX_CTRL,
MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH,
MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK,
MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B,
@@ -7155,6 +7155,10 @@ static void kvm_probe_msr_to_save(u32 msr_index)
if (!kvm_cpu_cap_has(X86_FEATURE_XFD))
return;
break;
+ case MSR_IA32_TSX_CTRL:
+ if (!(kvm_get_arch_capabilities() & ARCH_CAP_TSX_CTRL_MSR))
+ return;
+ break;
default:
break;
}
@@ -10754,6 +10758,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED;
break;
}
+
+ /* Note, VM-Exits that go down the "slow" path are accounted below. */
+ ++vcpu->stat.exits;
}
/*
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 4fc5c2de2de4..01c5de4c279b 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -7,6 +7,8 @@
*/
#include <linux/linkage.h>
+#include <asm/cpufeatures.h>
+#include <asm/alternative.h>
#include <asm/asm.h>
#include <asm/export.h>
@@ -29,7 +31,7 @@
*/
SYM_FUNC_START(rep_movs_alternative)
cmpq $64,%rcx
- jae .Lunrolled
+ jae .Llarge
cmp $8,%ecx
jae .Lword
@@ -65,6 +67,12 @@ SYM_FUNC_START(rep_movs_alternative)
_ASM_EXTABLE_UA( 2b, .Lcopy_user_tail)
_ASM_EXTABLE_UA( 3b, .Lcopy_user_tail)
+.Llarge:
+0: ALTERNATIVE "jmp .Lunrolled", "rep movsb", X86_FEATURE_ERMS
+1: RET
+
+ _ASM_EXTABLE_UA( 0b, 1b)
+
.p2align 4
.Lunrolled:
10: movq (%rsi),%r8
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 3cdac0f0055d..8192452d1d2d 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -9,6 +9,7 @@
#include <linux/sched/task.h>
#include <asm/set_memory.h>
+#include <asm/cpu_device_id.h>
#include <asm/e820/api.h>
#include <asm/init.h>
#include <asm/page.h>
@@ -261,6 +262,24 @@ static void __init probe_page_size_mask(void)
}
}
+#define INTEL_MATCH(_model) { .vendor = X86_VENDOR_INTEL, \
+ .family = 6, \
+ .model = _model, \
+ }
+/*
+ * INVLPG may not properly flush Global entries
+ * on these CPUs when PCIDs are enabled.
+ */
+static const struct x86_cpu_id invlpg_miss_ids[] = {
+ INTEL_MATCH(INTEL_FAM6_ALDERLAKE ),
+ INTEL_MATCH(INTEL_FAM6_ALDERLAKE_L ),
+ INTEL_MATCH(INTEL_FAM6_ALDERLAKE_N ),
+ INTEL_MATCH(INTEL_FAM6_RAPTORLAKE ),
+ INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_P),
+ INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_S),
+ {}
+};
+
static void setup_pcid(void)
{
if (!IS_ENABLED(CONFIG_X86_64))
@@ -269,6 +288,12 @@ static void setup_pcid(void)
if (!boot_cpu_has(X86_FEATURE_PCID))
return;
+ if (x86_match_cpu(invlpg_miss_ids)) {
+ pr_info("Incomplete global flushes, disabling PCID");
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
+ return;
+ }
+
if (boot_cpu_has(X86_FEATURE_PGE)) {
/*
* This can't be cr4_set_bits_and_update_boot() -- the
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 557f0fe25dff..37db264866b6 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -172,10 +172,10 @@ void __meminit init_trampoline_kaslr(void)
set_p4d(p4d_tramp,
__p4d(_KERNPG_TABLE | __pa(pud_page_tramp)));
- set_pgd(&trampoline_pgd_entry,
- __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp)));
+ trampoline_pgd_entry =
+ __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp));
} else {
- set_pgd(&trampoline_pgd_entry,
- __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)));
+ trampoline_pgd_entry =
+ __pgd(_KERNPG_TABLE | __pa(pud_page_tramp));
}
}
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 7159cf787613..d1515756e369 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -9,6 +9,7 @@
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
#include <linux/debugfs.h>
#include <linux/pfn.h>
#include <linux/percpu.h>
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 1056bbf55b17..438adb695daa 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -2570,7 +2570,7 @@ out_image:
}
if (bpf_jit_enable > 1)
- bpf_jit_dump(prog->len, proglen, pass + 1, image);
+ bpf_jit_dump(prog->len, proglen, pass + 1, rw_image);
if (image) {
if (!prog->is_func || extra_pass) {
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 8babce71915f..014c508e914d 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -198,7 +198,7 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
i++;
}
kfree(v);
- return 0;
+ return msi_device_populate_sysfs(&dev->dev);
error:
if (ret == -ENOSYS)
@@ -254,7 +254,7 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
dev_dbg(&dev->dev,
"xen: msi --> pirq=%d --> irq=%d\n", pirq, irq);
}
- return 0;
+ return msi_device_populate_sysfs(&dev->dev);
error:
dev_err(&dev->dev, "Failed to create MSI%s! ret=%d!\n",
@@ -346,7 +346,7 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
if (ret < 0)
goto out;
}
- ret = 0;
+ ret = msi_device_populate_sysfs(&dev->dev);
out:
return ret;
}
@@ -394,6 +394,8 @@ static void xen_teardown_msi_irqs(struct pci_dev *dev)
xen_destroy_irq(msidesc->irq + i);
msidesc->irq = 0;
}
+
+ msi_device_destroy_sysfs(&dev->dev);
}
static void xen_pv_teardown_msi_irqs(struct pci_dev *dev)
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 82fec66d46d2..42abd6af1198 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -14,6 +14,11 @@ $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
CFLAGS_sha256.o := -D__DISABLE_EXPORTS
+# When profile-guided optimization is enabled, llvm emits two different
+# overlapping text sections, which is not supported by kexec. Remove profile
+# optimization flags.
+KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS))
+
# When linking purgatory.ro with -r unresolved symbols are not checked,
# also link a purgatory.chk binary without -r to check for unresolved symbols.
PURGATORY_LDFLAGS := -e purgatory_start -z nodefaultlib
diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c
index 876d5df157ed..5c01d7e70d90 100644
--- a/arch/xtensa/kernel/signal.c
+++ b/arch/xtensa/kernel/signal.c
@@ -343,7 +343,19 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
struct rt_sigframe *frame;
int err = 0, sig = ksig->sig;
unsigned long sp, ra, tp, ps;
+ unsigned long handler = (unsigned long)ksig->ka.sa.sa_handler;
+ unsigned long handler_fdpic_GOT = 0;
unsigned int base;
+ bool fdpic = IS_ENABLED(CONFIG_BINFMT_ELF_FDPIC) &&
+ (current->personality & FDPIC_FUNCPTRS);
+
+ if (fdpic) {
+ unsigned long __user *fdpic_func_desc =
+ (unsigned long __user *)handler;
+ if (__get_user(handler, &fdpic_func_desc[0]) ||
+ __get_user(handler_fdpic_GOT, &fdpic_func_desc[1]))
+ return -EFAULT;
+ }
sp = regs->areg[1];
@@ -373,20 +385,26 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (ksig->ka.sa.sa_flags & SA_RESTORER) {
- ra = (unsigned long)ksig->ka.sa.sa_restorer;
+ if (fdpic) {
+ unsigned long __user *fdpic_func_desc =
+ (unsigned long __user *)ksig->ka.sa.sa_restorer;
+
+ err |= __get_user(ra, fdpic_func_desc);
+ } else {
+ ra = (unsigned long)ksig->ka.sa.sa_restorer;
+ }
} else {
/* Create sys_rt_sigreturn syscall in stack frame */
err |= gen_return_code(frame->retcode);
-
- if (err) {
- return -EFAULT;
- }
ra = (unsigned long) frame->retcode;
}
- /*
+ if (err)
+ return -EFAULT;
+
+ /*
* Create signal handler execution context.
* Return context not modified until this point.
*/
@@ -394,8 +412,7 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
/* Set up registers for signal handler; preserve the threadptr */
tp = regs->threadptr;
ps = regs->ps;
- start_thread(regs, (unsigned long) ksig->ka.sa.sa_handler,
- (unsigned long) frame);
+ start_thread(regs, handler, (unsigned long)frame);
/* Set up a stack frame for a call4 if userspace uses windowed ABI */
if (ps & PS_WOE_MASK) {
@@ -413,6 +430,8 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
regs->areg[base + 4] = (unsigned long) &frame->uc;
regs->threadptr = tp;
regs->ps = ps;
+ if (fdpic)
+ regs->areg[base + 11] = handler_fdpic_GOT;
pr_debug("SIG rt deliver (%s:%d): signal=%d sp=%p pc=%08lx\n",
current->comm, current->pid, sig, frame, regs->pc);
diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c
index 2a31b1ab0c9f..17a7ef86fd0d 100644
--- a/arch/xtensa/kernel/xtensa_ksyms.c
+++ b/arch/xtensa/kernel/xtensa_ksyms.c
@@ -56,6 +56,8 @@ EXPORT_SYMBOL(empty_zero_page);
*/
extern long long __ashrdi3(long long, int);
extern long long __ashldi3(long long, int);
+extern long long __bswapdi2(long long);
+extern int __bswapsi2(int);
extern long long __lshrdi3(long long, int);
extern int __divsi3(int, int);
extern int __modsi3(int, int);
@@ -66,6 +68,8 @@ extern unsigned long long __umulsidi3(unsigned int, unsigned int);
EXPORT_SYMBOL(__ashldi3);
EXPORT_SYMBOL(__ashrdi3);
+EXPORT_SYMBOL(__bswapdi2);
+EXPORT_SYMBOL(__bswapsi2);
EXPORT_SYMBOL(__lshrdi3);
EXPORT_SYMBOL(__divsi3);
EXPORT_SYMBOL(__modsi3);
diff --git a/arch/xtensa/lib/Makefile b/arch/xtensa/lib/Makefile
index 7ecef0519a27..c9c2614188f7 100644
--- a/arch/xtensa/lib/Makefile
+++ b/arch/xtensa/lib/Makefile
@@ -4,7 +4,7 @@
#
lib-y += memcopy.o memset.o checksum.o \
- ashldi3.o ashrdi3.o lshrdi3.o \
+ ashldi3.o ashrdi3.o bswapdi2.o bswapsi2.o lshrdi3.o \
divsi3.o udivsi3.o modsi3.o umodsi3.o mulsi3.o umulsidi3.o \
usercopy.o strncpy_user.o strnlen_user.o
lib-$(CONFIG_PCI) += pci-auto.o
diff --git a/arch/xtensa/lib/bswapdi2.S b/arch/xtensa/lib/bswapdi2.S
new file mode 100644
index 000000000000..d8e52e05eba6
--- /dev/null
+++ b/arch/xtensa/lib/bswapdi2.S
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */
+#include <linux/linkage.h>
+#include <asm/asmmacro.h>
+#include <asm/core.h>
+
+ENTRY(__bswapdi2)
+
+ abi_entry_default
+ ssai 8
+ srli a4, a2, 16
+ src a4, a4, a2
+ src a4, a4, a4
+ src a4, a2, a4
+ srli a2, a3, 16
+ src a2, a2, a3
+ src a2, a2, a2
+ src a2, a3, a2
+ mov a3, a4
+ abi_ret_default
+
+ENDPROC(__bswapdi2)
diff --git a/arch/xtensa/lib/bswapsi2.S b/arch/xtensa/lib/bswapsi2.S
new file mode 100644
index 000000000000..9c1de1344f79
--- /dev/null
+++ b/arch/xtensa/lib/bswapsi2.S
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */
+#include <linux/linkage.h>
+#include <asm/asmmacro.h>
+#include <asm/core.h>
+
+ENTRY(__bswapsi2)
+
+ abi_entry_default
+ ssai 8
+ srli a3, a2, 16
+ src a3, a3, a2
+ src a3, a3, a3
+ src a2, a2, a3
+ abi_ret_default
+
+ENDPROC(__bswapsi2)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index cab33bd4f252..aaf9903ad7b2 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -34,6 +34,8 @@
#include "blk-ioprio.h"
#include "blk-throttle.h"
+static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu);
+
/*
* blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation.
* blkcg_pol_register_mutex nests outside of it and synchronizes entire
@@ -56,6 +58,8 @@ static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */
bool blkcg_debug_stats = false;
+static DEFINE_RAW_SPINLOCK(blkg_stat_lock);
+
#define BLKG_DESTROY_BATCH_SIZE 64
/*
@@ -163,10 +167,20 @@ static void blkg_free(struct blkcg_gq *blkg)
static void __blkg_release(struct rcu_head *rcu)
{
struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
+ struct blkcg *blkcg = blkg->blkcg;
+ int cpu;
#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
WARN_ON(!bio_list_empty(&blkg->async_bios));
#endif
+ /*
+ * Flush all the non-empty percpu lockless lists before releasing
+ * us, given these stat belongs to us.
+ *
+ * blkg_stat_lock is for serializing blkg stat update
+ */
+ for_each_possible_cpu(cpu)
+ __blkcg_rstat_flush(blkcg, cpu);
/* release the blkcg and parent blkg refs this blkg has been holding */
css_put(&blkg->blkcg->css);
@@ -965,16 +979,12 @@ static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur,
u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
}
-static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
+static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu)
{
- struct blkcg *blkcg = css_to_blkcg(css);
struct llist_head *lhead = per_cpu_ptr(blkcg->lhead, cpu);
struct llist_node *lnode;
struct blkg_iostat_set *bisc, *next_bisc;
-
- /* Root-level stats are sourced from system-wide IO stats */
- if (!cgroup_parent(css->cgroup))
- return;
+ unsigned long flags;
rcu_read_lock();
@@ -983,6 +993,14 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
goto out;
/*
+ * For covering concurrent parent blkg update from blkg_release().
+ *
+ * When flushing from cgroup, cgroup_rstat_lock is always held, so
+ * this lock won't cause contention most of time.
+ */
+ raw_spin_lock_irqsave(&blkg_stat_lock, flags);
+
+ /*
* Iterate only the iostat_cpu's queued in the lockless list.
*/
llist_for_each_entry_safe(bisc, next_bisc, lnode, lnode) {
@@ -1005,13 +1023,19 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
if (parent && parent->parent)
blkcg_iostat_update(parent, &blkg->iostat.cur,
&blkg->iostat.last);
- percpu_ref_put(&blkg->refcnt);
}
-
+ raw_spin_unlock_irqrestore(&blkg_stat_lock, flags);
out:
rcu_read_unlock();
}
+static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
+{
+ /* Root-level stats are sourced from system-wide IO stats */
+ if (cgroup_parent(css->cgroup))
+ __blkcg_rstat_flush(css_to_blkcg(css), cpu);
+}
+
/*
* We source root cgroup stats from the system-wide stats to avoid
* tracking the same information twice and incurring overhead when no
@@ -2089,7 +2113,6 @@ void blk_cgroup_bio_start(struct bio *bio)
llist_add(&bis->lnode, lhead);
WRITE_ONCE(bis->lqueued, true);
- percpu_ref_get(&bis->blkg->refcnt);
}
u64_stats_update_end_irqrestore(&bis->sync, flags);
diff --git a/block/blk-core.c b/block/blk-core.c
index 2ae22bebeb3e..3fc68b944479 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -521,7 +521,7 @@ static inline int bio_check_eod(struct bio *bio)
sector_t maxsector = bdev_nr_sectors(bio->bi_bdev);
unsigned int nr_sectors = bio_sectors(bio);
- if (nr_sectors && maxsector &&
+ if (nr_sectors &&
(nr_sectors > maxsector ||
bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
pr_info_ratelimited("%s: attempt to access beyond end of device\n"
diff --git a/block/blk-map.c b/block/blk-map.c
index 3551c3ff17cf..44d74a30ddac 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -248,7 +248,7 @@ static struct bio *blk_rq_map_bio_alloc(struct request *rq,
{
struct bio *bio;
- if (rq->cmd_flags & REQ_ALLOC_CACHE) {
+ if (rq->cmd_flags & REQ_ALLOC_CACHE && (nr_vecs <= BIO_INLINE_VECS)) {
bio = bio_alloc_bioset(NULL, nr_vecs, rq->cmd_flags, gfp_mask,
&fs_bio_set);
if (!bio)
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 426197312069..cc57e2dd9a0b 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -40,16 +40,20 @@ void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
unsigned int users;
struct blk_mq_tags *tags = hctx->tags;
+ /*
+ * calling test_bit() prior to test_and_set_bit() is intentional,
+ * it avoids dirtying the cacheline if the queue is already active.
+ */
if (blk_mq_is_shared_tags(hctx->flags)) {
struct request_queue *q = hctx->queue;
- if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
+ if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) ||
+ test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
return;
- set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags);
} else {
- if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+ if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) ||
+ test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
return;
- set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state);
}
spin_lock_irq(&tags->lock);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 720b5061ffe8..decb6ab2d508 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -688,6 +688,10 @@ static void __blk_mq_free_request(struct request *rq)
blk_crypto_free_request(rq);
blk_pm_mark_last_busy(rq);
rq->mq_hctx = NULL;
+
+ if (rq->rq_flags & RQF_MQ_INFLIGHT)
+ __blk_mq_dec_active_requests(hctx);
+
if (rq->tag != BLK_MQ_NO_TAG)
blk_mq_put_tag(hctx->tags, ctx, rq->tag);
if (sched_tag != BLK_MQ_NO_TAG)
@@ -699,15 +703,11 @@ static void __blk_mq_free_request(struct request *rq)
void blk_mq_free_request(struct request *rq)
{
struct request_queue *q = rq->q;
- struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
if ((rq->rq_flags & RQF_USE_SCHED) &&
q->elevator->type->ops.finish_request)
q->elevator->type->ops.finish_request(rq);
- if (rq->rq_flags & RQF_MQ_INFLIGHT)
- __blk_mq_dec_active_requests(hctx);
-
if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
laptop_io_completion(q->disk->bdi);
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 896b4654ab00..4dd59059b788 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -915,6 +915,7 @@ static bool disk_has_partitions(struct gendisk *disk)
void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
{
struct request_queue *q = disk->queue;
+ unsigned int old_model = q->limits.zoned;
switch (model) {
case BLK_ZONED_HM:
@@ -952,7 +953,7 @@ void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
*/
blk_queue_zone_write_granularity(q,
queue_logical_block_size(q));
- } else {
+ } else if (old_model != BLK_ZONED_NONE) {
disk_clear_zone_settings(disk);
}
}
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 53bf5aa6f9ad..7a87506ff8e1 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -730,14 +730,16 @@ void wbt_enable_default(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
struct rq_qos *rqos;
- bool disable_flag = q->elevator &&
- test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags);
+ bool enable = IS_ENABLED(CONFIG_BLK_WBT_MQ);
+
+ if (q->elevator &&
+ test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags))
+ enable = false;
/* Throttling already enabled? */
rqos = wbt_rq_qos(q);
if (rqos) {
- if (!disable_flag &&
- RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
+ if (enable && RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT;
return;
}
@@ -746,7 +748,7 @@ void wbt_enable_default(struct gendisk *disk)
if (!blk_queue_registered(q))
return;
- if (queue_is_mq(q) && !disable_flag)
+ if (queue_is_mq(q) && enable)
wbt_init(disk);
}
EXPORT_SYMBOL_GPL(wbt_enable_default);
diff --git a/block/fops.c b/block/fops.c
index 9871bd6052b4..555b1b9ecd2c 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -505,7 +505,7 @@ static int blkdev_open(struct inode *inode, struct file *filp)
* during an unstable branch.
*/
filp->f_flags |= O_LARGEFILE;
- filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
+ filp->f_mode |= FMODE_BUF_RASYNC;
/*
* Use the file private data to store the holder for exclusive openes.
@@ -519,6 +519,9 @@ static int blkdev_open(struct inode *inode, struct file *filp)
if (IS_ERR(bdev))
return PTR_ERR(bdev);
+ if (bdev_nowait(bdev))
+ filp->f_mode |= FMODE_NOWAIT;
+
filp->f_mapping = bdev->bd_inode->i_mapping;
filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
return 0;
@@ -697,6 +700,16 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
return error;
}
+static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct inode *bd_inode = bdev_file_inode(file);
+
+ if (bdev_read_only(I_BDEV(bd_inode)))
+ return generic_file_readonly_mmap(file, vma);
+
+ return generic_file_mmap(file, vma);
+}
+
const struct file_operations def_blk_fops = {
.open = blkdev_open,
.release = blkdev_release,
@@ -704,7 +717,7 @@ const struct file_operations def_blk_fops = {
.read_iter = blkdev_read_iter,
.write_iter = blkdev_write_iter,
.iopoll = iocb_bio_iopoll,
- .mmap = generic_file_mmap,
+ .mmap = blkdev_mmap,
.fsync = blkdev_fsync,
.unlocked_ioctl = blkdev_ioctl,
#ifdef CONFIG_COMPAT
diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
index eca5671ad3f2..50c933f86b21 100644
--- a/crypto/asymmetric_keys/public_key.c
+++ b/crypto/asymmetric_keys/public_key.c
@@ -380,9 +380,10 @@ int public_key_verify_signature(const struct public_key *pkey,
struct crypto_wait cwait;
struct crypto_akcipher *tfm;
struct akcipher_request *req;
- struct scatterlist src_sg[2];
+ struct scatterlist src_sg;
char alg_name[CRYPTO_MAX_ALG_NAME];
- char *key, *ptr;
+ char *buf, *ptr;
+ size_t buf_len;
int ret;
pr_devel("==>%s()\n", __func__);
@@ -420,34 +421,37 @@ int public_key_verify_signature(const struct public_key *pkey,
if (!req)
goto error_free_tfm;
- key = kmalloc(pkey->keylen + sizeof(u32) * 2 + pkey->paramlen,
- GFP_KERNEL);
- if (!key)
+ buf_len = max_t(size_t, pkey->keylen + sizeof(u32) * 2 + pkey->paramlen,
+ sig->s_size + sig->digest_size);
+
+ buf = kmalloc(buf_len, GFP_KERNEL);
+ if (!buf)
goto error_free_req;
- memcpy(key, pkey->key, pkey->keylen);
- ptr = key + pkey->keylen;
+ memcpy(buf, pkey->key, pkey->keylen);
+ ptr = buf + pkey->keylen;
ptr = pkey_pack_u32(ptr, pkey->algo);
ptr = pkey_pack_u32(ptr, pkey->paramlen);
memcpy(ptr, pkey->params, pkey->paramlen);
if (pkey->key_is_private)
- ret = crypto_akcipher_set_priv_key(tfm, key, pkey->keylen);
+ ret = crypto_akcipher_set_priv_key(tfm, buf, pkey->keylen);
else
- ret = crypto_akcipher_set_pub_key(tfm, key, pkey->keylen);
+ ret = crypto_akcipher_set_pub_key(tfm, buf, pkey->keylen);
if (ret)
- goto error_free_key;
+ goto error_free_buf;
if (strcmp(pkey->pkey_algo, "sm2") == 0 && sig->data_size) {
ret = cert_sig_digest_update(sig, tfm);
if (ret)
- goto error_free_key;
+ goto error_free_buf;
}
- sg_init_table(src_sg, 2);
- sg_set_buf(&src_sg[0], sig->s, sig->s_size);
- sg_set_buf(&src_sg[1], sig->digest, sig->digest_size);
- akcipher_request_set_crypt(req, src_sg, NULL, sig->s_size,
+ memcpy(buf, sig->s, sig->s_size);
+ memcpy(buf + sig->s_size, sig->digest, sig->digest_size);
+
+ sg_init_one(&src_sg, buf, sig->s_size + sig->digest_size);
+ akcipher_request_set_crypt(req, &src_sg, NULL, sig->s_size,
sig->digest_size);
crypto_init_wait(&cwait);
akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
@@ -455,8 +459,8 @@ int public_key_verify_signature(const struct public_key *pkey,
crypto_req_done, &cwait);
ret = crypto_wait_req(crypto_akcipher_verify(req), &cwait);
-error_free_key:
- kfree(key);
+error_free_buf:
+ kfree(buf);
error_free_req:
akcipher_request_free(req);
error_free_tfm:
diff --git a/drivers/accel/ivpu/Kconfig b/drivers/accel/ivpu/Kconfig
index 9bdf168bf1d0..1a4c4ed9d113 100644
--- a/drivers/accel/ivpu/Kconfig
+++ b/drivers/accel/ivpu/Kconfig
@@ -7,6 +7,7 @@ config DRM_ACCEL_IVPU
depends on PCI && PCI_MSI
select FW_LOADER
select SHMEM
+ select GENERIC_ALLOCATOR
help
Choose this option if you have a system that has an 14th generation Intel CPU
or newer. VPU stands for Versatile Processing Unit and it's a CPU-integrated
diff --git a/drivers/accel/ivpu/ivpu_hw_mtl.c b/drivers/accel/ivpu/ivpu_hw_mtl.c
index 382ec127be8e..fef35422c6f0 100644
--- a/drivers/accel/ivpu/ivpu_hw_mtl.c
+++ b/drivers/accel/ivpu/ivpu_hw_mtl.c
@@ -197,6 +197,11 @@ static void ivpu_pll_init_frequency_ratios(struct ivpu_device *vdev)
hw->pll.pn_ratio = clamp_t(u8, fuse_pn_ratio, hw->pll.min_ratio, hw->pll.max_ratio);
}
+static int ivpu_hw_mtl_wait_for_vpuip_bar(struct ivpu_device *vdev)
+{
+ return REGV_POLL_FLD(MTL_VPU_HOST_SS_CPR_RST_CLR, AON, 0, 100);
+}
+
static int ivpu_pll_drive(struct ivpu_device *vdev, bool enable)
{
struct ivpu_hw_info *hw = vdev->hw;
@@ -239,6 +244,12 @@ static int ivpu_pll_drive(struct ivpu_device *vdev, bool enable)
ivpu_err(vdev, "Timed out waiting for PLL ready status\n");
return ret;
}
+
+ ret = ivpu_hw_mtl_wait_for_vpuip_bar(vdev);
+ if (ret) {
+ ivpu_err(vdev, "Timed out waiting for VPUIP bar\n");
+ return ret;
+ }
}
return 0;
@@ -256,7 +267,7 @@ static int ivpu_pll_disable(struct ivpu_device *vdev)
static void ivpu_boot_host_ss_rst_clr_assert(struct ivpu_device *vdev)
{
- u32 val = REGV_RD32(MTL_VPU_HOST_SS_CPR_RST_CLR);
+ u32 val = 0;
val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_CLR, TOP_NOC, val);
val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_CLR, DSS_MAS, val);
@@ -754,9 +765,8 @@ static int ivpu_hw_mtl_power_down(struct ivpu_device *vdev)
{
int ret = 0;
- if (ivpu_hw_mtl_reset(vdev)) {
+ if (!ivpu_hw_mtl_is_idle(vdev) && ivpu_hw_mtl_reset(vdev)) {
ivpu_err(vdev, "Failed to reset the VPU\n");
- ret = -EIO;
}
if (ivpu_pll_disable(vdev)) {
@@ -764,8 +774,10 @@ static int ivpu_hw_mtl_power_down(struct ivpu_device *vdev)
ret = -EIO;
}
- if (ivpu_hw_mtl_d0i3_enable(vdev))
- ivpu_warn(vdev, "Failed to enable D0I3\n");
+ if (ivpu_hw_mtl_d0i3_enable(vdev)) {
+ ivpu_err(vdev, "Failed to enter D0I3\n");
+ ret = -EIO;
+ }
return ret;
}
diff --git a/drivers/accel/ivpu/ivpu_hw_mtl_reg.h b/drivers/accel/ivpu/ivpu_hw_mtl_reg.h
index d83ccfd9a871..593b8ff07417 100644
--- a/drivers/accel/ivpu/ivpu_hw_mtl_reg.h
+++ b/drivers/accel/ivpu/ivpu_hw_mtl_reg.h
@@ -91,6 +91,7 @@
#define MTL_VPU_HOST_SS_CPR_RST_SET_MSS_MAS_MASK BIT_MASK(11)
#define MTL_VPU_HOST_SS_CPR_RST_CLR 0x00000098u
+#define MTL_VPU_HOST_SS_CPR_RST_CLR_AON_MASK BIT_MASK(0)
#define MTL_VPU_HOST_SS_CPR_RST_CLR_TOP_NOC_MASK BIT_MASK(1)
#define MTL_VPU_HOST_SS_CPR_RST_CLR_DSS_MAS_MASK BIT_MASK(10)
#define MTL_VPU_HOST_SS_CPR_RST_CLR_MSS_MAS_MASK BIT_MASK(11)
diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index 3adcfa80fc0e..fa0af59e39ab 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -183,9 +183,7 @@ ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct v
struct ivpu_ipc_info *ipc = vdev->ipc;
int ret;
- ret = mutex_lock_interruptible(&ipc->lock);
- if (ret)
- return ret;
+ mutex_lock(&ipc->lock);
if (!ipc->on) {
ret = -EAGAIN;
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index 3c6f1e16cf2f..d45be0615b47 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -431,6 +431,7 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32
struct ivpu_file_priv *file_priv = file->driver_priv;
struct ivpu_device *vdev = file_priv->vdev;
struct ww_acquire_ctx acquire_ctx;
+ enum dma_resv_usage usage;
struct ivpu_bo *bo;
int ret;
u32 i;
@@ -461,22 +462,28 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32
job->cmd_buf_vpu_addr = bo->vpu_addr + commands_offset;
- ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, 1, &acquire_ctx);
+ ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, buf_count,
+ &acquire_ctx);
if (ret) {
ivpu_warn(vdev, "Failed to lock reservations: %d\n", ret);
return ret;
}
- ret = dma_resv_reserve_fences(bo->base.resv, 1);
- if (ret) {
- ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret);
- goto unlock_reservations;
+ for (i = 0; i < buf_count; i++) {
+ ret = dma_resv_reserve_fences(job->bos[i]->base.resv, 1);
+ if (ret) {
+ ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret);
+ goto unlock_reservations;
+ }
}
- dma_resv_add_fence(bo->base.resv, job->done_fence, DMA_RESV_USAGE_WRITE);
+ for (i = 0; i < buf_count; i++) {
+ usage = (i == CMD_BUF_IDX) ? DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_BOOKKEEP;
+ dma_resv_add_fence(job->bos[i]->base.resv, job->done_fence, usage);
+ }
unlock_reservations:
- drm_gem_unlock_reservations((struct drm_gem_object **)job->bos, 1, &acquire_ctx);
+ drm_gem_unlock_reservations((struct drm_gem_object **)job->bos, buf_count, &acquire_ctx);
wmb(); /* Flush write combining buffers */
diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c
index 694e978aba66..b8b259b3aa63 100644
--- a/drivers/accel/ivpu/ivpu_mmu.c
+++ b/drivers/accel/ivpu/ivpu_mmu.c
@@ -587,16 +587,11 @@ static int ivpu_mmu_strtab_init(struct ivpu_device *vdev)
int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid)
{
struct ivpu_mmu_info *mmu = vdev->mmu;
- int ret;
-
- ret = mutex_lock_interruptible(&mmu->lock);
- if (ret)
- return ret;
+ int ret = 0;
- if (!mmu->on) {
- ret = 0;
+ mutex_lock(&mmu->lock);
+ if (!mmu->on)
goto unlock;
- }
ret = ivpu_mmu_cmdq_write_tlbi_nh_asid(vdev, ssid);
if (ret)
@@ -614,7 +609,7 @@ static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma)
struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab;
u64 *entry;
u64 cd[4];
- int ret;
+ int ret = 0;
if (ssid > IVPU_MMU_CDTAB_ENT_COUNT)
return -EINVAL;
@@ -655,14 +650,9 @@ static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma)
ivpu_dbg(vdev, MMU, "CDTAB %s entry (SSID=%u, dma=%pad): 0x%llx, 0x%llx, 0x%llx, 0x%llx\n",
cd_dma ? "write" : "clear", ssid, &cd_dma, cd[0], cd[1], cd[2], cd[3]);
- ret = mutex_lock_interruptible(&mmu->lock);
- if (ret)
- return ret;
-
- if (!mmu->on) {
- ret = 0;
+ mutex_lock(&mmu->lock);
+ if (!mmu->on)
goto unlock;
- }
ret = ivpu_mmu_cmdq_write_cfgi_all(vdev);
if (ret)
diff --git a/drivers/accel/qaic/qaic_control.c b/drivers/accel/qaic/qaic_control.c
index 9f216eb6f76e..5c57f7b4494e 100644
--- a/drivers/accel/qaic/qaic_control.c
+++ b/drivers/accel/qaic/qaic_control.c
@@ -997,14 +997,34 @@ static void *msg_xfer(struct qaic_device *qdev, struct wrapper_list *wrappers, u
struct xfer_queue_elem elem;
struct wire_msg *out_buf;
struct wrapper_msg *w;
+ long ret = -EAGAIN;
+ int xfer_count = 0;
int retry_count;
- long ret;
if (qdev->in_reset) {
mutex_unlock(&qdev->cntl_mutex);
return ERR_PTR(-ENODEV);
}
+ /* Attempt to avoid a partial commit of a message */
+ list_for_each_entry(w, &wrappers->list, list)
+ xfer_count++;
+
+ for (retry_count = 0; retry_count < QAIC_MHI_RETRY_MAX; retry_count++) {
+ if (xfer_count <= mhi_get_free_desc_count(qdev->cntl_ch, DMA_TO_DEVICE)) {
+ ret = 0;
+ break;
+ }
+ msleep_interruptible(QAIC_MHI_RETRY_WAIT_MS);
+ if (signal_pending(current))
+ break;
+ }
+
+ if (ret) {
+ mutex_unlock(&qdev->cntl_mutex);
+ return ERR_PTR(ret);
+ }
+
elem.seq_num = seq_num;
elem.buf = NULL;
init_completion(&elem.xfer_done);
@@ -1038,16 +1058,9 @@ static void *msg_xfer(struct qaic_device *qdev, struct wrapper_list *wrappers, u
list_for_each_entry(w, &wrappers->list, list) {
kref_get(&w->ref_count);
retry_count = 0;
-retry:
ret = mhi_queue_buf(qdev->cntl_ch, DMA_TO_DEVICE, &w->msg, w->len,
list_is_last(&w->list, &wrappers->list) ? MHI_EOT : MHI_CHAIN);
if (ret) {
- if (ret == -EAGAIN && retry_count++ < QAIC_MHI_RETRY_MAX) {
- msleep_interruptible(QAIC_MHI_RETRY_WAIT_MS);
- if (!signal_pending(current))
- goto retry;
- }
-
qdev->cntl_lost_buf = true;
kref_put(&w->ref_count, free_wrapper);
mutex_unlock(&qdev->cntl_mutex);
@@ -1249,7 +1262,7 @@ dma_cont_failed:
int qaic_manage_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
- struct qaic_manage_msg *user_msg;
+ struct qaic_manage_msg *user_msg = data;
struct qaic_device *qdev;
struct manage_msg *msg;
struct qaic_user *usr;
@@ -1258,6 +1271,9 @@ int qaic_manage_ioctl(struct drm_device *dev, void *data, struct drm_file *file_
int usr_rcu_id;
int ret;
+ if (user_msg->len > QAIC_MANAGE_MAX_MSG_LENGTH)
+ return -EINVAL;
+
usr = file_priv->driver_priv;
usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
@@ -1275,13 +1291,6 @@ int qaic_manage_ioctl(struct drm_device *dev, void *data, struct drm_file *file_
return -ENODEV;
}
- user_msg = data;
-
- if (user_msg->len > QAIC_MANAGE_MAX_MSG_LENGTH) {
- ret = -EINVAL;
- goto out;
- }
-
msg = kzalloc(QAIC_MANAGE_MAX_MSG_LENGTH + sizeof(*msg), GFP_KERNEL);
if (!msg) {
ret = -ENOMEM;
diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c
index c0a574cd1b35..e9a1cb779b30 100644
--- a/drivers/accel/qaic/qaic_data.c
+++ b/drivers/accel/qaic/qaic_data.c
@@ -23,6 +23,7 @@
#include <linux/wait.h>
#include <drm/drm_file.h>
#include <drm/drm_gem.h>
+#include <drm/drm_prime.h>
#include <drm/drm_print.h>
#include <uapi/drm/qaic_accel.h>
@@ -591,7 +592,7 @@ static int qaic_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struc
struct qaic_bo *bo = to_qaic_bo(obj);
unsigned long offset = 0;
struct scatterlist *sg;
- int ret;
+ int ret = 0;
if (obj->import_attach)
return -EINVAL;
@@ -616,8 +617,7 @@ static void qaic_free_object(struct drm_gem_object *obj)
if (obj->import_attach) {
/* DMABUF/PRIME Path */
- dma_buf_detach(obj->import_attach->dmabuf, obj->import_attach);
- dma_buf_put(obj->import_attach->dmabuf);
+ drm_prime_gem_destroy(obj, NULL);
} else {
/* Private buffer allocation path */
qaic_free_sgt(bo->sgt);
@@ -663,6 +663,10 @@ int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
if (args->pad)
return -EINVAL;
+ size = PAGE_ALIGN(args->size);
+ if (size == 0)
+ return -EINVAL;
+
usr = file_priv->driver_priv;
usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
if (!usr->qddev) {
@@ -677,12 +681,6 @@ int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
goto unlock_dev_srcu;
}
- size = PAGE_ALIGN(args->size);
- if (size == 0) {
- ret = -EINVAL;
- goto unlock_dev_srcu;
- }
-
bo = qaic_alloc_init_bo();
if (IS_ERR(bo)) {
ret = PTR_ERR(bo);
@@ -926,8 +924,8 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
{
struct qaic_attach_slice_entry *slice_ent;
struct qaic_attach_slice *args = data;
+ int rcu_id, usr_rcu_id, qdev_rcu_id;
struct dma_bridge_chan *dbc;
- int usr_rcu_id, qdev_rcu_id;
struct drm_gem_object *obj;
struct qaic_device *qdev;
unsigned long arg_size;
@@ -936,6 +934,22 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
struct qaic_bo *bo;
int ret;
+ if (args->hdr.count == 0)
+ return -EINVAL;
+
+ arg_size = args->hdr.count * sizeof(*slice_ent);
+ if (arg_size / args->hdr.count != sizeof(*slice_ent))
+ return -EINVAL;
+
+ if (args->hdr.size == 0)
+ return -EINVAL;
+
+ if (!(args->hdr.dir == DMA_TO_DEVICE || args->hdr.dir == DMA_FROM_DEVICE))
+ return -EINVAL;
+
+ if (args->data == 0)
+ return -EINVAL;
+
usr = file_priv->driver_priv;
usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
if (!usr->qddev) {
@@ -950,43 +964,11 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
goto unlock_dev_srcu;
}
- if (args->hdr.count == 0) {
- ret = -EINVAL;
- goto unlock_dev_srcu;
- }
-
- arg_size = args->hdr.count * sizeof(*slice_ent);
- if (arg_size / args->hdr.count != sizeof(*slice_ent)) {
- ret = -EINVAL;
- goto unlock_dev_srcu;
- }
-
if (args->hdr.dbc_id >= qdev->num_dbc) {
ret = -EINVAL;
goto unlock_dev_srcu;
}
- if (args->hdr.size == 0) {
- ret = -EINVAL;
- goto unlock_dev_srcu;
- }
-
- if (!(args->hdr.dir == DMA_TO_DEVICE || args->hdr.dir == DMA_FROM_DEVICE)) {
- ret = -EINVAL;
- goto unlock_dev_srcu;
- }
-
- dbc = &qdev->dbc[args->hdr.dbc_id];
- if (dbc->usr != usr) {
- ret = -EINVAL;
- goto unlock_dev_srcu;
- }
-
- if (args->data == 0) {
- ret = -EINVAL;
- goto unlock_dev_srcu;
- }
-
user_data = u64_to_user_ptr(args->data);
slice_ent = kzalloc(arg_size, GFP_KERNEL);
@@ -1013,9 +995,21 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
bo = to_qaic_bo(obj);
+ if (bo->sliced) {
+ ret = -EINVAL;
+ goto put_bo;
+ }
+
+ dbc = &qdev->dbc[args->hdr.dbc_id];
+ rcu_id = srcu_read_lock(&dbc->ch_lock);
+ if (dbc->usr != usr) {
+ ret = -EINVAL;
+ goto unlock_ch_srcu;
+ }
+
ret = qaic_prepare_bo(qdev, bo, &args->hdr);
if (ret)
- goto put_bo;
+ goto unlock_ch_srcu;
ret = qaic_attach_slicing_bo(qdev, bo, &args->hdr, slice_ent);
if (ret)
@@ -1025,6 +1019,7 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, args->hdr.dir);
bo->dbc = dbc;
+ srcu_read_unlock(&dbc->ch_lock, rcu_id);
drm_gem_object_put(obj);
srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
@@ -1033,6 +1028,8 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
unprepare_bo:
qaic_unprepare_bo(qdev, bo);
+unlock_ch_srcu:
+ srcu_read_unlock(&dbc->ch_lock, rcu_id);
put_bo:
drm_gem_object_put(obj);
free_slice_ent:
@@ -1316,7 +1313,6 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr
received_ts = ktime_get_ns();
size = is_partial ? sizeof(*pexec) : sizeof(*exec);
-
n = (unsigned long)size * args->hdr.count;
if (args->hdr.count == 0 || n / args->hdr.count != size)
return -EINVAL;
@@ -1665,6 +1661,9 @@ int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file
int rcu_id;
int ret;
+ if (args->pad != 0)
+ return -EINVAL;
+
usr = file_priv->driver_priv;
usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
if (!usr->qddev) {
@@ -1679,11 +1678,6 @@ int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file
goto unlock_dev_srcu;
}
- if (args->pad != 0) {
- ret = -EINVAL;
- goto unlock_dev_srcu;
- }
-
if (args->dbc_id >= qdev->num_dbc) {
ret = -EINVAL;
goto unlock_dev_srcu;
@@ -1855,6 +1849,11 @@ void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id)
dbc->usr = NULL;
empty_xfer_list(qdev, dbc);
synchronize_srcu(&dbc->ch_lock);
+ /*
+ * Threads holding channel lock, may add more elements in the xfer_list.
+ * Flush out these elements from xfer_list.
+ */
+ empty_xfer_list(qdev, dbc);
}
void release_dbc(struct qaic_device *qdev, u32 dbc_id)
diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c
index ff80eb571729..b5ba550a0c04 100644
--- a/drivers/accel/qaic/qaic_drv.c
+++ b/drivers/accel/qaic/qaic_drv.c
@@ -97,6 +97,7 @@ static int qaic_open(struct drm_device *dev, struct drm_file *file)
cleanup_usr:
cleanup_srcu_struct(&usr->qddev_lock);
+ ida_free(&qaic_usrs, usr->handle);
free_usr:
kfree(usr);
dev_unlock:
@@ -224,6 +225,9 @@ static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id)
struct qaic_user *usr;
qddev = qdev->qddev;
+ qdev->qddev = NULL;
+ if (!qddev)
+ return;
/*
* Existing users get unresolvable errors till they close FDs.
@@ -262,8 +266,8 @@ static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id)
static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
{
+ u16 major = -1, minor = -1;
struct qaic_device *qdev;
- u16 major, minor;
int ret;
/*
diff --git a/drivers/acpi/acpica/achware.h b/drivers/acpi/acpica/achware.h
index ebf8fd373cf7..79bbfe00d241 100644
--- a/drivers/acpi/acpica/achware.h
+++ b/drivers/acpi/acpica/achware.h
@@ -101,8 +101,6 @@ acpi_status
acpi_hw_get_gpe_status(struct acpi_gpe_event_info *gpe_event_info,
acpi_event_status *event_status);
-acpi_status acpi_hw_disable_all_gpes(void);
-
acpi_status acpi_hw_enable_all_runtime_gpes(void);
acpi_status acpi_hw_enable_all_wakeup_gpes(void);
diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h
index 1d6ef9654725..67c2c3b959e1 100644
--- a/drivers/acpi/apei/apei-internal.h
+++ b/drivers/acpi/apei/apei-internal.h
@@ -7,7 +7,6 @@
#ifndef APEI_INTERNAL_H
#define APEI_INTERNAL_H
-#include <linux/cper.h>
#include <linux/acpi.h>
struct apei_exec_context;
@@ -130,10 +129,5 @@ static inline u32 cper_estatus_len(struct acpi_hest_generic_status *estatus)
return sizeof(*estatus) + estatus->data_length;
}
-void cper_estatus_print(const char *pfx,
- const struct acpi_hest_generic_status *estatus);
-int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus);
-int cper_estatus_check(const struct acpi_hest_generic_status *estatus);
-
int apei_osc_setup(void);
#endif
diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c
index c23eb75866d0..7514e38d5640 100644
--- a/drivers/acpi/apei/bert.c
+++ b/drivers/acpi/apei/bert.c
@@ -23,6 +23,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/acpi.h>
+#include <linux/cper.h>
#include <linux/io.h>
#include "apei-internal.h"
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index e8492b3a393a..0800a9d77558 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -516,6 +516,17 @@ static const struct dmi_system_id maingear_laptop[] = {
{ }
};
+static const struct dmi_system_id lg_laptop[] = {
+ {
+ .ident = "LG Electronics 17U70P",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "LG Electronics"),
+ DMI_MATCH(DMI_BOARD_NAME, "17U70P"),
+ },
+ },
+ { }
+};
+
struct irq_override_cmp {
const struct dmi_system_id *system;
unsigned char irq;
@@ -532,6 +543,7 @@ static const struct irq_override_cmp override_table[] = {
{ lenovo_laptop, 10, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, true },
{ tongfang_gm_rg, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true },
{ maingear_laptop, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true },
+ { lg_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false },
};
static bool acpi_dev_irq_override(u32 gsi, u8 triggering, u8 polarity,
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 72470b9f16c4..f32570f72b90 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -636,11 +636,19 @@ static int acpi_suspend_enter(suspend_state_t pm_state)
}
/*
- * Disable and clear GPE status before interrupt is enabled. Some GPEs
- * (like wakeup GPE) haven't handler, this can avoid such GPE misfire.
- * acpi_leave_sleep_state will reenable specific GPEs later
+ * Disable all GPE and clear their status bits before interrupts are
+ * enabled. Some GPEs (like wakeup GPEs) have no handlers and this can
+ * prevent them from producing spurious interrups.
+ *
+ * acpi_leave_sleep_state() will reenable specific GPEs later.
+ *
+ * Because this code runs on one CPU with disabled interrupts (all of
+ * the other CPUs are offline at this time), it need not acquire any
+ * sleeping locks which may trigger an implicit preemption point even
+ * if there is no contention, so avoid doing that by using a low-level
+ * library routine here.
*/
- acpi_disable_all_gpes();
+ acpi_hw_disable_all_gpes();
/* Allow EC transactions to happen. */
acpi_ec_unblock_transactions();
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index fb56bfc45096..8fb7672021ee 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -1934,24 +1934,23 @@ static void binder_deferred_fd_close(int fd)
static void binder_transaction_buffer_release(struct binder_proc *proc,
struct binder_thread *thread,
struct binder_buffer *buffer,
- binder_size_t failed_at,
+ binder_size_t off_end_offset,
bool is_failure)
{
int debug_id = buffer->debug_id;
- binder_size_t off_start_offset, buffer_offset, off_end_offset;
+ binder_size_t off_start_offset, buffer_offset;
binder_debug(BINDER_DEBUG_TRANSACTION,
"%d buffer release %d, size %zd-%zd, failed at %llx\n",
proc->pid, buffer->debug_id,
buffer->data_size, buffer->offsets_size,
- (unsigned long long)failed_at);
+ (unsigned long long)off_end_offset);
if (buffer->target_node)
binder_dec_node(buffer->target_node, 1, 0);
off_start_offset = ALIGN(buffer->data_size, sizeof(void *));
- off_end_offset = is_failure && failed_at ? failed_at :
- off_start_offset + buffer->offsets_size;
+
for (buffer_offset = off_start_offset; buffer_offset < off_end_offset;
buffer_offset += sizeof(binder_size_t)) {
struct binder_object_header *hdr;
@@ -2111,6 +2110,21 @@ static void binder_transaction_buffer_release(struct binder_proc *proc,
}
}
+/* Clean up all the objects in the buffer */
+static inline void binder_release_entire_buffer(struct binder_proc *proc,
+ struct binder_thread *thread,
+ struct binder_buffer *buffer,
+ bool is_failure)
+{
+ binder_size_t off_end_offset;
+
+ off_end_offset = ALIGN(buffer->data_size, sizeof(void *));
+ off_end_offset += buffer->offsets_size;
+
+ binder_transaction_buffer_release(proc, thread, buffer,
+ off_end_offset, is_failure);
+}
+
static int binder_translate_binder(struct flat_binder_object *fp,
struct binder_transaction *t,
struct binder_thread *thread)
@@ -2806,7 +2820,7 @@ static int binder_proc_transaction(struct binder_transaction *t,
t_outdated->buffer = NULL;
buffer->transaction = NULL;
trace_binder_transaction_update_buffer_release(buffer);
- binder_transaction_buffer_release(proc, NULL, buffer, 0, 0);
+ binder_release_entire_buffer(proc, NULL, buffer, false);
binder_alloc_free_buf(&proc->alloc, buffer);
kfree(t_outdated);
binder_stats_deleted(BINDER_STAT_TRANSACTION);
@@ -3775,7 +3789,7 @@ binder_free_buf(struct binder_proc *proc,
binder_node_inner_unlock(buf_node);
}
trace_binder_transaction_buffer_release(buffer);
- binder_transaction_buffer_release(proc, thread, buffer, 0, is_failure);
+ binder_release_entire_buffer(proc, thread, buffer, is_failure);
binder_alloc_free_buf(&proc->alloc, buffer);
}
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index 55a3c3c2409f..662a2a2e2e84 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -212,8 +212,8 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
mm = alloc->mm;
if (mm) {
- mmap_read_lock(mm);
- vma = vma_lookup(mm, alloc->vma_addr);
+ mmap_write_lock(mm);
+ vma = alloc->vma;
}
if (!vma && need_mm) {
@@ -270,7 +270,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
trace_binder_alloc_page_end(alloc, index);
}
if (mm) {
- mmap_read_unlock(mm);
+ mmap_write_unlock(mm);
mmput(mm);
}
return 0;
@@ -303,21 +303,24 @@ err_page_ptr_cleared:
}
err_no_vma:
if (mm) {
- mmap_read_unlock(mm);
+ mmap_write_unlock(mm);
mmput(mm);
}
return vma ? -ENOMEM : -ESRCH;
}
+static inline void binder_alloc_set_vma(struct binder_alloc *alloc,
+ struct vm_area_struct *vma)
+{
+ /* pairs with smp_load_acquire in binder_alloc_get_vma() */
+ smp_store_release(&alloc->vma, vma);
+}
+
static inline struct vm_area_struct *binder_alloc_get_vma(
struct binder_alloc *alloc)
{
- struct vm_area_struct *vma = NULL;
-
- if (alloc->vma_addr)
- vma = vma_lookup(alloc->mm, alloc->vma_addr);
-
- return vma;
+ /* pairs with smp_store_release in binder_alloc_set_vma() */
+ return smp_load_acquire(&alloc->vma);
}
static bool debug_low_async_space_locked(struct binder_alloc *alloc, int pid)
@@ -380,15 +383,13 @@ static struct binder_buffer *binder_alloc_new_buf_locked(
size_t size, data_offsets_size;
int ret;
- mmap_read_lock(alloc->mm);
+ /* Check binder_alloc is fully initialized */
if (!binder_alloc_get_vma(alloc)) {
- mmap_read_unlock(alloc->mm);
binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
"%d: binder_alloc_buf, no vma\n",
alloc->pid);
return ERR_PTR(-ESRCH);
}
- mmap_read_unlock(alloc->mm);
data_offsets_size = ALIGN(data_size, sizeof(void *)) +
ALIGN(offsets_size, sizeof(void *));
@@ -778,7 +779,9 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc,
buffer->free = 1;
binder_insert_free_buffer(alloc, buffer);
alloc->free_async_space = alloc->buffer_size / 2;
- alloc->vma_addr = vma->vm_start;
+
+ /* Signal binder_alloc is fully initialized */
+ binder_alloc_set_vma(alloc, vma);
return 0;
@@ -808,8 +811,7 @@ void binder_alloc_deferred_release(struct binder_alloc *alloc)
buffers = 0;
mutex_lock(&alloc->mutex);
- BUG_ON(alloc->vma_addr &&
- vma_lookup(alloc->mm, alloc->vma_addr));
+ BUG_ON(alloc->vma);
while ((n = rb_first(&alloc->allocated_buffers))) {
buffer = rb_entry(n, struct binder_buffer, rb_node);
@@ -916,25 +918,17 @@ void binder_alloc_print_pages(struct seq_file *m,
* Make sure the binder_alloc is fully initialized, otherwise we might
* read inconsistent state.
*/
-
- mmap_read_lock(alloc->mm);
- if (binder_alloc_get_vma(alloc) == NULL) {
- mmap_read_unlock(alloc->mm);
- goto uninitialized;
- }
-
- mmap_read_unlock(alloc->mm);
- for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) {
- page = &alloc->pages[i];
- if (!page->page_ptr)
- free++;
- else if (list_empty(&page->lru))
- active++;
- else
- lru++;
+ if (binder_alloc_get_vma(alloc) != NULL) {
+ for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) {
+ page = &alloc->pages[i];
+ if (!page->page_ptr)
+ free++;
+ else if (list_empty(&page->lru))
+ active++;
+ else
+ lru++;
+ }
}
-
-uninitialized:
mutex_unlock(&alloc->mutex);
seq_printf(m, " pages: %d:%d:%d\n", active, lru, free);
seq_printf(m, " pages high watermark: %zu\n", alloc->pages_high);
@@ -969,7 +963,7 @@ int binder_alloc_get_allocated_count(struct binder_alloc *alloc)
*/
void binder_alloc_vma_close(struct binder_alloc *alloc)
{
- alloc->vma_addr = 0;
+ binder_alloc_set_vma(alloc, NULL);
}
/**
diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h
index 0f811ac4bcff..138d1d5af9ce 100644
--- a/drivers/android/binder_alloc.h
+++ b/drivers/android/binder_alloc.h
@@ -75,7 +75,7 @@ struct binder_lru_page {
/**
* struct binder_alloc - per-binder proc state for binder allocator
* @mutex: protects binder_alloc fields
- * @vma_addr: vm_area_struct->vm_start passed to mmap_handler
+ * @vma: vm_area_struct passed to mmap_handler
* (invariant after mmap)
* @mm: copy of task->mm (invariant after open)
* @buffer: base of per-proc address space mapped via mmap
@@ -99,7 +99,7 @@ struct binder_lru_page {
*/
struct binder_alloc {
struct mutex mutex;
- unsigned long vma_addr;
+ struct vm_area_struct *vma;
struct mm_struct *mm;
void __user *buffer;
struct list_head buffers;
diff --git a/drivers/android/binder_alloc_selftest.c b/drivers/android/binder_alloc_selftest.c
index 43a881073a42..c2b323bc3b3a 100644
--- a/drivers/android/binder_alloc_selftest.c
+++ b/drivers/android/binder_alloc_selftest.c
@@ -287,7 +287,7 @@ void binder_selftest_alloc(struct binder_alloc *alloc)
if (!binder_selftest_run)
return;
mutex_lock(&binder_selftest_lock);
- if (!binder_selftest_run || !alloc->vma_addr)
+ if (!binder_selftest_run || !alloc->vma)
goto done;
pr_info("STARTED\n");
binder_selftest_alloc_offset(alloc, end_offset, 0);
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 8bf612bdd61a..b4f246f0cac7 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -5348,7 +5348,7 @@ struct ata_port *ata_port_alloc(struct ata_host *host)
mutex_init(&ap->scsi_scan_mutex);
INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug);
- INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
+ INIT_DELAYED_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
INIT_LIST_HEAD(&ap->eh_done_q);
init_waitqueue_head(&ap->eh_wait_q);
init_completion(&ap->park_req_pending);
@@ -5954,6 +5954,7 @@ static void ata_port_detach(struct ata_port *ap)
WARN_ON(!(ap->pflags & ATA_PFLAG_UNLOADED));
cancel_delayed_work_sync(&ap->hotplug_task);
+ cancel_delayed_work_sync(&ap->scsi_rescan_task);
skip_eh:
/* clean up zpodd on port removal */
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index a6c901811802..6f8d14191593 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -2984,7 +2984,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link,
ehc->i.flags |= ATA_EHI_SETMODE;
/* schedule the scsi_rescan_device() here */
- schedule_work(&(ap->scsi_rescan_task));
+ schedule_delayed_work(&ap->scsi_rescan_task, 0);
} else if (dev->class == ATA_DEV_UNKNOWN &&
ehc->tries[dev->devno] &&
ata_class_enabled(ehc->classes[dev->devno])) {
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 7bb12deab70c..551077cea4e4 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -2694,18 +2694,36 @@ static unsigned int atapi_xlat(struct ata_queued_cmd *qc)
return 0;
}
-static struct ata_device *ata_find_dev(struct ata_port *ap, int devno)
+static struct ata_device *ata_find_dev(struct ata_port *ap, unsigned int devno)
{
- if (!sata_pmp_attached(ap)) {
- if (likely(devno >= 0 &&
- devno < ata_link_max_devices(&ap->link)))
+ /*
+ * For the non-PMP case, ata_link_max_devices() returns 1 (SATA case),
+ * or 2 (IDE master + slave case). However, the former case includes
+ * libsas hosted devices which are numbered per scsi host, leading
+ * to devno potentially being larger than 0 but with each struct
+ * ata_device having its own struct ata_port and struct ata_link.
+ * To accommodate these, ignore devno and always use device number 0.
+ */
+ if (likely(!sata_pmp_attached(ap))) {
+ int link_max_devices = ata_link_max_devices(&ap->link);
+
+ if (link_max_devices == 1)
+ return &ap->link.device[0];
+
+ if (devno < link_max_devices)
return &ap->link.device[devno];
- } else {
- if (likely(devno >= 0 &&
- devno < ap->nr_pmp_links))
- return &ap->pmp_link[devno].device[0];
+
+ return NULL;
}
+ /*
+ * For PMP-attached devices, the device number corresponds to C
+ * (channel) of SCSI [H:C:I:L], indicating the port pmp link
+ * for the device.
+ */
+ if (devno < ap->nr_pmp_links)
+ return &ap->pmp_link[devno].device[0];
+
return NULL;
}
@@ -4579,10 +4597,11 @@ int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
void ata_scsi_dev_rescan(struct work_struct *work)
{
struct ata_port *ap =
- container_of(work, struct ata_port, scsi_rescan_task);
+ container_of(work, struct ata_port, scsi_rescan_task.work);
struct ata_link *link;
struct ata_device *dev;
unsigned long flags;
+ bool delay_rescan = false;
mutex_lock(&ap->scsi_scan_mutex);
spin_lock_irqsave(ap->lock, flags);
@@ -4596,6 +4615,21 @@ void ata_scsi_dev_rescan(struct work_struct *work)
if (scsi_device_get(sdev))
continue;
+ /*
+ * If the rescan work was scheduled because of a resume
+ * event, the port is already fully resumed, but the
+ * SCSI device may not yet be fully resumed. In such
+ * case, executing scsi_rescan_device() may cause a
+ * deadlock with the PM code on device_lock(). Prevent
+ * this by giving up and retrying rescan after a short
+ * delay.
+ */
+ delay_rescan = sdev->sdev_gendev.power.is_suspended;
+ if (delay_rescan) {
+ scsi_device_put(sdev);
+ break;
+ }
+
spin_unlock_irqrestore(ap->lock, flags);
scsi_rescan_device(&(sdev->sdev_gendev));
scsi_device_put(sdev);
@@ -4605,4 +4639,8 @@ void ata_scsi_dev_rescan(struct work_struct *work)
spin_unlock_irqrestore(ap->lock, flags);
mutex_unlock(&ap->scsi_scan_mutex);
+
+ if (delay_rescan)
+ schedule_delayed_work(&ap->scsi_rescan_task,
+ msecs_to_jiffies(5));
}
diff --git a/drivers/auxdisplay/ht16k33.c b/drivers/auxdisplay/ht16k33.c
index 02425991c159..d44814b9562a 100644
--- a/drivers/auxdisplay/ht16k33.c
+++ b/drivers/auxdisplay/ht16k33.c
@@ -820,7 +820,7 @@ static const struct of_device_id ht16k33_of_match[] = {
MODULE_DEVICE_TABLE(of, ht16k33_of_match);
static struct i2c_driver ht16k33_driver = {
- .probe_new = ht16k33_probe,
+ .probe = ht16k33_probe,
.remove = ht16k33_remove,
.driver = {
.name = DRIVER_NAME,
diff --git a/drivers/auxdisplay/lcd2s.c b/drivers/auxdisplay/lcd2s.c
index 135831a16514..6422be0dfe20 100644
--- a/drivers/auxdisplay/lcd2s.c
+++ b/drivers/auxdisplay/lcd2s.c
@@ -365,7 +365,7 @@ static struct i2c_driver lcd2s_i2c_driver = {
.name = "lcd2s",
.of_match_table = lcd2s_of_table,
},
- .probe_new = lcd2s_i2c_probe,
+ .probe = lcd2s_i2c_probe,
.remove = lcd2s_i2c_remove,
.id_table = lcd2s_i2c_id,
};
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index bba3482ddeb8..cbae8be1fe52 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -388,6 +388,16 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
continue;/* skip if itself or no cacheinfo */
for (sib_index = 0; sib_index < cache_leaves(i); sib_index++) {
sib_leaf = per_cpu_cacheinfo_idx(i, sib_index);
+
+ /*
+ * Comparing cache IDs only makes sense if the leaves
+ * belong to the same cache level of same type. Skip
+ * the check if level and type do not match.
+ */
+ if (sib_leaf->level != this_leaf->level ||
+ sib_leaf->type != this_leaf->type)
+ continue;
+
if (cache_leaves_are_shared(this_leaf, sib_leaf)) {
cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map);
cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
@@ -400,11 +410,14 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
coherency_max_size = this_leaf->coherency_line_size;
}
+ /* shared_cpu_map is now populated for the cpu */
+ this_cpu_ci->cpu_map_populated = true;
return 0;
}
static void cache_shared_cpu_map_remove(unsigned int cpu)
{
+ struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
struct cacheinfo *this_leaf, *sib_leaf;
unsigned int sibling, index, sib_index;
@@ -419,6 +432,16 @@ static void cache_shared_cpu_map_remove(unsigned int cpu)
for (sib_index = 0; sib_index < cache_leaves(sibling); sib_index++) {
sib_leaf = per_cpu_cacheinfo_idx(sibling, sib_index);
+
+ /*
+ * Comparing cache IDs only makes sense if the leaves
+ * belong to the same cache level of same type. Skip
+ * the check if level and type do not match.
+ */
+ if (sib_leaf->level != this_leaf->level ||
+ sib_leaf->type != this_leaf->type)
+ continue;
+
if (cache_leaves_are_shared(this_leaf, sib_leaf)) {
cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map);
cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map);
@@ -427,6 +450,9 @@ static void cache_shared_cpu_map_remove(unsigned int cpu)
}
}
}
+
+ /* cpu is no longer populated in the shared map */
+ this_cpu_ci->cpu_map_populated = false;
}
static void free_cache_attributes(unsigned int cpu)
diff --git a/drivers/base/class.c b/drivers/base/class.c
index ac1808d1a2e8..05d9df90f621 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -320,6 +320,7 @@ void class_dev_iter_init(struct class_dev_iter *iter, const struct class *class,
start_knode = &start->p->knode_class;
klist_iter_init_node(&sp->klist_devices, &iter->ki, start_knode);
iter->type = type;
+ iter->sp = sp;
}
EXPORT_SYMBOL_GPL(class_dev_iter_init);
@@ -361,6 +362,7 @@ EXPORT_SYMBOL_GPL(class_dev_iter_next);
void class_dev_iter_exit(struct class_dev_iter *iter)
{
klist_iter_exit(&iter->ki);
+ subsys_put(iter->sp);
}
EXPORT_SYMBOL_GPL(class_dev_iter_exit);
diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c
index 9d79d5ad9102..b58c42f1b1ce 100644
--- a/drivers/base/firmware_loader/main.c
+++ b/drivers/base/firmware_loader/main.c
@@ -812,7 +812,7 @@ static void fw_log_firmware_info(const struct firmware *fw, const char *name, st
char *outbuf;
alg = crypto_alloc_shash("sha256", 0, 0);
- if (!alg)
+ if (IS_ERR(alg))
return;
sha256buf = kmalloc(SHA256_DIGEST_SIZE, GFP_KERNEL);
diff --git a/drivers/base/regmap/Kconfig b/drivers/base/regmap/Kconfig
index 33a8366e22a5..0db2021f7477 100644
--- a/drivers/base/regmap/Kconfig
+++ b/drivers/base/regmap/Kconfig
@@ -4,16 +4,23 @@
# subsystems should select the appropriate symbols.
config REGMAP
+ bool "Register Map support" if KUNIT_ALL_TESTS
default y if (REGMAP_I2C || REGMAP_SPI || REGMAP_SPMI || REGMAP_W1 || REGMAP_AC97 || REGMAP_MMIO || REGMAP_IRQ || REGMAP_SOUNDWIRE || REGMAP_SOUNDWIRE_MBQ || REGMAP_SCCB || REGMAP_I3C || REGMAP_SPI_AVMM || REGMAP_MDIO || REGMAP_FSI)
select IRQ_DOMAIN if REGMAP_IRQ
select MDIO_BUS if REGMAP_MDIO
- bool
+ help
+ Enable support for the Register Map (regmap) access API.
+
+ Usually, this option is automatically selected when needed.
+ However, you may want to enable it manually for running the regmap
+ KUnit tests.
+
+ If unsure, say N.
config REGMAP_KUNIT
tristate "KUnit tests for regmap"
- depends on KUNIT
+ depends on KUNIT && REGMAP
default KUNIT_ALL_TESTS
- select REGMAP
select REGMAP_RAM
config REGMAP_AC97
diff --git a/drivers/base/regmap/regcache-maple.c b/drivers/base/regmap/regcache-maple.c
index 9b1b559107ef..c2e3a0f6c218 100644
--- a/drivers/base/regmap/regcache-maple.c
+++ b/drivers/base/regmap/regcache-maple.c
@@ -203,15 +203,18 @@ static int regcache_maple_sync(struct regmap *map, unsigned int min,
mas_for_each(&mas, entry, max) {
for (r = max(mas.index, lmin); r <= min(mas.last, lmax); r++) {
+ mas_pause(&mas);
+ rcu_read_unlock();
ret = regcache_sync_val(map, r, entry[r - mas.index]);
if (ret != 0)
goto out;
+ rcu_read_lock();
}
}
-out:
rcu_read_unlock();
+out:
map->cache_bypass = false;
return ret;
diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c
index 029564695dbb..97c681fcf9f6 100644
--- a/drivers/base/regmap/regcache.c
+++ b/drivers/base/regmap/regcache.c
@@ -284,6 +284,9 @@ static bool regcache_reg_needs_sync(struct regmap *map, unsigned int reg,
{
int ret;
+ if (!regmap_writeable(map, reg))
+ return false;
+
/* If we don't know the chip just got reset, then sync everything. */
if (!map->no_sync_defaults)
return true;
diff --git a/drivers/base/regmap/regmap-sdw.c b/drivers/base/regmap/regmap-sdw.c
index 09899ae99fc1..159c0b740b00 100644
--- a/drivers/base/regmap/regmap-sdw.c
+++ b/drivers/base/regmap/regmap-sdw.c
@@ -59,6 +59,10 @@ static int regmap_sdw_config_check(const struct regmap_config *config)
if (config->pad_bits != 0)
return -ENOTSUPP;
+ /* Only bulk writes are supported not multi-register writes */
+ if (config->can_multi_write)
+ return -ENOTSUPP;
+
return 0;
}
diff --git a/drivers/base/regmap/regmap-spi-avmm.c b/drivers/base/regmap/regmap-spi-avmm.c
index 4c2b94b3e30b..6af692844c19 100644
--- a/drivers/base/regmap/regmap-spi-avmm.c
+++ b/drivers/base/regmap/regmap-spi-avmm.c
@@ -660,7 +660,7 @@ static const struct regmap_bus regmap_spi_avmm_bus = {
.reg_format_endian_default = REGMAP_ENDIAN_NATIVE,
.val_format_endian_default = REGMAP_ENDIAN_NATIVE,
.max_raw_read = SPI_AVMM_VAL_SIZE * MAX_READ_CNT,
- .max_raw_write = SPI_AVMM_VAL_SIZE * MAX_WRITE_CNT,
+ .max_raw_write = SPI_AVMM_REG_SIZE + SPI_AVMM_VAL_SIZE * MAX_WRITE_CNT,
.free_context = spi_avmm_bridge_ctx_free,
};
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index db7851f0e3b8..fa2d3fba6ac9 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -2082,6 +2082,8 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg,
size_t val_count = val_len / val_bytes;
size_t chunk_count, chunk_bytes;
size_t chunk_regs = val_count;
+ size_t max_data = map->max_raw_write - map->format.reg_bytes -
+ map->format.pad_bytes;
int ret, i;
if (!val_count)
@@ -2089,8 +2091,8 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg,
if (map->use_single_write)
chunk_regs = 1;
- else if (map->max_raw_write && val_len > map->max_raw_write)
- chunk_regs = map->max_raw_write / val_bytes;
+ else if (map->max_raw_write && val_len > max_data)
+ chunk_regs = max_data / val_bytes;
chunk_count = val_count / chunk_regs;
chunk_bytes = chunk_regs * val_bytes;
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index b3fedafe301e..864013019d6b 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -2244,6 +2244,7 @@ static void null_destroy_dev(struct nullb *nullb)
struct nullb_device *dev = nullb->dev;
null_del_dev(nullb);
+ null_free_device_storage(dev, false);
null_free_dev(dev);
}
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 39f2903fe25f..bd0e075a5d89 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1334,14 +1334,30 @@ static bool rbd_obj_is_tail(struct rbd_obj_request *obj_req)
/*
* Must be called after rbd_obj_calc_img_extents().
*/
-static bool rbd_obj_copyup_enabled(struct rbd_obj_request *obj_req)
+static void rbd_obj_set_copyup_enabled(struct rbd_obj_request *obj_req)
{
- if (!obj_req->num_img_extents ||
- (rbd_obj_is_entire(obj_req) &&
- !obj_req->img_request->snapc->num_snaps))
- return false;
+ rbd_assert(obj_req->img_request->snapc);
- return true;
+ if (obj_req->img_request->op_type == OBJ_OP_DISCARD) {
+ dout("%s %p objno %llu discard\n", __func__, obj_req,
+ obj_req->ex.oe_objno);
+ return;
+ }
+
+ if (!obj_req->num_img_extents) {
+ dout("%s %p objno %llu not overlapping\n", __func__, obj_req,
+ obj_req->ex.oe_objno);
+ return;
+ }
+
+ if (rbd_obj_is_entire(obj_req) &&
+ !obj_req->img_request->snapc->num_snaps) {
+ dout("%s %p objno %llu entire\n", __func__, obj_req,
+ obj_req->ex.oe_objno);
+ return;
+ }
+
+ obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
}
static u64 rbd_obj_img_extents_bytes(struct rbd_obj_request *obj_req)
@@ -1442,6 +1458,7 @@ __rbd_obj_add_osd_request(struct rbd_obj_request *obj_req,
static struct ceph_osd_request *
rbd_obj_add_osd_request(struct rbd_obj_request *obj_req, int num_ops)
{
+ rbd_assert(obj_req->img_request->snapc);
return __rbd_obj_add_osd_request(obj_req, obj_req->img_request->snapc,
num_ops);
}
@@ -1578,15 +1595,18 @@ static void rbd_img_request_init(struct rbd_img_request *img_request,
mutex_init(&img_request->state_mutex);
}
+/*
+ * Only snap_id is captured here, for reads. For writes, snapshot
+ * context is captured in rbd_img_object_requests() after exclusive
+ * lock is ensured to be held.
+ */
static void rbd_img_capture_header(struct rbd_img_request *img_req)
{
struct rbd_device *rbd_dev = img_req->rbd_dev;
lockdep_assert_held(&rbd_dev->header_rwsem);
- if (rbd_img_is_write(img_req))
- img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc);
- else
+ if (!rbd_img_is_write(img_req))
img_req->snap_id = rbd_dev->spec->snap_id;
if (rbd_dev_parent_get(rbd_dev))
@@ -2233,9 +2253,6 @@ static int rbd_obj_init_write(struct rbd_obj_request *obj_req)
if (ret)
return ret;
- if (rbd_obj_copyup_enabled(obj_req))
- obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
-
obj_req->write_state = RBD_OBJ_WRITE_START;
return 0;
}
@@ -2341,8 +2358,6 @@ static int rbd_obj_init_zeroout(struct rbd_obj_request *obj_req)
if (ret)
return ret;
- if (rbd_obj_copyup_enabled(obj_req))
- obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
if (!obj_req->num_img_extents) {
obj_req->flags |= RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT;
if (rbd_obj_is_entire(obj_req))
@@ -3286,6 +3301,7 @@ again:
case RBD_OBJ_WRITE_START:
rbd_assert(!*result);
+ rbd_obj_set_copyup_enabled(obj_req);
if (rbd_obj_write_is_noop(obj_req))
return true;
@@ -3472,9 +3488,19 @@ static int rbd_img_exclusive_lock(struct rbd_img_request *img_req)
static void rbd_img_object_requests(struct rbd_img_request *img_req)
{
+ struct rbd_device *rbd_dev = img_req->rbd_dev;
struct rbd_obj_request *obj_req;
rbd_assert(!img_req->pending.result && !img_req->pending.num_pending);
+ rbd_assert(!need_exclusive_lock(img_req) ||
+ __rbd_is_lock_owner(rbd_dev));
+
+ if (rbd_img_is_write(img_req)) {
+ rbd_assert(!img_req->snapc);
+ down_read(&rbd_dev->header_rwsem);
+ img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc);
+ up_read(&rbd_dev->header_rwsem);
+ }
for_each_obj_request(img_req, obj_req) {
int result = 0;
@@ -3492,7 +3518,6 @@ static void rbd_img_object_requests(struct rbd_img_request *img_req)
static bool rbd_img_advance(struct rbd_img_request *img_req, int *result)
{
- struct rbd_device *rbd_dev = img_req->rbd_dev;
int ret;
again:
@@ -3513,9 +3538,6 @@ again:
if (*result)
return true;
- rbd_assert(!need_exclusive_lock(img_req) ||
- __rbd_is_lock_owner(rbd_dev));
-
rbd_img_object_requests(img_req);
if (!img_req->pending.num_pending) {
*result = img_req->pending.result;
@@ -3977,6 +3999,10 @@ static int rbd_post_acquire_action(struct rbd_device *rbd_dev)
{
int ret;
+ ret = rbd_dev_refresh(rbd_dev);
+ if (ret)
+ return ret;
+
if (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) {
ret = rbd_object_map_open(rbd_dev);
if (ret)
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 6287f13d1620..1c823750c95a 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -1186,6 +1186,11 @@ static inline bool ublk_queue_ready(struct ublk_queue *ubq)
return ubq->nr_io_ready == ubq->q_depth;
}
+static void ublk_cmd_cancel_cb(struct io_uring_cmd *cmd, unsigned issue_flags)
+{
+ io_uring_cmd_done(cmd, UBLK_IO_RES_ABORT, 0, issue_flags);
+}
+
static void ublk_cancel_queue(struct ublk_queue *ubq)
{
int i;
@@ -1197,8 +1202,8 @@ static void ublk_cancel_queue(struct ublk_queue *ubq)
struct ublk_io *io = &ubq->ios[i];
if (io->flags & UBLK_IO_FLAG_ACTIVE)
- io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0,
- IO_URING_F_UNLOCKED);
+ io_uring_cmd_complete_in_task(io->cmd,
+ ublk_cmd_cancel_cb);
}
/* all io commands are canceled */
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 2b918e28acaa..b47358da92a2 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -348,63 +348,33 @@ static inline void virtblk_request_done(struct request *req)
blk_mq_end_request(req, status);
}
-static void virtblk_complete_batch(struct io_comp_batch *iob)
-{
- struct request *req;
-
- rq_list_for_each(&iob->req_list, req) {
- virtblk_unmap_data(req, blk_mq_rq_to_pdu(req));
- virtblk_cleanup_cmd(req);
- }
- blk_mq_end_request_batch(iob);
-}
-
-static int virtblk_handle_req(struct virtio_blk_vq *vq,
- struct io_comp_batch *iob)
-{
- struct virtblk_req *vbr;
- int req_done = 0;
- unsigned int len;
-
- while ((vbr = virtqueue_get_buf(vq->vq, &len)) != NULL) {
- struct request *req = blk_mq_rq_from_pdu(vbr);
-
- if (likely(!blk_should_fake_timeout(req->q)) &&
- !blk_mq_complete_request_remote(req) &&
- !blk_mq_add_to_batch(req, iob, virtblk_vbr_status(vbr),
- virtblk_complete_batch))
- virtblk_request_done(req);
- req_done++;
- }
-
- return req_done;
-}
-
static void virtblk_done(struct virtqueue *vq)
{
struct virtio_blk *vblk = vq->vdev->priv;
- struct virtio_blk_vq *vblk_vq = &vblk->vqs[vq->index];
- int req_done = 0;
+ bool req_done = false;
+ int qid = vq->index;
+ struct virtblk_req *vbr;
unsigned long flags;
- DEFINE_IO_COMP_BATCH(iob);
+ unsigned int len;
- spin_lock_irqsave(&vblk_vq->lock, flags);
+ spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
do {
virtqueue_disable_cb(vq);
- req_done += virtblk_handle_req(vblk_vq, &iob);
+ while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
+ struct request *req = blk_mq_rq_from_pdu(vbr);
+ if (likely(!blk_should_fake_timeout(req->q)))
+ blk_mq_complete_request(req);
+ req_done = true;
+ }
if (unlikely(virtqueue_is_broken(vq)))
break;
} while (!virtqueue_enable_cb(vq));
- if (req_done) {
- if (!rq_list_empty(iob.req_list))
- iob.complete(&iob);
-
- /* In case queue is stopped waiting for more buffers. */
+ /* In case queue is stopped waiting for more buffers. */
+ if (req_done)
blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
- }
- spin_unlock_irqrestore(&vblk_vq->lock, flags);
+ spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
}
static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx)
@@ -1283,15 +1253,37 @@ static void virtblk_map_queues(struct blk_mq_tag_set *set)
}
}
+static void virtblk_complete_batch(struct io_comp_batch *iob)
+{
+ struct request *req;
+
+ rq_list_for_each(&iob->req_list, req) {
+ virtblk_unmap_data(req, blk_mq_rq_to_pdu(req));
+ virtblk_cleanup_cmd(req);
+ }
+ blk_mq_end_request_batch(iob);
+}
+
static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
{
struct virtio_blk *vblk = hctx->queue->queuedata;
struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx);
+ struct virtblk_req *vbr;
unsigned long flags;
+ unsigned int len;
int found = 0;
spin_lock_irqsave(&vq->lock, flags);
- found = virtblk_handle_req(vq, iob);
+
+ while ((vbr = virtqueue_get_buf(vq->vq, &len)) != NULL) {
+ struct request *req = blk_mq_rq_from_pdu(vbr);
+
+ found++;
+ if (!blk_mq_complete_request_remote(req) &&
+ !blk_mq_add_to_batch(req, iob, virtblk_vbr_status(vbr),
+ virtblk_complete_batch))
+ virtblk_request_done(req);
+ }
if (found)
blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 52e74adbaad6..434fab306777 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -780,7 +780,8 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
ring_req->u.rw.handle = info->handle;
ring_req->operation = rq_data_dir(req) ?
BLKIF_OP_WRITE : BLKIF_OP_READ;
- if (req_op(req) == REQ_OP_FLUSH || req->cmd_flags & REQ_FUA) {
+ if (req_op(req) == REQ_OP_FLUSH ||
+ (req_op(req) == REQ_OP_WRITE && (req->cmd_flags & REQ_FUA))) {
/*
* Ideally we can do an unordered flush-to-disk.
* In case the backend onlysupports barriers, use that.
diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c
index 3a34d7c1475b..52ef44688d38 100644
--- a/drivers/bluetooth/btnxpuart.c
+++ b/drivers/bluetooth/btnxpuart.c
@@ -1319,17 +1319,17 @@ static void nxp_serdev_remove(struct serdev_device *serdev)
hci_free_dev(hdev);
}
-static struct btnxpuart_data w8987_data = {
+static struct btnxpuart_data w8987_data __maybe_unused = {
.helper_fw_name = NULL,
.fw_name = FIRMWARE_W8987,
};
-static struct btnxpuart_data w8997_data = {
+static struct btnxpuart_data w8997_data __maybe_unused = {
.helper_fw_name = FIRMWARE_HELPER,
.fw_name = FIRMWARE_W8997,
};
-static const struct of_device_id nxpuart_of_match_table[] = {
+static const struct of_device_id nxpuart_of_match_table[] __maybe_unused = {
{ .compatible = "nxp,88w8987-bt", .data = &w8987_data },
{ .compatible = "nxp,88w8997-bt", .data = &w8997_data },
{ }
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index 1b064504b388..e30c979535b1 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -78,7 +78,8 @@ enum qca_flags {
QCA_HW_ERROR_EVENT,
QCA_SSR_TRIGGERED,
QCA_BT_OFF,
- QCA_ROM_FW
+ QCA_ROM_FW,
+ QCA_DEBUGFS_CREATED,
};
enum qca_capabilities {
@@ -635,6 +636,9 @@ static void qca_debugfs_init(struct hci_dev *hdev)
if (!hdev->debugfs)
return;
+ if (test_and_set_bit(QCA_DEBUGFS_CREATED, &qca->flags))
+ return;
+
ibs_dir = debugfs_create_dir("ibs", hdev->debugfs);
/* read only */
diff --git a/drivers/char/agp/parisc-agp.c b/drivers/char/agp/parisc-agp.c
index d68d05d5d383..514f9f287a78 100644
--- a/drivers/char/agp/parisc-agp.c
+++ b/drivers/char/agp/parisc-agp.c
@@ -90,6 +90,9 @@ parisc_agp_tlbflush(struct agp_memory *mem)
{
struct _parisc_agp_info *info = &parisc_agp_info;
+ /* force fdc ops to be visible to IOMMU */
+ asm_io_sync();
+
writeq(info->gart_base | ilog2(info->gart_size), info->ioc_regs+IOC_PCOM);
readq(info->ioc_regs+IOC_PCOM); /* flush */
}
@@ -158,6 +161,7 @@ parisc_agp_insert_memory(struct agp_memory *mem, off_t pg_start, int type)
info->gatt[j] =
parisc_agp_mask_memory(agp_bridge,
paddr, type);
+ asm_io_fdc(&info->gatt[j]);
}
}
@@ -191,7 +195,16 @@ static unsigned long
parisc_agp_mask_memory(struct agp_bridge_data *bridge, dma_addr_t addr,
int type)
{
- return SBA_PDIR_VALID_BIT | addr;
+ unsigned ci; /* coherent index */
+ dma_addr_t pa;
+
+ pa = addr & IOVP_MASK;
+ asm("lci 0(%1), %0" : "=r" (ci) : "r" (phys_to_virt(pa)));
+
+ pa |= (ci >> PAGE_SHIFT) & 0xff;/* move CI (8 bits) into lowest byte */
+ pa |= SBA_PDIR_VALID_BIT; /* set "valid" bit */
+
+ return cpu_to_le64(pa);
}
static void
diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index c10a4aa97373..cd48033b804a 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -571,6 +571,10 @@ static int tpm_hwrng_read(struct hwrng *rng, void *data, size_t max, bool wait)
{
struct tpm_chip *chip = container_of(rng, struct tpm_chip, hwrng);
+ /* Give back zero bytes, as TPM chip has not yet fully resumed: */
+ if (chip->flags & TPM_CHIP_FLAG_SUSPENDED)
+ return 0;
+
return tpm_get_random(chip, data, max);
}
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index 4463d0018290..586ca10b0d72 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -412,6 +412,8 @@ int tpm_pm_suspend(struct device *dev)
}
suspended:
+ chip->flags |= TPM_CHIP_FLAG_SUSPENDED;
+
if (rc)
dev_err(dev, "Ignoring error %d while suspending\n", rc);
return 0;
@@ -429,6 +431,14 @@ int tpm_pm_resume(struct device *dev)
if (chip == NULL)
return -ENODEV;
+ chip->flags &= ~TPM_CHIP_FLAG_SUSPENDED;
+
+ /*
+ * Guarantee that SUSPENDED is written last, so that hwrng does not
+ * activate before the chip has been fully resumed.
+ */
+ wmb();
+
return 0;
}
EXPORT_SYMBOL_GPL(tpm_pm_resume);
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index 7af389806643..7db3593941ea 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -122,6 +122,29 @@ static const struct dmi_system_id tpm_tis_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T490s"),
},
},
+ {
+ .callback = tpm_tis_disable_irq,
+ .ident = "ThinkStation P360 Tiny",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkStation P360 Tiny"),
+ },
+ },
+ {
+ .callback = tpm_tis_disable_irq,
+ .ident = "ThinkPad L490",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad L490"),
+ },
+ },
+ {
+ .callback = tpm_tis_disable_irq,
+ .ident = "UPX-TGL",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "AAEON"),
+ },
+ },
{}
};
diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index 02945d53fcef..558144fa707a 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -1209,25 +1209,20 @@ static void tpm_tis_reenable_interrupts(struct tpm_chip *chip)
u32 intmask;
int rc;
- if (chip->ops->clk_enable != NULL)
- chip->ops->clk_enable(chip, true);
-
- /* reenable interrupts that device may have lost or
- * BIOS/firmware may have disabled
+ /*
+ * Re-enable interrupts that device may have lost or BIOS/firmware may
+ * have disabled.
*/
rc = tpm_tis_write8(priv, TPM_INT_VECTOR(priv->locality), priv->irq);
- if (rc < 0)
- goto out;
+ if (rc < 0) {
+ dev_err(&chip->dev, "Setting IRQ failed.\n");
+ return;
+ }
intmask = priv->int_mask | TPM_GLOBAL_INT_ENABLE;
-
- tpm_tis_write32(priv, TPM_INT_ENABLE(priv->locality), intmask);
-
-out:
- if (chip->ops->clk_enable != NULL)
- chip->ops->clk_enable(chip, false);
-
- return;
+ rc = tpm_tis_write32(priv, TPM_INT_ENABLE(priv->locality), intmask);
+ if (rc < 0)
+ dev_err(&chip->dev, "Enabling interrupts failed.\n");
}
int tpm_tis_resume(struct device *dev)
@@ -1235,27 +1230,27 @@ int tpm_tis_resume(struct device *dev)
struct tpm_chip *chip = dev_get_drvdata(dev);
int ret;
- ret = tpm_tis_request_locality(chip, 0);
- if (ret < 0)
+ ret = tpm_chip_start(chip);
+ if (ret)
return ret;
if (chip->flags & TPM_CHIP_FLAG_IRQ)
tpm_tis_reenable_interrupts(chip);
- ret = tpm_pm_resume(dev);
- if (ret)
- goto out;
-
/*
* TPM 1.2 requires self-test on resume. This function actually returns
* an error code but for unknown reason it isn't handled.
*/
if (!(chip->flags & TPM_CHIP_FLAG_TPM2))
tpm1_do_selftest(chip);
-out:
- tpm_tis_relinquish_locality(chip, 0);
- return ret;
+ tpm_chip_stop(chip);
+
+ ret = tpm_pm_resume(dev);
+ if (ret)
+ return ret;
+
+ return 0;
}
EXPORT_SYMBOL_GPL(tpm_tis_resume);
#endif
diff --git a/drivers/char/tpm/tpm_tis_core.h b/drivers/char/tpm/tpm_tis_core.h
index e978f457fd4d..610bfadb6acf 100644
--- a/drivers/char/tpm/tpm_tis_core.h
+++ b/drivers/char/tpm/tpm_tis_core.h
@@ -84,10 +84,10 @@ enum tis_defaults {
#define ILB_REMAP_SIZE 0x100
enum tpm_tis_flags {
- TPM_TIS_ITPM_WORKAROUND = BIT(0),
- TPM_TIS_INVALID_STATUS = BIT(1),
- TPM_TIS_DEFAULT_CANCELLATION = BIT(2),
- TPM_TIS_IRQ_TESTED = BIT(3),
+ TPM_TIS_ITPM_WORKAROUND = 0,
+ TPM_TIS_INVALID_STATUS = 1,
+ TPM_TIS_DEFAULT_CANCELLATION = 2,
+ TPM_TIS_IRQ_TESTED = 3,
};
struct tpm_tis_data {
diff --git a/drivers/clk/clk-composite.c b/drivers/clk/clk-composite.c
index edfa94641bbf..66759fe28fad 100644
--- a/drivers/clk/clk-composite.c
+++ b/drivers/clk/clk-composite.c
@@ -119,7 +119,10 @@ static int clk_composite_determine_rate(struct clk_hw *hw,
if (ret)
continue;
- rate_diff = abs(req->rate - tmp_req.rate);
+ if (req->rate >= tmp_req.rate)
+ rate_diff = req->rate - tmp_req.rate;
+ else
+ rate_diff = tmp_req.rate - req->rate;
if (!rate_diff || !req->best_parent_hw
|| best_rate_diff > rate_diff) {
diff --git a/drivers/clk/clk-loongson2.c b/drivers/clk/clk-loongson2.c
index 70ae1dd2e474..bacdcbb287ac 100644
--- a/drivers/clk/clk-loongson2.c
+++ b/drivers/clk/clk-loongson2.c
@@ -40,7 +40,7 @@ static struct clk_hw *loongson2_clk_register(struct device *dev,
{
int ret;
struct clk_hw *hw;
- struct clk_init_data init;
+ struct clk_init_data init = { };
hw = devm_kzalloc(dev, sizeof(*hw), GFP_KERNEL);
if (!hw)
diff --git a/drivers/clk/mediatek/clk-mt8365.c b/drivers/clk/mediatek/clk-mt8365.c
index 6b4e193f648d..c87a6c4a7967 100644
--- a/drivers/clk/mediatek/clk-mt8365.c
+++ b/drivers/clk/mediatek/clk-mt8365.c
@@ -23,6 +23,7 @@
static DEFINE_SPINLOCK(mt8365_clk_lock);
static const struct mtk_fixed_clk top_fixed_clks[] = {
+ FIXED_CLK(CLK_TOP_CLK_NULL, "clk_null", NULL, 0),
FIXED_CLK(CLK_TOP_I2S0_BCK, "i2s0_bck", NULL, 26000000),
FIXED_CLK(CLK_TOP_DSI0_LNTC_DSICK, "dsi0_lntc_dsick", "clk26m",
75000000),
@@ -559,6 +560,14 @@ static const struct mtk_clk_divider top_adj_divs[] = {
0x324, 16, 8, CLK_DIVIDER_ROUND_CLOSEST),
DIV_ADJ_F(CLK_TOP_APLL12_CK_DIV3, "apll12_ck_div3", "apll_i2s3_sel",
0x324, 24, 8, CLK_DIVIDER_ROUND_CLOSEST),
+ DIV_ADJ_F(CLK_TOP_APLL12_CK_DIV4, "apll12_ck_div4", "apll_tdmout_sel",
+ 0x328, 0, 8, CLK_DIVIDER_ROUND_CLOSEST),
+ DIV_ADJ_F(CLK_TOP_APLL12_CK_DIV4B, "apll12_ck_div4b", "apll_tdmout_sel",
+ 0x328, 8, 8, CLK_DIVIDER_ROUND_CLOSEST),
+ DIV_ADJ_F(CLK_TOP_APLL12_CK_DIV5, "apll12_ck_div5", "apll_tdmin_sel",
+ 0x328, 16, 8, CLK_DIVIDER_ROUND_CLOSEST),
+ DIV_ADJ_F(CLK_TOP_APLL12_CK_DIV5B, "apll12_ck_div5b", "apll_tdmin_sel",
+ 0x328, 24, 8, CLK_DIVIDER_ROUND_CLOSEST),
DIV_ADJ_F(CLK_TOP_APLL12_CK_DIV6, "apll12_ck_div6", "apll_spdif_sel",
0x32c, 0, 8, CLK_DIVIDER_ROUND_CLOSEST),
};
@@ -583,15 +592,15 @@ static const struct mtk_gate_regs top2_cg_regs = {
#define GATE_TOP0(_id, _name, _parent, _shift) \
GATE_MTK(_id, _name, _parent, &top0_cg_regs, \
- _shift, &mtk_clk_gate_ops_no_setclr_inv)
+ _shift, &mtk_clk_gate_ops_no_setclr)
#define GATE_TOP1(_id, _name, _parent, _shift) \
GATE_MTK(_id, _name, _parent, &top1_cg_regs, \
- _shift, &mtk_clk_gate_ops_no_setclr)
+ _shift, &mtk_clk_gate_ops_no_setclr_inv)
#define GATE_TOP2(_id, _name, _parent, _shift) \
GATE_MTK(_id, _name, _parent, &top2_cg_regs, \
- _shift, &mtk_clk_gate_ops_no_setclr)
+ _shift, &mtk_clk_gate_ops_no_setclr_inv)
static const struct mtk_gate top_clk_gates[] = {
GATE_TOP0(CLK_TOP_CONN_32K, "conn_32k", "clk32k", 10),
@@ -696,6 +705,7 @@ static const struct mtk_gate ifr_clks[] = {
GATE_IFR3(CLK_IFR_GCPU, "ifr_gcpu", "axi_sel", 8),
GATE_IFR3(CLK_IFR_TRNG, "ifr_trng", "axi_sel", 9),
GATE_IFR3(CLK_IFR_AUXADC, "ifr_auxadc", "clk26m", 10),
+ GATE_IFR3(CLK_IFR_CPUM, "ifr_cpum", "clk26m", 11),
GATE_IFR3(CLK_IFR_AUXADC_MD, "ifr_auxadc_md", "clk26m", 14),
GATE_IFR3(CLK_IFR_AP_DMA, "ifr_ap_dma", "axi_sel", 18),
GATE_IFR3(CLK_IFR_DEBUGSYS, "ifr_debugsys", "axi_sel", 24),
@@ -717,6 +727,8 @@ static const struct mtk_gate ifr_clks[] = {
GATE_IFR5(CLK_IFR_PWRAP_TMR, "ifr_pwrap_tmr", "clk26m", 12),
GATE_IFR5(CLK_IFR_PWRAP_SPI, "ifr_pwrap_spi", "clk26m", 13),
GATE_IFR5(CLK_IFR_PWRAP_SYS, "ifr_pwrap_sys", "clk26m", 14),
+ GATE_MTK_FLAGS(CLK_IFR_MCU_PM_BK, "ifr_mcu_pm_bk", NULL, &ifr5_cg_regs,
+ 17, &mtk_clk_gate_ops_setclr, CLK_IGNORE_UNUSED),
GATE_IFR5(CLK_IFR_IRRX_26M, "ifr_irrx_26m", "clk26m", 22),
GATE_IFR5(CLK_IFR_IRRX_32K, "ifr_irrx_32k", "clk32k", 23),
GATE_IFR5(CLK_IFR_I2C0_AXI, "ifr_i2c0_axi", "i2c_sel", 24),
diff --git a/drivers/clk/pxa/clk-pxa3xx.c b/drivers/clk/pxa/clk-pxa3xx.c
index 42958a542662..621e298f101a 100644
--- a/drivers/clk/pxa/clk-pxa3xx.c
+++ b/drivers/clk/pxa/clk-pxa3xx.c
@@ -164,7 +164,7 @@ void pxa3xx_clk_update_accr(u32 disable, u32 enable, u32 xclkcfg, u32 mask)
accr &= ~disable;
accr |= enable;
- writel(accr, ACCR);
+ writel(accr, clk_regs + ACCR);
if (xclkcfg)
__asm__("mcr p14, 0, %0, c6, c0, 0\n" : : "r"(xclkcfg));
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 29904395e95f..b2f05d27167e 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -975,7 +975,7 @@ static int __init acpi_cpufreq_probe(struct platform_device *pdev)
/* don't keep reloading if cpufreq_driver exists */
if (cpufreq_get_current_driver())
- return -EEXIST;
+ return -ENODEV;
pr_debug("%s\n", __func__);
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 5a3d4aa0f45a..ddd346a239e0 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -444,9 +444,8 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy)
return 0;
}
-static int amd_pstate_target(struct cpufreq_policy *policy,
- unsigned int target_freq,
- unsigned int relation)
+static int amd_pstate_update_freq(struct cpufreq_policy *policy,
+ unsigned int target_freq, bool fast_switch)
{
struct cpufreq_freqs freqs;
struct amd_cpudata *cpudata = policy->driver_data;
@@ -465,26 +464,51 @@ static int amd_pstate_target(struct cpufreq_policy *policy,
des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf,
cpudata->max_freq);
- cpufreq_freq_transition_begin(policy, &freqs);
+ WARN_ON(fast_switch && !policy->fast_switch_enabled);
+ /*
+ * If fast_switch is desired, then there aren't any registered
+ * transition notifiers. See comment for
+ * cpufreq_enable_fast_switch().
+ */
+ if (!fast_switch)
+ cpufreq_freq_transition_begin(policy, &freqs);
+
amd_pstate_update(cpudata, min_perf, des_perf,
- max_perf, false, policy->governor->flags);
- cpufreq_freq_transition_end(policy, &freqs, false);
+ max_perf, fast_switch, policy->governor->flags);
+
+ if (!fast_switch)
+ cpufreq_freq_transition_end(policy, &freqs, false);
return 0;
}
+static int amd_pstate_target(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ unsigned int relation)
+{
+ return amd_pstate_update_freq(policy, target_freq, false);
+}
+
+static unsigned int amd_pstate_fast_switch(struct cpufreq_policy *policy,
+ unsigned int target_freq)
+{
+ return amd_pstate_update_freq(policy, target_freq, true);
+}
+
static void amd_pstate_adjust_perf(unsigned int cpu,
unsigned long _min_perf,
unsigned long target_perf,
unsigned long capacity)
{
unsigned long max_perf, min_perf, des_perf,
- cap_perf, lowest_nonlinear_perf;
+ cap_perf, lowest_nonlinear_perf, max_freq;
struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
struct amd_cpudata *cpudata = policy->driver_data;
+ unsigned int target_freq;
cap_perf = READ_ONCE(cpudata->highest_perf);
lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
+ max_freq = READ_ONCE(cpudata->max_freq);
des_perf = cap_perf;
if (target_perf < capacity)
@@ -501,6 +525,10 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
if (max_perf < min_perf)
max_perf = min_perf;
+ des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
+ target_freq = div_u64(des_perf * max_freq, max_perf);
+ policy->cur = target_freq;
+
amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
policy->governor->flags);
cpufreq_cpu_put(policy);
@@ -715,6 +743,7 @@ static int amd_pstate_cpu_exit(struct cpufreq_policy *policy)
freq_qos_remove_request(&cpudata->req[1]);
freq_qos_remove_request(&cpudata->req[0]);
+ policy->fast_switch_possible = false;
kfree(cpudata);
return 0;
@@ -1079,7 +1108,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
policy->policy = CPUFREQ_POLICY_POWERSAVE;
if (boot_cpu_has(X86_FEATURE_CPPC)) {
- policy->fast_switch_possible = true;
ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
if (ret)
return ret;
@@ -1102,7 +1130,6 @@ free_cpudata1:
static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
{
pr_debug("CPU %d exiting\n", policy->cpu);
- policy->fast_switch_possible = false;
return 0;
}
@@ -1309,6 +1336,7 @@ static struct cpufreq_driver amd_pstate_driver = {
.flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
.verify = amd_pstate_verify,
.target = amd_pstate_target,
+ .fast_switch = amd_pstate_fast_switch,
.init = amd_pstate_cpu_init,
.exit = amd_pstate_cpu_exit,
.suspend = amd_pstate_cpu_suspend,
diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c
index 1d2cfea9858a..73efbcf5513b 100644
--- a/drivers/cpufreq/pcc-cpufreq.c
+++ b/drivers/cpufreq/pcc-cpufreq.c
@@ -583,7 +583,7 @@ static int __init pcc_cpufreq_probe(struct platform_device *pdev)
/* Skip initialization if another cpufreq driver is there. */
if (cpufreq_get_current_driver())
- return -EEXIST;
+ return -ENODEV;
if (acpi_disabled)
return -ENODEV;
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 23b9ff920d7e..bea9cf31a12d 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -1028,7 +1028,7 @@ static int cxl_mem_get_partition_info(struct cxl_dev_state *cxlds)
* cxl_dev_state_identify() - Send the IDENTIFY command to the device.
* @cxlds: The device data for the operation
*
- * Return: 0 if identify was executed successfully.
+ * Return: 0 if identify was executed successfully or media not ready.
*
* This will dispatch the identify command to the device and on success populate
* structures to be exported to sysfs.
@@ -1041,6 +1041,9 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds)
u32 val;
int rc;
+ if (!cxlds->media_ready)
+ return 0;
+
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_IDENTIFY,
.size_out = sizeof(id),
@@ -1102,6 +1105,13 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds)
struct device *dev = cxlds->dev;
int rc;
+ if (!cxlds->media_ready) {
+ cxlds->dpa_res = DEFINE_RES_MEM(0, 0);
+ cxlds->ram_res = DEFINE_RES_MEM(0, 0);
+ cxlds->pmem_res = DEFINE_RES_MEM(0, 0);
+ return 0;
+ }
+
cxlds->dpa_res =
(struct resource)DEFINE_RES_MEM(0, cxlds->total_bytes);
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index f332fe7af92b..67f4ab6daa34 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -101,23 +101,57 @@ int devm_cxl_port_enumerate_dports(struct cxl_port *port)
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, CXL);
-/*
- * Wait up to @media_ready_timeout for the device to report memory
- * active.
- */
-int cxl_await_media_ready(struct cxl_dev_state *cxlds)
+static int cxl_dvsec_mem_range_valid(struct cxl_dev_state *cxlds, int id)
+{
+ struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+ int d = cxlds->cxl_dvsec;
+ bool valid = false;
+ int rc, i;
+ u32 temp;
+
+ if (id > CXL_DVSEC_RANGE_MAX)
+ return -EINVAL;
+
+ /* Check MEM INFO VALID bit first, give up after 1s */
+ i = 1;
+ do {
+ rc = pci_read_config_dword(pdev,
+ d + CXL_DVSEC_RANGE_SIZE_LOW(id),
+ &temp);
+ if (rc)
+ return rc;
+
+ valid = FIELD_GET(CXL_DVSEC_MEM_INFO_VALID, temp);
+ if (valid)
+ break;
+ msleep(1000);
+ } while (i--);
+
+ if (!valid) {
+ dev_err(&pdev->dev,
+ "Timeout awaiting memory range %d valid after 1s.\n",
+ id);
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+static int cxl_dvsec_mem_range_active(struct cxl_dev_state *cxlds, int id)
{
struct pci_dev *pdev = to_pci_dev(cxlds->dev);
int d = cxlds->cxl_dvsec;
bool active = false;
- u64 md_status;
int rc, i;
+ u32 temp;
- for (i = media_ready_timeout; i; i--) {
- u32 temp;
+ if (id > CXL_DVSEC_RANGE_MAX)
+ return -EINVAL;
+ /* Check MEM ACTIVE bit, up to 60s timeout by default */
+ for (i = media_ready_timeout; i; i--) {
rc = pci_read_config_dword(
- pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp);
+ pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(id), &temp);
if (rc)
return rc;
@@ -134,6 +168,39 @@ int cxl_await_media_ready(struct cxl_dev_state *cxlds)
return -ETIMEDOUT;
}
+ return 0;
+}
+
+/*
+ * Wait up to @media_ready_timeout for the device to report memory
+ * active.
+ */
+int cxl_await_media_ready(struct cxl_dev_state *cxlds)
+{
+ struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+ int d = cxlds->cxl_dvsec;
+ int rc, i, hdm_count;
+ u64 md_status;
+ u16 cap;
+
+ rc = pci_read_config_word(pdev,
+ d + CXL_DVSEC_CAP_OFFSET, &cap);
+ if (rc)
+ return rc;
+
+ hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap);
+ for (i = 0; i < hdm_count; i++) {
+ rc = cxl_dvsec_mem_range_valid(cxlds, i);
+ if (rc)
+ return rc;
+ }
+
+ for (i = 0; i < hdm_count; i++) {
+ rc = cxl_dvsec_mem_range_active(cxlds, i);
+ if (rc)
+ return rc;
+ }
+
md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
if (!CXLMDEV_READY(md_status))
return -EIO;
@@ -241,17 +308,36 @@ static void disable_hdm(void *_cxlhdm)
hdm + CXL_HDM_DECODER_CTRL_OFFSET);
}
-static int devm_cxl_enable_hdm(struct device *host, struct cxl_hdm *cxlhdm)
+int devm_cxl_enable_hdm(struct cxl_port *port, struct cxl_hdm *cxlhdm)
{
- void __iomem *hdm = cxlhdm->regs.hdm_decoder;
+ void __iomem *hdm;
u32 global_ctrl;
+ /*
+ * If the hdm capability was not mapped there is nothing to enable and
+ * the caller is responsible for what happens next. For example,
+ * emulate a passthrough decoder.
+ */
+ if (IS_ERR(cxlhdm))
+ return 0;
+
+ hdm = cxlhdm->regs.hdm_decoder;
global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
+
+ /*
+ * If the HDM decoder capability was enabled on entry, skip
+ * registering disable_hdm() since this decode capability may be
+ * owned by platform firmware.
+ */
+ if (global_ctrl & CXL_HDM_DECODER_ENABLE)
+ return 0;
+
writel(global_ctrl | CXL_HDM_DECODER_ENABLE,
hdm + CXL_HDM_DECODER_CTRL_OFFSET);
- return devm_add_action_or_reset(host, disable_hdm, cxlhdm);
+ return devm_add_action_or_reset(&port->dev, disable_hdm, cxlhdm);
}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_enable_hdm, CXL);
int cxl_dvsec_rr_decode(struct device *dev, int d,
struct cxl_endpoint_dvsec_info *info)
@@ -425,7 +511,7 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
if (info->mem_enabled)
return 0;
- rc = devm_cxl_enable_hdm(&port->dev, cxlhdm);
+ rc = devm_cxl_enable_hdm(port, cxlhdm);
if (rc)
return rc;
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index da2068475fa2..e7c284c890bc 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -750,11 +750,10 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
parent_port = parent_dport ? parent_dport->port : NULL;
if (IS_ERR(port)) {
- dev_dbg(uport, "Failed to add %s%s%s%s: %ld\n",
- dev_name(&port->dev),
- parent_port ? " to " : "",
+ dev_dbg(uport, "Failed to add%s%s%s: %ld\n",
+ parent_port ? " port to " : "",
parent_port ? dev_name(&parent_port->dev) : "",
- parent_port ? "" : " (root port)",
+ parent_port ? "" : " root port",
PTR_ERR(port));
} else {
dev_dbg(uport, "%s added%s%s%s\n",
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 044a92d9813e..f93a28538962 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -710,6 +710,7 @@ struct cxl_endpoint_dvsec_info {
struct cxl_hdm;
struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
struct cxl_endpoint_dvsec_info *info);
+int devm_cxl_enable_hdm(struct cxl_port *port, struct cxl_hdm *cxlhdm);
int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
struct cxl_endpoint_dvsec_info *info);
int devm_cxl_add_passthrough_decoder(struct cxl_port *port);
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index db12b6313afb..a2845a7a69d8 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -266,6 +266,7 @@ struct cxl_poison_state {
* @regs: Parsed register blocks
* @cxl_dvsec: Offset to the PCIe device DVSEC
* @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH)
+ * @media_ready: Indicate whether the device media is usable
* @payload_size: Size of space for payload
* (CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register)
* @lsa_size: Size of Label Storage Area
@@ -303,6 +304,7 @@ struct cxl_dev_state {
int cxl_dvsec;
bool rcd;
+ bool media_ready;
size_t payload_size;
size_t lsa_size;
struct mutex mbox_mutex; /* Protects device mailbox and firmware */
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 0465ef963cd6..7c02e55b8042 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -31,6 +31,8 @@
#define CXL_DVSEC_RANGE_BASE_LOW(i) (0x24 + (i * 0x10))
#define CXL_DVSEC_MEM_BASE_LOW_MASK GENMASK(31, 28)
+#define CXL_DVSEC_RANGE_MAX 2
+
/* CXL 2.0 8.1.4: Non-CXL Function Map DVSEC */
#define CXL_DVSEC_FUNCTION_MAP 2
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 10caf180b3fa..519edd0eb196 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -124,6 +124,9 @@ static int cxl_mem_probe(struct device *dev)
struct dentry *dentry;
int rc;
+ if (!cxlds->media_ready)
+ return -EBUSY;
+
/*
* Someone is trying to reattach this device after it lost its port
* connection (an endpoint port previously registered by this memdev was
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index f7a5b8e9c102..0872f2233ed0 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -708,6 +708,12 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
+ rc = cxl_await_media_ready(cxlds);
+ if (rc == 0)
+ cxlds->media_ready = true;
+ else
+ dev_warn(&pdev->dev, "Media not active (%d)\n", rc);
+
rc = cxl_pci_setup_mailbox(cxlds);
if (rc)
return rc;
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index eb57324c4ad4..c23b6164e1c0 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -60,13 +60,17 @@ static int discover_region(struct device *dev, void *root)
static int cxl_switch_port_probe(struct cxl_port *port)
{
struct cxl_hdm *cxlhdm;
- int rc;
+ int rc, nr_dports;
- rc = devm_cxl_port_enumerate_dports(port);
- if (rc < 0)
- return rc;
+ nr_dports = devm_cxl_port_enumerate_dports(port);
+ if (nr_dports < 0)
+ return nr_dports;
cxlhdm = devm_cxl_setup_hdm(port, NULL);
+ rc = devm_cxl_enable_hdm(port, cxlhdm);
+ if (rc)
+ return rc;
+
if (!IS_ERR(cxlhdm))
return devm_cxl_enumerate_decoders(cxlhdm, NULL);
@@ -75,7 +79,7 @@ static int cxl_switch_port_probe(struct cxl_port *port)
return PTR_ERR(cxlhdm);
}
- if (rc == 1) {
+ if (nr_dports == 1) {
dev_dbg(&port->dev, "Fallback to passthrough decoder\n");
return devm_cxl_add_passthrough_decoder(port);
}
@@ -113,12 +117,6 @@ static int cxl_endpoint_port_probe(struct cxl_port *port)
if (rc)
return rc;
- rc = cxl_await_media_ready(cxlds);
- if (rc) {
- dev_err(&port->dev, "Media not active (%d)\n", rc);
- return rc;
- }
-
rc = devm_cxl_enumerate_decoders(cxlhdm, &info);
if (rc)
return rc;
diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index 01f2e86f3f7c..12cf6bb2e3ce 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -12,7 +12,6 @@
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/udmabuf.h>
-#include <linux/hugetlb.h>
#include <linux/vmalloc.h>
#include <linux/iosys-map.h>
@@ -207,9 +206,7 @@ static long udmabuf_create(struct miscdevice *device,
struct udmabuf *ubuf;
struct dma_buf *buf;
pgoff_t pgoff, pgcnt, pgidx, pgbuf = 0, pglimit;
- struct page *page, *hpage = NULL;
- pgoff_t subpgoff, maxsubpgs;
- struct hstate *hpstate;
+ struct page *page;
int seals, ret = -EINVAL;
u32 i, flags;
@@ -245,7 +242,7 @@ static long udmabuf_create(struct miscdevice *device,
if (!memfd)
goto err;
mapping = memfd->f_mapping;
- if (!shmem_mapping(mapping) && !is_file_hugepages(memfd))
+ if (!shmem_mapping(mapping))
goto err;
seals = memfd_fcntl(memfd, F_GET_SEALS, 0);
if (seals == -EINVAL)
@@ -256,48 +253,16 @@ static long udmabuf_create(struct miscdevice *device,
goto err;
pgoff = list[i].offset >> PAGE_SHIFT;
pgcnt = list[i].size >> PAGE_SHIFT;
- if (is_file_hugepages(memfd)) {
- hpstate = hstate_file(memfd);
- pgoff = list[i].offset >> huge_page_shift(hpstate);
- subpgoff = (list[i].offset &
- ~huge_page_mask(hpstate)) >> PAGE_SHIFT;
- maxsubpgs = huge_page_size(hpstate) >> PAGE_SHIFT;
- }
for (pgidx = 0; pgidx < pgcnt; pgidx++) {
- if (is_file_hugepages(memfd)) {
- if (!hpage) {
- hpage = find_get_page_flags(mapping, pgoff,
- FGP_ACCESSED);
- if (!hpage) {
- ret = -EINVAL;
- goto err;
- }
- }
- page = hpage + subpgoff;
- get_page(page);
- subpgoff++;
- if (subpgoff == maxsubpgs) {
- put_page(hpage);
- hpage = NULL;
- subpgoff = 0;
- pgoff++;
- }
- } else {
- page = shmem_read_mapping_page(mapping,
- pgoff + pgidx);
- if (IS_ERR(page)) {
- ret = PTR_ERR(page);
- goto err;
- }
+ page = shmem_read_mapping_page(mapping, pgoff + pgidx);
+ if (IS_ERR(page)) {
+ ret = PTR_ERR(page);
+ goto err;
}
ubuf->pages[pgbuf++] = page;
}
fput(memfd);
memfd = NULL;
- if (hpage) {
- put_page(hpage);
- hpage = NULL;
- }
}
exp_info.ops = &udmabuf_ops;
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index 8858470246e1..ee3a219e3a89 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -132,7 +132,7 @@
#define ATC_DST_PIP BIT(12) /* Destination Picture-in-Picture enabled */
#define ATC_SRC_DSCR_DIS BIT(16) /* Src Descriptor fetch disable */
#define ATC_DST_DSCR_DIS BIT(20) /* Dst Descriptor fetch disable */
-#define ATC_FC GENMASK(22, 21) /* Choose Flow Controller */
+#define ATC_FC GENMASK(23, 21) /* Choose Flow Controller */
#define ATC_FC_MEM2MEM 0x0 /* Mem-to-Mem (DMA) */
#define ATC_FC_MEM2PER 0x1 /* Mem-to-Periph (DMA) */
#define ATC_FC_PER2MEM 0x2 /* Periph-to-Mem (DMA) */
@@ -153,8 +153,6 @@
#define ATC_AUTO BIT(31) /* Auto multiple buffer tx enable */
/* Bitfields in CFG */
-#define ATC_PER_MSB(h) ((0x30U & (h)) >> 4) /* Extract most significant bits of a handshaking identifier */
-
#define ATC_SRC_PER GENMASK(3, 0) /* Channel src rq associated with periph handshaking ifc h */
#define ATC_DST_PER GENMASK(7, 4) /* Channel dst rq associated with periph handshaking ifc h */
#define ATC_SRC_REP BIT(8) /* Source Replay Mod */
@@ -181,10 +179,15 @@
#define ATC_DPIP_HOLE GENMASK(15, 0)
#define ATC_DPIP_BOUNDARY GENMASK(25, 16)
-#define ATC_SRC_PER_ID(id) (FIELD_PREP(ATC_SRC_PER_MSB, (id)) | \
- FIELD_PREP(ATC_SRC_PER, (id)))
-#define ATC_DST_PER_ID(id) (FIELD_PREP(ATC_DST_PER_MSB, (id)) | \
- FIELD_PREP(ATC_DST_PER, (id)))
+#define ATC_PER_MSB GENMASK(5, 4) /* Extract MSBs of a handshaking identifier */
+#define ATC_SRC_PER_ID(id) \
+ ({ typeof(id) _id = (id); \
+ FIELD_PREP(ATC_SRC_PER_MSB, FIELD_GET(ATC_PER_MSB, _id)) | \
+ FIELD_PREP(ATC_SRC_PER, _id); })
+#define ATC_DST_PER_ID(id) \
+ ({ typeof(id) _id = (id); \
+ FIELD_PREP(ATC_DST_PER_MSB, FIELD_GET(ATC_PER_MSB, _id)) | \
+ FIELD_PREP(ATC_DST_PER, _id); })
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index 7da6d9b6098e..c3b37168b21f 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -1102,6 +1102,8 @@ at_xdmac_prep_interleaved(struct dma_chan *chan,
NULL,
src_addr, dst_addr,
xt, xt->sgl);
+ if (!first)
+ return NULL;
/* Length of the block is (BLEN+1) microblocks. */
for (i = 0; i < xt->numf - 1; i++)
@@ -1132,8 +1134,9 @@ at_xdmac_prep_interleaved(struct dma_chan *chan,
src_addr, dst_addr,
xt, chunk);
if (!desc) {
- list_splice_tail_init(&first->descs_list,
- &atchan->free_descs_list);
+ if (first)
+ list_splice_tail_init(&first->descs_list,
+ &atchan->free_descs_list);
return NULL;
}
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index ecbf67c2ad2b..d32deb9b4e3d 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -277,7 +277,6 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
if (wq_dedicated(wq)) {
rc = idxd_wq_set_pasid(wq, pasid);
if (rc < 0) {
- iommu_sva_unbind_device(sva);
dev_err(dev, "wq set pasid failed: %d\n", rc);
goto failed_set_pasid;
}
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 0d9257fbdfb0..b4731fe6bbc1 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -1050,7 +1050,7 @@ static bool _trigger(struct pl330_thread *thrd)
return true;
}
-static bool _start(struct pl330_thread *thrd)
+static bool pl330_start_thread(struct pl330_thread *thrd)
{
switch (_state(thrd)) {
case PL330_STATE_FAULT_COMPLETING:
@@ -1702,7 +1702,7 @@ static int pl330_update(struct pl330_dmac *pl330)
thrd->req_running = -1;
/* Get going again ASAP */
- _start(thrd);
+ pl330_start_thread(thrd);
/* For now, just make a list of callbacks to be done */
list_add_tail(&descdone->rqd, &pl330->req_done);
@@ -2089,7 +2089,7 @@ static void pl330_tasklet(struct tasklet_struct *t)
} else {
/* Make sure the PL330 Channel thread is active */
spin_lock(&pch->thread->dmac->lock);
- _start(pch->thread);
+ pl330_start_thread(pch->thread);
spin_unlock(&pch->thread->dmac->lock);
}
@@ -2107,7 +2107,7 @@ static void pl330_tasklet(struct tasklet_struct *t)
if (power_down) {
pch->active = true;
spin_lock(&pch->thread->dmac->lock);
- _start(pch->thread);
+ pl330_start_thread(pch->thread);
spin_unlock(&pch->thread->dmac->lock);
power_down = false;
}
diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c
index fc3a2a05ab7b..b8329a23728d 100644
--- a/drivers/dma/ti/k3-udma.c
+++ b/drivers/dma/ti/k3-udma.c
@@ -5527,7 +5527,7 @@ static int udma_probe(struct platform_device *pdev)
return ret;
}
-static int udma_pm_suspend(struct device *dev)
+static int __maybe_unused udma_pm_suspend(struct device *dev)
{
struct udma_dev *ud = dev_get_drvdata(dev);
struct dma_device *dma_dev = &ud->ddev;
@@ -5549,7 +5549,7 @@ static int udma_pm_suspend(struct device *dev)
return 0;
}
-static int udma_pm_resume(struct device *dev)
+static int __maybe_unused udma_pm_resume(struct device *dev)
{
struct udma_dev *ud = dev_get_drvdata(dev);
struct dma_device *dma_dev = &ud->ddev;
diff --git a/drivers/edac/qcom_edac.c b/drivers/edac/qcom_edac.c
index 265e0fb39bc7..b2db545c6810 100644
--- a/drivers/edac/qcom_edac.c
+++ b/drivers/edac/qcom_edac.c
@@ -21,30 +21,9 @@
#define TRP_SYN_REG_CNT 6
#define DRP_SYN_REG_CNT 8
-#define LLCC_COMMON_STATUS0 0x0003000c
#define LLCC_LB_CNT_MASK GENMASK(31, 28)
#define LLCC_LB_CNT_SHIFT 28
-/* Single & double bit syndrome register offsets */
-#define TRP_ECC_SB_ERR_SYN0 0x0002304c
-#define TRP_ECC_DB_ERR_SYN0 0x00020370
-#define DRP_ECC_SB_ERR_SYN0 0x0004204c
-#define DRP_ECC_DB_ERR_SYN0 0x00042070
-
-/* Error register offsets */
-#define TRP_ECC_ERROR_STATUS1 0x00020348
-#define TRP_ECC_ERROR_STATUS0 0x00020344
-#define DRP_ECC_ERROR_STATUS1 0x00042048
-#define DRP_ECC_ERROR_STATUS0 0x00042044
-
-/* TRP, DRP interrupt register offsets */
-#define DRP_INTERRUPT_STATUS 0x00041000
-#define TRP_INTERRUPT_0_STATUS 0x00020480
-#define DRP_INTERRUPT_CLEAR 0x00041008
-#define DRP_ECC_ERROR_CNTR_CLEAR 0x00040004
-#define TRP_INTERRUPT_0_CLEAR 0x00020484
-#define TRP_ECC_ERROR_CNTR_CLEAR 0x00020440
-
/* Mask and shift macros */
#define ECC_DB_ERR_COUNT_MASK GENMASK(4, 0)
#define ECC_DB_ERR_WAYS_MASK GENMASK(31, 16)
@@ -60,15 +39,6 @@
#define DRP_TRP_INT_CLEAR GENMASK(1, 0)
#define DRP_TRP_CNT_CLEAR GENMASK(1, 0)
-/* Config registers offsets*/
-#define DRP_ECC_ERROR_CFG 0x00040000
-
-/* Tag RAM, Data RAM interrupt register offsets */
-#define CMN_INTERRUPT_0_ENABLE 0x0003001c
-#define CMN_INTERRUPT_2_ENABLE 0x0003003c
-#define TRP_INTERRUPT_0_ENABLE 0x00020488
-#define DRP_INTERRUPT_ENABLE 0x0004100c
-
#define SB_ERROR_THRESHOLD 0x1
#define SB_ERROR_THRESHOLD_SHIFT 24
#define SB_DB_TRP_INTERRUPT_ENABLE 0x3
@@ -88,9 +58,6 @@ enum {
static const struct llcc_edac_reg_data edac_reg_data[] = {
[LLCC_DRAM_CE] = {
.name = "DRAM Single-bit",
- .synd_reg = DRP_ECC_SB_ERR_SYN0,
- .count_status_reg = DRP_ECC_ERROR_STATUS1,
- .ways_status_reg = DRP_ECC_ERROR_STATUS0,
.reg_cnt = DRP_SYN_REG_CNT,
.count_mask = ECC_SB_ERR_COUNT_MASK,
.ways_mask = ECC_SB_ERR_WAYS_MASK,
@@ -98,9 +65,6 @@ static const struct llcc_edac_reg_data edac_reg_data[] = {
},
[LLCC_DRAM_UE] = {
.name = "DRAM Double-bit",
- .synd_reg = DRP_ECC_DB_ERR_SYN0,
- .count_status_reg = DRP_ECC_ERROR_STATUS1,
- .ways_status_reg = DRP_ECC_ERROR_STATUS0,
.reg_cnt = DRP_SYN_REG_CNT,
.count_mask = ECC_DB_ERR_COUNT_MASK,
.ways_mask = ECC_DB_ERR_WAYS_MASK,
@@ -108,9 +72,6 @@ static const struct llcc_edac_reg_data edac_reg_data[] = {
},
[LLCC_TRAM_CE] = {
.name = "TRAM Single-bit",
- .synd_reg = TRP_ECC_SB_ERR_SYN0,
- .count_status_reg = TRP_ECC_ERROR_STATUS1,
- .ways_status_reg = TRP_ECC_ERROR_STATUS0,
.reg_cnt = TRP_SYN_REG_CNT,
.count_mask = ECC_SB_ERR_COUNT_MASK,
.ways_mask = ECC_SB_ERR_WAYS_MASK,
@@ -118,9 +79,6 @@ static const struct llcc_edac_reg_data edac_reg_data[] = {
},
[LLCC_TRAM_UE] = {
.name = "TRAM Double-bit",
- .synd_reg = TRP_ECC_DB_ERR_SYN0,
- .count_status_reg = TRP_ECC_ERROR_STATUS1,
- .ways_status_reg = TRP_ECC_ERROR_STATUS0,
.reg_cnt = TRP_SYN_REG_CNT,
.count_mask = ECC_DB_ERR_COUNT_MASK,
.ways_mask = ECC_DB_ERR_WAYS_MASK,
@@ -128,7 +86,7 @@ static const struct llcc_edac_reg_data edac_reg_data[] = {
},
};
-static int qcom_llcc_core_setup(struct regmap *llcc_bcast_regmap)
+static int qcom_llcc_core_setup(struct llcc_drv_data *drv, struct regmap *llcc_bcast_regmap)
{
u32 sb_err_threshold;
int ret;
@@ -137,31 +95,31 @@ static int qcom_llcc_core_setup(struct regmap *llcc_bcast_regmap)
* Configure interrupt enable registers such that Tag, Data RAM related
* interrupts are propagated to interrupt controller for servicing
*/
- ret = regmap_update_bits(llcc_bcast_regmap, CMN_INTERRUPT_2_ENABLE,
+ ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->cmn_interrupt_2_enable,
TRP0_INTERRUPT_ENABLE,
TRP0_INTERRUPT_ENABLE);
if (ret)
return ret;
- ret = regmap_update_bits(llcc_bcast_regmap, TRP_INTERRUPT_0_ENABLE,
+ ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->trp_interrupt_0_enable,
SB_DB_TRP_INTERRUPT_ENABLE,
SB_DB_TRP_INTERRUPT_ENABLE);
if (ret)
return ret;
sb_err_threshold = (SB_ERROR_THRESHOLD << SB_ERROR_THRESHOLD_SHIFT);
- ret = regmap_write(llcc_bcast_regmap, DRP_ECC_ERROR_CFG,
+ ret = regmap_write(llcc_bcast_regmap, drv->edac_reg_offset->drp_ecc_error_cfg,
sb_err_threshold);
if (ret)
return ret;
- ret = regmap_update_bits(llcc_bcast_regmap, CMN_INTERRUPT_2_ENABLE,
+ ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->cmn_interrupt_2_enable,
DRP0_INTERRUPT_ENABLE,
DRP0_INTERRUPT_ENABLE);
if (ret)
return ret;
- ret = regmap_write(llcc_bcast_regmap, DRP_INTERRUPT_ENABLE,
+ ret = regmap_write(llcc_bcast_regmap, drv->edac_reg_offset->drp_interrupt_enable,
SB_DB_DRP_INTERRUPT_ENABLE);
return ret;
}
@@ -170,29 +128,33 @@ static int qcom_llcc_core_setup(struct regmap *llcc_bcast_regmap)
static int
qcom_llcc_clear_error_status(int err_type, struct llcc_drv_data *drv)
{
- int ret = 0;
+ int ret;
switch (err_type) {
case LLCC_DRAM_CE:
case LLCC_DRAM_UE:
- ret = regmap_write(drv->bcast_regmap, DRP_INTERRUPT_CLEAR,
+ ret = regmap_write(drv->bcast_regmap,
+ drv->edac_reg_offset->drp_interrupt_clear,
DRP_TRP_INT_CLEAR);
if (ret)
return ret;
- ret = regmap_write(drv->bcast_regmap, DRP_ECC_ERROR_CNTR_CLEAR,
+ ret = regmap_write(drv->bcast_regmap,
+ drv->edac_reg_offset->drp_ecc_error_cntr_clear,
DRP_TRP_CNT_CLEAR);
if (ret)
return ret;
break;
case LLCC_TRAM_CE:
case LLCC_TRAM_UE:
- ret = regmap_write(drv->bcast_regmap, TRP_INTERRUPT_0_CLEAR,
+ ret = regmap_write(drv->bcast_regmap,
+ drv->edac_reg_offset->trp_interrupt_0_clear,
DRP_TRP_INT_CLEAR);
if (ret)
return ret;
- ret = regmap_write(drv->bcast_regmap, TRP_ECC_ERROR_CNTR_CLEAR,
+ ret = regmap_write(drv->bcast_regmap,
+ drv->edac_reg_offset->trp_ecc_error_cntr_clear,
DRP_TRP_CNT_CLEAR);
if (ret)
return ret;
@@ -205,16 +167,54 @@ qcom_llcc_clear_error_status(int err_type, struct llcc_drv_data *drv)
return ret;
}
+struct qcom_llcc_syn_regs {
+ u32 synd_reg;
+ u32 count_status_reg;
+ u32 ways_status_reg;
+};
+
+static void get_reg_offsets(struct llcc_drv_data *drv, int err_type,
+ struct qcom_llcc_syn_regs *syn_regs)
+{
+ const struct llcc_edac_reg_offset *edac_reg_offset = drv->edac_reg_offset;
+
+ switch (err_type) {
+ case LLCC_DRAM_CE:
+ syn_regs->synd_reg = edac_reg_offset->drp_ecc_sb_err_syn0;
+ syn_regs->count_status_reg = edac_reg_offset->drp_ecc_error_status1;
+ syn_regs->ways_status_reg = edac_reg_offset->drp_ecc_error_status0;
+ break;
+ case LLCC_DRAM_UE:
+ syn_regs->synd_reg = edac_reg_offset->drp_ecc_db_err_syn0;
+ syn_regs->count_status_reg = edac_reg_offset->drp_ecc_error_status1;
+ syn_regs->ways_status_reg = edac_reg_offset->drp_ecc_error_status0;
+ break;
+ case LLCC_TRAM_CE:
+ syn_regs->synd_reg = edac_reg_offset->trp_ecc_sb_err_syn0;
+ syn_regs->count_status_reg = edac_reg_offset->trp_ecc_error_status1;
+ syn_regs->ways_status_reg = edac_reg_offset->trp_ecc_error_status0;
+ break;
+ case LLCC_TRAM_UE:
+ syn_regs->synd_reg = edac_reg_offset->trp_ecc_db_err_syn0;
+ syn_regs->count_status_reg = edac_reg_offset->trp_ecc_error_status1;
+ syn_regs->ways_status_reg = edac_reg_offset->trp_ecc_error_status0;
+ break;
+ }
+}
+
/* Dump Syndrome registers data for Tag RAM, Data RAM bit errors*/
static int
dump_syn_reg_values(struct llcc_drv_data *drv, u32 bank, int err_type)
{
struct llcc_edac_reg_data reg_data = edac_reg_data[err_type];
+ struct qcom_llcc_syn_regs regs = { };
int err_cnt, err_ways, ret, i;
u32 synd_reg, synd_val;
+ get_reg_offsets(drv, err_type, &regs);
+
for (i = 0; i < reg_data.reg_cnt; i++) {
- synd_reg = reg_data.synd_reg + (i * 4);
+ synd_reg = regs.synd_reg + (i * 4);
ret = regmap_read(drv->regmaps[bank], synd_reg,
&synd_val);
if (ret)
@@ -224,7 +224,7 @@ dump_syn_reg_values(struct llcc_drv_data *drv, u32 bank, int err_type)
reg_data.name, i, synd_val);
}
- ret = regmap_read(drv->regmaps[bank], reg_data.count_status_reg,
+ ret = regmap_read(drv->regmaps[bank], regs.count_status_reg,
&err_cnt);
if (ret)
goto clear;
@@ -234,7 +234,7 @@ dump_syn_reg_values(struct llcc_drv_data *drv, u32 bank, int err_type)
edac_printk(KERN_CRIT, EDAC_LLCC, "%s: Error count: 0x%4x\n",
reg_data.name, err_cnt);
- ret = regmap_read(drv->regmaps[bank], reg_data.ways_status_reg,
+ ret = regmap_read(drv->regmaps[bank], regs.ways_status_reg,
&err_ways);
if (ret)
goto clear;
@@ -295,7 +295,7 @@ static irqreturn_t llcc_ecc_irq_handler(int irq, void *edev_ctl)
/* Iterate over the banks and look for Tag RAM or Data RAM errors */
for (i = 0; i < drv->num_banks; i++) {
- ret = regmap_read(drv->regmaps[i], DRP_INTERRUPT_STATUS,
+ ret = regmap_read(drv->regmaps[i], drv->edac_reg_offset->drp_interrupt_status,
&drp_error);
if (!ret && (drp_error & SB_ECC_ERROR)) {
@@ -310,7 +310,7 @@ static irqreturn_t llcc_ecc_irq_handler(int irq, void *edev_ctl)
if (!ret)
irq_rc = IRQ_HANDLED;
- ret = regmap_read(drv->regmaps[i], TRP_INTERRUPT_0_STATUS,
+ ret = regmap_read(drv->regmaps[i], drv->edac_reg_offset->trp_interrupt_0_status,
&trp_error);
if (!ret && (trp_error & SB_ECC_ERROR)) {
@@ -342,7 +342,7 @@ static int qcom_llcc_edac_probe(struct platform_device *pdev)
int ecc_irq;
int rc;
- rc = qcom_llcc_core_setup(llcc_driv_data->bcast_regmap);
+ rc = qcom_llcc_core_setup(llcc_driv_data, llcc_driv_data->bcast_regmap);
if (rc)
return rc;
diff --git a/drivers/firmware/arm_ffa/bus.c b/drivers/firmware/arm_ffa/bus.c
index f29d77ecf72d..2b8bfcd010f5 100644
--- a/drivers/firmware/arm_ffa/bus.c
+++ b/drivers/firmware/arm_ffa/bus.c
@@ -15,6 +15,8 @@
#include "common.h"
+static DEFINE_IDA(ffa_bus_id);
+
static int ffa_device_match(struct device *dev, struct device_driver *drv)
{
const struct ffa_device_id *id_table;
@@ -53,7 +55,8 @@ static void ffa_device_remove(struct device *dev)
{
struct ffa_driver *ffa_drv = to_ffa_driver(dev->driver);
- ffa_drv->remove(to_ffa_dev(dev));
+ if (ffa_drv->remove)
+ ffa_drv->remove(to_ffa_dev(dev));
}
static int ffa_device_uevent(const struct device *dev, struct kobj_uevent_env *env)
@@ -130,6 +133,7 @@ static void ffa_release_device(struct device *dev)
{
struct ffa_device *ffa_dev = to_ffa_dev(dev);
+ ida_free(&ffa_bus_id, ffa_dev->id);
kfree(ffa_dev);
}
@@ -170,18 +174,24 @@ bool ffa_device_is_valid(struct ffa_device *ffa_dev)
struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id,
const struct ffa_ops *ops)
{
- int ret;
+ int id, ret;
struct device *dev;
struct ffa_device *ffa_dev;
+ id = ida_alloc_min(&ffa_bus_id, 1, GFP_KERNEL);
+ if (id < 0)
+ return NULL;
+
ffa_dev = kzalloc(sizeof(*ffa_dev), GFP_KERNEL);
- if (!ffa_dev)
+ if (!ffa_dev) {
+ ida_free(&ffa_bus_id, id);
return NULL;
+ }
dev = &ffa_dev->dev;
dev->bus = &ffa_bus_type;
dev->release = ffa_release_device;
- dev_set_name(&ffa_dev->dev, "arm-ffa-%04x", vm_id);
+ dev_set_name(&ffa_dev->dev, "arm-ffa-%d", id);
ffa_dev->vm_id = vm_id;
ffa_dev->ops = ops;
@@ -217,4 +227,5 @@ void arm_ffa_bus_exit(void)
{
ffa_devices_unregister();
bus_unregister(&ffa_bus_type);
+ ida_destroy(&ffa_bus_id);
}
diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c
index fa85c64d3ded..2109cd178ff7 100644
--- a/drivers/firmware/arm_ffa/driver.c
+++ b/drivers/firmware/arm_ffa/driver.c
@@ -193,7 +193,8 @@ __ffa_partition_info_get(u32 uuid0, u32 uuid1, u32 uuid2, u32 uuid3,
int idx, count, flags = 0, sz, buf_sz;
ffa_value_t partition_info;
- if (!buffer || !num_partitions) /* Just get the count for now */
+ if (drv_info->version > FFA_VERSION_1_0 &&
+ (!buffer || !num_partitions)) /* Just get the count for now */
flags = PARTITION_INFO_GET_RETURN_COUNT_ONLY;
mutex_lock(&drv_info->rx_lock);
@@ -420,12 +421,18 @@ ffa_setup_and_transmit(u32 func_id, void *buffer, u32 max_fragsize,
ep_mem_access->receiver = args->attrs[idx].receiver;
ep_mem_access->attrs = args->attrs[idx].attrs;
ep_mem_access->composite_off = COMPOSITE_OFFSET(args->nattrs);
+ ep_mem_access->flag = 0;
+ ep_mem_access->reserved = 0;
}
+ mem_region->handle = 0;
+ mem_region->reserved_0 = 0;
+ mem_region->reserved_1 = 0;
mem_region->ep_count = args->nattrs;
composite = buffer + COMPOSITE_OFFSET(args->nattrs);
composite->total_pg_cnt = ffa_get_num_pages_sg(args->sg);
composite->addr_range_cnt = num_entries;
+ composite->reserved = 0;
length = COMPOSITE_CONSTITUENTS_OFFSET(args->nattrs, num_entries);
frag_len = COMPOSITE_CONSTITUENTS_OFFSET(args->nattrs, 0);
@@ -460,6 +467,7 @@ ffa_setup_and_transmit(u32 func_id, void *buffer, u32 max_fragsize,
constituents->address = sg_phys(args->sg);
constituents->pg_cnt = args->sg->length / FFA_PAGE_SIZE;
+ constituents->reserved = 0;
constituents++;
frag_len += sizeof(struct ffa_mem_region_addr_range);
} while ((args->sg = sg_next(args->sg)));
diff --git a/drivers/firmware/arm_scmi/raw_mode.c b/drivers/firmware/arm_scmi/raw_mode.c
index d40df099fd51..6971dcf72fb9 100644
--- a/drivers/firmware/arm_scmi/raw_mode.c
+++ b/drivers/firmware/arm_scmi/raw_mode.c
@@ -1066,7 +1066,7 @@ static int scmi_xfer_raw_worker_init(struct scmi_raw_mode_info *raw)
raw->wait_wq = alloc_workqueue("scmi-raw-wait-wq-%d",
WQ_UNBOUND | WQ_FREEZABLE |
- WQ_HIGHPRI, WQ_SYSFS, raw->id);
+ WQ_HIGHPRI | WQ_SYSFS, 0, raw->id);
if (!raw->wait_wq)
return -ENOMEM;
diff --git a/drivers/firmware/cirrus/cs_dsp.c b/drivers/firmware/cirrus/cs_dsp.c
index e4ccfb6a8fa5..ec056f6f40ce 100644
--- a/drivers/firmware/cirrus/cs_dsp.c
+++ b/drivers/firmware/cirrus/cs_dsp.c
@@ -2124,6 +2124,7 @@ static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware
file, blocks, le32_to_cpu(blk->len),
type, le32_to_cpu(blk->id));
+ region_name = cs_dsp_mem_region_name(type);
mem = cs_dsp_find_region(dsp, type);
if (!mem) {
cs_dsp_err(dsp, "No base for region %x\n", type);
@@ -2147,8 +2148,8 @@ static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware
reg = dsp->ops->region_to_reg(mem, reg);
reg += offset;
} else {
- cs_dsp_err(dsp, "No %x for algorithm %x\n",
- type, le32_to_cpu(blk->id));
+ cs_dsp_err(dsp, "No %s for algorithm %x\n",
+ region_name, le32_to_cpu(blk->id));
}
break;
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index abeff7dc0b58..34b9e7876538 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -361,24 +361,6 @@ static void __init efi_debugfs_init(void)
static inline void efi_debugfs_init(void) {}
#endif
-static void refresh_nv_rng_seed(struct work_struct *work)
-{
- u8 seed[EFI_RANDOM_SEED_SIZE];
-
- get_random_bytes(seed, sizeof(seed));
- efi.set_variable(L"RandomSeed", &LINUX_EFI_RANDOM_SEED_TABLE_GUID,
- EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS |
- EFI_VARIABLE_RUNTIME_ACCESS, sizeof(seed), seed);
- memzero_explicit(seed, sizeof(seed));
-}
-static int refresh_nv_rng_seed_notification(struct notifier_block *nb, unsigned long action, void *data)
-{
- static DECLARE_WORK(work, refresh_nv_rng_seed);
- schedule_work(&work);
- return NOTIFY_DONE;
-}
-static struct notifier_block refresh_nv_rng_seed_nb = { .notifier_call = refresh_nv_rng_seed_notification };
-
/*
* We register the efi subsystem with the firmware subsystem and the
* efivars subsystem with the efi subsystem, if the system was booted with
@@ -451,9 +433,6 @@ static int __init efisubsys_init(void)
platform_device_register_simple("efi_secret", 0, NULL, 0);
#endif
- if (efi_rt_services_supported(EFI_RT_SUPPORTED_SET_VARIABLE))
- execute_with_initialized_rng(&refresh_nv_rng_seed_nb);
-
return 0;
err_remove_group:
diff --git a/drivers/firmware/efi/libstub/Makefile.zboot b/drivers/firmware/efi/libstub/Makefile.zboot
index 89ef820f3b34..2c489627a807 100644
--- a/drivers/firmware/efi/libstub/Makefile.zboot
+++ b/drivers/firmware/efi/libstub/Makefile.zboot
@@ -32,7 +32,8 @@ zboot-size-len-$(CONFIG_KERNEL_GZIP) := 0
$(obj)/vmlinuz: $(obj)/vmlinux.bin FORCE
$(call if_changed,$(zboot-method-y))
-OBJCOPYFLAGS_vmlinuz.o := -I binary -O $(EFI_ZBOOT_BFD_TARGET) $(EFI_ZBOOT_OBJCOPY_FLAGS) \
+# avoid eager evaluation to prevent references to non-existent build artifacts
+OBJCOPYFLAGS_vmlinuz.o = -I binary -O $(EFI_ZBOOT_BFD_TARGET) $(EFI_ZBOOT_OBJCOPY_FLAGS) \
--rename-section .data=.gzdata,load,alloc,readonly,contents
$(obj)/vmlinuz.o: $(obj)/vmlinuz FORCE
$(call if_changed,objcopy)
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index 67d5a20802e0..54a2822cae77 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -1133,4 +1133,7 @@ const u8 *__efi_get_smbios_string(const struct efi_smbios_record *record,
void efi_remap_image(unsigned long image_base, unsigned alloc_size,
unsigned long code_size);
+asmlinkage efi_status_t __efiapi
+efi_zboot_entry(efi_handle_t handle, efi_system_table_t *systab);
+
#endif
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 5521f060d58e..f45c6a36551c 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -897,7 +897,7 @@ config GPIO_F7188X
help
This option enables support for GPIOs found on Fintek Super-I/O
chips F71869, F71869A, F71882FG, F71889F and F81866.
- As well as Nuvoton Super-I/O chip NCT6116D.
+ As well as Nuvoton Super-I/O chip NCT6126D.
To compile this driver as a module, choose M here: the module will
be called f7188x-gpio.
diff --git a/drivers/gpio/gpio-f7188x.c b/drivers/gpio/gpio-f7188x.c
index 9effa7769bef..f54ca5a1775e 100644
--- a/drivers/gpio/gpio-f7188x.c
+++ b/drivers/gpio/gpio-f7188x.c
@@ -48,7 +48,7 @@
/*
* Nuvoton devices.
*/
-#define SIO_NCT6116D_ID 0xD283 /* NCT6116D chipset ID */
+#define SIO_NCT6126D_ID 0xD283 /* NCT6126D chipset ID */
#define SIO_LD_GPIO_NUVOTON 0x07 /* GPIO logical device */
@@ -62,7 +62,7 @@ enum chips {
f81866,
f81804,
f81865,
- nct6116d,
+ nct6126d,
};
static const char * const f7188x_names[] = {
@@ -74,7 +74,7 @@ static const char * const f7188x_names[] = {
"f81866",
"f81804",
"f81865",
- "nct6116d",
+ "nct6126d",
};
struct f7188x_sio {
@@ -187,8 +187,8 @@ static int f7188x_gpio_set_config(struct gpio_chip *chip, unsigned offset,
/* Output mode register (0:open drain 1:push-pull). */
#define f7188x_gpio_out_mode(base) ((base) + 3)
-#define f7188x_gpio_dir_invert(type) ((type) == nct6116d)
-#define f7188x_gpio_data_single(type) ((type) == nct6116d)
+#define f7188x_gpio_dir_invert(type) ((type) == nct6126d)
+#define f7188x_gpio_data_single(type) ((type) == nct6126d)
static struct f7188x_gpio_bank f71869_gpio_bank[] = {
F7188X_GPIO_BANK(0, 6, 0xF0, DRVNAME "-0"),
@@ -274,7 +274,7 @@ static struct f7188x_gpio_bank f81865_gpio_bank[] = {
F7188X_GPIO_BANK(60, 5, 0x90, DRVNAME "-6"),
};
-static struct f7188x_gpio_bank nct6116d_gpio_bank[] = {
+static struct f7188x_gpio_bank nct6126d_gpio_bank[] = {
F7188X_GPIO_BANK(0, 8, 0xE0, DRVNAME "-0"),
F7188X_GPIO_BANK(10, 8, 0xE4, DRVNAME "-1"),
F7188X_GPIO_BANK(20, 8, 0xE8, DRVNAME "-2"),
@@ -282,7 +282,7 @@ static struct f7188x_gpio_bank nct6116d_gpio_bank[] = {
F7188X_GPIO_BANK(40, 8, 0xF0, DRVNAME "-4"),
F7188X_GPIO_BANK(50, 8, 0xF4, DRVNAME "-5"),
F7188X_GPIO_BANK(60, 8, 0xF8, DRVNAME "-6"),
- F7188X_GPIO_BANK(70, 1, 0xFC, DRVNAME "-7"),
+ F7188X_GPIO_BANK(70, 8, 0xFC, DRVNAME "-7"),
};
static int f7188x_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
@@ -490,9 +490,9 @@ static int f7188x_gpio_probe(struct platform_device *pdev)
data->nr_bank = ARRAY_SIZE(f81865_gpio_bank);
data->bank = f81865_gpio_bank;
break;
- case nct6116d:
- data->nr_bank = ARRAY_SIZE(nct6116d_gpio_bank);
- data->bank = nct6116d_gpio_bank;
+ case nct6126d:
+ data->nr_bank = ARRAY_SIZE(nct6126d_gpio_bank);
+ data->bank = nct6126d_gpio_bank;
break;
default:
return -ENODEV;
@@ -559,9 +559,9 @@ static int __init f7188x_find(int addr, struct f7188x_sio *sio)
case SIO_F81865_ID:
sio->type = f81865;
break;
- case SIO_NCT6116D_ID:
+ case SIO_NCT6126D_ID:
sio->device = SIO_LD_GPIO_NUVOTON;
- sio->type = nct6116d;
+ sio->type = nct6126d;
break;
default:
pr_info("Unsupported Fintek device 0x%04x\n", devid);
@@ -569,7 +569,7 @@ static int __init f7188x_find(int addr, struct f7188x_sio *sio)
}
/* double check manufacturer where possible */
- if (sio->type != nct6116d) {
+ if (sio->type != nct6126d) {
manid = superio_inw(addr, SIO_FINTEK_MANID);
if (manid != SIO_FINTEK_ID) {
pr_debug("Not a Fintek device at 0x%08x\n", addr);
@@ -581,7 +581,7 @@ static int __init f7188x_find(int addr, struct f7188x_sio *sio)
err = 0;
pr_info("Found %s at %#x\n", f7188x_names[sio->type], (unsigned int)addr);
- if (sio->type != nct6116d)
+ if (sio->type != nct6126d)
pr_info(" revision %d\n", superio_inb(addr, SIO_FINTEK_DEVREV));
err:
diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c
index e6a7049bef64..b32063ac845a 100644
--- a/drivers/gpio/gpio-mockup.c
+++ b/drivers/gpio/gpio-mockup.c
@@ -369,7 +369,7 @@ static void gpio_mockup_debugfs_setup(struct device *dev,
priv->offset = i;
priv->desc = gpiochip_get_desc(gc, i);
- debugfs_create_file(name, 0200, chip->dbg_dir, priv,
+ debugfs_create_file(name, 0600, chip->dbg_dir, priv,
&gpio_mockup_debugfs_ops);
}
}
diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c
index 98939cd4a71e..745e5f67254e 100644
--- a/drivers/gpio/gpio-sifive.c
+++ b/drivers/gpio/gpio-sifive.c
@@ -221,8 +221,12 @@ static int sifive_gpio_probe(struct platform_device *pdev)
return -ENODEV;
}
- for (i = 0; i < ngpio; i++)
- chip->irq_number[i] = platform_get_irq(pdev, i);
+ for (i = 0; i < ngpio; i++) {
+ ret = platform_get_irq(pdev, i);
+ if (ret < 0)
+ return ret;
+ chip->irq_number[i] = ret;
+ }
ret = bgpio_init(&chip->gc, dev, 4,
chip->base + SIFIVE_GPIO_INPUT_VAL,
diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c
index a1c8702f362c..8b49b0abacd5 100644
--- a/drivers/gpio/gpio-sim.c
+++ b/drivers/gpio/gpio-sim.c
@@ -696,6 +696,9 @@ static char **gpio_sim_make_line_names(struct gpio_sim_bank *bank,
char **line_names;
list_for_each_entry(line, &bank->line_list, siblings) {
+ if (line->offset >= bank->num_lines)
+ continue;
+
if (line->name) {
if (line->offset > max_offset)
max_offset = line->offset;
@@ -721,8 +724,13 @@ static char **gpio_sim_make_line_names(struct gpio_sim_bank *bank,
if (!line_names)
return ERR_PTR(-ENOMEM);
- list_for_each_entry(line, &bank->line_list, siblings)
- line_names[line->offset] = line->name;
+ list_for_each_entry(line, &bank->line_list, siblings) {
+ if (line->offset >= bank->num_lines)
+ continue;
+
+ if (line->name && (line->offset <= max_offset))
+ line_names[line->offset] = line->name;
+ }
return line_names;
}
@@ -754,6 +762,9 @@ static int gpio_sim_add_hogs(struct gpio_sim_device *dev)
list_for_each_entry(bank, &dev->bank_list, siblings) {
list_for_each_entry(line, &bank->line_list, siblings) {
+ if (line->offset >= bank->num_lines)
+ continue;
+
if (line->hog)
num_hogs++;
}
@@ -769,6 +780,9 @@ static int gpio_sim_add_hogs(struct gpio_sim_device *dev)
list_for_each_entry(bank, &dev->bank_list, siblings) {
list_for_each_entry(line, &bank->line_list, siblings) {
+ if (line->offset >= bank->num_lines)
+ continue;
+
if (!line->hog)
continue;
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 04fb05df805b..5be8ad61523e 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -209,6 +209,8 @@ static int gpiochip_find_base(int ngpio)
break;
/* nope, check the space right after the chip */
base = gdev->base + gdev->ngpio;
+ if (base < GPIO_DYNAMIC_BASE)
+ base = GPIO_DYNAMIC_BASE;
}
if (gpio_is_valid(base)) {
@@ -1743,7 +1745,7 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gc)
}
/* Remove all IRQ mappings and delete the domain */
- if (gc->irq.domain) {
+ if (!gc->irq.domain_is_allocated_externally && gc->irq.domain) {
unsigned int irq;
for (offset = 0; offset < gc->ngpio; offset++) {
@@ -1789,6 +1791,15 @@ int gpiochip_irqchip_add_domain(struct gpio_chip *gc,
gc->to_irq = gpiochip_to_irq;
gc->irq.domain = domain;
+ gc->irq.domain_is_allocated_externally = true;
+
+ /*
+ * Using barrier() here to prevent compiler from reordering
+ * gc->irq.initialized before adding irqdomain.
+ */
+ barrier();
+
+ gc->irq.initialized = true;
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index aeeec211861c..fd6e83795873 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -1092,16 +1092,20 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev)
* S0ix even though the system is suspending to idle, so return false
* in that case.
*/
- if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0))
- dev_warn_once(adev->dev,
+ if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) {
+ dev_err_once(adev->dev,
"Power consumption will be higher as BIOS has not been configured for suspend-to-idle.\n"
"To use suspend-to-idle change the sleep mode in BIOS setup.\n");
+ return false;
+ }
#if !IS_ENABLED(CONFIG_AMD_PMC)
- dev_warn_once(adev->dev,
+ dev_err_once(adev->dev,
"Power consumption will be higher as the kernel has not been compiled with CONFIG_AMD_PMC.\n");
-#endif /* CONFIG_AMD_PMC */
+ return false;
+#else
return true;
+#endif /* CONFIG_AMD_PMC */
}
#endif /* CONFIG_SUSPEND */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index b1ca1ab6d6ad..393b6fb7a71d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1615,6 +1615,7 @@ static const u16 amdgpu_unsupported_pciidlist[] = {
0x5874,
0x5940,
0x5941,
+ 0x5b70,
0x5b72,
0x5b73,
0x5b74,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index f52d0ba91a77..a7d250809da9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -582,7 +582,8 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
if (r)
amdgpu_fence_driver_force_completion(ring);
- if (ring->fence_drv.irq_src)
+ if (!drm_dev_is_unplugged(adev_to_drm(adev)) &&
+ ring->fence_drv.irq_src)
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 4e2531758866..95b0f984acbf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -593,6 +593,8 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
case IP_VERSION(9, 3, 0):
/* GC 10.3.7 */
case IP_VERSION(10, 3, 7):
+ /* GC 11.0.1 */
+ case IP_VERSION(11, 0, 1):
if (amdgpu_tmz == 0) {
adev->gmc.tmz_enabled = false;
dev_info(adev->dev,
@@ -616,7 +618,6 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
case IP_VERSION(10, 3, 1):
/* YELLOW_CARP*/
case IP_VERSION(10, 3, 3):
- case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 4):
/* Don't enable it by default yet.
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
index b07c000fc8ba..4fa019c8aefc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
@@ -241,6 +241,31 @@ int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev,
return 0;
}
+int amdgpu_jpeg_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r, i;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ r = amdgpu_irq_get(adev, &adev->jpeg.inst[i].ras_poison_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+ }
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+ return r;
+}
+
int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev)
{
int err;
@@ -262,7 +287,7 @@ int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev)
adev->jpeg.ras_if = &ras->ras_block.ras_comm;
if (!ras->ras_block.ras_late_init)
- ras->ras_block.ras_late_init = amdgpu_ras_block_late_init;
+ ras->ras_block.ras_late_init = amdgpu_jpeg_ras_late_init;
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
index 0ca76f0f23e9..1471a1ebb034 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
@@ -38,6 +38,7 @@ struct amdgpu_jpeg_reg{
struct amdgpu_jpeg_inst {
struct amdgpu_ring ring_dec;
struct amdgpu_irq_src irq;
+ struct amdgpu_irq_src ras_poison_irq;
struct amdgpu_jpeg_reg external;
};
@@ -72,6 +73,8 @@ int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry);
+int amdgpu_jpeg_ras_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block);
int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev);
#endif /*__AMDGPU_JPEG_H__*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 2bd1a54ee866..a70103ac0026 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -79,9 +79,10 @@ static void amdgpu_bo_user_destroy(struct ttm_buffer_object *tbo)
static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
+ struct amdgpu_bo *shadow_bo = ttm_to_amdgpu_bo(tbo), *bo;
struct amdgpu_bo_vm *vmbo;
+ bo = shadow_bo->parent;
vmbo = to_amdgpu_bo_vm(bo);
/* in case amdgpu_device_recover_vram got NULL of bo->parent */
if (!list_empty(&vmbo->shadow_list)) {
@@ -139,7 +140,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
places[c].lpfn = visible_pfn;
- else if (adev->gmc.real_vram_size != adev->gmc.visible_vram_size)
+ else
places[c].flags |= TTM_PL_FLAG_TOPDOWN;
if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
@@ -694,11 +695,6 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev,
return r;
*vmbo_ptr = to_amdgpu_bo_vm(bo_ptr);
- INIT_LIST_HEAD(&(*vmbo_ptr)->shadow_list);
- /* Set destroy callback to amdgpu_bo_vm_destroy after vmbo->shadow_list
- * is initialized.
- */
- bo_ptr->tbo.destroy = &amdgpu_bo_vm_destroy;
return r;
}
@@ -715,6 +711,8 @@ void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo)
mutex_lock(&adev->shadow_list_lock);
list_add_tail(&vmbo->shadow_list, &adev->shadow_list);
+ vmbo->shadow->parent = amdgpu_bo_ref(&vmbo->bo);
+ vmbo->shadow->tbo.destroy = &amdgpu_bo_vm_destroy;
mutex_unlock(&adev->shadow_list_lock);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 9d7e6e0e73ed..a150b7a4b4aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -3548,6 +3548,9 @@ static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj,
void *fw_pri_cpu_addr;
int ret;
+ if (adev->psp.vbflash_image_size == 0)
+ return -EINVAL;
+
dev_info(adev->dev, "VBIOS flash to PSP started");
ret = amdgpu_bo_create_kernel(adev, adev->psp.vbflash_image_size,
@@ -3599,13 +3602,13 @@ static ssize_t amdgpu_psp_vbflash_status(struct device *dev,
}
static const struct bin_attribute psp_vbflash_bin_attr = {
- .attr = {.name = "psp_vbflash", .mode = 0664},
+ .attr = {.name = "psp_vbflash", .mode = 0660},
.size = 0,
.write = amdgpu_psp_vbflash_write,
.read = amdgpu_psp_vbflash_read,
};
-static DEVICE_ATTR(psp_vbflash_status, 0444, amdgpu_psp_vbflash_status, NULL);
+static DEVICE_ATTR(psp_vbflash_status, 0440, amdgpu_psp_vbflash_status, NULL);
int amdgpu_psp_sysfs_init(struct amdgpu_device *adev)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index dc474b809604..49de3a3eebc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -581,3 +581,21 @@ void amdgpu_ring_ib_end(struct amdgpu_ring *ring)
if (ring->is_sw_ring)
amdgpu_sw_ring_ib_end(ring);
}
+
+void amdgpu_ring_ib_on_emit_cntl(struct amdgpu_ring *ring)
+{
+ if (ring->is_sw_ring)
+ amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_CONTROL);
+}
+
+void amdgpu_ring_ib_on_emit_ce(struct amdgpu_ring *ring)
+{
+ if (ring->is_sw_ring)
+ amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_CE);
+}
+
+void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring)
+{
+ if (ring->is_sw_ring)
+ amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_DE);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index d8749444b689..2474cb71e476 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -227,6 +227,9 @@ struct amdgpu_ring_funcs {
int (*preempt_ib)(struct amdgpu_ring *ring);
void (*emit_mem_sync)(struct amdgpu_ring *ring);
void (*emit_wave_limit)(struct amdgpu_ring *ring, bool enable);
+ void (*patch_cntl)(struct amdgpu_ring *ring, unsigned offset);
+ void (*patch_ce)(struct amdgpu_ring *ring, unsigned offset);
+ void (*patch_de)(struct amdgpu_ring *ring, unsigned offset);
};
struct amdgpu_ring {
@@ -318,10 +321,16 @@ struct amdgpu_ring {
#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
#define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
+#define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o)))
+#define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o)))
+#define amdgpu_ring_patch_de(r, o) ((r)->funcs->patch_de((r), (o)))
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
void amdgpu_ring_ib_begin(struct amdgpu_ring *ring);
void amdgpu_ring_ib_end(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_on_emit_cntl(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_on_emit_ce(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring);
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
index 62079f0e3ee8..73516abef662 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -105,6 +105,16 @@ static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux)
amdgpu_fence_update_start_timestamp(e->ring,
chunk->sync_seq,
ktime_get());
+ if (chunk->sync_seq ==
+ le32_to_cpu(*(e->ring->fence_drv.cpu_addr + 2))) {
+ if (chunk->cntl_offset <= e->ring->buf_mask)
+ amdgpu_ring_patch_cntl(e->ring,
+ chunk->cntl_offset);
+ if (chunk->ce_offset <= e->ring->buf_mask)
+ amdgpu_ring_patch_ce(e->ring, chunk->ce_offset);
+ if (chunk->de_offset <= e->ring->buf_mask)
+ amdgpu_ring_patch_de(e->ring, chunk->de_offset);
+ }
amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, e->ring,
chunk->start,
chunk->end);
@@ -407,6 +417,17 @@ void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring)
amdgpu_ring_mux_end_ib(mux, ring);
}
+void amdgpu_sw_ring_ib_mark_offset(struct amdgpu_ring *ring, enum amdgpu_ring_mux_offset_type type)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+ unsigned offset;
+
+ offset = ring->wptr & ring->buf_mask;
+
+ amdgpu_ring_mux_ib_mark_offset(mux, ring, offset, type);
+}
+
void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
{
struct amdgpu_mux_entry *e;
@@ -429,6 +450,10 @@ void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *r
}
chunk->start = ring->wptr;
+ /* the initialized value used to check if they are set by the ib submission*/
+ chunk->cntl_offset = ring->buf_mask + 1;
+ chunk->de_offset = ring->buf_mask + 1;
+ chunk->ce_offset = ring->buf_mask + 1;
list_add_tail(&chunk->entry, &e->list);
}
@@ -454,6 +479,41 @@ static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux *mux, struct a
}
}
+void amdgpu_ring_mux_ib_mark_offset(struct amdgpu_ring_mux *mux,
+ struct amdgpu_ring *ring, u64 offset,
+ enum amdgpu_ring_mux_offset_type type)
+{
+ struct amdgpu_mux_entry *e;
+ struct amdgpu_mux_chunk *chunk;
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("cannot find entry!\n");
+ return;
+ }
+
+ chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry);
+ if (!chunk) {
+ DRM_ERROR("cannot find chunk!\n");
+ return;
+ }
+
+ switch (type) {
+ case AMDGPU_MUX_OFFSET_TYPE_CONTROL:
+ chunk->cntl_offset = offset;
+ break;
+ case AMDGPU_MUX_OFFSET_TYPE_DE:
+ chunk->de_offset = offset;
+ break;
+ case AMDGPU_MUX_OFFSET_TYPE_CE:
+ chunk->ce_offset = offset;
+ break;
+ default:
+ DRM_ERROR("invalid type (%d)\n", type);
+ break;
+ }
+}
+
void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
{
struct amdgpu_mux_entry *e;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
index 4be45fc14954..b22d4fb2a847 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
@@ -50,6 +50,12 @@ struct amdgpu_mux_entry {
struct list_head list;
};
+enum amdgpu_ring_mux_offset_type {
+ AMDGPU_MUX_OFFSET_TYPE_CONTROL,
+ AMDGPU_MUX_OFFSET_TYPE_DE,
+ AMDGPU_MUX_OFFSET_TYPE_CE,
+};
+
struct amdgpu_ring_mux {
struct amdgpu_ring *real_ring;
@@ -72,12 +78,18 @@ struct amdgpu_ring_mux {
* @sync_seq: the fence seqno related with the saved IB.
* @start:- start location on the software ring.
* @end:- end location on the software ring.
+ * @control_offset:- the PRE_RESUME bit position used for resubmission.
+ * @de_offset:- the anchor in write_data for de meta of resubmission.
+ * @ce_offset:- the anchor in write_data for ce meta of resubmission.
*/
struct amdgpu_mux_chunk {
struct list_head entry;
uint32_t sync_seq;
u64 start;
u64 end;
+ u64 cntl_offset;
+ u64 de_offset;
+ u64 ce_offset;
};
int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
@@ -89,6 +101,8 @@ u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ri
u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_ib_mark_offset(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
+ u64 offset, enum amdgpu_ring_mux_offset_type type);
bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux);
u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
@@ -97,6 +111,7 @@ void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring);
void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_ib_mark_offset(struct amdgpu_ring *ring, enum amdgpu_ring_mux_offset_type type);
const char *amdgpu_sw_ring_name(int idx);
unsigned int amdgpu_sw_ring_priority(int idx);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index e63fcc58e8e0..2d94f1b63bd6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -1181,6 +1181,31 @@ int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,
return 0;
}
+int amdgpu_vcn_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r, i;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ r = amdgpu_irq_get(adev, &adev->vcn.inst[i].ras_poison_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+ }
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+ return r;
+}
+
int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev)
{
int err;
@@ -1202,7 +1227,7 @@ int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev)
adev->vcn.ras_if = &ras->ras_block.ras_comm;
if (!ras->ras_block.ras_late_init)
- ras->ras_block.ras_late_init = amdgpu_ras_block_late_init;
+ ras->ras_block.ras_late_init = amdgpu_vcn_ras_late_init;
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index c730949ece7d..f1397ef66fd7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -234,6 +234,7 @@ struct amdgpu_vcn_inst {
struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];
atomic_t sched_score;
struct amdgpu_irq_src irq;
+ struct amdgpu_irq_src ras_poison_irq;
struct amdgpu_vcn_reg external;
struct amdgpu_bo *dpg_sram_bo;
struct dpg_pause_state pause_state;
@@ -400,6 +401,8 @@ void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev,
int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry);
+int amdgpu_vcn_ras_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block);
int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index df63dc3bca18..051c7194ab4f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -564,7 +564,6 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
return r;
}
- (*vmbo)->shadow->parent = amdgpu_bo_ref(bo);
amdgpu_bo_add_to_shadow_list(*vmbo);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 43d6a9d6a538..afacfb9b5bf6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -800,7 +800,7 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
{
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct drm_buddy *mm = &mgr->mm;
- struct drm_buddy_block *block;
+ struct amdgpu_vram_reservation *rsv;
drm_printf(printer, " vis usage:%llu\n",
amdgpu_vram_mgr_vis_usage(mgr));
@@ -812,8 +812,9 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
drm_buddy_print(mm, printer);
drm_printf(printer, "reserved:\n");
- list_for_each_entry(block, &mgr->reserved_pages, link)
- drm_buddy_block_print(mm, block, printer);
+ list_for_each_entry(rsv, &mgr->reserved_pages, blocks)
+ drm_printf(printer, "%#018llx-%#018llx: %llu\n",
+ rsv->start, rsv->start + rsv->size, rsv->size);
mutex_unlock(&mgr->lock);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index f5b5ce1051a2..ab44c1391d52 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -6892,8 +6892,10 @@ static int gfx_v10_0_kiq_resume(struct amdgpu_device *adev)
return r;
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (unlikely(r != 0))
+ if (unlikely(r != 0)) {
+ amdgpu_bo_unreserve(ring->mqd_obj);
return r;
+ }
gfx_v10_0_kiq_init_queue(ring);
amdgpu_bo_kunmap(ring->mqd_obj);
@@ -8152,8 +8154,14 @@ static int gfx_v10_0_set_powergating_state(void *handle,
case IP_VERSION(10, 3, 3):
case IP_VERSION(10, 3, 6):
case IP_VERSION(10, 3, 7):
+ if (!enable)
+ amdgpu_gfx_off_ctrl(adev, false);
+
gfx_v10_cntl_pg(adev, enable);
- amdgpu_gfx_off_ctrl(adev, enable);
+
+ if (enable)
+ amdgpu_gfx_off_ctrl(adev, true);
+
break;
default:
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index f5c376276984..c4940b6ea1c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -4667,24 +4667,27 @@ static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
uint64_t clock;
uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after;
- amdgpu_gfx_off_ctrl(adev, false);
- mutex_lock(&adev->gfx.gpu_clock_mutex);
if (amdgpu_sriov_vf(adev)) {
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->gfx.gpu_clock_mutex);
clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
if (clock_counter_hi_pre != clock_counter_hi_after)
clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
+ mutex_unlock(&adev->gfx.gpu_clock_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
} else {
+ preempt_disable();
clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
if (clock_counter_hi_pre != clock_counter_hi_after)
clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
+ preempt_enable();
}
clock = clock_counter_lo | (clock_counter_hi_after << 32ULL);
- mutex_unlock(&adev->gfx.gpu_clock_mutex);
- amdgpu_gfx_off_ctrl(adev, true);
+
return clock;
}
@@ -5150,8 +5153,14 @@ static int gfx_v11_0_set_powergating_state(void *handle,
break;
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 4):
+ if (!enable)
+ amdgpu_gfx_off_ctrl(adev, false);
+
gfx_v11_cntl_pg(adev, enable);
- amdgpu_gfx_off_ctrl(adev, enable);
+
+ if (enable)
+ amdgpu_gfx_off_ctrl(adev, true);
+
break;
default:
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index f46d4b18a3fa..a674c8a58dc2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -149,16 +149,6 @@ MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir 0x0026
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX 1
-#define mmGOLDEN_TSC_COUNT_UPPER_Raven 0x007a
-#define mmGOLDEN_TSC_COUNT_UPPER_Raven_BASE_IDX 0
-#define mmGOLDEN_TSC_COUNT_LOWER_Raven 0x007b
-#define mmGOLDEN_TSC_COUNT_LOWER_Raven_BASE_IDX 0
-
-#define mmGOLDEN_TSC_COUNT_UPPER_Raven2 0x0068
-#define mmGOLDEN_TSC_COUNT_UPPER_Raven2_BASE_IDX 0
-#define mmGOLDEN_TSC_COUNT_LOWER_Raven2 0x0069
-#define mmGOLDEN_TSC_COUNT_LOWER_Raven2_BASE_IDX 0
-
enum ta_ras_gfx_subblock {
/*CPC*/
TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
@@ -765,7 +755,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status);
@@ -3617,8 +3607,10 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
return r;
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (unlikely(r != 0))
+ if (unlikely(r != 0)) {
+ amdgpu_bo_unreserve(ring->mqd_obj);
return r;
+ }
gfx_v9_0_kiq_init_queue(ring);
amdgpu_bo_kunmap(ring->mqd_obj);
@@ -4002,36 +3994,6 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
preempt_enable();
clock = clock_lo | (clock_hi << 32ULL);
break;
- case IP_VERSION(9, 1, 0):
- preempt_disable();
- clock_hi = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven);
- clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven);
- hi_check = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven);
- /* The PWR TSC clock frequency is 100MHz, which sets 32-bit carry over
- * roughly every 42 seconds.
- */
- if (hi_check != clock_hi) {
- clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven);
- clock_hi = hi_check;
- }
- preempt_enable();
- clock = clock_lo | (clock_hi << 32ULL);
- break;
- case IP_VERSION(9, 2, 2):
- preempt_disable();
- clock_hi = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven2);
- clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven2);
- hi_check = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven2);
- /* The PWR TSC clock frequency is 100MHz, which sets 32-bit carry over
- * roughly every 42 seconds.
- */
- if (hi_check != clock_hi) {
- clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven2);
- clock_hi = hi_check;
- }
- preempt_enable();
- clock = clock_lo | (clock_hi << 32ULL);
- break;
default:
amdgpu_gfx_off_ctrl(adev, false);
mutex_lock(&adev->gfx.gpu_clock_mutex);
@@ -5165,7 +5127,8 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
gfx_v9_0_ring_emit_de_meta(ring,
(!amdgpu_sriov_vf(ring->adev) &&
flags & AMDGPU_IB_PREEMPTED) ?
- true : false);
+ true : false,
+ job->gds_size > 0 && job->gds_base != 0);
}
amdgpu_ring_write(ring, header);
@@ -5176,9 +5139,83 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
#endif
lower_32_bits(ib->gpu_addr));
amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_ib_on_emit_cntl(ring);
amdgpu_ring_write(ring, control);
}
+static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
+ unsigned offset)
+{
+ u32 control = ring->ring[offset];
+
+ control |= INDIRECT_BUFFER_PRE_RESUME(1);
+ ring->ring[offset] = control;
+}
+
+static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
+ unsigned offset)
+{
+ struct amdgpu_device *adev = ring->adev;
+ void *ce_payload_cpu_addr;
+ uint64_t payload_offset, payload_size;
+
+ payload_size = sizeof(struct v9_ce_ib_state);
+
+ if (ring->is_mes_queue) {
+ payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+ gfx[0].gfx_meta_data) +
+ offsetof(struct v9_gfx_meta_data, ce_payload);
+ ce_payload_cpu_addr =
+ amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
+ } else {
+ payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
+ ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
+ }
+
+ if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
+ memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
+ } else {
+ memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr,
+ (ring->buf_mask + 1 - offset) << 2);
+ payload_size -= (ring->buf_mask + 1 - offset) << 2;
+ memcpy((void *)&ring->ring[0],
+ ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
+ payload_size);
+ }
+}
+
+static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
+ unsigned offset)
+{
+ struct amdgpu_device *adev = ring->adev;
+ void *de_payload_cpu_addr;
+ uint64_t payload_offset, payload_size;
+
+ payload_size = sizeof(struct v9_de_ib_state);
+
+ if (ring->is_mes_queue) {
+ payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+ gfx[0].gfx_meta_data) +
+ offsetof(struct v9_gfx_meta_data, de_payload);
+ de_payload_cpu_addr =
+ amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
+ } else {
+ payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
+ }
+
+ if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
+ memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size);
+ } else {
+ memcpy((void *)&ring->ring[offset], de_payload_cpu_addr,
+ (ring->buf_mask + 1 - offset) << 2);
+ payload_size -= (ring->buf_mask + 1 - offset) << 2;
+ memcpy((void *)&ring->ring[0],
+ de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
+ payload_size);
+ }
+}
+
static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
@@ -5374,6 +5411,8 @@ static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
+ amdgpu_ring_ib_on_emit_ce(ring);
+
if (resume)
amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
sizeof(ce_payload) >> 2);
@@ -5407,10 +5446,6 @@ static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
amdgpu_ring_alloc(ring, 13);
gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
- /*reset the CP_VMID_PREEMPT after trailing fence*/
- amdgpu_ring_emit_wreg(ring,
- SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
- 0x0);
/* assert IB preemption, emit the trailing fence */
kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
@@ -5433,6 +5468,10 @@ static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
DRM_WARN("ring %d timeout to preempt ib\n", ring->idx);
}
+ /*reset the CP_VMID_PREEMPT after trailing fence*/
+ amdgpu_ring_emit_wreg(ring,
+ SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
+ 0x0);
amdgpu_ring_commit(ring);
/* deassert preemption condition */
@@ -5440,7 +5479,7 @@ static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
return r;
}
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds)
{
struct amdgpu_device *adev = ring->adev;
struct v9_de_ib_state de_payload = {0};
@@ -5471,8 +5510,10 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
PAGE_SIZE);
}
- de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
- de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
+ if (usegds) {
+ de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
+ de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
+ }
cnt = (sizeof(de_payload) >> 2) + 4 - 2;
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
@@ -5483,6 +5524,7 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
+ amdgpu_ring_ib_on_emit_de(ring);
if (resume)
amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
sizeof(de_payload) >> 2);
@@ -6893,6 +6935,9 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
.soft_recovery = gfx_v9_0_ring_soft_recovery,
.emit_mem_sync = gfx_v9_0_emit_mem_sync,
+ .patch_cntl = gfx_v9_0_ring_patch_cntl,
+ .patch_de = gfx_v9_0_ring_patch_de_meta,
+ .patch_ce = gfx_v9_0_ring_patch_ce_meta,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index d95f9fe8f1c5..4116c112e8a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -31,6 +31,8 @@
#include "umc_v8_10.h"
#include "athub/athub_3_0_0_sh_mask.h"
#include "athub/athub_3_0_0_offset.h"
+#include "dcn/dcn_3_2_0_offset.h"
+#include "dcn/dcn_3_2_0_sh_mask.h"
#include "oss/osssys_6_0_0_offset.h"
#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
#include "navi10_enum.h"
@@ -546,7 +548,24 @@ static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev,
static unsigned gmc_v11_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
- return 0;
+ u32 d1vga_control = RREG32_SOC15(DCE, 0, regD1VGA_CONTROL);
+ unsigned size;
+
+ if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+ size = AMDGPU_VBIOS_VGA_ALLOCATION;
+ } else {
+ u32 viewport;
+ u32 pitch;
+
+ viewport = RREG32_SOC15(DCE, 0, regHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
+ pitch = RREG32_SOC15(DCE, 0, regHUBPREQ0_DCSURF_SURFACE_PITCH);
+ size = (REG_GET_FIELD(viewport,
+ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
+ REG_GET_FIELD(pitch, HUBPREQ0_DCSURF_SURFACE_PITCH, PITCH) *
+ 4);
+ }
+
+ return size;
}
static const struct amdgpu_gmc_funcs gmc_v11_0_gmc_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
index b040f51d9aa9..73e0dc5a10cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -102,13 +102,13 @@ static int jpeg_v2_5_sw_init(void *handle)
/* JPEG DJPEG POISON EVENT */
r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i],
- VCN_2_6__SRCID_DJPEG0_POISON, &adev->jpeg.inst[i].irq);
+ VCN_2_6__SRCID_DJPEG0_POISON, &adev->jpeg.inst[i].ras_poison_irq);
if (r)
return r;
/* JPEG EJPEG POISON EVENT */
r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i],
- VCN_2_6__SRCID_EJPEG0_POISON, &adev->jpeg.inst[i].irq);
+ VCN_2_6__SRCID_EJPEG0_POISON, &adev->jpeg.inst[i].ras_poison_irq);
if (r)
return r;
}
@@ -221,6 +221,9 @@ static int jpeg_v2_5_hw_fini(void *handle)
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS))
jpeg_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
+
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG))
+ amdgpu_irq_put(adev, &adev->jpeg.inst[i].ras_poison_irq, 0);
}
return 0;
@@ -569,6 +572,14 @@ static int jpeg_v2_5_set_interrupt_state(struct amdgpu_device *adev,
return 0;
}
+static int jpeg_v2_6_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -593,10 +604,6 @@ static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev,
case VCN_2_0__SRCID__JPEG_DECODE:
amdgpu_fence_process(&adev->jpeg.inst[ip_instance].ring_dec);
break;
- case VCN_2_6__SRCID_DJPEG0_POISON:
- case VCN_2_6__SRCID_EJPEG0_POISON:
- amdgpu_jpeg_process_poison_irq(adev, source, entry);
- break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
entry->src_id, entry->src_data[0]);
@@ -725,6 +732,11 @@ static const struct amdgpu_irq_src_funcs jpeg_v2_5_irq_funcs = {
.process = jpeg_v2_5_process_interrupt,
};
+static const struct amdgpu_irq_src_funcs jpeg_v2_6_ras_irq_funcs = {
+ .set = jpeg_v2_6_set_ras_interrupt_state,
+ .process = amdgpu_jpeg_process_poison_irq,
+};
+
static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev)
{
int i;
@@ -735,6 +747,9 @@ static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev)
adev->jpeg.inst[i].irq.num_types = 1;
adev->jpeg.inst[i].irq.funcs = &jpeg_v2_5_irq_funcs;
+
+ adev->jpeg.inst[i].ras_poison_irq.num_types = 1;
+ adev->jpeg.inst[i].ras_poison_irq.funcs = &jpeg_v2_6_ras_irq_funcs;
}
}
@@ -800,6 +815,7 @@ const struct amdgpu_ras_block_hw_ops jpeg_v2_6_ras_hw_ops = {
static struct amdgpu_jpeg_ras jpeg_v2_6_ras = {
.ras_block = {
.hw_ops = &jpeg_v2_6_ras_hw_ops,
+ .ras_late_init = amdgpu_jpeg_ras_late_init,
},
};
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
index 77e1e64aa1d1..a3d83c9f2c9a 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
@@ -87,13 +87,13 @@ static int jpeg_v4_0_sw_init(void *handle)
/* JPEG DJPEG POISON EVENT */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
- VCN_4_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst->irq);
+ VCN_4_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq);
if (r)
return r;
/* JPEG EJPEG POISON EVENT */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
- VCN_4_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst->irq);
+ VCN_4_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq);
if (r)
return r;
@@ -202,7 +202,8 @@ static int jpeg_v4_0_hw_fini(void *handle)
RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS))
jpeg_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
}
- amdgpu_irq_put(adev, &adev->jpeg.inst->irq, 0);
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG))
+ amdgpu_irq_put(adev, &adev->jpeg.inst->ras_poison_irq, 0);
return 0;
}
@@ -670,6 +671,14 @@ static int jpeg_v4_0_set_interrupt_state(struct amdgpu_device *adev,
return 0;
}
+static int jpeg_v4_0_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
static int jpeg_v4_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -680,10 +689,6 @@ static int jpeg_v4_0_process_interrupt(struct amdgpu_device *adev,
case VCN_4_0__SRCID__JPEG_DECODE:
amdgpu_fence_process(&adev->jpeg.inst->ring_dec);
break;
- case VCN_4_0__SRCID_DJPEG0_POISON:
- case VCN_4_0__SRCID_EJPEG0_POISON:
- amdgpu_jpeg_process_poison_irq(adev, source, entry);
- break;
default:
DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
entry->src_id, entry->src_data[0]);
@@ -753,10 +758,18 @@ static const struct amdgpu_irq_src_funcs jpeg_v4_0_irq_funcs = {
.process = jpeg_v4_0_process_interrupt,
};
+static const struct amdgpu_irq_src_funcs jpeg_v4_0_ras_irq_funcs = {
+ .set = jpeg_v4_0_set_ras_interrupt_state,
+ .process = amdgpu_jpeg_process_poison_irq,
+};
+
static void jpeg_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
adev->jpeg.inst->irq.num_types = 1;
adev->jpeg.inst->irq.funcs = &jpeg_v4_0_irq_funcs;
+
+ adev->jpeg.inst->ras_poison_irq.num_types = 1;
+ adev->jpeg.inst->ras_poison_irq.funcs = &jpeg_v4_0_ras_irq_funcs;
}
const struct amdgpu_ip_block_version jpeg_v4_0_ip_block = {
@@ -811,6 +824,7 @@ const struct amdgpu_ras_block_hw_ops jpeg_v4_0_ras_hw_ops = {
static struct amdgpu_jpeg_ras jpeg_v4_0_ras = {
.ras_block = {
.hw_ops = &jpeg_v4_0_ras_hw_ops,
+ .ras_late_init = amdgpu_jpeg_ras_late_init,
},
};
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
index e1b7fca09666..5f10883da6a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -57,7 +57,13 @@ static int psp_v10_0_init_microcode(struct psp_context *psp)
if (err)
return err;
- return psp_init_ta_microcode(psp, ucode_prefix);
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 1, 0)) &&
+ (adev->pdev->revision == 0xa1) &&
+ (psp->securedisplay_context.context.bin_desc.fw_version >= 0x27000008)) {
+ adev->psp.securedisplay_context.context.bin_desc.size_bytes = 0;
+ }
+ return err;
}
static int psp_v10_0_ring_create(struct psp_context *psp,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 6d15d5cd9e07..a2fd1ffadb59 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -301,10 +301,11 @@ static u32 soc15_get_xclk(struct amdgpu_device *adev)
u32 reference_clock = adev->clock.spll.reference_freq;
if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 0) ||
- adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 1) ||
- adev->ip_versions[MP1_HWIP][0] == IP_VERSION(10, 0, 0) ||
- adev->ip_versions[MP1_HWIP][0] == IP_VERSION(10, 0, 1))
+ adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 1))
return 10000;
+ if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(10, 0, 0) ||
+ adev->ip_versions[MP1_HWIP][0] == IP_VERSION(10, 0, 1))
+ return reference_clock / 4;
return reference_clock;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index ab0b45d0ead1..515681c88dcb 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -143,7 +143,7 @@ static int vcn_v2_5_sw_init(void *handle)
/* VCN POISON TRAP */
r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
- VCN_2_6__SRCID_UVD_POISON, &adev->vcn.inst[j].irq);
+ VCN_2_6__SRCID_UVD_POISON, &adev->vcn.inst[j].ras_poison_irq);
if (r)
return r;
}
@@ -354,6 +354,9 @@ static int vcn_v2_5_hw_fini(void *handle)
(adev->vcn.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(VCN, i, mmUVD_STATUS)))
vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
+
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
+ amdgpu_irq_put(adev, &adev->vcn.inst[i].ras_poison_irq, 0);
}
return 0;
@@ -1807,6 +1810,14 @@ static int vcn_v2_5_set_interrupt_state(struct amdgpu_device *adev,
return 0;
}
+static int vcn_v2_6_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
static int vcn_v2_5_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -1837,9 +1848,6 @@ static int vcn_v2_5_process_interrupt(struct amdgpu_device *adev,
case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
break;
- case VCN_2_6__SRCID_UVD_POISON:
- amdgpu_vcn_process_poison_irq(adev, source, entry);
- break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
entry->src_id, entry->src_data[0]);
@@ -1854,6 +1862,11 @@ static const struct amdgpu_irq_src_funcs vcn_v2_5_irq_funcs = {
.process = vcn_v2_5_process_interrupt,
};
+static const struct amdgpu_irq_src_funcs vcn_v2_6_ras_irq_funcs = {
+ .set = vcn_v2_6_set_ras_interrupt_state,
+ .process = amdgpu_vcn_process_poison_irq,
+};
+
static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev)
{
int i;
@@ -1863,6 +1876,9 @@ static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev)
continue;
adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs;
+
+ adev->vcn.inst[i].ras_poison_irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst[i].ras_poison_irq.funcs = &vcn_v2_6_ras_irq_funcs;
}
}
@@ -1965,6 +1981,7 @@ const struct amdgpu_ras_block_hw_ops vcn_v2_6_ras_hw_ops = {
static struct amdgpu_vcn_ras vcn_v2_6_ras = {
.ras_block = {
.hw_ops = &vcn_v2_6_ras_hw_ops,
+ .ras_late_init = amdgpu_vcn_ras_late_init,
},
};
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index bf0674039598..da126ff8bcbc 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -129,7 +129,11 @@ static int vcn_v4_0_sw_init(void *handle)
if (adev->vcn.harvest_config & (1 << i))
continue;
- atomic_set(&adev->vcn.inst[i].sched_score, 0);
+ /* Init instance 0 sched_score to 1, so it's scheduled after other instances */
+ if (i == 0)
+ atomic_set(&adev->vcn.inst[i].sched_score, 1);
+ else
+ atomic_set(&adev->vcn.inst[i].sched_score, 0);
/* VCN UNIFIED TRAP */
r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
@@ -139,7 +143,7 @@ static int vcn_v4_0_sw_init(void *handle)
/* VCN POISON TRAP */
r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
- VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst[i].irq);
+ VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst[i].ras_poison_irq);
if (r)
return r;
@@ -305,8 +309,8 @@ static int vcn_v4_0_hw_fini(void *handle)
vcn_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
}
}
-
- amdgpu_irq_put(adev, &adev->vcn.inst[i].irq, 0);
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
+ amdgpu_irq_put(adev, &adev->vcn.inst[i].ras_poison_irq, 0);
}
return 0;
@@ -1976,6 +1980,24 @@ static int vcn_v4_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgp
}
/**
+ * vcn_v4_0_set_ras_interrupt_state - set VCN block RAS interrupt state
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt sources
+ * @type: interrupt types
+ * @state: interrupt states
+ *
+ * Set VCN block RAS interrupt state
+ */
+static int vcn_v4_0_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+/**
* vcn_v4_0_process_interrupt - process VCN block interrupt
*
* @adev: amdgpu_device pointer
@@ -2007,9 +2029,6 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_
case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
break;
- case VCN_4_0__SRCID_UVD_POISON:
- amdgpu_vcn_process_poison_irq(adev, source, entry);
- break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
entry->src_id, entry->src_data[0]);
@@ -2024,6 +2043,11 @@ static const struct amdgpu_irq_src_funcs vcn_v4_0_irq_funcs = {
.process = vcn_v4_0_process_interrupt,
};
+static const struct amdgpu_irq_src_funcs vcn_v4_0_ras_irq_funcs = {
+ .set = vcn_v4_0_set_ras_interrupt_state,
+ .process = amdgpu_vcn_process_poison_irq,
+};
+
/**
* vcn_v4_0_set_irq_funcs - set VCN block interrupt irq functions
*
@@ -2041,6 +2065,9 @@ static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev)
adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
adev->vcn.inst[i].irq.funcs = &vcn_v4_0_irq_funcs;
+
+ adev->vcn.inst[i].ras_poison_irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst[i].ras_poison_irq.funcs = &vcn_v4_0_ras_irq_funcs;
}
}
@@ -2114,6 +2141,7 @@ const struct amdgpu_ras_block_hw_ops vcn_v4_0_ras_hw_ops = {
static struct amdgpu_vcn_ras vcn_v4_0_ras = {
.ras_block = {
.hw_ops = &vcn_v4_0_ras_hw_ops,
+ .ras_late_init = amdgpu_vcn_ras_late_init,
},
};
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 531f173ade2d..c0360dbebd4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -542,8 +542,15 @@ static u32 vi_get_xclk(struct amdgpu_device *adev)
u32 reference_clock = adev->clock.spll.reference_freq;
u32 tmp;
- if (adev->flags & AMD_IS_APU)
- return reference_clock;
+ if (adev->flags & AMD_IS_APU) {
+ switch (adev->asic_type) {
+ case CHIP_STONEY:
+ /* vbios says 48Mhz, but the actual freq is 100Mhz */
+ return 10000;
+ default:
+ return reference_clock;
+ }
+ }
tmp = RREG32_SMC(ixCG_CLKPIN_CNTL_2);
if (REG_GET_FIELD(tmp, CG_CLKPIN_CNTL_2, MUX_TCLK_TO_XCLK))
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 8b4b186c57f5..7acd73e5004f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2479,20 +2479,25 @@ static void dm_gpureset_toggle_interrupts(struct amdgpu_device *adev,
if (acrtc && state->stream_status[i].plane_count != 0) {
irq_source = IRQ_TYPE_PFLIP + acrtc->otg_inst;
rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY;
- DRM_DEBUG_VBL("crtc %d - vupdate irq %sabling: r=%d\n",
- acrtc->crtc_id, enable ? "en" : "dis", rc);
if (rc)
DRM_WARN("Failed to %s pflip interrupts\n",
enable ? "enable" : "disable");
if (enable) {
- rc = amdgpu_dm_crtc_enable_vblank(&acrtc->base);
- if (rc)
- DRM_WARN("Failed to enable vblank interrupts\n");
- } else {
- amdgpu_dm_crtc_disable_vblank(&acrtc->base);
- }
+ if (amdgpu_dm_crtc_vrr_active(to_dm_crtc_state(acrtc->base.state)))
+ rc = amdgpu_dm_crtc_set_vupdate_irq(&acrtc->base, true);
+ } else
+ rc = amdgpu_dm_crtc_set_vupdate_irq(&acrtc->base, false);
+
+ if (rc)
+ DRM_WARN("Failed to %sable vupdate interrupt\n", enable ? "en" : "dis");
+ irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst;
+ /* During gpu-reset we disable and then enable vblank irq, so
+ * don't use amdgpu_irq_get/put() to avoid refcount change.
+ */
+ if (!dc_interrupt_set(adev->dm.dc, irq_source, enable))
+ DRM_WARN("Failed to %sable vblank interrupt\n", enable ? "en" : "dis");
}
}
@@ -2852,7 +2857,7 @@ static int dm_resume(void *handle)
* this is the case when traversing through already created
* MST connectors, should be skipped
*/
- if (aconnector->dc_link->type == dc_connection_mst_branch)
+ if (aconnector && aconnector->mst_root)
continue;
mutex_lock(&aconnector->hpd_lock);
@@ -6737,7 +6742,7 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder,
int clock, bpp = 0;
bool is_y420 = false;
- if (!aconnector->mst_output_port || !aconnector->dc_sink)
+ if (!aconnector->mst_output_port)
return 0;
mst_port = aconnector->mst_output_port;
@@ -7191,7 +7196,13 @@ static int amdgpu_dm_connector_get_modes(struct drm_connector *connector)
drm_add_modes_noedid(connector, 1920, 1080);
} else {
amdgpu_dm_connector_ddc_get_modes(connector, edid);
- amdgpu_dm_connector_add_common_modes(encoder, connector);
+ /* most eDP supports only timings from its edid,
+ * usually only detailed timings are available
+ * from eDP edid. timings which are not from edid
+ * may damage eDP
+ */
+ if (connector->connector_type != DRM_MODE_CONNECTOR_eDP)
+ amdgpu_dm_connector_add_common_modes(encoder, connector);
amdgpu_dm_connector_add_freesync_modes(connector, edid);
}
amdgpu_dm_fbc_init(connector);
@@ -8193,6 +8204,12 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
if (acrtc_state->abm_level != dm_old_crtc_state->abm_level)
bundle->stream_update.abm_level = &acrtc_state->abm_level;
+ mutex_lock(&dm->dc_lock);
+ if ((acrtc_state->update_type > UPDATE_TYPE_FAST) &&
+ acrtc_state->stream->link->psr_settings.psr_allow_active)
+ amdgpu_dm_psr_disable(acrtc_state->stream);
+ mutex_unlock(&dm->dc_lock);
+
/*
* If FreeSync state on the stream has changed then we need to
* re-adjust the min/max bounds now that DC doesn't handle this
@@ -8206,10 +8223,6 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
spin_unlock_irqrestore(&pcrtc->dev->event_lock, flags);
}
mutex_lock(&dm->dc_lock);
- if ((acrtc_state->update_type > UPDATE_TYPE_FAST) &&
- acrtc_state->stream->link->psr_settings.psr_allow_active)
- amdgpu_dm_psr_disable(acrtc_state->stream);
-
update_planes_and_stream_adapter(dm->dc,
acrtc_state->update_type,
planes_count,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
index e3762e806617..440fc0869a34 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
@@ -146,7 +146,6 @@ static void vblank_control_worker(struct work_struct *work)
static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable)
{
- enum dc_irq_source irq_source;
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
struct amdgpu_device *adev = drm_to_adev(crtc->dev);
struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state);
@@ -169,18 +168,9 @@ static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable)
if (rc)
return rc;
- if (amdgpu_in_reset(adev)) {
- irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst;
- /* During gpu-reset we disable and then enable vblank irq, so
- * don't use amdgpu_irq_get/put() to avoid refcount change.
- */
- if (!dc_interrupt_set(adev->dm.dc, irq_source, enable))
- rc = -EBUSY;
- } else {
- rc = (enable)
- ? amdgpu_irq_get(adev, &adev->crtc_irq, acrtc->crtc_id)
- : amdgpu_irq_put(adev, &adev->crtc_irq, acrtc->crtc_id);
- }
+ rc = (enable)
+ ? amdgpu_irq_get(adev, &adev->crtc_irq, acrtc->crtc_id)
+ : amdgpu_irq_put(adev, &adev->crtc_irq, acrtc->crtc_id);
if (rc)
return rc;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 52564b93f7eb..7cde67b7f0c3 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1981,6 +1981,9 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
return result;
}
+static bool commit_minimal_transition_state(struct dc *dc,
+ struct dc_state *transition_base_context);
+
/**
* dc_commit_streams - Commit current stream state
*
@@ -2002,6 +2005,8 @@ enum dc_status dc_commit_streams(struct dc *dc,
struct dc_state *context;
enum dc_status res = DC_OK;
struct dc_validation_set set[MAX_STREAMS] = {0};
+ struct pipe_ctx *pipe;
+ bool handle_exit_odm2to1 = false;
if (dc->ctx->dce_environment == DCE_ENV_VIRTUAL_HW)
return res;
@@ -2026,6 +2031,22 @@ enum dc_status dc_commit_streams(struct dc *dc,
}
}
+ /* Check for case where we are going from odm 2:1 to max
+ * pipe scenario. For these cases, we will call
+ * commit_minimal_transition_state() to exit out of odm 2:1
+ * first before processing new streams
+ */
+ if (stream_count == dc->res_pool->pipe_count) {
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ if (pipe->next_odm_pipe)
+ handle_exit_odm2to1 = true;
+ }
+ }
+
+ if (handle_exit_odm2to1)
+ res = commit_minimal_transition_state(dc, dc->current_state);
+
context = dc_create_state(dc);
if (!context)
goto context_alloc_fail;
@@ -3872,6 +3893,7 @@ static bool commit_minimal_transition_state(struct dc *dc,
unsigned int i, j;
unsigned int pipe_in_use = 0;
bool subvp_in_use = false;
+ bool odm_in_use = false;
if (!transition_context)
return false;
@@ -3900,6 +3922,18 @@ static bool commit_minimal_transition_state(struct dc *dc,
}
}
+ /* If ODM is enabled and we are adding or removing planes from any ODM
+ * pipe, we must use the minimal transition.
+ */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+
+ if (pipe->stream && pipe->next_odm_pipe) {
+ odm_in_use = true;
+ break;
+ }
+ }
+
/* When the OS add a new surface if we have been used all of pipes with odm combine
* and mpc split feature, it need use commit_minimal_transition_state to transition safely.
* After OS exit MPO, it will back to use odm and mpc split with all of pipes, we need
@@ -3908,7 +3942,7 @@ static bool commit_minimal_transition_state(struct dc *dc,
* Reduce the scenarios to use dc_commit_state_no_check in the stage of flip. Especially
* enter/exit MPO when DCN still have enough resources.
*/
- if (pipe_in_use != dc->res_pool->pipe_count && !subvp_in_use) {
+ if (pipe_in_use != dc->res_pool->pipe_count && !subvp_in_use && !odm_in_use) {
dc_release_state(transition_context);
return true;
}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 117d80cb36fb..fe1551393b26 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -1446,6 +1446,26 @@ static int acquire_first_split_pipe(
split_pipe->stream = stream;
return i;
+ } else if (split_pipe->prev_odm_pipe &&
+ split_pipe->prev_odm_pipe->plane_state == split_pipe->plane_state) {
+ split_pipe->prev_odm_pipe->next_odm_pipe = split_pipe->next_odm_pipe;
+ if (split_pipe->next_odm_pipe)
+ split_pipe->next_odm_pipe->prev_odm_pipe = split_pipe->prev_odm_pipe;
+
+ if (split_pipe->prev_odm_pipe->plane_state)
+ resource_build_scaling_params(split_pipe->prev_odm_pipe);
+
+ memset(split_pipe, 0, sizeof(*split_pipe));
+ split_pipe->stream_res.tg = pool->timing_generators[i];
+ split_pipe->plane_res.hubp = pool->hubps[i];
+ split_pipe->plane_res.ipp = pool->ipps[i];
+ split_pipe->plane_res.dpp = pool->dpps[i];
+ split_pipe->stream_res.opp = pool->opps[i];
+ split_pipe->plane_res.mpcc_inst = pool->dpps[i]->inst;
+ split_pipe->pipe_idx = i;
+
+ split_pipe->stream = stream;
+ return i;
}
}
return -1;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
index 422fbf79da64..5403e9399a46 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
@@ -2113,15 +2113,6 @@ void dcn20_optimize_bandwidth(
if (hubbub->funcs->program_compbuf_size)
hubbub->funcs->program_compbuf_size(hubbub, context->bw_ctx.bw.dcn.compbuf_size_kb, true);
- if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
- dc_dmub_srv_p_state_delegate(dc,
- true, context);
- context->bw_ctx.bw.dcn.clk.p_state_change_support = true;
- dc->clk_mgr->clks.fw_based_mclk_switching = true;
- } else {
- dc->clk_mgr->clks.fw_based_mclk_switching = false;
- }
-
dc->clk_mgr->funcs->update_clocks(
dc->clk_mgr,
context,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
index 8263a07f265f..32121db2851e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
@@ -983,36 +983,13 @@ void dcn30_set_disp_pattern_generator(const struct dc *dc,
}
void dcn30_prepare_bandwidth(struct dc *dc,
- struct dc_state *context)
+ struct dc_state *context)
{
- bool p_state_change_support = context->bw_ctx.bw.dcn.clk.p_state_change_support;
- /* Any transition into an FPO config should disable MCLK switching first to avoid
- * driver and FW P-State synchronization issues.
- */
- if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dc->clk_mgr->clks.fw_based_mclk_switching) {
- dc->optimized_required = true;
- context->bw_ctx.bw.dcn.clk.p_state_change_support = false;
- }
-
if (dc->clk_mgr->dc_mode_softmax_enabled)
if (dc->clk_mgr->clks.dramclk_khz <= dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000 &&
context->bw_ctx.bw.dcn.clk.dramclk_khz > dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000)
dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz);
dcn20_prepare_bandwidth(dc, context);
- /*
- * enabled -> enabled: do not disable
- * enabled -> disabled: disable
- * disabled -> enabled: don't care
- * disabled -> disabled: don't care
- */
- if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching)
- dc_dmub_srv_p_state_delegate(dc, false, context);
-
- if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dc->clk_mgr->clks.fw_based_mclk_switching) {
- /* After disabling P-State, restore the original value to ensure we get the correct P-State
- * on the next optimize. */
- context->bw_ctx.bw.dcn.clk.p_state_change_support = p_state_change_support;
- }
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 47beb4ea779d..0c4c3208def1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -138,7 +138,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = {
.urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
.urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
- .pct_ideal_sdp_bw_after_urgent = 100.0,
+ .pct_ideal_sdp_bw_after_urgent = 90.0,
.pct_ideal_fabric_bw_after_urgent = 67.0,
.pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0,
.pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
index a131e30fd7d6..d471d58aba92 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
@@ -980,6 +980,11 @@ static bool detect_link_and_local_sink(struct dc_link *link,
(link->dpcd_caps.dongle_type !=
DISPLAY_DONGLE_DP_HDMI_CONVERTER))
converter_disable_audio = true;
+
+ /* limited link rate to HBR3 for DPIA until we implement USB4 V2 */
+ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
+ link->reported_link_cap.link_rate > LINK_RATE_HIGH3)
+ link->reported_link_cap.link_rate = LINK_RATE_HIGH3;
break;
}
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c
index d4b7da526f0a..e8b2fc4002a5 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c
@@ -359,5 +359,8 @@ bool link_validate_dpia_bandwidth(const struct dc_stream_state *stream, const un
link[i] = stream[i].link;
bw_needed[i] = dc_bandwidth_in_kbps_from_timing(&stream[i].timing);
}
+
+ ret = dpia_validate_usb4_bw(link, bw_needed, num_streams);
+
return ret;
}
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 58c2246918fd..f4f40459f22b 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -871,13 +871,11 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
}
if (ret == -ENOENT) {
size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf);
- if (size > 0) {
- size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf + size);
- size += amdgpu_dpm_print_clock_levels(adev, OD_VDDC_CURVE, buf + size);
- size += amdgpu_dpm_print_clock_levels(adev, OD_VDDGFX_OFFSET, buf + size);
- size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf + size);
- size += amdgpu_dpm_print_clock_levels(adev, OD_CCLK, buf + size);
- }
+ size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf + size);
+ size += amdgpu_dpm_print_clock_levels(adev, OD_VDDC_CURVE, buf + size);
+ size += amdgpu_dpm_print_clock_levels(adev, OD_VDDGFX_OFFSET, buf + size);
+ size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf + size);
+ size += amdgpu_dpm_print_clock_levels(adev, OD_CCLK, buf + size);
}
if (size == 0)
diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
index d6d9e3b1b2c0..02e69ccff3ba 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
@@ -6925,23 +6925,6 @@ static int si_dpm_enable(struct amdgpu_device *adev)
return 0;
}
-static int si_set_temperature_range(struct amdgpu_device *adev)
-{
- int ret;
-
- ret = si_thermal_enable_alert(adev, false);
- if (ret)
- return ret;
- ret = si_thermal_set_temperature_range(adev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX);
- if (ret)
- return ret;
- ret = si_thermal_enable_alert(adev, true);
- if (ret)
- return ret;
-
- return ret;
-}
-
static void si_dpm_disable(struct amdgpu_device *adev)
{
struct rv7xx_power_info *pi = rv770_get_pi(adev);
@@ -7626,18 +7609,6 @@ static int si_dpm_process_interrupt(struct amdgpu_device *adev,
static int si_dpm_late_init(void *handle)
{
- int ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!adev->pm.dpm_enabled)
- return 0;
-
- ret = si_set_temperature_range(adev);
- if (ret)
- return ret;
-#if 0 //TODO ?
- si_dpm_powergate_uvd(adev, true);
-#endif
return 0;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 5633c5797e85..2ddf5198e5c4 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -733,6 +733,24 @@ static int smu_late_init(void *handle)
return ret;
}
+ /*
+ * Explicitly notify PMFW the power mode the system in. Since
+ * the PMFW may boot the ASIC with a different mode.
+ * For those supporting ACDC switch via gpio, PMFW will
+ * handle the switch automatically. Driver involvement
+ * is unnecessary.
+ */
+ if (!smu->dc_controlled_by_gpio) {
+ ret = smu_set_power_source(smu,
+ adev->pm.ac_power ? SMU_POWER_SOURCE_AC :
+ SMU_POWER_SOURCE_DC);
+ if (ret) {
+ dev_err(adev->dev, "Failed to switch to %s mode!\n",
+ adev->pm.ac_power ? "AC" : "DC");
+ return ret;
+ }
+ }
+
if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 1)) ||
(adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 3)))
return 0;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index c4000518dc56..275f708db636 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -3413,26 +3413,8 @@ static int navi10_post_smu_init(struct smu_context *smu)
return 0;
ret = navi10_run_umc_cdr_workaround(smu);
- if (ret) {
+ if (ret)
dev_err(adev->dev, "Failed to apply umc cdr workaround!\n");
- return ret;
- }
-
- if (!smu->dc_controlled_by_gpio) {
- /*
- * For Navi1X, manually switch it to AC mode as PMFW
- * may boot it with DC mode.
- */
- ret = smu_v11_0_set_power_source(smu,
- adev->pm.ac_power ?
- SMU_POWER_SOURCE_AC :
- SMU_POWER_SOURCE_DC);
- if (ret) {
- dev_err(adev->dev, "Failed to switch to %s mode!\n",
- adev->pm.ac_power ? "AC" : "DC");
- return ret;
- }
- }
return ret;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 75f18681e984..85d53597eb07 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -2067,33 +2067,94 @@ static int sienna_cichlid_display_disable_memory_clock_switch(struct smu_context
return ret;
}
+static void sienna_cichlid_get_override_pcie_settings(struct smu_context *smu,
+ uint32_t *gen_speed_override,
+ uint32_t *lane_width_override)
+{
+ struct amdgpu_device *adev = smu->adev;
+
+ *gen_speed_override = 0xff;
+ *lane_width_override = 0xff;
+
+ switch (adev->pdev->device) {
+ case 0x73A0:
+ case 0x73A1:
+ case 0x73A2:
+ case 0x73A3:
+ case 0x73AB:
+ case 0x73AE:
+ /* Bit 7:0: PCIE lane width, 1 to 7 corresponds is x1 to x32 */
+ *lane_width_override = 6;
+ break;
+ case 0x73E0:
+ case 0x73E1:
+ case 0x73E3:
+ *lane_width_override = 4;
+ break;
+ case 0x7420:
+ case 0x7421:
+ case 0x7422:
+ case 0x7423:
+ case 0x7424:
+ *lane_width_override = 3;
+ break;
+ default:
+ break;
+ }
+}
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
static int sienna_cichlid_update_pcie_parameters(struct smu_context *smu,
uint32_t pcie_gen_cap,
uint32_t pcie_width_cap)
{
struct smu_11_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
-
- uint32_t smu_pcie_arg;
+ struct smu_11_0_pcie_table *pcie_table = &dpm_context->dpm_tables.pcie_table;
+ uint32_t gen_speed_override, lane_width_override;
uint8_t *table_member1, *table_member2;
+ uint32_t min_gen_speed, max_gen_speed;
+ uint32_t min_lane_width, max_lane_width;
+ uint32_t smu_pcie_arg;
int ret, i;
GET_PPTABLE_MEMBER(PcieGenSpeed, &table_member1);
GET_PPTABLE_MEMBER(PcieLaneCount, &table_member2);
- /* lclk dpm table setup */
- for (i = 0; i < MAX_PCIE_CONF; i++) {
- dpm_context->dpm_tables.pcie_table.pcie_gen[i] = table_member1[i];
- dpm_context->dpm_tables.pcie_table.pcie_lane[i] = table_member2[i];
+ sienna_cichlid_get_override_pcie_settings(smu,
+ &gen_speed_override,
+ &lane_width_override);
+
+ /* PCIE gen speed override */
+ if (gen_speed_override != 0xff) {
+ min_gen_speed = MIN(pcie_gen_cap, gen_speed_override);
+ max_gen_speed = MIN(pcie_gen_cap, gen_speed_override);
+ } else {
+ min_gen_speed = MAX(0, table_member1[0]);
+ max_gen_speed = MIN(pcie_gen_cap, table_member1[1]);
+ min_gen_speed = min_gen_speed > max_gen_speed ?
+ max_gen_speed : min_gen_speed;
}
+ pcie_table->pcie_gen[0] = min_gen_speed;
+ pcie_table->pcie_gen[1] = max_gen_speed;
+
+ /* PCIE lane width override */
+ if (lane_width_override != 0xff) {
+ min_lane_width = MIN(pcie_width_cap, lane_width_override);
+ max_lane_width = MIN(pcie_width_cap, lane_width_override);
+ } else {
+ min_lane_width = MAX(1, table_member2[0]);
+ max_lane_width = MIN(pcie_width_cap, table_member2[1]);
+ min_lane_width = min_lane_width > max_lane_width ?
+ max_lane_width : min_lane_width;
+ }
+ pcie_table->pcie_lane[0] = min_lane_width;
+ pcie_table->pcie_lane[1] = max_lane_width;
for (i = 0; i < NUM_LINK_LEVELS; i++) {
- smu_pcie_arg = (i << 16) |
- ((table_member1[i] <= pcie_gen_cap) ?
- (table_member1[i] << 8) :
- (pcie_gen_cap << 8)) |
- ((table_member2[i] <= pcie_width_cap) ?
- table_member2[i] :
- pcie_width_cap);
+ smu_pcie_arg = (i << 16 |
+ pcie_table->pcie_gen[i] << 8 |
+ pcie_table->pcie_lane[i]);
ret = smu_cmn_send_smc_msg_with_param(smu,
SMU_MSG_OverridePcieParameters,
@@ -2101,11 +2162,6 @@ static int sienna_cichlid_update_pcie_parameters(struct smu_context *smu,
NULL);
if (ret)
return ret;
-
- if (table_member1[i] > pcie_gen_cap)
- dpm_context->dpm_tables.pcie_table.pcie_gen[i] = pcie_gen_cap;
- if (table_member2[i] > pcie_width_cap)
- dpm_context->dpm_tables.pcie_table.pcie_lane[i] = pcie_width_cap;
}
return 0;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
index 7433dcaa16e0..067b4e0b026c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
@@ -582,7 +582,7 @@ static int vangogh_print_legacy_clk_levels(struct smu_context *smu,
DpmClocks_t *clk_table = smu->smu_table.clocks_table;
SmuMetrics_legacy_t metrics;
struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
- int i, size = 0, ret = 0;
+ int i, idx, size = 0, ret = 0;
uint32_t cur_value = 0, value = 0, count = 0;
bool cur_value_match_level = false;
@@ -656,7 +656,8 @@ static int vangogh_print_legacy_clk_levels(struct smu_context *smu,
case SMU_MCLK:
case SMU_FCLK:
for (i = 0; i < count; i++) {
- ret = vangogh_get_dpm_clk_limited(smu, clk_type, i, &value);
+ idx = (clk_type == SMU_FCLK || clk_type == SMU_MCLK) ? (count - i - 1) : i;
+ ret = vangogh_get_dpm_clk_limited(smu, clk_type, idx, &value);
if (ret)
return ret;
if (!value)
@@ -683,7 +684,7 @@ static int vangogh_print_clk_levels(struct smu_context *smu,
DpmClocks_t *clk_table = smu->smu_table.clocks_table;
SmuMetrics_t metrics;
struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
- int i, size = 0, ret = 0;
+ int i, idx, size = 0, ret = 0;
uint32_t cur_value = 0, value = 0, count = 0;
bool cur_value_match_level = false;
uint32_t min, max;
@@ -765,7 +766,8 @@ static int vangogh_print_clk_levels(struct smu_context *smu,
case SMU_MCLK:
case SMU_FCLK:
for (i = 0; i < count; i++) {
- ret = vangogh_get_dpm_clk_limited(smu, clk_type, i, &value);
+ idx = (clk_type == SMU_FCLK || clk_type == SMU_MCLK) ? (count - i - 1) : i;
+ ret = vangogh_get_dpm_clk_limited(smu, clk_type, idx, &value);
if (ret)
return ret;
if (!value)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
index 5cdc07165480..8a8ba25c9ad7 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
@@ -494,7 +494,7 @@ static int renoir_set_fine_grain_gfx_freq_parameters(struct smu_context *smu)
static int renoir_print_clk_levels(struct smu_context *smu,
enum smu_clk_type clk_type, char *buf)
{
- int i, size = 0, ret = 0;
+ int i, idx, size = 0, ret = 0;
uint32_t cur_value = 0, value = 0, count = 0, min = 0, max = 0;
SmuMetrics_t metrics;
struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
@@ -594,7 +594,8 @@ static int renoir_print_clk_levels(struct smu_context *smu,
case SMU_VCLK:
case SMU_DCLK:
for (i = 0; i < count; i++) {
- ret = renoir_get_dpm_clk_limited(smu, clk_type, i, &value);
+ idx = (clk_type == SMU_FCLK || clk_type == SMU_MCLK) ? (count - i - 1) : i;
+ ret = renoir_get_dpm_clk_limited(smu, clk_type, idx, &value);
if (ret)
return ret;
if (!value)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 393c6a7b9609..ca379181081c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -573,11 +573,11 @@ int smu_v13_0_init_power(struct smu_context *smu)
if (smu_power->power_context || smu_power->power_context_size != 0)
return -EINVAL;
- smu_power->power_context = kzalloc(sizeof(struct smu_13_0_dpm_context),
+ smu_power->power_context = kzalloc(sizeof(struct smu_13_0_power_context),
GFP_KERNEL);
if (!smu_power->power_context)
return -ENOMEM;
- smu_power->power_context_size = sizeof(struct smu_13_0_dpm_context);
+ smu_power->power_context_size = sizeof(struct smu_13_0_power_context);
return 0;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 09405ef1e3c8..08577d1b84ec 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -1696,10 +1696,39 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
}
}
- /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
- workload_type = smu_cmn_to_asic_specific_index(smu,
+ if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE &&
+ (((smu->adev->pdev->device == 0x744C) && (smu->adev->pdev->revision == 0xC8)) ||
+ ((smu->adev->pdev->device == 0x744C) && (smu->adev->pdev->revision == 0xCC)))) {
+ ret = smu_cmn_update_table(smu,
+ SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+ WORKLOAD_PPLIB_COMPUTE_BIT,
+ (void *)(&activity_monitor_external),
+ false);
+ if (ret) {
+ dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
+ return ret;
+ }
+
+ ret = smu_cmn_update_table(smu,
+ SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+ WORKLOAD_PPLIB_CUSTOM_BIT,
+ (void *)(&activity_monitor_external),
+ true);
+ if (ret) {
+ dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
+ return ret;
+ }
+
+ workload_type = smu_cmn_to_asic_specific_index(smu,
+ CMN2ASIC_MAPPING_WORKLOAD,
+ PP_SMC_POWER_PROFILE_CUSTOM);
+ } else {
+ /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
+ workload_type = smu_cmn_to_asic_specific_index(smu,
CMN2ASIC_MAPPING_WORKLOAD,
smu->power_profile_mode);
+ }
+
if (workload_type < 0)
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
index 8fa9a36c38b6..6d9760eac16d 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
@@ -478,7 +478,7 @@ static int smu_v13_0_4_get_dpm_level_count(struct smu_context *smu,
static int smu_v13_0_4_print_clk_levels(struct smu_context *smu,
enum smu_clk_type clk_type, char *buf)
{
- int i, size = 0, ret = 0;
+ int i, idx, size = 0, ret = 0;
uint32_t cur_value = 0, value = 0, count = 0;
uint32_t min, max;
@@ -512,7 +512,8 @@ static int smu_v13_0_4_print_clk_levels(struct smu_context *smu,
break;
for (i = 0; i < count; i++) {
- ret = smu_v13_0_4_get_dpm_freq_by_index(smu, clk_type, i, &value);
+ idx = (clk_type == SMU_FCLK || clk_type == SMU_MCLK) ? (count - i - 1) : i;
+ ret = smu_v13_0_4_get_dpm_freq_by_index(smu, clk_type, idx, &value);
if (ret)
break;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c
index 66445964efbd..0081fa607e02 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c
@@ -866,7 +866,7 @@ out:
static int smu_v13_0_5_print_clk_levels(struct smu_context *smu,
enum smu_clk_type clk_type, char *buf)
{
- int i, size = 0, ret = 0;
+ int i, idx, size = 0, ret = 0;
uint32_t cur_value = 0, value = 0, count = 0;
uint32_t min = 0, max = 0;
@@ -898,7 +898,8 @@ static int smu_v13_0_5_print_clk_levels(struct smu_context *smu,
goto print_clk_out;
for (i = 0; i < count; i++) {
- ret = smu_v13_0_5_get_dpm_freq_by_index(smu, clk_type, i, &value);
+ idx = (clk_type == SMU_MCLK) ? (count - i - 1) : i;
+ ret = smu_v13_0_5_get_dpm_freq_by_index(smu, clk_type, idx, &value);
if (ret)
goto print_clk_out;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 3d9ff46706fb..bba621615abf 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -125,6 +125,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0),
MSG_MAP(AllowGpo, PPSMC_MSG_SetGpoAllow, 0),
MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 0),
+ MSG_MAP(NotifyPowerSource, PPSMC_MSG_NotifyPowerSource, 0),
};
static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT] = {
@@ -1770,6 +1771,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
.enable_mgpu_fan_boost = smu_v13_0_7_enable_mgpu_fan_boost,
.get_power_limit = smu_v13_0_7_get_power_limit,
.set_power_limit = smu_v13_0_set_power_limit,
+ .set_power_source = smu_v13_0_set_power_source,
.get_power_profile_mode = smu_v13_0_7_get_power_profile_mode,
.set_power_profile_mode = smu_v13_0_7_set_power_profile_mode,
.set_tool_table_location = smu_v13_0_set_tool_table_location,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
index 04e56b0b3033..798f36cfcebd 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
@@ -1000,7 +1000,7 @@ out:
static int yellow_carp_print_clk_levels(struct smu_context *smu,
enum smu_clk_type clk_type, char *buf)
{
- int i, size = 0, ret = 0;
+ int i, idx, size = 0, ret = 0;
uint32_t cur_value = 0, value = 0, count = 0;
uint32_t min, max;
@@ -1033,7 +1033,8 @@ static int yellow_carp_print_clk_levels(struct smu_context *smu,
goto print_clk_out;
for (i = 0; i < count; i++) {
- ret = yellow_carp_get_dpm_freq_by_index(smu, clk_type, i, &value);
+ idx = (clk_type == SMU_FCLK || clk_type == SMU_MCLK) ? (count - i - 1) : i;
+ ret = yellow_carp_get_dpm_freq_by_index(smu, clk_type, idx, &value);
if (ret)
goto print_clk_out;
diff --git a/drivers/gpu/drm/ast/ast_dp.c b/drivers/gpu/drm/ast/ast_dp.c
index fbb070f63e36..6dc1a09504e1 100644
--- a/drivers/gpu/drm/ast/ast_dp.c
+++ b/drivers/gpu/drm/ast/ast_dp.c
@@ -119,53 +119,32 @@ err_astdp_edid_not_ready:
/*
* Launch Aspeed DP
*/
-void ast_dp_launch(struct drm_device *dev, u8 bPower)
+void ast_dp_launch(struct drm_device *dev)
{
- u32 i = 0, j = 0, WaitCount = 1;
- u8 bDPTX = 0;
+ u32 i = 0;
u8 bDPExecute = 1;
-
struct ast_device *ast = to_ast_device(dev);
- // S3 come back, need more time to wait BMC ready.
- if (bPower)
- WaitCount = 300;
-
-
- // Wait total count by different condition.
- for (j = 0; j < WaitCount; j++) {
- bDPTX = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xD1, TX_TYPE_MASK);
-
- if (bDPTX)
- break;
+ // Wait one second then timeout.
+ while (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xD1, ASTDP_MCU_FW_EXECUTING) !=
+ ASTDP_MCU_FW_EXECUTING) {
+ i++;
+ // wait 100 ms
msleep(100);
- }
- // 0xE : ASTDP with DPMCU FW handling
- if (bDPTX == ASTDP_DPMCU_TX) {
- // Wait one second then timeout.
- i = 0;
-
- while (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xD1, COPROCESSOR_LAUNCH) !=
- COPROCESSOR_LAUNCH) {
- i++;
- // wait 100 ms
- msleep(100);
-
- if (i >= 10) {
- // DP would not be ready.
- bDPExecute = 0;
- break;
- }
+ if (i >= 10) {
+ // DP would not be ready.
+ bDPExecute = 0;
+ break;
}
+ }
- if (bDPExecute)
- ast->tx_chip_types |= BIT(AST_TX_ASTDP);
+ if (!bDPExecute)
+ drm_err(dev, "Wait DPMCU executing timeout\n");
- ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xE5,
- (u8) ~ASTDP_HOST_EDID_READ_DONE_MASK,
- ASTDP_HOST_EDID_READ_DONE);
- }
+ ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xE5,
+ (u8) ~ASTDP_HOST_EDID_READ_DONE_MASK,
+ ASTDP_HOST_EDID_READ_DONE);
}
diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h
index a501169cddad..5498a6676f2e 100644
--- a/drivers/gpu/drm/ast/ast_drv.h
+++ b/drivers/gpu/drm/ast/ast_drv.h
@@ -350,9 +350,6 @@ int ast_mode_config_init(struct ast_device *ast);
#define AST_DP501_LINKRATE 0xf014
#define AST_DP501_EDID_DATA 0xf020
-/* Define for Soc scratched reg */
-#define COPROCESSOR_LAUNCH BIT(5)
-
/*
* Display Transmitter Type:
*/
@@ -480,7 +477,7 @@ struct ast_i2c_chan *ast_i2c_create(struct drm_device *dev);
/* aspeed DP */
int ast_astdp_read_edid(struct drm_device *dev, u8 *ediddata);
-void ast_dp_launch(struct drm_device *dev, u8 bPower);
+void ast_dp_launch(struct drm_device *dev);
void ast_dp_power_on_off(struct drm_device *dev, bool no);
void ast_dp_set_on_off(struct drm_device *dev, bool no);
void ast_dp_set_mode(struct drm_crtc *crtc, struct ast_vbios_mode_info *vbios_mode);
diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index f32ce29edba7..1f35438f614a 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -254,8 +254,13 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post)
case 0x0c:
ast->tx_chip_types = AST_TX_DP501_BIT;
}
- } else if (ast->chip == AST2600)
- ast_dp_launch(&ast->base, 0);
+ } else if (ast->chip == AST2600) {
+ if (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xD1, TX_TYPE_MASK) ==
+ ASTDP_DPMCU_TX) {
+ ast->tx_chip_types = AST_TX_ASTDP_BIT;
+ ast_dp_launch(&ast->base);
+ }
+ }
/* Print stuff for diagnostic purposes */
if (ast->tx_chip_types & AST_TX_NONE_BIT)
@@ -264,6 +269,8 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post)
drm_info(dev, "Using Sil164 TMDS transmitter\n");
if (ast->tx_chip_types & AST_TX_DP501_BIT)
drm_info(dev, "Using DP501 DisplayPort transmitter\n");
+ if (ast->tx_chip_types & AST_TX_ASTDP_BIT)
+ drm_info(dev, "Using ASPEED DisplayPort transmitter\n");
return 0;
}
diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c
index 36374828f6c8..b3c670af6ef2 100644
--- a/drivers/gpu/drm/ast/ast_mode.c
+++ b/drivers/gpu/drm/ast/ast_mode.c
@@ -1647,6 +1647,8 @@ static int ast_dp501_output_init(struct ast_device *ast)
static int ast_astdp_connector_helper_get_modes(struct drm_connector *connector)
{
void *edid;
+ struct drm_device *dev = connector->dev;
+ struct ast_device *ast = to_ast_device(dev);
int succ;
int count;
@@ -1655,9 +1657,17 @@ static int ast_astdp_connector_helper_get_modes(struct drm_connector *connector)
if (!edid)
goto err_drm_connector_update_edid_property;
+ /*
+ * Protect access to I/O registers from concurrent modesetting
+ * by acquiring the I/O-register lock.
+ */
+ mutex_lock(&ast->ioregs_lock);
+
succ = ast_astdp_read_edid(connector->dev, edid);
if (succ < 0)
- goto err_kfree;
+ goto err_mutex_unlock;
+
+ mutex_unlock(&ast->ioregs_lock);
drm_connector_update_edid_property(connector, edid);
count = drm_add_edid_modes(connector, edid);
@@ -1665,7 +1675,8 @@ static int ast_astdp_connector_helper_get_modes(struct drm_connector *connector)
return count;
-err_kfree:
+err_mutex_unlock:
+ mutex_unlock(&ast->ioregs_lock);
kfree(edid);
err_drm_connector_update_edid_property:
drm_connector_update_edid_property(connector, NULL);
diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c
index 71bb36b865fd..a005aec18a02 100644
--- a/drivers/gpu/drm/ast/ast_post.c
+++ b/drivers/gpu/drm/ast/ast_post.c
@@ -380,7 +380,8 @@ void ast_post_gpu(struct drm_device *dev)
ast_set_def_ext_reg(dev);
if (ast->chip == AST2600) {
- ast_dp_launch(dev, 1);
+ if (ast->tx_chip_types & AST_TX_ASTDP_BIT)
+ ast_dp_launch(dev);
} else if (ast->config_mode == ast_use_p2a) {
if (ast->chip == AST2500)
ast_post_chip_2500(dev);
diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
index 7a748785c545..4676cf2900df 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
@@ -298,6 +298,10 @@ static void ti_sn_bridge_set_refclk_freq(struct ti_sn65dsi86 *pdata)
if (refclk_lut[i] == refclk_rate)
break;
+ /* avoid buffer overflow and "1" is the default rate in the datasheet. */
+ if (i >= refclk_lut_size)
+ i = 1;
+
regmap_update_bits(pdata->regmap, SN_DPPLL_SRC_REG, REFCLK_FREQ_MASK,
REFCLK_FREQ(i));
diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c
index 38dab76ae69e..e2e21ce79510 100644
--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
@@ -3404,7 +3404,7 @@ int drm_dp_add_payload_part2(struct drm_dp_mst_topology_mgr *mgr,
/* Skip failed payloads */
if (payload->vc_start_slot == -1) {
- drm_dbg_kms(state->dev, "Part 1 of payload creation for %s failed, skipping part 2\n",
+ drm_dbg_kms(mgr->dev, "Part 1 of payload creation for %s failed, skipping part 2\n",
payload->port->connector->name);
return -EIO;
}
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 6bb1b8b27d7a..fd27f1978635 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -1545,17 +1545,19 @@ static void drm_fb_helper_fill_pixel_fmt(struct fb_var_screeninfo *var,
}
}
-static void __fill_var(struct fb_var_screeninfo *var,
+static void __fill_var(struct fb_var_screeninfo *var, struct fb_info *info,
struct drm_framebuffer *fb)
{
int i;
var->xres_virtual = fb->width;
var->yres_virtual = fb->height;
- var->accel_flags = FB_ACCELF_TEXT;
+ var->accel_flags = 0;
var->bits_per_pixel = drm_format_info_bpp(fb->format, 0);
- var->height = var->width = 0;
+ var->height = info->var.height;
+ var->width = info->var.width;
+
var->left_margin = var->right_margin = 0;
var->upper_margin = var->lower_margin = 0;
var->hsync_len = var->vsync_len = 0;
@@ -1618,7 +1620,7 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var,
return -EINVAL;
}
- __fill_var(var, fb);
+ __fill_var(var, info, fb);
/*
* fb_pan_display() validates this, but fb_set_par() doesn't and just
@@ -2074,7 +2076,7 @@ static void drm_fb_helper_fill_var(struct fb_info *info,
info->pseudo_palette = fb_helper->pseudo_palette;
info->var.xoffset = 0;
info->var.yoffset = 0;
- __fill_var(&info->var, fb);
+ __fill_var(&info->var, info, fb);
info->var.activate = FB_ACTIVATE_NOW;
drm_fb_helper_fill_pixel_fmt(&info->var, format);
diff --git a/drivers/gpu/drm/drm_managed.c b/drivers/gpu/drm/drm_managed.c
index 4cf214de50c4..c21c3f623033 100644
--- a/drivers/gpu/drm/drm_managed.c
+++ b/drivers/gpu/drm/drm_managed.c
@@ -264,28 +264,10 @@ void drmm_kfree(struct drm_device *dev, void *data)
}
EXPORT_SYMBOL(drmm_kfree);
-static void drmm_mutex_release(struct drm_device *dev, void *res)
+void __drmm_mutex_release(struct drm_device *dev, void *res)
{
struct mutex *lock = res;
mutex_destroy(lock);
}
-
-/**
- * drmm_mutex_init - &drm_device-managed mutex_init()
- * @dev: DRM device
- * @lock: lock to be initialized
- *
- * Returns:
- * 0 on success, or a negative errno code otherwise.
- *
- * This is a &drm_device-managed version of mutex_init(). The initialized
- * lock is automatically destroyed on the final drm_dev_put().
- */
-int drmm_mutex_init(struct drm_device *dev, struct mutex *lock)
-{
- mutex_init(lock);
-
- return drmm_add_action_or_reset(dev, drmm_mutex_release, lock);
-}
-EXPORT_SYMBOL(drmm_mutex_init);
+EXPORT_SYMBOL(__drmm_mutex_release);
diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c
index b1a38e6ce2f8..0cb646cb04ee 100644
--- a/drivers/gpu/drm/drm_panel_orientation_quirks.c
+++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c
@@ -179,7 +179,7 @@ static const struct dmi_system_id orientation_data[] = {
}, { /* AYA NEO AIR */
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "AYANEO"),
- DMI_MATCH(DMI_BOARD_NAME, "AIR"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "AIR"),
},
.driver_data = (void *)&lcd1080x1920_leftside_up,
}, { /* AYA NEO NEXT */
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index ec784e58da5c..414e585ec7dd 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -1335,7 +1335,7 @@ int exynos_g2d_exec_ioctl(struct drm_device *drm_dev, void *data,
/* Let the runqueue know that there is work to do. */
queue_work(g2d->g2d_workq, &g2d->runqueue_work);
- if (runqueue_node->async)
+ if (req->async)
goto out;
wait_for_completion(&runqueue_node->complete);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.h b/drivers/gpu/drm/exynos/exynos_drm_g2d.h
index 74ea3c26dead..1a5ae781b56c 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.h
@@ -34,11 +34,11 @@ static inline int exynos_g2d_exec_ioctl(struct drm_device *dev, void *data,
return -ENODEV;
}
-int g2d_open(struct drm_device *drm_dev, struct drm_file *file)
+static inline int g2d_open(struct drm_device *drm_dev, struct drm_file *file)
{
return 0;
}
-void g2d_close(struct drm_device *drm_dev, struct drm_file *file)
+static inline void g2d_close(struct drm_device *drm_dev, struct drm_file *file)
{ }
#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
index 4d56c8c799c5..f5e1adfcaa51 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
@@ -469,8 +469,6 @@ static int vidi_remove(struct platform_device *pdev)
if (ctx->raw_edid != (struct edid *)fake_edid_info) {
kfree(ctx->raw_edid);
ctx->raw_edid = NULL;
-
- return -EINVAL;
}
component_del(&pdev->dev, &vidi_component_ops);
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index 084a483f9776..2aaaba090cc0 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -1453,6 +1453,18 @@ static u8 tgl_calc_voltage_level(int cdclk)
return 0;
}
+static u8 rplu_calc_voltage_level(int cdclk)
+{
+ if (cdclk > 556800)
+ return 3;
+ else if (cdclk > 480000)
+ return 2;
+ else if (cdclk > 312000)
+ return 1;
+ else
+ return 0;
+}
+
static void icl_readout_refclk(struct drm_i915_private *dev_priv,
struct intel_cdclk_config *cdclk_config)
{
@@ -3242,6 +3254,13 @@ static const struct intel_cdclk_funcs mtl_cdclk_funcs = {
.calc_voltage_level = tgl_calc_voltage_level,
};
+static const struct intel_cdclk_funcs rplu_cdclk_funcs = {
+ .get_cdclk = bxt_get_cdclk,
+ .set_cdclk = bxt_set_cdclk,
+ .modeset_calc_cdclk = bxt_modeset_calc_cdclk,
+ .calc_voltage_level = rplu_calc_voltage_level,
+};
+
static const struct intel_cdclk_funcs tgl_cdclk_funcs = {
.get_cdclk = bxt_get_cdclk,
.set_cdclk = bxt_set_cdclk,
@@ -3384,14 +3403,17 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv)
dev_priv->display.funcs.cdclk = &tgl_cdclk_funcs;
dev_priv->display.cdclk.table = dg2_cdclk_table;
} else if (IS_ALDERLAKE_P(dev_priv)) {
- dev_priv->display.funcs.cdclk = &tgl_cdclk_funcs;
/* Wa_22011320316:adl-p[a0] */
- if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0))
+ if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) {
dev_priv->display.cdclk.table = adlp_a_step_cdclk_table;
- else if (IS_ADLP_RPLU(dev_priv))
+ dev_priv->display.funcs.cdclk = &tgl_cdclk_funcs;
+ } else if (IS_ADLP_RPLU(dev_priv)) {
dev_priv->display.cdclk.table = rplu_cdclk_table;
- else
+ dev_priv->display.funcs.cdclk = &rplu_cdclk_funcs;
+ } else {
dev_priv->display.cdclk.table = adlp_cdclk_table;
+ dev_priv->display.funcs.cdclk = &tgl_cdclk_funcs;
+ }
} else if (IS_ROCKETLAKE(dev_priv)) {
dev_priv->display.funcs.cdclk = &tgl_cdclk_funcs;
dev_priv->display.cdclk.table = rkl_cdclk_table;
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 3c29792137a5..0aae9a1eb3d5 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -1851,9 +1851,17 @@ static void hsw_crtc_disable(struct intel_atomic_state *state,
intel_disable_shared_dpll(old_crtc_state);
- intel_encoders_post_pll_disable(state, crtc);
+ if (!intel_crtc_is_bigjoiner_slave(old_crtc_state)) {
+ struct intel_crtc *slave_crtc;
+
+ intel_encoders_post_pll_disable(state, crtc);
- intel_dmc_disable_pipe(i915, crtc->pipe);
+ intel_dmc_disable_pipe(i915, crtc->pipe);
+
+ for_each_intel_crtc_in_pipe_mask(&i915->drm, slave_crtc,
+ intel_crtc_bigjoiner_slave_pipes(old_crtc_state))
+ intel_dmc_disable_pipe(i915, slave_crtc->pipe);
+ }
}
static void i9xx_pfit_enable(const struct intel_crtc_state *crtc_state)
diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c b/drivers/gpu/drm/i915/display/intel_dp_aux.c
index 705915d50565..524bd6da260c 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_aux.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c
@@ -129,7 +129,7 @@ static int intel_dp_aux_sync_len(void)
static int intel_dp_aux_fw_sync_len(void)
{
- int precharge = 16; /* 10-16 */
+ int precharge = 10; /* 10-16 */
int preamble = 8;
return precharge + preamble;
diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c
index 650232c4892b..b183efab04a1 100644
--- a/drivers/gpu/drm/i915/display/intel_hdcp.c
+++ b/drivers/gpu/drm/i915/display/intel_hdcp.c
@@ -204,8 +204,6 @@ bool intel_hdcp2_capable(struct intel_connector *connector)
struct intel_digital_port *dig_port = intel_attached_dig_port(connector);
struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
struct intel_hdcp *hdcp = &connector->hdcp;
- struct intel_gt *gt = dev_priv->media_gt;
- struct intel_gsc_uc *gsc = &gt->uc.gsc;
bool capable = false;
/* I915 support for HDCP2.2 */
@@ -213,9 +211,13 @@ bool intel_hdcp2_capable(struct intel_connector *connector)
return false;
/* If MTL+ make sure gsc is loaded and proxy is setup */
- if (intel_hdcp_gsc_cs_required(dev_priv))
- if (!intel_uc_fw_is_running(&gsc->fw))
+ if (intel_hdcp_gsc_cs_required(dev_priv)) {
+ struct intel_gt *gt = dev_priv->media_gt;
+ struct intel_gsc_uc *gsc = gt ? &gt->uc.gsc : NULL;
+
+ if (!gsc || !intel_uc_fw_is_running(&gsc->fw))
return false;
+ }
/* MEI/GSC interface is solid depending on which is used */
mutex_lock(&dev_priv->display.hdcp.comp_mutex);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index a81fa6a20f5a..7b516b1a4915 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -346,8 +346,10 @@ static int live_parallel_switch(void *arg)
continue;
ce = intel_context_create(data[m].ce[0]->engine);
- if (IS_ERR(ce))
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
goto out;
+ }
err = intel_context_pin(ce);
if (err) {
@@ -367,8 +369,10 @@ static int live_parallel_switch(void *arg)
worker = kthread_create_worker(0, "igt/parallel:%s",
data[n].ce[0]->engine->name);
- if (IS_ERR(worker))
+ if (IS_ERR(worker)) {
+ err = PTR_ERR(worker);
goto out;
+ }
data[n].worker = worker;
}
@@ -397,8 +401,10 @@ static int live_parallel_switch(void *arg)
}
}
- if (igt_live_test_end(&t))
- err = -EIO;
+ if (igt_live_test_end(&t)) {
+ err = err ?: -EIO;
+ break;
+ }
}
out:
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 736b89a8ecf5..4202df5b8c12 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -1530,8 +1530,8 @@ static int live_busywait_preempt(void *arg)
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
enum intel_engine_id id;
- int err = -ENOMEM;
u32 *map;
+ int err;
/*
* Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
@@ -1539,13 +1539,17 @@ static int live_busywait_preempt(void *arg)
*/
ctx_hi = kernel_context(gt->i915, NULL);
- if (!ctx_hi)
- return -ENOMEM;
+ if (IS_ERR(ctx_hi))
+ return PTR_ERR(ctx_hi);
+
ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
ctx_lo = kernel_context(gt->i915, NULL);
- if (!ctx_lo)
+ if (IS_ERR(ctx_lo)) {
+ err = PTR_ERR(ctx_lo);
goto err_ctx_hi;
+ }
+
ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 050b8ae7b8e7..3035cba2c6a2 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -877,12 +877,17 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
stream->oa_buffer.last_ctx_id = ctx_id;
}
- /*
- * Clear out the report id and timestamp as a means to detect unlanded
- * reports.
- */
- oa_report_id_clear(stream, report32);
- oa_timestamp_clear(stream, report32);
+ if (is_power_of_2(report_size)) {
+ /*
+ * Clear out the report id and timestamp as a means
+ * to detect unlanded reports.
+ */
+ oa_report_id_clear(stream, report32);
+ oa_timestamp_clear(stream, report32);
+ } else {
+ /* Zero out the entire report */
+ memset(report32, 0, report_size);
+ }
}
if (start_offset != *offset) {
diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
index ff003403fbbc..ffd91a5ee299 100644
--- a/drivers/gpu/drm/lima/lima_sched.c
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -165,7 +165,7 @@ int lima_sched_context_init(struct lima_sched_pipe *pipe,
void lima_sched_context_fini(struct lima_sched_pipe *pipe,
struct lima_sched_context *context)
{
- drm_sched_entity_fini(&context->base);
+ drm_sched_entity_destroy(&context->base);
}
struct dma_fence *lima_sched_context_queue_task(struct lima_sched_task *task)
diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
index 0f2dd26755df..af3ce5a6a636 100644
--- a/drivers/gpu/drm/mgag200/mgag200_mode.c
+++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
@@ -642,6 +642,11 @@ void mgag200_crtc_helper_atomic_enable(struct drm_crtc *crtc, struct drm_atomic_
if (funcs->pixpllc_atomic_update)
funcs->pixpllc_atomic_update(crtc, old_state);
+ if (crtc_state->gamma_lut)
+ mgag200_crtc_set_gamma(mdev, format, crtc_state->gamma_lut->data);
+ else
+ mgag200_crtc_set_gamma_linear(mdev, format);
+
mgag200_enable_display(mdev);
if (funcs->enable_vidrst)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index e16b4b3f8535..8914992378f2 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -1526,8 +1526,6 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
if (!pdev)
return -ENODEV;
- mutex_init(&gmu->lock);
-
gmu->dev = &pdev->dev;
of_dma_configure(gmu->dev, node, true);
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 9fb214f150dd..52da3795b175 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1981,6 +1981,8 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
adreno_gpu = &a6xx_gpu->base;
gpu = &adreno_gpu->base;
+ mutex_init(&a6xx_gpu->gmu.lock);
+
adreno_gpu->registers = NULL;
/*
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h
index 2b3ae84057df..bdcd554fc8a8 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h
@@ -98,17 +98,17 @@ static const struct dpu_sspp_cfg msm8998_sspp[] = {
static const struct dpu_lm_cfg msm8998_lm[] = {
LM_BLK("lm_0", LM_0, 0x44000, MIXER_MSM8998_MASK,
- &msm8998_lm_sblk, PINGPONG_0, LM_2, DSPP_0),
+ &msm8998_lm_sblk, PINGPONG_0, LM_1, DSPP_0),
LM_BLK("lm_1", LM_1, 0x45000, MIXER_MSM8998_MASK,
- &msm8998_lm_sblk, PINGPONG_1, LM_5, DSPP_1),
+ &msm8998_lm_sblk, PINGPONG_1, LM_0, DSPP_1),
LM_BLK("lm_2", LM_2, 0x46000, MIXER_MSM8998_MASK,
- &msm8998_lm_sblk, PINGPONG_2, LM_0, 0),
+ &msm8998_lm_sblk, PINGPONG_2, LM_5, 0),
LM_BLK("lm_3", LM_3, 0x47000, MIXER_MSM8998_MASK,
&msm8998_lm_sblk, PINGPONG_MAX, 0, 0),
LM_BLK("lm_4", LM_4, 0x48000, MIXER_MSM8998_MASK,
&msm8998_lm_sblk, PINGPONG_MAX, 0, 0),
LM_BLK("lm_5", LM_5, 0x49000, MIXER_MSM8998_MASK,
- &msm8998_lm_sblk, PINGPONG_3, LM_1, 0),
+ &msm8998_lm_sblk, PINGPONG_3, LM_2, 0),
};
static const struct dpu_pingpong_cfg msm8998_pp[] = {
@@ -134,10 +134,10 @@ static const struct dpu_dspp_cfg msm8998_dspp[] = {
};
static const struct dpu_intf_cfg msm8998_intf[] = {
- INTF_BLK("intf_0", INTF_0, 0x6a000, 0x280, INTF_DP, 0, 25, INTF_SDM845_MASK, MDP_SSPP_TOP0_INTR, 24, 25),
- INTF_BLK("intf_1", INTF_1, 0x6a800, 0x280, INTF_DSI, 0, 25, INTF_SDM845_MASK, MDP_SSPP_TOP0_INTR, 26, 27),
- INTF_BLK("intf_2", INTF_2, 0x6b000, 0x280, INTF_DSI, 1, 25, INTF_SDM845_MASK, MDP_SSPP_TOP0_INTR, 28, 29),
- INTF_BLK("intf_3", INTF_3, 0x6b800, 0x280, INTF_HDMI, 0, 25, INTF_SDM845_MASK, MDP_SSPP_TOP0_INTR, 30, 31),
+ INTF_BLK("intf_0", INTF_0, 0x6a000, 0x280, INTF_DP, 0, 21, INTF_SDM845_MASK, MDP_SSPP_TOP0_INTR, 24, 25),
+ INTF_BLK("intf_1", INTF_1, 0x6a800, 0x280, INTF_DSI, 0, 21, INTF_SDM845_MASK, MDP_SSPP_TOP0_INTR, 26, 27),
+ INTF_BLK("intf_2", INTF_2, 0x6b000, 0x280, INTF_DSI, 1, 21, INTF_SDM845_MASK, MDP_SSPP_TOP0_INTR, 28, 29),
+ INTF_BLK("intf_3", INTF_3, 0x6b800, 0x280, INTF_HDMI, 0, 21, INTF_SDM845_MASK, MDP_SSPP_TOP0_INTR, 30, 31),
};
static const struct dpu_perf_cfg msm8998_perf_data = {
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h
index 282d410269ff..42b0e58624d0 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h
@@ -128,10 +128,10 @@ static const struct dpu_dspp_cfg sm8150_dspp[] = {
};
static const struct dpu_pingpong_cfg sm8150_pp[] = {
- PP_BLK_TE("pingpong_0", PINGPONG_0, 0x70000, MERGE_3D_0, sdm845_pp_sblk_te,
+ PP_BLK("pingpong_0", PINGPONG_0, 0x70000, MERGE_3D_0, sdm845_pp_sblk,
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 8),
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 12)),
- PP_BLK_TE("pingpong_1", PINGPONG_1, 0x70800, MERGE_3D_0, sdm845_pp_sblk_te,
+ PP_BLK("pingpong_1", PINGPONG_1, 0x70800, MERGE_3D_0, sdm845_pp_sblk,
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 9),
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 13)),
PP_BLK("pingpong_2", PINGPONG_2, 0x71000, MERGE_3D_1, sdm845_pp_sblk,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h
index c57400265f28..e3bdfe7b30f1 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h
@@ -116,10 +116,10 @@ static const struct dpu_lm_cfg sc8180x_lm[] = {
};
static const struct dpu_pingpong_cfg sc8180x_pp[] = {
- PP_BLK_TE("pingpong_0", PINGPONG_0, 0x70000, MERGE_3D_0, sdm845_pp_sblk_te,
+ PP_BLK("pingpong_0", PINGPONG_0, 0x70000, MERGE_3D_0, sdm845_pp_sblk,
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 8),
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 12)),
- PP_BLK_TE("pingpong_1", PINGPONG_1, 0x70800, MERGE_3D_0, sdm845_pp_sblk_te,
+ PP_BLK("pingpong_1", PINGPONG_1, 0x70800, MERGE_3D_0, sdm845_pp_sblk,
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 9),
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 13)),
PP_BLK("pingpong_2", PINGPONG_2, 0x71000, MERGE_3D_1, sdm845_pp_sblk,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h
index 2c40229ea515..ed130582873c 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h
@@ -129,10 +129,10 @@ static const struct dpu_dspp_cfg sm8250_dspp[] = {
};
static const struct dpu_pingpong_cfg sm8250_pp[] = {
- PP_BLK_TE("pingpong_0", PINGPONG_0, 0x70000, MERGE_3D_0, sdm845_pp_sblk_te,
+ PP_BLK("pingpong_0", PINGPONG_0, 0x70000, MERGE_3D_0, sdm845_pp_sblk,
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 8),
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 12)),
- PP_BLK_TE("pingpong_1", PINGPONG_1, 0x70800, MERGE_3D_0, sdm845_pp_sblk_te,
+ PP_BLK("pingpong_1", PINGPONG_1, 0x70800, MERGE_3D_0, sdm845_pp_sblk,
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 9),
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 13)),
PP_BLK("pingpong_2", PINGPONG_2, 0x71000, MERGE_3D_1, sdm845_pp_sblk,
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h
index 8799ed757119..a46b11730a4d 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h
@@ -80,8 +80,8 @@ static const struct dpu_dspp_cfg sc7180_dspp[] = {
};
static const struct dpu_pingpong_cfg sc7180_pp[] = {
- PP_BLK_TE("pingpong_0", PINGPONG_0, 0x70000, 0, sdm845_pp_sblk_te, -1, -1),
- PP_BLK_TE("pingpong_1", PINGPONG_1, 0x70800, 0, sdm845_pp_sblk_te, -1, -1),
+ PP_BLK("pingpong_0", PINGPONG_0, 0x70000, 0, sdm845_pp_sblk, -1, -1),
+ PP_BLK("pingpong_1", PINGPONG_1, 0x70800, 0, sdm845_pp_sblk, -1, -1),
};
static const struct dpu_intf_cfg sc7180_intf[] = {
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h
index 6f04d8f85c92..988d820f7ef2 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h
@@ -122,7 +122,6 @@ const struct dpu_mdss_cfg dpu_sm6115_cfg = {
.mdss_irqs = BIT(MDP_SSPP_TOP0_INTR) | \
BIT(MDP_SSPP_TOP0_INTR2) | \
BIT(MDP_SSPP_TOP0_HIST_INTR) | \
- BIT(MDP_INTF0_INTR) | \
BIT(MDP_INTF1_INTR),
};
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h
index 303492d62a5c..c9003dcc1a59 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h
@@ -112,7 +112,6 @@ const struct dpu_mdss_cfg dpu_qcm2290_cfg = {
.mdss_irqs = BIT(MDP_SSPP_TOP0_INTR) | \
BIT(MDP_SSPP_TOP0_INTR2) | \
BIT(MDP_SSPP_TOP0_HIST_INTR) | \
- BIT(MDP_INTF0_INTR) | \
BIT(MDP_INTF1_INTR),
};
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h
index ca107ca8de46..4f6a965bcd90 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h
@@ -127,22 +127,22 @@ static const struct dpu_dspp_cfg sm8350_dspp[] = {
};
static const struct dpu_pingpong_cfg sm8350_pp[] = {
- PP_BLK_TE("pingpong_0", PINGPONG_0, 0x69000, MERGE_3D_0, sdm845_pp_sblk_te,
+ PP_BLK_DITHER("pingpong_0", PINGPONG_0, 0x69000, MERGE_3D_0, sc7280_pp_sblk,
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 8),
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 12)),
- PP_BLK_TE("pingpong_1", PINGPONG_1, 0x6a000, MERGE_3D_0, sdm845_pp_sblk_te,
+ PP_BLK_DITHER("pingpong_1", PINGPONG_1, 0x6a000, MERGE_3D_0, sc7280_pp_sblk,
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 9),
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 13)),
- PP_BLK("pingpong_2", PINGPONG_2, 0x6b000, MERGE_3D_1, sdm845_pp_sblk,
+ PP_BLK_DITHER("pingpong_2", PINGPONG_2, 0x6b000, MERGE_3D_1, sc7280_pp_sblk,
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 10),
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 14)),
- PP_BLK("pingpong_3", PINGPONG_3, 0x6c000, MERGE_3D_1, sdm845_pp_sblk,
+ PP_BLK_DITHER("pingpong_3", PINGPONG_3, 0x6c000, MERGE_3D_1, sc7280_pp_sblk,
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 11),
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 15)),
- PP_BLK("pingpong_4", PINGPONG_4, 0x6d000, MERGE_3D_2, sdm845_pp_sblk,
+ PP_BLK_DITHER("pingpong_4", PINGPONG_4, 0x6d000, MERGE_3D_2, sc7280_pp_sblk,
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 30),
-1),
- PP_BLK("pingpong_5", PINGPONG_5, 0x6e000, MERGE_3D_2, sdm845_pp_sblk,
+ PP_BLK_DITHER("pingpong_5", PINGPONG_5, 0x6e000, MERGE_3D_2, sc7280_pp_sblk,
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 31),
-1),
};
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h
index 5957de185984..6b2c7eae71d9 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h
@@ -87,10 +87,10 @@ static const struct dpu_dspp_cfg sc7280_dspp[] = {
};
static const struct dpu_pingpong_cfg sc7280_pp[] = {
- PP_BLK("pingpong_0", PINGPONG_0, 0x69000, 0, sc7280_pp_sblk, -1, -1),
- PP_BLK("pingpong_1", PINGPONG_1, 0x6a000, 0, sc7280_pp_sblk, -1, -1),
- PP_BLK("pingpong_2", PINGPONG_2, 0x6b000, 0, sc7280_pp_sblk, -1, -1),
- PP_BLK("pingpong_3", PINGPONG_3, 0x6c000, 0, sc7280_pp_sblk, -1, -1),
+ PP_BLK_DITHER("pingpong_0", PINGPONG_0, 0x69000, 0, sc7280_pp_sblk, -1, -1),
+ PP_BLK_DITHER("pingpong_1", PINGPONG_1, 0x6a000, 0, sc7280_pp_sblk, -1, -1),
+ PP_BLK_DITHER("pingpong_2", PINGPONG_2, 0x6b000, 0, sc7280_pp_sblk, -1, -1),
+ PP_BLK_DITHER("pingpong_3", PINGPONG_3, 0x6c000, 0, sc7280_pp_sblk, -1, -1),
};
static const struct dpu_intf_cfg sc7280_intf[] = {
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h
index 9aab110b8c44..706d0f13b598 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h
@@ -121,18 +121,18 @@ static const struct dpu_dspp_cfg sc8280xp_dspp[] = {
};
static const struct dpu_pingpong_cfg sc8280xp_pp[] = {
- PP_BLK_TE("pingpong_0", PINGPONG_0, 0x69000, MERGE_3D_0, sdm845_pp_sblk_te,
- DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 8), -1),
- PP_BLK_TE("pingpong_1", PINGPONG_1, 0x6a000, MERGE_3D_0, sdm845_pp_sblk_te,
- DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 9), -1),
- PP_BLK_TE("pingpong_2", PINGPONG_2, 0x6b000, MERGE_3D_1, sdm845_pp_sblk_te,
- DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 10), -1),
- PP_BLK_TE("pingpong_3", PINGPONG_3, 0x6c000, MERGE_3D_1, sdm845_pp_sblk_te,
- DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 11), -1),
- PP_BLK_TE("pingpong_4", PINGPONG_4, 0x6d000, MERGE_3D_2, sdm845_pp_sblk_te,
- DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 30), -1),
- PP_BLK_TE("pingpong_5", PINGPONG_5, 0x6e000, MERGE_3D_2, sdm845_pp_sblk_te,
- DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 31), -1),
+ PP_BLK_DITHER("pingpong_0", PINGPONG_0, 0x69000, MERGE_3D_0, sc7280_pp_sblk,
+ DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 8), -1),
+ PP_BLK_DITHER("pingpong_1", PINGPONG_1, 0x6a000, MERGE_3D_0, sc7280_pp_sblk,
+ DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 9), -1),
+ PP_BLK_DITHER("pingpong_2", PINGPONG_2, 0x6b000, MERGE_3D_1, sc7280_pp_sblk,
+ DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 10), -1),
+ PP_BLK_DITHER("pingpong_3", PINGPONG_3, 0x6c000, MERGE_3D_1, sc7280_pp_sblk,
+ DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 11), -1),
+ PP_BLK_DITHER("pingpong_4", PINGPONG_4, 0x6d000, MERGE_3D_2, sc7280_pp_sblk,
+ DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 30), -1),
+ PP_BLK_DITHER("pingpong_5", PINGPONG_5, 0x6e000, MERGE_3D_2, sc7280_pp_sblk,
+ DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 31), -1),
};
static const struct dpu_merge_3d_cfg sc8280xp_merge_3d[] = {
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
index 02a259b6b426..4ecb3df5cbc0 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h
@@ -128,28 +128,28 @@ static const struct dpu_dspp_cfg sm8450_dspp[] = {
};
/* FIXME: interrupts */
static const struct dpu_pingpong_cfg sm8450_pp[] = {
- PP_BLK_TE("pingpong_0", PINGPONG_0, 0x69000, MERGE_3D_0, sdm845_pp_sblk_te,
+ PP_BLK_DITHER("pingpong_0", PINGPONG_0, 0x69000, MERGE_3D_0, sc7280_pp_sblk,
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 8),
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 12)),
- PP_BLK_TE("pingpong_1", PINGPONG_1, 0x6a000, MERGE_3D_0, sdm845_pp_sblk_te,
+ PP_BLK_DITHER("pingpong_1", PINGPONG_1, 0x6a000, MERGE_3D_0, sc7280_pp_sblk,
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 9),
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 13)),
- PP_BLK("pingpong_2", PINGPONG_2, 0x6b000, MERGE_3D_1, sdm845_pp_sblk,
+ PP_BLK_DITHER("pingpong_2", PINGPONG_2, 0x6b000, MERGE_3D_1, sc7280_pp_sblk,
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 10),
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 14)),
- PP_BLK("pingpong_3", PINGPONG_3, 0x6c000, MERGE_3D_1, sdm845_pp_sblk,
+ PP_BLK_DITHER("pingpong_3", PINGPONG_3, 0x6c000, MERGE_3D_1, sc7280_pp_sblk,
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 11),
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 15)),
- PP_BLK("pingpong_4", PINGPONG_4, 0x6d000, MERGE_3D_2, sdm845_pp_sblk,
+ PP_BLK_DITHER("pingpong_4", PINGPONG_4, 0x6d000, MERGE_3D_2, sc7280_pp_sblk,
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 30),
-1),
- PP_BLK("pingpong_5", PINGPONG_5, 0x6e000, MERGE_3D_2, sdm845_pp_sblk,
+ PP_BLK_DITHER("pingpong_5", PINGPONG_5, 0x6e000, MERGE_3D_2, sc7280_pp_sblk,
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 31),
-1),
- PP_BLK("pingpong_6", PINGPONG_6, 0x65800, MERGE_3D_3, sdm845_pp_sblk,
+ PP_BLK_DITHER("pingpong_6", PINGPONG_6, 0x65800, MERGE_3D_3, sc7280_pp_sblk,
-1,
-1),
- PP_BLK("pingpong_7", PINGPONG_7, 0x65c00, MERGE_3D_3, sdm845_pp_sblk,
+ PP_BLK_DITHER("pingpong_7", PINGPONG_7, 0x65c00, MERGE_3D_3, sc7280_pp_sblk,
-1,
-1),
};
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h
index 9e403034093f..d0ab351b6a8b 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h
@@ -132,28 +132,28 @@ static const struct dpu_dspp_cfg sm8550_dspp[] = {
&sm8150_dspp_sblk),
};
static const struct dpu_pingpong_cfg sm8550_pp[] = {
- PP_BLK_DIPHER("pingpong_0", PINGPONG_0, 0x69000, MERGE_3D_0, sc7280_pp_sblk,
+ PP_BLK_DITHER("pingpong_0", PINGPONG_0, 0x69000, MERGE_3D_0, sc7280_pp_sblk,
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 8),
-1),
- PP_BLK_DIPHER("pingpong_1", PINGPONG_1, 0x6a000, MERGE_3D_0, sc7280_pp_sblk,
+ PP_BLK_DITHER("pingpong_1", PINGPONG_1, 0x6a000, MERGE_3D_0, sc7280_pp_sblk,
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 9),
-1),
- PP_BLK_DIPHER("pingpong_2", PINGPONG_2, 0x6b000, MERGE_3D_1, sc7280_pp_sblk,
+ PP_BLK_DITHER("pingpong_2", PINGPONG_2, 0x6b000, MERGE_3D_1, sc7280_pp_sblk,
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 10),
-1),
- PP_BLK_DIPHER("pingpong_3", PINGPONG_3, 0x6c000, MERGE_3D_1, sc7280_pp_sblk,
+ PP_BLK_DITHER("pingpong_3", PINGPONG_3, 0x6c000, MERGE_3D_1, sc7280_pp_sblk,
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 11),
-1),
- PP_BLK_DIPHER("pingpong_4", PINGPONG_4, 0x6d000, MERGE_3D_2, sc7280_pp_sblk,
+ PP_BLK_DITHER("pingpong_4", PINGPONG_4, 0x6d000, MERGE_3D_2, sc7280_pp_sblk,
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 30),
-1),
- PP_BLK_DIPHER("pingpong_5", PINGPONG_5, 0x6e000, MERGE_3D_2, sc7280_pp_sblk,
+ PP_BLK_DITHER("pingpong_5", PINGPONG_5, 0x6e000, MERGE_3D_2, sc7280_pp_sblk,
DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR2, 31),
-1),
- PP_BLK_DIPHER("pingpong_6", PINGPONG_6, 0x66000, MERGE_3D_3, sc7280_pp_sblk,
+ PP_BLK_DITHER("pingpong_6", PINGPONG_6, 0x66000, MERGE_3D_3, sc7280_pp_sblk,
-1,
-1),
- PP_BLK_DIPHER("pingpong_7", PINGPONG_7, 0x66400, MERGE_3D_3, sc7280_pp_sblk,
+ PP_BLK_DITHER("pingpong_7", PINGPONG_7, 0x66400, MERGE_3D_3, sc7280_pp_sblk,
-1,
-1),
};
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
index 03f162af1a50..5d994bce696f 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
@@ -491,7 +491,7 @@ static const struct dpu_pingpong_sub_blks sc7280_pp_sblk = {
.len = 0x20, .version = 0x20000},
};
-#define PP_BLK_DIPHER(_name, _id, _base, _merge_3d, _sblk, _done, _rdptr) \
+#define PP_BLK_DITHER(_name, _id, _base, _merge_3d, _sblk, _done, _rdptr) \
{\
.name = _name, .id = _id, \
.base = _base, .len = 0, \
@@ -587,12 +587,12 @@ static const u32 sdm845_nrt_pri_lvl[] = {3, 3, 3, 3, 3, 3, 3, 3};
static const struct dpu_vbif_dynamic_ot_cfg msm8998_ot_rdwr_cfg[] = {
{
- .pps = 1088 * 1920 * 30,
+ .pps = 1920 * 1080 * 30,
.ot_limit = 2,
},
{
- .pps = 1088 * 1920 * 60,
- .ot_limit = 6,
+ .pps = 1920 * 1080 * 60,
+ .ot_limit = 4,
},
{
.pps = 3840 * 2160 * 30,
@@ -705,10 +705,7 @@ static const struct dpu_qos_lut_entry msm8998_qos_linear[] = {
{.fl = 10, .lut = 0x1555b},
{.fl = 11, .lut = 0x5555b},
{.fl = 12, .lut = 0x15555b},
- {.fl = 13, .lut = 0x55555b},
- {.fl = 14, .lut = 0},
- {.fl = 1, .lut = 0x1b},
- {.fl = 0, .lut = 0}
+ {.fl = 0, .lut = 0x55555b}
};
static const struct dpu_qos_lut_entry sdm845_qos_linear[] = {
@@ -730,9 +727,7 @@ static const struct dpu_qos_lut_entry msm8998_qos_macrotile[] = {
{.fl = 10, .lut = 0x1aaff},
{.fl = 11, .lut = 0x5aaff},
{.fl = 12, .lut = 0x15aaff},
- {.fl = 13, .lut = 0x55aaff},
- {.fl = 1, .lut = 0x1aaff},
- {.fl = 0, .lut = 0},
+ {.fl = 0, .lut = 0x55aaff},
};
static const struct dpu_qos_lut_entry sc7180_qos_linear[] = {
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
index 53326f25e40e..17f3e7e4f194 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
@@ -15,7 +15,7 @@
/*
* Register offsets in MDSS register file for the interrupt registers
- * w.r.t. to the MDP base
+ * w.r.t. the MDP base
*/
#define MDP_SSPP_TOP0_OFF 0x0
#define MDP_INTF_0_OFF 0x6A000
@@ -24,20 +24,23 @@
#define MDP_INTF_3_OFF 0x6B800
#define MDP_INTF_4_OFF 0x6C000
#define MDP_INTF_5_OFF 0x6C800
+#define INTF_INTR_EN 0x1c0
+#define INTF_INTR_STATUS 0x1c4
+#define INTF_INTR_CLEAR 0x1c8
#define MDP_AD4_0_OFF 0x7C000
#define MDP_AD4_1_OFF 0x7D000
#define MDP_AD4_INTR_EN_OFF 0x41c
#define MDP_AD4_INTR_CLEAR_OFF 0x424
#define MDP_AD4_INTR_STATUS_OFF 0x420
-#define MDP_INTF_0_OFF_REV_7xxx 0x34000
-#define MDP_INTF_1_OFF_REV_7xxx 0x35000
-#define MDP_INTF_2_OFF_REV_7xxx 0x36000
-#define MDP_INTF_3_OFF_REV_7xxx 0x37000
-#define MDP_INTF_4_OFF_REV_7xxx 0x38000
-#define MDP_INTF_5_OFF_REV_7xxx 0x39000
-#define MDP_INTF_6_OFF_REV_7xxx 0x3a000
-#define MDP_INTF_7_OFF_REV_7xxx 0x3b000
-#define MDP_INTF_8_OFF_REV_7xxx 0x3c000
+#define MDP_INTF_0_OFF_REV_7xxx 0x34000
+#define MDP_INTF_1_OFF_REV_7xxx 0x35000
+#define MDP_INTF_2_OFF_REV_7xxx 0x36000
+#define MDP_INTF_3_OFF_REV_7xxx 0x37000
+#define MDP_INTF_4_OFF_REV_7xxx 0x38000
+#define MDP_INTF_5_OFF_REV_7xxx 0x39000
+#define MDP_INTF_6_OFF_REV_7xxx 0x3a000
+#define MDP_INTF_7_OFF_REV_7xxx 0x3b000
+#define MDP_INTF_8_OFF_REV_7xxx 0x3c000
/**
* struct dpu_intr_reg - array of DPU register sets
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c
index 84ee2efa9c66..b9dddf576c02 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c
@@ -56,11 +56,6 @@
#define INTF_TPG_RGB_MAPPING 0x11C
#define INTF_PROG_FETCH_START 0x170
#define INTF_PROG_ROT_START 0x174
-
-#define INTF_FRAME_LINE_COUNT_EN 0x0A8
-#define INTF_FRAME_COUNT 0x0AC
-#define INTF_LINE_COUNT 0x0B0
-
#define INTF_MUX 0x25C
#define INTF_STATUS 0x26C
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
index 2d28afdf860e..a3e413d27717 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
@@ -61,6 +61,7 @@ static const struct dpu_wb_cfg *_wb_offset(enum dpu_wb wb,
for (i = 0; i < m->wb_count; i++) {
if (wb == m->wb[i].id) {
b->blk_addr = addr + m->wb[i].base;
+ b->log_mask = DPU_DBG_MASK_WB;
return &m->wb[i];
}
}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hwio.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hwio.h
index feb9a729844a..5acd5683d25a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hwio.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hwio.h
@@ -21,9 +21,6 @@
#define HIST_INTR_EN 0x01c
#define HIST_INTR_STATUS 0x020
#define HIST_INTR_CLEAR 0x024
-#define INTF_INTR_EN 0x1C0
-#define INTF_INTR_STATUS 0x1C4
-#define INTF_INTR_CLEAR 0x1C8
#define SPLIT_DISPLAY_EN 0x2F4
#define SPLIT_DISPLAY_UPPER_PIPE_CTRL 0x2F8
#define DSPP_IGC_COLOR0_RAM_LUTN 0x300
diff --git a/drivers/gpu/drm/msm/dp/dp_audio.c b/drivers/gpu/drm/msm/dp/dp_audio.c
index 6666783e1468..1245c7aa49df 100644
--- a/drivers/gpu/drm/msm/dp/dp_audio.c
+++ b/drivers/gpu/drm/msm/dp/dp_audio.c
@@ -593,6 +593,18 @@ static struct hdmi_codec_pdata codec_data = {
.i2s = 1,
};
+void dp_unregister_audio_driver(struct device *dev, struct dp_audio *dp_audio)
+{
+ struct dp_audio_private *audio_priv;
+
+ audio_priv = container_of(dp_audio, struct dp_audio_private, dp_audio);
+
+ if (audio_priv->audio_pdev) {
+ platform_device_unregister(audio_priv->audio_pdev);
+ audio_priv->audio_pdev = NULL;
+ }
+}
+
int dp_register_audio_driver(struct device *dev,
struct dp_audio *dp_audio)
{
diff --git a/drivers/gpu/drm/msm/dp/dp_audio.h b/drivers/gpu/drm/msm/dp/dp_audio.h
index 84e5f4a5d26b..4ab78880af82 100644
--- a/drivers/gpu/drm/msm/dp/dp_audio.h
+++ b/drivers/gpu/drm/msm/dp/dp_audio.h
@@ -53,6 +53,8 @@ struct dp_audio *dp_audio_get(struct platform_device *pdev,
int dp_register_audio_driver(struct device *dev,
struct dp_audio *dp_audio);
+void dp_unregister_audio_driver(struct device *dev, struct dp_audio *dp_audio);
+
/**
* dp_audio_put()
*
diff --git a/drivers/gpu/drm/msm/dp/dp_catalog.c b/drivers/gpu/drm/msm/dp/dp_catalog.c
index 7a8cf1c8233d..5142aeb705a4 100644
--- a/drivers/gpu/drm/msm/dp/dp_catalog.c
+++ b/drivers/gpu/drm/msm/dp/dp_catalog.c
@@ -620,7 +620,7 @@ void dp_catalog_hpd_config_intr(struct dp_catalog *dp_catalog,
config & DP_DP_HPD_INT_MASK);
}
-void dp_catalog_ctrl_hpd_config(struct dp_catalog *dp_catalog)
+void dp_catalog_ctrl_hpd_enable(struct dp_catalog *dp_catalog)
{
struct dp_catalog_private *catalog = container_of(dp_catalog,
struct dp_catalog_private, dp_catalog);
@@ -635,6 +635,19 @@ void dp_catalog_ctrl_hpd_config(struct dp_catalog *dp_catalog)
dp_write_aux(catalog, REG_DP_DP_HPD_CTRL, DP_DP_HPD_CTRL_HPD_EN);
}
+void dp_catalog_ctrl_hpd_disable(struct dp_catalog *dp_catalog)
+{
+ struct dp_catalog_private *catalog = container_of(dp_catalog,
+ struct dp_catalog_private, dp_catalog);
+
+ u32 reftimer = dp_read_aux(catalog, REG_DP_DP_HPD_REFTIMER);
+
+ reftimer &= ~DP_DP_HPD_REFTIMER_ENABLE;
+ dp_write_aux(catalog, REG_DP_DP_HPD_REFTIMER, reftimer);
+
+ dp_write_aux(catalog, REG_DP_DP_HPD_CTRL, 0);
+}
+
static void dp_catalog_enable_sdp(struct dp_catalog_private *catalog)
{
/* trigger sdp */
diff --git a/drivers/gpu/drm/msm/dp/dp_catalog.h b/drivers/gpu/drm/msm/dp/dp_catalog.h
index 82376a2697ef..38786e855b51 100644
--- a/drivers/gpu/drm/msm/dp/dp_catalog.h
+++ b/drivers/gpu/drm/msm/dp/dp_catalog.h
@@ -104,7 +104,8 @@ bool dp_catalog_ctrl_mainlink_ready(struct dp_catalog *dp_catalog);
void dp_catalog_ctrl_enable_irq(struct dp_catalog *dp_catalog, bool enable);
void dp_catalog_hpd_config_intr(struct dp_catalog *dp_catalog,
u32 intr_mask, bool en);
-void dp_catalog_ctrl_hpd_config(struct dp_catalog *dp_catalog);
+void dp_catalog_ctrl_hpd_enable(struct dp_catalog *dp_catalog);
+void dp_catalog_ctrl_hpd_disable(struct dp_catalog *dp_catalog);
void dp_catalog_ctrl_config_psr(struct dp_catalog *dp_catalog);
void dp_catalog_ctrl_set_psr(struct dp_catalog *dp_catalog, bool enter);
u32 dp_catalog_link_is_connected(struct dp_catalog *dp_catalog);
diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index 3e13acdfa7e5..03b0eda6df54 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -28,6 +28,10 @@
#include "dp_audio.h"
#include "dp_debug.h"
+static bool psr_enabled = false;
+module_param(psr_enabled, bool, 0);
+MODULE_PARM_DESC(psr_enabled, "enable PSR for eDP and DP displays");
+
#define HPD_STRING_SIZE 30
enum {
@@ -326,6 +330,7 @@ static void dp_display_unbind(struct device *dev, struct device *master,
kthread_stop(dp->ev_tsk);
dp_power_client_deinit(dp->power);
+ dp_unregister_audio_driver(dev, dp->audio);
dp_aux_unregister(dp->aux);
dp->drm_dev = NULL;
dp->aux->drm_dev = NULL;
@@ -406,7 +411,7 @@ static int dp_display_process_hpd_high(struct dp_display_private *dp)
edid = dp->panel->edid;
- dp->dp_display.psr_supported = dp->panel->psr_cap.version;
+ dp->dp_display.psr_supported = dp->panel->psr_cap.version && psr_enabled;
dp->audio_supported = drm_detect_monitor_audio(edid);
dp_panel_handle_sink_request(dp->panel);
@@ -615,12 +620,6 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data)
dp->hpd_state = ST_MAINLINK_READY;
}
- /* enable HDP irq_hpd/replug interrupt */
- if (dp->dp_display.internal_hpd)
- dp_catalog_hpd_config_intr(dp->catalog,
- DP_DP_IRQ_HPD_INT_MASK | DP_DP_HPD_REPLUG_INT_MASK,
- true);
-
drm_dbg_dp(dp->drm_dev, "After, type=%d hpd_state=%d\n",
dp->dp_display.connector_type, state);
mutex_unlock(&dp->event_mutex);
@@ -658,12 +657,6 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data)
drm_dbg_dp(dp->drm_dev, "Before, type=%d hpd_state=%d\n",
dp->dp_display.connector_type, state);
- /* disable irq_hpd/replug interrupts */
- if (dp->dp_display.internal_hpd)
- dp_catalog_hpd_config_intr(dp->catalog,
- DP_DP_IRQ_HPD_INT_MASK | DP_DP_HPD_REPLUG_INT_MASK,
- false);
-
/* unplugged, no more irq_hpd handle */
dp_del_event(dp, EV_IRQ_HPD_INT);
@@ -687,10 +680,6 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data)
return 0;
}
- /* disable HPD plug interrupts */
- if (dp->dp_display.internal_hpd)
- dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_PLUG_INT_MASK, false);
-
/*
* We don't need separate work for disconnect as
* connect/attention interrupts are disabled
@@ -706,10 +695,6 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data)
/* signal the disconnect event early to ensure proper teardown */
dp_display_handle_plugged_change(&dp->dp_display, false);
- /* enable HDP plug interrupt to prepare for next plugin */
- if (dp->dp_display.internal_hpd)
- dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_PLUG_INT_MASK, true);
-
drm_dbg_dp(dp->drm_dev, "After, type=%d hpd_state=%d\n",
dp->dp_display.connector_type, state);
@@ -1082,26 +1067,6 @@ void msm_dp_snapshot(struct msm_disp_state *disp_state, struct msm_dp *dp)
mutex_unlock(&dp_display->event_mutex);
}
-static void dp_display_config_hpd(struct dp_display_private *dp)
-{
-
- dp_display_host_init(dp);
- dp_catalog_ctrl_hpd_config(dp->catalog);
-
- /* Enable plug and unplug interrupts only if requested */
- if (dp->dp_display.internal_hpd)
- dp_catalog_hpd_config_intr(dp->catalog,
- DP_DP_HPD_PLUG_INT_MASK |
- DP_DP_HPD_UNPLUG_INT_MASK,
- true);
-
- /* Enable interrupt first time
- * we are leaving dp clocks on during disconnect
- * and never disable interrupt
- */
- enable_irq(dp->irq);
-}
-
void dp_display_set_psr(struct msm_dp *dp_display, bool enter)
{
struct dp_display_private *dp;
@@ -1176,7 +1141,7 @@ static int hpd_event_thread(void *data)
switch (todo->event_id) {
case EV_HPD_INIT_SETUP:
- dp_display_config_hpd(dp_priv);
+ dp_display_host_init(dp_priv);
break;
case EV_HPD_PLUG_INT:
dp_hpd_plug_handle(dp_priv, todo->data);
@@ -1282,7 +1247,6 @@ int dp_display_request_irq(struct msm_dp *dp_display)
dp->irq, rc);
return rc;
}
- disable_irq(dp->irq);
return 0;
}
@@ -1394,13 +1358,8 @@ static int dp_pm_resume(struct device *dev)
/* turn on dp ctrl/phy */
dp_display_host_init(dp);
- dp_catalog_ctrl_hpd_config(dp->catalog);
-
- if (dp->dp_display.internal_hpd)
- dp_catalog_hpd_config_intr(dp->catalog,
- DP_DP_HPD_PLUG_INT_MASK |
- DP_DP_HPD_UNPLUG_INT_MASK,
- true);
+ if (dp_display->is_edp)
+ dp_catalog_ctrl_hpd_enable(dp->catalog);
if (dp_catalog_link_is_connected(dp->catalog)) {
/*
@@ -1568,9 +1527,8 @@ static int dp_display_get_next_bridge(struct msm_dp *dp)
if (aux_bus && dp->is_edp) {
dp_display_host_init(dp_priv);
- dp_catalog_ctrl_hpd_config(dp_priv->catalog);
+ dp_catalog_ctrl_hpd_enable(dp_priv->catalog);
dp_display_host_phy_init(dp_priv);
- enable_irq(dp_priv->irq);
/*
* The code below assumes that the panel will finish probing
@@ -1612,7 +1570,6 @@ static int dp_display_get_next_bridge(struct msm_dp *dp)
error:
if (dp->is_edp) {
- disable_irq(dp_priv->irq);
dp_display_host_phy_exit(dp_priv);
dp_display_host_deinit(dp_priv);
}
@@ -1801,16 +1758,31 @@ void dp_bridge_hpd_enable(struct drm_bridge *bridge)
{
struct msm_dp_bridge *dp_bridge = to_dp_bridge(bridge);
struct msm_dp *dp_display = dp_bridge->dp_display;
+ struct dp_display_private *dp = container_of(dp_display, struct dp_display_private, dp_display);
+
+ mutex_lock(&dp->event_mutex);
+ dp_catalog_ctrl_hpd_enable(dp->catalog);
+
+ /* enable HDP interrupts */
+ dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_INT_MASK, true);
dp_display->internal_hpd = true;
+ mutex_unlock(&dp->event_mutex);
}
void dp_bridge_hpd_disable(struct drm_bridge *bridge)
{
struct msm_dp_bridge *dp_bridge = to_dp_bridge(bridge);
struct msm_dp *dp_display = dp_bridge->dp_display;
+ struct dp_display_private *dp = container_of(dp_display, struct dp_display_private, dp_display);
+
+ mutex_lock(&dp->event_mutex);
+ /* disable HDP interrupts */
+ dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_INT_MASK, false);
+ dp_catalog_ctrl_hpd_disable(dp->catalog);
dp_display->internal_hpd = false;
+ mutex_unlock(&dp->event_mutex);
}
void dp_bridge_hpd_notify(struct drm_bridge *bridge,
diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c
index d77fa9793c54..9c45d641b521 100644
--- a/drivers/gpu/drm/msm/msm_atomic.c
+++ b/drivers/gpu/drm/msm/msm_atomic.c
@@ -155,6 +155,8 @@ static bool can_do_async(struct drm_atomic_state *state,
for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
if (drm_atomic_crtc_needs_modeset(crtc_state))
return false;
+ if (!crtc_state->active)
+ return false;
if (++num_crtcs > 1)
return false;
*async_crtc = crtc;
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index b4cfa44a8a5c..463ca4164f5f 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -449,6 +449,8 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv)
if (ret)
goto err_cleanup_mode_config;
+ dma_set_max_seg_size(dev, UINT_MAX);
+
/* Bind all our sub-components: */
ret = component_bind_all(dev, ddev);
if (ret)
@@ -459,8 +461,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv)
if (ret)
goto err_msm_uninit;
- dma_set_max_seg_size(dev, UINT_MAX);
-
msm_gem_shrinker_init(ddev);
if (priv->kms_init) {
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index db6c4e281d75..cd39b9d8abdb 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -219,7 +219,8 @@ static void put_pages(struct drm_gem_object *obj)
}
}
-static struct page **msm_gem_pin_pages_locked(struct drm_gem_object *obj)
+static struct page **msm_gem_pin_pages_locked(struct drm_gem_object *obj,
+ unsigned madv)
{
struct msm_drm_private *priv = obj->dev->dev_private;
struct msm_gem_object *msm_obj = to_msm_bo(obj);
@@ -227,7 +228,9 @@ static struct page **msm_gem_pin_pages_locked(struct drm_gem_object *obj)
msm_gem_assert_locked(obj);
- if (GEM_WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED)) {
+ if (GEM_WARN_ON(msm_obj->madv > madv)) {
+ DRM_DEV_ERROR(obj->dev->dev, "Invalid madv state: %u vs %u\n",
+ msm_obj->madv, madv);
return ERR_PTR(-EBUSY);
}
@@ -248,7 +251,7 @@ struct page **msm_gem_pin_pages(struct drm_gem_object *obj)
struct page **p;
msm_gem_lock(obj);
- p = msm_gem_pin_pages_locked(obj);
+ p = msm_gem_pin_pages_locked(obj, MSM_MADV_WILLNEED);
msm_gem_unlock(obj);
return p;
@@ -473,10 +476,7 @@ int msm_gem_pin_vma_locked(struct drm_gem_object *obj, struct msm_gem_vma *vma)
msm_gem_assert_locked(obj);
- if (GEM_WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED))
- return -EBUSY;
-
- pages = msm_gem_pin_pages_locked(obj);
+ pages = msm_gem_pin_pages_locked(obj, MSM_MADV_WILLNEED);
if (IS_ERR(pages))
return PTR_ERR(pages);
@@ -699,13 +699,7 @@ static void *get_vaddr(struct drm_gem_object *obj, unsigned madv)
if (obj->import_attach)
return ERR_PTR(-ENODEV);
- if (GEM_WARN_ON(msm_obj->madv > madv)) {
- DRM_DEV_ERROR(obj->dev->dev, "Invalid madv state: %u vs %u\n",
- msm_obj->madv, madv);
- return ERR_PTR(-EBUSY);
- }
-
- pages = msm_gem_pin_pages_locked(obj);
+ pages = msm_gem_pin_pages_locked(obj, madv);
if (IS_ERR(pages))
return ERR_CAST(pages);
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index aff18c2f600a..9f5933c75e3d 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -722,7 +722,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
struct msm_drm_private *priv = dev->dev_private;
struct drm_msm_gem_submit *args = data;
struct msm_file_private *ctx = file->driver_priv;
- struct msm_gem_submit *submit;
+ struct msm_gem_submit *submit = NULL;
struct msm_gpu *gpu = priv->gpu;
struct msm_gpu_submitqueue *queue;
struct msm_ringbuffer *ring;
@@ -769,13 +769,15 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
if (out_fence_fd < 0) {
ret = out_fence_fd;
- return ret;
+ goto out_post_unlock;
}
}
submit = submit_create(dev, gpu, queue, args->nr_bos, args->nr_cmds);
- if (IS_ERR(submit))
- return PTR_ERR(submit);
+ if (IS_ERR(submit)) {
+ ret = PTR_ERR(submit);
+ goto out_post_unlock;
+ }
trace_msm_gpu_submit(pid_nr(submit->pid), ring->id, submit->ident,
args->nr_bos, args->nr_cmds);
@@ -962,11 +964,20 @@ out:
if (has_ww_ticket)
ww_acquire_fini(&submit->ticket);
out_unlock:
- if (ret && (out_fence_fd >= 0))
- put_unused_fd(out_fence_fd);
mutex_unlock(&queue->lock);
out_post_unlock:
- msm_gem_submit_put(submit);
+ if (ret && (out_fence_fd >= 0))
+ put_unused_fd(out_fence_fd);
+
+ if (!IS_ERR_OR_NULL(submit)) {
+ msm_gem_submit_put(submit);
+ } else {
+ /*
+ * If the submit hasn't yet taken ownership of the queue
+ * then we need to drop the reference ourself:
+ */
+ msm_submitqueue_put(queue);
+ }
if (!IS_ERR_OR_NULL(post_deps)) {
for (i = 0; i < args->nr_out_syncobjs; ++i) {
kfree(post_deps[i].chain);
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index 418e1e06cdde..5cc8d358cc97 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -234,7 +234,12 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent)
/* Get the pagetable configuration from the domain */
if (adreno_smmu->cookie)
ttbr1_cfg = adreno_smmu->get_ttbr1_cfg(adreno_smmu->cookie);
- if (!ttbr1_cfg)
+
+ /*
+ * If you hit this WARN_ONCE() you are probably missing an entry in
+ * qcom_smmu_impl_of_match[] in arm-smmu-qcom.c
+ */
+ if (WARN_ONCE(!ttbr1_cfg, "No per-process page tables"))
return ERR_PTR(-ENODEV);
pagetable = kzalloc(sizeof(*pagetable), GFP_KERNEL);
@@ -410,7 +415,7 @@ struct msm_mmu *msm_iommu_gpu_new(struct device *dev, struct msm_gpu *gpu, unsig
struct msm_mmu *mmu;
mmu = msm_iommu_new(dev, quirks);
- if (IS_ERR(mmu))
+ if (IS_ERR_OR_NULL(mmu))
return mmu;
iommu = to_msm_iommu(mmu);
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index 8cf096f841a9..a2ae8c21e4dc 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -220,6 +220,9 @@ static void nouveau_dsm_pci_probe(struct pci_dev *pdev, acpi_handle *dhandle_out
int optimus_funcs;
struct pci_dev *parent_pdev;
+ if (pdev->vendor != PCI_VENDOR_ID_NVIDIA)
+ return;
+
*has_pr3 = false;
parent_pdev = pci_upstream_bridge(pdev);
if (parent_pdev) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 086b66b60d91..f75c6f09dd2a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -730,7 +730,8 @@ out:
#endif
nouveau_connector_set_edid(nv_connector, edid);
- nouveau_connector_set_encoder(connector, nv_encoder);
+ if (nv_encoder)
+ nouveau_connector_set_encoder(connector, nv_encoder);
return status;
}
@@ -966,7 +967,7 @@ nouveau_connector_get_modes(struct drm_connector *connector)
/* Determine display colour depth for everything except LVDS now,
* DP requires this before mode_valid() is called.
*/
- if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS)
+ if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS && nv_connector->native_mode)
nouveau_connector_detect_depth(connector);
/* Find the native mode if this is a digital panel, if we didn't
@@ -987,7 +988,7 @@ nouveau_connector_get_modes(struct drm_connector *connector)
* "native" mode as some VBIOS tables require us to use the
* pixel clock as part of the lookup...
*/
- if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)
+ if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS && nv_connector->native_mode)
nouveau_connector_detect_depth(connector);
if (nv_encoder->dcb->type == DCB_OUTPUT_TV)
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index cc7c5b4a05fd..7aac9384600e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -137,10 +137,16 @@ nouveau_name(struct drm_device *dev)
static inline bool
nouveau_cli_work_ready(struct dma_fence *fence)
{
- if (!dma_fence_is_signaled(fence))
- return false;
- dma_fence_put(fence);
- return true;
+ bool ret = true;
+
+ spin_lock_irq(fence->lock);
+ if (!dma_fence_is_signaled_locked(fence))
+ ret = false;
+ spin_unlock_irq(fence->lock);
+
+ if (ret == true)
+ dma_fence_put(fence);
+ return ret;
}
static void
diff --git a/drivers/gpu/drm/pl111/pl111_display.c b/drivers/gpu/drm/pl111/pl111_display.c
index 6afdf260a4e2..b9fe926a49e8 100644
--- a/drivers/gpu/drm/pl111/pl111_display.c
+++ b/drivers/gpu/drm/pl111/pl111_display.c
@@ -53,7 +53,7 @@ pl111_mode_valid(struct drm_simple_display_pipe *pipe,
{
struct drm_device *drm = pipe->crtc.dev;
struct pl111_drm_dev_private *priv = drm->dev_private;
- u32 cpp = priv->variant->fb_bpp / 8;
+ u32 cpp = DIV_ROUND_UP(priv->variant->fb_depth, 8);
u64 bw;
/*
diff --git a/drivers/gpu/drm/pl111/pl111_drm.h b/drivers/gpu/drm/pl111/pl111_drm.h
index 2a46b5bd8576..d1fe756444ee 100644
--- a/drivers/gpu/drm/pl111/pl111_drm.h
+++ b/drivers/gpu/drm/pl111/pl111_drm.h
@@ -114,7 +114,7 @@ struct drm_minor;
* extensions to the control register
* @formats: array of supported pixel formats on this variant
* @nformats: the length of the array of supported pixel formats
- * @fb_bpp: desired bits per pixel on the default framebuffer
+ * @fb_depth: desired depth per pixel on the default framebuffer
*/
struct pl111_variant_data {
const char *name;
@@ -126,7 +126,7 @@ struct pl111_variant_data {
bool st_bitmux_control;
const u32 *formats;
unsigned int nformats;
- unsigned int fb_bpp;
+ unsigned int fb_depth;
};
struct pl111_drm_dev_private {
diff --git a/drivers/gpu/drm/pl111/pl111_drv.c b/drivers/gpu/drm/pl111/pl111_drv.c
index 4b2a9e9753f6..43049c8028b2 100644
--- a/drivers/gpu/drm/pl111/pl111_drv.c
+++ b/drivers/gpu/drm/pl111/pl111_drv.c
@@ -308,7 +308,7 @@ static int pl111_amba_probe(struct amba_device *amba_dev,
if (ret < 0)
goto dev_put;
- drm_fbdev_dma_setup(drm, priv->variant->fb_bpp);
+ drm_fbdev_dma_setup(drm, priv->variant->fb_depth);
return 0;
@@ -351,7 +351,7 @@ static const struct pl111_variant_data pl110_variant = {
.is_pl110 = true,
.formats = pl110_pixel_formats,
.nformats = ARRAY_SIZE(pl110_pixel_formats),
- .fb_bpp = 16,
+ .fb_depth = 16,
};
/* RealView, Versatile Express etc use this modern variant */
@@ -376,7 +376,7 @@ static const struct pl111_variant_data pl111_variant = {
.name = "PL111",
.formats = pl111_pixel_formats,
.nformats = ARRAY_SIZE(pl111_pixel_formats),
- .fb_bpp = 32,
+ .fb_depth = 32,
};
static const u32 pl110_nomadik_pixel_formats[] = {
@@ -405,7 +405,7 @@ static const struct pl111_variant_data pl110_nomadik_variant = {
.is_lcdc = true,
.st_bitmux_control = true,
.broken_vblank = true,
- .fb_bpp = 16,
+ .fb_depth = 16,
};
static const struct amba_id pl111_id_table[] = {
diff --git a/drivers/gpu/drm/pl111/pl111_versatile.c b/drivers/gpu/drm/pl111/pl111_versatile.c
index 1b436b75fd39..00c3ebd32359 100644
--- a/drivers/gpu/drm/pl111/pl111_versatile.c
+++ b/drivers/gpu/drm/pl111/pl111_versatile.c
@@ -316,7 +316,7 @@ static const struct pl111_variant_data pl110_integrator = {
.broken_vblank = true,
.formats = pl110_integrator_pixel_formats,
.nformats = ARRAY_SIZE(pl110_integrator_pixel_formats),
- .fb_bpp = 16,
+ .fb_depth = 16,
};
/*
@@ -330,7 +330,7 @@ static const struct pl111_variant_data pl110_impd1 = {
.broken_vblank = true,
.formats = pl110_integrator_pixel_formats,
.nformats = ARRAY_SIZE(pl110_integrator_pixel_formats),
- .fb_bpp = 16,
+ .fb_depth = 15,
};
/*
@@ -343,7 +343,7 @@ static const struct pl111_variant_data pl110_versatile = {
.external_bgr = true,
.formats = pl110_versatile_pixel_formats,
.nformats = ARRAY_SIZE(pl110_versatile_pixel_formats),
- .fb_bpp = 16,
+ .fb_depth = 16,
};
/*
@@ -355,7 +355,7 @@ static const struct pl111_variant_data pl111_realview = {
.name = "PL111 RealView",
.formats = pl111_realview_pixel_formats,
.nformats = ARRAY_SIZE(pl111_realview_pixel_formats),
- .fb_bpp = 16,
+ .fb_depth = 16,
};
/*
@@ -367,7 +367,7 @@ static const struct pl111_variant_data pl111_vexpress = {
.name = "PL111 Versatile Express",
.formats = pl111_realview_pixel_formats,
.nformats = ARRAY_SIZE(pl111_realview_pixel_formats),
- .fb_bpp = 16,
+ .fb_depth = 16,
.broken_clockdivider = true,
};
diff --git a/drivers/gpu/drm/radeon/radeon_fbdev.c b/drivers/gpu/drm/radeon/radeon_fbdev.c
index fe76e29910ef..8f6c3aef0962 100644
--- a/drivers/gpu/drm/radeon/radeon_fbdev.c
+++ b/drivers/gpu/drm/radeon/radeon_fbdev.c
@@ -307,6 +307,7 @@ static void radeon_fbdev_client_unregister(struct drm_client_dev *client)
if (fb_helper->info) {
vga_switcheroo_client_fb_set(rdev->pdev, NULL);
+ drm_helper_force_disable_all(dev);
drm_fb_helper_unregister_info(fb_helper);
} else {
drm_client_release(&fb_helper->client);
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index bdc5af23f005..d3f5ddbc1704 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -459,7 +459,6 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data,
struct radeon_device *rdev = dev->dev_private;
struct drm_radeon_gem_set_domain *args = data;
struct drm_gem_object *gobj;
- struct radeon_bo *robj;
int r;
/* for now if someone requests domain CPU -
@@ -472,13 +471,12 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data,
up_read(&rdev->exclusive_lock);
return -ENOENT;
}
- robj = gem_to_radeon_bo(gobj);
r = radeon_gem_set_domain(gobj, args->read_domains, args->write_domain);
drm_gem_object_put(gobj);
up_read(&rdev->exclusive_lock);
- r = radeon_gem_handle_lockup(robj->rdev, r);
+ r = radeon_gem_handle_lockup(rdev, r);
return r;
}
diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c
index 3377fbc71f65..c4dda908666c 100644
--- a/drivers/gpu/drm/radeon/radeon_irq_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c
@@ -99,6 +99,16 @@ static void radeon_hotplug_work_func(struct work_struct *work)
static void radeon_dp_work_func(struct work_struct *work)
{
+ struct radeon_device *rdev = container_of(work, struct radeon_device,
+ dp_work);
+ struct drm_device *dev = rdev->ddev;
+ struct drm_mode_config *mode_config = &dev->mode_config;
+ struct drm_connector *connector;
+
+ mutex_lock(&mode_config->mutex);
+ list_for_each_entry(connector, &mode_config->connector_list, head)
+ radeon_connector_hotplug(connector);
+ mutex_unlock(&mode_config->mutex);
}
/**
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 8c183639603e..aea5a90ff98b 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -1141,9 +1141,6 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched)
for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
struct drm_sched_rq *rq = &sched->sched_rq[i];
- if (!rq)
- continue;
-
spin_lock(&rq->lock);
list_for_each_entry(s_entity, &rq->entities, list)
/*
diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c
index 7ae5f27df54d..c6bdb9c4ef3e 100644
--- a/drivers/hid/hid-google-hammer.c
+++ b/drivers/hid/hid-google-hammer.c
@@ -587,6 +587,8 @@ static const struct hid_device_id hammer_devices[] = {
{ HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_HAMMER) },
{ HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
+ USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_JEWEL) },
+ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_MAGNEMITE) },
{ HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_MASTERBALL) },
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index d79e946acdcb..5d29abac2300 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -529,6 +529,7 @@
#define USB_DEVICE_ID_GOOGLE_MOONBALL 0x5044
#define USB_DEVICE_ID_GOOGLE_DON 0x5050
#define USB_DEVICE_ID_GOOGLE_EEL 0x5057
+#define USB_DEVICE_ID_GOOGLE_JEWEL 0x5061
#define USB_VENDOR_ID_GOTOP 0x08f2
#define USB_DEVICE_ID_SUPER_Q2 0x007f
diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c
index 0fcfd85fea0f..5e1a412fd28f 100644
--- a/drivers/hid/hid-logitech-hidpp.c
+++ b/drivers/hid/hid-logitech-hidpp.c
@@ -286,7 +286,7 @@ static int hidpp_send_message_sync(struct hidpp_device *hidpp,
struct hidpp_report *message,
struct hidpp_report *response)
{
- int ret;
+ int ret = -1;
int max_retries = 3;
mutex_lock(&hidpp->send_mutex);
@@ -300,13 +300,13 @@ static int hidpp_send_message_sync(struct hidpp_device *hidpp,
*/
*response = *message;
- for (; max_retries != 0; max_retries--) {
+ for (; max_retries != 0 && ret; max_retries--) {
ret = __hidpp_send_report(hidpp->hid_dev, message);
if (ret) {
dbg_hid("__hidpp_send_report returned err: %d\n", ret);
memset(response, 0, sizeof(struct hidpp_report));
- goto exit;
+ break;
}
if (!wait_event_timeout(hidpp->wait, hidpp->answer_available,
@@ -314,13 +314,14 @@ static int hidpp_send_message_sync(struct hidpp_device *hidpp,
dbg_hid("%s:timeout waiting for response\n", __func__);
memset(response, 0, sizeof(struct hidpp_report));
ret = -ETIMEDOUT;
+ break;
}
if (response->report_id == REPORT_ID_HIDPP_SHORT &&
response->rap.sub_id == HIDPP_ERROR) {
ret = response->rap.params[1];
dbg_hid("%s:got hidpp error %02X\n", __func__, ret);
- goto exit;
+ break;
}
if ((response->report_id == REPORT_ID_HIDPP_LONG ||
@@ -329,13 +330,12 @@ static int hidpp_send_message_sync(struct hidpp_device *hidpp,
ret = response->fap.params[1];
if (ret != HIDPP20_ERROR_BUSY) {
dbg_hid("%s:got hidpp 2.0 error %02X\n", __func__, ret);
- goto exit;
+ break;
}
dbg_hid("%s:got busy hidpp 2.0 error %02X, retrying\n", __func__, ret);
}
}
-exit:
mutex_unlock(&hidpp->send_mutex);
return ret;
diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index 8214896adada..76e5353aca0c 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -2224,7 +2224,9 @@ static void wacom_update_name(struct wacom *wacom, const char *suffix)
} else if (strstr(product_name, "Wacom") ||
strstr(product_name, "wacom") ||
strstr(product_name, "WACOM")) {
- strscpy(name, product_name, sizeof(name));
+ if (strscpy(name, product_name, sizeof(name)) < 0) {
+ hid_warn(wacom->hdev, "String overflow while assembling device name");
+ }
} else {
snprintf(name, sizeof(name), "Wacom %s", product_name);
}
@@ -2242,7 +2244,9 @@ static void wacom_update_name(struct wacom *wacom, const char *suffix)
if (name[strlen(name)-1] == ' ')
name[strlen(name)-1] = '\0';
} else {
- strscpy(name, features->name, sizeof(name));
+ if (strscpy(name, features->name, sizeof(name)) < 0) {
+ hid_warn(wacom->hdev, "String overflow while assembling device name");
+ }
}
snprintf(wacom_wac->name, sizeof(wacom_wac->name), "%s%s",
@@ -2410,8 +2414,13 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless)
goto fail_quirks;
}
- if (features->device_type & WACOM_DEVICETYPE_WL_MONITOR)
+ if (features->device_type & WACOM_DEVICETYPE_WL_MONITOR) {
error = hid_hw_open(hdev);
+ if (error) {
+ hid_err(hdev, "hw open failed\n");
+ goto fail_quirks;
+ }
+ }
wacom_set_shared_values(wacom_wac);
devres_close_group(&hdev->dev, wacom);
@@ -2500,8 +2509,10 @@ static void wacom_wireless_work(struct work_struct *work)
goto fail;
}
- strscpy(wacom_wac->name, wacom_wac1->name,
- sizeof(wacom_wac->name));
+ if (strscpy(wacom_wac->name, wacom_wac1->name,
+ sizeof(wacom_wac->name)) < 0) {
+ hid_warn(wacom->hdev, "String overflow while assembling device name");
+ }
}
return;
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index dc0f7d9a992c..2ccf83837134 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -831,7 +831,7 @@ static int wacom_intuos_inout(struct wacom_wac *wacom)
/* Enter report */
if ((data[1] & 0xfc) == 0xc0) {
/* serial number of the tool */
- wacom->serial[idx] = ((data[3] & 0x0f) << 28) +
+ wacom->serial[idx] = ((__u64)(data[3] & 0x0f) << 28) +
(data[4] << 20) + (data[5] << 12) +
(data[6] << 4) + (data[7] >> 4);
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 007f26d5f1a4..2f4d09ce027a 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -829,11 +829,22 @@ static void vmbus_wait_for_unload(void)
if (completion_done(&vmbus_connection.unload_event))
goto completed;
- for_each_online_cpu(cpu) {
+ for_each_present_cpu(cpu) {
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
+ /*
+ * In a CoCo VM the synic_message_page is not allocated
+ * in hv_synic_alloc(). Instead it is set/cleared in
+ * hv_synic_enable_regs() and hv_synic_disable_regs()
+ * such that it is set only when the CPU is online. If
+ * not all present CPUs are online, the message page
+ * might be NULL, so skip such CPUs.
+ */
page_addr = hv_cpu->synic_message_page;
+ if (!page_addr)
+ continue;
+
msg = (struct hv_message *)page_addr
+ VMBUS_MESSAGE_SINT;
@@ -867,11 +878,14 @@ completed:
* maybe-pending messages on all CPUs to be able to receive new
* messages after we reconnect.
*/
- for_each_online_cpu(cpu) {
+ for_each_present_cpu(cpu) {
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
page_addr = hv_cpu->synic_message_page;
+ if (!page_addr)
+ continue;
+
msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
msg->header.message_type = HVMSG_NONE;
}
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index 64f9ceca887b..542a1d53b303 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -364,13 +364,20 @@ int hv_common_cpu_init(unsigned int cpu)
flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL;
inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
- *inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
- if (!(*inputarg))
- return -ENOMEM;
- if (hv_root_partition) {
- outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
- *outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE;
+ /*
+ * hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory is already
+ * allocated if this CPU was previously online and then taken offline
+ */
+ if (!*inputarg) {
+ *inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
+ if (!(*inputarg))
+ return -ENOMEM;
+
+ if (hv_root_partition) {
+ outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
+ *outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE;
+ }
}
msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX);
@@ -385,24 +392,17 @@ int hv_common_cpu_init(unsigned int cpu)
int hv_common_cpu_die(unsigned int cpu)
{
- unsigned long flags;
- void **inputarg, **outputarg;
- void *mem;
-
- local_irq_save(flags);
-
- inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
- mem = *inputarg;
- *inputarg = NULL;
-
- if (hv_root_partition) {
- outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
- *outputarg = NULL;
- }
-
- local_irq_restore(flags);
-
- kfree(mem);
+ /*
+ * The hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory
+ * is not freed when the CPU goes offline as the hyperv_pcpu_input_arg
+ * may be used by the Hyper-V vPCI driver in reassigning interrupts
+ * as part of the offlining process. The interrupt reassignment
+ * happens *after* the CPUHP_AP_HYPERV_ONLINE state has run and
+ * called this function.
+ *
+ * If a previously offlined CPU is brought back online again, the
+ * originally allocated memory is reused in hv_common_cpu_init().
+ */
return 0;
}
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 1c65a6dfb9fa..67f95a29aeca 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1372,7 +1372,7 @@ static int vmbus_bus_init(void)
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
hv_synic_init, hv_synic_cleanup);
if (ret < 0)
- goto err_cpuhp;
+ goto err_alloc;
hyperv_cpuhp_online = ret;
ret = vmbus_connect();
@@ -1392,9 +1392,8 @@ static int vmbus_bus_init(void)
err_connect:
cpuhp_remove_state(hyperv_cpuhp_online);
-err_cpuhp:
- hv_synic_free();
err_alloc:
+ hv_synic_free();
if (vmbus_irq == -1) {
hv_remove_vmbus_handler();
} else {
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index 711f451b6946..89e8ed214ea4 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -402,6 +402,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
trace_id = coresight_trace_id_get_cpu_id(cpu);
if (!IS_VALID_CS_TRACE_ID(trace_id)) {
cpumask_clear_cpu(cpu, mask);
+ coresight_release_path(path);
continue;
}
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
index 918d461fcf4a..eaa296ced167 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -942,7 +942,7 @@ tmc_etr_buf_insert_barrier_packet(struct etr_buf *etr_buf, u64 offset)
len = tmc_etr_buf_get_data(etr_buf, offset,
CORESIGHT_BARRIER_PKT_SIZE, &bufp);
- if (WARN_ON(len < CORESIGHT_BARRIER_PKT_SIZE))
+ if (WARN_ON(len < 0 || len < CORESIGHT_BARRIER_PKT_SIZE))
return -EINVAL;
coresight_insert_barrier_packet(bufp);
return offset + CORESIGHT_BARRIER_PKT_SIZE;
diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h
index c5d87aae39c6..bf23bfb51aea 100644
--- a/drivers/i2c/busses/i2c-designware-core.h
+++ b/drivers/i2c/busses/i2c-designware-core.h
@@ -40,6 +40,7 @@
#define DW_IC_CON_BUS_CLEAR_CTRL BIT(11)
#define DW_IC_DATA_CMD_DAT GENMASK(7, 0)
+#define DW_IC_DATA_CMD_FIRST_DATA_BYTE BIT(11)
/*
* Registers offset
diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c
index cec25054bb24..2e079cf20bb5 100644
--- a/drivers/i2c/busses/i2c-designware-slave.c
+++ b/drivers/i2c/busses/i2c-designware-slave.c
@@ -176,6 +176,10 @@ static irqreturn_t i2c_dw_isr_slave(int this_irq, void *dev_id)
do {
regmap_read(dev->map, DW_IC_DATA_CMD, &tmp);
+ if (tmp & DW_IC_DATA_CMD_FIRST_DATA_BYTE)
+ i2c_slave_event(dev->slave,
+ I2C_SLAVE_WRITE_REQUESTED,
+ &val);
val = tmp;
i2c_slave_event(dev->slave, I2C_SLAVE_WRITE_RECEIVED,
&val);
diff --git a/drivers/i2c/busses/i2c-img-scb.c b/drivers/i2c/busses/i2c-img-scb.c
index 8e987945ed45..39c479f96eb5 100644
--- a/drivers/i2c/busses/i2c-img-scb.c
+++ b/drivers/i2c/busses/i2c-img-scb.c
@@ -257,7 +257,7 @@
#define IMG_I2C_TIMEOUT (msecs_to_jiffies(1000))
/*
- * Worst incs are 1 (innacurate) and 16*256 (irregular).
+ * Worst incs are 1 (inaccurate) and 16*256 (irregular).
* So a sensible inc is the logarithmic mean: 64 (2^6), which is
* in the middle of the valid range (0-127).
*/
diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c
index 1af0a637d7f1..4d24ceb57ee7 100644
--- a/drivers/i2c/busses/i2c-imx-lpi2c.c
+++ b/drivers/i2c/busses/i2c-imx-lpi2c.c
@@ -201,8 +201,8 @@ static void lpi2c_imx_stop(struct lpi2c_imx_struct *lpi2c_imx)
/* CLKLO = I2C_CLK_RATIO * CLKHI, SETHOLD = CLKHI, DATAVD = CLKHI/2 */
static int lpi2c_imx_config(struct lpi2c_imx_struct *lpi2c_imx)
{
- u8 prescale, filt, sethold, clkhi, clklo, datavd;
- unsigned int clk_rate, clk_cycle;
+ u8 prescale, filt, sethold, datavd;
+ unsigned int clk_rate, clk_cycle, clkhi, clklo;
enum lpi2c_imx_pincfg pincfg;
unsigned int temp;
diff --git a/drivers/i2c/busses/i2c-mchp-pci1xxxx.c b/drivers/i2c/busses/i2c-mchp-pci1xxxx.c
index b21ffd6df927..5ef136c3ecb1 100644
--- a/drivers/i2c/busses/i2c-mchp-pci1xxxx.c
+++ b/drivers/i2c/busses/i2c-mchp-pci1xxxx.c
@@ -1118,8 +1118,10 @@ static int pci1xxxx_i2c_resume(struct device *dev)
static DEFINE_SIMPLE_DEV_PM_OPS(pci1xxxx_i2c_pm_ops, pci1xxxx_i2c_suspend,
pci1xxxx_i2c_resume);
-static void pci1xxxx_i2c_shutdown(struct pci1xxxx_i2c *i2c)
+static void pci1xxxx_i2c_shutdown(void *data)
{
+ struct pci1xxxx_i2c *i2c = data;
+
pci1xxxx_i2c_config_padctrl(i2c, false);
pci1xxxx_i2c_configure_core_reg(i2c, false);
}
@@ -1156,7 +1158,7 @@ static int pci1xxxx_i2c_probe_pci(struct pci_dev *pdev,
init_completion(&i2c->i2c_xfer_done);
pci1xxxx_i2c_init(i2c);
- ret = devm_add_action(dev, (void (*)(void *))pci1xxxx_i2c_shutdown, i2c);
+ ret = devm_add_action(dev, pci1xxxx_i2c_shutdown, i2c);
if (ret)
return ret;
diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c
index 047dfef7a657..878c076ebdc6 100644
--- a/drivers/i2c/busses/i2c-mv64xxx.c
+++ b/drivers/i2c/busses/i2c-mv64xxx.c
@@ -520,6 +520,17 @@ mv64xxx_i2c_intr(int irq, void *dev_id)
while (readl(drv_data->reg_base + drv_data->reg_offsets.control) &
MV64XXX_I2C_REG_CONTROL_IFLG) {
+ /*
+ * It seems that sometime the controller updates the status
+ * register only after it asserts IFLG in control register.
+ * This may result in weird bugs when in atomic mode. A delay
+ * of 100 ns before reading the status register solves this
+ * issue. This bug does not seem to appear when using
+ * interrupts.
+ */
+ if (drv_data->atomic)
+ ndelay(100);
+
status = readl(drv_data->reg_base + drv_data->reg_offsets.status);
mv64xxx_i2c_fsm(drv_data, status);
mv64xxx_i2c_do_action(drv_data);
diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c
index 2e153f2f71b6..78682388e02e 100644
--- a/drivers/i2c/busses/i2c-qup.c
+++ b/drivers/i2c/busses/i2c-qup.c
@@ -1752,16 +1752,21 @@ nodma:
if (!clk_freq || clk_freq > I2C_MAX_FAST_MODE_PLUS_FREQ) {
dev_err(qup->dev, "clock frequency not supported %d\n",
clk_freq);
- return -EINVAL;
+ ret = -EINVAL;
+ goto fail_dma;
}
qup->base = devm_platform_ioremap_resource(pdev, 0);
- if (IS_ERR(qup->base))
- return PTR_ERR(qup->base);
+ if (IS_ERR(qup->base)) {
+ ret = PTR_ERR(qup->base);
+ goto fail_dma;
+ }
qup->irq = platform_get_irq(pdev, 0);
- if (qup->irq < 0)
- return qup->irq;
+ if (qup->irq < 0) {
+ ret = qup->irq;
+ goto fail_dma;
+ }
if (has_acpi_companion(qup->dev)) {
ret = device_property_read_u32(qup->dev,
@@ -1775,13 +1780,15 @@ nodma:
qup->clk = devm_clk_get(qup->dev, "core");
if (IS_ERR(qup->clk)) {
dev_err(qup->dev, "Could not get core clock\n");
- return PTR_ERR(qup->clk);
+ ret = PTR_ERR(qup->clk);
+ goto fail_dma;
}
qup->pclk = devm_clk_get(qup->dev, "iface");
if (IS_ERR(qup->pclk)) {
dev_err(qup->dev, "Could not get iface clock\n");
- return PTR_ERR(qup->pclk);
+ ret = PTR_ERR(qup->pclk);
+ goto fail_dma;
}
qup_i2c_enable_clocks(qup);
src_clk_freq = clk_get_rate(qup->clk);
diff --git a/drivers/i2c/busses/i2c-sprd.c b/drivers/i2c/busses/i2c-sprd.c
index 4fe15cd78907..ffc54fbf814d 100644
--- a/drivers/i2c/busses/i2c-sprd.c
+++ b/drivers/i2c/busses/i2c-sprd.c
@@ -576,12 +576,14 @@ static int sprd_i2c_remove(struct platform_device *pdev)
struct sprd_i2c *i2c_dev = platform_get_drvdata(pdev);
int ret;
- ret = pm_runtime_resume_and_get(i2c_dev->dev);
+ ret = pm_runtime_get_sync(i2c_dev->dev);
if (ret < 0)
- return ret;
+ dev_err(&pdev->dev, "Failed to resume device (%pe)\n", ERR_PTR(ret));
i2c_del_adapter(&i2c_dev->adap);
- clk_disable_unprepare(i2c_dev->clk);
+
+ if (ret >= 0)
+ clk_disable_unprepare(i2c_dev->clk);
pm_runtime_put_noidle(i2c_dev->dev);
pm_runtime_disable(i2c_dev->dev);
diff --git a/drivers/iio/accel/kionix-kx022a.c b/drivers/iio/accel/kionix-kx022a.c
index f98393d74666..b8636fa8eaeb 100644
--- a/drivers/iio/accel/kionix-kx022a.c
+++ b/drivers/iio/accel/kionix-kx022a.c
@@ -1048,7 +1048,7 @@ int kx022a_probe_internal(struct device *dev)
data->ien_reg = KX022A_REG_INC4;
} else {
irq = fwnode_irq_get_byname(fwnode, "INT2");
- if (irq <= 0)
+ if (irq < 0)
return dev_err_probe(dev, irq, "No suitable IRQ\n");
data->inc_reg = KX022A_REG_INC5;
diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c
index 5f7d81b44b1d..282e539157f8 100644
--- a/drivers/iio/accel/st_accel_core.c
+++ b/drivers/iio/accel/st_accel_core.c
@@ -1291,12 +1291,12 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev)
adev = ACPI_COMPANION(indio_dev->dev.parent);
if (!adev)
- return 0;
+ return -ENXIO;
/* Read _ONT data, which should be a package of 6 integers. */
status = acpi_evaluate_object(adev->handle, "_ONT", NULL, &buffer);
if (status == AE_NOT_FOUND) {
- return 0;
+ return -ENXIO;
} else if (ACPI_FAILURE(status)) {
dev_warn(&indio_dev->dev, "failed to execute _ONT: %d\n",
status);
diff --git a/drivers/iio/adc/ad4130.c b/drivers/iio/adc/ad4130.c
index 38394341fd6e..5a5dd5e87ffc 100644
--- a/drivers/iio/adc/ad4130.c
+++ b/drivers/iio/adc/ad4130.c
@@ -1817,6 +1817,11 @@ static const struct clk_ops ad4130_int_clk_ops = {
.unprepare = ad4130_int_clk_unprepare,
};
+static void ad4130_clk_del_provider(void *of_node)
+{
+ of_clk_del_provider(of_node);
+}
+
static int ad4130_setup_int_clk(struct ad4130_state *st)
{
struct device *dev = &st->spi->dev;
@@ -1824,6 +1829,7 @@ static int ad4130_setup_int_clk(struct ad4130_state *st)
struct clk_init_data init;
const char *clk_name;
struct clk *clk;
+ int ret;
if (st->int_pin_sel == AD4130_INT_PIN_CLK ||
st->mclk_sel != AD4130_MCLK_76_8KHZ)
@@ -1843,7 +1849,11 @@ static int ad4130_setup_int_clk(struct ad4130_state *st)
if (IS_ERR(clk))
return PTR_ERR(clk);
- return of_clk_add_provider(of_node, of_clk_src_simple_get, clk);
+ ret = of_clk_add_provider(of_node, of_clk_src_simple_get, clk);
+ if (ret)
+ return ret;
+
+ return devm_add_action_or_reset(dev, ad4130_clk_del_provider, of_node);
}
static int ad4130_setup(struct iio_dev *indio_dev)
diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c
index 55a6ab591016..99bb604b78c8 100644
--- a/drivers/iio/adc/ad7192.c
+++ b/drivers/iio/adc/ad7192.c
@@ -897,10 +897,6 @@ static const struct iio_info ad7195_info = {
__AD719x_CHANNEL(_si, _channel1, -1, _address, NULL, IIO_VOLTAGE, \
BIT(IIO_CHAN_INFO_SCALE), ad7192_calibsys_ext_info)
-#define AD719x_SHORTED_CHANNEL(_si, _channel1, _address) \
- __AD719x_CHANNEL(_si, _channel1, -1, _address, "shorted", IIO_VOLTAGE, \
- BIT(IIO_CHAN_INFO_SCALE), ad7192_calibsys_ext_info)
-
#define AD719x_TEMP_CHANNEL(_si, _address) \
__AD719x_CHANNEL(_si, 0, -1, _address, NULL, IIO_TEMP, 0, NULL)
@@ -908,7 +904,7 @@ static const struct iio_chan_spec ad7192_channels[] = {
AD719x_DIFF_CHANNEL(0, 1, 2, AD7192_CH_AIN1P_AIN2M),
AD719x_DIFF_CHANNEL(1, 3, 4, AD7192_CH_AIN3P_AIN4M),
AD719x_TEMP_CHANNEL(2, AD7192_CH_TEMP),
- AD719x_SHORTED_CHANNEL(3, 2, AD7192_CH_AIN2P_AIN2M),
+ AD719x_DIFF_CHANNEL(3, 2, 2, AD7192_CH_AIN2P_AIN2M),
AD719x_CHANNEL(4, 1, AD7192_CH_AIN1),
AD719x_CHANNEL(5, 2, AD7192_CH_AIN2),
AD719x_CHANNEL(6, 3, AD7192_CH_AIN3),
@@ -922,7 +918,7 @@ static const struct iio_chan_spec ad7193_channels[] = {
AD719x_DIFF_CHANNEL(2, 5, 6, AD7193_CH_AIN5P_AIN6M),
AD719x_DIFF_CHANNEL(3, 7, 8, AD7193_CH_AIN7P_AIN8M),
AD719x_TEMP_CHANNEL(4, AD7193_CH_TEMP),
- AD719x_SHORTED_CHANNEL(5, 2, AD7193_CH_AIN2P_AIN2M),
+ AD719x_DIFF_CHANNEL(5, 2, 2, AD7193_CH_AIN2P_AIN2M),
AD719x_CHANNEL(6, 1, AD7193_CH_AIN1),
AD719x_CHANNEL(7, 2, AD7193_CH_AIN2),
AD719x_CHANNEL(8, 3, AD7193_CH_AIN3),
diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c
index d8570f620785..7e2192870743 100644
--- a/drivers/iio/adc/ad_sigma_delta.c
+++ b/drivers/iio/adc/ad_sigma_delta.c
@@ -584,6 +584,10 @@ static int devm_ad_sd_probe_trigger(struct device *dev, struct iio_dev *indio_de
init_completion(&sigma_delta->completion);
sigma_delta->irq_dis = true;
+
+ /* the IRQ core clears IRQ_DISABLE_UNLAZY flag when freeing an IRQ */
+ irq_set_status_flags(sigma_delta->spi->irq, IRQ_DISABLE_UNLAZY);
+
ret = devm_request_irq(dev, sigma_delta->spi->irq,
ad_sd_data_rdy_trig_poll,
sigma_delta->info->irq_flags | IRQF_NO_AUTOEN,
diff --git a/drivers/iio/adc/imx93_adc.c b/drivers/iio/adc/imx93_adc.c
index a775d2e40567..dce9ec91e4a7 100644
--- a/drivers/iio/adc/imx93_adc.c
+++ b/drivers/iio/adc/imx93_adc.c
@@ -236,8 +236,7 @@ static int imx93_adc_read_raw(struct iio_dev *indio_dev,
{
struct imx93_adc *adc = iio_priv(indio_dev);
struct device *dev = adc->dev;
- long ret;
- u32 vref_uv;
+ int ret;
switch (mask) {
case IIO_CHAN_INFO_RAW:
@@ -253,10 +252,10 @@ static int imx93_adc_read_raw(struct iio_dev *indio_dev,
return IIO_VAL_INT;
case IIO_CHAN_INFO_SCALE:
- ret = vref_uv = regulator_get_voltage(adc->vref);
+ ret = regulator_get_voltage(adc->vref);
if (ret < 0)
return ret;
- *val = vref_uv / 1000;
+ *val = ret / 1000;
*val2 = 12;
return IIO_VAL_FRACTIONAL_LOG2;
diff --git a/drivers/iio/adc/mt6370-adc.c b/drivers/iio/adc/mt6370-adc.c
index bc62e5a9d50d..0bc112135bca 100644
--- a/drivers/iio/adc/mt6370-adc.c
+++ b/drivers/iio/adc/mt6370-adc.c
@@ -19,6 +19,7 @@
#include <dt-bindings/iio/adc/mediatek,mt6370_adc.h>
+#define MT6370_REG_DEV_INFO 0x100
#define MT6370_REG_CHG_CTRL3 0x113
#define MT6370_REG_CHG_CTRL7 0x117
#define MT6370_REG_CHG_ADC 0x121
@@ -27,6 +28,7 @@
#define MT6370_ADC_START_MASK BIT(0)
#define MT6370_ADC_IN_SEL_MASK GENMASK(7, 4)
#define MT6370_AICR_ICHG_MASK GENMASK(7, 2)
+#define MT6370_VENID_MASK GENMASK(7, 4)
#define MT6370_AICR_100_mA 0x0
#define MT6370_AICR_150_mA 0x1
@@ -47,6 +49,10 @@
#define ADC_CONV_TIME_MS 35
#define ADC_CONV_POLLING_TIME_US 1000
+#define MT6370_VID_RT5081 0x8
+#define MT6370_VID_RT5081A 0xA
+#define MT6370_VID_MT6370 0xE
+
struct mt6370_adc_data {
struct device *dev;
struct regmap *regmap;
@@ -55,6 +61,7 @@ struct mt6370_adc_data {
* from being read at the same time.
*/
struct mutex adc_lock;
+ unsigned int vid;
};
static int mt6370_adc_read_channel(struct mt6370_adc_data *priv, int chan,
@@ -98,6 +105,30 @@ adc_unlock:
return ret;
}
+static int mt6370_adc_get_ibus_scale(struct mt6370_adc_data *priv)
+{
+ switch (priv->vid) {
+ case MT6370_VID_RT5081:
+ case MT6370_VID_RT5081A:
+ case MT6370_VID_MT6370:
+ return 3350;
+ default:
+ return 3875;
+ }
+}
+
+static int mt6370_adc_get_ibat_scale(struct mt6370_adc_data *priv)
+{
+ switch (priv->vid) {
+ case MT6370_VID_RT5081:
+ case MT6370_VID_RT5081A:
+ case MT6370_VID_MT6370:
+ return 2680;
+ default:
+ return 3870;
+ }
+}
+
static int mt6370_adc_read_scale(struct mt6370_adc_data *priv,
int chan, int *val1, int *val2)
{
@@ -123,7 +154,7 @@ static int mt6370_adc_read_scale(struct mt6370_adc_data *priv,
case MT6370_AICR_250_mA:
case MT6370_AICR_300_mA:
case MT6370_AICR_350_mA:
- *val1 = 3350;
+ *val1 = mt6370_adc_get_ibus_scale(priv);
break;
default:
*val1 = 5000;
@@ -150,7 +181,7 @@ static int mt6370_adc_read_scale(struct mt6370_adc_data *priv,
case MT6370_ICHG_600_mA:
case MT6370_ICHG_700_mA:
case MT6370_ICHG_800_mA:
- *val1 = 2680;
+ *val1 = mt6370_adc_get_ibat_scale(priv);
break;
default:
*val1 = 5000;
@@ -251,6 +282,20 @@ static const struct iio_chan_spec mt6370_adc_channels[] = {
MT6370_ADC_CHAN(TEMP_JC, IIO_TEMP, 12, BIT(IIO_CHAN_INFO_OFFSET)),
};
+static int mt6370_get_vendor_info(struct mt6370_adc_data *priv)
+{
+ unsigned int dev_info;
+ int ret;
+
+ ret = regmap_read(priv->regmap, MT6370_REG_DEV_INFO, &dev_info);
+ if (ret)
+ return ret;
+
+ priv->vid = FIELD_GET(MT6370_VENID_MASK, dev_info);
+
+ return 0;
+}
+
static int mt6370_adc_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
@@ -272,6 +317,10 @@ static int mt6370_adc_probe(struct platform_device *pdev)
priv->regmap = regmap;
mutex_init(&priv->adc_lock);
+ ret = mt6370_get_vendor_info(priv);
+ if (ret)
+ return dev_err_probe(dev, ret, "Failed to get vid\n");
+
ret = regmap_write(priv->regmap, MT6370_REG_CHG_ADC, 0);
if (ret)
return dev_err_probe(dev, ret, "Failed to reset ADC\n");
diff --git a/drivers/iio/adc/mxs-lradc-adc.c b/drivers/iio/adc/mxs-lradc-adc.c
index bca79a93cbe4..a50f39143d3e 100644
--- a/drivers/iio/adc/mxs-lradc-adc.c
+++ b/drivers/iio/adc/mxs-lradc-adc.c
@@ -757,13 +757,13 @@ static int mxs_lradc_adc_probe(struct platform_device *pdev)
ret = mxs_lradc_adc_trigger_init(iio);
if (ret)
- goto err_trig;
+ return ret;
ret = iio_triggered_buffer_setup(iio, &iio_pollfunc_store_time,
&mxs_lradc_adc_trigger_handler,
&mxs_lradc_adc_buffer_ops);
if (ret)
- return ret;
+ goto err_trig;
adc->vref_mv = mxs_lradc_adc_vref_mv[lradc->soc];
@@ -801,9 +801,9 @@ static int mxs_lradc_adc_probe(struct platform_device *pdev)
err_dev:
mxs_lradc_adc_hw_stop(adc);
- mxs_lradc_adc_trigger_remove(iio);
-err_trig:
iio_triggered_buffer_cleanup(iio);
+err_trig:
+ mxs_lradc_adc_trigger_remove(iio);
return ret;
}
@@ -814,8 +814,8 @@ static int mxs_lradc_adc_remove(struct platform_device *pdev)
iio_device_unregister(iio);
mxs_lradc_adc_hw_stop(adc);
- mxs_lradc_adc_trigger_remove(iio);
iio_triggered_buffer_cleanup(iio);
+ mxs_lradc_adc_trigger_remove(iio);
return 0;
}
diff --git a/drivers/iio/adc/palmas_gpadc.c b/drivers/iio/adc/palmas_gpadc.c
index c1c439215aeb..7dfc9c927a23 100644
--- a/drivers/iio/adc/palmas_gpadc.c
+++ b/drivers/iio/adc/palmas_gpadc.c
@@ -547,7 +547,7 @@ static int palmas_gpadc_read_raw(struct iio_dev *indio_dev,
int adc_chan = chan->channel;
int ret = 0;
- if (adc_chan > PALMAS_ADC_CH_MAX)
+ if (adc_chan >= PALMAS_ADC_CH_MAX)
return -EINVAL;
mutex_lock(&adc->lock);
@@ -595,7 +595,7 @@ static int palmas_gpadc_read_event_config(struct iio_dev *indio_dev,
int adc_chan = chan->channel;
int ret = 0;
- if (adc_chan > PALMAS_ADC_CH_MAX || type != IIO_EV_TYPE_THRESH)
+ if (adc_chan >= PALMAS_ADC_CH_MAX || type != IIO_EV_TYPE_THRESH)
return -EINVAL;
mutex_lock(&adc->lock);
@@ -684,7 +684,7 @@ static int palmas_gpadc_write_event_config(struct iio_dev *indio_dev,
int adc_chan = chan->channel;
int ret;
- if (adc_chan > PALMAS_ADC_CH_MAX || type != IIO_EV_TYPE_THRESH)
+ if (adc_chan >= PALMAS_ADC_CH_MAX || type != IIO_EV_TYPE_THRESH)
return -EINVAL;
mutex_lock(&adc->lock);
@@ -710,7 +710,7 @@ static int palmas_gpadc_read_event_value(struct iio_dev *indio_dev,
int adc_chan = chan->channel;
int ret;
- if (adc_chan > PALMAS_ADC_CH_MAX || type != IIO_EV_TYPE_THRESH)
+ if (adc_chan >= PALMAS_ADC_CH_MAX || type != IIO_EV_TYPE_THRESH)
return -EINVAL;
mutex_lock(&adc->lock);
@@ -744,7 +744,7 @@ static int palmas_gpadc_write_event_value(struct iio_dev *indio_dev,
int old;
int ret;
- if (adc_chan > PALMAS_ADC_CH_MAX || type != IIO_EV_TYPE_THRESH)
+ if (adc_chan >= PALMAS_ADC_CH_MAX || type != IIO_EV_TYPE_THRESH)
return -EINVAL;
mutex_lock(&adc->lock);
diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c
index 1aadb2ad2cab..bd7e2408bf28 100644
--- a/drivers/iio/adc/stm32-adc.c
+++ b/drivers/iio/adc/stm32-adc.c
@@ -2006,16 +2006,15 @@ static int stm32_adc_get_legacy_chan_count(struct iio_dev *indio_dev, struct stm
* to get the *real* number of channels.
*/
ret = device_property_count_u32(dev, "st,adc-diff-channels");
- if (ret < 0)
- return ret;
-
- ret /= (int)(sizeof(struct stm32_adc_diff_channel) / sizeof(u32));
- if (ret > adc_info->max_channels) {
- dev_err(&indio_dev->dev, "Bad st,adc-diff-channels?\n");
- return -EINVAL;
- } else if (ret > 0) {
- adc->num_diff = ret;
- num_channels += ret;
+ if (ret > 0) {
+ ret /= (int)(sizeof(struct stm32_adc_diff_channel) / sizeof(u32));
+ if (ret > adc_info->max_channels) {
+ dev_err(&indio_dev->dev, "Bad st,adc-diff-channels?\n");
+ return -EINVAL;
+ } else if (ret > 0) {
+ adc->num_diff = ret;
+ num_channels += ret;
+ }
}
/* Optional sample time is provided either for each, or all channels */
@@ -2037,6 +2036,7 @@ static int stm32_adc_legacy_chan_init(struct iio_dev *indio_dev,
struct stm32_adc_diff_channel diff[STM32_ADC_CH_MAX];
struct device *dev = &indio_dev->dev;
u32 num_diff = adc->num_diff;
+ int num_se = nchans - num_diff;
int size = num_diff * sizeof(*diff) / sizeof(u32);
int scan_index = 0, ret, i, c;
u32 smp = 0, smps[STM32_ADC_CH_MAX], chans[STM32_ADC_CH_MAX];
@@ -2063,29 +2063,32 @@ static int stm32_adc_legacy_chan_init(struct iio_dev *indio_dev,
scan_index++;
}
}
-
- ret = device_property_read_u32_array(dev, "st,adc-channels", chans,
- nchans);
- if (ret)
- return ret;
-
- for (c = 0; c < nchans; c++) {
- if (chans[c] >= adc_info->max_channels) {
- dev_err(&indio_dev->dev, "Invalid channel %d\n",
- chans[c]);
- return -EINVAL;
+ if (num_se > 0) {
+ ret = device_property_read_u32_array(dev, "st,adc-channels", chans, num_se);
+ if (ret) {
+ dev_err(&indio_dev->dev, "Failed to get st,adc-channels %d\n", ret);
+ return ret;
}
- /* Channel can't be configured both as single-ended & diff */
- for (i = 0; i < num_diff; i++) {
- if (chans[c] == diff[i].vinp) {
- dev_err(&indio_dev->dev, "channel %d misconfigured\n", chans[c]);
+ for (c = 0; c < num_se; c++) {
+ if (chans[c] >= adc_info->max_channels) {
+ dev_err(&indio_dev->dev, "Invalid channel %d\n",
+ chans[c]);
return -EINVAL;
}
+
+ /* Channel can't be configured both as single-ended & diff */
+ for (i = 0; i < num_diff; i++) {
+ if (chans[c] == diff[i].vinp) {
+ dev_err(&indio_dev->dev, "channel %d misconfigured\n",
+ chans[c]);
+ return -EINVAL;
+ }
+ }
+ stm32_adc_chan_init_one(indio_dev, &channels[scan_index],
+ chans[c], 0, scan_index, false);
+ scan_index++;
}
- stm32_adc_chan_init_one(indio_dev, &channels[scan_index],
- chans[c], 0, scan_index, false);
- scan_index++;
}
if (adc->nsmps > 0) {
@@ -2306,7 +2309,7 @@ static int stm32_adc_chan_fw_init(struct iio_dev *indio_dev, bool timestamping)
if (legacy)
ret = stm32_adc_legacy_chan_init(indio_dev, adc, channels,
- num_channels);
+ timestamping ? num_channels - 1 : num_channels);
else
ret = stm32_adc_generic_chan_init(indio_dev, adc, channels);
if (ret < 0)
diff --git a/drivers/iio/addac/ad74413r.c b/drivers/iio/addac/ad74413r.c
index 07e9f6ae16a8..e3366cf5eb31 100644
--- a/drivers/iio/addac/ad74413r.c
+++ b/drivers/iio/addac/ad74413r.c
@@ -1007,7 +1007,7 @@ static int ad74413r_read_raw(struct iio_dev *indio_dev,
ret = ad74413r_get_single_adc_result(indio_dev, chan->channel,
val);
- if (ret)
+ if (ret < 0)
return ret;
ad74413r_adc_to_resistance_result(*val, val);
diff --git a/drivers/iio/dac/Makefile b/drivers/iio/dac/Makefile
index 6c74fea21736..addd97a78838 100644
--- a/drivers/iio/dac/Makefile
+++ b/drivers/iio/dac/Makefile
@@ -17,7 +17,7 @@ obj-$(CONFIG_AD5592R_BASE) += ad5592r-base.o
obj-$(CONFIG_AD5592R) += ad5592r.o
obj-$(CONFIG_AD5593R) += ad5593r.o
obj-$(CONFIG_AD5755) += ad5755.o
-obj-$(CONFIG_AD5755) += ad5758.o
+obj-$(CONFIG_AD5758) += ad5758.o
obj-$(CONFIG_AD5761) += ad5761.o
obj-$(CONFIG_AD5764) += ad5764.o
obj-$(CONFIG_AD5766) += ad5766.o
diff --git a/drivers/iio/dac/mcp4725.c b/drivers/iio/dac/mcp4725.c
index 46bf758760f8..3f5661a3718f 100644
--- a/drivers/iio/dac/mcp4725.c
+++ b/drivers/iio/dac/mcp4725.c
@@ -47,12 +47,18 @@ static int mcp4725_suspend(struct device *dev)
struct mcp4725_data *data = iio_priv(i2c_get_clientdata(
to_i2c_client(dev)));
u8 outbuf[2];
+ int ret;
outbuf[0] = (data->powerdown_mode + 1) << 4;
outbuf[1] = 0;
data->powerdown = true;
- return i2c_master_send(data->client, outbuf, 2);
+ ret = i2c_master_send(data->client, outbuf, 2);
+ if (ret < 0)
+ return ret;
+ else if (ret != 2)
+ return -EIO;
+ return 0;
}
static int mcp4725_resume(struct device *dev)
@@ -60,13 +66,19 @@ static int mcp4725_resume(struct device *dev)
struct mcp4725_data *data = iio_priv(i2c_get_clientdata(
to_i2c_client(dev)));
u8 outbuf[2];
+ int ret;
/* restore previous DAC value */
outbuf[0] = (data->dac_value >> 8) & 0xf;
outbuf[1] = data->dac_value & 0xff;
data->powerdown = false;
- return i2c_master_send(data->client, outbuf, 2);
+ ret = i2c_master_send(data->client, outbuf, 2);
+ if (ret < 0)
+ return ret;
+ else if (ret != 2)
+ return -EIO;
+ return 0;
}
static DEFINE_SIMPLE_DEV_PM_OPS(mcp4725_pm_ops, mcp4725_suspend,
mcp4725_resume);
diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c
index 99576b2c171f..32d7f8364230 100644
--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c
+++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c
@@ -275,9 +275,14 @@ static int inv_icm42600_buffer_preenable(struct iio_dev *indio_dev)
{
struct inv_icm42600_state *st = iio_device_get_drvdata(indio_dev);
struct device *dev = regmap_get_device(st->map);
+ struct inv_icm42600_timestamp *ts = iio_priv(indio_dev);
pm_runtime_get_sync(dev);
+ mutex_lock(&st->lock);
+ inv_icm42600_timestamp_reset(ts);
+ mutex_unlock(&st->lock);
+
return 0;
}
@@ -375,7 +380,6 @@ static int inv_icm42600_buffer_postdisable(struct iio_dev *indio_dev)
struct device *dev = regmap_get_device(st->map);
unsigned int sensor;
unsigned int *watermark;
- struct inv_icm42600_timestamp *ts;
struct inv_icm42600_sensor_conf conf = INV_ICM42600_SENSOR_CONF_INIT;
unsigned int sleep_temp = 0;
unsigned int sleep_sensor = 0;
@@ -385,11 +389,9 @@ static int inv_icm42600_buffer_postdisable(struct iio_dev *indio_dev)
if (indio_dev == st->indio_gyro) {
sensor = INV_ICM42600_SENSOR_GYRO;
watermark = &st->fifo.watermark.gyro;
- ts = iio_priv(st->indio_gyro);
} else if (indio_dev == st->indio_accel) {
sensor = INV_ICM42600_SENSOR_ACCEL;
watermark = &st->fifo.watermark.accel;
- ts = iio_priv(st->indio_accel);
} else {
return -EINVAL;
}
@@ -417,8 +419,6 @@ static int inv_icm42600_buffer_postdisable(struct iio_dev *indio_dev)
if (!st->fifo.on)
ret = inv_icm42600_set_temp_conf(st, false, &sleep_temp);
- inv_icm42600_timestamp_reset(ts);
-
out_unlock:
mutex_unlock(&st->lock);
diff --git a/drivers/iio/industrialio-gts-helper.c b/drivers/iio/industrialio-gts-helper.c
index 8bb68975b259..7653261d2dc2 100644
--- a/drivers/iio/industrialio-gts-helper.c
+++ b/drivers/iio/industrialio-gts-helper.c
@@ -337,6 +337,17 @@ free_gains:
return ret;
}
+static void iio_gts_us_to_int_micro(int *time_us, int *int_micro_times,
+ int num_times)
+{
+ int i;
+
+ for (i = 0; i < num_times; i++) {
+ int_micro_times[i * 2] = time_us[i] / 1000000;
+ int_micro_times[i * 2 + 1] = time_us[i] % 1000000;
+ }
+}
+
/**
* iio_gts_build_avail_time_table - build table of available integration times
* @gts: Gain time scale descriptor
@@ -351,7 +362,7 @@ free_gains:
*/
static int iio_gts_build_avail_time_table(struct iio_gts *gts)
{
- int *times, i, j, idx = 0;
+ int *times, i, j, idx = 0, *int_micro_times;
if (!gts->num_itime)
return 0;
@@ -378,13 +389,24 @@ static int iio_gts_build_avail_time_table(struct iio_gts *gts)
}
}
}
- gts->avail_time_tables = times;
- /*
- * This is just to survive a unlikely corner-case where times in the
- * given time table were not unique. Else we could just trust the
- * gts->num_itime.
- */
- gts->num_avail_time_tables = idx;
+
+ /* create a list of times formatted as list of IIO_VAL_INT_PLUS_MICRO */
+ int_micro_times = kcalloc(idx, sizeof(int) * 2, GFP_KERNEL);
+ if (int_micro_times) {
+ /*
+ * This is just to survive a unlikely corner-case where times in
+ * the given time table were not unique. Else we could just
+ * trust the gts->num_itime.
+ */
+ gts->num_avail_time_tables = idx;
+ iio_gts_us_to_int_micro(times, int_micro_times, idx);
+ }
+
+ gts->avail_time_tables = int_micro_times;
+ kfree(times);
+
+ if (!int_micro_times)
+ return -ENOMEM;
return 0;
}
@@ -683,8 +705,8 @@ int iio_gts_avail_times(struct iio_gts *gts, const int **vals, int *type,
return -EINVAL;
*vals = gts->avail_time_tables;
- *type = IIO_VAL_INT;
- *length = gts->num_avail_time_tables;
+ *type = IIO_VAL_INT_PLUS_MICRO;
+ *length = gts->num_avail_time_tables * 2;
return IIO_AVAIL_LIST;
}
diff --git a/drivers/iio/light/rohm-bu27034.c b/drivers/iio/light/rohm-bu27034.c
index e486dcf35eba..f85194fda6b0 100644
--- a/drivers/iio/light/rohm-bu27034.c
+++ b/drivers/iio/light/rohm-bu27034.c
@@ -231,6 +231,9 @@ struct bu27034_result {
static const struct regmap_range bu27034_volatile_ranges[] = {
{
+ .range_min = BU27034_REG_SYSTEM_CONTROL,
+ .range_max = BU27034_REG_SYSTEM_CONTROL,
+ }, {
.range_min = BU27034_REG_MODE_CONTROL4,
.range_max = BU27034_REG_MODE_CONTROL4,
}, {
@@ -1167,11 +1170,12 @@ static int bu27034_read_raw(struct iio_dev *idev,
switch (mask) {
case IIO_CHAN_INFO_INT_TIME:
- *val = bu27034_get_int_time(data);
- if (*val < 0)
- return *val;
+ *val = 0;
+ *val2 = bu27034_get_int_time(data);
+ if (*val2 < 0)
+ return *val2;
- return IIO_VAL_INT;
+ return IIO_VAL_INT_PLUS_MICRO;
case IIO_CHAN_INFO_SCALE:
return bu27034_get_scale(data, chan->channel, val, val2);
@@ -1229,7 +1233,10 @@ static int bu27034_write_raw(struct iio_dev *idev,
ret = bu27034_set_scale(data, chan->channel, val, val2);
break;
case IIO_CHAN_INFO_INT_TIME:
- ret = bu27034_try_set_int_time(data, val);
+ if (!val)
+ ret = bu27034_try_set_int_time(data, val2);
+ else
+ ret = -EINVAL;
break;
default:
ret = -EINVAL;
@@ -1268,12 +1275,19 @@ static int bu27034_chip_init(struct bu27034_data *data)
int ret, sel;
/* Reset */
- ret = regmap_update_bits(data->regmap, BU27034_REG_SYSTEM_CONTROL,
+ ret = regmap_write_bits(data->regmap, BU27034_REG_SYSTEM_CONTROL,
BU27034_MASK_SW_RESET, BU27034_MASK_SW_RESET);
if (ret)
return dev_err_probe(data->dev, ret, "Sensor reset failed\n");
msleep(1);
+
+ ret = regmap_reinit_cache(data->regmap, &bu27034_regmap);
+ if (ret) {
+ dev_err(data->dev, "Failed to reinit reg cache\n");
+ return ret;
+ }
+
/*
* Read integration time here to ensure it is in regmap cache. We do
* this to speed-up the int-time acquisition in the start of the buffer
diff --git a/drivers/iio/light/vcnl4035.c b/drivers/iio/light/vcnl4035.c
index 14e29330e972..94f5d611e98c 100644
--- a/drivers/iio/light/vcnl4035.c
+++ b/drivers/iio/light/vcnl4035.c
@@ -8,6 +8,7 @@
* TODO: Proximity
*/
#include <linux/bitops.h>
+#include <linux/bitfield.h>
#include <linux/i2c.h>
#include <linux/module.h>
#include <linux/pm_runtime.h>
@@ -42,6 +43,7 @@
#define VCNL4035_ALS_PERS_MASK GENMASK(3, 2)
#define VCNL4035_INT_ALS_IF_H_MASK BIT(12)
#define VCNL4035_INT_ALS_IF_L_MASK BIT(13)
+#define VCNL4035_DEV_ID_MASK GENMASK(7, 0)
/* Default values */
#define VCNL4035_MODE_ALS_ENABLE BIT(0)
@@ -413,6 +415,7 @@ static int vcnl4035_init(struct vcnl4035_data *data)
return ret;
}
+ id = FIELD_GET(VCNL4035_DEV_ID_MASK, id);
if (id != VCNL4035_DEV_ID_VAL) {
dev_err(&data->client->dev, "Wrong id, got %x, expected %x\n",
id, VCNL4035_DEV_ID_VAL);
diff --git a/drivers/iio/magnetometer/tmag5273.c b/drivers/iio/magnetometer/tmag5273.c
index 28bb7efe8df8..e155a75b3cd2 100644
--- a/drivers/iio/magnetometer/tmag5273.c
+++ b/drivers/iio/magnetometer/tmag5273.c
@@ -296,12 +296,13 @@ static int tmag5273_read_raw(struct iio_dev *indio_dev,
return ret;
ret = tmag5273_get_measure(data, &t, &x, &y, &z, &angle, &magnitude);
- if (ret)
- return ret;
pm_runtime_mark_last_busy(data->dev);
pm_runtime_put_autosuspend(data->dev);
+ if (ret)
+ return ret;
+
switch (chan->address) {
case TEMPERATURE:
*val = t;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 93a1c48d0c32..6b3f4384e46a 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -3295,7 +3295,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
route->path_rec->traffic_class = tos;
route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
route->path_rec->rate_selector = IB_SA_EQ;
- route->path_rec->rate = iboe_get_rate(ndev);
+ route->path_rec->rate = IB_RATE_PORT_CURRENT;
dev_put(ndev);
route->path_rec->packet_life_time_selector = IB_SA_EQ;
/* In case ACK timeout is set, use this value to calculate
@@ -4964,7 +4964,7 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
if (!ndev)
return -ENODEV;
- ib.rec.rate = iboe_get_rate(ndev);
+ ib.rec.rate = IB_RATE_PORT_CURRENT;
ib.rec.hop_limit = 1;
ib.rec.mtu = iboe_get_mtu(ndev->mtu);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 4796f6a8828c..e836c9c477f6 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1850,8 +1850,13 @@ static int modify_qp(struct uverbs_attr_bundle *attrs,
attr->path_mtu = cmd->base.path_mtu;
if (cmd->base.attr_mask & IB_QP_PATH_MIG_STATE)
attr->path_mig_state = cmd->base.path_mig_state;
- if (cmd->base.attr_mask & IB_QP_QKEY)
+ if (cmd->base.attr_mask & IB_QP_QKEY) {
+ if (cmd->base.qkey & IB_QP_SET_QKEY && !capable(CAP_NET_RAW)) {
+ ret = -EPERM;
+ goto release_qp;
+ }
attr->qkey = cmd->base.qkey;
+ }
if (cmd->base.attr_mask & IB_QP_RQ_PSN)
attr->rq_psn = cmd->base.rq_psn;
if (cmd->base.attr_mask & IB_QP_SQ_PSN)
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index fbace69672ca..7c9c79c13941 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -222,8 +222,12 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
spin_lock_irq(&ev_queue->lock);
while (list_empty(&ev_queue->event_list)) {
- spin_unlock_irq(&ev_queue->lock);
+ if (ev_queue->is_closed) {
+ spin_unlock_irq(&ev_queue->lock);
+ return -EIO;
+ }
+ spin_unlock_irq(&ev_queue->lock);
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
@@ -233,12 +237,6 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
return -ERESTARTSYS;
spin_lock_irq(&ev_queue->lock);
-
- /* If device was disassociated and no event exists set an error */
- if (list_empty(&ev_queue->event_list) && ev_queue->is_closed) {
- spin_unlock_irq(&ev_queue->lock);
- return -EIO;
- }
}
event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index 5a2baf49ecaa..2c95e6f3d47a 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -135,8 +135,6 @@ struct bnxt_re_dev {
struct delayed_work worker;
u8 cur_prio_map;
- u16 active_speed;
- u8 active_width;
/* FP Notification Queue (CQ & SRQ) */
struct tasklet_struct nq_task;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index e86afecfbe46..952811c40c54 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -199,6 +199,7 @@ int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num,
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+ int rc;
memset(port_attr, 0, sizeof(*port_attr));
@@ -228,10 +229,10 @@ int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num,
port_attr->sm_sl = 0;
port_attr->subnet_timeout = 0;
port_attr->init_type_reply = 0;
- port_attr->active_speed = rdev->active_speed;
- port_attr->active_width = rdev->active_width;
+ rc = ib_get_eth_speed(&rdev->ibdev, port_num, &port_attr->active_speed,
+ &port_attr->active_width);
- return 0;
+ return rc;
}
int bnxt_re_get_port_immutable(struct ib_device *ibdev, u32 port_num,
@@ -3341,9 +3342,7 @@ static int bnxt_re_process_raw_qp_pkt_rx(struct bnxt_re_qp *gsi_qp,
udwr.remote_qkey = gsi_sqp->qplib_qp.qkey;
/* post data received in the send queue */
- rc = bnxt_re_post_send_shadow_qp(rdev, gsi_sqp, swr);
-
- return 0;
+ return bnxt_re_post_send_shadow_qp(rdev, gsi_sqp, swr);
}
static void bnxt_re_process_res_rawqp1_wc(struct ib_wc *wc,
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index b9e2f89337e8..3073398a2183 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -1077,8 +1077,6 @@ static int bnxt_re_ib_init(struct bnxt_re_dev *rdev)
return rc;
}
dev_info(rdev_to_dev(rdev), "Device registered with IB successfully");
- ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
- &rdev->active_width);
set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
event = netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev) ?
@@ -1336,6 +1334,10 @@ static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable)
{
struct bnxt_qplib_cc_param cc_param = {};
+ /* Do not enable congestion control on VFs */
+ if (rdev->is_virtfn)
+ return;
+
/* Currently enabling only for GenP5 adapters */
if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx))
return;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index f139d4cd1712..8974f6235cfa 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -2056,6 +2056,12 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
u32 pg_sz_lvl;
int rc;
+ if (!cq->dpi) {
+ dev_err(&rcfw->pdev->dev,
+ "FP: CREATE_CQ failed due to NULL DPI\n");
+ return -EINVAL;
+ }
+
hwq_attr.res = res;
hwq_attr.depth = cq->max_wqe;
hwq_attr.stride = sizeof(struct cq_base);
@@ -2069,11 +2075,6 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
CMDQ_BASE_OPCODE_CREATE_CQ,
sizeof(req));
- if (!cq->dpi) {
- dev_err(&rcfw->pdev->dev,
- "FP: CREATE_CQ failed due to NULL DPI\n");
- return -EINVAL;
- }
req.dpi = cpu_to_le32(cq->dpi->dpi);
req.cq_handle = cpu_to_le64(cq->cq_handle);
req.cq_size = cpu_to_le32(cq->hwq.max_elements);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index 126d4f26f75a..81b0c5e879f9 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -215,17 +215,9 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq,
return -EINVAL;
hwq_attr->sginfo->npages = npages;
} else {
- unsigned long sginfo_num_pages = ib_umem_num_dma_blocks(
- hwq_attr->sginfo->umem, hwq_attr->sginfo->pgsize);
-
+ npages = ib_umem_num_dma_blocks(hwq_attr->sginfo->umem,
+ hwq_attr->sginfo->pgsize);
hwq->is_user = true;
- npages = sginfo_num_pages;
- npages = (npages * PAGE_SIZE) /
- BIT_ULL(hwq_attr->sginfo->pgshft);
- if ((sginfo_num_pages * PAGE_SIZE) %
- BIT_ULL(hwq_attr->sginfo->pgshft))
- if (!npages)
- npages++;
}
if (npages == MAX_PBL_LVL_0_PGS && !hwq_attr->sginfo->nopte) {
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 1714a1e23113..b967a17a44be 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -617,16 +617,15 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
/* Free the hwq if it already exist, must be a rereg */
if (mr->hwq.max_elements)
bnxt_qplib_free_hwq(res, &mr->hwq);
- /* Use system PAGE_SIZE */
hwq_attr.res = res;
hwq_attr.depth = pages;
- hwq_attr.stride = buf_pg_size;
+ hwq_attr.stride = sizeof(dma_addr_t);
hwq_attr.type = HWQ_TYPE_MR;
hwq_attr.sginfo = &sginfo;
hwq_attr.sginfo->umem = umem;
hwq_attr.sginfo->npages = pages;
- hwq_attr.sginfo->pgsize = PAGE_SIZE;
- hwq_attr.sginfo->pgshft = PAGE_SHIFT;
+ hwq_attr.sginfo->pgsize = buf_pg_size;
+ hwq_attr.sginfo->pgshft = ilog2(buf_pg_size);
rc = bnxt_qplib_alloc_init_hwq(&mr->hwq, &hwq_attr);
if (rc) {
dev_err(&res->pdev->dev,
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index 8eca6c14d0cf..2a195c4b0f17 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -1403,7 +1403,7 @@ static int pbl_continuous_initialize(struct efa_dev *dev,
*/
static int pbl_indirect_initialize(struct efa_dev *dev, struct pbl_context *pbl)
{
- u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, PAGE_SIZE);
+ u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, EFA_CHUNK_PAYLOAD_SIZE);
struct scatterlist *sgl;
int sg_dma_cnt, err;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 84f1167de1d9..d4c6b9bc0a4e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -4583,11 +4583,9 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
mtu = ib_mtu_enum_to_int(ib_mtu);
if (WARN_ON(mtu <= 0))
return -EINVAL;
-#define MAX_LP_MSG_LEN 16384
- /* MTU * (2 ^ LP_PKTN_INI) shouldn't be bigger than 16KB */
- lp_pktn_ini = ilog2(MAX_LP_MSG_LEN / mtu);
- if (WARN_ON(lp_pktn_ini >= 0xF))
- return -EINVAL;
+#define MIN_LP_MSG_LEN 1024
+ /* mtu * (2 ^ lp_pktn_ini) should be in the range of 1024 to mtu */
+ lp_pktn_ini = ilog2(max(mtu, MIN_LP_MSG_LEN) / mtu);
if (attr_mask & IB_QP_PATH_MTU) {
hr_reg_write(context, QPC_MTU, ib_mtu);
@@ -5012,7 +5010,6 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
static bool check_qp_timeout_cfg_range(struct hns_roce_dev *hr_dev, u8 *timeout)
{
#define QP_ACK_TIMEOUT_MAX_HIP08 20
-#define QP_ACK_TIMEOUT_OFFSET 10
#define QP_ACK_TIMEOUT_MAX 31
if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
@@ -5021,7 +5018,7 @@ static bool check_qp_timeout_cfg_range(struct hns_roce_dev *hr_dev, u8 *timeout)
"local ACK timeout shall be 0 to 20.\n");
return false;
}
- *timeout += QP_ACK_TIMEOUT_OFFSET;
+ *timeout += HNS_ROCE_V2_QP_ACK_TIMEOUT_OFS_HIP08;
} else if (hr_dev->pci_dev->revision > PCI_REVISION_ID_HIP08) {
if (*timeout > QP_ACK_TIMEOUT_MAX) {
ibdev_warn(&hr_dev->ib_dev,
@@ -5307,6 +5304,18 @@ out:
return ret;
}
+static u8 get_qp_timeout_attr(struct hns_roce_dev *hr_dev,
+ struct hns_roce_v2_qp_context *context)
+{
+ u8 timeout;
+
+ timeout = (u8)hr_reg_read(context, QPC_AT);
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ timeout -= HNS_ROCE_V2_QP_ACK_TIMEOUT_OFS_HIP08;
+
+ return timeout;
+}
+
static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr)
@@ -5384,7 +5393,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
qp_attr->max_dest_rd_atomic = 1 << hr_reg_read(&context, QPC_RR_MAX);
qp_attr->min_rnr_timer = (u8)hr_reg_read(&context, QPC_MIN_RNR_TIME);
- qp_attr->timeout = (u8)hr_reg_read(&context, QPC_AT);
+ qp_attr->timeout = get_qp_timeout_attr(hr_dev, &context);
qp_attr->retry_cnt = hr_reg_read(&context, QPC_RETRY_NUM_INIT);
qp_attr->rnr_retry = hr_reg_read(&context, QPC_RNR_NUM_INIT);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 1b44d2434ab4..7033eae2407c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -44,6 +44,8 @@
#define HNS_ROCE_V2_MAX_XRCD_NUM 0x1000000
#define HNS_ROCE_V2_RSV_XRCD_NUM 0
+#define HNS_ROCE_V2_QP_ACK_TIMEOUT_OFS_HIP08 10
+
#define HNS_ROCE_V3_SCCC_SZ 64
#define HNS_ROCE_V3_GMV_ENTRY_SZ 32
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 37a5cf62f88b..14376490ac22 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -33,6 +33,7 @@
#include <linux/vmalloc.h>
#include <rdma/ib_umem.h>
+#include <linux/math.h>
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"
@@ -909,6 +910,44 @@ static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev,
return page_cnt;
}
+static u64 cal_pages_per_l1ba(unsigned int ba_per_bt, unsigned int hopnum)
+{
+ return int_pow(ba_per_bt, hopnum - 1);
+}
+
+static unsigned int cal_best_bt_pg_sz(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr,
+ unsigned int pg_shift)
+{
+ unsigned long cap = hr_dev->caps.page_size_cap;
+ struct hns_roce_buf_region *re;
+ unsigned int pgs_per_l1ba;
+ unsigned int ba_per_bt;
+ unsigned int ba_num;
+ int i;
+
+ for_each_set_bit_from(pg_shift, &cap, sizeof(cap) * BITS_PER_BYTE) {
+ if (!(BIT(pg_shift) & cap))
+ continue;
+
+ ba_per_bt = BIT(pg_shift) / BA_BYTE_LEN;
+ ba_num = 0;
+ for (i = 0; i < mtr->hem_cfg.region_count; i++) {
+ re = &mtr->hem_cfg.region[i];
+ if (re->hopnum == 0)
+ continue;
+
+ pgs_per_l1ba = cal_pages_per_l1ba(ba_per_bt, re->hopnum);
+ ba_num += DIV_ROUND_UP(re->count, pgs_per_l1ba);
+ }
+
+ if (ba_num <= ba_per_bt)
+ return pg_shift;
+ }
+
+ return 0;
+}
+
static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
unsigned int ba_page_shift)
{
@@ -917,6 +956,10 @@ static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
hns_roce_hem_list_init(&mtr->hem_list);
if (!cfg->is_direct) {
+ ba_page_shift = cal_best_bt_pg_sz(hr_dev, mtr, ba_page_shift);
+ if (!ba_page_shift)
+ return -ERANGE;
+
ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list,
cfg->region, cfg->region_count,
ba_page_shift);
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index ab5cdf782785..eaa12c124598 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -522,11 +522,6 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
if (!iwqp->user_mode)
cancel_delayed_work_sync(&iwqp->dwork_flush);
- irdma_qp_rem_ref(&iwqp->ibqp);
- wait_for_completion(&iwqp->free_qp);
- irdma_free_lsmm_rsrc(iwqp);
- irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp);
-
if (!iwqp->user_mode) {
if (iwqp->iwscq) {
irdma_clean_cqes(iwqp, iwqp->iwscq);
@@ -534,6 +529,12 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
irdma_clean_cqes(iwqp, iwqp->iwrcq);
}
}
+
+ irdma_qp_rem_ref(&iwqp->ibqp);
+ wait_for_completion(&iwqp->free_qp);
+ irdma_free_lsmm_rsrc(iwqp);
+ irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp);
+
irdma_remove_push_mmap_entries(iwqp);
irdma_free_qp_rsrc(iwqp);
@@ -3291,6 +3292,7 @@ static int irdma_post_send(struct ib_qp *ibqp,
break;
case IB_WR_LOCAL_INV:
info.op_type = IRDMA_OP_TYPE_INV_STAG;
+ info.local_fence = info.read_fence;
info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey;
err = irdma_uk_stag_local_invalidate(ukqp, &info, true);
break;
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
index 1c06920505d2..93257fa5aae8 100644
--- a/drivers/infiniband/hw/mlx5/counters.c
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -209,7 +209,8 @@ static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
!vport_qcounters_supported(dev)) || !port_num)
return &dev->port[0].cnts;
- return &dev->port[port_num - 1].cnts;
+ return is_mdev_switchdev_mode(dev->mdev) ?
+ &dev->port[1].cnts : &dev->port[port_num - 1].cnts;
}
/**
@@ -262,7 +263,7 @@ static struct rdma_hw_stats *
mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- const struct mlx5_ib_counters *cnts = &dev->port[port_num - 1].cnts;
+ const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
return do_alloc_stats(cnts);
}
@@ -329,6 +330,7 @@ static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev,
{
u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
+ struct mlx5_core_dev *mdev;
__be32 val;
int ret, i;
@@ -336,12 +338,16 @@ static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev,
dev->port[port_num].rep->vport == MLX5_VPORT_UPLINK)
return 0;
+ mdev = mlx5_eswitch_get_core_dev(dev->port[port_num].rep->esw);
+ if (!mdev)
+ return -EOPNOTSUPP;
+
MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
MLX5_SET(query_q_counter_in, in, other_vport, 1);
MLX5_SET(query_q_counter_in, in, vport_number,
dev->port[port_num].rep->vport);
MLX5_SET(query_q_counter_in, in, aggregate, 1);
- ret = mlx5_cmd_exec_inout(dev->mdev, query_q_counter, in, out);
+ ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
if (ret)
return ret;
@@ -575,43 +581,53 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
port_num != MLX5_VPORT_PF;
const struct mlx5_ib_counter *names;
- int j = 0, i;
+ int j = 0, i, size;
names = is_vport ? vport_basic_q_cnts : basic_q_cnts;
- for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
+ size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
+ ARRAY_SIZE(basic_q_cnts);
+ for (i = 0; i < size; i++, j++) {
descs[j].name = names[i].name;
- offsets[j] = basic_q_cnts[i].offset;
+ offsets[j] = names[i].offset;
}
names = is_vport ? vport_out_of_seq_q_cnts : out_of_seq_q_cnts;
+ size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
+ ARRAY_SIZE(out_of_seq_q_cnts);
if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
- for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
+ for (i = 0; i < size; i++, j++) {
descs[j].name = names[i].name;
- offsets[j] = out_of_seq_q_cnts[i].offset;
+ offsets[j] = names[i].offset;
}
}
names = is_vport ? vport_retrans_q_cnts : retrans_q_cnts;
+ size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
+ ARRAY_SIZE(retrans_q_cnts);
if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
- for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
+ for (i = 0; i < size; i++, j++) {
descs[j].name = names[i].name;
- offsets[j] = retrans_q_cnts[i].offset;
+ offsets[j] = names[i].offset;
}
}
names = is_vport ? vport_extended_err_cnts : extended_err_cnts;
+ size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
+ ARRAY_SIZE(extended_err_cnts);
if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
- for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
+ for (i = 0; i < size; i++, j++) {
descs[j].name = names[i].name;
- offsets[j] = extended_err_cnts[i].offset;
+ offsets[j] = names[i].offset;
}
}
names = is_vport ? vport_roce_accl_cnts : roce_accl_cnts;
+ size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
+ ARRAY_SIZE(roce_accl_cnts);
if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
- for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) {
+ for (i = 0; i < size; i++, j++) {
descs[j].name = names[i].name;
- offsets[j] = roce_accl_cnts[i].offset;
+ offsets[j] = names[i].offset;
}
}
@@ -661,25 +677,37 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
struct mlx5_ib_counters *cnts, u32 port_num)
{
- u32 num_counters, num_op_counters = 0;
+ bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
+ port_num != MLX5_VPORT_PF;
+ u32 num_counters, num_op_counters = 0, size;
- num_counters = ARRAY_SIZE(basic_q_cnts);
+ size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
+ ARRAY_SIZE(basic_q_cnts);
+ num_counters = size;
+ size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
+ ARRAY_SIZE(out_of_seq_q_cnts);
if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
- num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
+ num_counters += size;
+ size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
+ ARRAY_SIZE(retrans_q_cnts);
if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
- num_counters += ARRAY_SIZE(retrans_q_cnts);
+ num_counters += size;
+ size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
+ ARRAY_SIZE(extended_err_cnts);
if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
- num_counters += ARRAY_SIZE(extended_err_cnts);
+ num_counters += size;
+ size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
+ ARRAY_SIZE(roce_accl_cnts);
if (MLX5_CAP_GEN(dev->mdev, roce_accl))
- num_counters += ARRAY_SIZE(roce_accl_cnts);
+ num_counters += size;
cnts->num_q_counters = num_counters;
- if (is_mdev_switchdev_mode(dev->mdev) && port_num != MLX5_VPORT_PF)
+ if (is_vport)
goto skip_non_qcounters;
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
@@ -725,11 +753,11 @@ err:
static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
- int num_cnt_ports;
+ int num_cnt_ports = dev->num_ports;
int i, j;
- num_cnt_ports = (!is_mdev_switchdev_mode(dev->mdev) ||
- vport_qcounters_supported(dev)) ? dev->num_ports : 1;
+ if (is_mdev_switchdev_mode(dev->mdev))
+ num_cnt_ports = min(2, num_cnt_ports);
MLX5_SET(dealloc_q_counter_in, in, opcode,
MLX5_CMD_OP_DEALLOC_Q_COUNTER);
@@ -761,15 +789,22 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
{
u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
- int num_cnt_ports;
+ int num_cnt_ports = dev->num_ports;
int err = 0;
int i;
bool is_shared;
MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
- num_cnt_ports = (!is_mdev_switchdev_mode(dev->mdev) ||
- vport_qcounters_supported(dev)) ? dev->num_ports : 1;
+
+ /*
+ * In switchdev we need to allocate two ports, one that is used for
+ * the device Q_counters and it is essentially the real Q_counters of
+ * this device, while the other is used as a helper for PF to be able to
+ * query all other vports.
+ */
+ if (is_mdev_switchdev_mode(dev->mdev))
+ num_cnt_ports = min(2, num_cnt_ports);
for (i = 0; i < num_cnt_ports; i++) {
err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts, i);
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 3008632a6c20..1e419e080b53 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -695,8 +695,6 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_table *ft;
- if (mlx5_ib_shared_ft_allowed(&dev->ib_dev))
- ft_attr.uid = MLX5_SHARED_RESOURCE_UID;
ft_attr.prio = priority;
ft_attr.max_fte = num_entries;
ft_attr.flags = flags;
@@ -2025,6 +2023,237 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject,
return 0;
}
+static int steering_anchor_create_ft(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table *ft;
+
+ if (ft_prio->anchor.ft)
+ return 0;
+
+ ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
+ if (!ns)
+ return -EOPNOTSUPP;
+
+ ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
+ ft_attr.uid = MLX5_SHARED_RESOURCE_UID;
+ ft_attr.prio = 0;
+ ft_attr.max_fte = 2;
+ ft_attr.level = 1;
+
+ ft = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft))
+ return PTR_ERR(ft);
+
+ ft_prio->anchor.ft = ft;
+
+ return 0;
+}
+
+static void steering_anchor_destroy_ft(struct mlx5_ib_flow_prio *ft_prio)
+{
+ if (ft_prio->anchor.ft) {
+ mlx5_destroy_flow_table(ft_prio->anchor.ft);
+ ft_prio->anchor.ft = NULL;
+ }
+}
+
+static int
+steering_anchor_create_fg_drop(struct mlx5_ib_flow_prio *ft_prio)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ void *flow_group_in;
+ int err = 0;
+
+ if (ft_prio->anchor.fg_drop)
+ return 0;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+
+ fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in);
+ if (IS_ERR(fg)) {
+ err = PTR_ERR(fg);
+ goto out;
+ }
+
+ ft_prio->anchor.fg_drop = fg;
+
+out:
+ kvfree(flow_group_in);
+
+ return err;
+}
+
+static void
+steering_anchor_destroy_fg_drop(struct mlx5_ib_flow_prio *ft_prio)
+{
+ if (ft_prio->anchor.fg_drop) {
+ mlx5_destroy_flow_group(ft_prio->anchor.fg_drop);
+ ft_prio->anchor.fg_drop = NULL;
+ }
+}
+
+static int
+steering_anchor_create_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ void *flow_group_in;
+ int err = 0;
+
+ if (ft_prio->anchor.fg_goto_table)
+ return 0;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in);
+ if (IS_ERR(fg)) {
+ err = PTR_ERR(fg);
+ goto out;
+ }
+ ft_prio->anchor.fg_goto_table = fg;
+
+out:
+ kvfree(flow_group_in);
+
+ return err;
+}
+
+static void
+steering_anchor_destroy_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio)
+{
+ if (ft_prio->anchor.fg_goto_table) {
+ mlx5_destroy_flow_group(ft_prio->anchor.fg_goto_table);
+ ft_prio->anchor.fg_goto_table = NULL;
+ }
+}
+
+static int
+steering_anchor_create_rule_drop(struct mlx5_ib_flow_prio *ft_prio)
+{
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *handle;
+
+ if (ft_prio->anchor.rule_drop)
+ return 0;
+
+ flow_act.fg = ft_prio->anchor.fg_drop;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act,
+ NULL, 0);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ ft_prio->anchor.rule_drop = handle;
+
+ return 0;
+}
+
+static void steering_anchor_destroy_rule_drop(struct mlx5_ib_flow_prio *ft_prio)
+{
+ if (ft_prio->anchor.rule_drop) {
+ mlx5_del_flow_rules(ft_prio->anchor.rule_drop);
+ ft_prio->anchor.rule_drop = NULL;
+ }
+}
+
+static int
+steering_anchor_create_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio)
+{
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *handle;
+
+ if (ft_prio->anchor.rule_goto_table)
+ return 0;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ flow_act.fg = ft_prio->anchor.fg_goto_table;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = ft_prio->flow_table;
+
+ handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act,
+ &dest, 1);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ ft_prio->anchor.rule_goto_table = handle;
+
+ return 0;
+}
+
+static void
+steering_anchor_destroy_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio)
+{
+ if (ft_prio->anchor.rule_goto_table) {
+ mlx5_del_flow_rules(ft_prio->anchor.rule_goto_table);
+ ft_prio->anchor.rule_goto_table = NULL;
+ }
+}
+
+static int steering_anchor_create_res(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ int err;
+
+ err = steering_anchor_create_ft(dev, ft_prio, ns_type);
+ if (err)
+ return err;
+
+ err = steering_anchor_create_fg_drop(ft_prio);
+ if (err)
+ goto destroy_ft;
+
+ err = steering_anchor_create_fg_goto_table(ft_prio);
+ if (err)
+ goto destroy_fg_drop;
+
+ err = steering_anchor_create_rule_drop(ft_prio);
+ if (err)
+ goto destroy_fg_goto_table;
+
+ err = steering_anchor_create_rule_goto_table(ft_prio);
+ if (err)
+ goto destroy_rule_drop;
+
+ return 0;
+
+destroy_rule_drop:
+ steering_anchor_destroy_rule_drop(ft_prio);
+destroy_fg_goto_table:
+ steering_anchor_destroy_fg_goto_table(ft_prio);
+destroy_fg_drop:
+ steering_anchor_destroy_fg_drop(ft_prio);
+destroy_ft:
+ steering_anchor_destroy_ft(ft_prio);
+
+ return err;
+}
+
+static void mlx5_steering_anchor_destroy_res(struct mlx5_ib_flow_prio *ft_prio)
+{
+ steering_anchor_destroy_rule_goto_table(ft_prio);
+ steering_anchor_destroy_rule_drop(ft_prio);
+ steering_anchor_destroy_fg_goto_table(ft_prio);
+ steering_anchor_destroy_fg_drop(ft_prio);
+ steering_anchor_destroy_ft(ft_prio);
+}
+
static int steering_anchor_cleanup(struct ib_uobject *uobject,
enum rdma_remove_reason why,
struct uverbs_attr_bundle *attrs)
@@ -2035,6 +2264,9 @@ static int steering_anchor_cleanup(struct ib_uobject *uobject,
return -EBUSY;
mutex_lock(&obj->dev->flow_db->lock);
+ if (!--obj->ft_prio->anchor.rule_goto_table_ref)
+ steering_anchor_destroy_rule_goto_table(obj->ft_prio);
+
put_flow_table(obj->dev, obj->ft_prio, true);
mutex_unlock(&obj->dev->flow_db->lock);
@@ -2042,6 +2274,24 @@ static int steering_anchor_cleanup(struct ib_uobject *uobject,
return 0;
}
+static void fs_cleanup_anchor(struct mlx5_ib_flow_prio *prio,
+ int count)
+{
+ while (count--)
+ mlx5_steering_anchor_destroy_res(&prio[count]);
+}
+
+void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev)
+{
+ fs_cleanup_anchor(dev->flow_db->prios, MLX5_IB_NUM_FLOW_FT);
+ fs_cleanup_anchor(dev->flow_db->egress_prios, MLX5_IB_NUM_FLOW_FT);
+ fs_cleanup_anchor(dev->flow_db->sniffer, MLX5_IB_NUM_SNIFFER_FTS);
+ fs_cleanup_anchor(dev->flow_db->egress, MLX5_IB_NUM_EGRESS_FTS);
+ fs_cleanup_anchor(dev->flow_db->fdb, MLX5_IB_NUM_FDB_FTS);
+ fs_cleanup_anchor(dev->flow_db->rdma_rx, MLX5_IB_NUM_FLOW_FT);
+ fs_cleanup_anchor(dev->flow_db->rdma_tx, MLX5_IB_NUM_FLOW_FT);
+}
+
static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
struct mlx5_ib_flow_matcher *obj)
{
@@ -2182,21 +2432,31 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
return -ENOMEM;
mutex_lock(&dev->flow_db->lock);
+
ft_prio = _get_flow_table(dev, priority, ns_type, 0);
if (IS_ERR(ft_prio)) {
- mutex_unlock(&dev->flow_db->lock);
err = PTR_ERR(ft_prio);
goto free_obj;
}
ft_prio->refcount++;
- ft_id = mlx5_flow_table_id(ft_prio->flow_table);
- mutex_unlock(&dev->flow_db->lock);
+
+ if (!ft_prio->anchor.rule_goto_table_ref) {
+ err = steering_anchor_create_res(dev, ft_prio, ns_type);
+ if (err)
+ goto put_flow_table;
+ }
+
+ ft_prio->anchor.rule_goto_table_ref++;
+
+ ft_id = mlx5_flow_table_id(ft_prio->anchor.ft);
err = uverbs_copy_to(attrs, MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
&ft_id, sizeof(ft_id));
if (err)
- goto put_flow_table;
+ goto destroy_res;
+
+ mutex_unlock(&dev->flow_db->lock);
uobj->object = obj;
obj->dev = dev;
@@ -2205,8 +2465,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
return 0;
+destroy_res:
+ --ft_prio->anchor.rule_goto_table_ref;
+ mlx5_steering_anchor_destroy_res(ft_prio);
put_flow_table:
- mutex_lock(&dev->flow_db->lock);
put_flow_table(dev, ft_prio, true);
mutex_unlock(&dev->flow_db->lock);
free_obj:
diff --git a/drivers/infiniband/hw/mlx5/fs.h b/drivers/infiniband/hw/mlx5/fs.h
index ad320adaf321..b9734904f5f0 100644
--- a/drivers/infiniband/hw/mlx5/fs.h
+++ b/drivers/infiniband/hw/mlx5/fs.h
@@ -10,6 +10,7 @@
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
int mlx5_ib_fs_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev);
#else
static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
{
@@ -21,9 +22,24 @@ static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
mutex_init(&dev->flow_db->lock);
return 0;
}
+
+inline void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev) {}
#endif
+
static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev)
{
+ /* When a steering anchor is created, a special flow table is also
+ * created for the user to reference. Since the user can reference it,
+ * the kernel cannot trust that when the user destroys the steering
+ * anchor, they no longer reference the flow table.
+ *
+ * To address this issue, when a user destroys a steering anchor, only
+ * the flow steering rule in the table is destroyed, but the table
+ * itself is kept to deal with the above scenario. The remaining
+ * resources are only removed when the RDMA device is destroyed, which
+ * is a safe assumption that all references are gone.
+ */
+ mlx5_ib_fs_cleanup_anchor(dev);
kfree(dev->flow_db);
}
#endif /* _MLX5_IB_FS_H */
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 5d45de223c43..f0b394ed7452 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -4275,6 +4275,9 @@ const struct mlx5_ib_profile raw_eth_profile = {
STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
mlx5_ib_stage_post_ib_reg_umr_init,
NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
+ mlx5_ib_stage_delay_drop_init,
+ mlx5_ib_stage_delay_drop_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_RESTRACK,
mlx5_ib_restrack_init,
NULL),
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index efa4dc6e7dee..2dfa6f49a6f4 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -237,8 +237,19 @@ enum {
#define MLX5_IB_NUM_SNIFFER_FTS 2
#define MLX5_IB_NUM_EGRESS_FTS 1
#define MLX5_IB_NUM_FDB_FTS MLX5_BY_PASS_NUM_REGULAR_PRIOS
+
+struct mlx5_ib_anchor {
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *fg_goto_table;
+ struct mlx5_flow_group *fg_drop;
+ struct mlx5_flow_handle *rule_goto_table;
+ struct mlx5_flow_handle *rule_drop;
+ unsigned int rule_goto_table_ref;
+};
+
struct mlx5_ib_flow_prio {
struct mlx5_flow_table *flow_table;
+ struct mlx5_ib_anchor anchor;
unsigned int refcount;
};
@@ -1587,6 +1598,9 @@ static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev)
MLX5_CAP_PORT_SELECTION(dev->mdev, port_select_flow_table_bypass))
return 0;
+ if (mlx5_lag_is_lacp_owner(dev->mdev) && !dev->lag_active)
+ return 0;
+
return dev->lag_active ||
(MLX5_CAP_GEN(dev->mdev, num_lag_ports) > 1 &&
MLX5_CAP_GEN(dev->mdev, lag_tx_port_affinity));
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 70ca8ffa9256..78b96bfb4e6a 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1237,6 +1237,9 @@ static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid);
MLX5_SET(tisc, tisc, transport_domain, tdn);
+ if (!mlx5_ib_lag_should_assign_affinity(dev) &&
+ mlx5_lag_is_lacp_owner(dev->mdev))
+ MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1);
if (qp->flags & IB_QP_CREATE_SOURCE_QPN)
MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn);
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index db18ace74d2b..f46c5a5fd0ae 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -115,15 +115,16 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
void retransmit_timer(struct timer_list *t)
{
struct rxe_qp *qp = from_timer(qp, t, retrans_timer);
+ unsigned long flags;
rxe_dbg_qp(qp, "retransmit timer fired\n");
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (qp->valid) {
qp->comp.timeout = 1;
rxe_sched_task(&qp->comp.task);
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
}
void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
@@ -481,11 +482,13 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
static void comp_check_sq_drain_done(struct rxe_qp *qp)
{
- spin_lock_bh(&qp->state_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->state_lock, flags);
if (unlikely(qp_state(qp) == IB_QPS_SQD)) {
if (qp->attr.sq_draining && qp->comp.psn == qp->req.psn) {
qp->attr.sq_draining = 0;
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
if (qp->ibqp.event_handler) {
struct ib_event ev;
@@ -499,7 +502,7 @@ static void comp_check_sq_drain_done(struct rxe_qp *qp)
return;
}
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
}
static inline enum comp_state complete_ack(struct rxe_qp *qp,
@@ -625,13 +628,15 @@ static void free_pkt(struct rxe_pkt_info *pkt)
*/
static void reset_retry_timer(struct rxe_qp *qp)
{
+ unsigned long flags;
+
if (qp_type(qp) == IB_QPT_RC && qp->qp_timeout_jiffies) {
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (qp_state(qp) >= IB_QPS_RTS &&
psn_compare(qp->req.psn, qp->comp.psn) > 0)
mod_timer(&qp->retrans_timer,
jiffies + qp->qp_timeout_jiffies);
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
}
}
@@ -643,18 +648,19 @@ int rxe_completer(struct rxe_qp *qp)
struct rxe_pkt_info *pkt = NULL;
enum comp_state state;
int ret;
+ unsigned long flags;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
qp_state(qp) == IB_QPS_RESET) {
bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR);
drain_resp_pkts(qp);
flush_send_queue(qp, notify);
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
goto exit;
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
if (qp->comp.timeout) {
qp->comp.timeout_retry = 1;
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index 20ff0c0c4605..6ca2a05b6a2a 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -113,8 +113,6 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT);
- spin_unlock_irqrestore(&cq->cq_lock, flags);
-
if ((cq->notify == IB_CQ_NEXT_COMP) ||
(cq->notify == IB_CQ_SOLICITED && solicited)) {
cq->notify = 0;
@@ -122,6 +120,8 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+
return 0;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 2bc7361152ea..cd59666158b1 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -159,6 +159,9 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
pkt->mask = RXE_GRH_MASK;
pkt->paylen = be16_to_cpu(udph->len) - sizeof(*udph);
+ /* remove udp header */
+ skb_pull(skb, sizeof(struct udphdr));
+
rxe_rcv(skb);
return 0;
@@ -401,6 +404,9 @@ static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt)
return -EIO;
}
+ /* remove udp header */
+ skb_pull(skb, sizeof(struct udphdr));
+
rxe_rcv(skb);
return 0;
@@ -412,15 +418,16 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
int err;
int is_request = pkt->mask & RXE_REQ_MASK;
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ unsigned long flags;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if ((is_request && (qp_state(qp) < IB_QPS_RTS)) ||
(!is_request && (qp_state(qp) < IB_QPS_RTR))) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
rxe_dbg_qp(qp, "Packet dropped. QP is not in ready state\n");
goto drop;
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
rxe_icrc_generate(skb, pkt);
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index c5451a4488ca..a0f206431cf8 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -176,6 +176,9 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
spin_lock_init(&qp->rq.producer_lock);
spin_lock_init(&qp->rq.consumer_lock);
+ skb_queue_head_init(&qp->req_pkts);
+ skb_queue_head_init(&qp->resp_pkts);
+
atomic_set(&qp->ssn, 0);
atomic_set(&qp->skb_out, 0);
}
@@ -234,8 +237,6 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
qp->req.opcode = -1;
qp->comp.opcode = -1;
- skb_queue_head_init(&qp->req_pkts);
-
rxe_init_task(&qp->req.task, qp, rxe_requester);
rxe_init_task(&qp->comp.task, qp, rxe_completer);
@@ -279,8 +280,6 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
}
}
- skb_queue_head_init(&qp->resp_pkts);
-
rxe_init_task(&qp->resp.task, qp, rxe_responder);
qp->resp.opcode = OPCODE_NONE;
@@ -300,6 +299,7 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
struct rxe_cq *rcq = to_rcq(init->recv_cq);
struct rxe_cq *scq = to_rcq(init->send_cq);
struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL;
+ unsigned long flags;
rxe_get(pd);
rxe_get(rcq);
@@ -325,10 +325,10 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
if (err)
goto err2;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
qp->attr.qp_state = IB_QPS_RESET;
qp->valid = 1;
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
return 0;
@@ -492,24 +492,28 @@ static void rxe_qp_reset(struct rxe_qp *qp)
/* move the qp to the error state */
void rxe_qp_error(struct rxe_qp *qp)
{
- spin_lock_bh(&qp->state_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->state_lock, flags);
qp->attr.qp_state = IB_QPS_ERR;
/* drain work and packet queues */
rxe_sched_task(&qp->resp.task);
rxe_sched_task(&qp->comp.task);
rxe_sched_task(&qp->req.task);
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
}
static void rxe_qp_sqd(struct rxe_qp *qp, struct ib_qp_attr *attr,
int mask)
{
- spin_lock_bh(&qp->state_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->state_lock, flags);
qp->attr.sq_draining = 1;
rxe_sched_task(&qp->comp.task);
rxe_sched_task(&qp->req.task);
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
}
/* caller should hold qp->state_lock */
@@ -555,14 +559,16 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
qp->attr.cur_qp_state = attr->qp_state;
if (mask & IB_QP_STATE) {
- spin_lock_bh(&qp->state_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->state_lock, flags);
err = __qp_chk_state(qp, attr, mask);
if (!err) {
qp->attr.qp_state = attr->qp_state;
rxe_dbg_qp(qp, "state -> %s\n",
qps2str[attr->qp_state]);
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
if (err)
return err;
@@ -688,6 +694,8 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
/* called by the query qp verb */
int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
{
+ unsigned long flags;
+
*attr = qp->attr;
attr->rq_psn = qp->resp.psn;
@@ -708,12 +716,13 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
/* Applications that get this state typically spin on it.
* Yield the processor
*/
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (qp->attr.sq_draining) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
cond_resched();
+ } else {
+ spin_unlock_irqrestore(&qp->state_lock, flags);
}
- spin_unlock_bh(&qp->state_lock);
return 0;
}
@@ -736,10 +745,11 @@ int rxe_qp_chk_destroy(struct rxe_qp *qp)
static void rxe_qp_do_cleanup(struct work_struct *work)
{
struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work);
+ unsigned long flags;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
qp->valid = 0;
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
qp->qp_timeout_jiffies = 0;
if (qp_type(qp) == IB_QPT_RC) {
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 2f953cc74256..5861e4244049 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -14,6 +14,7 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct rxe_qp *qp)
{
unsigned int pkt_type;
+ unsigned long flags;
if (unlikely(!qp->valid))
return -EINVAL;
@@ -38,19 +39,19 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
return -EINVAL;
}
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (pkt->mask & RXE_REQ_MASK) {
if (unlikely(qp_state(qp) < IB_QPS_RTR)) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
return -EINVAL;
}
} else {
if (unlikely(qp_state(qp) < IB_QPS_RTS)) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
return -EINVAL;
}
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
return 0;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 65134a9aefe7..5fe7cbae3031 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -99,17 +99,18 @@ static void req_retry(struct rxe_qp *qp)
void rnr_nak_timer(struct timer_list *t)
{
struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer);
+ unsigned long flags;
rxe_dbg_qp(qp, "nak timer fired\n");
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (qp->valid) {
/* request a send queue retry */
qp->req.need_retry = 1;
qp->req.wait_for_rnr_timer = 0;
rxe_sched_task(&qp->req.task);
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
}
static void req_check_sq_drain_done(struct rxe_qp *qp)
@@ -118,8 +119,9 @@ static void req_check_sq_drain_done(struct rxe_qp *qp)
unsigned int index;
unsigned int cons;
struct rxe_send_wqe *wqe;
+ unsigned long flags;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (qp_state(qp) == IB_QPS_SQD) {
q = qp->sq.queue;
index = qp->req.wqe_index;
@@ -140,7 +142,7 @@ static void req_check_sq_drain_done(struct rxe_qp *qp)
break;
qp->attr.sq_draining = 0;
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
if (qp->ibqp.event_handler) {
struct ib_event ev;
@@ -154,7 +156,7 @@ static void req_check_sq_drain_done(struct rxe_qp *qp)
return;
} while (0);
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
}
static struct rxe_send_wqe *__req_next_wqe(struct rxe_qp *qp)
@@ -173,6 +175,7 @@ static struct rxe_send_wqe *__req_next_wqe(struct rxe_qp *qp)
static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
{
struct rxe_send_wqe *wqe;
+ unsigned long flags;
req_check_sq_drain_done(qp);
@@ -180,13 +183,13 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
if (wqe == NULL)
return NULL;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (unlikely((qp_state(qp) == IB_QPS_SQD) &&
(wqe->state != wqe_state_processing))) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
return NULL;
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp);
return wqe;
@@ -676,16 +679,17 @@ int rxe_requester(struct rxe_qp *qp)
struct rxe_queue *q = qp->sq.queue;
struct rxe_ah *ah;
struct rxe_av *av;
+ unsigned long flags;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (unlikely(!qp->valid)) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
goto exit;
}
if (unlikely(qp_state(qp) == IB_QPS_ERR)) {
wqe = __req_next_wqe(qp);
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
if (wqe)
goto err;
else
@@ -700,10 +704,10 @@ int rxe_requester(struct rxe_qp *qp)
qp->req.wait_psn = 0;
qp->req.need_retry = 0;
qp->req.wait_for_rnr_timer = 0;
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
goto exit;
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
/* we come here if the retransmit timer has fired
* or if the rnr timer has fired. If the retransmit
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 68f6cd188d8e..ee68306555b9 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -489,8 +489,9 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
if (mw->access & IB_ZERO_BASED)
qp->resp.offset = mw->addr;
- rxe_put(mw);
rxe_get(mr);
+ rxe_put(mw);
+ mw = NULL;
} else {
mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
if (!mr) {
@@ -1047,6 +1048,7 @@ static enum resp_states do_complete(struct rxe_qp *qp,
struct ib_uverbs_wc *uwc = &cqe.uibwc;
struct rxe_recv_wqe *wqe = qp->resp.wqe;
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ unsigned long flags;
if (!wqe)
goto finish;
@@ -1137,12 +1139,12 @@ static enum resp_states do_complete(struct rxe_qp *qp,
return RESPST_ERR_CQ_OVERFLOW;
finish:
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (unlikely(qp_state(qp) == IB_QPS_ERR)) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
return RESPST_CHK_RESOURCE;
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
if (unlikely(!pkt))
return RESPST_DONE;
@@ -1468,18 +1470,19 @@ int rxe_responder(struct rxe_qp *qp)
enum resp_states state;
struct rxe_pkt_info *pkt = NULL;
int ret;
+ unsigned long flags;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
qp_state(qp) == IB_QPS_RESET) {
bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR);
drain_req_pkts(qp);
flush_recv_queue(qp, notify);
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
goto exit;
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index dea605b7f683..4d8f6b8051ff 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -904,10 +904,10 @@ static int rxe_post_send_kernel(struct rxe_qp *qp,
if (!err)
rxe_sched_task(&qp->req.task);
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (qp_state(qp) == IB_QPS_ERR)
rxe_sched_task(&qp->comp.task);
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
return err;
}
@@ -917,22 +917,23 @@ static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
{
struct rxe_qp *qp = to_rqp(ibqp);
int err;
+ unsigned long flags;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
/* caller has already called destroy_qp */
if (WARN_ON_ONCE(!qp->valid)) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
rxe_err_qp(qp, "qp has been destroyed");
return -EINVAL;
}
if (unlikely(qp_state(qp) < IB_QPS_RTS)) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
*bad_wr = wr;
rxe_err_qp(qp, "qp not ready to send");
return -EINVAL;
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
if (qp->is_user) {
/* Utilize process context to do protocol processing */
@@ -1008,22 +1009,22 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
struct rxe_rq *rq = &qp->rq;
unsigned long flags;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
/* caller has already called destroy_qp */
if (WARN_ON_ONCE(!qp->valid)) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
rxe_err_qp(qp, "qp has been destroyed");
return -EINVAL;
}
/* see C10-97.2.1 */
if (unlikely((qp_state(qp) < IB_QPS_INIT))) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
*bad_wr = wr;
rxe_dbg_qp(qp, "qp not ready to post recv");
return -EINVAL;
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
if (unlikely(qp->srq)) {
*bad_wr = wr;
@@ -1044,10 +1045,10 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
spin_unlock_irqrestore(&rq->producer_lock, flags);
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (qp_state(qp) == IB_QPS_ERR)
rxe_sched_task(&qp->resp.task);
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
return err;
}
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index f290cd49698e..92e1e7587af8 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -657,9 +657,13 @@ static int
isert_connect_error(struct rdma_cm_id *cma_id)
{
struct isert_conn *isert_conn = cma_id->qp->qp_context;
+ struct isert_np *isert_np = cma_id->context;
ib_drain_qp(isert_conn->qp);
+
+ mutex_lock(&isert_np->mutex);
list_del_init(&isert_conn->node);
+ mutex_unlock(&isert_np->mutex);
isert_conn->cm_id = NULL;
isert_put_conn(isert_conn);
@@ -2431,6 +2435,7 @@ isert_free_np(struct iscsi_np *np)
{
struct isert_np *isert_np = np->np_context;
struct isert_conn *isert_conn, *n;
+ LIST_HEAD(drop_conn_list);
if (isert_np->cm_id)
rdma_destroy_id(isert_np->cm_id);
@@ -2450,7 +2455,7 @@ isert_free_np(struct iscsi_np *np)
node) {
isert_info("cleaning isert_conn %p state (%d)\n",
isert_conn, isert_conn->state);
- isert_connect_release(isert_conn);
+ list_move_tail(&isert_conn->node, &drop_conn_list);
}
}
@@ -2461,11 +2466,16 @@ isert_free_np(struct iscsi_np *np)
node) {
isert_info("cleaning isert_conn %p state (%d)\n",
isert_conn, isert_conn->state);
- isert_connect_release(isert_conn);
+ list_move_tail(&isert_conn->node, &drop_conn_list);
}
}
mutex_unlock(&isert_np->mutex);
+ list_for_each_entry_safe(isert_conn, n, &drop_conn_list, node) {
+ list_del_init(&isert_conn->node);
+ isert_connect_release(isert_conn);
+ }
+
np->np_context = NULL;
kfree(isert_np);
}
@@ -2560,8 +2570,6 @@ static void isert_wait_conn(struct iscsit_conn *conn)
isert_put_unsol_pending_cmds(conn);
isert_wait4cmds(conn);
isert_wait4logout(isert_conn);
-
- queue_work(isert_release_wq, &isert_conn->release_work);
}
static void isert_free_conn(struct iscsit_conn *conn)
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index edb2e3a25880..cfb50bfe53c3 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -2040,6 +2040,7 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id,
return 0;
}
+/* The caller should do the cleanup in case of error */
static int create_cm(struct rtrs_clt_con *con)
{
struct rtrs_path *s = con->c.path;
@@ -2062,14 +2063,14 @@ static int create_cm(struct rtrs_clt_con *con)
err = rdma_set_reuseaddr(cm_id, 1);
if (err != 0) {
rtrs_err(s, "Set address reuse failed, err: %d\n", err);
- goto destroy_cm;
+ return err;
}
err = rdma_resolve_addr(cm_id, (struct sockaddr *)&clt_path->s.src_addr,
(struct sockaddr *)&clt_path->s.dst_addr,
RTRS_CONNECT_TIMEOUT_MS);
if (err) {
rtrs_err(s, "Failed to resolve address, err: %d\n", err);
- goto destroy_cm;
+ return err;
}
/*
* Combine connection status and session events. This is needed
@@ -2084,29 +2085,15 @@ static int create_cm(struct rtrs_clt_con *con)
if (err == 0)
err = -ETIMEDOUT;
/* Timedout or interrupted */
- goto errr;
- }
- if (con->cm_err < 0) {
- err = con->cm_err;
- goto errr;
+ return err;
}
- if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTING) {
+ if (con->cm_err < 0)
+ return con->cm_err;
+ if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTING)
/* Device removal */
- err = -ECONNABORTED;
- goto errr;
- }
+ return -ECONNABORTED;
return 0;
-
-errr:
- stop_cm(con);
- mutex_lock(&con->con_mutex);
- destroy_con_cq_qp(con);
- mutex_unlock(&con->con_mutex);
-destroy_cm:
- destroy_cm(con);
-
- return err;
}
static void rtrs_clt_path_up(struct rtrs_clt_path *clt_path)
@@ -2334,7 +2321,7 @@ static void rtrs_clt_close_work(struct work_struct *work)
static int init_conns(struct rtrs_clt_path *clt_path)
{
unsigned int cid;
- int err;
+ int err, i;
/*
* On every new session connections increase reconnect counter
@@ -2350,10 +2337,8 @@ static int init_conns(struct rtrs_clt_path *clt_path)
goto destroy;
err = create_cm(to_clt_con(clt_path->s.con[cid]));
- if (err) {
- destroy_con(to_clt_con(clt_path->s.con[cid]));
+ if (err)
goto destroy;
- }
}
err = alloc_path_reqs(clt_path);
if (err)
@@ -2364,15 +2349,21 @@ static int init_conns(struct rtrs_clt_path *clt_path)
return 0;
destroy:
- while (cid--) {
- struct rtrs_clt_con *con = to_clt_con(clt_path->s.con[cid]);
+ /* Make sure we do the cleanup in the order they are created */
+ for (i = 0; i <= cid; i++) {
+ struct rtrs_clt_con *con;
- stop_cm(con);
+ if (!clt_path->s.con[i])
+ break;
- mutex_lock(&con->con_mutex);
- destroy_con_cq_qp(con);
- mutex_unlock(&con->con_mutex);
- destroy_cm(con);
+ con = to_clt_con(clt_path->s.con[i]);
+ if (con->c.cm_id) {
+ stop_cm(con);
+ mutex_lock(&con->con_mutex);
+ destroy_con_cq_qp(con);
+ mutex_unlock(&con->con_mutex);
+ destroy_cm(con);
+ }
destroy_con(con);
}
/*
diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c
index 4bf9d868cc52..3696f367ff51 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs.c
@@ -37,8 +37,10 @@ struct rtrs_iu *rtrs_iu_alloc(u32 iu_num, size_t size, gfp_t gfp_mask,
goto err;
iu->dma_addr = ib_dma_map_single(dma_dev, iu->buf, size, dir);
- if (ib_dma_mapping_error(dma_dev, iu->dma_addr))
+ if (ib_dma_mapping_error(dma_dev, iu->dma_addr)) {
+ kfree(iu->buf);
goto err;
+ }
iu->cqe.done = done;
iu->size = size;
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 37e876d45eb9..641eb86f276e 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -703,7 +703,7 @@ void input_close_device(struct input_handle *handle)
__input_release_device(handle);
- if (!dev->inhibited && !--dev->users) {
+ if (!--dev->users && !dev->inhibited) {
if (dev->poller)
input_dev_poller_stop(dev->poller);
if (dev->close)
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 28be88e0e96a..f33622fe946f 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -281,7 +281,6 @@ static const struct xpad_device {
{ 0x1430, 0xf801, "RedOctane Controller", 0, XTYPE_XBOX360 },
{ 0x146b, 0x0601, "BigBen Interactive XBOX 360 Controller", 0, XTYPE_XBOX360 },
{ 0x146b, 0x0604, "Bigben Interactive DAIJA Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
- { 0x1532, 0x0037, "Razer Sabertooth", 0, XTYPE_XBOX360 },
{ 0x1532, 0x0a00, "Razer Atrox Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE },
{ 0x1532, 0x0a03, "Razer Wildcat", 0, XTYPE_XBOXONE },
{ 0x15e4, 0x3f00, "Power A Mini Pro Elite", 0, XTYPE_XBOX360 },
diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c
index 09489380afda..e79f5497948b 100644
--- a/drivers/input/misc/soc_button_array.c
+++ b/drivers/input/misc/soc_button_array.c
@@ -109,6 +109,27 @@ static const struct dmi_system_id dmi_use_low_level_irq[] = {
};
/*
+ * Some devices have a wrong entry which points to a GPIO which is
+ * required in another driver, so this driver must not claim it.
+ */
+static const struct dmi_system_id dmi_invalid_acpi_index[] = {
+ {
+ /*
+ * Lenovo Yoga Book X90F / X90L, the PNP0C40 home button entry
+ * points to a GPIO which is not a home button and which is
+ * required by the lenovo-yogabook driver.
+ */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "YETI-11"),
+ },
+ .driver_data = (void *)1l,
+ },
+ {} /* Terminating entry */
+};
+
+/*
* Get the Nth GPIO number from the ACPI object.
*/
static int soc_button_lookup_gpio(struct device *dev, int acpi_index,
@@ -137,6 +158,8 @@ soc_button_device_create(struct platform_device *pdev,
struct platform_device *pd;
struct gpio_keys_button *gpio_keys;
struct gpio_keys_platform_data *gpio_keys_pdata;
+ const struct dmi_system_id *dmi_id;
+ int invalid_acpi_index = -1;
int error, gpio, irq;
int n_buttons = 0;
@@ -154,10 +177,17 @@ soc_button_device_create(struct platform_device *pdev,
gpio_keys = (void *)(gpio_keys_pdata + 1);
n_buttons = 0;
+ dmi_id = dmi_first_match(dmi_invalid_acpi_index);
+ if (dmi_id)
+ invalid_acpi_index = (long)dmi_id->driver_data;
+
for (info = button_info; info->name; info++) {
if (info->autorepeat != autorepeat)
continue;
+ if (info->acpi_index == invalid_acpi_index)
+ continue;
+
error = soc_button_lookup_gpio(&pdev->dev, info->acpi_index, &gpio, &irq);
if (error || irq < 0) {
/*
diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index ece97f8c6a3e..2118b2075f43 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -674,10 +674,11 @@ static void process_packet_head_v4(struct psmouse *psmouse)
struct input_dev *dev = psmouse->dev;
struct elantech_data *etd = psmouse->private;
unsigned char *packet = psmouse->packet;
- int id = ((packet[3] & 0xe0) >> 5) - 1;
+ int id;
int pres, traces;
- if (id < 0)
+ id = ((packet[3] & 0xe0) >> 5) - 1;
+ if (id < 0 || id >= ETP_MAX_FINGERS)
return;
etd->mt[id].x = ((packet[1] & 0x0f) << 8) | packet[2];
@@ -707,7 +708,7 @@ static void process_packet_motion_v4(struct psmouse *psmouse)
int id, sid;
id = ((packet[0] & 0xe0) >> 5) - 1;
- if (id < 0)
+ if (id < 0 || id >= ETP_MAX_FINGERS)
return;
sid = ((packet[3] & 0xe0) >> 5) - 1;
@@ -728,7 +729,7 @@ static void process_packet_motion_v4(struct psmouse *psmouse)
input_report_abs(dev, ABS_MT_POSITION_X, etd->mt[id].x);
input_report_abs(dev, ABS_MT_POSITION_Y, etd->mt[id].y);
- if (sid >= 0) {
+ if (sid >= 0 && sid < ETP_MAX_FINGERS) {
etd->mt[sid].x += delta_x2 * weight;
etd->mt[sid].y -= delta_y2 * weight;
input_mt_slot(dev, sid);
diff --git a/drivers/input/touchscreen/cyttsp5.c b/drivers/input/touchscreen/cyttsp5.c
index 30102cb80fac..3c9d07218f48 100644
--- a/drivers/input/touchscreen/cyttsp5.c
+++ b/drivers/input/touchscreen/cyttsp5.c
@@ -560,7 +560,7 @@ static int cyttsp5_hid_output_get_sysinfo(struct cyttsp5 *ts)
static int cyttsp5_hid_output_bl_launch_app(struct cyttsp5 *ts)
{
int rc;
- u8 cmd[HID_OUTPUT_BL_LAUNCH_APP];
+ u8 cmd[HID_OUTPUT_BL_LAUNCH_APP_SIZE];
u16 crc;
put_unaligned_le16(HID_OUTPUT_BL_LAUNCH_APP_SIZE, cmd);
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index db98c3f86e8c..4d800601e8ec 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -282,6 +282,7 @@ config EXYNOS_IOMMU_DEBUG
config IPMMU_VMSA
bool "Renesas VMSA-compatible IPMMU"
depends on ARCH_RENESAS || COMPILE_TEST
+ depends on ARM || ARM64 || COMPILE_TEST
depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE
select IOMMU_API
select IOMMU_IO_PGTABLE_LPAE
@@ -417,22 +418,6 @@ config S390_IOMMU
help
Support for the IOMMU API for s390 PCI devices.
-config S390_CCW_IOMMU
- bool "S390 CCW IOMMU Support"
- depends on S390 && CCW || COMPILE_TEST
- select IOMMU_API
- help
- Enables bits of IOMMU API required by VFIO. The iommu_ops
- is not implemented as it is not necessary for VFIO.
-
-config S390_AP_IOMMU
- bool "S390 AP IOMMU Support"
- depends on S390 && ZCRYPT || COMPILE_TEST
- select IOMMU_API
- help
- Enables bits of IOMMU API required by VFIO. The iommu_ops
- is not implemented as it is not necessary for VFIO.
-
config MTK_IOMMU
tristate "MediaTek IOMMU Support"
depends on ARCH_MEDIATEK || COMPILE_TEST
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index e98f20a9bdd8..9beeceb9d825 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -15,9 +15,7 @@ extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
extern void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid);
extern void amd_iommu_restart_event_logging(struct amd_iommu *iommu);
-extern int amd_iommu_init_devices(void);
-extern void amd_iommu_uninit_devices(void);
-extern void amd_iommu_init_notifier(void);
+extern void amd_iommu_restart_ga_log(struct amd_iommu *iommu);
extern void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid);
#ifdef CONFIG_AMD_IOMMU_DEBUGFS
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 329a406cc37d..c2d80a4e5fb0 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -759,6 +759,30 @@ void amd_iommu_restart_event_logging(struct amd_iommu *iommu)
}
/*
+ * This function restarts event logging in case the IOMMU experienced
+ * an GA log overflow.
+ */
+void amd_iommu_restart_ga_log(struct amd_iommu *iommu)
+{
+ u32 status;
+
+ status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+ if (status & MMIO_STATUS_GALOG_RUN_MASK)
+ return;
+
+ pr_info_ratelimited("IOMMU GA Log restarting\n");
+
+ iommu_feature_disable(iommu, CONTROL_GALOG_EN);
+ iommu_feature_disable(iommu, CONTROL_GAINT_EN);
+
+ writel(MMIO_STATUS_GALOG_OVERFLOW_MASK,
+ iommu->mmio_base + MMIO_STATUS_OFFSET);
+
+ iommu_feature_enable(iommu, CONTROL_GAINT_EN);
+ iommu_feature_enable(iommu, CONTROL_GALOG_EN);
+}
+
+/*
* This function resets the command buffer if the IOMMU stopped fetching
* commands from it.
*/
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 4a314647d1f7..e8a2e5984acb 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -845,6 +845,7 @@ amd_iommu_set_pci_msi_domain(struct device *dev, struct amd_iommu *iommu) { }
(MMIO_STATUS_EVT_OVERFLOW_INT_MASK | \
MMIO_STATUS_EVT_INT_MASK | \
MMIO_STATUS_PPR_INT_MASK | \
+ MMIO_STATUS_GALOG_OVERFLOW_MASK | \
MMIO_STATUS_GALOG_INT_MASK)
irqreturn_t amd_iommu_int_thread(int irq, void *data)
@@ -868,10 +869,16 @@ irqreturn_t amd_iommu_int_thread(int irq, void *data)
}
#ifdef CONFIG_IRQ_REMAP
- if (status & MMIO_STATUS_GALOG_INT_MASK) {
+ if (status & (MMIO_STATUS_GALOG_INT_MASK |
+ MMIO_STATUS_GALOG_OVERFLOW_MASK)) {
pr_devel("Processing IOMMU GA Log\n");
iommu_poll_ga_log(iommu);
}
+
+ if (status & MMIO_STATUS_GALOG_OVERFLOW_MASK) {
+ pr_info_ratelimited("IOMMU GA Log overflow\n");
+ amd_iommu_restart_ga_log(iommu);
+ }
#endif
if (status & MMIO_STATUS_EVT_OVERFLOW_INT_MASK) {
@@ -2067,14 +2074,10 @@ static struct protection_domain *protection_domain_alloc(unsigned int type)
{
struct io_pgtable_ops *pgtbl_ops;
struct protection_domain *domain;
- int pgtable = amd_iommu_pgtable;
+ int pgtable;
int mode = DEFAULT_PGTABLE_LEVEL;
int ret;
- domain = kzalloc(sizeof(*domain), GFP_KERNEL);
- if (!domain)
- return NULL;
-
/*
* Force IOMMU v1 page table when iommu=pt and
* when allocating domain for pass-through devices.
@@ -2084,8 +2087,16 @@ static struct protection_domain *protection_domain_alloc(unsigned int type)
mode = PAGE_MODE_NONE;
} else if (type == IOMMU_DOMAIN_UNMANAGED) {
pgtable = AMD_IOMMU_V1;
+ } else if (type == IOMMU_DOMAIN_DMA || type == IOMMU_DOMAIN_DMA_FQ) {
+ pgtable = amd_iommu_pgtable;
+ } else {
+ return NULL;
}
+ domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+ if (!domain)
+ return NULL;
+
switch (pgtable) {
case AMD_IOMMU_V1:
ret = protection_domain_init_v1(domain, mode);
@@ -2118,6 +2129,15 @@ out_err:
return NULL;
}
+static inline u64 dma_max_address(void)
+{
+ if (amd_iommu_pgtable == AMD_IOMMU_V1)
+ return ~0ULL;
+
+ /* V2 with 4/5 level page table */
+ return ((1ULL << PM_LEVEL_SHIFT(amd_iommu_gpt_level)) - 1);
+}
+
static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
{
struct protection_domain *domain;
@@ -2134,7 +2154,7 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
return NULL;
domain->domain.geometry.aperture_start = 0;
- domain->domain.geometry.aperture_end = ~0ULL;
+ domain->domain.geometry.aperture_end = dma_max_address();
domain->domain.geometry.force_aperture = true;
return &domain->domain;
@@ -2387,7 +2407,7 @@ static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
unsigned long flags;
spin_lock_irqsave(&dom->lock, flags);
- domain_flush_pages(dom, gather->start, gather->end - gather->start, 1);
+ domain_flush_pages(dom, gather->start, gather->end - gather->start + 1, 1);
amd_iommu_domain_flush_complete(dom);
spin_unlock_irqrestore(&dom->lock, flags);
}
@@ -3493,8 +3513,7 @@ int amd_iommu_activate_guest_mode(void *data)
struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
u64 valid;
- if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
- !entry || entry->lo.fields_vapic.guest_mode)
+ if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) || !entry)
return 0;
valid = entry->lo.fields_vapic.valid;
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index ae09c627bc84..c71afda79d64 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -517,6 +517,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
{ .compatible = "qcom,qcm2290-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,qdu1000-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,sc7180-smmu-500", .data = &qcom_smmu_500_impl0_data },
+ { .compatible = "qcom,sc7180-smmu-v2", .data = &qcom_smmu_v2_data },
{ .compatible = "qcom,sc7280-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,sc8180x-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_500_impl0_data },
@@ -561,5 +562,14 @@ struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu)
if (match)
return qcom_smmu_create(smmu, match->data);
+ /*
+ * If you hit this WARN_ON() you are missing an entry in the
+ * qcom_smmu_impl_of_match[] table, and GPU per-process page-
+ * tables will be broken.
+ */
+ WARN(of_device_is_compatible(np, "qcom,adreno-smmu"),
+ "Missing qcom_smmu_impl_of_match entry for: %s",
+ dev_name(smmu->dev));
+
return smmu;
}
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index aecc7d154f28..e93906d6e112 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -781,7 +781,8 @@ static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain)
{
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
- mtk_iommu_tlb_flush_all(dom->bank->parent_data);
+ if (dom->bank)
+ mtk_iommu_tlb_flush_all(dom->bank->parent_data);
}
static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index ea5a3088bb7e..4054030c3237 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1335,20 +1335,22 @@ static int rk_iommu_probe(struct platform_device *pdev)
for (i = 0; i < iommu->num_irq; i++) {
int irq = platform_get_irq(pdev, i);
- if (irq < 0)
- return irq;
+ if (irq < 0) {
+ err = irq;
+ goto err_pm_disable;
+ }
err = devm_request_irq(iommu->dev, irq, rk_iommu_irq,
IRQF_SHARED, dev_name(dev), iommu);
- if (err) {
- pm_runtime_disable(dev);
- goto err_remove_sysfs;
- }
+ if (err)
+ goto err_pm_disable;
}
dma_set_mask_and_coherent(dev, rk_ops->dma_bit_mask);
return 0;
+err_pm_disable:
+ pm_runtime_disable(dev);
err_remove_sysfs:
iommu_device_sysfs_remove(&iommu->iommu);
err_put_group:
diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
index a610821c8ff2..afd6a1841715 100644
--- a/drivers/irqchip/irq-gic-common.c
+++ b/drivers/irqchip/irq-gic-common.c
@@ -16,7 +16,13 @@ void gic_enable_of_quirks(const struct device_node *np,
const struct gic_quirk *quirks, void *data)
{
for (; quirks->desc; quirks++) {
- if (!of_device_is_compatible(np, quirks->compatible))
+ if (!quirks->compatible && !quirks->property)
+ continue;
+ if (quirks->compatible &&
+ !of_device_is_compatible(np, quirks->compatible))
+ continue;
+ if (quirks->property &&
+ !of_property_read_bool(np, quirks->property))
continue;
if (quirks->init(data))
pr_info("GIC: enabling workaround for %s\n",
@@ -28,7 +34,7 @@ void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
void *data)
{
for (; quirks->desc; quirks++) {
- if (quirks->compatible)
+ if (quirks->compatible || quirks->property)
continue;
if (quirks->iidr != (quirks->mask & iidr))
continue;
diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
index 27e3d4ed4f32..3db4592cda1c 100644
--- a/drivers/irqchip/irq-gic-common.h
+++ b/drivers/irqchip/irq-gic-common.h
@@ -13,6 +13,7 @@
struct gic_quirk {
const char *desc;
const char *compatible;
+ const char *property;
bool (*init)(void *data);
u32 iidr;
u32 mask;
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 6fcee221f201..a605aa79435a 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -39,6 +39,7 @@
#define FLAGS_WORKAROUND_GICR_WAKER_MSM8996 (1ULL << 0)
#define FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539 (1ULL << 1)
+#define FLAGS_WORKAROUND_MTK_GICR_SAVE (1ULL << 2)
#define GIC_IRQ_TYPE_PARTITION (GIC_IRQ_TYPE_LPI + 1)
@@ -1720,6 +1721,15 @@ static bool gic_enable_quirk_msm8996(void *data)
return true;
}
+static bool gic_enable_quirk_mtk_gicr(void *data)
+{
+ struct gic_chip_data *d = data;
+
+ d->flags |= FLAGS_WORKAROUND_MTK_GICR_SAVE;
+
+ return true;
+}
+
static bool gic_enable_quirk_cavium_38539(void *data)
{
struct gic_chip_data *d = data;
@@ -1793,6 +1803,11 @@ static const struct gic_quirk gic_quirks[] = {
.init = gic_enable_quirk_msm8996,
},
{
+ .desc = "GICv3: Mediatek Chromebook GICR save problem",
+ .property = "mediatek,broken-save-restore-fw",
+ .init = gic_enable_quirk_mtk_gicr,
+ },
+ {
.desc = "GICv3: HIP06 erratum 161010803",
.iidr = 0x0204043b,
.mask = 0xffffffff,
@@ -1834,6 +1849,11 @@ static void gic_enable_nmi_support(void)
if (!gic_prio_masking_enabled())
return;
+ if (gic_data.flags & FLAGS_WORKAROUND_MTK_GICR_SAVE) {
+ pr_warn("Skipping NMI enable due to firmware issues\n");
+ return;
+ }
+
ppi_nmi_refs = kcalloc(gic_data.ppi_nr, sizeof(*ppi_nmi_refs), GFP_KERNEL);
if (!ppi_nmi_refs)
return;
diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c
index eada5e0e3eb9..5101a3fb11df 100644
--- a/drivers/irqchip/irq-mbigen.c
+++ b/drivers/irqchip/irq-mbigen.c
@@ -240,26 +240,27 @@ static int mbigen_of_create_domain(struct platform_device *pdev,
struct irq_domain *domain;
struct device_node *np;
u32 num_pins;
+ int ret = 0;
+
+ parent = bus_get_dev_root(&platform_bus_type);
+ if (!parent)
+ return -ENODEV;
for_each_child_of_node(pdev->dev.of_node, np) {
if (!of_property_read_bool(np, "interrupt-controller"))
continue;
- parent = bus_get_dev_root(&platform_bus_type);
- if (parent) {
- child = of_platform_device_create(np, NULL, parent);
- put_device(parent);
- if (!child) {
- of_node_put(np);
- return -ENOMEM;
- }
+ child = of_platform_device_create(np, NULL, parent);
+ if (!child) {
+ ret = -ENOMEM;
+ break;
}
if (of_property_read_u32(child->dev.of_node, "num-pins",
&num_pins) < 0) {
dev_err(&pdev->dev, "No num-pins property\n");
- of_node_put(np);
- return -EINVAL;
+ ret = -EINVAL;
+ break;
}
domain = platform_msi_create_device_domain(&child->dev, num_pins,
@@ -267,12 +268,16 @@ static int mbigen_of_create_domain(struct platform_device *pdev,
&mbigen_domain_ops,
mgn_chip);
if (!domain) {
- of_node_put(np);
- return -ENOMEM;
+ ret = -ENOMEM;
+ break;
}
}
- return 0;
+ put_device(parent);
+ if (ret)
+ of_node_put(np);
+
+ return ret;
}
#ifdef CONFIG_ACPI
diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c
index 2aaa9aad3e87..7da18ef95211 100644
--- a/drivers/irqchip/irq-meson-gpio.c
+++ b/drivers/irqchip/irq-meson-gpio.c
@@ -150,7 +150,7 @@ static const struct meson_gpio_irq_params s4_params = {
INIT_MESON_S4_COMMON_DATA(82)
};
-static const struct of_device_id meson_irq_gpio_matches[] = {
+static const struct of_device_id meson_irq_gpio_matches[] __maybe_unused = {
{ .compatible = "amlogic,meson8-gpio-intc", .data = &meson8_params },
{ .compatible = "amlogic,meson8b-gpio-intc", .data = &meson8b_params },
{ .compatible = "amlogic,meson-gxbb-gpio-intc", .data = &gxbb_params },
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index 046c355e120b..6d5ecc10a870 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -50,7 +50,7 @@ void __iomem *mips_gic_base;
static DEFINE_PER_CPU_READ_MOSTLY(unsigned long[GIC_MAX_LONGS], pcpu_masks);
-static DEFINE_SPINLOCK(gic_lock);
+static DEFINE_RAW_SPINLOCK(gic_lock);
static struct irq_domain *gic_irq_domain;
static int gic_shared_intrs;
static unsigned int gic_cpu_pin;
@@ -210,7 +210,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
irq = GIC_HWIRQ_TO_SHARED(d->hwirq);
- spin_lock_irqsave(&gic_lock, flags);
+ raw_spin_lock_irqsave(&gic_lock, flags);
switch (type & IRQ_TYPE_SENSE_MASK) {
case IRQ_TYPE_EDGE_FALLING:
pol = GIC_POL_FALLING_EDGE;
@@ -250,7 +250,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
else
irq_set_chip_handler_name_locked(d, &gic_level_irq_controller,
handle_level_irq, NULL);
- spin_unlock_irqrestore(&gic_lock, flags);
+ raw_spin_unlock_irqrestore(&gic_lock, flags);
return 0;
}
@@ -268,7 +268,7 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *cpumask,
return -EINVAL;
/* Assumption : cpumask refers to a single CPU */
- spin_lock_irqsave(&gic_lock, flags);
+ raw_spin_lock_irqsave(&gic_lock, flags);
/* Re-route this IRQ */
write_gic_map_vp(irq, BIT(mips_cm_vp_id(cpu)));
@@ -279,7 +279,7 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *cpumask,
set_bit(irq, per_cpu_ptr(pcpu_masks, cpu));
irq_data_update_effective_affinity(d, cpumask_of(cpu));
- spin_unlock_irqrestore(&gic_lock, flags);
+ raw_spin_unlock_irqrestore(&gic_lock, flags);
return IRQ_SET_MASK_OK;
}
@@ -357,12 +357,12 @@ static void gic_mask_local_irq_all_vpes(struct irq_data *d)
cd = irq_data_get_irq_chip_data(d);
cd->mask = false;
- spin_lock_irqsave(&gic_lock, flags);
+ raw_spin_lock_irqsave(&gic_lock, flags);
for_each_online_cpu(cpu) {
write_gic_vl_other(mips_cm_vp_id(cpu));
write_gic_vo_rmask(BIT(intr));
}
- spin_unlock_irqrestore(&gic_lock, flags);
+ raw_spin_unlock_irqrestore(&gic_lock, flags);
}
static void gic_unmask_local_irq_all_vpes(struct irq_data *d)
@@ -375,12 +375,12 @@ static void gic_unmask_local_irq_all_vpes(struct irq_data *d)
cd = irq_data_get_irq_chip_data(d);
cd->mask = true;
- spin_lock_irqsave(&gic_lock, flags);
+ raw_spin_lock_irqsave(&gic_lock, flags);
for_each_online_cpu(cpu) {
write_gic_vl_other(mips_cm_vp_id(cpu));
write_gic_vo_smask(BIT(intr));
}
- spin_unlock_irqrestore(&gic_lock, flags);
+ raw_spin_unlock_irqrestore(&gic_lock, flags);
}
static void gic_all_vpes_irq_cpu_online(void)
@@ -393,19 +393,21 @@ static void gic_all_vpes_irq_cpu_online(void)
unsigned long flags;
int i;
- spin_lock_irqsave(&gic_lock, flags);
+ raw_spin_lock_irqsave(&gic_lock, flags);
for (i = 0; i < ARRAY_SIZE(local_intrs); i++) {
unsigned int intr = local_intrs[i];
struct gic_all_vpes_chip_data *cd;
+ if (!gic_local_irq_is_routable(intr))
+ continue;
cd = &gic_all_vpes_chip_data[intr];
write_gic_vl_map(mips_gic_vx_map_reg(intr), cd->map);
if (cd->mask)
write_gic_vl_smask(BIT(intr));
}
- spin_unlock_irqrestore(&gic_lock, flags);
+ raw_spin_unlock_irqrestore(&gic_lock, flags);
}
static struct irq_chip gic_all_vpes_local_irq_controller = {
@@ -435,11 +437,11 @@ static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq,
data = irq_get_irq_data(virq);
- spin_lock_irqsave(&gic_lock, flags);
+ raw_spin_lock_irqsave(&gic_lock, flags);
write_gic_map_pin(intr, GIC_MAP_PIN_MAP_TO_PIN | gic_cpu_pin);
write_gic_map_vp(intr, BIT(mips_cm_vp_id(cpu)));
irq_data_update_effective_affinity(data, cpumask_of(cpu));
- spin_unlock_irqrestore(&gic_lock, flags);
+ raw_spin_unlock_irqrestore(&gic_lock, flags);
return 0;
}
@@ -531,12 +533,12 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq,
if (!gic_local_irq_is_routable(intr))
return -EPERM;
- spin_lock_irqsave(&gic_lock, flags);
+ raw_spin_lock_irqsave(&gic_lock, flags);
for_each_online_cpu(cpu) {
write_gic_vl_other(mips_cm_vp_id(cpu));
write_gic_vo_map(mips_gic_vx_map_reg(intr), map);
}
- spin_unlock_irqrestore(&gic_lock, flags);
+ raw_spin_unlock_irqrestore(&gic_lock, flags);
return 0;
}
diff --git a/drivers/leds/rgb/leds-qcom-lpg.c b/drivers/leds/rgb/leds-qcom-lpg.c
index 55a037234df1..1c849814a491 100644
--- a/drivers/leds/rgb/leds-qcom-lpg.c
+++ b/drivers/leds/rgb/leds-qcom-lpg.c
@@ -312,14 +312,14 @@ static int lpg_calc_freq(struct lpg_channel *chan, uint64_t period)
max_res = LPG_RESOLUTION_9BIT;
}
- min_period = (u64)NSEC_PER_SEC *
- div64_u64((1 << pwm_resolution_arr[0]), clk_rate_arr[clk_len - 1]);
+ min_period = div64_u64((u64)NSEC_PER_SEC * (1 << pwm_resolution_arr[0]),
+ clk_rate_arr[clk_len - 1]);
if (period <= min_period)
return -EINVAL;
/* Limit period to largest possible value, to avoid overflows */
- max_period = (u64)NSEC_PER_SEC * max_res * LPG_MAX_PREDIV *
- div64_u64((1 << LPG_MAX_M), 1024);
+ max_period = div64_u64((u64)NSEC_PER_SEC * max_res * LPG_MAX_PREDIV * (1 << LPG_MAX_M),
+ 1024);
if (period > max_period)
period = max_period;
diff --git a/drivers/mailbox/mailbox-test.c b/drivers/mailbox/mailbox-test.c
index c4a705c30331..fc6a12a51b40 100644
--- a/drivers/mailbox/mailbox-test.c
+++ b/drivers/mailbox/mailbox-test.c
@@ -98,6 +98,7 @@ static ssize_t mbox_test_message_write(struct file *filp,
size_t count, loff_t *ppos)
{
struct mbox_test_device *tdev = filp->private_data;
+ char *message;
void *data;
int ret;
@@ -113,12 +114,13 @@ static ssize_t mbox_test_message_write(struct file *filp,
return -EINVAL;
}
- mutex_lock(&tdev->mutex);
-
- tdev->message = kzalloc(MBOX_MAX_MSG_LEN, GFP_KERNEL);
- if (!tdev->message)
+ message = kzalloc(MBOX_MAX_MSG_LEN, GFP_KERNEL);
+ if (!message)
return -ENOMEM;
+ mutex_lock(&tdev->mutex);
+
+ tdev->message = message;
ret = copy_from_user(tdev->message, userbuf, count);
if (ret) {
ret = -EFAULT;
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 9e0c69958587..acffed750e3e 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -1828,7 +1828,7 @@ int dm_cache_metadata_abort(struct dm_cache_metadata *cmd)
* Replacement block manager (new_bm) is created and old_bm destroyed outside of
* cmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
* shrinker associated with the block manager's bufio client vs cmd root_lock).
- * - must take shrinker_mutex without holding cmd->root_lock
+ * - must take shrinker_rwsem without holding cmd->root_lock
*/
new_bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
CACHE_MAX_CONCURRENT_LOCKS);
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 8ba4cbb92351..6d301019e5e3 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1168,13 +1168,10 @@ static int do_resume(struct dm_ioctl *param)
/* Do we need to load a new map ? */
if (new_map) {
sector_t old_size, new_size;
- int srcu_idx;
/* Suspend if it isn't already suspended */
- old_map = dm_get_live_table(md, &srcu_idx);
- if ((param->flags & DM_SKIP_LOCKFS_FLAG) || !old_map)
+ if (param->flags & DM_SKIP_LOCKFS_FLAG)
suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG;
- dm_put_live_table(md, srcu_idx);
if (param->flags & DM_NOFLUSH_FLAG)
suspend_flags |= DM_SUSPEND_NOFLUSH_FLAG;
if (!dm_suspended_md(md))
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 9f5cb52c5763..9dd0409848ab 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -1756,13 +1756,15 @@ int dm_thin_remove_range(struct dm_thin_device *td,
int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *result)
{
- int r;
+ int r = -EINVAL;
uint32_t ref_count;
down_read(&pmd->root_lock);
- r = dm_sm_get_count(pmd->data_sm, b, &ref_count);
- if (!r)
- *result = (ref_count > 1);
+ if (!pmd->fail_io) {
+ r = dm_sm_get_count(pmd->data_sm, b, &ref_count);
+ if (!r)
+ *result = (ref_count > 1);
+ }
up_read(&pmd->root_lock);
return r;
@@ -1770,10 +1772,11 @@ int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *re
int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e)
{
- int r = 0;
+ int r = -EINVAL;
pmd_write_lock(pmd);
- r = dm_sm_inc_blocks(pmd->data_sm, b, e);
+ if (!pmd->fail_io)
+ r = dm_sm_inc_blocks(pmd->data_sm, b, e);
pmd_write_unlock(pmd);
return r;
@@ -1781,10 +1784,11 @@ int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_
int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e)
{
- int r = 0;
+ int r = -EINVAL;
pmd_write_lock(pmd);
- r = dm_sm_dec_blocks(pmd->data_sm, b, e);
+ if (!pmd->fail_io)
+ r = dm_sm_dec_blocks(pmd->data_sm, b, e);
pmd_write_unlock(pmd);
return r;
@@ -1887,7 +1891,7 @@ int dm_pool_abort_metadata(struct dm_pool_metadata *pmd)
* Replacement block manager (new_bm) is created and old_bm destroyed outside of
* pmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
* shrinker associated with the block manager's bufio client vs pmd root_lock).
- * - must take shrinker_mutex without holding pmd->root_lock
+ * - must take shrinker_rwsem without holding pmd->root_lock
*/
new_bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
THIN_MAX_CONCURRENT_LOCKS);
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 464c6b678417..f1d0dcb9db22 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -401,8 +401,7 @@ static int issue_discard(struct discard_op *op, dm_block_t data_b, dm_block_t da
sector_t s = block_to_sectors(tc->pool, data_b);
sector_t len = block_to_sectors(tc->pool, data_e - data_b);
- return __blkdev_issue_discard(tc->pool_dev->bdev, s, len, GFP_NOWAIT,
- &op->bio);
+ return __blkdev_issue_discard(tc->pool_dev->bdev, s, len, GFP_NOIO, &op->bio);
}
static void end_discard(struct discard_op *op, int r)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index ca2dc079c3f4..ae0a88defb41 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1172,7 +1172,8 @@ static inline sector_t max_io_len_target_boundary(struct dm_target *ti,
}
static sector_t __max_io_len(struct dm_target *ti, sector_t sector,
- unsigned int max_granularity)
+ unsigned int max_granularity,
+ unsigned int max_sectors)
{
sector_t target_offset = dm_target_offset(ti, sector);
sector_t len = max_io_len_target_boundary(ti, target_offset);
@@ -1186,13 +1187,13 @@ static sector_t __max_io_len(struct dm_target *ti, sector_t sector,
if (!max_granularity)
return len;
return min_t(sector_t, len,
- min(queue_max_sectors(ti->table->md->queue),
+ min(max_sectors ? : queue_max_sectors(ti->table->md->queue),
blk_chunk_sectors_left(target_offset, max_granularity)));
}
static inline sector_t max_io_len(struct dm_target *ti, sector_t sector)
{
- return __max_io_len(ti, sector, ti->max_io_len);
+ return __max_io_len(ti, sector, ti->max_io_len, 0);
}
int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
@@ -1581,12 +1582,13 @@ static void __send_empty_flush(struct clone_info *ci)
static void __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
unsigned int num_bios,
- unsigned int max_granularity)
+ unsigned int max_granularity,
+ unsigned int max_sectors)
{
unsigned int len, bios;
len = min_t(sector_t, ci->sector_count,
- __max_io_len(ti, ci->sector, max_granularity));
+ __max_io_len(ti, ci->sector, max_granularity, max_sectors));
atomic_add(num_bios, &ci->io->io_count);
bios = __send_duplicate_bios(ci, ti, num_bios, &len);
@@ -1623,23 +1625,27 @@ static blk_status_t __process_abnormal_io(struct clone_info *ci,
{
unsigned int num_bios = 0;
unsigned int max_granularity = 0;
+ unsigned int max_sectors = 0;
struct queue_limits *limits = dm_get_queue_limits(ti->table->md);
switch (bio_op(ci->bio)) {
case REQ_OP_DISCARD:
num_bios = ti->num_discard_bios;
+ max_sectors = limits->max_discard_sectors;
if (ti->max_discard_granularity)
- max_granularity = limits->max_discard_sectors;
+ max_granularity = max_sectors;
break;
case REQ_OP_SECURE_ERASE:
num_bios = ti->num_secure_erase_bios;
+ max_sectors = limits->max_secure_erase_sectors;
if (ti->max_secure_erase_granularity)
- max_granularity = limits->max_secure_erase_sectors;
+ max_granularity = max_sectors;
break;
case REQ_OP_WRITE_ZEROES:
num_bios = ti->num_write_zeroes_bios;
+ max_sectors = limits->max_write_zeroes_sectors;
if (ti->max_write_zeroes_granularity)
- max_granularity = limits->max_write_zeroes_sectors;
+ max_granularity = max_sectors;
break;
default:
break;
@@ -1654,7 +1660,8 @@ static blk_status_t __process_abnormal_io(struct clone_info *ci,
if (unlikely(!num_bios))
return BLK_STS_NOTSUPP;
- __send_changing_extent_only(ci, ti, num_bios, max_granularity);
+ __send_changing_extent_only(ci, ti, num_bios,
+ max_granularity, max_sectors);
return BLK_STS_OK;
}
@@ -2808,6 +2815,10 @@ retry:
}
map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
+ if (!map) {
+ /* avoid deadlock with fs/namespace.c:do_mount() */
+ suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG;
+ }
r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE, DMF_SUSPENDED);
if (r)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index f8bc74e16811..6615abf54d3f 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5516,7 +5516,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
sector = raid5_compute_sector(conf, raid_bio->bi_iter.bi_sector, 0,
&dd_idx, NULL);
- end_sector = bio_end_sector(raid_bio);
+ end_sector = sector + bio_sectors(raid_bio);
rcu_read_lock();
if (r5c_big_stripe_cached(conf, sector))
diff --git a/drivers/media/cec/core/cec-adap.c b/drivers/media/cec/core/cec-adap.c
index 769ea6b2e1d0..241b1621b197 100644
--- a/drivers/media/cec/core/cec-adap.c
+++ b/drivers/media/cec/core/cec-adap.c
@@ -1091,7 +1091,8 @@ void cec_received_msg_ts(struct cec_adapter *adap,
mutex_lock(&adap->lock);
dprintk(2, "%s: %*ph\n", __func__, msg->len, msg->msg);
- adap->last_initiator = 0xff;
+ if (!adap->transmit_in_progress)
+ adap->last_initiator = 0xff;
/* Check if this message was for us (directed or broadcast). */
if (!cec_msg_is_broadcast(msg)) {
@@ -1585,7 +1586,7 @@ static void cec_claim_log_addrs(struct cec_adapter *adap, bool block)
*
* This function is called with adap->lock held.
*/
-static int cec_adap_enable(struct cec_adapter *adap)
+int cec_adap_enable(struct cec_adapter *adap)
{
bool enable;
int ret = 0;
@@ -1595,6 +1596,9 @@ static int cec_adap_enable(struct cec_adapter *adap)
if (adap->needs_hpd)
enable = enable && adap->phys_addr != CEC_PHYS_ADDR_INVALID;
+ if (adap->devnode.unregistered)
+ enable = false;
+
if (enable == adap->is_enabled)
return 0;
diff --git a/drivers/media/cec/core/cec-core.c b/drivers/media/cec/core/cec-core.c
index af358e901b5f..7e153c5cad04 100644
--- a/drivers/media/cec/core/cec-core.c
+++ b/drivers/media/cec/core/cec-core.c
@@ -191,6 +191,8 @@ static void cec_devnode_unregister(struct cec_adapter *adap)
mutex_lock(&adap->lock);
__cec_s_phys_addr(adap, CEC_PHYS_ADDR_INVALID, false);
__cec_s_log_addrs(adap, NULL, false);
+ // Disable the adapter (since adap->devnode.unregistered is true)
+ cec_adap_enable(adap);
mutex_unlock(&adap->lock);
cdev_device_del(&devnode->cdev, &devnode->dev);
diff --git a/drivers/media/cec/core/cec-priv.h b/drivers/media/cec/core/cec-priv.h
index b78df931aa74..ed1f8c67626b 100644
--- a/drivers/media/cec/core/cec-priv.h
+++ b/drivers/media/cec/core/cec-priv.h
@@ -47,6 +47,7 @@ int cec_monitor_pin_cnt_inc(struct cec_adapter *adap);
void cec_monitor_pin_cnt_dec(struct cec_adapter *adap);
int cec_adap_status(struct seq_file *file, void *priv);
int cec_thread_func(void *_adap);
+int cec_adap_enable(struct cec_adapter *adap);
void __cec_s_phys_addr(struct cec_adapter *adap, u16 phys_addr, bool block);
int __cec_s_log_addrs(struct cec_adapter *adap,
struct cec_log_addrs *log_addrs, bool block);
diff --git a/drivers/media/dvb-core/dvb_ca_en50221.c b/drivers/media/dvb-core/dvb_ca_en50221.c
index c2d2792227f8..baf64540dc00 100644
--- a/drivers/media/dvb-core/dvb_ca_en50221.c
+++ b/drivers/media/dvb-core/dvb_ca_en50221.c
@@ -151,6 +151,12 @@ struct dvb_ca_private {
/* mutex serializing ioctls */
struct mutex ioctl_mutex;
+
+ /* A mutex used when a device is disconnected */
+ struct mutex remove_mutex;
+
+ /* Whether the device is disconnected */
+ int exit;
};
static void dvb_ca_private_free(struct dvb_ca_private *ca)
@@ -187,7 +193,7 @@ static void dvb_ca_en50221_thread_wakeup(struct dvb_ca_private *ca);
static int dvb_ca_en50221_read_data(struct dvb_ca_private *ca, int slot,
u8 *ebuf, int ecount);
static int dvb_ca_en50221_write_data(struct dvb_ca_private *ca, int slot,
- u8 *ebuf, int ecount);
+ u8 *ebuf, int ecount, int size_write_flag);
/**
* findstr - Safely find needle in haystack.
@@ -370,7 +376,7 @@ static int dvb_ca_en50221_link_init(struct dvb_ca_private *ca, int slot)
ret = dvb_ca_en50221_wait_if_status(ca, slot, STATUSREG_FR, HZ / 10);
if (ret)
return ret;
- ret = dvb_ca_en50221_write_data(ca, slot, buf, 2);
+ ret = dvb_ca_en50221_write_data(ca, slot, buf, 2, CMDREG_SW);
if (ret != 2)
return -EIO;
ret = ca->pub->write_cam_control(ca->pub, slot, CTRLIF_COMMAND, IRQEN);
@@ -778,11 +784,13 @@ exit:
* @buf: The data in this buffer is treated as a complete link-level packet to
* be written.
* @bytes_write: Size of ebuf.
+ * @size_write_flag: A flag on Command Register which says whether the link size
+ * information will be writen or not.
*
* return: Number of bytes written, or < 0 on error.
*/
static int dvb_ca_en50221_write_data(struct dvb_ca_private *ca, int slot,
- u8 *buf, int bytes_write)
+ u8 *buf, int bytes_write, int size_write_flag)
{
struct dvb_ca_slot *sl = &ca->slot_info[slot];
int status;
@@ -817,7 +825,7 @@ static int dvb_ca_en50221_write_data(struct dvb_ca_private *ca, int slot,
/* OK, set HC bit */
status = ca->pub->write_cam_control(ca->pub, slot, CTRLIF_COMMAND,
- IRQEN | CMDREG_HC);
+ IRQEN | CMDREG_HC | size_write_flag);
if (status)
goto exit;
@@ -1508,7 +1516,7 @@ static ssize_t dvb_ca_en50221_io_write(struct file *file,
mutex_lock(&sl->slot_lock);
status = dvb_ca_en50221_write_data(ca, slot, fragbuf,
- fraglen + 2);
+ fraglen + 2, 0);
mutex_unlock(&sl->slot_lock);
if (status == (fraglen + 2)) {
written = 1;
@@ -1709,12 +1717,22 @@ static int dvb_ca_en50221_io_open(struct inode *inode, struct file *file)
dprintk("%s\n", __func__);
- if (!try_module_get(ca->pub->owner))
+ mutex_lock(&ca->remove_mutex);
+
+ if (ca->exit) {
+ mutex_unlock(&ca->remove_mutex);
+ return -ENODEV;
+ }
+
+ if (!try_module_get(ca->pub->owner)) {
+ mutex_unlock(&ca->remove_mutex);
return -EIO;
+ }
err = dvb_generic_open(inode, file);
if (err < 0) {
module_put(ca->pub->owner);
+ mutex_unlock(&ca->remove_mutex);
return err;
}
@@ -1739,6 +1757,7 @@ static int dvb_ca_en50221_io_open(struct inode *inode, struct file *file)
dvb_ca_private_get(ca);
+ mutex_unlock(&ca->remove_mutex);
return 0;
}
@@ -1758,6 +1777,8 @@ static int dvb_ca_en50221_io_release(struct inode *inode, struct file *file)
dprintk("%s\n", __func__);
+ mutex_lock(&ca->remove_mutex);
+
/* mark the CA device as closed */
ca->open = 0;
dvb_ca_en50221_thread_update_delay(ca);
@@ -1768,6 +1789,13 @@ static int dvb_ca_en50221_io_release(struct inode *inode, struct file *file)
dvb_ca_private_put(ca);
+ if (dvbdev->users == 1 && ca->exit == 1) {
+ mutex_unlock(&ca->remove_mutex);
+ wake_up(&dvbdev->wait_queue);
+ } else {
+ mutex_unlock(&ca->remove_mutex);
+ }
+
return err;
}
@@ -1891,6 +1919,7 @@ int dvb_ca_en50221_init(struct dvb_adapter *dvb_adapter,
}
mutex_init(&ca->ioctl_mutex);
+ mutex_init(&ca->remove_mutex);
if (signal_pending(current)) {
ret = -EINTR;
@@ -1933,6 +1962,14 @@ void dvb_ca_en50221_release(struct dvb_ca_en50221 *pubca)
dprintk("%s\n", __func__);
+ mutex_lock(&ca->remove_mutex);
+ ca->exit = 1;
+ mutex_unlock(&ca->remove_mutex);
+
+ if (ca->dvbdev->users < 1)
+ wait_event(ca->dvbdev->wait_queue,
+ ca->dvbdev->users == 1);
+
/* shutdown the thread if there was one */
kthread_stop(ca->thread);
diff --git a/drivers/media/dvb-core/dvb_demux.c b/drivers/media/dvb-core/dvb_demux.c
index 398c86279b5b..7c4d86bfdd6c 100644
--- a/drivers/media/dvb-core/dvb_demux.c
+++ b/drivers/media/dvb-core/dvb_demux.c
@@ -115,12 +115,12 @@ static inline int dvb_dmx_swfilter_payload(struct dvb_demux_feed *feed,
cc = buf[3] & 0x0f;
ccok = ((feed->cc + 1) & 0x0f) == cc;
- feed->cc = cc;
if (!ccok) {
set_buf_flags(feed, DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED);
dprintk_sect_loss("missed packet: %d instead of %d!\n",
cc, (feed->cc + 1) & 0x0f);
}
+ feed->cc = cc;
if (buf[1] & 0x40) // PUSI ?
feed->peslen = 0xfffa;
@@ -300,7 +300,6 @@ static int dvb_dmx_swfilter_section_packet(struct dvb_demux_feed *feed,
cc = buf[3] & 0x0f;
ccok = ((feed->cc + 1) & 0x0f) == cc;
- feed->cc = cc;
if (buf[3] & 0x20) {
/* adaption field present, check for discontinuity_indicator */
@@ -336,6 +335,7 @@ static int dvb_dmx_swfilter_section_packet(struct dvb_demux_feed *feed,
feed->pusi_seen = false;
dvb_dmx_swfilter_section_new(feed);
}
+ feed->cc = cc;
if (buf[1] & 0x40) {
/* PUSI=1 (is set), section boundary is here */
diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c
index cc0a789f09ae..9293b058ab99 100644
--- a/drivers/media/dvb-core/dvb_frontend.c
+++ b/drivers/media/dvb-core/dvb_frontend.c
@@ -293,14 +293,22 @@ static int dvb_frontend_get_event(struct dvb_frontend *fe,
}
if (events->eventw == events->eventr) {
- int ret;
+ struct wait_queue_entry wait;
+ int ret = 0;
if (flags & O_NONBLOCK)
return -EWOULDBLOCK;
- ret = wait_event_interruptible(events->wait_queue,
- dvb_frontend_test_event(fepriv, events));
-
+ init_waitqueue_entry(&wait, current);
+ add_wait_queue(&events->wait_queue, &wait);
+ while (!dvb_frontend_test_event(fepriv, events)) {
+ wait_woken(&wait, TASK_INTERRUPTIBLE, 0);
+ if (signal_pending(current)) {
+ ret = -ERESTARTSYS;
+ break;
+ }
+ }
+ remove_wait_queue(&events->wait_queue, &wait);
if (ret < 0)
return ret;
}
diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c
index 8a2febf33ce2..8bb8dd34c223 100644
--- a/drivers/media/dvb-core/dvb_net.c
+++ b/drivers/media/dvb-core/dvb_net.c
@@ -1564,15 +1564,43 @@ static long dvb_net_ioctl(struct file *file,
return dvb_usercopy(file, cmd, arg, dvb_net_do_ioctl);
}
+static int locked_dvb_net_open(struct inode *inode, struct file *file)
+{
+ struct dvb_device *dvbdev = file->private_data;
+ struct dvb_net *dvbnet = dvbdev->priv;
+ int ret;
+
+ if (mutex_lock_interruptible(&dvbnet->remove_mutex))
+ return -ERESTARTSYS;
+
+ if (dvbnet->exit) {
+ mutex_unlock(&dvbnet->remove_mutex);
+ return -ENODEV;
+ }
+
+ ret = dvb_generic_open(inode, file);
+
+ mutex_unlock(&dvbnet->remove_mutex);
+
+ return ret;
+}
+
static int dvb_net_close(struct inode *inode, struct file *file)
{
struct dvb_device *dvbdev = file->private_data;
struct dvb_net *dvbnet = dvbdev->priv;
+ mutex_lock(&dvbnet->remove_mutex);
+
dvb_generic_release(inode, file);
- if(dvbdev->users == 1 && dvbnet->exit == 1)
+ if (dvbdev->users == 1 && dvbnet->exit == 1) {
+ mutex_unlock(&dvbnet->remove_mutex);
wake_up(&dvbdev->wait_queue);
+ } else {
+ mutex_unlock(&dvbnet->remove_mutex);
+ }
+
return 0;
}
@@ -1580,7 +1608,7 @@ static int dvb_net_close(struct inode *inode, struct file *file)
static const struct file_operations dvb_net_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = dvb_net_ioctl,
- .open = dvb_generic_open,
+ .open = locked_dvb_net_open,
.release = dvb_net_close,
.llseek = noop_llseek,
};
@@ -1599,10 +1627,13 @@ void dvb_net_release (struct dvb_net *dvbnet)
{
int i;
+ mutex_lock(&dvbnet->remove_mutex);
dvbnet->exit = 1;
+ mutex_unlock(&dvbnet->remove_mutex);
+
if (dvbnet->dvbdev->users < 1)
wait_event(dvbnet->dvbdev->wait_queue,
- dvbnet->dvbdev->users==1);
+ dvbnet->dvbdev->users == 1);
dvb_unregister_device(dvbnet->dvbdev);
@@ -1621,6 +1652,7 @@ int dvb_net_init (struct dvb_adapter *adap, struct dvb_net *dvbnet,
int i;
mutex_init(&dvbnet->ioctl_mutex);
+ mutex_init(&dvbnet->remove_mutex);
dvbnet->demux = dmx;
for (i=0; i<DVB_NET_DEVICES_MAX; i++)
diff --git a/drivers/media/dvb-core/dvbdev.c b/drivers/media/dvb-core/dvbdev.c
index e9b3ce09e534..a4b05e366ccc 100644
--- a/drivers/media/dvb-core/dvbdev.c
+++ b/drivers/media/dvb-core/dvbdev.c
@@ -27,6 +27,7 @@
#include <media/tuner.h>
static DEFINE_MUTEX(dvbdev_mutex);
+static LIST_HEAD(dvbdevfops_list);
static int dvbdev_debug;
module_param(dvbdev_debug, int, 0644);
@@ -453,14 +454,15 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev,
enum dvb_device_type type, int demux_sink_pads)
{
struct dvb_device *dvbdev;
- struct file_operations *dvbdevfops;
+ struct file_operations *dvbdevfops = NULL;
+ struct dvbdevfops_node *node = NULL, *new_node = NULL;
struct device *clsdev;
int minor;
int id, ret;
mutex_lock(&dvbdev_register_lock);
- if ((id = dvbdev_get_free_id (adap, type)) < 0){
+ if ((id = dvbdev_get_free_id (adap, type)) < 0) {
mutex_unlock(&dvbdev_register_lock);
*pdvbdev = NULL;
pr_err("%s: couldn't find free device id\n", __func__);
@@ -468,18 +470,45 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev,
}
*pdvbdev = dvbdev = kzalloc(sizeof(*dvbdev), GFP_KERNEL);
-
if (!dvbdev){
mutex_unlock(&dvbdev_register_lock);
return -ENOMEM;
}
- dvbdevfops = kmemdup(template->fops, sizeof(*dvbdevfops), GFP_KERNEL);
+ /*
+ * When a device of the same type is probe()d more than once,
+ * the first allocated fops are used. This prevents memory leaks
+ * that can occur when the same device is probe()d repeatedly.
+ */
+ list_for_each_entry(node, &dvbdevfops_list, list_head) {
+ if (node->fops->owner == adap->module &&
+ node->type == type &&
+ node->template == template) {
+ dvbdevfops = node->fops;
+ break;
+ }
+ }
- if (!dvbdevfops){
- kfree (dvbdev);
- mutex_unlock(&dvbdev_register_lock);
- return -ENOMEM;
+ if (dvbdevfops == NULL) {
+ dvbdevfops = kmemdup(template->fops, sizeof(*dvbdevfops), GFP_KERNEL);
+ if (!dvbdevfops) {
+ kfree(dvbdev);
+ mutex_unlock(&dvbdev_register_lock);
+ return -ENOMEM;
+ }
+
+ new_node = kzalloc(sizeof(struct dvbdevfops_node), GFP_KERNEL);
+ if (!new_node) {
+ kfree(dvbdevfops);
+ kfree(dvbdev);
+ mutex_unlock(&dvbdev_register_lock);
+ return -ENOMEM;
+ }
+
+ new_node->fops = dvbdevfops;
+ new_node->type = type;
+ new_node->template = template;
+ list_add_tail (&new_node->list_head, &dvbdevfops_list);
}
memcpy(dvbdev, template, sizeof(struct dvb_device));
@@ -490,20 +519,20 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev,
dvbdev->priv = priv;
dvbdev->fops = dvbdevfops;
init_waitqueue_head (&dvbdev->wait_queue);
-
dvbdevfops->owner = adap->module;
-
list_add_tail (&dvbdev->list_head, &adap->device_list);
-
down_write(&minor_rwsem);
#ifdef CONFIG_DVB_DYNAMIC_MINORS
for (minor = 0; minor < MAX_DVB_MINORS; minor++)
if (dvb_minors[minor] == NULL)
break;
-
if (minor == MAX_DVB_MINORS) {
+ if (new_node) {
+ list_del (&new_node->list_head);
+ kfree(dvbdevfops);
+ kfree(new_node);
+ }
list_del (&dvbdev->list_head);
- kfree(dvbdevfops);
kfree(dvbdev);
up_write(&minor_rwsem);
mutex_unlock(&dvbdev_register_lock);
@@ -512,41 +541,47 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev,
#else
minor = nums2minor(adap->num, type, id);
#endif
-
dvbdev->minor = minor;
dvb_minors[minor] = dvb_device_get(dvbdev);
up_write(&minor_rwsem);
-
ret = dvb_register_media_device(dvbdev, type, minor, demux_sink_pads);
if (ret) {
pr_err("%s: dvb_register_media_device failed to create the mediagraph\n",
__func__);
-
+ if (new_node) {
+ list_del (&new_node->list_head);
+ kfree(dvbdevfops);
+ kfree(new_node);
+ }
dvb_media_device_free(dvbdev);
list_del (&dvbdev->list_head);
- kfree(dvbdevfops);
kfree(dvbdev);
mutex_unlock(&dvbdev_register_lock);
return ret;
}
- mutex_unlock(&dvbdev_register_lock);
-
clsdev = device_create(dvb_class, adap->device,
MKDEV(DVB_MAJOR, minor),
dvbdev, "dvb%d.%s%d", adap->num, dnames[type], id);
if (IS_ERR(clsdev)) {
pr_err("%s: failed to create device dvb%d.%s%d (%ld)\n",
__func__, adap->num, dnames[type], id, PTR_ERR(clsdev));
+ if (new_node) {
+ list_del (&new_node->list_head);
+ kfree(dvbdevfops);
+ kfree(new_node);
+ }
dvb_media_device_free(dvbdev);
list_del (&dvbdev->list_head);
- kfree(dvbdevfops);
kfree(dvbdev);
+ mutex_unlock(&dvbdev_register_lock);
return PTR_ERR(clsdev);
}
+
dprintk("DVB: register adapter%d/%s%d @ minor: %i (0x%02x)\n",
adap->num, dnames[type], id, minor, minor);
+ mutex_unlock(&dvbdev_register_lock);
return 0;
}
EXPORT_SYMBOL(dvb_register_device);
@@ -575,7 +610,6 @@ static void dvb_free_device(struct kref *ref)
{
struct dvb_device *dvbdev = container_of(ref, struct dvb_device, ref);
- kfree (dvbdev->fops);
kfree (dvbdev);
}
@@ -1081,9 +1115,17 @@ error:
static void __exit exit_dvbdev(void)
{
+ struct dvbdevfops_node *node, *next;
+
class_destroy(dvb_class);
cdev_del(&dvb_device_cdev);
unregister_chrdev_region(MKDEV(DVB_MAJOR, 0), MAX_DVB_MINORS);
+
+ list_for_each_entry_safe(node, next, &dvbdevfops_list, list_head) {
+ list_del (&node->list_head);
+ kfree(node->fops);
+ kfree(node);
+ }
}
subsys_initcall(init_dvbdev);
diff --git a/drivers/media/dvb-frontends/mn88443x.c b/drivers/media/dvb-frontends/mn88443x.c
index 1f1753f2ab1a..0782f8377eb2 100644
--- a/drivers/media/dvb-frontends/mn88443x.c
+++ b/drivers/media/dvb-frontends/mn88443x.c
@@ -798,7 +798,7 @@ MODULE_DEVICE_TABLE(i2c, mn88443x_i2c_id);
static struct i2c_driver mn88443x_driver = {
.driver = {
.name = "mn88443x",
- .of_match_table = of_match_ptr(mn88443x_of_match),
+ .of_match_table = mn88443x_of_match,
},
.probe_new = mn88443x_probe,
.remove = mn88443x_remove,
diff --git a/drivers/media/pci/netup_unidvb/netup_unidvb_core.c b/drivers/media/pci/netup_unidvb/netup_unidvb_core.c
index 8287851b5ffd..d85bfbb77a25 100644
--- a/drivers/media/pci/netup_unidvb/netup_unidvb_core.c
+++ b/drivers/media/pci/netup_unidvb/netup_unidvb_core.c
@@ -697,7 +697,7 @@ static void netup_unidvb_dma_fini(struct netup_unidvb_dev *ndev, int num)
netup_unidvb_dma_enable(dma, 0);
msleep(50);
cancel_work_sync(&dma->work);
- del_timer(&dma->timeout);
+ del_timer_sync(&dma->timeout);
}
static int netup_unidvb_dma_setup(struct netup_unidvb_dev *ndev)
@@ -887,12 +887,7 @@ static int netup_unidvb_initdev(struct pci_dev *pci_dev,
ndev->lmmio0, (u32)pci_resource_len(pci_dev, 0),
ndev->lmmio1, (u32)pci_resource_len(pci_dev, 1),
pci_dev->irq);
- if (request_irq(pci_dev->irq, netup_unidvb_isr, IRQF_SHARED,
- "netup_unidvb", pci_dev) < 0) {
- dev_err(&pci_dev->dev,
- "%s(): can't get IRQ %d\n", __func__, pci_dev->irq);
- goto irq_request_err;
- }
+
ndev->dma_size = 2 * 188 *
NETUP_DMA_BLOCKS_COUNT * NETUP_DMA_PACKETS_COUNT;
ndev->dma_virt = dma_alloc_coherent(&pci_dev->dev,
@@ -933,6 +928,14 @@ static int netup_unidvb_initdev(struct pci_dev *pci_dev,
dev_err(&pci_dev->dev, "netup_unidvb: DMA setup failed\n");
goto dma_setup_err;
}
+
+ if (request_irq(pci_dev->irq, netup_unidvb_isr, IRQF_SHARED,
+ "netup_unidvb", pci_dev) < 0) {
+ dev_err(&pci_dev->dev,
+ "%s(): can't get IRQ %d\n", __func__, pci_dev->irq);
+ goto dma_setup_err;
+ }
+
dev_info(&pci_dev->dev,
"netup_unidvb: device has been initialized\n");
return 0;
@@ -951,8 +954,6 @@ spi_setup_err:
dma_free_coherent(&pci_dev->dev, ndev->dma_size,
ndev->dma_virt, ndev->dma_phys);
dma_alloc_err:
- free_irq(pci_dev->irq, pci_dev);
-irq_request_err:
iounmap(ndev->lmmio1);
pci_bar1_error:
iounmap(ndev->lmmio0);
diff --git a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec_stateful.c b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec_stateful.c
index 29991551cf61..0fbd030026c7 100644
--- a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec_stateful.c
+++ b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec_stateful.c
@@ -584,6 +584,9 @@ static void mtk_init_vdec_params(struct mtk_vcodec_ctx *ctx)
if (!(ctx->dev->dec_capability & VCODEC_CAPABILITY_4K_DISABLED)) {
for (i = 0; i < num_supported_formats; i++) {
+ if (mtk_video_formats[i].type != MTK_FMT_DEC)
+ continue;
+
mtk_video_formats[i].frmsize.max_width =
VCODEC_DEC_4K_CODED_WIDTH;
mtk_video_formats[i].frmsize.max_height =
diff --git a/drivers/media/platform/qcom/camss/camss-video.c b/drivers/media/platform/qcom/camss/camss-video.c
index 898f32177b12..8640db306026 100644
--- a/drivers/media/platform/qcom/camss/camss-video.c
+++ b/drivers/media/platform/qcom/camss/camss-video.c
@@ -353,7 +353,6 @@ static int video_get_subdev_format(struct camss_video *video,
if (subdev == NULL)
return -EPIPE;
- memset(&fmt, 0, sizeof(fmt));
fmt.pad = pad;
ret = v4l2_subdev_call(subdev, pad, get_fmt, NULL, &fmt);
diff --git a/drivers/media/platform/verisilicon/hantro_v4l2.c b/drivers/media/platform/verisilicon/hantro_v4l2.c
index 835518534e3b..61cfaaf4e927 100644
--- a/drivers/media/platform/verisilicon/hantro_v4l2.c
+++ b/drivers/media/platform/verisilicon/hantro_v4l2.c
@@ -397,10 +397,12 @@ hantro_reset_raw_fmt(struct hantro_ctx *ctx, int bit_depth)
if (!raw_vpu_fmt)
return -EINVAL;
- if (ctx->is_encoder)
+ if (ctx->is_encoder) {
encoded_fmt = &ctx->dst_fmt;
- else
+ ctx->vpu_src_fmt = raw_vpu_fmt;
+ } else {
encoded_fmt = &ctx->src_fmt;
+ }
hantro_reset_fmt(&raw_fmt, raw_vpu_fmt);
raw_fmt.width = encoded_fmt->width;
diff --git a/drivers/media/usb/dvb-usb-v2/ce6230.c b/drivers/media/usb/dvb-usb-v2/ce6230.c
index 44540de1a206..d3b5cb4a24da 100644
--- a/drivers/media/usb/dvb-usb-v2/ce6230.c
+++ b/drivers/media/usb/dvb-usb-v2/ce6230.c
@@ -101,6 +101,10 @@ static int ce6230_i2c_master_xfer(struct i2c_adapter *adap,
if (num > i + 1 && (msg[i+1].flags & I2C_M_RD)) {
if (msg[i].addr ==
ce6230_zl10353_config.demod_address) {
+ if (msg[i].len < 1) {
+ i = -EOPNOTSUPP;
+ break;
+ }
req.cmd = DEMOD_READ;
req.value = msg[i].addr >> 1;
req.index = msg[i].buf[0];
@@ -117,6 +121,10 @@ static int ce6230_i2c_master_xfer(struct i2c_adapter *adap,
} else {
if (msg[i].addr ==
ce6230_zl10353_config.demod_address) {
+ if (msg[i].len < 1) {
+ i = -EOPNOTSUPP;
+ break;
+ }
req.cmd = DEMOD_WRITE;
req.value = msg[i].addr >> 1;
req.index = msg[i].buf[0];
diff --git a/drivers/media/usb/dvb-usb-v2/ec168.c b/drivers/media/usb/dvb-usb-v2/ec168.c
index 7ed0ab9e429b..0e4773fc025c 100644
--- a/drivers/media/usb/dvb-usb-v2/ec168.c
+++ b/drivers/media/usb/dvb-usb-v2/ec168.c
@@ -115,6 +115,10 @@ static int ec168_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[],
while (i < num) {
if (num > i + 1 && (msg[i+1].flags & I2C_M_RD)) {
if (msg[i].addr == ec168_ec100_config.demod_address) {
+ if (msg[i].len < 1) {
+ i = -EOPNOTSUPP;
+ break;
+ }
req.cmd = READ_DEMOD;
req.value = 0;
req.index = 0xff00 + msg[i].buf[0]; /* reg */
@@ -131,6 +135,10 @@ static int ec168_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[],
}
} else {
if (msg[i].addr == ec168_ec100_config.demod_address) {
+ if (msg[i].len < 1) {
+ i = -EOPNOTSUPP;
+ break;
+ }
req.cmd = WRITE_DEMOD;
req.value = msg[i].buf[1]; /* val */
req.index = 0xff00 + msg[i].buf[0]; /* reg */
@@ -139,6 +147,10 @@ static int ec168_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[],
ret = ec168_ctrl_msg(d, &req);
i += 1;
} else {
+ if (msg[i].len < 1) {
+ i = -EOPNOTSUPP;
+ break;
+ }
req.cmd = WRITE_I2C;
req.value = msg[i].buf[0]; /* val */
req.index = 0x0100 + msg[i].addr; /* I2C addr */
diff --git a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c
index 795a012d4020..f7884bb56fcc 100644
--- a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c
+++ b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c
@@ -176,6 +176,10 @@ static int rtl28xxu_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[],
ret = -EOPNOTSUPP;
goto err_mutex_unlock;
} else if (msg[0].addr == 0x10) {
+ if (msg[0].len < 1 || msg[1].len < 1) {
+ ret = -EOPNOTSUPP;
+ goto err_mutex_unlock;
+ }
/* method 1 - integrated demod */
if (msg[0].buf[0] == 0x00) {
/* return demod page from driver cache */
@@ -189,6 +193,10 @@ static int rtl28xxu_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[],
ret = rtl28xxu_ctrl_msg(d, &req);
}
} else if (msg[0].len < 2) {
+ if (msg[0].len < 1) {
+ ret = -EOPNOTSUPP;
+ goto err_mutex_unlock;
+ }
/* method 2 - old I2C */
req.value = (msg[0].buf[0] << 8) | (msg[0].addr << 1);
req.index = CMD_I2C_RD;
@@ -217,8 +225,16 @@ static int rtl28xxu_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[],
ret = -EOPNOTSUPP;
goto err_mutex_unlock;
} else if (msg[0].addr == 0x10) {
+ if (msg[0].len < 1) {
+ ret = -EOPNOTSUPP;
+ goto err_mutex_unlock;
+ }
/* method 1 - integrated demod */
if (msg[0].buf[0] == 0x00) {
+ if (msg[0].len < 2) {
+ ret = -EOPNOTSUPP;
+ goto err_mutex_unlock;
+ }
/* save demod page for later demod access */
dev->page = msg[0].buf[1];
ret = 0;
@@ -231,6 +247,10 @@ static int rtl28xxu_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[],
ret = rtl28xxu_ctrl_msg(d, &req);
}
} else if ((msg[0].len < 23) && (!dev->new_i2c_write)) {
+ if (msg[0].len < 1) {
+ ret = -EOPNOTSUPP;
+ goto err_mutex_unlock;
+ }
/* method 2 - old I2C */
req.value = (msg[0].buf[0] << 8) | (msg[0].addr << 1);
req.index = CMD_I2C_WR;
diff --git a/drivers/media/usb/dvb-usb/az6027.c b/drivers/media/usb/dvb-usb/az6027.c
index 7d78ee09be5e..a31c6f82f4e9 100644
--- a/drivers/media/usb/dvb-usb/az6027.c
+++ b/drivers/media/usb/dvb-usb/az6027.c
@@ -988,6 +988,10 @@ static int az6027_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[], int n
/* write/read request */
if (i + 1 < num && (msg[i + 1].flags & I2C_M_RD)) {
req = 0xB9;
+ if (msg[i].len < 1) {
+ i = -EOPNOTSUPP;
+ break;
+ }
index = (((msg[i].buf[0] << 8) & 0xff00) | (msg[i].buf[1] & 0x00ff));
value = msg[i].addr + (msg[i].len << 8);
length = msg[i + 1].len + 6;
@@ -1001,6 +1005,10 @@ static int az6027_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[], int n
/* demod 16bit addr */
req = 0xBD;
+ if (msg[i].len < 1) {
+ i = -EOPNOTSUPP;
+ break;
+ }
index = (((msg[i].buf[0] << 8) & 0xff00) | (msg[i].buf[1] & 0x00ff));
value = msg[i].addr + (2 << 8);
length = msg[i].len - 2;
@@ -1026,6 +1034,10 @@ static int az6027_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[], int n
} else {
req = 0xBD;
+ if (msg[i].len < 1) {
+ i = -EOPNOTSUPP;
+ break;
+ }
index = msg[i].buf[0] & 0x00FF;
value = msg[i].addr + (1 << 8);
length = msg[i].len - 1;
diff --git a/drivers/media/usb/dvb-usb/digitv.c b/drivers/media/usb/dvb-usb/digitv.c
index 2756815a780b..32134be16914 100644
--- a/drivers/media/usb/dvb-usb/digitv.c
+++ b/drivers/media/usb/dvb-usb/digitv.c
@@ -63,6 +63,10 @@ static int digitv_i2c_xfer(struct i2c_adapter *adap,struct i2c_msg msg[],int num
warn("more than 2 i2c messages at a time is not handled yet. TODO.");
for (i = 0; i < num; i++) {
+ if (msg[i].len < 1) {
+ i = -EOPNOTSUPP;
+ break;
+ }
/* write/read request */
if (i+1 < num && (msg[i+1].flags & I2C_M_RD)) {
if (digitv_ctrl_msg(d, USB_READ_COFDM, msg[i].buf[0], NULL, 0,
diff --git a/drivers/media/usb/dvb-usb/dw2102.c b/drivers/media/usb/dvb-usb/dw2102.c
index 0ca764282c76..8747960e6146 100644
--- a/drivers/media/usb/dvb-usb/dw2102.c
+++ b/drivers/media/usb/dvb-usb/dw2102.c
@@ -946,7 +946,7 @@ static int su3000_read_mac_address(struct dvb_usb_device *d, u8 mac[6])
for (i = 0; i < 6; i++) {
obuf[1] = 0xf0 + i;
if (i2c_transfer(&d->i2c_adap, msg, 2) != 2)
- break;
+ return -1;
else
mac[i] = ibuf[0];
}
diff --git a/drivers/media/usb/pvrusb2/Kconfig b/drivers/media/usb/pvrusb2/Kconfig
index 9501b10b31aa..0df10270dbdf 100644
--- a/drivers/media/usb/pvrusb2/Kconfig
+++ b/drivers/media/usb/pvrusb2/Kconfig
@@ -37,6 +37,7 @@ config VIDEO_PVRUSB2_DVB
bool "pvrusb2 ATSC/DVB support"
default y
depends on VIDEO_PVRUSB2 && DVB_CORE
+ depends on VIDEO_PVRUSB2=m || DVB_CORE=y
select DVB_LGDT330X if MEDIA_SUBDRV_AUTOSELECT
select DVB_S5H1409 if MEDIA_SUBDRV_AUTOSELECT
select DVB_S5H1411 if MEDIA_SUBDRV_AUTOSELECT
diff --git a/drivers/media/usb/ttusb-dec/ttusb_dec.c b/drivers/media/usb/ttusb-dec/ttusb_dec.c
index 38822cedd93a..c4474d4c44e2 100644
--- a/drivers/media/usb/ttusb-dec/ttusb_dec.c
+++ b/drivers/media/usb/ttusb-dec/ttusb_dec.c
@@ -1544,8 +1544,7 @@ static void ttusb_dec_exit_dvb(struct ttusb_dec *dec)
dvb_dmx_release(&dec->demux);
if (dec->fe) {
dvb_unregister_frontend(dec->fe);
- if (dec->fe->ops.release)
- dec->fe->ops.release(dec->fe);
+ dvb_frontend_detach(dec->fe);
}
dvb_unregister_adapter(&dec->adapter);
}
diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c
index 7aefa76a42b3..d631ce4f9f7b 100644
--- a/drivers/media/usb/uvc/uvc_driver.c
+++ b/drivers/media/usb/uvc/uvc_driver.c
@@ -251,14 +251,17 @@ static int uvc_parse_format(struct uvc_device *dev,
/* Find the format descriptor from its GUID. */
fmtdesc = uvc_format_by_guid(&buffer[5]);
- if (fmtdesc != NULL) {
- format->fcc = fmtdesc->fcc;
- } else {
+ if (!fmtdesc) {
+ /*
+ * Unknown video formats are not fatal errors, the
+ * caller will skip this descriptor.
+ */
dev_info(&streaming->intf->dev,
"Unknown video format %pUl\n", &buffer[5]);
- format->fcc = 0;
+ return 0;
}
+ format->fcc = fmtdesc->fcc;
format->bpp = buffer[21];
/*
@@ -675,7 +678,7 @@ static int uvc_parse_streaming(struct uvc_device *dev,
interval = (u32 *)&frame[nframes];
streaming->format = format;
- streaming->nformats = nformats;
+ streaming->nformats = 0;
/* Parse the format descriptors. */
while (buflen > 2 && buffer[1] == USB_DT_CS_INTERFACE) {
@@ -689,7 +692,10 @@ static int uvc_parse_streaming(struct uvc_device *dev,
&interval, buffer, buflen);
if (ret < 0)
goto error;
+ if (!ret)
+ break;
+ streaming->nformats++;
frame += format->nframes;
format++;
diff --git a/drivers/media/v4l2-core/v4l2-mc.c b/drivers/media/v4l2-core/v4l2-mc.c
index bf0c18100664..22fe08fce0a9 100644
--- a/drivers/media/v4l2-core/v4l2-mc.c
+++ b/drivers/media/v4l2-core/v4l2-mc.c
@@ -314,8 +314,7 @@ int v4l2_create_fwnode_links_to_pad(struct v4l2_subdev *src_sd,
{
struct fwnode_handle *endpoint;
- if (!(sink->flags & MEDIA_PAD_FL_SINK) ||
- !is_media_entity_v4l2_subdev(sink->entity))
+ if (!(sink->flags & MEDIA_PAD_FL_SINK))
return -EINVAL;
fwnode_graph_for_each_endpoint(dev_fwnode(src_sd->dev), endpoint) {
diff --git a/drivers/misc/eeprom/Kconfig b/drivers/misc/eeprom/Kconfig
index f0a7531f354c..2d240bfa819f 100644
--- a/drivers/misc/eeprom/Kconfig
+++ b/drivers/misc/eeprom/Kconfig
@@ -6,6 +6,7 @@ config EEPROM_AT24
depends on I2C && SYSFS
select NVMEM
select NVMEM_SYSFS
+ select REGMAP
select REGMAP_I2C
help
Enable this driver to get read/write support to most I2C EEPROMs
diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index f48466960f1b..30d4d0476248 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -316,12 +316,14 @@ static void fastrpc_free_map(struct kref *ref)
if (map->table) {
if (map->attr & FASTRPC_ATTR_SECUREMAP) {
struct qcom_scm_vmperm perm;
+ int vmid = map->fl->cctx->vmperms[0].vmid;
+ u64 src_perms = BIT(QCOM_SCM_VMID_HLOS) | BIT(vmid);
int err = 0;
perm.vmid = QCOM_SCM_VMID_HLOS;
perm.perm = QCOM_SCM_PERM_RWX;
err = qcom_scm_assign_mem(map->phys, map->size,
- &map->fl->cctx->perms, &perm, 1);
+ &src_perms, &perm, 1);
if (err) {
dev_err(map->fl->sctx->dev, "Failed to assign memory phys 0x%llx size 0x%llx err %d",
map->phys, map->size, err);
@@ -787,8 +789,12 @@ static int fastrpc_map_create(struct fastrpc_user *fl, int fd,
goto map_err;
}
- map->phys = sg_dma_address(map->table->sgl);
- map->phys += ((u64)fl->sctx->sid << 32);
+ if (attr & FASTRPC_ATTR_SECUREMAP) {
+ map->phys = sg_phys(map->table->sgl);
+ } else {
+ map->phys = sg_dma_address(map->table->sgl);
+ map->phys += ((u64)fl->sctx->sid << 32);
+ }
map->size = len;
map->va = sg_virt(map->table->sgl);
map->len = len;
@@ -798,9 +804,15 @@ static int fastrpc_map_create(struct fastrpc_user *fl, int fd,
* If subsystem VMIDs are defined in DTSI, then do
* hyp_assign from HLOS to those VM(s)
*/
+ u64 src_perms = BIT(QCOM_SCM_VMID_HLOS);
+ struct qcom_scm_vmperm dst_perms[2] = {0};
+
+ dst_perms[0].vmid = QCOM_SCM_VMID_HLOS;
+ dst_perms[0].perm = QCOM_SCM_PERM_RW;
+ dst_perms[1].vmid = fl->cctx->vmperms[0].vmid;
+ dst_perms[1].perm = QCOM_SCM_PERM_RWX;
map->attr = attr;
- err = qcom_scm_assign_mem(map->phys, (u64)map->size, &fl->cctx->perms,
- fl->cctx->vmperms, fl->cctx->vmcount);
+ err = qcom_scm_assign_mem(map->phys, (u64)map->size, &src_perms, dst_perms, 2);
if (err) {
dev_err(sess->dev, "Failed to assign memory with phys 0x%llx size 0x%llx err %d",
map->phys, map->size, err);
@@ -1892,7 +1904,7 @@ static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp)
req.vaddrout = rsp_msg.vaddr;
/* Add memory to static PD pool, protection thru hypervisor */
- if (req.flags != ADSP_MMAP_REMOTE_HEAP_ADDR && fl->cctx->vmcount) {
+ if (req.flags == ADSP_MMAP_REMOTE_HEAP_ADDR && fl->cctx->vmcount) {
struct qcom_scm_vmperm perm;
perm.vmid = QCOM_SCM_VMID_HLOS;
@@ -2337,8 +2349,10 @@ static void fastrpc_notify_users(struct fastrpc_user *user)
struct fastrpc_invoke_ctx *ctx;
spin_lock(&user->lock);
- list_for_each_entry(ctx, &user->pending, node)
+ list_for_each_entry(ctx, &user->pending, node) {
+ ctx->retval = -EPIPE;
complete(&ctx->work);
+ }
spin_unlock(&user->lock);
}
@@ -2349,7 +2363,9 @@ static void fastrpc_rpmsg_remove(struct rpmsg_device *rpdev)
struct fastrpc_user *user;
unsigned long flags;
+ /* No invocations past this point */
spin_lock_irqsave(&cctx->lock, flags);
+ cctx->rpdev = NULL;
list_for_each_entry(user, &cctx->users, user)
fastrpc_notify_users(user);
spin_unlock_irqrestore(&cctx->lock, flags);
@@ -2368,7 +2384,6 @@ static void fastrpc_rpmsg_remove(struct rpmsg_device *rpdev)
of_platform_depopulate(&rpdev->dev);
- cctx->rpdev = NULL;
fastrpc_channel_ctx_put(cctx);
}
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 2a33b5073cc4..6d01025f1fcf 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -264,6 +264,7 @@ static ssize_t power_ro_lock_store(struct device *dev,
goto out_put;
}
req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_BOOT_WP;
+ req_to_mmc_queue_req(req)->drv_op_result = -EIO;
blk_execute_rq(req, false);
ret = req_to_mmc_queue_req(req)->drv_op_result;
blk_mq_free_request(req);
@@ -651,6 +652,7 @@ static int mmc_blk_ioctl_cmd(struct mmc_blk_data *md,
idatas[0] = idata;
req_to_mmc_queue_req(req)->drv_op =
rpmb ? MMC_DRV_OP_IOCTL_RPMB : MMC_DRV_OP_IOCTL;
+ req_to_mmc_queue_req(req)->drv_op_result = -EIO;
req_to_mmc_queue_req(req)->drv_op_data = idatas;
req_to_mmc_queue_req(req)->ioc_count = 1;
blk_execute_rq(req, false);
@@ -722,6 +724,7 @@ static int mmc_blk_ioctl_multi_cmd(struct mmc_blk_data *md,
}
req_to_mmc_queue_req(req)->drv_op =
rpmb ? MMC_DRV_OP_IOCTL_RPMB : MMC_DRV_OP_IOCTL;
+ req_to_mmc_queue_req(req)->drv_op_result = -EIO;
req_to_mmc_queue_req(req)->drv_op_data = idata;
req_to_mmc_queue_req(req)->ioc_count = n;
blk_execute_rq(req, false);
@@ -2806,6 +2809,7 @@ static int mmc_dbg_card_status_get(void *data, u64 *val)
if (IS_ERR(req))
return PTR_ERR(req);
req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_GET_CARD_STATUS;
+ req_to_mmc_queue_req(req)->drv_op_result = -EIO;
blk_execute_rq(req, false);
ret = req_to_mmc_queue_req(req)->drv_op_result;
if (ret >= 0) {
@@ -2844,6 +2848,7 @@ static int mmc_ext_csd_open(struct inode *inode, struct file *filp)
goto out_free;
}
req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_GET_EXT_CSD;
+ req_to_mmc_queue_req(req)->drv_op_result = -EIO;
req_to_mmc_queue_req(req)->drv_op_data = &ext_csd;
blk_execute_rq(req, false);
err = req_to_mmc_queue_req(req)->drv_op_result;
diff --git a/drivers/mmc/core/pwrseq_sd8787.c b/drivers/mmc/core/pwrseq_sd8787.c
index 2e120ad83020..0c5f5e371e1f 100644
--- a/drivers/mmc/core/pwrseq_sd8787.c
+++ b/drivers/mmc/core/pwrseq_sd8787.c
@@ -28,7 +28,6 @@ struct mmc_pwrseq_sd8787 {
struct mmc_pwrseq pwrseq;
struct gpio_desc *reset_gpio;
struct gpio_desc *pwrdn_gpio;
- u32 reset_pwrdwn_delay_ms;
};
#define to_pwrseq_sd8787(p) container_of(p, struct mmc_pwrseq_sd8787, pwrseq)
@@ -39,7 +38,7 @@ static void mmc_pwrseq_sd8787_pre_power_on(struct mmc_host *host)
gpiod_set_value_cansleep(pwrseq->reset_gpio, 1);
- msleep(pwrseq->reset_pwrdwn_delay_ms);
+ msleep(300);
gpiod_set_value_cansleep(pwrseq->pwrdn_gpio, 1);
}
@@ -51,17 +50,37 @@ static void mmc_pwrseq_sd8787_power_off(struct mmc_host *host)
gpiod_set_value_cansleep(pwrseq->reset_gpio, 0);
}
+static void mmc_pwrseq_wilc1000_pre_power_on(struct mmc_host *host)
+{
+ struct mmc_pwrseq_sd8787 *pwrseq = to_pwrseq_sd8787(host->pwrseq);
+
+ /* The pwrdn_gpio is really CHIP_EN, reset_gpio is RESETN */
+ gpiod_set_value_cansleep(pwrseq->pwrdn_gpio, 1);
+ msleep(5);
+ gpiod_set_value_cansleep(pwrseq->reset_gpio, 1);
+}
+
+static void mmc_pwrseq_wilc1000_power_off(struct mmc_host *host)
+{
+ struct mmc_pwrseq_sd8787 *pwrseq = to_pwrseq_sd8787(host->pwrseq);
+
+ gpiod_set_value_cansleep(pwrseq->reset_gpio, 0);
+ gpiod_set_value_cansleep(pwrseq->pwrdn_gpio, 0);
+}
+
static const struct mmc_pwrseq_ops mmc_pwrseq_sd8787_ops = {
.pre_power_on = mmc_pwrseq_sd8787_pre_power_on,
.power_off = mmc_pwrseq_sd8787_power_off,
};
-static const u32 sd8787_delay_ms = 300;
-static const u32 wilc1000_delay_ms = 5;
+static const struct mmc_pwrseq_ops mmc_pwrseq_wilc1000_ops = {
+ .pre_power_on = mmc_pwrseq_wilc1000_pre_power_on,
+ .power_off = mmc_pwrseq_wilc1000_power_off,
+};
static const struct of_device_id mmc_pwrseq_sd8787_of_match[] = {
- { .compatible = "mmc-pwrseq-sd8787", .data = &sd8787_delay_ms },
- { .compatible = "mmc-pwrseq-wilc1000", .data = &wilc1000_delay_ms },
+ { .compatible = "mmc-pwrseq-sd8787", .data = &mmc_pwrseq_sd8787_ops },
+ { .compatible = "mmc-pwrseq-wilc1000", .data = &mmc_pwrseq_wilc1000_ops },
{/* sentinel */},
};
MODULE_DEVICE_TABLE(of, mmc_pwrseq_sd8787_of_match);
@@ -77,7 +96,6 @@ static int mmc_pwrseq_sd8787_probe(struct platform_device *pdev)
return -ENOMEM;
match = of_match_node(mmc_pwrseq_sd8787_of_match, pdev->dev.of_node);
- pwrseq->reset_pwrdwn_delay_ms = *(u32 *)match->data;
pwrseq->pwrdn_gpio = devm_gpiod_get(dev, "powerdown", GPIOD_OUT_LOW);
if (IS_ERR(pwrseq->pwrdn_gpio))
@@ -88,7 +106,7 @@ static int mmc_pwrseq_sd8787_probe(struct platform_device *pdev)
return PTR_ERR(pwrseq->reset_gpio);
pwrseq->pwrseq.dev = dev;
- pwrseq->pwrseq.ops = &mmc_pwrseq_sd8787_ops;
+ pwrseq->pwrseq.ops = match->data;
pwrseq->pwrseq.owner = THIS_MODULE;
platform_set_drvdata(pdev, pwrseq);
diff --git a/drivers/mmc/host/bcm2835.c b/drivers/mmc/host/bcm2835.c
index 8648f7e63ca1..eea208856ce0 100644
--- a/drivers/mmc/host/bcm2835.c
+++ b/drivers/mmc/host/bcm2835.c
@@ -1403,8 +1403,8 @@ static int bcm2835_probe(struct platform_device *pdev)
host->max_clk = clk_get_rate(clk);
host->irq = platform_get_irq(pdev, 0);
- if (host->irq <= 0) {
- ret = -EINVAL;
+ if (host->irq < 0) {
+ ret = host->irq;
goto err;
}
diff --git a/drivers/mmc/host/litex_mmc.c b/drivers/mmc/host/litex_mmc.c
index 39c6707fdfdb..9af6b0902efe 100644
--- a/drivers/mmc/host/litex_mmc.c
+++ b/drivers/mmc/host/litex_mmc.c
@@ -649,6 +649,7 @@ static struct platform_driver litex_mmc_driver = {
.driver = {
.name = "litex-mmc",
.of_match_table = litex_match,
+ .probe_type = PROBE_PREFER_ASYNCHRONOUS,
},
};
module_platform_driver(litex_mmc_driver);
diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
index b8514d9d5e73..ee9a25b900ae 100644
--- a/drivers/mmc/host/meson-gx-mmc.c
+++ b/drivers/mmc/host/meson-gx-mmc.c
@@ -991,11 +991,8 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id)
if (data && !cmd->error)
data->bytes_xfered = data->blksz * data->blocks;
- if (meson_mmc_bounce_buf_read(data) ||
- meson_mmc_get_next_command(cmd))
- ret = IRQ_WAKE_THREAD;
- else
- ret = IRQ_HANDLED;
+
+ return IRQ_WAKE_THREAD;
}
out:
@@ -1007,9 +1004,6 @@ out:
writel(start, host->regs + SD_EMMC_START);
}
- if (ret == IRQ_HANDLED)
- meson_mmc_request_done(host->mmc, cmd->mrq);
-
return ret;
}
@@ -1192,8 +1186,8 @@ static int meson_mmc_probe(struct platform_device *pdev)
return PTR_ERR(host->regs);
host->irq = platform_get_irq(pdev, 0);
- if (host->irq <= 0)
- return -EINVAL;
+ if (host->irq < 0)
+ return host->irq;
cd_irq = platform_get_irq_optional(pdev, 1);
mmc_gpio_set_cd_irq(mmc, cd_irq);
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index f2b2e8b0574e..696cbef3ff7d 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -1735,7 +1735,8 @@ static void mmci_set_max_busy_timeout(struct mmc_host *mmc)
return;
if (host->variant->busy_timeout && mmc->actual_clock)
- max_busy_timeout = ~0UL / (mmc->actual_clock / MSEC_PER_SEC);
+ max_busy_timeout = U32_MAX / DIV_ROUND_UP(mmc->actual_clock,
+ MSEC_PER_SEC);
mmc->max_busy_timeout = max_busy_timeout;
}
diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index edade0e54a0c..9785ec91654f 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -2680,7 +2680,7 @@ static int msdc_drv_probe(struct platform_device *pdev)
host->irq = platform_get_irq(pdev, 0);
if (host->irq < 0) {
- ret = -EINVAL;
+ ret = host->irq;
goto host_free;
}
diff --git a/drivers/mmc/host/mvsdio.c b/drivers/mmc/host/mvsdio.c
index 629efbe639c4..b4f6a0a2fcb5 100644
--- a/drivers/mmc/host/mvsdio.c
+++ b/drivers/mmc/host/mvsdio.c
@@ -704,7 +704,7 @@ static int mvsd_probe(struct platform_device *pdev)
}
irq = platform_get_irq(pdev, 0);
if (irq < 0)
- return -ENXIO;
+ return irq;
mmc = mmc_alloc_host(sizeof(struct mvsd_host), &pdev->dev);
if (!mmc) {
diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index ce78edfb402b..86454f1182bb 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -1343,7 +1343,7 @@ static int mmc_omap_probe(struct platform_device *pdev)
irq = platform_get_irq(pdev, 0);
if (irq < 0)
- return -ENXIO;
+ return irq;
host->virt_base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
if (IS_ERR(host->virt_base))
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 517dde777413..1e0f2d7774bd 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -1791,9 +1791,11 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
}
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- irq = platform_get_irq(pdev, 0);
- if (res == NULL || irq < 0)
+ if (!res)
return -ENXIO;
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
base = devm_ioremap_resource(&pdev->dev, res);
if (IS_ERR(base))
diff --git a/drivers/mmc/host/owl-mmc.c b/drivers/mmc/host/owl-mmc.c
index 6f9d31a886ba..1bf22b08b373 100644
--- a/drivers/mmc/host/owl-mmc.c
+++ b/drivers/mmc/host/owl-mmc.c
@@ -637,7 +637,7 @@ static int owl_mmc_probe(struct platform_device *pdev)
owl_host->irq = platform_get_irq(pdev, 0);
if (owl_host->irq < 0) {
- ret = -EINVAL;
+ ret = owl_host->irq;
goto err_release_channel;
}
diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index 8f0e639236b1..edf2e6c14dc6 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c
@@ -829,7 +829,7 @@ static int sdhci_acpi_probe(struct platform_device *pdev)
host->ops = &sdhci_acpi_ops_dflt;
host->irq = platform_get_irq(pdev, 0);
if (host->irq < 0) {
- err = -EINVAL;
+ err = host->irq;
goto err_free;
}
diff --git a/drivers/mmc/host/sdhci-cadence.c b/drivers/mmc/host/sdhci-cadence.c
index b24aa27da50c..d2f625054689 100644
--- a/drivers/mmc/host/sdhci-cadence.c
+++ b/drivers/mmc/host/sdhci-cadence.c
@@ -540,9 +540,11 @@ static int sdhci_cdns_probe(struct platform_device *pdev)
if (host->mmc->caps & MMC_CAP_HW_RESET) {
priv->rst_hw = devm_reset_control_get_optional_exclusive(dev, NULL);
- if (IS_ERR(priv->rst_hw))
- return dev_err_probe(mmc_dev(host->mmc), PTR_ERR(priv->rst_hw),
- "reset controller error\n");
+ if (IS_ERR(priv->rst_hw)) {
+ ret = dev_err_probe(mmc_dev(host->mmc), PTR_ERR(priv->rst_hw),
+ "reset controller error\n");
+ goto free;
+ }
if (priv->rst_hw)
host->mmc_host_ops.card_hw_reset = sdhci_cdns_mmc_hw_reset;
}
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index d7c0c0b9e26c..eebf94604a7f 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -1634,6 +1634,10 @@ sdhci_esdhc_imx_probe_dt(struct platform_device *pdev,
if (ret)
return ret;
+ /* HS400/HS400ES require 8 bit bus */
+ if (!(host->mmc->caps & MMC_CAP_8_BIT_DATA))
+ host->mmc->caps2 &= ~(MMC_CAP2_HS400 | MMC_CAP2_HS400_ES);
+
if (mmc_gpio_get_cd(host->mmc) >= 0)
host->quirks &= ~SDHCI_QUIRK_BROKEN_CARD_DETECTION;
@@ -1724,10 +1728,6 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev)
host->mmc_host_ops.init_card = usdhc_init_card;
}
- err = sdhci_esdhc_imx_probe_dt(pdev, host, imx_data);
- if (err)
- goto disable_ahb_clk;
-
if (imx_data->socdata->flags & ESDHC_FLAG_MAN_TUNING)
sdhci_esdhc_ops.platform_execute_tuning =
esdhc_executing_tuning;
@@ -1735,15 +1735,13 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev)
if (imx_data->socdata->flags & ESDHC_FLAG_ERR004536)
host->quirks |= SDHCI_QUIRK_BROKEN_ADMA;
- if (host->mmc->caps & MMC_CAP_8_BIT_DATA &&
- imx_data->socdata->flags & ESDHC_FLAG_HS400)
+ if (imx_data->socdata->flags & ESDHC_FLAG_HS400)
host->mmc->caps2 |= MMC_CAP2_HS400;
if (imx_data->socdata->flags & ESDHC_FLAG_BROKEN_AUTO_CMD23)
host->quirks2 |= SDHCI_QUIRK2_ACMD23_BROKEN;
- if (host->mmc->caps & MMC_CAP_8_BIT_DATA &&
- imx_data->socdata->flags & ESDHC_FLAG_HS400_ES) {
+ if (imx_data->socdata->flags & ESDHC_FLAG_HS400_ES) {
host->mmc->caps2 |= MMC_CAP2_HS400_ES;
host->mmc_host_ops.hs400_enhanced_strobe =
esdhc_hs400_enhanced_strobe;
@@ -1765,6 +1763,10 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev)
goto disable_ahb_clk;
}
+ err = sdhci_esdhc_imx_probe_dt(pdev, host, imx_data);
+ if (err)
+ goto disable_ahb_clk;
+
sdhci_esdhc_imx_hwinit(host);
err = sdhci_add_host(host);
diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 8ac81d57a3df..1877d583fe8c 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -2479,6 +2479,9 @@ static inline void sdhci_msm_get_of_property(struct platform_device *pdev,
msm_host->ddr_config = DDR_CONFIG_POR_VAL;
of_property_read_u32(node, "qcom,dll-config", &msm_host->dll_config);
+
+ if (of_device_is_compatible(node, "qcom,msm8916-sdhci"))
+ host->quirks2 |= SDHCI_QUIRK2_BROKEN_64_BIT_DMA;
}
static int sdhci_msm_gcc_reset(struct device *dev, struct sdhci_host *host)
diff --git a/drivers/mmc/host/sdhci-spear.c b/drivers/mmc/host/sdhci-spear.c
index d463e2fd5b1a..c79035727b20 100644
--- a/drivers/mmc/host/sdhci-spear.c
+++ b/drivers/mmc/host/sdhci-spear.c
@@ -65,8 +65,8 @@ static int sdhci_probe(struct platform_device *pdev)
host->hw_name = "sdhci";
host->ops = &sdhci_pltfm_ops;
host->irq = platform_get_irq(pdev, 0);
- if (host->irq <= 0) {
- ret = -EINVAL;
+ if (host->irq < 0) {
+ ret = host->irq;
goto err_host;
}
host->quirks = SDHCI_QUIRK_BROKEN_ADMA;
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index 0fd4c9d644dd..5cf53348372a 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -1400,7 +1400,7 @@ static int sh_mmcif_probe(struct platform_device *pdev)
irq[0] = platform_get_irq(pdev, 0);
irq[1] = platform_get_irq_optional(pdev, 1);
if (irq[0] < 0)
- return -ENXIO;
+ return irq[0];
reg = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(reg))
diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c
index 3db9f32d6a7b..69dcb8805e05 100644
--- a/drivers/mmc/host/sunxi-mmc.c
+++ b/drivers/mmc/host/sunxi-mmc.c
@@ -1350,8 +1350,8 @@ static int sunxi_mmc_resource_request(struct sunxi_mmc_host *host,
return ret;
host->irq = platform_get_irq(pdev, 0);
- if (host->irq <= 0) {
- ret = -EINVAL;
+ if (host->irq < 0) {
+ ret = host->irq;
goto error_disable_mmc;
}
diff --git a/drivers/mmc/host/usdhi6rol0.c b/drivers/mmc/host/usdhi6rol0.c
index 2f59917b105e..2e17903658fc 100644
--- a/drivers/mmc/host/usdhi6rol0.c
+++ b/drivers/mmc/host/usdhi6rol0.c
@@ -1757,8 +1757,10 @@ static int usdhi6_probe(struct platform_device *pdev)
irq_cd = platform_get_irq_byname(pdev, "card detect");
irq_sd = platform_get_irq_byname(pdev, "data");
irq_sdio = platform_get_irq_byname(pdev, "SDIO");
- if (irq_sd < 0 || irq_sdio < 0)
- return -ENODEV;
+ if (irq_sd < 0)
+ return irq_sd;
+ if (irq_sdio < 0)
+ return irq_sdio;
mmc = mmc_alloc_host(sizeof(struct usdhi6_host), dev);
if (!mmc)
diff --git a/drivers/mmc/host/vub300.c b/drivers/mmc/host/vub300.c
index e4c4bfac3763..9ec593d52f0f 100644
--- a/drivers/mmc/host/vub300.c
+++ b/drivers/mmc/host/vub300.c
@@ -1713,6 +1713,9 @@ static void construct_request_response(struct vub300_mmc_host *vub300,
int bytes = 3 & less_cmd;
int words = less_cmd >> 2;
u8 *r = vub300->resp.response.command_response;
+
+ if (!resp_len)
+ return;
if (bytes == 3) {
cmd->resp[words] = (r[1 + (words << 2)] << 24)
| (r[2 + (words << 2)] << 16)
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 01f1c6792df9..8dc4f5c493fc 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -590,8 +590,8 @@ static void adjust_oob_length(struct mtd_info *mtd, uint64_t start,
(end_page - start_page + 1) * oob_per_page);
}
-static int mtdchar_write_ioctl(struct mtd_info *mtd,
- struct mtd_write_req __user *argp)
+static noinline_for_stack int
+mtdchar_write_ioctl(struct mtd_info *mtd, struct mtd_write_req __user *argp)
{
struct mtd_info *master = mtd_get_master(mtd);
struct mtd_write_req req;
@@ -688,8 +688,8 @@ static int mtdchar_write_ioctl(struct mtd_info *mtd,
return ret;
}
-static int mtdchar_read_ioctl(struct mtd_info *mtd,
- struct mtd_read_req __user *argp)
+static noinline_for_stack int
+mtdchar_read_ioctl(struct mtd_info *mtd, struct mtd_read_req __user *argp)
{
struct mtd_info *master = mtd_get_master(mtd);
struct mtd_read_req req;
diff --git a/drivers/mtd/nand/raw/ingenic/ingenic_ecc.h b/drivers/mtd/nand/raw/ingenic/ingenic_ecc.h
index 2cda439b5e11..017868f59f22 100644
--- a/drivers/mtd/nand/raw/ingenic/ingenic_ecc.h
+++ b/drivers/mtd/nand/raw/ingenic/ingenic_ecc.h
@@ -36,25 +36,25 @@ int ingenic_ecc_correct(struct ingenic_ecc *ecc,
void ingenic_ecc_release(struct ingenic_ecc *ecc);
struct ingenic_ecc *of_ingenic_ecc_get(struct device_node *np);
#else /* CONFIG_MTD_NAND_INGENIC_ECC */
-int ingenic_ecc_calculate(struct ingenic_ecc *ecc,
+static inline int ingenic_ecc_calculate(struct ingenic_ecc *ecc,
struct ingenic_ecc_params *params,
const u8 *buf, u8 *ecc_code)
{
return -ENODEV;
}
-int ingenic_ecc_correct(struct ingenic_ecc *ecc,
+static inline int ingenic_ecc_correct(struct ingenic_ecc *ecc,
struct ingenic_ecc_params *params, u8 *buf,
u8 *ecc_code)
{
return -ENODEV;
}
-void ingenic_ecc_release(struct ingenic_ecc *ecc)
+static inline void ingenic_ecc_release(struct ingenic_ecc *ecc)
{
}
-struct ingenic_ecc *of_ingenic_ecc_get(struct device_node *np)
+static inline struct ingenic_ecc *of_ingenic_ecc_get(struct device_node *np)
{
return ERR_PTR(-ENODEV);
}
diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c
index afb424579f0b..30c15e4e1cc0 100644
--- a/drivers/mtd/nand/raw/marvell_nand.c
+++ b/drivers/mtd/nand/raw/marvell_nand.c
@@ -2457,6 +2457,12 @@ static int marvell_nfc_setup_interface(struct nand_chip *chip, int chipnr,
NDTR1_WAIT_MODE;
}
+ /*
+ * Reset nfc->selected_chip so the next command will cause the timing
+ * registers to be updated in marvell_nfc_select_target().
+ */
+ nfc->selected_chip = NULL;
+
return 0;
}
@@ -2894,10 +2900,6 @@ static int marvell_nfc_init(struct marvell_nfc *nfc)
regmap_update_bits(sysctrl_base, GENCONF_CLK_GATING_CTRL,
GENCONF_CLK_GATING_CTRL_ND_GATE,
GENCONF_CLK_GATING_CTRL_ND_GATE);
-
- regmap_update_bits(sysctrl_base, GENCONF_ND_CLK_CTRL,
- GENCONF_ND_CLK_CTRL_EN,
- GENCONF_ND_CLK_CTRL_EN);
}
/* Configure the DMA if appropriate */
diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
index 0bb0ad14a2fc..5f29fac8669a 100644
--- a/drivers/mtd/spi-nor/core.c
+++ b/drivers/mtd/spi-nor/core.c
@@ -2018,6 +2018,7 @@ static const struct spi_nor_manufacturer *manufacturers[] = {
static const struct flash_info spi_nor_generic_flash = {
.name = "spi-nor-generic",
+ .n_banks = 1,
/*
* JESD216 rev A doesn't specify the page size, therefore we need a
* sane default.
@@ -2921,7 +2922,8 @@ static void spi_nor_late_init_params(struct spi_nor *nor)
if (nor->flags & SNOR_F_HAS_LOCK && !nor->params->locking_ops)
spi_nor_init_default_locking_ops(nor);
- nor->params->bank_size = div64_u64(nor->params->size, nor->info->n_banks);
+ if (nor->info->n_banks > 1)
+ params->bank_size = div64_u64(params->size, nor->info->n_banks);
}
/**
@@ -2987,6 +2989,7 @@ static void spi_nor_init_default_params(struct spi_nor *nor)
/* Set SPI NOR sizes. */
params->writesize = 1;
params->size = (u64)info->sector_size * info->n_sectors;
+ params->bank_size = params->size;
params->page_size = info->page_size;
if (!(info->flags & SPI_NOR_NO_FR)) {
diff --git a/drivers/mtd/spi-nor/spansion.c b/drivers/mtd/spi-nor/spansion.c
index 15f9a80c10b9..36876aa849ed 100644
--- a/drivers/mtd/spi-nor/spansion.c
+++ b/drivers/mtd/spi-nor/spansion.c
@@ -361,7 +361,7 @@ static int cypress_nor_determine_addr_mode_by_sr1(struct spi_nor *nor,
*/
static int cypress_nor_set_addr_mode_nbytes(struct spi_nor *nor)
{
- struct spi_mem_op op;
+ struct spi_mem_op op = {};
u8 addr_mode;
int ret;
@@ -492,7 +492,7 @@ s25fs256t_post_bfpt_fixup(struct spi_nor *nor,
const struct sfdp_parameter_header *bfpt_header,
const struct sfdp_bfpt *bfpt)
{
- struct spi_mem_op op;
+ struct spi_mem_op op = {};
int ret;
ret = cypress_nor_set_addr_mode_nbytes(nor);
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 3fed888629f7..edbaa1444f8e 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3947,7 +3947,11 @@ static int bond_slave_netdev_event(unsigned long event,
unblock_netpoll_tx();
break;
case NETDEV_FEAT_CHANGE:
- bond_compute_features(bond);
+ if (!bond->notifier_ctx) {
+ bond->notifier_ctx = true;
+ bond_compute_features(bond);
+ bond->notifier_ctx = false;
+ }
break;
case NETDEV_RESEND_IGMP:
/* Propagate to master device */
@@ -6342,6 +6346,8 @@ static int bond_init(struct net_device *bond_dev)
if (!bond->wq)
return -ENOMEM;
+ bond->notifier_ctx = false;
+
spin_lock_init(&bond->stats_lock);
netdev_lockdep_set_classes(bond_dev);
diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index 3ceccafd701b..b190007c01be 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -95,7 +95,7 @@ config CAN_AT91
config CAN_BXCAN
tristate "STM32 Basic Extended CAN (bxCAN) devices"
- depends on OF || ARCH_STM32 || COMPILE_TEST
+ depends on ARCH_STM32 || COMPILE_TEST
depends on HAS_IOMEM
select CAN_RX_OFFLOAD
help
diff --git a/drivers/net/can/bxcan.c b/drivers/net/can/bxcan.c
index e26ccd41e3cb..027a8a162fe4 100644
--- a/drivers/net/can/bxcan.c
+++ b/drivers/net/can/bxcan.c
@@ -118,7 +118,7 @@
#define BXCAN_FiR1_REG(b) (0x40 + (b) * 8)
#define BXCAN_FiR2_REG(b) (0x44 + (b) * 8)
-#define BXCAN_FILTER_ID(primary) (primary ? 0 : 14)
+#define BXCAN_FILTER_ID(cfg) ((cfg) == BXCAN_CFG_DUAL_SECONDARY ? 14 : 0)
/* Filter primary register (FMR) bits */
#define BXCAN_FMR_CANSB_MASK GENMASK(13, 8)
@@ -135,6 +135,12 @@ enum bxcan_lec_code {
BXCAN_LEC_UNUSED
};
+enum bxcan_cfg {
+ BXCAN_CFG_SINGLE = 0,
+ BXCAN_CFG_DUAL_PRIMARY,
+ BXCAN_CFG_DUAL_SECONDARY
+};
+
/* Structure of the message buffer */
struct bxcan_mb {
u32 id; /* can identifier */
@@ -167,7 +173,7 @@ struct bxcan_priv {
struct regmap *gcan;
int tx_irq;
int sce_irq;
- bool primary;
+ enum bxcan_cfg cfg;
struct clk *clk;
spinlock_t rmw_lock; /* lock for read-modify-write operations */
unsigned int tx_head;
@@ -202,17 +208,17 @@ static inline void bxcan_rmw(struct bxcan_priv *priv, void __iomem *addr,
spin_unlock_irqrestore(&priv->rmw_lock, flags);
}
-static void bxcan_disable_filters(struct bxcan_priv *priv, bool primary)
+static void bxcan_disable_filters(struct bxcan_priv *priv, enum bxcan_cfg cfg)
{
- unsigned int fid = BXCAN_FILTER_ID(primary);
+ unsigned int fid = BXCAN_FILTER_ID(cfg);
u32 fmask = BIT(fid);
regmap_update_bits(priv->gcan, BXCAN_FA1R_REG, fmask, 0);
}
-static void bxcan_enable_filters(struct bxcan_priv *priv, bool primary)
+static void bxcan_enable_filters(struct bxcan_priv *priv, enum bxcan_cfg cfg)
{
- unsigned int fid = BXCAN_FILTER_ID(primary);
+ unsigned int fid = BXCAN_FILTER_ID(cfg);
u32 fmask = BIT(fid);
/* Filter settings:
@@ -680,7 +686,7 @@ static int bxcan_chip_start(struct net_device *ndev)
BXCAN_BTR_BRP_MASK | BXCAN_BTR_TS1_MASK | BXCAN_BTR_TS2_MASK |
BXCAN_BTR_SJW_MASK, set);
- bxcan_enable_filters(priv, priv->primary);
+ bxcan_enable_filters(priv, priv->cfg);
/* Clear all internal status */
priv->tx_head = 0;
@@ -806,7 +812,7 @@ static void bxcan_chip_stop(struct net_device *ndev)
BXCAN_IER_EPVIE | BXCAN_IER_EWGIE | BXCAN_IER_FOVIE1 |
BXCAN_IER_FFIE1 | BXCAN_IER_FMPIE1 | BXCAN_IER_FOVIE0 |
BXCAN_IER_FFIE0 | BXCAN_IER_FMPIE0 | BXCAN_IER_TMEIE, 0);
- bxcan_disable_filters(priv, priv->primary);
+ bxcan_disable_filters(priv, priv->cfg);
bxcan_enter_sleep_mode(priv);
priv->can.state = CAN_STATE_STOPPED;
}
@@ -931,7 +937,7 @@ static int bxcan_probe(struct platform_device *pdev)
struct clk *clk = NULL;
void __iomem *regs;
struct regmap *gcan;
- bool primary;
+ enum bxcan_cfg cfg;
int err, rx_irq, tx_irq, sce_irq;
regs = devm_platform_ioremap_resource(pdev, 0);
@@ -946,7 +952,13 @@ static int bxcan_probe(struct platform_device *pdev)
return PTR_ERR(gcan);
}
- primary = of_property_read_bool(np, "st,can-primary");
+ if (of_property_read_bool(np, "st,can-primary"))
+ cfg = BXCAN_CFG_DUAL_PRIMARY;
+ else if (of_property_read_bool(np, "st,can-secondary"))
+ cfg = BXCAN_CFG_DUAL_SECONDARY;
+ else
+ cfg = BXCAN_CFG_SINGLE;
+
clk = devm_clk_get(dev, NULL);
if (IS_ERR(clk)) {
dev_err(dev, "failed to get clock\n");
@@ -992,7 +1004,7 @@ static int bxcan_probe(struct platform_device *pdev)
priv->clk = clk;
priv->tx_irq = tx_irq;
priv->sce_irq = sce_irq;
- priv->primary = primary;
+ priv->cfg = cfg;
priv->can.clock.freq = clk_get_rate(clk);
spin_lock_init(&priv->rmw_lock);
priv->tx_head = 0;
diff --git a/drivers/net/can/dev/skb.c b/drivers/net/can/dev/skb.c
index 241ec636e91f..f6d05b3ef59a 100644
--- a/drivers/net/can/dev/skb.c
+++ b/drivers/net/can/dev/skb.c
@@ -54,7 +54,8 @@ int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev,
/* check flag whether this packet has to be looped back */
if (!(dev->flags & IFF_ECHO) ||
(skb->protocol != htons(ETH_P_CAN) &&
- skb->protocol != htons(ETH_P_CANFD))) {
+ skb->protocol != htons(ETH_P_CANFD) &&
+ skb->protocol != htons(ETH_P_CANXL))) {
kfree_skb(skb);
return 0;
}
diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c
index 53e8a914c88b..be189edb256c 100644
--- a/drivers/net/can/kvaser_pciefd.c
+++ b/drivers/net/can/kvaser_pciefd.c
@@ -71,10 +71,12 @@ MODULE_DESCRIPTION("CAN driver for Kvaser CAN/PCIe devices");
#define KVASER_PCIEFD_SYSID_BUILD_REG (KVASER_PCIEFD_SYSID_BASE + 0x14)
/* Shared receive buffer registers */
#define KVASER_PCIEFD_SRB_BASE 0x1f200
+#define KVASER_PCIEFD_SRB_FIFO_LAST_REG (KVASER_PCIEFD_SRB_BASE + 0x1f4)
#define KVASER_PCIEFD_SRB_CMD_REG (KVASER_PCIEFD_SRB_BASE + 0x200)
#define KVASER_PCIEFD_SRB_IEN_REG (KVASER_PCIEFD_SRB_BASE + 0x204)
#define KVASER_PCIEFD_SRB_IRQ_REG (KVASER_PCIEFD_SRB_BASE + 0x20c)
#define KVASER_PCIEFD_SRB_STAT_REG (KVASER_PCIEFD_SRB_BASE + 0x210)
+#define KVASER_PCIEFD_SRB_RX_NR_PACKETS_REG (KVASER_PCIEFD_SRB_BASE + 0x214)
#define KVASER_PCIEFD_SRB_CTRL_REG (KVASER_PCIEFD_SRB_BASE + 0x218)
/* EPCS flash controller registers */
#define KVASER_PCIEFD_SPI_BASE 0x1fc00
@@ -111,6 +113,9 @@ MODULE_DESCRIPTION("CAN driver for Kvaser CAN/PCIe devices");
/* DMA support */
#define KVASER_PCIEFD_SRB_STAT_DMA BIT(24)
+/* SRB current packet level */
+#define KVASER_PCIEFD_SRB_RX_NR_PACKETS_MASK 0xff
+
/* DMA Enable */
#define KVASER_PCIEFD_SRB_CTRL_DMA_ENABLE BIT(0)
@@ -526,7 +531,7 @@ static int kvaser_pciefd_set_tx_irq(struct kvaser_pciefd_can *can)
KVASER_PCIEFD_KCAN_IRQ_TOF | KVASER_PCIEFD_KCAN_IRQ_ABD |
KVASER_PCIEFD_KCAN_IRQ_TAE | KVASER_PCIEFD_KCAN_IRQ_TAL |
KVASER_PCIEFD_KCAN_IRQ_FDIC | KVASER_PCIEFD_KCAN_IRQ_BPP |
- KVASER_PCIEFD_KCAN_IRQ_TAR | KVASER_PCIEFD_KCAN_IRQ_TFD;
+ KVASER_PCIEFD_KCAN_IRQ_TAR;
iowrite32(msk, can->reg_base + KVASER_PCIEFD_KCAN_IEN_REG);
@@ -554,6 +559,8 @@ static void kvaser_pciefd_setup_controller(struct kvaser_pciefd_can *can)
if (can->can.ctrlmode & CAN_CTRLMODE_LISTENONLY)
mode |= KVASER_PCIEFD_KCAN_MODE_LOM;
+ else
+ mode &= ~KVASER_PCIEFD_KCAN_MODE_LOM;
mode |= KVASER_PCIEFD_KCAN_MODE_EEN;
mode |= KVASER_PCIEFD_KCAN_MODE_EPEN;
@@ -572,7 +579,7 @@ static void kvaser_pciefd_start_controller_flush(struct kvaser_pciefd_can *can)
spin_lock_irqsave(&can->lock, irq);
iowrite32(-1, can->reg_base + KVASER_PCIEFD_KCAN_IRQ_REG);
- iowrite32(KVASER_PCIEFD_KCAN_IRQ_ABD | KVASER_PCIEFD_KCAN_IRQ_TFD,
+ iowrite32(KVASER_PCIEFD_KCAN_IRQ_ABD,
can->reg_base + KVASER_PCIEFD_KCAN_IEN_REG);
status = ioread32(can->reg_base + KVASER_PCIEFD_KCAN_STAT_REG);
@@ -615,7 +622,7 @@ static int kvaser_pciefd_bus_on(struct kvaser_pciefd_can *can)
iowrite32(0, can->reg_base + KVASER_PCIEFD_KCAN_IEN_REG);
iowrite32(-1, can->reg_base + KVASER_PCIEFD_KCAN_IRQ_REG);
- iowrite32(KVASER_PCIEFD_KCAN_IRQ_ABD | KVASER_PCIEFD_KCAN_IRQ_TFD,
+ iowrite32(KVASER_PCIEFD_KCAN_IRQ_ABD,
can->reg_base + KVASER_PCIEFD_KCAN_IEN_REG);
mode = ioread32(can->reg_base + KVASER_PCIEFD_KCAN_MODE_REG);
@@ -719,6 +726,7 @@ static int kvaser_pciefd_stop(struct net_device *netdev)
iowrite32(0, can->reg_base + KVASER_PCIEFD_KCAN_IEN_REG);
del_timer(&can->bec_poll_timer);
}
+ can->can.state = CAN_STATE_STOPPED;
close_candev(netdev);
return ret;
@@ -1007,8 +1015,7 @@ static int kvaser_pciefd_setup_can_ctrls(struct kvaser_pciefd *pcie)
SET_NETDEV_DEV(netdev, &pcie->pci->dev);
iowrite32(-1, can->reg_base + KVASER_PCIEFD_KCAN_IRQ_REG);
- iowrite32(KVASER_PCIEFD_KCAN_IRQ_ABD |
- KVASER_PCIEFD_KCAN_IRQ_TFD,
+ iowrite32(KVASER_PCIEFD_KCAN_IRQ_ABD,
can->reg_base + KVASER_PCIEFD_KCAN_IEN_REG);
pcie->can[i] = can;
@@ -1058,6 +1065,7 @@ static int kvaser_pciefd_setup_dma(struct kvaser_pciefd *pcie)
{
int i;
u32 srb_status;
+ u32 srb_packet_count;
dma_addr_t dma_addr[KVASER_PCIEFD_DMA_COUNT];
/* Disable the DMA */
@@ -1085,6 +1093,15 @@ static int kvaser_pciefd_setup_dma(struct kvaser_pciefd *pcie)
KVASER_PCIEFD_SRB_CMD_RDB1,
pcie->reg_base + KVASER_PCIEFD_SRB_CMD_REG);
+ /* Empty Rx FIFO */
+ srb_packet_count = ioread32(pcie->reg_base + KVASER_PCIEFD_SRB_RX_NR_PACKETS_REG) &
+ KVASER_PCIEFD_SRB_RX_NR_PACKETS_MASK;
+ while (srb_packet_count) {
+ /* Drop current packet in FIFO */
+ ioread32(pcie->reg_base + KVASER_PCIEFD_SRB_FIFO_LAST_REG);
+ srb_packet_count--;
+ }
+
srb_status = ioread32(pcie->reg_base + KVASER_PCIEFD_SRB_STAT_REG);
if (!(srb_status & KVASER_PCIEFD_SRB_STAT_DI)) {
dev_err(&pcie->pci->dev, "DMA not idle before enabling\n");
@@ -1425,9 +1442,6 @@ static int kvaser_pciefd_handle_status_packet(struct kvaser_pciefd *pcie,
cmd = KVASER_PCIEFD_KCAN_CMD_AT;
cmd |= ++can->cmd_seq << KVASER_PCIEFD_KCAN_CMD_SEQ_SHIFT;
iowrite32(cmd, can->reg_base + KVASER_PCIEFD_KCAN_CMD_REG);
-
- iowrite32(KVASER_PCIEFD_KCAN_IRQ_TFD,
- can->reg_base + KVASER_PCIEFD_KCAN_IEN_REG);
} else if (p->header[0] & KVASER_PCIEFD_SPACK_IDET &&
p->header[0] & KVASER_PCIEFD_SPACK_IRM &&
cmdseq == (p->header[1] & KVASER_PCIEFD_PACKET_SEQ_MSK) &&
@@ -1714,15 +1728,6 @@ static int kvaser_pciefd_transmit_irq(struct kvaser_pciefd_can *can)
if (irq & KVASER_PCIEFD_KCAN_IRQ_TOF)
netdev_err(can->can.dev, "Tx FIFO overflow\n");
- if (irq & KVASER_PCIEFD_KCAN_IRQ_TFD) {
- u8 count = ioread32(can->reg_base +
- KVASER_PCIEFD_KCAN_TX_NPACKETS_REG) & 0xff;
-
- if (count == 0)
- iowrite32(KVASER_PCIEFD_KCAN_CTRL_EFLUSH,
- can->reg_base + KVASER_PCIEFD_KCAN_CTRL_REG);
- }
-
if (irq & KVASER_PCIEFD_KCAN_IRQ_BPP)
netdev_err(can->can.dev,
"Fail to change bittiming, when not in reset mode\n");
@@ -1824,6 +1829,11 @@ static int kvaser_pciefd_probe(struct pci_dev *pdev,
if (err)
goto err_teardown_can_ctrls;
+ err = request_irq(pcie->pci->irq, kvaser_pciefd_irq_handler,
+ IRQF_SHARED, KVASER_PCIEFD_DRV_NAME, pcie);
+ if (err)
+ goto err_teardown_can_ctrls;
+
iowrite32(KVASER_PCIEFD_SRB_IRQ_DPD0 | KVASER_PCIEFD_SRB_IRQ_DPD1,
pcie->reg_base + KVASER_PCIEFD_SRB_IRQ_REG);
@@ -1844,11 +1854,6 @@ static int kvaser_pciefd_probe(struct pci_dev *pdev,
iowrite32(KVASER_PCIEFD_SRB_CMD_RDB1,
pcie->reg_base + KVASER_PCIEFD_SRB_CMD_REG);
- err = request_irq(pcie->pci->irq, kvaser_pciefd_irq_handler,
- IRQF_SHARED, KVASER_PCIEFD_DRV_NAME, pcie);
- if (err)
- goto err_teardown_can_ctrls;
-
err = kvaser_pciefd_reg_candev(pcie);
if (err)
goto err_free_irq;
@@ -1856,6 +1861,8 @@ static int kvaser_pciefd_probe(struct pci_dev *pdev,
return 0;
err_free_irq:
+ /* Disable PCI interrupts */
+ iowrite32(0, pcie->reg_base + KVASER_PCIEFD_IEN_REG);
free_irq(pcie->pci->irq, pcie);
err_teardown_can_ctrls:
diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c
index cbe831875347..c0215a8770f4 100644
--- a/drivers/net/dsa/lan9303-core.c
+++ b/drivers/net/dsa/lan9303-core.c
@@ -1188,8 +1188,6 @@ static int lan9303_port_fdb_add(struct dsa_switch *ds, int port,
struct lan9303 *chip = ds->priv;
dev_dbg(chip->dev, "%s(%d, %pM, %d)\n", __func__, port, addr, vid);
- if (vid)
- return -EOPNOTSUPP;
return lan9303_alr_add_port(chip, addr, port, false);
}
@@ -1201,8 +1199,6 @@ static int lan9303_port_fdb_del(struct dsa_switch *ds, int port,
struct lan9303 *chip = ds->priv;
dev_dbg(chip->dev, "%s(%d, %pM, %d)\n", __func__, port, addr, vid);
- if (vid)
- return -EOPNOTSUPP;
lan9303_alr_del_port(chip, addr, port);
return 0;
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 9bc54e1348cb..7e773c4ba046 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -399,6 +399,20 @@ static void mt7530_pll_setup(struct mt7530_priv *priv)
core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN);
}
+/* If port 6 is available as a CPU port, always prefer that as the default,
+ * otherwise don't care.
+ */
+static struct dsa_port *
+mt753x_preferred_default_local_cpu_port(struct dsa_switch *ds)
+{
+ struct dsa_port *cpu_dp = dsa_to_port(ds, 6);
+
+ if (dsa_port_is_cpu(cpu_dp))
+ return cpu_dp;
+
+ return NULL;
+}
+
/* Setup port 6 interface mode and TRGMII TX circuit */
static int
mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface)
@@ -985,6 +999,18 @@ unlock_exit:
mutex_unlock(&priv->reg_mutex);
}
+static void
+mt753x_trap_frames(struct mt7530_priv *priv)
+{
+ /* Trap BPDUs to the CPU port(s) */
+ mt7530_rmw(priv, MT753X_BPC, MT753X_BPDU_PORT_FW_MASK,
+ MT753X_BPDU_CPU_ONLY);
+
+ /* Trap LLDP frames with :0E MAC DA to the CPU port(s) */
+ mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_PORT_FW_MASK,
+ MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY));
+}
+
static int
mt753x_cpu_port_enable(struct dsa_switch *ds, int port)
{
@@ -1007,9 +1033,16 @@ mt753x_cpu_port_enable(struct dsa_switch *ds, int port)
UNU_FFP(BIT(port)));
/* Set CPU port number */
- if (priv->id == ID_MT7621)
+ if (priv->id == ID_MT7530 || priv->id == ID_MT7621)
mt7530_rmw(priv, MT7530_MFC, CPU_MASK, CPU_EN | CPU_PORT(port));
+ /* Add the CPU port to the CPU port bitmap for MT7531 and the switch on
+ * the MT7988 SoC. Trapped frames will be forwarded to the CPU port that
+ * is affine to the inbound user port.
+ */
+ if (priv->id == ID_MT7531 || priv->id == ID_MT7988)
+ mt7530_set(priv, MT7531_CFC, MT7531_CPU_PMAP(BIT(port)));
+
/* CPU port gets connected to all user ports of
* the switch.
*/
@@ -2255,6 +2288,8 @@ mt7530_setup(struct dsa_switch *ds)
priv->p6_interface = PHY_INTERFACE_MODE_NA;
+ mt753x_trap_frames(priv);
+
/* Enable and reset MIB counters */
mt7530_mib_reset(ds);
@@ -2352,17 +2387,9 @@ static int
mt7531_setup_common(struct dsa_switch *ds)
{
struct mt7530_priv *priv = ds->priv;
- struct dsa_port *cpu_dp;
int ret, i;
- /* BPDU to CPU port */
- dsa_switch_for_each_cpu_port(cpu_dp, ds) {
- mt7530_rmw(priv, MT7531_CFC, MT7531_CPU_PMAP_MASK,
- BIT(cpu_dp->index));
- break;
- }
- mt7530_rmw(priv, MT753X_BPC, MT753X_BPDU_PORT_FW_MASK,
- MT753X_BPDU_CPU_ONLY);
+ mt753x_trap_frames(priv);
/* Enable and reset MIB counters */
mt7530_mib_reset(ds);
@@ -3085,6 +3112,7 @@ static int mt7988_setup(struct dsa_switch *ds)
const struct dsa_switch_ops mt7530_switch_ops = {
.get_tag_protocol = mtk_get_tag_protocol,
.setup = mt753x_setup,
+ .preferred_default_local_cpu_port = mt753x_preferred_default_local_cpu_port,
.get_strings = mt7530_get_strings,
.get_ethtool_stats = mt7530_get_ethtool_stats,
.get_sset_count = mt7530_get_sset_count,
diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h
index 5084f48a8869..08045b035e6a 100644
--- a/drivers/net/dsa/mt7530.h
+++ b/drivers/net/dsa/mt7530.h
@@ -54,6 +54,7 @@ enum mt753x_id {
#define MT7531_MIRROR_PORT_GET(x) (((x) >> 16) & MIRROR_MASK)
#define MT7531_MIRROR_PORT_SET(x) (((x) & MIRROR_MASK) << 16)
#define MT7531_CPU_PMAP_MASK GENMASK(7, 0)
+#define MT7531_CPU_PMAP(x) FIELD_PREP(MT7531_CPU_PMAP_MASK, x)
#define MT753X_MIRROR_REG(id) ((((id) == ID_MT7531) || ((id) == ID_MT7988)) ? \
MT7531_CFC : MT7530_MFC)
@@ -66,6 +67,11 @@ enum mt753x_id {
#define MT753X_BPC 0x24
#define MT753X_BPDU_PORT_FW_MASK GENMASK(2, 0)
+/* Register for :03 and :0E MAC DA frame control */
+#define MT753X_RGAC2 0x2c
+#define MT753X_R0E_PORT_FW_MASK GENMASK(18, 16)
+#define MT753X_R0E_PORT_FW(x) FIELD_PREP(MT753X_R0E_PORT_FW_MASK, x)
+
enum mt753x_bpdu_port_fw {
MT753X_BPDU_FOLLOW_MFC,
MT753X_BPDU_CPU_EXCLUDE = 4,
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 64a2f2f83735..08a46ffd53af 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -7170,7 +7170,7 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
goto out;
}
if (chip->reset)
- usleep_range(1000, 2000);
+ usleep_range(10000, 20000);
/* Detect if the device is configured in single chip addressing mode,
* otherwise continue with address specific smi init/detection.
diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h
index aec9d4fd20e3..d19b6303b91f 100644
--- a/drivers/net/dsa/mv88e6xxx/port.h
+++ b/drivers/net/dsa/mv88e6xxx/port.h
@@ -276,7 +276,7 @@
/* Offset 0x10: Extended Port Control Command */
#define MV88E6393X_PORT_EPC_CMD 0x10
#define MV88E6393X_PORT_EPC_CMD_BUSY 0x8000
-#define MV88E6393X_PORT_EPC_CMD_WRITE 0x0300
+#define MV88E6393X_PORT_EPC_CMD_WRITE 0x3000
#define MV88E6393X_PORT_EPC_INDEX_PORT_ETYPE 0x02
/* Offset 0x11: Extended Port Control Data */
diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
index cfb3faeaa5bf..d172a3e9736c 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -1263,7 +1263,7 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port)
/* Consider the standard Ethernet overhead of 8 octets preamble+SFD,
* 4 octets FCS, 12 octets IFG.
*/
- needed_bit_time_ps = (maxlen + 24) * picos_per_byte;
+ needed_bit_time_ps = (u64)(maxlen + 24) * picos_per_byte;
dev_dbg(ocelot->dev,
"port %d: max frame size %d needs %llu ps at speed %d\n",
diff --git a/drivers/net/dsa/qca/Kconfig b/drivers/net/dsa/qca/Kconfig
index 4347b42c50fd..de9da469908b 100644
--- a/drivers/net/dsa/qca/Kconfig
+++ b/drivers/net/dsa/qca/Kconfig
@@ -20,6 +20,7 @@ config NET_DSA_QCA8K_LEDS_SUPPORT
bool "Qualcomm Atheros QCA8K Ethernet switch family LEDs support"
depends on NET_DSA_QCA8K
depends on LEDS_CLASS=y || LEDS_CLASS=NET_DSA_QCA8K
+ depends on LEDS_TRIGGERS
help
This enabled support for LEDs present on the Qualcomm Atheros
QCA8K Ethernet switch chips.
diff --git a/drivers/net/dsa/rzn1_a5psw.c b/drivers/net/dsa/rzn1_a5psw.c
index 919027cf2012..c37d2e537230 100644
--- a/drivers/net/dsa/rzn1_a5psw.c
+++ b/drivers/net/dsa/rzn1_a5psw.c
@@ -120,6 +120,22 @@ static void a5psw_port_mgmtfwd_set(struct a5psw *a5psw, int port, bool enable)
a5psw_port_pattern_set(a5psw, port, A5PSW_PATTERN_MGMTFWD, enable);
}
+static void a5psw_port_tx_enable(struct a5psw *a5psw, int port, bool enable)
+{
+ u32 mask = A5PSW_PORT_ENA_TX(port);
+ u32 reg = enable ? mask : 0;
+
+ /* Even though the port TX is disabled through TXENA bit in the
+ * PORT_ENA register, it can still send BPDUs. This depends on the tag
+ * configuration added when sending packets from the CPU port to the
+ * switch port. Indeed, when using forced forwarding without filtering,
+ * even disabled ports will be able to send packets that are tagged.
+ * This allows to implement STP support when ports are in a state where
+ * forwarding traffic should be stopped but BPDUs should still be sent.
+ */
+ a5psw_reg_rmw(a5psw, A5PSW_PORT_ENA, mask, reg);
+}
+
static void a5psw_port_enable_set(struct a5psw *a5psw, int port, bool enable)
{
u32 port_ena = 0;
@@ -292,6 +308,22 @@ static int a5psw_set_ageing_time(struct dsa_switch *ds, unsigned int msecs)
return 0;
}
+static void a5psw_port_learning_set(struct a5psw *a5psw, int port, bool learn)
+{
+ u32 mask = A5PSW_INPUT_LEARN_DIS(port);
+ u32 reg = !learn ? mask : 0;
+
+ a5psw_reg_rmw(a5psw, A5PSW_INPUT_LEARN, mask, reg);
+}
+
+static void a5psw_port_rx_block_set(struct a5psw *a5psw, int port, bool block)
+{
+ u32 mask = A5PSW_INPUT_LEARN_BLOCK(port);
+ u32 reg = block ? mask : 0;
+
+ a5psw_reg_rmw(a5psw, A5PSW_INPUT_LEARN, mask, reg);
+}
+
static void a5psw_flooding_set_resolution(struct a5psw *a5psw, int port,
bool set)
{
@@ -308,6 +340,14 @@ static void a5psw_flooding_set_resolution(struct a5psw *a5psw, int port,
a5psw_reg_writel(a5psw, offsets[i], a5psw->bridged_ports);
}
+static void a5psw_port_set_standalone(struct a5psw *a5psw, int port,
+ bool standalone)
+{
+ a5psw_port_learning_set(a5psw, port, !standalone);
+ a5psw_flooding_set_resolution(a5psw, port, !standalone);
+ a5psw_port_mgmtfwd_set(a5psw, port, standalone);
+}
+
static int a5psw_port_bridge_join(struct dsa_switch *ds, int port,
struct dsa_bridge bridge,
bool *tx_fwd_offload,
@@ -323,8 +363,7 @@ static int a5psw_port_bridge_join(struct dsa_switch *ds, int port,
}
a5psw->br_dev = bridge.dev;
- a5psw_flooding_set_resolution(a5psw, port, true);
- a5psw_port_mgmtfwd_set(a5psw, port, false);
+ a5psw_port_set_standalone(a5psw, port, false);
return 0;
}
@@ -334,8 +373,7 @@ static void a5psw_port_bridge_leave(struct dsa_switch *ds, int port,
{
struct a5psw *a5psw = ds->priv;
- a5psw_flooding_set_resolution(a5psw, port, false);
- a5psw_port_mgmtfwd_set(a5psw, port, true);
+ a5psw_port_set_standalone(a5psw, port, true);
/* No more ports bridged */
if (a5psw->bridged_ports == BIT(A5PSW_CPU_PORT))
@@ -344,28 +382,35 @@ static void a5psw_port_bridge_leave(struct dsa_switch *ds, int port,
static void a5psw_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
{
- u32 mask = A5PSW_INPUT_LEARN_DIS(port) | A5PSW_INPUT_LEARN_BLOCK(port);
+ bool learning_enabled, rx_enabled, tx_enabled;
struct a5psw *a5psw = ds->priv;
- u32 reg = 0;
switch (state) {
case BR_STATE_DISABLED:
case BR_STATE_BLOCKING:
- reg |= A5PSW_INPUT_LEARN_DIS(port);
- reg |= A5PSW_INPUT_LEARN_BLOCK(port);
- break;
case BR_STATE_LISTENING:
- reg |= A5PSW_INPUT_LEARN_DIS(port);
+ rx_enabled = false;
+ tx_enabled = false;
+ learning_enabled = false;
break;
case BR_STATE_LEARNING:
- reg |= A5PSW_INPUT_LEARN_BLOCK(port);
+ rx_enabled = false;
+ tx_enabled = false;
+ learning_enabled = true;
break;
case BR_STATE_FORWARDING:
- default:
+ rx_enabled = true;
+ tx_enabled = true;
+ learning_enabled = true;
break;
+ default:
+ dev_err(ds->dev, "invalid STP state: %d\n", state);
+ return;
}
- a5psw_reg_rmw(a5psw, A5PSW_INPUT_LEARN, mask, reg);
+ a5psw_port_learning_set(a5psw, port, learning_enabled);
+ a5psw_port_rx_block_set(a5psw, port, !rx_enabled);
+ a5psw_port_tx_enable(a5psw, port, tx_enabled);
}
static void a5psw_port_fast_age(struct dsa_switch *ds, int port)
@@ -673,7 +718,7 @@ static int a5psw_setup(struct dsa_switch *ds)
}
/* Configure management port */
- reg = A5PSW_CPU_PORT | A5PSW_MGMT_CFG_DISCARD;
+ reg = A5PSW_CPU_PORT | A5PSW_MGMT_CFG_ENABLE;
a5psw_reg_writel(a5psw, A5PSW_MGMT_CFG, reg);
/* Set pattern 0 to forward all frame to mgmt port */
@@ -722,13 +767,15 @@ static int a5psw_setup(struct dsa_switch *ds)
if (dsa_port_is_unused(dp))
continue;
- /* Enable egress flooding for CPU port */
- if (dsa_port_is_cpu(dp))
+ /* Enable egress flooding and learning for CPU port */
+ if (dsa_port_is_cpu(dp)) {
a5psw_flooding_set_resolution(a5psw, port, true);
+ a5psw_port_learning_set(a5psw, port, true);
+ }
- /* Enable management forward only for user ports */
+ /* Enable standalone mode for user ports */
if (dsa_port_is_user(dp))
- a5psw_port_mgmtfwd_set(a5psw, port, true);
+ a5psw_port_set_standalone(a5psw, port, true);
}
return 0;
diff --git a/drivers/net/dsa/rzn1_a5psw.h b/drivers/net/dsa/rzn1_a5psw.h
index c67abd49c013..b869192eef3f 100644
--- a/drivers/net/dsa/rzn1_a5psw.h
+++ b/drivers/net/dsa/rzn1_a5psw.h
@@ -19,6 +19,7 @@
#define A5PSW_PORT_OFFSET(port) (0x400 * (port))
#define A5PSW_PORT_ENA 0x8
+#define A5PSW_PORT_ENA_TX(port) BIT(port)
#define A5PSW_PORT_ENA_RX_SHIFT 16
#define A5PSW_PORT_ENA_TX_RX(port) (BIT((port) + A5PSW_PORT_ENA_RX_SHIFT) | \
BIT(port))
@@ -36,7 +37,7 @@
#define A5PSW_INPUT_LEARN_BLOCK(p) BIT(p)
#define A5PSW_MGMT_CFG 0x20
-#define A5PSW_MGMT_CFG_DISCARD BIT(7)
+#define A5PSW_MGMT_CFG_ENABLE BIT(6)
#define A5PSW_MODE_CFG 0x24
#define A5PSW_MODE_STATS_RESET BIT(31)
diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c
index d2f4358cc550..ba3e7aa1a28f 100644
--- a/drivers/net/ethernet/3com/3c515.c
+++ b/drivers/net/ethernet/3com/3c515.c
@@ -66,8 +66,10 @@ static int max_interrupt_work = 20;
#include <linux/timer.h>
#include <linux/ethtool.h>
#include <linux/bitops.h>
-
#include <linux/uaccess.h>
+
+#include <net/Space.h>
+
#include <asm/io.h>
#include <asm/dma.h>
diff --git a/drivers/net/ethernet/3com/3c589_cs.c b/drivers/net/ethernet/3com/3c589_cs.c
index 82f94b1635bf..5267e9dcd87e 100644
--- a/drivers/net/ethernet/3com/3c589_cs.c
+++ b/drivers/net/ethernet/3com/3c589_cs.c
@@ -195,6 +195,7 @@ static int tc589_probe(struct pcmcia_device *link)
{
struct el3_private *lp;
struct net_device *dev;
+ int ret;
dev_dbg(&link->dev, "3c589_attach()\n");
@@ -218,7 +219,15 @@ static int tc589_probe(struct pcmcia_device *link)
dev->ethtool_ops = &netdev_ethtool_ops;
- return tc589_config(link);
+ ret = tc589_config(link);
+ if (ret)
+ goto err_free_netdev;
+
+ return 0;
+
+err_free_netdev:
+ free_netdev(dev);
+ return ret;
}
static void tc589_detach(struct pcmcia_device *link)
diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c
index 0a9118b8be0c..bc9c81dc00fd 100644
--- a/drivers/net/ethernet/8390/ne.c
+++ b/drivers/net/ethernet/8390/ne.c
@@ -52,6 +52,7 @@ static const char version2[] =
#include <linux/etherdevice.h>
#include <linux/jiffies.h>
#include <linux/platform_device.h>
+#include <net/Space.h>
#include <asm/io.h>
diff --git a/drivers/net/ethernet/8390/smc-ultra.c b/drivers/net/ethernet/8390/smc-ultra.c
index 6e62c37c9400..7465650c8078 100644
--- a/drivers/net/ethernet/8390/smc-ultra.c
+++ b/drivers/net/ethernet/8390/smc-ultra.c
@@ -66,6 +66,7 @@ static const char version[] =
#include <linux/isapnp.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
+#include <net/Space.h>
#include <asm/io.h>
#include <asm/irq.h>
diff --git a/drivers/net/ethernet/8390/wd.c b/drivers/net/ethernet/8390/wd.c
index 5b00c452bede..119021d41451 100644
--- a/drivers/net/ethernet/8390/wd.c
+++ b/drivers/net/ethernet/8390/wd.c
@@ -37,6 +37,7 @@ static const char version[] =
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
+#include <net/Space.h>
#include <asm/io.h>
diff --git a/drivers/net/ethernet/amd/lance.c b/drivers/net/ethernet/amd/lance.c
index 8971665a4b2a..6cf38180cc01 100644
--- a/drivers/net/ethernet/amd/lance.c
+++ b/drivers/net/ethernet/amd/lance.c
@@ -59,6 +59,7 @@ static const char version[] = "lance.c:v1.16 2006/11/09 dplatt@3do.com, becker@c
#include <linux/skbuff.h>
#include <linux/mm.h>
#include <linux/bitops.h>
+#include <net/Space.h>
#include <asm/io.h>
#include <asm/dma.h>
diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c
index f7c597ea5daf..debe5216fe29 100644
--- a/drivers/net/ethernet/amd/pds_core/dev.c
+++ b/drivers/net/ethernet/amd/pds_core/dev.c
@@ -68,9 +68,15 @@ bool pdsc_is_fw_running(struct pdsc *pdsc)
bool pdsc_is_fw_good(struct pdsc *pdsc)
{
- u8 gen = pdsc->fw_status & PDS_CORE_FW_STS_F_GENERATION;
+ bool fw_running = pdsc_is_fw_running(pdsc);
+ u8 gen;
- return pdsc_is_fw_running(pdsc) && gen == pdsc->fw_generation;
+ /* Make sure to update the cached fw_status by calling
+ * pdsc_is_fw_running() before getting the generation
+ */
+ gen = pdsc->fw_status & PDS_CORE_FW_STS_F_GENERATION;
+
+ return fw_running && gen == pdsc->fw_generation;
}
static u8 pdsc_devcmd_status(struct pdsc *pdsc)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
index 33a9574e9e04..32d2c6fac652 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
@@ -1329,7 +1329,7 @@ static enum xgbe_mode xgbe_phy_status_aneg(struct xgbe_prv_data *pdata)
return pdata->phy_if.phy_impl.an_outcome(pdata);
}
-static void xgbe_phy_status_result(struct xgbe_prv_data *pdata)
+static bool xgbe_phy_status_result(struct xgbe_prv_data *pdata)
{
struct ethtool_link_ksettings *lks = &pdata->phy.lks;
enum xgbe_mode mode;
@@ -1367,8 +1367,13 @@ static void xgbe_phy_status_result(struct xgbe_prv_data *pdata)
pdata->phy.duplex = DUPLEX_FULL;
- if (xgbe_set_mode(pdata, mode) && pdata->an_again)
+ if (!xgbe_set_mode(pdata, mode))
+ return false;
+
+ if (pdata->an_again)
xgbe_phy_reconfig_aneg(pdata);
+
+ return true;
}
static void xgbe_phy_status(struct xgbe_prv_data *pdata)
@@ -1398,7 +1403,8 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata)
return;
}
- xgbe_phy_status_result(pdata);
+ if (xgbe_phy_status_result(pdata))
+ return;
if (test_bit(XGBE_LINK_INIT, &pdata->dev_state))
clear_bit(XGBE_LINK_INIT, &pdata->dev_state);
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 38d0cdaf22a5..bf1611cce974 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -2531,9 +2531,9 @@ static int bcm_sysport_probe(struct platform_device *pdev)
priv->irq0 = platform_get_irq(pdev, 0);
if (!priv->is_lite) {
priv->irq1 = platform_get_irq(pdev, 1);
- priv->wol_irq = platform_get_irq(pdev, 2);
+ priv->wol_irq = platform_get_irq_optional(pdev, 2);
} else {
- priv->wol_irq = platform_get_irq(pdev, 1);
+ priv->wol_irq = platform_get_irq_optional(pdev, 1);
}
if (priv->irq0 <= 0 || (priv->irq1 <= 0 && !priv->is_lite)) {
ret = -EINVAL;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 637d162bbcfa..1e7a6f1d4223 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -14294,11 +14294,16 @@ static void bnx2x_io_resume(struct pci_dev *pdev)
bp->fw_seq = SHMEM_RD(bp, func_mb[BP_FW_MB_IDX(bp)].drv_mb_header) &
DRV_MSG_SEQ_NUMBER_MASK;
- if (netif_running(dev))
- bnx2x_nic_load(bp, LOAD_NORMAL);
+ if (netif_running(dev)) {
+ if (bnx2x_nic_load(bp, LOAD_NORMAL)) {
+ netdev_err(bp->dev, "Error during driver initialization, try unloading/reloading the driver\n");
+ goto done;
+ }
+ }
netif_device_attach(dev);
+done:
rtnl_unlock();
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index dcd9367f05af..b499bc9c4e06 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -692,7 +692,7 @@ next_tx_int:
__netif_txq_completed_wake(txq, nr_pkts, tx_bytes,
bnxt_tx_avail(bp, txr), bp->tx_wake_thresh,
- READ_ONCE(txr->dev_state) != BNXT_DEV_STATE_CLOSING);
+ READ_ONCE(txr->dev_state) == BNXT_DEV_STATE_CLOSING);
}
static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping,
@@ -2365,6 +2365,9 @@ static int bnxt_async_event_process(struct bnxt *bp,
struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
u64 ns;
+ if (!ptp)
+ goto async_event_process_exit;
+
spin_lock_bh(&ptp->ptp_lock);
bnxt_ptp_update_current_time(bp);
ns = (((u64)BNXT_EVENT_PHC_RTC_UPDATE(data1) <<
@@ -4763,6 +4766,9 @@ int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap, int bmap_size,
if (event_id == ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY &&
!(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY))
continue;
+ if (event_id == ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE &&
+ !bp->ptp_cfg)
+ continue;
__set_bit(bnxt_async_events_arr[i], async_events_bmap);
}
if (bmap && bmap_size) {
@@ -5350,6 +5356,7 @@ static void bnxt_hwrm_update_rss_hash_cfg(struct bnxt *bp)
if (hwrm_req_init(bp, req, HWRM_VNIC_RSS_QCFG))
return;
+ req->vnic_id = cpu_to_le16(vnic->fw_vnic_id);
/* all contexts configured to same hash_type, zero always exists */
req->rss_ctx_idx = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[0]);
resp = hwrm_req_hold(bp, req);
@@ -8812,6 +8819,9 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init)
goto err_out;
}
+ if (BNXT_VF(bp))
+ bnxt_hwrm_func_qcfg(bp);
+
rc = bnxt_setup_vnic(bp, 0);
if (rc)
goto err_out;
@@ -11598,6 +11608,7 @@ static void bnxt_tx_timeout(struct net_device *dev, unsigned int txqueue)
static void bnxt_fw_health_check(struct bnxt *bp)
{
struct bnxt_fw_health *fw_health = bp->fw_health;
+ struct pci_dev *pdev = bp->pdev;
u32 val;
if (!fw_health->enabled || test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
@@ -11611,7 +11622,7 @@ static void bnxt_fw_health_check(struct bnxt *bp)
}
val = bnxt_fw_health_readl(bp, BNXT_FW_HEARTBEAT_REG);
- if (val == fw_health->last_fw_heartbeat) {
+ if (val == fw_health->last_fw_heartbeat && pci_device_is_present(pdev)) {
fw_health->arrests++;
goto fw_reset;
}
@@ -11619,7 +11630,7 @@ static void bnxt_fw_health_check(struct bnxt *bp)
fw_health->last_fw_heartbeat = val;
val = bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG);
- if (val != fw_health->last_fw_reset_cnt) {
+ if (val != fw_health->last_fw_reset_cnt && pci_device_is_present(pdev)) {
fw_health->discoveries++;
goto fw_reset;
}
@@ -13025,26 +13036,37 @@ static void bnxt_cfg_ntp_filters(struct bnxt *bp)
#endif /* CONFIG_RFS_ACCEL */
-static int bnxt_udp_tunnel_sync(struct net_device *netdev, unsigned int table)
+static int bnxt_udp_tunnel_set_port(struct net_device *netdev, unsigned int table,
+ unsigned int entry, struct udp_tunnel_info *ti)
{
struct bnxt *bp = netdev_priv(netdev);
- struct udp_tunnel_info ti;
unsigned int cmd;
- udp_tunnel_nic_get_port(netdev, table, 0, &ti);
- if (ti.type == UDP_TUNNEL_TYPE_VXLAN)
+ if (ti->type == UDP_TUNNEL_TYPE_VXLAN)
cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN;
else
cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE;
- if (ti.port)
- return bnxt_hwrm_tunnel_dst_port_alloc(bp, ti.port, cmd);
+ return bnxt_hwrm_tunnel_dst_port_alloc(bp, ti->port, cmd);
+}
+
+static int bnxt_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table,
+ unsigned int entry, struct udp_tunnel_info *ti)
+{
+ struct bnxt *bp = netdev_priv(netdev);
+ unsigned int cmd;
+
+ if (ti->type == UDP_TUNNEL_TYPE_VXLAN)
+ cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN;
+ else
+ cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE;
return bnxt_hwrm_tunnel_dst_port_free(bp, cmd);
}
static const struct udp_tunnel_nic_info bnxt_udp_tunnels = {
- .sync_table = bnxt_udp_tunnel_sync,
+ .set_port = bnxt_udp_tunnel_set_port,
+ .unset_port = bnxt_udp_tunnel_unset_port,
.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
.tables = {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 2dd8ee4a6f75..8fd5071d8b09 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -3831,7 +3831,7 @@ static int bnxt_reset(struct net_device *dev, u32 *flags)
}
}
- if (req & BNXT_FW_RESET_AP) {
+ if (!BNXT_CHIP_P4_PLUS(bp) && (req & BNXT_FW_RESET_AP)) {
/* This feature is not supported in older firmware versions */
if (bp->hwrm_spec_code >= 0x10803) {
if (!bnxt_firmware_reset_ap(dev)) {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
index e46689128e32..f3886710e778 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
@@ -952,6 +952,7 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg)
bnxt_ptp_timecounter_init(bp, true);
bnxt_ptp_adjfine_rtc(bp, 0);
}
+ bnxt_hwrm_func_drv_rgtr(bp, NULL, 0, true);
ptp->ptp_info = bnxt_ptp_caps;
if ((bp->fw_cap & BNXT_FW_CAP_PTP_PPS)) {
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index f28ffc31df22..2b5761ad2f92 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -1272,7 +1272,8 @@ static void bcmgenet_get_ethtool_stats(struct net_device *dev,
}
}
-static void bcmgenet_eee_enable_set(struct net_device *dev, bool enable)
+void bcmgenet_eee_enable_set(struct net_device *dev, bool enable,
+ bool tx_lpi_enabled)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
u32 off = priv->hw_params->tbuf_offset + TBUF_ENERGY_CTRL;
@@ -1292,7 +1293,7 @@ static void bcmgenet_eee_enable_set(struct net_device *dev, bool enable)
/* Enable EEE and switch to a 27Mhz clock automatically */
reg = bcmgenet_readl(priv->base + off);
- if (enable)
+ if (tx_lpi_enabled)
reg |= TBUF_EEE_EN | TBUF_PM_EN;
else
reg &= ~(TBUF_EEE_EN | TBUF_PM_EN);
@@ -1313,6 +1314,7 @@ static void bcmgenet_eee_enable_set(struct net_device *dev, bool enable)
priv->eee.eee_enabled = enable;
priv->eee.eee_active = enable;
+ priv->eee.tx_lpi_enabled = tx_lpi_enabled;
}
static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_eee *e)
@@ -1328,6 +1330,7 @@ static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_eee *e)
e->eee_enabled = p->eee_enabled;
e->eee_active = p->eee_active;
+ e->tx_lpi_enabled = p->tx_lpi_enabled;
e->tx_lpi_timer = bcmgenet_umac_readl(priv, UMAC_EEE_LPI_TIMER);
return phy_ethtool_get_eee(dev->phydev, e);
@@ -1337,7 +1340,6 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
struct ethtool_eee *p = &priv->eee;
- int ret = 0;
if (GENET_IS_V1(priv))
return -EOPNOTSUPP;
@@ -1348,16 +1350,11 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
p->eee_enabled = e->eee_enabled;
if (!p->eee_enabled) {
- bcmgenet_eee_enable_set(dev, false);
+ bcmgenet_eee_enable_set(dev, false, false);
} else {
- ret = phy_init_eee(dev->phydev, false);
- if (ret) {
- netif_err(priv, hw, dev, "EEE initialization failed\n");
- return ret;
- }
-
+ p->eee_active = phy_init_eee(dev->phydev, false) >= 0;
bcmgenet_umac_writel(priv, e->tx_lpi_timer, UMAC_EEE_LPI_TIMER);
- bcmgenet_eee_enable_set(dev, true);
+ bcmgenet_eee_enable_set(dev, p->eee_active, e->tx_lpi_enabled);
}
return phy_ethtool_set_eee(dev->phydev, e);
@@ -3450,7 +3447,7 @@ err_clk_disable:
return ret;
}
-static void bcmgenet_netif_stop(struct net_device *dev)
+static void bcmgenet_netif_stop(struct net_device *dev, bool stop_phy)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
@@ -3465,6 +3462,8 @@ static void bcmgenet_netif_stop(struct net_device *dev)
/* Disable MAC transmit. TX DMA disabled must be done before this */
umac_enable_set(priv, CMD_TX_EN, false);
+ if (stop_phy)
+ phy_stop(dev->phydev);
bcmgenet_disable_rx_napi(priv);
bcmgenet_intr_disable(priv);
@@ -3485,7 +3484,7 @@ static int bcmgenet_close(struct net_device *dev)
netif_dbg(priv, ifdown, dev, "bcmgenet_close\n");
- bcmgenet_netif_stop(dev);
+ bcmgenet_netif_stop(dev, false);
/* Really kill the PHY state machine and disconnect from it */
phy_disconnect(dev->phydev);
@@ -4277,9 +4276,6 @@ static int bcmgenet_resume(struct device *d)
if (!device_may_wakeup(d))
phy_resume(dev->phydev);
- if (priv->eee.eee_enabled)
- bcmgenet_eee_enable_set(dev, true);
-
bcmgenet_netif_start(dev);
netif_device_attach(dev);
@@ -4303,7 +4299,7 @@ static int bcmgenet_suspend(struct device *d)
netif_device_detach(dev);
- bcmgenet_netif_stop(dev);
+ bcmgenet_netif_stop(dev, true);
if (!device_may_wakeup(d))
phy_suspend(dev->phydev);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 946f6e283c4e..1985c0ec4da2 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -703,4 +703,7 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv,
void bcmgenet_wol_power_up_cfg(struct bcmgenet_priv *priv,
enum bcmgenet_power_mode mode);
+void bcmgenet_eee_enable_set(struct net_device *dev, bool enable,
+ bool tx_lpi_enabled);
+
#endif /* __BCMGENET_H__ */
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index be042905ada2..c15ed0acdb77 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -87,6 +87,11 @@ static void bcmgenet_mac_config(struct net_device *dev)
reg |= CMD_TX_EN | CMD_RX_EN;
}
bcmgenet_umac_writel(priv, reg, UMAC_CMD);
+
+ priv->eee.eee_active = phy_init_eee(phydev, 0) >= 0;
+ bcmgenet_eee_enable_set(dev,
+ priv->eee.eee_enabled && priv->eee.eee_active,
+ priv->eee.tx_lpi_enabled);
}
/* setup netdev link state when PHY link status change and
diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c
index 06a0c00af99c..276c32c3926a 100644
--- a/drivers/net/ethernet/cirrus/cs89x0.c
+++ b/drivers/net/ethernet/cirrus/cs89x0.c
@@ -72,6 +72,8 @@
#include <linux/gfp.h>
#include <linux/io.h>
+#include <net/Space.h>
+
#include <asm/irq.h>
#include <linux/atomic.h>
#if ALLOW_DMA
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 7e408bcc88de..0defd519ba62 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -1135,8 +1135,8 @@ static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
VLAN_ETH_HLEN : ETH_HLEN;
if (skb->len <= 60 &&
- (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
- is_ipv4_pkt(skb)) {
+ (lancer_chip(adapter) || BE3_chip(adapter) ||
+ skb_vlan_tag_present(skb)) && is_ipv4_pkt(skb)) {
ip = (struct iphdr *)ip_hdr(skb);
pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
index b1871e6c4006..00e50bd30189 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c
@@ -54,6 +54,9 @@ static int phy_mode(enum dpmac_eth_if eth_if, phy_interface_t *if_mode)
case DPMAC_ETH_IF_XFI:
*if_mode = PHY_INTERFACE_MODE_10GBASER;
break;
+ case DPMAC_ETH_IF_CAUI:
+ *if_mode = PHY_INTERFACE_MODE_25GBASER;
+ break;
default:
return -EINVAL;
}
@@ -79,6 +82,8 @@ static enum dpmac_eth_if dpmac_eth_if_mode(phy_interface_t if_mode)
return DPMAC_ETH_IF_XFI;
case PHY_INTERFACE_MODE_1000BASEX:
return DPMAC_ETH_IF_1000BASEX;
+ case PHY_INTERFACE_MODE_25GBASER:
+ return DPMAC_ETH_IF_CAUI;
default:
return DPMAC_ETH_IF_MII;
}
@@ -418,7 +423,7 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac)
mac->phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE |
MAC_10FD | MAC_100FD | MAC_1000FD | MAC_2500FD | MAC_5000FD |
- MAC_10000FD;
+ MAC_10000FD | MAC_25000FD;
dpaa2_mac_set_supported_interfaces(mac);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 3c4fa26f0f9b..9e1b2536e9a9 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -1229,7 +1229,13 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
if (!skb)
break;
- rx_byte_cnt += skb->len;
+ /* When set, the outer VLAN header is extracted and reported
+ * in the receive buffer descriptor. So rx_byte_cnt should
+ * add the length of the extracted VLAN header.
+ */
+ if (bd_status & ENETC_RXBD_FLAG_VLAN)
+ rx_byte_cnt += VLAN_HLEN;
+ rx_byte_cnt += skb->len + ETH_HLEN;
rx_frm_cnt++;
napi_gro_receive(napi, skb);
@@ -1565,6 +1571,14 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
enetc_build_xdp_buff(rx_ring, bd_status, &rxbd, &i,
&cleaned_cnt, &xdp_buff);
+ /* When set, the outer VLAN header is extracted and reported
+ * in the receive buffer descriptor. So rx_byte_cnt should
+ * add the length of the extracted VLAN header.
+ */
+ if (bd_status & ENETC_RXBD_FLAG_VLAN)
+ rx_byte_cnt += VLAN_HLEN;
+ rx_byte_cnt += xdp_get_buff_len(&xdp_buff);
+
xdp_act = bpf_prog_run_xdp(prog, &xdp_buff);
switch (xdp_act) {
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
index 83c27bbbc6ed..126007ab70f6 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
@@ -181,8 +181,8 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data)
int bw_sum = 0;
u8 bw;
- prio_top = netdev_get_prio_tc_map(ndev, tc_nums - 1);
- prio_next = netdev_get_prio_tc_map(ndev, tc_nums - 2);
+ prio_top = tc_nums - 1;
+ prio_next = tc_nums - 2;
/* Support highest prio and second prio tc in cbs mode */
if (tc != prio_top && tc != prio_next)
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 42ec6ca3bf03..38e5b5abe067 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -3798,7 +3798,6 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
entries_free = fec_enet_get_free_txdesc_num(txq);
if (entries_free < MAX_SKB_FRAGS + 1) {
netdev_err(fep->netdev, "NOT enough BD for SG!\n");
- xdp_return_frame(frame);
return NETDEV_TX_BUSY;
}
@@ -3835,6 +3834,11 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
index = fec_enet_get_bd_index(last_bdp, &txq->bd);
txq->tx_skbuff[index] = NULL;
+ /* Make sure the updates to rest of the descriptor are performed before
+ * transferring ownership.
+ */
+ dma_wmb();
+
/* Send it on its way. Tell FEC it's ready, interrupt when done,
* it's the last BD of the frame, and to put the CRC on the end.
*/
@@ -3844,8 +3848,14 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
/* If this was the last BD in the ring, start at the beginning again. */
bdp = fec_enet_get_nextdesc(last_bdp, &txq->bd);
+ /* Make sure the update to bdp are performed before txq->bd.cur. */
+ dma_wmb();
+
txq->bd.cur = bdp;
+ /* Trigger transmission start */
+ writel(0, txq->bd.reg_desc_active);
+
return 0;
}
@@ -3874,12 +3884,6 @@ static int fec_enet_xdp_xmit(struct net_device *dev,
sent_frames++;
}
- /* Make sure the update to bdp and tx_skbuff are performed. */
- wmb();
-
- /* Trigger transmission start */
- writel(0, txq->bd.reg_desc_active);
-
__netif_tx_unlock(nq);
return sent_frames;
@@ -4478,9 +4482,11 @@ fec_drv_remove(struct platform_device *pdev)
struct device_node *np = pdev->dev.of_node;
int ret;
- ret = pm_runtime_resume_and_get(&pdev->dev);
+ ret = pm_runtime_get_sync(&pdev->dev);
if (ret < 0)
- return ret;
+ dev_err(&pdev->dev,
+ "Failed to resume device in remove callback (%pe)\n",
+ ERR_PTR(ret));
cancel_work_sync(&fep->tx_timeout_work);
fec_ptp_stop(pdev);
@@ -4493,8 +4499,13 @@ fec_drv_remove(struct platform_device *pdev)
of_phy_deregister_fixed_link(np);
of_node_put(fep->phy_node);
- clk_disable_unprepare(fep->clk_ahb);
- clk_disable_unprepare(fep->clk_ipg);
+ /* After pm_runtime_get_sync() failed, the clks are still off, so skip
+ * disabling them again.
+ */
+ if (ret >= 0) {
+ clk_disable_unprepare(fep->clk_ahb);
+ clk_disable_unprepare(fep->clk_ipg);
+ }
pm_runtime_put_noidle(&pdev->dev);
pm_runtime_disable(&pdev->dev);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c
index cbbab5b2b402..b85c412683dd 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c
@@ -331,9 +331,25 @@ static int hclge_comm_cmd_csq_done(struct hclge_comm_hw *hw)
return head == hw->cmq.csq.next_to_use;
}
-static void hclge_comm_wait_for_resp(struct hclge_comm_hw *hw,
+static u32 hclge_get_cmdq_tx_timeout(u16 opcode, u32 tx_timeout)
+{
+ static const struct hclge_cmdq_tx_timeout_map cmdq_tx_timeout_map[] = {
+ {HCLGE_OPC_CFG_RST_TRIGGER, HCLGE_COMM_CMDQ_TX_TIMEOUT_500MS},
+ };
+ u32 i;
+
+ for (i = 0; i < ARRAY_SIZE(cmdq_tx_timeout_map); i++)
+ if (cmdq_tx_timeout_map[i].opcode == opcode)
+ return cmdq_tx_timeout_map[i].tx_timeout;
+
+ return tx_timeout;
+}
+
+static void hclge_comm_wait_for_resp(struct hclge_comm_hw *hw, u16 opcode,
bool *is_completed)
{
+ u32 cmdq_tx_timeout = hclge_get_cmdq_tx_timeout(opcode,
+ hw->cmq.tx_timeout);
u32 timeout = 0;
do {
@@ -343,7 +359,7 @@ static void hclge_comm_wait_for_resp(struct hclge_comm_hw *hw,
}
udelay(1);
timeout++;
- } while (timeout < hw->cmq.tx_timeout);
+ } while (timeout < cmdq_tx_timeout);
}
static int hclge_comm_cmd_convert_err_code(u16 desc_ret)
@@ -407,7 +423,8 @@ static int hclge_comm_cmd_check_result(struct hclge_comm_hw *hw,
* if multi descriptors to be sent, use the first one to check
*/
if (HCLGE_COMM_SEND_SYNC(le16_to_cpu(desc->flag)))
- hclge_comm_wait_for_resp(hw, &is_completed);
+ hclge_comm_wait_for_resp(hw, le16_to_cpu(desc->opcode),
+ &is_completed);
if (!is_completed)
ret = -EBADE;
@@ -529,7 +546,7 @@ int hclge_comm_cmd_queue_init(struct pci_dev *pdev, struct hclge_comm_hw *hw)
cmdq->crq.desc_num = HCLGE_COMM_NIC_CMQ_DESC_NUM;
/* Setup Tx write back timeout */
- cmdq->tx_timeout = HCLGE_COMM_CMDQ_TX_TIMEOUT;
+ cmdq->tx_timeout = HCLGE_COMM_CMDQ_TX_TIMEOUT_DEFAULT;
/* Setup queue rings */
ret = hclge_comm_alloc_cmd_queue(hw, HCLGE_COMM_TYPE_CSQ);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h
index de72ecbfd5ad..18f1b4bf362d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h
@@ -54,7 +54,8 @@
#define HCLGE_COMM_NIC_SW_RST_RDY BIT(HCLGE_COMM_NIC_SW_RST_RDY_B)
#define HCLGE_COMM_NIC_CMQ_DESC_NUM_S 3
#define HCLGE_COMM_NIC_CMQ_DESC_NUM 1024
-#define HCLGE_COMM_CMDQ_TX_TIMEOUT 30000
+#define HCLGE_COMM_CMDQ_TX_TIMEOUT_DEFAULT 30000
+#define HCLGE_COMM_CMDQ_TX_TIMEOUT_500MS 500000
enum hclge_opcode_type {
/* Generic commands */
@@ -360,6 +361,11 @@ struct hclge_comm_caps_bit_map {
u16 local_bit;
};
+struct hclge_cmdq_tx_timeout_map {
+ u32 opcode;
+ u32 tx_timeout;
+};
+
struct hclge_comm_firmware_compat_cmd {
__le32 compat;
u8 rsv[20];
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 4c3e90a1c4d0..d385ffc21876 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -130,7 +130,7 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
.name = "tx_bd_queue",
.cmd = HNAE3_DBG_CMD_TX_BD,
.dentry = HNS3_DBG_DENTRY_TX_BD,
- .buf_len = HNS3_DBG_READ_LEN_4MB,
+ .buf_len = HNS3_DBG_READ_LEN_5MB,
.init = hns3_dbg_bd_file_init,
},
{
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
index 97578eabb7d8..4a5ef8a90a10 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
@@ -10,6 +10,7 @@
#define HNS3_DBG_READ_LEN_128KB 0x20000
#define HNS3_DBG_READ_LEN_1MB 0x100000
#define HNS3_DBG_READ_LEN_4MB 0x400000
+#define HNS3_DBG_READ_LEN_5MB 0x500000
#define HNS3_DBG_WRITE_LEN 1024
#define HNS3_DBG_DATA_STR_LEN 32
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 4fb5406c1951..2689b108f7df 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -8053,12 +8053,15 @@ static void hclge_ae_stop(struct hnae3_handle *handle)
/* If it is not PF reset or FLR, the firmware will disable the MAC,
* so it only need to stop phy here.
*/
- if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) &&
- hdev->reset_type != HNAE3_FUNC_RESET &&
- hdev->reset_type != HNAE3_FLR_RESET) {
- hclge_mac_stop_phy(hdev);
- hclge_update_link_status(hdev);
- return;
+ if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) {
+ hclge_pfc_pause_en_cfg(hdev, HCLGE_PFC_TX_RX_DISABLE,
+ HCLGE_PFC_DISABLE);
+ if (hdev->reset_type != HNAE3_FUNC_RESET &&
+ hdev->reset_type != HNAE3_FLR_RESET) {
+ hclge_mac_stop_phy(hdev);
+ hclge_update_link_status(hdev);
+ return;
+ }
}
hclge_reset_tqp(handle);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 4a33f65190e2..922c0da3660c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -171,8 +171,8 @@ int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx)
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
-static int hclge_pfc_pause_en_cfg(struct hclge_dev *hdev, u8 tx_rx_bitmap,
- u8 pfc_bitmap)
+int hclge_pfc_pause_en_cfg(struct hclge_dev *hdev, u8 tx_rx_bitmap,
+ u8 pfc_bitmap)
{
struct hclge_desc desc;
struct hclge_pfc_en_cmd *pfc = (struct hclge_pfc_en_cmd *)desc.data;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index 68f28a98e380..dd6f1fd486cf 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -164,6 +164,9 @@ struct hclge_bp_to_qs_map_cmd {
u32 rsvd1;
};
+#define HCLGE_PFC_DISABLE 0
+#define HCLGE_PFC_TX_RX_DISABLE 0
+
struct hclge_pfc_en_cmd {
u8 tx_rx_en_bitmap;
u8 pri_en_bitmap;
@@ -235,6 +238,8 @@ void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc);
void hclge_tm_pfc_info_update(struct hclge_dev *hdev);
int hclge_tm_dwrr_cfg(struct hclge_dev *hdev);
int hclge_tm_init_hw(struct hclge_dev *hdev, bool init);
+int hclge_pfc_pause_en_cfg(struct hclge_dev *hdev, u8 tx_rx_bitmap,
+ u8 pfc_bitmap);
int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx);
int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr);
void hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index f24046250341..dd08989a4c7c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -1436,7 +1436,10 @@ static int hclgevf_reset_wait(struct hclgevf_dev *hdev)
* might happen in case reset assertion was made by PF. Yes, this also
* means we might end up waiting bit more even for VF reset.
*/
- msleep(5000);
+ if (hdev->reset_type == HNAE3_VF_FULL_RESET)
+ msleep(5000);
+ else
+ msleep(500);
return 0;
}
diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
index 9abaff1f2aff..39d0fe76a38f 100644
--- a/drivers/net/ethernet/intel/iavf/iavf.h
+++ b/drivers/net/ethernet/intel/iavf/iavf.h
@@ -525,7 +525,7 @@ void iavf_set_ethtool_ops(struct net_device *netdev);
void iavf_update_stats(struct iavf_adapter *adapter);
void iavf_reset_interrupt_capability(struct iavf_adapter *adapter);
int iavf_init_interrupt_scheme(struct iavf_adapter *adapter);
-void iavf_irq_enable_queues(struct iavf_adapter *adapter, u32 mask);
+void iavf_irq_enable_queues(struct iavf_adapter *adapter);
void iavf_free_all_tx_resources(struct iavf_adapter *adapter);
void iavf_free_all_rx_resources(struct iavf_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 2de4baff4c20..4a66873882d1 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -359,21 +359,18 @@ static void iavf_irq_disable(struct iavf_adapter *adapter)
}
/**
- * iavf_irq_enable_queues - Enable interrupt for specified queues
+ * iavf_irq_enable_queues - Enable interrupt for all queues
* @adapter: board private structure
- * @mask: bitmap of queues to enable
**/
-void iavf_irq_enable_queues(struct iavf_adapter *adapter, u32 mask)
+void iavf_irq_enable_queues(struct iavf_adapter *adapter)
{
struct iavf_hw *hw = &adapter->hw;
int i;
for (i = 1; i < adapter->num_msix_vectors; i++) {
- if (mask & BIT(i - 1)) {
- wr32(hw, IAVF_VFINT_DYN_CTLN1(i - 1),
- IAVF_VFINT_DYN_CTLN1_INTENA_MASK |
- IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK);
- }
+ wr32(hw, IAVF_VFINT_DYN_CTLN1(i - 1),
+ IAVF_VFINT_DYN_CTLN1_INTENA_MASK |
+ IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK);
}
}
@@ -387,7 +384,7 @@ void iavf_irq_enable(struct iavf_adapter *adapter, bool flush)
struct iavf_hw *hw = &adapter->hw;
iavf_misc_irq_enable(adapter);
- iavf_irq_enable_queues(adapter, ~0);
+ iavf_irq_enable_queues(adapter);
if (flush)
iavf_flush(hw);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_register.h b/drivers/net/ethernet/intel/iavf/iavf_register.h
index bf793332fc9d..a19e88898a0b 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_register.h
+++ b/drivers/net/ethernet/intel/iavf/iavf_register.h
@@ -40,7 +40,7 @@
#define IAVF_VFINT_DYN_CTL01_INTENA_MASK IAVF_MASK(0x1, IAVF_VFINT_DYN_CTL01_INTENA_SHIFT)
#define IAVF_VFINT_DYN_CTL01_ITR_INDX_SHIFT 3
#define IAVF_VFINT_DYN_CTL01_ITR_INDX_MASK IAVF_MASK(0x3, IAVF_VFINT_DYN_CTL01_ITR_INDX_SHIFT)
-#define IAVF_VFINT_DYN_CTLN1(_INTVF) (0x00003800 + ((_INTVF) * 4)) /* _i=0...15 */ /* Reset: VFR */
+#define IAVF_VFINT_DYN_CTLN1(_INTVF) (0x00003800 + ((_INTVF) * 4)) /* _i=0...63 */ /* Reset: VFR */
#define IAVF_VFINT_DYN_CTLN1_INTENA_SHIFT 0
#define IAVF_VFINT_DYN_CTLN1_INTENA_MASK IAVF_MASK(0x1, IAVF_VFINT_DYN_CTLN1_INTENA_SHIFT)
#define IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_SHIFT 2
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index 9afbbdac3590..7c0578b5457b 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -2238,11 +2238,6 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
iavf_process_config(adapter);
adapter->flags |= IAVF_FLAG_SETUP_NETDEV_FEATURES;
- /* Request VLAN offload settings */
- if (VLAN_V2_ALLOWED(adapter))
- iavf_set_vlan_offload_features(adapter, 0,
- netdev->features);
-
iavf_set_queue_vlan_tag_loc(adapter);
was_mac_changed = !ether_addr_equal(netdev->dev_addr,
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 0157f6e98d3e..eb2dc0983776 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -5160,7 +5160,7 @@ ice_aq_read_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr,
*/
int
ice_aq_write_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr,
- u16 bus_addr, __le16 addr, u8 params, u8 *data,
+ u16 bus_addr, __le16 addr, u8 params, const u8 *data,
struct ice_sq_cd *cd)
{
struct ice_aq_desc desc = { 0 };
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 8ba5f935a092..81961a7d6598 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -229,7 +229,7 @@ ice_aq_read_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr,
struct ice_sq_cd *cd);
int
ice_aq_write_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr,
- u16 bus_addr, __le16 addr, u8 params, u8 *data,
+ u16 bus_addr, __le16 addr, u8 params, const u8 *data,
struct ice_sq_cd *cd);
bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw);
#endif /* _ICE_COMMON_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
index c6d4926f0fcf..850db8e0e6b0 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -932,10 +932,9 @@ ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring *tx_ring,
if ((first->tx_flags & ICE_TX_FLAGS_HW_VLAN ||
first->tx_flags & ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN) ||
skb->priority != TC_PRIO_CONTROL) {
- first->tx_flags &= ~ICE_TX_FLAGS_VLAN_PR_M;
+ first->vid &= ~VLAN_PRIO_MASK;
/* Mask the lower 3 bits to set the 802.1p priority */
- first->tx_flags |= (skb->priority & 0x7) <<
- ICE_TX_FLAGS_VLAN_PR_S;
+ first->vid |= (skb->priority << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK;
/* if this is not already set it means a VLAN 0 + priority needs
* to be offloaded
*/
diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c
index 2ea8a2b11bcd..75c9de675f20 100644
--- a/drivers/net/ethernet/intel/ice/ice_gnss.c
+++ b/drivers/net/ethernet/intel/ice/ice_gnss.c
@@ -16,8 +16,8 @@
* * number of bytes written - success
* * negative - error code
*/
-static unsigned int
-ice_gnss_do_write(struct ice_pf *pf, unsigned char *buf, unsigned int size)
+static int
+ice_gnss_do_write(struct ice_pf *pf, const unsigned char *buf, unsigned int size)
{
struct ice_aqc_link_topo_addr link_topo;
struct ice_hw *hw = &pf->hw;
@@ -72,39 +72,7 @@ err_out:
dev_err(ice_pf_to_dev(pf), "GNSS failed to write, offset=%u, size=%u, err=%d\n",
offset, size, err);
- return offset;
-}
-
-/**
- * ice_gnss_write_pending - Write all pending data to internal GNSS
- * @work: GNSS write work structure
- */
-static void ice_gnss_write_pending(struct kthread_work *work)
-{
- struct gnss_serial *gnss = container_of(work, struct gnss_serial,
- write_work);
- struct ice_pf *pf = gnss->back;
-
- if (!pf)
- return;
-
- if (!test_bit(ICE_FLAG_GNSS, pf->flags))
- return;
-
- if (!list_empty(&gnss->queue)) {
- struct gnss_write_buf *write_buf = NULL;
- unsigned int bytes;
-
- write_buf = list_first_entry(&gnss->queue,
- struct gnss_write_buf, queue);
-
- bytes = ice_gnss_do_write(pf, write_buf->buf, write_buf->size);
- dev_dbg(ice_pf_to_dev(pf), "%u bytes written to GNSS\n", bytes);
-
- list_del(&write_buf->queue);
- kfree(write_buf->buf);
- kfree(write_buf);
- }
+ return err;
}
/**
@@ -128,12 +96,7 @@ static void ice_gnss_read(struct kthread_work *work)
int err = 0;
pf = gnss->back;
- if (!pf) {
- err = -EFAULT;
- goto exit;
- }
-
- if (!test_bit(ICE_FLAG_GNSS, pf->flags))
+ if (!pf || !test_bit(ICE_FLAG_GNSS, pf->flags))
return;
hw = &pf->hw;
@@ -191,7 +154,6 @@ free_buf:
free_page((unsigned long)buf);
requeue:
kthread_queue_delayed_work(gnss->kworker, &gnss->read_work, delay);
-exit:
if (err)
dev_dbg(ice_pf_to_dev(pf), "GNSS failed to read err=%d\n", err);
}
@@ -220,8 +182,6 @@ static struct gnss_serial *ice_gnss_struct_init(struct ice_pf *pf)
pf->gnss_serial = gnss;
kthread_init_delayed_work(&gnss->read_work, ice_gnss_read);
- INIT_LIST_HEAD(&gnss->queue);
- kthread_init_work(&gnss->write_work, ice_gnss_write_pending);
kworker = kthread_create_worker(0, "ice-gnss-%s", dev_name(dev));
if (IS_ERR(kworker)) {
kfree(gnss);
@@ -281,7 +241,6 @@ static void ice_gnss_close(struct gnss_device *gdev)
if (!gnss)
return;
- kthread_cancel_work_sync(&gnss->write_work);
kthread_cancel_delayed_work_sync(&gnss->read_work);
}
@@ -300,10 +259,7 @@ ice_gnss_write(struct gnss_device *gdev, const unsigned char *buf,
size_t count)
{
struct ice_pf *pf = gnss_get_drvdata(gdev);
- struct gnss_write_buf *write_buf;
struct gnss_serial *gnss;
- unsigned char *cmd_buf;
- int err = count;
/* We cannot write a single byte using our I2C implementation. */
if (count <= 1 || count > ICE_GNSS_TTY_WRITE_BUF)
@@ -319,24 +275,7 @@ ice_gnss_write(struct gnss_device *gdev, const unsigned char *buf,
if (!gnss)
return -ENODEV;
- cmd_buf = kcalloc(count, sizeof(*buf), GFP_KERNEL);
- if (!cmd_buf)
- return -ENOMEM;
-
- memcpy(cmd_buf, buf, count);
- write_buf = kzalloc(sizeof(*write_buf), GFP_KERNEL);
- if (!write_buf) {
- kfree(cmd_buf);
- return -ENOMEM;
- }
-
- write_buf->buf = cmd_buf;
- write_buf->size = count;
- INIT_LIST_HEAD(&write_buf->queue);
- list_add_tail(&write_buf->queue, &gnss->queue);
- kthread_queue_work(gnss->kworker, &gnss->write_work);
-
- return err;
+ return ice_gnss_do_write(pf, buf, count);
}
static const struct gnss_operations ice_gnss_ops = {
@@ -432,7 +371,6 @@ void ice_gnss_exit(struct ice_pf *pf)
if (pf->gnss_serial) {
struct gnss_serial *gnss = pf->gnss_serial;
- kthread_cancel_work_sync(&gnss->write_work);
kthread_cancel_delayed_work_sync(&gnss->read_work);
kthread_destroy_worker(gnss->kworker);
gnss->kworker = NULL;
diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.h b/drivers/net/ethernet/intel/ice/ice_gnss.h
index b8bb8b63d081..75e567ad7059 100644
--- a/drivers/net/ethernet/intel/ice/ice_gnss.h
+++ b/drivers/net/ethernet/intel/ice/ice_gnss.h
@@ -22,26 +22,16 @@
*/
#define ICE_GNSS_UBX_WRITE_BYTES (ICE_MAX_I2C_WRITE_BYTES + 1)
-struct gnss_write_buf {
- struct list_head queue;
- unsigned int size;
- unsigned char *buf;
-};
-
/**
* struct gnss_serial - data used to initialize GNSS TTY port
* @back: back pointer to PF
* @kworker: kwork thread for handling periodic work
* @read_work: read_work function for handling GNSS reads
- * @write_work: write_work function for handling GNSS writes
- * @queue: write buffers queue
*/
struct gnss_serial {
struct ice_pf *back;
struct kthread_worker *kworker;
struct kthread_delayed_work read_work;
- struct kthread_work write_work;
- struct list_head queue;
};
#if IS_ENABLED(CONFIG_GNSS)
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 450317dfcca7..11ae0e41f518 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2745,6 +2745,8 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params)
goto unroll_vector_base;
ice_vsi_map_rings_to_vectors(vsi);
+ vsi->stat_offsets_loaded = false;
+
if (ice_is_xdp_ena_vsi(vsi)) {
ret = ice_vsi_determine_xdp_res(vsi);
if (ret)
@@ -2793,6 +2795,9 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params)
ret = ice_vsi_alloc_ring_stats(vsi);
if (ret)
goto unroll_vector_base;
+
+ vsi->stat_offsets_loaded = false;
+
/* Do not exit if configuring RSS had an issue, at least
* receive traffic on first queue. Hence no need to capture
* return value
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index a1f7c8edc22f..42c318ceff61 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -4802,9 +4802,13 @@ err_init_pf:
static void ice_deinit_dev(struct ice_pf *pf)
{
ice_free_irq_msix_misc(pf);
- ice_clear_interrupt_scheme(pf);
ice_deinit_pf(pf);
ice_deinit_hw(&pf->hw);
+
+ /* Service task is already stopped, so call reset directly. */
+ ice_reset(&pf->hw, ICE_RESET_PFR);
+ pci_wait_for_pending_transaction(pf->pdev);
+ ice_clear_interrupt_scheme(pf);
}
static void ice_init_features(struct ice_pf *pf)
@@ -5094,10 +5098,6 @@ int ice_load(struct ice_pf *pf)
struct ice_vsi *vsi;
int err;
- err = ice_reset(&pf->hw, ICE_RESET_PFR);
- if (err)
- return err;
-
err = ice_init_dev(pf);
if (err)
return err;
@@ -5354,12 +5354,6 @@ static void ice_remove(struct pci_dev *pdev)
ice_setup_mc_magic_wake(pf);
ice_set_wake(pf);
- /* Issue a PFR as part of the prescribed driver unload flow. Do not
- * do it via ice_schedule_reset() since there is no need to rebuild
- * and the service task is already stopped.
- */
- ice_reset(&pf->hw, ICE_RESET_PFR);
- pci_wait_for_pending_transaction(pdev);
pci_disable_device(pdev);
}
@@ -7056,6 +7050,10 @@ int ice_down(struct ice_vsi *vsi)
ice_for_each_txq(vsi, i)
ice_clean_tx_ring(vsi->tx_rings[i]);
+ if (ice_is_xdp_ena_vsi(vsi))
+ ice_for_each_xdp_txq(vsi, i)
+ ice_clean_tx_ring(vsi->xdp_rings[i]);
+
ice_for_each_rxq(vsi, i)
ice_clean_rx_ring(vsi->rx_rings[i]);
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c
index f1dca59bd844..588ad8696756 100644
--- a/drivers/net/ethernet/intel/ice/ice_sriov.c
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
@@ -1171,7 +1171,7 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
if (!vf)
return -EINVAL;
- ret = ice_check_vf_ready_for_cfg(vf);
+ ret = ice_check_vf_ready_for_reset(vf);
if (ret)
goto out_put_vf;
@@ -1286,7 +1286,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
goto out_put_vf;
}
- ret = ice_check_vf_ready_for_cfg(vf);
+ ret = ice_check_vf_ready_for_reset(vf);
if (ret)
goto out_put_vf;
@@ -1340,7 +1340,7 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
return -EOPNOTSUPP;
}
- ret = ice_check_vf_ready_for_cfg(vf);
+ ret = ice_check_vf_ready_for_reset(vf);
if (ret)
goto out_put_vf;
@@ -1653,7 +1653,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
if (!vf)
return -EINVAL;
- ret = ice_check_vf_ready_for_cfg(vf);
+ ret = ice_check_vf_ready_for_reset(vf);
if (ret)
goto out_put_vf;
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 4fcf2d07eb85..52d0a126eb61 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1152,11 +1152,11 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
unsigned int offset = rx_ring->rx_offset;
struct xdp_buff *xdp = &rx_ring->xdp;
+ u32 cached_ntc = rx_ring->first_desc;
struct ice_tx_ring *xdp_ring = NULL;
struct bpf_prog *xdp_prog = NULL;
u32 ntc = rx_ring->next_to_clean;
u32 cnt = rx_ring->count;
- u32 cached_ntc = ntc;
u32 xdp_xmit = 0;
u32 cached_ntu;
bool failure;
@@ -1664,8 +1664,7 @@ ice_tx_map(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first,
if (first->tx_flags & ICE_TX_FLAGS_HW_VLAN) {
td_cmd |= (u64)ICE_TX_DESC_CMD_IL2TAG1;
- td_tag = (first->tx_flags & ICE_TX_FLAGS_VLAN_M) >>
- ICE_TX_FLAGS_VLAN_S;
+ td_tag = first->vid;
}
dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
@@ -1998,7 +1997,7 @@ ice_tx_prepare_vlan_flags(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first)
* VLAN offloads exclusively so we only care about the VLAN ID here
*/
if (skb_vlan_tag_present(skb)) {
- first->tx_flags |= skb_vlan_tag_get(skb) << ICE_TX_FLAGS_VLAN_S;
+ first->vid = skb_vlan_tag_get(skb);
if (tx_ring->flags & ICE_TX_FLAGS_RING_VLAN_L2TAG2)
first->tx_flags |= ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN;
else
@@ -2388,8 +2387,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring)
offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
(ICE_TX_CTX_DESC_IL2TAG2 <<
ICE_TXD_CTX_QW1_CMD_S));
- offload.cd_l2tag2 = (first->tx_flags & ICE_TX_FLAGS_VLAN_M) >>
- ICE_TX_FLAGS_VLAN_S;
+ offload.cd_l2tag2 = first->vid;
}
/* set up TSO offload */
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index fff0efe28373..166413fc33f4 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -127,10 +127,6 @@ static inline int ice_skb_pad(void)
#define ICE_TX_FLAGS_IPV6 BIT(6)
#define ICE_TX_FLAGS_TUNNEL BIT(7)
#define ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN BIT(8)
-#define ICE_TX_FLAGS_VLAN_M 0xffff0000
-#define ICE_TX_FLAGS_VLAN_PR_M 0xe0000000
-#define ICE_TX_FLAGS_VLAN_PR_S 29
-#define ICE_TX_FLAGS_VLAN_S 16
#define ICE_XDP_PASS 0
#define ICE_XDP_CONSUMED BIT(0)
@@ -182,8 +178,9 @@ struct ice_tx_buf {
unsigned int gso_segs;
unsigned int nr_frags; /* used for mbuf XDP */
};
- u32 type:16; /* &ice_tx_buf_type */
- u32 tx_flags:16;
+ u32 tx_flags:12;
+ u32 type:4; /* &ice_tx_buf_type */
+ u32 vid:16;
DEFINE_DMA_UNMAP_LEN(len);
DEFINE_DMA_UNMAP_ADDR(dma);
};
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
index 89fd6982df09..bf74a2f3a4f8 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
@@ -186,6 +186,25 @@ int ice_check_vf_ready_for_cfg(struct ice_vf *vf)
}
/**
+ * ice_check_vf_ready_for_reset - check if VF is ready to be reset
+ * @vf: VF to check if it's ready to be reset
+ *
+ * The purpose of this function is to ensure that the VF is not in reset,
+ * disabled, and is both initialized and active, thus enabling us to safely
+ * initialize another reset.
+ */
+int ice_check_vf_ready_for_reset(struct ice_vf *vf)
+{
+ int ret;
+
+ ret = ice_check_vf_ready_for_cfg(vf);
+ if (!ret && !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+ ret = -EAGAIN;
+
+ return ret;
+}
+
+/**
* ice_trigger_vf_reset - Reset a VF on HW
* @vf: pointer to the VF structure
* @is_vflr: true if VFLR was issued, false if not
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
index e3cda6fb71ab..a38ef00a3679 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
@@ -215,6 +215,7 @@ u16 ice_get_num_vfs(struct ice_pf *pf);
struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf);
bool ice_is_vf_disabled(struct ice_vf *vf);
int ice_check_vf_ready_for_cfg(struct ice_vf *vf);
+int ice_check_vf_ready_for_reset(struct ice_vf *vf);
void ice_set_vf_state_dis(struct ice_vf *vf);
bool ice_is_any_vf_in_unicast_promisc(struct ice_pf *pf);
void
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
index 97243c616d5d..f4a524f80b11 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
@@ -3955,6 +3955,7 @@ error_handler:
ice_vc_notify_vf_link_state(vf);
break;
case VIRTCHNL_OP_RESET_VF:
+ clear_bit(ICE_VF_STATE_ACTIVE, vf->vf_states);
ops->reset_vf(vf);
break;
case VIRTCHNL_OP_ADD_ETH_ADDR:
diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c
index 205d577bdbba..caf91c6f52b4 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mac.c
+++ b/drivers/net/ethernet/intel/igb/e1000_mac.c
@@ -426,7 +426,7 @@ void igb_mta_set(struct e1000_hw *hw, u32 hash_value)
static u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr)
{
u32 hash_value, hash_mask;
- u8 bit_shift = 0;
+ u8 bit_shift = 1;
/* Register count multiplied by bits per register */
hash_mask = (hw->mac.mta_reg_count * 32) - 1;
@@ -434,7 +434,7 @@ static u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr)
/* For a mc_filter_type of 0, bit_shift is the number of left-shifts
* where 0xFF would still fall within the hash mask.
*/
- while (hash_mask >> bit_shift != 0xFF)
+ while (hash_mask >> bit_shift != 0xFF && bit_shift < 4)
bit_shift++;
/* The portion of the address that is used for the hash table
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 7d60da1b7bf4..319ed601eaa1 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -822,6 +822,8 @@ static int igb_set_eeprom(struct net_device *netdev,
*/
ret_val = hw->nvm.ops.read(hw, last_word, 1,
&eeprom_buff[last_word - first_word]);
+ if (ret_val)
+ goto out;
}
/* Device's eeprom is always little-endian, word addressable */
@@ -841,6 +843,7 @@ static int igb_set_eeprom(struct net_device *netdev,
hw->nvm.ops.update(hw);
igb_set_fw_version(adapter);
+out:
kfree(eeprom_buff);
return ret_val;
}
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 58872a4c2540..bb3db387d49c 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -6947,6 +6947,7 @@ static void igb_extts(struct igb_adapter *adapter, int tsintr_tt)
struct e1000_hw *hw = &adapter->hw;
struct ptp_clock_event event;
struct timespec64 ts;
+ unsigned long flags;
if (pin < 0 || pin >= IGB_N_SDP)
return;
@@ -6954,9 +6955,12 @@ static void igb_extts(struct igb_adapter *adapter, int tsintr_tt)
if (hw->mac.type == e1000_82580 ||
hw->mac.type == e1000_i354 ||
hw->mac.type == e1000_i350) {
- s64 ns = rd32(auxstmpl);
+ u64 ns = rd32(auxstmpl);
- ns += ((s64)(rd32(auxstmph) & 0xFF)) << 32;
+ ns += ((u64)(rd32(auxstmph) & 0xFF)) << 32;
+ spin_lock_irqsave(&adapter->tmreg_lock, flags);
+ ns = timecounter_cyc2time(&adapter->tc, ns);
+ spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
ts = ns_to_timespec64(ns);
} else {
ts.tv_nsec = rd32(auxstmpl);
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 1c4676882082..fa764190f270 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -254,6 +254,13 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring)
/* reset BQL for queue */
netdev_tx_reset_queue(txring_txq(tx_ring));
+ /* Zero out the buffer ring */
+ memset(tx_ring->tx_buffer_info, 0,
+ sizeof(*tx_ring->tx_buffer_info) * tx_ring->count);
+
+ /* Zero out the descriptor ring */
+ memset(tx_ring->desc, 0, tx_ring->size);
+
/* reset next_to_use and next_to_clean */
tx_ring->next_to_use = 0;
tx_ring->next_to_clean = 0;
@@ -267,7 +274,7 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring)
*/
void igc_free_tx_resources(struct igc_ring *tx_ring)
{
- igc_clean_tx_ring(tx_ring);
+ igc_disable_tx_ring(tx_ring);
vfree(tx_ring->tx_buffer_info);
tx_ring->tx_buffer_info = NULL;
@@ -6723,6 +6730,9 @@ static void igc_remove(struct pci_dev *pdev)
igc_ptp_stop(adapter);
+ pci_disable_ptm(pdev);
+ pci_clear_master(pdev);
+
set_bit(__IGC_DOWN, &adapter->state);
del_timer_sync(&adapter->watchdog_timer);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 5d83c887a3fc..1726297f2e0d 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1256,7 +1256,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
if (!__netif_txq_completed_wake(txq, total_packets, total_bytes,
ixgbe_desc_unused(tx_ring),
TX_WAKE_THRESHOLD,
- netif_carrier_ok(tx_ring->netdev) &&
+ !netif_carrier_ok(tx_ring->netdev) ||
test_bit(__IXGBE_DOWN, &adapter->state)))
++tx_ring->tx_stats.restart_queue;
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
index e1853da280f9..43eb6e871351 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
@@ -981,6 +981,9 @@ int octep_device_setup(struct octep_device *oct)
oct->mmio[i].hw_addr =
ioremap(pci_resource_start(oct->pdev, i * 2),
pci_resource_len(oct->pdev, i * 2));
+ if (!oct->mmio[i].hw_addr)
+ goto unmap_prev;
+
oct->mmio[i].mapped = 1;
}
@@ -1015,7 +1018,9 @@ int octep_device_setup(struct octep_device *oct)
return 0;
unsupported_dev:
- for (i = 0; i < OCTEP_MMIO_REGIONS; i++)
+ i = OCTEP_MMIO_REGIONS;
+unmap_prev:
+ while (i--)
iounmap(oct->mmio[i].hw_addr);
kfree(oct->conf);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 4ad707e758b9..f01d057ad025 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -1878,7 +1878,8 @@ static int nix_check_txschq_alloc_req(struct rvu *rvu, int lvl, u16 pcifunc,
free_cnt = rvu_rsrc_free_count(&txsch->schq);
}
- if (free_cnt < req_schq || req_schq > MAX_TXSCHQ_PER_FUNC)
+ if (free_cnt < req_schq || req->schq[lvl] > MAX_TXSCHQ_PER_FUNC ||
+ req->schq_contig[lvl] > MAX_TXSCHQ_PER_FUNC)
return NIX_AF_ERR_TLX_ALLOC_FAIL;
/* If contiguous queues are needed, check for availability */
@@ -4080,10 +4081,6 @@ int rvu_mbox_handler_nix_set_rx_cfg(struct rvu *rvu, struct nix_rx_cfg *req,
static u64 rvu_get_lbk_link_credits(struct rvu *rvu, u16 lbk_max_frs)
{
- /* CN10k supports 72KB FIFO size and max packet size of 64k */
- if (rvu->hw->lbk_bufsize == 0x12000)
- return (rvu->hw->lbk_bufsize - lbk_max_frs) / 16;
-
return 1600; /* 16 * max LBK datarate = 16 * 100Gbps */
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c
index 51209119f0f2..9f11c1e40737 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c
@@ -1164,10 +1164,8 @@ static u16 __rvu_npc_exact_cmd_rules_cnt_update(struct rvu *rvu, int drop_mcam_i
{
struct npc_exact_table *table;
u16 *cnt, old_cnt;
- bool promisc;
table = rvu->hw->table;
- promisc = table->promisc_mode[drop_mcam_idx];
cnt = &table->cnt_cmd_rules[drop_mcam_idx];
old_cnt = *cnt;
@@ -1179,16 +1177,13 @@ static u16 __rvu_npc_exact_cmd_rules_cnt_update(struct rvu *rvu, int drop_mcam_i
*enable_or_disable_cam = false;
- if (promisc)
- goto done;
-
- /* If all rules are deleted and not already in promisc mode; disable cam */
+ /* If all rules are deleted, disable cam */
if (!*cnt && val < 0) {
*enable_or_disable_cam = true;
goto done;
}
- /* If rule got added and not already in promisc mode; enable cam */
+ /* If rule got added, enable cam */
if (!old_cnt && val > 0) {
*enable_or_disable_cam = true;
goto done;
@@ -1443,7 +1438,6 @@ int rvu_npc_exact_promisc_disable(struct rvu *rvu, u16 pcifunc)
u32 drop_mcam_idx;
bool *promisc;
bool rc;
- u32 cnt;
table = rvu->hw->table;
@@ -1466,17 +1460,8 @@ int rvu_npc_exact_promisc_disable(struct rvu *rvu, u16 pcifunc)
return LMAC_AF_ERR_INVALID_PARAM;
}
*promisc = false;
- cnt = __rvu_npc_exact_cmd_rules_cnt_update(rvu, drop_mcam_idx, 0, NULL);
mutex_unlock(&table->lock);
- /* If no dmac filter entries configured, disable drop rule */
- if (!cnt)
- rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX, false);
- else
- rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX, !*promisc);
-
- dev_dbg(rvu->dev, "%s: disabled promisc mode (cgx=%d lmac=%d, cnt=%d)\n",
- __func__, cgx_id, lmac_id, cnt);
return 0;
}
@@ -1494,7 +1479,6 @@ int rvu_npc_exact_promisc_enable(struct rvu *rvu, u16 pcifunc)
u32 drop_mcam_idx;
bool *promisc;
bool rc;
- u32 cnt;
table = rvu->hw->table;
@@ -1517,17 +1501,8 @@ int rvu_npc_exact_promisc_enable(struct rvu *rvu, u16 pcifunc)
return LMAC_AF_ERR_INVALID_PARAM;
}
*promisc = true;
- cnt = __rvu_npc_exact_cmd_rules_cnt_update(rvu, drop_mcam_idx, 0, NULL);
mutex_unlock(&table->lock);
- /* If no dmac filter entries configured, disable drop rule */
- if (!cnt)
- rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX, false);
- else
- rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX, !*promisc);
-
- dev_dbg(rvu->dev, "%s: Enabled promisc mode (cgx=%d lmac=%d cnt=%d)\n",
- __func__, cgx_id, lmac_id, cnt);
return 0;
}
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
index 7045fedfd73a..7af223b0a37f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
@@ -652,9 +652,7 @@ static void otx2_sqe_add_ext(struct otx2_nic *pfvf, struct otx2_snd_queue *sq,
htons(ext->lso_sb - skb_network_offset(skb));
} else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
ext->lso_format = pfvf->hw.lso_tsov6_idx;
-
- ipv6_hdr(skb)->payload_len =
- htons(ext->lso_sb - skb_network_offset(skb));
+ ipv6_hdr(skb)->payload_len = htons(tcp_hdrlen(skb));
} else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
__be16 l3_proto = vlan_get_protocol(skb);
struct udphdr *udph = udp_hdr(skb);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index a75fd072082c..834c644b67db 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -3269,18 +3269,14 @@ static int mtk_open(struct net_device *dev)
eth->dsa_meta[i] = md_dst;
}
} else {
- /* Hardware special tag parsing needs to be disabled if at least
- * one MAC does not use DSA.
+ /* Hardware DSA untagging and VLAN RX offloading need to be
+ * disabled if at least one MAC does not use DSA.
*/
u32 val = mtk_r32(eth, MTK_CDMP_IG_CTRL);
val &= ~MTK_CDMP_STAG_EN;
mtk_w32(eth, val, MTK_CDMP_IG_CTRL);
- val = mtk_r32(eth, MTK_CDMQ_IG_CTRL);
- val &= ~MTK_CDMQ_STAG_EN;
- mtk_w32(eth, val, MTK_CDMQ_IG_CTRL);
-
mtk_w32(eth, 0, MTK_CDMP_EG_CTRL);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index d53de39539a8..d532883b42d7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1920,9 +1920,10 @@ static void mlx5_cmd_err_trace(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod
static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status,
u32 syndrome, int err)
{
+ const char *namep = mlx5_command_str(opcode);
struct mlx5_cmd_stats *stats;
- if (!err)
+ if (!err || !(strcmp(namep, "unknown command opcode")))
return;
stats = &dev->cmd.stats[opcode];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index f40497823e65..7c0f2adbea00 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -490,7 +490,7 @@ static void poll_trace(struct mlx5_fw_tracer *tracer,
(u64)timestamp_low;
break;
default:
- if (tracer_event->event_id >= tracer->str_db.first_string_trace ||
+ if (tracer_event->event_id >= tracer->str_db.first_string_trace &&
tracer_event->event_id <= tracer->str_db.first_string_trace +
tracer->str_db.num_string_trace) {
tracer_event->type = TRACER_EVENT_TYPE_STRING;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index b8987a404d75..8e999f238194 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -327,6 +327,7 @@ struct mlx5e_params {
unsigned int sw_mtu;
int hard_mtu;
bool ptp_rx;
+ __be32 terminate_lkey_be;
};
static inline u8 mlx5e_get_dcb_num_tc(struct mlx5e_params *params)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 9c94807097cb..5ce28ff7685f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -732,7 +732,8 @@ static void mlx5e_rx_compute_wqe_bulk_params(struct mlx5e_params *params,
static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
- struct mlx5e_rq_frags_info *info)
+ struct mlx5e_rq_frags_info *info,
+ u32 *xdp_frag_size)
{
u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu);
int frag_size_max = DEFAULT_FRAG_SIZE;
@@ -845,6 +846,8 @@ out:
info->log_num_frags = order_base_2(info->num_frags);
+ *xdp_frag_size = info->num_frags > 1 && params->xdp_prog ? PAGE_SIZE : 0;
+
return 0;
}
@@ -989,7 +992,8 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
}
default: /* MLX5_WQ_TYPE_CYCLIC */
MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames);
- err = mlx5e_build_rq_frags_info(mdev, params, xsk, &param->frags_info);
+ err = mlx5e_build_rq_frags_info(mdev, params, xsk, &param->frags_info,
+ &param->xdp_frag_size);
if (err)
return err;
ndsegs = param->frags_info.num_frags;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index a5d20f6d6d9c..6800949dafbc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -24,6 +24,7 @@ struct mlx5e_rq_param {
u32 rqc[MLX5_ST_SZ_DW(rqc)];
struct mlx5_wq_param wq;
struct mlx5e_rq_frags_info frags_info;
+ u32 xdp_frag_size;
};
struct mlx5e_sq_param {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
index 7ac1ad9c46de..7e8e96cc5cd0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
@@ -51,7 +51,7 @@ int mlx5e_port_query_buffer(struct mlx5e_priv *priv,
if (err)
goto out;
- for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) {
buffer = MLX5_ADDR_OF(pbmc_reg, out, buffer[i]);
port_buffer->buffer[i].lossy =
MLX5_GET(bufferx_reg, buffer, lossy);
@@ -73,14 +73,24 @@ int mlx5e_port_query_buffer(struct mlx5e_priv *priv,
port_buffer->buffer[i].lossy);
}
- port_buffer->headroom_size = total_used;
+ port_buffer->internal_buffers_size = 0;
+ for (i = MLX5E_MAX_NETWORK_BUFFER; i < MLX5E_TOTAL_BUFFERS; i++) {
+ buffer = MLX5_ADDR_OF(pbmc_reg, out, buffer[i]);
+ port_buffer->internal_buffers_size +=
+ MLX5_GET(bufferx_reg, buffer, size) * port_buff_cell_sz;
+ }
+
port_buffer->port_buffer_size =
MLX5_GET(pbmc_reg, out, port_buffer_size) * port_buff_cell_sz;
- port_buffer->spare_buffer_size =
- port_buffer->port_buffer_size - total_used;
-
- mlx5e_dbg(HW, priv, "total buffer size=%d, spare buffer size=%d\n",
- port_buffer->port_buffer_size,
+ port_buffer->headroom_size = total_used;
+ port_buffer->spare_buffer_size = port_buffer->port_buffer_size -
+ port_buffer->internal_buffers_size -
+ port_buffer->headroom_size;
+
+ mlx5e_dbg(HW, priv,
+ "total buffer size=%u, headroom buffer size=%u, internal buffers size=%u, spare buffer size=%u\n",
+ port_buffer->port_buffer_size, port_buffer->headroom_size,
+ port_buffer->internal_buffers_size,
port_buffer->spare_buffer_size);
out:
kfree(out);
@@ -206,11 +216,11 @@ static int port_update_pool_cfg(struct mlx5_core_dev *mdev,
if (!MLX5_CAP_GEN(mdev, sbcam_reg))
return 0;
- for (i = 0; i < MLX5E_MAX_BUFFER; i++)
+ for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++)
lossless_buff_count += ((port_buffer->buffer[i].size) &&
(!(port_buffer->buffer[i].lossy)));
- for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) {
p = select_sbcm_params(&port_buffer->buffer[i], lossless_buff_count);
err = mlx5e_port_set_sbcm(mdev, 0, i,
MLX5_INGRESS_DIR,
@@ -293,7 +303,7 @@ static int port_set_buffer(struct mlx5e_priv *priv,
if (err)
goto out;
- for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) {
void *buffer = MLX5_ADDR_OF(pbmc_reg, in, buffer[i]);
u64 size = port_buffer->buffer[i].size;
u64 xoff = port_buffer->buffer[i].xoff;
@@ -351,7 +361,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer,
{
int i;
- for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) {
if (port_buffer->buffer[i].lossy) {
port_buffer->buffer[i].xoff = 0;
port_buffer->buffer[i].xon = 0;
@@ -408,7 +418,7 @@ static int update_buffer_lossy(struct mlx5_core_dev *mdev,
int err;
int i;
- for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) {
prio_count = 0;
lossy_count = 0;
@@ -432,11 +442,11 @@ static int update_buffer_lossy(struct mlx5_core_dev *mdev,
}
if (changed) {
- err = port_update_pool_cfg(mdev, port_buffer);
+ err = update_xoff_threshold(port_buffer, xoff, max_mtu, port_buff_cell_sz);
if (err)
return err;
- err = update_xoff_threshold(port_buffer, xoff, max_mtu, port_buff_cell_sz);
+ err = port_update_pool_cfg(mdev, port_buffer);
if (err)
return err;
@@ -515,7 +525,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
if (change & MLX5E_PORT_BUFFER_PRIO2BUFFER) {
update_prio2buffer = true;
- for (i = 0; i < MLX5E_MAX_BUFFER; i++)
+ for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++)
mlx5e_dbg(HW, priv, "%s: requested to map prio[%d] to buffer %d\n",
__func__, i, prio2buffer[i]);
@@ -530,7 +540,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
}
if (change & MLX5E_PORT_BUFFER_SIZE) {
- for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) {
mlx5e_dbg(HW, priv, "%s: buffer[%d]=%d\n", __func__, i, buffer_size[i]);
if (!port_buffer.buffer[i].lossy && !buffer_size[i]) {
mlx5e_dbg(HW, priv, "%s: lossless buffer[%d] size cannot be zero\n",
@@ -544,7 +554,9 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
mlx5e_dbg(HW, priv, "%s: total buffer requested=%d\n", __func__, total_used);
- if (total_used > port_buffer.port_buffer_size)
+ if (total_used > port_buffer.headroom_size &&
+ (total_used - port_buffer.headroom_size) >
+ port_buffer.spare_buffer_size)
return -EINVAL;
update_buffer = true;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h
index a6ef118de758..f4a19ffbb641 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h
@@ -35,7 +35,8 @@
#include "en.h"
#include "port.h"
-#define MLX5E_MAX_BUFFER 8
+#define MLX5E_MAX_NETWORK_BUFFER 8
+#define MLX5E_TOTAL_BUFFERS 10
#define MLX5E_DEFAULT_CABLE_LEN 7 /* 7 meters */
#define MLX5_BUFFER_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, pcam_reg) && \
@@ -60,8 +61,9 @@ struct mlx5e_bufferx_reg {
struct mlx5e_port_buffer {
u32 port_buffer_size;
u32 spare_buffer_size;
- u32 headroom_size;
- struct mlx5e_bufferx_reg buffer[MLX5E_MAX_BUFFER];
+ u32 headroom_size; /* Buffers 0-7 */
+ u32 internal_buffers_size; /* Buffers 8-9 */
+ struct mlx5e_bufferx_reg buffer[MLX5E_MAX_NETWORK_BUFFER];
};
int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index eb5abd0e55d9..3cbebfba582b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -175,6 +175,8 @@ static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
/* ensure cq space is freed before enabling more cqes */
wmb();
+ mlx5e_txqsq_wake(&ptpsq->txqsq);
+
return work_done == budget;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
index fc923a99b6a4..0380a04c3691 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
@@ -84,7 +84,7 @@ mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state,
int
mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state,
- struct flow_action *flow_action,
+ struct flow_action *flow_action, int from, int to,
struct mlx5_flow_attr *attr,
enum mlx5_flow_namespace_type ns_type)
{
@@ -96,6 +96,11 @@ mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state,
priv = parse_state->flow->priv;
flow_action_for_each(i, act, flow_action) {
+ if (i < from)
+ continue;
+ else if (i > to)
+ break;
+
tc_act = mlx5e_tc_act_get(act->id, ns_type);
if (!tc_act || !tc_act->post_parse)
continue;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
index 0e6e1872ac62..d6c12d0ea55b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
@@ -112,7 +112,7 @@ mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state,
int
mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state,
- struct flow_action *flow_action,
+ struct flow_action *flow_action, int from, int to,
struct mlx5_flow_attr *attr,
enum mlx5_flow_namespace_type ns_type);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index ead38ef69483..a254e728ac95 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -2021,6 +2021,8 @@ void
mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
struct mlx5_flow_attr *attr)
{
+ if (!attr->ct_attr.ft) /* no ct action, return */
+ return;
if (!attr->ct_attr.nf_ft) /* means only ct clear action, and not ct_clear,ct() */
return;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
index 20c2d2ecaf93..f0c3464f037f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -492,6 +492,19 @@ void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
mlx5e_encap_dealloc(priv, e);
}
+static void mlx5e_encap_put_locked(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+ lockdep_assert_held(&esw->offloads.encap_tbl_lock);
+
+ if (!refcount_dec_and_test(&e->refcnt))
+ return;
+ list_del(&e->route_list);
+ hash_del_rcu(&e->encap_hlist);
+ mlx5e_encap_dealloc(priv, e);
+}
+
static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
@@ -816,6 +829,8 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
uintptr_t hash_key;
int err = 0;
+ lockdep_assert_held(&esw->offloads.encap_tbl_lock);
+
parse_attr = attr->parse_attr;
tun_info = parse_attr->tun_info[out_index];
mpls_info = &parse_attr->mpls_info[out_index];
@@ -829,7 +844,6 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
hash_key = hash_encap_info(&key);
- mutex_lock(&esw->offloads.encap_tbl_lock);
e = mlx5e_encap_get(priv, &key, hash_key);
/* must verify if encap is valid or not */
@@ -840,15 +854,6 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
goto out_err;
}
- mutex_unlock(&esw->offloads.encap_tbl_lock);
- wait_for_completion(&e->res_ready);
-
- /* Protect against concurrent neigh update. */
- mutex_lock(&esw->offloads.encap_tbl_lock);
- if (e->compl_result < 0) {
- err = -EREMOTEIO;
- goto out_err;
- }
goto attach_flow;
}
@@ -877,15 +882,12 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
INIT_LIST_HEAD(&e->flows);
hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
- mutex_unlock(&esw->offloads.encap_tbl_lock);
if (family == AF_INET)
err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
else if (family == AF_INET6)
err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
- /* Protect against concurrent neigh update. */
- mutex_lock(&esw->offloads.encap_tbl_lock);
complete_all(&e->res_ready);
if (err) {
e->compl_result = err;
@@ -920,18 +922,15 @@ attach_flow:
} else {
flow_flag_set(flow, SLOW);
}
- mutex_unlock(&esw->offloads.encap_tbl_lock);
return err;
out_err:
- mutex_unlock(&esw->offloads.encap_tbl_lock);
if (e)
- mlx5e_encap_put(priv, e);
+ mlx5e_encap_put_locked(priv, e);
return err;
out_err_init:
- mutex_unlock(&esw->offloads.encap_tbl_lock);
kfree(tun_info);
kfree(e);
return err;
@@ -1016,6 +1015,93 @@ out_err:
return err;
}
+int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct netlink_ext_ack *extack,
+ bool *vf_tun)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5_esw_flow_attr *esw_attr;
+ struct net_device *encap_dev = NULL;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_priv *out_priv;
+ struct mlx5_eswitch *esw;
+ int out_index;
+ int err = 0;
+
+ if (!mlx5e_is_eswitch_flow(flow))
+ return 0;
+
+ parse_attr = attr->parse_attr;
+ esw_attr = attr->esw_attr;
+ *vf_tun = false;
+
+ esw = priv->mdev->priv.eswitch;
+ mutex_lock(&esw->offloads.encap_tbl_lock);
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
+ struct net_device *out_dev;
+ int mirred_ifindex;
+
+ if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
+ continue;
+
+ mirred_ifindex = parse_attr->mirred_ifindex[out_index];
+ out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
+ if (!out_dev) {
+ NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
+ err = -ENODEV;
+ goto out;
+ }
+ err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
+ extack, &encap_dev);
+ dev_put(out_dev);
+ if (err)
+ goto out;
+
+ if (esw_attr->dests[out_index].flags &
+ MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
+ !esw_attr->dest_int_port)
+ *vf_tun = true;
+
+ out_priv = netdev_priv(encap_dev);
+ rpriv = out_priv->ppriv;
+ esw_attr->dests[out_index].rep = rpriv->rep;
+ esw_attr->dests[out_index].mdev = out_priv->mdev;
+ }
+
+ if (*vf_tun && esw_attr->out_count > 1) {
+ NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+out:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ return err;
+}
+
+void mlx5e_tc_tun_encap_dests_unset(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr;
+ int out_index;
+
+ if (!mlx5e_is_eswitch_flow(flow))
+ return;
+
+ esw_attr = attr->esw_attr;
+
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
+ if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
+ continue;
+
+ mlx5e_detach_encap(flow->priv, flow, attr, out_index);
+ kfree(attr->parse_attr->tun_info[out_index]);
+ }
+}
+
static int cmp_route_info(struct mlx5e_route_key *a,
struct mlx5e_route_key *b)
{
@@ -1369,11 +1455,13 @@ static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow;
list_for_each_entry(flow, encap_flows, tmp_list) {
- struct mlx5_flow_attr *attr = flow->attr;
struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_flow_attr *attr;
if (!mlx5e_is_offloaded_flow(flow))
continue;
+
+ attr = mlx5e_tc_get_encap_attr(flow);
esw_attr = attr->esw_attr;
if (flow_flag_test(flow, SLOW))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
index 8ad273dde40e..5d7d67687cbc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
@@ -30,6 +30,15 @@ int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow);
+int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct netlink_ext_ack *extack,
+ bool *vf_tun);
+void mlx5e_tc_tun_encap_dests_unset(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr);
+
struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info);
int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 47381e949f1f..879d698b6119 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -193,6 +193,8 @@ static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size)
return pi;
}
+void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq);
+
static inline u16 mlx5e_shampo_get_cqe_header_index(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
return be16_to_cpu(cqe->shampo.header_entry_index) & (rq->mpwqe.shampo->hd_per_wq - 1);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index ed279f450976..36826b582484 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -86,7 +86,7 @@ static int mlx5e_init_xsk_rq(struct mlx5e_channel *c,
if (err)
return err;
- return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix, 0);
+ return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix, c->napi.napi_id);
}
static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index 55b38544422f..891d39b4bfd4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -61,16 +61,19 @@ static void mlx5e_ipsec_handle_tx_limit(struct work_struct *_work)
struct mlx5e_ipsec_sa_entry *sa_entry = dwork->sa_entry;
struct xfrm_state *x = sa_entry->x;
- spin_lock(&x->lock);
+ if (sa_entry->attrs.drop)
+ return;
+
+ spin_lock_bh(&x->lock);
xfrm_state_check_expire(x);
if (x->km.state == XFRM_STATE_EXPIRED) {
sa_entry->attrs.drop = true;
- mlx5e_accel_ipsec_fs_modify(sa_entry);
- }
- spin_unlock(&x->lock);
+ spin_unlock_bh(&x->lock);
- if (sa_entry->attrs.drop)
+ mlx5e_accel_ipsec_fs_modify(sa_entry);
return;
+ }
+ spin_unlock_bh(&x->lock);
queue_delayed_work(sa_entry->ipsec->wq, &dwork->dwork,
MLX5_IPSEC_RESCHED);
@@ -1040,11 +1043,17 @@ err_fs:
return err;
}
-static void mlx5e_xfrm_free_policy(struct xfrm_policy *x)
+static void mlx5e_xfrm_del_policy(struct xfrm_policy *x)
{
struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x);
mlx5e_accel_ipsec_fs_del_pol(pol_entry);
+}
+
+static void mlx5e_xfrm_free_policy(struct xfrm_policy *x)
+{
+ struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x);
+
kfree(pol_entry);
}
@@ -1065,6 +1074,7 @@ static const struct xfrmdev_ops mlx5e_ipsec_packet_xfrmdev_ops = {
.xdo_dev_state_update_curlft = mlx5e_xfrm_update_curlft,
.xdo_dev_policy_add = mlx5e_xfrm_add_policy,
+ .xdo_dev_policy_delete = mlx5e_xfrm_del_policy,
.xdo_dev_policy_free = mlx5e_xfrm_free_policy,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
index df90e19066bc..a3554bde3e07 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c
@@ -305,7 +305,17 @@ static void mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry,
}
mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs);
+
+ /* It is safe to execute the modify below unlocked since the only flows
+ * that could affect this HW object, are create, destroy and this work.
+ *
+ * Creation flow can't co-exist with this modify work, the destruction
+ * flow would cancel this work, and this work is a single entity that
+ * can't conflict with it self.
+ */
+ spin_unlock_bh(&sa_entry->x->lock);
mlx5_accel_esp_modify_xfrm(sa_entry, &attrs);
+ spin_lock_bh(&sa_entry->x->lock);
data.data_offset_condition_operand =
MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET;
@@ -431,7 +441,7 @@ static void mlx5e_ipsec_handle_event(struct work_struct *_work)
aso = sa_entry->ipsec->aso;
attrs = &sa_entry->attrs;
- spin_lock(&sa_entry->x->lock);
+ spin_lock_bh(&sa_entry->x->lock);
ret = mlx5e_ipsec_aso_query(sa_entry, NULL);
if (ret)
goto unlock;
@@ -447,7 +457,7 @@ static void mlx5e_ipsec_handle_event(struct work_struct *_work)
mlx5e_ipsec_handle_limits(sa_entry);
unlock:
- spin_unlock(&sa_entry->x->lock);
+ spin_unlock_bh(&sa_entry->x->lock);
kfree(work);
}
@@ -596,7 +606,8 @@ int mlx5e_ipsec_aso_query(struct mlx5e_ipsec_sa_entry *sa_entry,
do {
ret = mlx5_aso_poll_cq(aso->aso, false);
if (ret)
- usleep_range(2, 10);
+ /* We are in atomic context */
+ udelay(10);
} while (ret && time_is_after_jiffies(expires));
spin_unlock_bh(&aso->lock);
return ret;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index 1f90594499c6..41c396e76457 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -150,10 +150,8 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb,
inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
in = kvzalloc(inlen, GFP_KERNEL);
- if (!in) {
- err = -ENOMEM;
- goto out;
- }
+ if (!in)
+ return -ENOMEM;
if (enable_uc_lb)
lb_flags = MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
@@ -171,14 +169,13 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb,
tirn = tir->tirn;
err = mlx5_core_modify_tir(mdev, tirn, in);
if (err)
- goto out;
+ break;
}
+ mutex_unlock(&mdev->mlx5e_res.hw_objs.td.list_lock);
-out:
kvfree(in);
if (err)
netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err);
- mutex_unlock(&mdev->mlx5e_res.hw_objs.td.list_lock);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index 89de92d06483..ebee52a8361a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -926,9 +926,10 @@ static int mlx5e_dcbnl_getbuffer(struct net_device *dev,
if (err)
return err;
- for (i = 0; i < MLX5E_MAX_BUFFER; i++)
+ for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++)
dcb_buffer->buffer_size[i] = port_buffer.buffer[i].size;
- dcb_buffer->total_size = port_buffer.port_buffer_size;
+ dcb_buffer->total_size = port_buffer.port_buffer_size -
+ port_buffer.internal_buffers_size;
return 0;
}
@@ -970,7 +971,7 @@ static int mlx5e_dcbnl_setbuffer(struct net_device *dev,
if (err)
return err;
- for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
+ for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) {
if (port_buffer.buffer[i].size != dcb_buffer->buffer_size[i]) {
changed |= MLX5E_PORT_BUFFER_SIZE;
buffer_size = dcb_buffer->buffer_size;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 2944691f06ad..a5bdf78955d7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -641,7 +641,7 @@ static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
}
static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
- struct mlx5e_rq *rq)
+ u32 xdp_frag_size, struct mlx5e_rq *rq)
{
struct mlx5_core_dev *mdev = c->mdev;
int err;
@@ -665,7 +665,8 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param
if (err)
return err;
- return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, c->napi.napi_id);
+ return __xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, c->napi.napi_id,
+ xdp_frag_size);
}
static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
@@ -727,26 +728,6 @@ static void mlx5e_rq_free_shampo(struct mlx5e_rq *rq)
mlx5e_rq_shampo_hd_free(rq);
}
-static __be32 mlx5e_get_terminate_scatter_list_mkey(struct mlx5_core_dev *dev)
-{
- u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {};
- u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {};
- int res;
-
- if (!MLX5_CAP_GEN(dev, terminate_scatter_list_mkey))
- return MLX5_TERMINATE_SCATTER_LIST_LKEY;
-
- MLX5_SET(query_special_contexts_in, in, opcode,
- MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
- res = mlx5_cmd_exec_inout(dev, query_special_contexts, in, out);
- if (res)
- return MLX5_TERMINATE_SCATTER_LIST_LKEY;
-
- res = MLX5_GET(query_special_contexts_out, out,
- terminate_scatter_list_mkey);
- return cpu_to_be32(res);
-}
-
static int mlx5e_alloc_rq(struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
struct mlx5e_rq_param *rqp,
@@ -908,7 +889,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
/* check if num_frags is not a pow of two */
if (rq->wqe.info.num_frags < (1 << rq->wqe.info.log_num_frags)) {
wqe->data[f].byte_count = 0;
- wqe->data[f].lkey = mlx5e_get_terminate_scatter_list_mkey(mdev);
+ wqe->data[f].lkey = params->terminate_lkey_be;
wqe->data[f].addr = 0;
}
}
@@ -2260,7 +2241,7 @@ static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param
{
int err;
- err = mlx5e_init_rxq_rq(c, params, &c->rq);
+ err = mlx5e_init_rxq_rq(c, params, rq_params->xdp_frag_size, &c->rq);
if (err)
return err;
@@ -5007,6 +4988,8 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
/* RQ */
mlx5e_build_rq_params(mdev, params);
+ params->terminate_lkey_be = mlx5_core_get_terminate_scatter_list_mkey(mdev);
+
params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
/* CQ moderation params */
@@ -5279,12 +5262,16 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
mlx5e_timestamp_init(priv);
+ priv->dfs_root = debugfs_create_dir("nic",
+ mlx5_debugfs_get_dev_root(mdev));
+
fs = mlx5e_fs_init(priv->profile, mdev,
!test_bit(MLX5E_STATE_DESTROYING, &priv->state),
priv->dfs_root);
if (!fs) {
err = -ENOMEM;
mlx5_core_err(mdev, "FS initialization failed, %d\n", err);
+ debugfs_remove_recursive(priv->dfs_root);
return err;
}
priv->fs = fs;
@@ -5305,6 +5292,7 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
mlx5e_health_destroy_reporters(priv);
mlx5e_ktls_cleanup(priv);
mlx5e_fs_cleanup(priv->fs);
+ debugfs_remove_recursive(priv->dfs_root);
priv->fs = NULL;
}
@@ -5851,8 +5839,8 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv)
}
static int
-mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mdev,
- const struct mlx5e_profile *new_profile, void *new_ppriv)
+mlx5e_netdev_init_profile(struct net_device *netdev, struct mlx5_core_dev *mdev,
+ const struct mlx5e_profile *new_profile, void *new_ppriv)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
int err;
@@ -5868,6 +5856,25 @@ mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mde
err = new_profile->init(priv->mdev, priv->netdev);
if (err)
goto priv_cleanup;
+
+ return 0;
+
+priv_cleanup:
+ mlx5e_priv_cleanup(priv);
+ return err;
+}
+
+static int
+mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mdev,
+ const struct mlx5e_profile *new_profile, void *new_ppriv)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
+
+ err = mlx5e_netdev_init_profile(netdev, mdev, new_profile, new_ppriv);
+ if (err)
+ return err;
+
err = mlx5e_attach_netdev(priv);
if (err)
goto profile_cleanup;
@@ -5875,7 +5882,6 @@ mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mde
profile_cleanup:
new_profile->cleanup(priv);
-priv_cleanup:
mlx5e_priv_cleanup(priv);
return err;
}
@@ -5894,6 +5900,12 @@ int mlx5e_netdev_change_profile(struct mlx5e_priv *priv,
priv->profile->cleanup(priv);
mlx5e_priv_cleanup(priv);
+ if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ mlx5e_netdev_init_profile(netdev, mdev, new_profile, new_ppriv);
+ set_bit(MLX5E_STATE_DESTROYING, &priv->state);
+ return -EIO;
+ }
+
err = mlx5e_netdev_attach_profile(netdev, mdev, new_profile, new_ppriv);
if (err) { /* roll back to original profile */
netdev_warn(netdev, "%s: new profile init failed, %d\n", __func__, err);
@@ -5955,8 +5967,11 @@ static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state)
struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = priv->mdev;
- if (!netif_device_present(netdev))
+ if (!netif_device_present(netdev)) {
+ if (test_bit(MLX5E_STATE_DESTROYING, &priv->state))
+ mlx5e_destroy_mdev_resources(mdev);
return -ENODEV;
+ }
mlx5e_detach_netdev(priv);
mlx5e_destroy_mdev_resources(mdev);
@@ -6002,9 +6017,6 @@ static int mlx5e_probe(struct auxiliary_device *adev,
priv->profile = profile;
priv->ppriv = NULL;
- priv->dfs_root = debugfs_create_dir("nic",
- mlx5_debugfs_get_dev_root(priv->mdev));
-
err = profile->init(mdev, netdev);
if (err) {
mlx5_core_err(mdev, "mlx5e_nic_profile init failed, %d\n", err);
@@ -6033,7 +6045,6 @@ err_resume:
err_profile_cleanup:
profile->cleanup(priv);
err_destroy_netdev:
- debugfs_remove_recursive(priv->dfs_root);
mlx5e_destroy_netdev(priv);
err_devlink_port_unregister:
mlx5e_devlink_port_unregister(mlx5e_dev);
@@ -6053,7 +6064,6 @@ static void mlx5e_remove(struct auxiliary_device *adev)
unregister_netdev(priv->netdev);
mlx5e_suspend(adev, state);
priv->profile->cleanup(priv);
- debugfs_remove_recursive(priv->dfs_root);
mlx5e_destroy_netdev(priv);
mlx5e_devlink_port_unregister(mlx5e_dev);
mlx5e_destroy_devlink(mlx5e_dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 1fc386eccaf8..3e7041bd5705 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -30,6 +30,7 @@
* SOFTWARE.
*/
+#include <linux/debugfs.h>
#include <linux/mlx5/fs.h>
#include <net/switchdev.h>
#include <net/pkt_cls.h>
@@ -812,11 +813,15 @@ static int mlx5e_init_ul_rep(struct mlx5_core_dev *mdev,
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ priv->dfs_root = debugfs_create_dir("nic",
+ mlx5_debugfs_get_dev_root(mdev));
+
priv->fs = mlx5e_fs_init(priv->profile, mdev,
!test_bit(MLX5E_STATE_DESTROYING, &priv->state),
priv->dfs_root);
if (!priv->fs) {
netdev_err(priv->netdev, "FS allocation failed\n");
+ debugfs_remove_recursive(priv->dfs_root);
return -ENOMEM;
}
@@ -829,6 +834,7 @@ static int mlx5e_init_ul_rep(struct mlx5_core_dev *mdev,
static void mlx5e_cleanup_rep(struct mlx5e_priv *priv)
{
mlx5e_fs_cleanup(priv->fs);
+ debugfs_remove_recursive(priv->dfs_root);
priv->fs = NULL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 728b82ce4031..b9b1da751a3b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1439,6 +1439,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
mlx5e_hairpin_flow_del(priv, flow);
free_flow_post_acts(flow);
+ mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
kvfree(attr->parse_attr);
kfree(flow->attr);
@@ -1665,11 +1666,9 @@ bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_
int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
{
struct mlx5e_priv *out_priv, *route_priv;
- struct mlx5_devcom *devcom = NULL;
struct mlx5_core_dev *route_mdev;
struct mlx5_eswitch *esw;
u16 vhca_id;
- int err;
out_priv = netdev_priv(out_dev);
esw = out_priv->mdev->priv.eswitch;
@@ -1678,6 +1677,9 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro
vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
if (mlx5_lag_is_active(out_priv->mdev)) {
+ struct mlx5_devcom *devcom;
+ int err;
+
/* In lag case we may get devices from different eswitch instances.
* If we failed to get vport num, it means, mostly, that we on the wrong
* eswitch.
@@ -1686,101 +1688,16 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro
if (err != -ENOENT)
return err;
+ rcu_read_lock();
devcom = out_priv->mdev->priv.devcom;
- esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
- if (!esw)
- return -ENODEV;
- }
-
- err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
- if (devcom)
- mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
- return err;
-}
-
-static int
-set_encap_dests(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow,
- struct mlx5_flow_attr *attr,
- struct netlink_ext_ack *extack,
- bool *vf_tun)
-{
- struct mlx5e_tc_flow_parse_attr *parse_attr;
- struct mlx5_esw_flow_attr *esw_attr;
- struct net_device *encap_dev = NULL;
- struct mlx5e_rep_priv *rpriv;
- struct mlx5e_priv *out_priv;
- int out_index;
- int err = 0;
-
- if (!mlx5e_is_eswitch_flow(flow))
- return 0;
-
- parse_attr = attr->parse_attr;
- esw_attr = attr->esw_attr;
- *vf_tun = false;
-
- for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
- struct net_device *out_dev;
- int mirred_ifindex;
-
- if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
- continue;
-
- mirred_ifindex = parse_attr->mirred_ifindex[out_index];
- out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
- if (!out_dev) {
- NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
- err = -ENODEV;
- goto out;
- }
- err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
- extack, &encap_dev);
- dev_put(out_dev);
- if (err)
- goto out;
-
- if (esw_attr->dests[out_index].flags &
- MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
- !esw_attr->dest_int_port)
- *vf_tun = true;
+ esw = mlx5_devcom_get_peer_data_rcu(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ err = esw ? mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport) : -ENODEV;
+ rcu_read_unlock();
- out_priv = netdev_priv(encap_dev);
- rpriv = out_priv->ppriv;
- esw_attr->dests[out_index].rep = rpriv->rep;
- esw_attr->dests[out_index].mdev = out_priv->mdev;
- }
-
- if (*vf_tun && esw_attr->out_count > 1) {
- NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
- err = -EOPNOTSUPP;
- goto out;
+ return err;
}
-out:
- return err;
-}
-
-static void
-clean_encap_dests(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow,
- struct mlx5_flow_attr *attr)
-{
- struct mlx5_esw_flow_attr *esw_attr;
- int out_index;
-
- if (!mlx5e_is_eswitch_flow(flow))
- return;
-
- esw_attr = attr->esw_attr;
-
- for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
- if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
- continue;
-
- mlx5e_detach_encap(priv, flow, attr, out_index);
- kfree(attr->parse_attr->tun_info[out_index]);
- }
+ return mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
}
static int
@@ -1819,7 +1736,7 @@ post_process_attr(struct mlx5e_tc_flow *flow,
if (err)
goto err_out;
- err = set_encap_dests(flow->priv, flow, attr, extack, &vf_tun);
+ err = mlx5e_tc_tun_encap_dests_set(flow->priv, flow, attr, extack, &vf_tun);
if (err)
goto err_out;
@@ -3943,8 +3860,8 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
struct mlx5_flow_attr *prev_attr;
struct flow_action_entry *act;
struct mlx5e_tc_act *tc_act;
+ int err, i, i_split = 0;
bool is_missable;
- int err, i;
ns_type = mlx5e_get_flow_namespace(flow);
list_add(&attr->list, &flow->attrs);
@@ -3985,7 +3902,8 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
i < flow_action->num_entries - 1)) {
is_missable = tc_act->is_missable ? tc_act->is_missable(act) : false;
- err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
+ err = mlx5e_tc_act_post_parse(parse_state, flow_action, i_split, i, attr,
+ ns_type);
if (err)
goto out_free_post_acts;
@@ -3995,6 +3913,7 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
goto out_free_post_acts;
}
+ i_split = i + 1;
list_add(&attr->list, &flow->attrs);
}
@@ -4009,7 +3928,7 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
}
}
- err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
+ err = mlx5e_tc_act_post_parse(parse_state, flow_action, i_split, i, attr, ns_type);
if (err)
goto out_free_post_acts;
@@ -4323,7 +4242,7 @@ mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *a
if (attr->post_act_handle)
mlx5e_tc_post_act_del(get_post_action(flow->priv), attr->post_act_handle);
- clean_encap_dests(flow->priv, flow, attr);
+ mlx5e_tc_tun_encap_dests_unset(flow->priv, flow, attr);
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
mlx5_fc_destroy(counter_dev, attr->counter);
@@ -5301,6 +5220,8 @@ int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv)
goto err_action_counter;
}
+ mlx5_esw_offloads_devcom_init(esw);
+
return 0;
err_action_counter:
@@ -5329,7 +5250,7 @@ void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv)
priv = netdev_priv(rpriv->netdev);
esw = priv->mdev->priv.eswitch;
- mlx5e_tc_clean_fdb_peer_flows(esw);
+ mlx5_esw_offloads_devcom_cleanup(esw);
mlx5e_tc_tun_cleanup(uplink_priv->encap);
@@ -5643,22 +5564,43 @@ bool mlx5e_tc_update_skb_nic(struct mlx5_cqe64 *cqe, struct sk_buff *skb)
0, NULL);
}
+static struct mapping_ctx *
+mlx5e_get_priv_obj_mapping(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tc_table *tc;
+ struct mlx5_eswitch *esw;
+ struct mapping_ctx *ctx;
+
+ if (is_mdev_switchdev_mode(priv->mdev)) {
+ esw = priv->mdev->priv.eswitch;
+ ctx = esw->offloads.reg_c0_obj_pool;
+ } else {
+ tc = mlx5e_fs_get_tc(priv->fs);
+ ctx = tc->mapping;
+ }
+
+ return ctx;
+}
+
int mlx5e_tc_action_miss_mapping_get(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr,
u64 act_miss_cookie, u32 *act_miss_mapping)
{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_mapped_obj mapped_obj = {};
+ struct mlx5_eswitch *esw;
struct mapping_ctx *ctx;
int err;
- ctx = esw->offloads.reg_c0_obj_pool;
-
+ ctx = mlx5e_get_priv_obj_mapping(priv);
mapped_obj.type = MLX5_MAPPED_OBJ_ACT_MISS;
mapped_obj.act_miss_cookie = act_miss_cookie;
err = mapping_add(ctx, &mapped_obj, act_miss_mapping);
if (err)
return err;
+ if (!is_mdev_switchdev_mode(priv->mdev))
+ return 0;
+
+ esw = priv->mdev->priv.eswitch;
attr->act_id_restore_rule = esw_add_restore_rule(esw, *act_miss_mapping);
if (IS_ERR(attr->act_id_restore_rule))
goto err_rule;
@@ -5673,10 +5615,9 @@ err_rule:
void mlx5e_tc_action_miss_mapping_put(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr,
u32 act_miss_mapping)
{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mapping_ctx *ctx;
+ struct mapping_ctx *ctx = mlx5e_get_priv_obj_mapping(priv);
- ctx = esw->offloads.reg_c0_obj_pool;
- mlx5_del_flow_rules(attr->act_id_restore_rule);
+ if (is_mdev_switchdev_mode(priv->mdev))
+ mlx5_del_flow_rules(attr->act_id_restore_rule);
mapping_remove(ctx, act_miss_mapping);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index df5e780e8e6a..c7eb6b238c2b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -762,6 +762,17 @@ static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_t
}
}
+void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq)
+{
+ if (netif_tx_queue_stopped(sq->txq) &&
+ mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) &&
+ mlx5e_ptpsq_fifo_has_room(sq) &&
+ !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
+ netif_tx_wake_queue(sq->txq);
+ sq->stats->wake++;
+ }
+}
+
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
{
struct mlx5e_sq_stats *stats;
@@ -861,13 +872,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
netdev_tx_completed_queue(sq->txq, npkts, nbytes);
- if (netif_tx_queue_stopped(sq->txq) &&
- mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) &&
- mlx5e_ptpsq_fifo_has_room(sq) &&
- !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
- netif_tx_wake_queue(sq->txq);
- stats->wake++;
- }
+ mlx5e_txqsq_wake(sq);
return (i == MLX5E_TX_CQ_POLL_BUDGET);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index a50bfda18e96..fbb2d963fb7e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -161,20 +161,22 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
}
}
+ /* budget=0 means we may be in IRQ context, do as little as possible */
+ if (unlikely(!budget))
+ goto out;
+
busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq);
if (c->xdp)
busy |= mlx5e_poll_xdpsq_cq(&c->rq_xdpsq.cq);
- if (likely(budget)) { /* budget=0 means: don't poll rx rings */
- if (xsk_open)
- work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget);
+ if (xsk_open)
+ work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget);
- if (likely(budget - work_done))
- work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done);
+ if (likely(budget - work_done))
+ work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done);
- busy |= work_done == budget;
- }
+ busy |= work_done == budget;
mlx5e_poll_ico_cq(&c->icosq.cq);
if (mlx5e_poll_ico_cq(&c->async_icosq.cq))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 1c35d721a31d..3db4866d7880 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -824,7 +824,7 @@ static int comp_irqs_request_pci(struct mlx5_core_dev *dev)
ncomp_eqs = table->num_comp_eqs;
cpus = kcalloc(ncomp_eqs, sizeof(*cpus), GFP_KERNEL);
if (!cpus)
- ret = -ENOMEM;
+ return -ENOMEM;
i = 0;
rcu_read_lock();
@@ -1104,7 +1104,7 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
struct mlx5_eq_table *table = dev->priv.eq_table;
mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
- mlx5_irq_table_destroy(dev);
+ mlx5_irq_table_free_irqs(dev);
mutex_unlock(&table->lock);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 1a042c981713..add6cfa432a5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -342,6 +342,7 @@ struct mlx5_eswitch {
u32 large_group_num;
} params;
struct blocking_notifier_head n_head;
+ bool paired[MLX5_MAX_PORTS];
};
void esw_offloads_disable(struct mlx5_eswitch *esw);
@@ -369,6 +370,8 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs);
void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf);
void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw);
void mlx5_eswitch_disable(struct mlx5_eswitch *esw);
+void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw);
+void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw);
int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
u16 vport, const u8 *mac);
int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
@@ -767,6 +770,8 @@ static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; }
static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) {}
static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {}
+static inline void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw) {}
+static inline void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) {}
static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; }
static inline
int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, u16 vport, int link_state) { return 0; }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 69215ffb9999..8d19c20d3447 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -2742,6 +2742,9 @@ static int mlx5_esw_offloads_devcom_event(int event,
mlx5_eswitch_vport_match_metadata_enabled(peer_esw))
break;
+ if (esw->paired[mlx5_get_dev_index(peer_esw->dev)])
+ break;
+
err = mlx5_esw_offloads_set_ns_peer(esw, peer_esw, true);
if (err)
goto err_out;
@@ -2753,14 +2756,18 @@ static int mlx5_esw_offloads_devcom_event(int event,
if (err)
goto err_pair;
+ esw->paired[mlx5_get_dev_index(peer_esw->dev)] = true;
+ peer_esw->paired[mlx5_get_dev_index(esw->dev)] = true;
mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true);
break;
case ESW_OFFLOADS_DEVCOM_UNPAIR:
- if (!mlx5_devcom_is_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS))
+ if (!esw->paired[mlx5_get_dev_index(peer_esw->dev)])
break;
mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false);
+ esw->paired[mlx5_get_dev_index(peer_esw->dev)] = false;
+ peer_esw->paired[mlx5_get_dev_index(esw->dev)] = false;
mlx5_esw_offloads_unpair(peer_esw);
mlx5_esw_offloads_unpair(esw);
mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false);
@@ -2779,7 +2786,7 @@ err_out:
return err;
}
-static void esw_offloads_devcom_init(struct mlx5_eswitch *esw)
+void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw)
{
struct mlx5_devcom *devcom = esw->dev->priv.devcom;
@@ -2802,7 +2809,7 @@ static void esw_offloads_devcom_init(struct mlx5_eswitch *esw)
ESW_OFFLOADS_DEVCOM_PAIR, esw);
}
-static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
+void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
{
struct mlx5_devcom *devcom = esw->dev->priv.devcom;
@@ -3250,8 +3257,6 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
if (err)
goto err_vports;
- esw_offloads_devcom_init(esw);
-
return 0;
err_vports:
@@ -3292,7 +3297,6 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw,
void esw_offloads_disable(struct mlx5_eswitch *esw)
{
- esw_offloads_devcom_cleanup(esw);
mlx5_eswitch_disable_pf_vf_vports(esw);
esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK);
esw_set_passing_vport_metadata(esw, false);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 144e59480686..ec83e6483d1a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -511,10 +511,11 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
struct mlx5_flow_rule *dst;
void *in_flow_context, *vlan;
void *in_match_value;
+ int reformat_id = 0;
unsigned int inlen;
int dst_cnt_size;
+ u32 *in, action;
void *in_dests;
- u32 *in;
int err;
if (mlx5_set_extended_dest(dev, fte, &extended_dest))
@@ -553,22 +554,42 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(flow_context, in_flow_context, extended_destination,
extended_dest);
- if (extended_dest) {
- u32 action;
- action = fte->action.action &
- ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
- MLX5_SET(flow_context, in_flow_context, action, action);
- } else {
- MLX5_SET(flow_context, in_flow_context, action,
- fte->action.action);
- if (fte->action.pkt_reformat)
- MLX5_SET(flow_context, in_flow_context, packet_reformat_id,
- fte->action.pkt_reformat->id);
+ action = fte->action.action;
+ if (extended_dest)
+ action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+
+ MLX5_SET(flow_context, in_flow_context, action, action);
+
+ if (!extended_dest && fte->action.pkt_reformat) {
+ struct mlx5_pkt_reformat *pkt_reformat = fte->action.pkt_reformat;
+
+ if (pkt_reformat->owner == MLX5_FLOW_RESOURCE_OWNER_SW) {
+ reformat_id = mlx5_fs_dr_action_get_pkt_reformat_id(pkt_reformat);
+ if (reformat_id < 0) {
+ mlx5_core_err(dev,
+ "Unsupported SW-owned pkt_reformat type (%d) in FW-owned table\n",
+ pkt_reformat->reformat_type);
+ err = reformat_id;
+ goto err_out;
+ }
+ } else {
+ reformat_id = fte->action.pkt_reformat->id;
+ }
}
- if (fte->action.modify_hdr)
+
+ MLX5_SET(flow_context, in_flow_context, packet_reformat_id, (u32)reformat_id);
+
+ if (fte->action.modify_hdr) {
+ if (fte->action.modify_hdr->owner == MLX5_FLOW_RESOURCE_OWNER_SW) {
+ mlx5_core_err(dev, "Can't use SW-owned modify_hdr in FW-owned table\n");
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+
MLX5_SET(flow_context, in_flow_context, modify_header_id,
fte->action.modify_hdr->id);
+ }
MLX5_SET(flow_context, in_flow_context, encrypt_decrypt_type,
fte->action.crypto.type);
@@ -885,6 +906,8 @@ static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
pkt_reformat->id = MLX5_GET(alloc_packet_reformat_context_out,
out, packet_reformat_id);
+ pkt_reformat->owner = MLX5_FLOW_RESOURCE_OWNER_FW;
+
kfree(in);
return err;
}
@@ -969,6 +992,7 @@ static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
modify_hdr->id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id);
+ modify_hdr->owner = MLX5_FLOW_RESOURCE_OWNER_FW;
kfree(in);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index f137a0611b77..b043190e50a8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -54,8 +54,14 @@ struct mlx5_flow_definer {
u32 id;
};
+enum mlx5_flow_resource_owner {
+ MLX5_FLOW_RESOURCE_OWNER_FW,
+ MLX5_FLOW_RESOURCE_OWNER_SW,
+};
+
struct mlx5_modify_hdr {
enum mlx5_flow_namespace_type ns_type;
+ enum mlx5_flow_resource_owner owner;
union {
struct mlx5_fs_dr_action action;
u32 id;
@@ -65,6 +71,7 @@ struct mlx5_modify_hdr {
struct mlx5_pkt_reformat {
enum mlx5_flow_namespace_type ns_type;
int reformat_type; /* from mlx5_ifc */
+ enum mlx5_flow_resource_owner owner;
union {
struct mlx5_fs_dr_action action;
u32 id;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
index adefde3ea941..b7d779d08d83 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
@@ -3,6 +3,7 @@
#include <linux/mlx5/vport.h>
#include "lib/devcom.h"
+#include "mlx5_core.h"
static LIST_HEAD(devcom_list);
@@ -13,7 +14,7 @@ static LIST_HEAD(devcom_list);
struct mlx5_devcom_component {
struct {
- void *data;
+ void __rcu *data;
} device[MLX5_DEVCOM_PORTS_SUPPORTED];
mlx5_devcom_event_handler_t handler;
@@ -77,6 +78,7 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev)
if (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_DEVCOM_PORTS_SUPPORTED)
return NULL;
+ mlx5_dev_list_lock();
sguid0 = mlx5_query_nic_system_image_guid(dev);
list_for_each_entry(iter, &devcom_list, list) {
struct mlx5_core_dev *tmp_dev = NULL;
@@ -102,8 +104,10 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev)
if (!priv) {
priv = mlx5_devcom_list_alloc();
- if (!priv)
- return ERR_PTR(-ENOMEM);
+ if (!priv) {
+ devcom = ERR_PTR(-ENOMEM);
+ goto out;
+ }
idx = 0;
new_priv = true;
@@ -112,13 +116,16 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev)
priv->devs[idx] = dev;
devcom = mlx5_devcom_alloc(priv, idx);
if (!devcom) {
- kfree(priv);
- return ERR_PTR(-ENOMEM);
+ if (new_priv)
+ kfree(priv);
+ devcom = ERR_PTR(-ENOMEM);
+ goto out;
}
if (new_priv)
list_add(&priv->list, &devcom_list);
-
+out:
+ mlx5_dev_list_unlock();
return devcom;
}
@@ -131,6 +138,7 @@ void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom)
if (IS_ERR_OR_NULL(devcom))
return;
+ mlx5_dev_list_lock();
priv = devcom->priv;
priv->devs[devcom->idx] = NULL;
@@ -141,10 +149,12 @@ void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom)
break;
if (i != MLX5_DEVCOM_PORTS_SUPPORTED)
- return;
+ goto out;
list_del(&priv->list);
kfree(priv);
+out:
+ mlx5_dev_list_unlock();
}
void mlx5_devcom_register_component(struct mlx5_devcom *devcom,
@@ -162,7 +172,7 @@ void mlx5_devcom_register_component(struct mlx5_devcom *devcom,
comp = &devcom->priv->components[id];
down_write(&comp->sem);
comp->handler = handler;
- comp->device[devcom->idx].data = data;
+ rcu_assign_pointer(comp->device[devcom->idx].data, data);
up_write(&comp->sem);
}
@@ -176,8 +186,9 @@ void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom,
comp = &devcom->priv->components[id];
down_write(&comp->sem);
- comp->device[devcom->idx].data = NULL;
+ RCU_INIT_POINTER(comp->device[devcom->idx].data, NULL);
up_write(&comp->sem);
+ synchronize_rcu();
}
int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
@@ -193,12 +204,15 @@ int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
comp = &devcom->priv->components[id];
down_write(&comp->sem);
- for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
- if (i != devcom->idx && comp->device[i].data) {
- err = comp->handler(event, comp->device[i].data,
- event_data);
+ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) {
+ void *data = rcu_dereference_protected(comp->device[i].data,
+ lockdep_is_held(&comp->sem));
+
+ if (i != devcom->idx && data) {
+ err = comp->handler(event, data, event_data);
break;
}
+ }
up_write(&comp->sem);
return err;
@@ -213,7 +227,7 @@ void mlx5_devcom_set_paired(struct mlx5_devcom *devcom,
comp = &devcom->priv->components[id];
WARN_ON(!rwsem_is_locked(&comp->sem));
- comp->paired = paired;
+ WRITE_ONCE(comp->paired, paired);
}
bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
@@ -222,7 +236,7 @@ bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
if (IS_ERR_OR_NULL(devcom))
return false;
- return devcom->priv->components[id].paired;
+ return READ_ONCE(devcom->priv->components[id].paired);
}
void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
@@ -236,7 +250,7 @@ void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
comp = &devcom->priv->components[id];
down_read(&comp->sem);
- if (!comp->paired) {
+ if (!READ_ONCE(comp->paired)) {
up_read(&comp->sem);
return NULL;
}
@@ -245,7 +259,29 @@ void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
if (i != devcom->idx)
break;
- return comp->device[i].data;
+ return rcu_dereference_protected(comp->device[i].data, lockdep_is_held(&comp->sem));
+}
+
+void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id)
+{
+ struct mlx5_devcom_component *comp;
+ int i;
+
+ if (IS_ERR_OR_NULL(devcom))
+ return NULL;
+
+ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
+ if (i != devcom->idx)
+ break;
+
+ comp = &devcom->priv->components[id];
+ /* This can change concurrently, however 'data' pointer will remain
+ * valid for the duration of RCU read section.
+ */
+ if (!READ_ONCE(comp->paired))
+ return NULL;
+
+ return rcu_dereference(comp->device[i].data);
}
void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
index 94313c18bb64..9a496f4722da 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
@@ -41,6 +41,7 @@ bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
enum mlx5_devcom_components id);
+void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id);
void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,
enum mlx5_devcom_components id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 995eb2d5ace0..d6ee016deae1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -923,7 +923,6 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev,
}
mlx5_pci_vsc_init(dev);
- dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev);
return 0;
err_clr_master:
@@ -1049,7 +1048,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
dev->dm = mlx5_dm_create(dev);
if (IS_ERR(dev->dm))
- mlx5_core_warn(dev, "Failed to init device memory%d\n", err);
+ mlx5_core_warn(dev, "Failed to init device memory %ld\n", PTR_ERR(dev->dm));
dev->tracer = mlx5_fw_tracer_create(dev);
dev->hv_vhca = mlx5_hv_vhca_create(dev);
@@ -1155,6 +1154,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout
goto err_cmd_cleanup;
}
+ dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev);
mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_UP);
mlx5_start_health_poll(dev);
@@ -1802,15 +1802,16 @@ static void remove_one(struct pci_dev *pdev)
struct devlink *devlink = priv_to_devlink(dev);
set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
- /* mlx5_drain_fw_reset() is using devlink APIs. Hence, we must drain
- * fw_reset before unregistering the devlink.
+ /* mlx5_drain_fw_reset() and mlx5_drain_health_wq() are using
+ * devlink notify APIs.
+ * Hence, we must drain them before unregistering the devlink.
*/
mlx5_drain_fw_reset(dev);
+ mlx5_drain_health_wq(dev);
devlink_unregister(devlink);
mlx5_sriov_disable(pdev);
mlx5_thermal_uninit(dev);
mlx5_crdump_disable(dev);
- mlx5_drain_health_wq(dev);
mlx5_uninit_one(dev);
mlx5_pci_close(dev);
mlx5_mdev_uninit(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 1d879374acaa..229520405d4a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -276,18 +276,6 @@ static inline bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev)
return pci_num_vf(dev->pdev) ? true : false;
}
-static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev)
-{
- /* LACP owner conditions:
- * 1) Function is physical.
- * 2) LAG is supported by FW.
- * 3) LAG is managed by driver (currently the only option).
- */
- return MLX5_CAP_GEN(dev, vport_group_manager) &&
- (MLX5_CAP_GEN(dev, num_lag_ports) > 1) &&
- MLX5_CAP_GEN(dev, lag_master);
-}
-
int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev);
static inline int mlx5_rescan_drivers(struct mlx5_core_dev *dev)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
index efd0c299c5c7..aa403a5ea34e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
@@ -15,6 +15,7 @@ int mlx5_irq_table_init(struct mlx5_core_dev *dev);
void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
int mlx5_irq_table_create(struct mlx5_core_dev *dev);
void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
+void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev);
int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table);
int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table);
struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index 9d735c343a3b..678f0be81375 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -32,6 +32,7 @@
#include <linux/kernel.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/qp.h>
#include "mlx5_core.h"
int mlx5_core_create_mkey(struct mlx5_core_dev *dev, u32 *mkey, u32 *in,
@@ -122,3 +123,23 @@ int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num)
return mlx5_cmd_exec_in(dev, destroy_psv, in);
}
EXPORT_SYMBOL(mlx5_core_destroy_psv);
+
+__be32 mlx5_core_get_terminate_scatter_list_mkey(struct mlx5_core_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {};
+ u32 mkey;
+
+ if (!MLX5_CAP_GEN(dev, terminate_scatter_list_mkey))
+ return MLX5_TERMINATE_SCATTER_LIST_LKEY;
+
+ MLX5_SET(query_special_contexts_in, in, opcode,
+ MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
+ if (mlx5_cmd_exec_inout(dev, query_special_contexts, in, out))
+ return MLX5_TERMINATE_SCATTER_LIST_LKEY;
+
+ mkey = MLX5_GET(query_special_contexts_out, out,
+ terminate_scatter_list_mkey);
+ return cpu_to_be32(mkey);
+}
+EXPORT_SYMBOL(mlx5_core_get_terminate_scatter_list_mkey);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 2245d3b2f393..98412bd5a696 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -32,6 +32,7 @@ struct mlx5_irq {
struct mlx5_irq_pool *pool;
int refcount;
struct msi_map map;
+ u32 pool_index;
};
struct mlx5_irq_table {
@@ -125,14 +126,22 @@ out:
return ret;
}
-static void irq_release(struct mlx5_irq *irq)
+/* mlx5_system_free_irq - Free an IRQ
+ * @irq: IRQ to free
+ *
+ * Free the IRQ and other resources such as rmap from the system.
+ * BUT doesn't free or remove reference from mlx5.
+ * This function is very important for the shutdown flow, where we need to
+ * cleanup system resoruces but keep mlx5 objects alive,
+ * see mlx5_irq_table_free_irqs().
+ */
+static void mlx5_system_free_irq(struct mlx5_irq *irq)
{
struct mlx5_irq_pool *pool = irq->pool;
#ifdef CONFIG_RFS_ACCEL
struct cpu_rmap *rmap;
#endif
- xa_erase(&pool->irqs, irq->map.index);
/* free_irq requires that affinity_hint and rmap will be cleared before
* calling it. To satisfy this requirement, we call
* irq_cpu_rmap_remove() to remove the notifier
@@ -140,14 +149,22 @@ static void irq_release(struct mlx5_irq *irq)
irq_update_affinity_hint(irq->map.virq, NULL);
#ifdef CONFIG_RFS_ACCEL
rmap = mlx5_eq_table_get_rmap(pool->dev);
- if (rmap && irq->map.index)
+ if (rmap)
irq_cpu_rmap_remove(rmap, irq->map.virq);
#endif
- free_cpumask_var(irq->mask);
free_irq(irq->map.virq, &irq->nh);
if (irq->map.index && pci_msix_can_alloc_dyn(pool->dev->pdev))
pci_msix_free_irq(pool->dev->pdev, irq->map);
+}
+
+static void irq_release(struct mlx5_irq *irq)
+{
+ struct mlx5_irq_pool *pool = irq->pool;
+
+ xa_erase(&pool->irqs, irq->pool_index);
+ mlx5_system_free_irq(irq);
+ free_cpumask_var(irq->mask);
kfree(irq);
}
@@ -231,12 +248,13 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
if (!irq)
return ERR_PTR(-ENOMEM);
if (!i || !pci_msix_can_alloc_dyn(dev->pdev)) {
- /* The vector at index 0 was already allocated.
- * Just get the irq number. If dynamic irq is not supported
- * vectors have also been allocated.
+ /* The vector at index 0 is always statically allocated. If
+ * dynamic irq is not supported all vectors are statically
+ * allocated. In both cases just get the irq number and set
+ * the index.
*/
irq->map.virq = pci_irq_vector(dev->pdev, i);
- irq->map.index = 0;
+ irq->map.index = i;
} else {
irq->map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, af_desc);
if (!irq->map.virq) {
@@ -276,11 +294,11 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
}
irq->pool = pool;
irq->refcount = 1;
- irq->map.index = i;
- err = xa_err(xa_store(&pool->irqs, irq->map.index, irq, GFP_KERNEL));
+ irq->pool_index = i;
+ err = xa_err(xa_store(&pool->irqs, irq->pool_index, irq, GFP_KERNEL));
if (err) {
mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
- irq->map.index, err);
+ irq->pool_index, err);
goto err_xa;
}
return irq;
@@ -563,17 +581,23 @@ void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs)
int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
struct mlx5_irq **irqs, struct cpu_rmap **rmap)
{
+ struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
+ struct mlx5_irq_pool *pool = table->pcif_pool;
struct irq_affinity_desc af_desc;
struct mlx5_irq *irq;
+ int offset = 1;
int i;
- af_desc.is_managed = 1;
+ if (!pool->xa_num_irqs.max)
+ offset = 0;
+
+ af_desc.is_managed = false;
for (i = 0; i < nirqs; i++) {
+ cpumask_clear(&af_desc.mask);
cpumask_set_cpu(cpus[i], &af_desc.mask);
- irq = mlx5_irq_request(dev, i + 1, &af_desc, rmap);
+ irq = mlx5_irq_request(dev, i + offset, &af_desc, rmap);
if (IS_ERR(irq))
break;
- cpumask_clear(&af_desc.mask);
irqs[i] = irq;
}
@@ -691,6 +715,25 @@ static void irq_pools_destroy(struct mlx5_irq_table *table)
irq_pool_free(table->pcif_pool);
}
+static void mlx5_irq_pool_free_irqs(struct mlx5_irq_pool *pool)
+{
+ struct mlx5_irq *irq;
+ unsigned long index;
+
+ xa_for_each(&pool->irqs, index, irq)
+ mlx5_system_free_irq(irq);
+
+}
+
+static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table)
+{
+ if (table->sf_ctrl_pool) {
+ mlx5_irq_pool_free_irqs(table->sf_comp_pool);
+ mlx5_irq_pool_free_irqs(table->sf_ctrl_pool);
+ }
+ mlx5_irq_pool_free_irqs(table->pcif_pool);
+}
+
/* irq_table API */
int mlx5_irq_table_init(struct mlx5_core_dev *dev)
@@ -774,6 +817,17 @@ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
pci_free_irq_vectors(dev->pdev);
}
+void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev)
+{
+ struct mlx5_irq_table *table = dev->priv.irq_table;
+
+ if (mlx5_core_is_sf(dev))
+ return;
+
+ mlx5_irq_pools_free_irqs(table);
+ pci_free_irq_vectors(dev->pdev);
+}
+
int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
{
if (table->sf_comp_pool)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
index e2f26d0bc615..0692363cf80e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
@@ -63,6 +63,7 @@ static void mlx5_sf_dev_remove(struct auxiliary_device *adev)
struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
struct devlink *devlink = priv_to_devlink(sf_dev->mdev);
+ mlx5_drain_health_wq(sf_dev->mdev);
devlink_unregister(devlink);
mlx5_uninit_one(sf_dev->mdev);
iounmap(sf_dev->mdev->iseg);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
index 0eb9a8d7f282..0f783e7906cb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -1421,9 +1421,13 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn,
}
case DR_ACTION_TYP_TNL_L3_TO_L2:
{
- u8 hw_actions[DR_ACTION_CACHE_LINE_SIZE] = {};
+ u8 *hw_actions;
int ret;
+ hw_actions = kzalloc(DR_ACTION_CACHE_LINE_SIZE, GFP_KERNEL);
+ if (!hw_actions)
+ return -ENOMEM;
+
ret = mlx5dr_ste_set_action_decap_l3_list(dmn->ste_ctx,
data, data_sz,
hw_actions,
@@ -1431,6 +1435,7 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn,
&action->rewrite->num_of_actions);
if (ret) {
mlx5dr_dbg(dmn, "Failed creating decap l3 action list\n");
+ kfree(hw_actions);
return ret;
}
@@ -1440,6 +1445,7 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn,
ret = mlx5dr_ste_alloc_modify_hdr(action);
if (ret) {
mlx5dr_dbg(dmn, "Failed preparing reformat data\n");
+ kfree(hw_actions);
return ret;
}
return 0;
@@ -2129,6 +2135,11 @@ mlx5dr_action_create_aso(struct mlx5dr_domain *dmn, u32 obj_id,
return action;
}
+u32 mlx5dr_action_get_pkt_reformat_id(struct mlx5dr_action *action)
+{
+ return action->reformat->id;
+}
+
int mlx5dr_action_destroy(struct mlx5dr_action *action)
{
if (WARN_ON_ONCE(refcount_read(&action->refcount) > 1))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
index 3835ba3f4dda..1aa525e509f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
@@ -117,6 +117,8 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
caps->gvmi = MLX5_CAP_GEN(mdev, vhca_id);
caps->flex_protocols = MLX5_CAP_GEN(mdev, flex_parser_protocols);
caps->sw_format_ver = MLX5_CAP_GEN(mdev, steering_format_version);
+ caps->roce_caps.fl_rc_qp_when_roce_disabled =
+ MLX5_CAP_GEN(mdev, fl_rc_qp_when_roce_disabled);
if (MLX5_CAP_GEN(mdev, roce)) {
err = dr_cmd_query_nic_vport_roce_en(mdev, 0, &roce_en);
@@ -124,7 +126,7 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
return err;
caps->roce_caps.roce_en = roce_en;
- caps->roce_caps.fl_rc_qp_when_roce_disabled =
+ caps->roce_caps.fl_rc_qp_when_roce_disabled |=
MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_disabled);
caps->roce_caps.fl_rc_qp_when_roce_enabled =
MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_enabled);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c
index 13e06a6a6b22..d6947fe13d56 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c
@@ -213,6 +213,8 @@ struct mlx5dr_ptrn_mgr *mlx5dr_ptrn_mgr_create(struct mlx5dr_domain *dmn)
}
INIT_LIST_HEAD(&mgr->ptrn_list);
+ mutex_init(&mgr->modify_hdr_mutex);
+
return mgr;
free_mgr:
@@ -237,5 +239,6 @@ void mlx5dr_ptrn_mgr_destroy(struct mlx5dr_ptrn_mgr *mgr)
}
mlx5dr_icm_pool_destroy(mgr->ptrn_icm_pool);
+ mutex_destroy(&mgr->modify_hdr_mutex);
kfree(mgr);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
index 9413aaf51251..e94fbb015efa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -15,7 +15,8 @@ static u32 dr_ste_crc32_calc(const void *input_data, size_t length)
{
u32 crc = crc32(0, input_data, length);
- return (__force u32)htonl(crc);
+ return (__force u32)((crc >> 24) & 0xff) | ((crc << 8) & 0xff0000) |
+ ((crc >> 8) & 0xff00) | ((crc << 24) & 0xff000000);
}
bool mlx5dr_ste_supp_ttl_cs_recalc(struct mlx5dr_cmd_caps *caps)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
index 984653756779..cc215beb7436 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -331,8 +331,16 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
}
if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) {
- bool is_decap = fte->action.pkt_reformat->reformat_type ==
- MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
+ bool is_decap;
+
+ if (fte->action.pkt_reformat->owner == MLX5_FLOW_RESOURCE_OWNER_FW) {
+ err = -EINVAL;
+ mlx5dr_err(domain, "FW-owned reformat can't be used in SW rule\n");
+ goto free_actions;
+ }
+
+ is_decap = fte->action.pkt_reformat->reformat_type ==
+ MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
if (is_decap)
actions[num_actions++] =
@@ -661,6 +669,7 @@ static int mlx5_cmd_dr_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns
return -EINVAL;
}
+ pkt_reformat->owner = MLX5_FLOW_RESOURCE_OWNER_SW;
pkt_reformat->action.dr_action = action;
return 0;
@@ -691,6 +700,7 @@ static int mlx5_cmd_dr_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
return -EINVAL;
}
+ modify_hdr->owner = MLX5_FLOW_RESOURCE_OWNER_SW;
modify_hdr->action.dr_action = action;
return 0;
@@ -816,6 +826,19 @@ static u32 mlx5_cmd_dr_get_capabilities(struct mlx5_flow_root_namespace *ns,
return steering_caps;
}
+int mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat)
+{
+ switch (pkt_reformat->reformat_type) {
+ case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
+ case MLX5_REFORMAT_TYPE_L2_TO_NVGRE:
+ case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
+ case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
+ case MLX5_REFORMAT_TYPE_INSERT_HDR:
+ return mlx5dr_action_get_pkt_reformat_id(pkt_reformat->action.dr_action);
+ }
+ return -EOPNOTSUPP;
+}
+
bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev)
{
return mlx5dr_is_supported(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h
index d168622063d5..99a3b2eff6b8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h
@@ -38,6 +38,8 @@ struct mlx5_fs_dr_table {
bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev);
+int mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat);
+
const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void);
#else
@@ -47,6 +49,11 @@ static inline const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void)
return NULL;
}
+static inline u32 mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat)
+{
+ return 0;
+}
+
static inline bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev)
{
return false;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
index 9afd268a2573..d1c04f43d86d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
@@ -150,6 +150,8 @@ mlx5dr_action_create_dest_match_range(struct mlx5dr_domain *dmn,
int mlx5dr_action_destroy(struct mlx5dr_action *action);
+u32 mlx5dr_action_get_pkt_reformat_id(struct mlx5dr_action *action);
+
int mlx5dr_definer_get(struct mlx5dr_domain *dmn, u16 format_id,
u8 *dw_selectors, u8 *byte_selectors,
u8 *match_mask, u32 *definer_id);
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
index afa3b92a6905..0d5a41a2ae01 100644
--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
@@ -245,12 +245,6 @@ static bool mlxbf_gige_rx_packet(struct mlxbf_gige *priv, int *rx_pkts)
skb = priv->rx_skb[rx_pi_rem];
- skb_put(skb, datalen);
-
- skb->ip_summed = CHECKSUM_NONE; /* device did not checksum packet */
-
- skb->protocol = eth_type_trans(skb, netdev);
-
/* Alloc another RX SKB for this same index */
rx_skb = mlxbf_gige_alloc_skb(priv, MLXBF_GIGE_DEFAULT_BUF_SZ,
&rx_buf_dma, DMA_FROM_DEVICE);
@@ -259,6 +253,13 @@ static bool mlxbf_gige_rx_packet(struct mlxbf_gige *priv, int *rx_pkts)
priv->rx_skb[rx_pi_rem] = rx_skb;
dma_unmap_single(priv->dev, *rx_wqe_addr,
MLXBF_GIGE_DEFAULT_BUF_SZ, DMA_FROM_DEVICE);
+
+ skb_put(skb, datalen);
+
+ skb->ip_summed = CHECKSUM_NONE; /* device did not checksum packet */
+
+ skb->protocol = eth_type_trans(skb, netdev);
+
*rx_wqe_addr = rx_buf_dma;
} else if (rx_cqe & MLXBF_GIGE_RX_CQE_PKT_STATUS_MAC_ERR) {
priv->stats.rx_mac_errors++;
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
index 2b6e046e1d10..ee2698698d71 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
@@ -1039,6 +1039,16 @@ static int lan966x_reset_switch(struct lan966x *lan966x)
reset_control_reset(switch_reset);
+ /* Don't reinitialize the switch core, if it is already initialized. In
+ * case it is initialized twice, some pointers inside the queue system
+ * in HW will get corrupted and then after a while the queue system gets
+ * full and no traffic is passing through the switch. The issue is seen
+ * when loading and unloading the driver and sending traffic through the
+ * switch.
+ */
+ if (lan_rd(lan966x, SYS_RESET_CFG) & SYS_RESET_CFG_CORE_ENA)
+ return 0;
+
lan_wr(SYS_RESET_CFG_CORE_ENA_SET(0), lan966x, SYS_RESET_CFG);
lan_wr(SYS_RAM_INIT_RAM_INIT_SET(1), lan966x, SYS_RAM_INIT);
ret = readx_poll_timeout(lan966x_ram_init, lan966x,
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 06d6292e09b3..d907727c7b7a 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1279,8 +1279,6 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
if (comp_read < 1)
return;
- apc->eth_stats.tx_cqes = comp_read;
-
for (i = 0; i < comp_read; i++) {
struct mana_tx_comp_oob *cqe_oob;
@@ -1363,8 +1361,6 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
WARN_ON_ONCE(1);
cq->work_done = pkt_transmitted;
-
- apc->eth_stats.tx_cqes -= pkt_transmitted;
}
static void mana_post_pkt_rxq(struct mana_rxq *rxq)
@@ -1626,15 +1622,11 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
{
struct gdma_comp *comp = cq->gdma_comp_buf;
struct mana_rxq *rxq = cq->rxq;
- struct mana_port_context *apc;
int comp_read, i;
- apc = netdev_priv(rxq->ndev);
-
comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER);
WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER);
- apc->eth_stats.rx_cqes = comp_read;
rxq->xdp_flush = false;
for (i = 0; i < comp_read; i++) {
@@ -1646,8 +1638,6 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
return;
mana_process_rx_cqe(rxq, cq, &comp[i]);
-
- apc->eth_stats.rx_cqes--;
}
if (rxq->xdp_flush)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
index a64c81410dc1..0dc78679f620 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
@@ -13,11 +13,9 @@ static const struct {
} mana_eth_stats[] = {
{"stop_queue", offsetof(struct mana_ethtool_stats, stop_queue)},
{"wake_queue", offsetof(struct mana_ethtool_stats, wake_queue)},
- {"tx_cqes", offsetof(struct mana_ethtool_stats, tx_cqes)},
{"tx_cq_err", offsetof(struct mana_ethtool_stats, tx_cqe_err)},
{"tx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
tx_cqe_unknown_type)},
- {"rx_cqes", offsetof(struct mana_ethtool_stats, rx_cqes)},
{"rx_coalesced_err", offsetof(struct mana_ethtool_stats,
rx_coalesced_err)},
{"rx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
diff --git a/drivers/net/ethernet/netronome/nfp/nic/main.h b/drivers/net/ethernet/netronome/nfp/nic/main.h
index 094374df42b8..38b8b10b03cd 100644
--- a/drivers/net/ethernet/netronome/nfp/nic/main.h
+++ b/drivers/net/ethernet/netronome/nfp/nic/main.h
@@ -8,7 +8,7 @@
#ifdef CONFIG_DCB
/* DCB feature definitions */
-#define NFP_NET_MAX_DSCP 4
+#define NFP_NET_MAX_DSCP 64
#define NFP_NET_MAX_TC IEEE_8021QAZ_MAX_TCS
#define NFP_NET_MAX_PRIO 8
#define NFP_DCB_CFG_STRIDE 256
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 0605d1ee490d..7a549b834e97 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -6138,6 +6138,7 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
return 0;
out_error:
+ nv_mgmt_release_sema(dev);
if (phystate_orig)
writel(phystate|NVREG_ADAPTCTL_RUNNING, base + NvRegAdapterControl);
out_freering:
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 2edd6bf64a3c..7776d3bdd459 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -1903,7 +1903,7 @@ void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats)
{
u32 i;
- if (!cdev) {
+ if (!cdev || cdev->recov_in_prog) {
memset(stats, 0, sizeof(*stats));
return;
}
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index f9931ecb7baa..4d83ceebdc49 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -269,6 +269,10 @@ struct qede_dev {
#define QEDE_ERR_WARN 3
struct qede_dump_info dump_info;
+ struct delayed_work periodic_task;
+ unsigned long stats_coal_ticks;
+ u32 stats_coal_usecs;
+ spinlock_t stats_lock; /* lock for vport stats access */
};
enum QEDE_STATE {
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index 374a86b875a3..95820cf1cd6c 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -429,6 +429,8 @@ static void qede_get_ethtool_stats(struct net_device *dev,
}
}
+ spin_lock(&edev->stats_lock);
+
for (i = 0; i < QEDE_NUM_STATS; i++) {
if (qede_is_irrelevant_stat(edev, i))
continue;
@@ -438,6 +440,8 @@ static void qede_get_ethtool_stats(struct net_device *dev,
buf++;
}
+ spin_unlock(&edev->stats_lock);
+
__qede_unlock(edev);
}
@@ -829,6 +833,7 @@ out:
coal->rx_coalesce_usecs = rx_coal;
coal->tx_coalesce_usecs = tx_coal;
+ coal->stats_block_coalesce_usecs = edev->stats_coal_usecs;
return rc;
}
@@ -842,6 +847,19 @@ int qede_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal,
int i, rc = 0;
u16 rxc, txc;
+ if (edev->stats_coal_usecs != coal->stats_block_coalesce_usecs) {
+ edev->stats_coal_usecs = coal->stats_block_coalesce_usecs;
+ if (edev->stats_coal_usecs) {
+ edev->stats_coal_ticks = usecs_to_jiffies(edev->stats_coal_usecs);
+ schedule_delayed_work(&edev->periodic_task, 0);
+
+ DP_INFO(edev, "Configured stats coal ticks=%lu jiffies\n",
+ edev->stats_coal_ticks);
+ } else {
+ cancel_delayed_work_sync(&edev->periodic_task);
+ }
+ }
+
if (!netif_running(dev)) {
DP_INFO(edev, "Interface is down\n");
return -EINVAL;
@@ -2252,7 +2270,8 @@ out:
}
static const struct ethtool_ops qede_ethtool_ops = {
- .supported_coalesce_params = ETHTOOL_COALESCE_USECS,
+ .supported_coalesce_params = ETHTOOL_COALESCE_USECS |
+ ETHTOOL_COALESCE_STATS_BLOCK_USECS,
.get_link_ksettings = qede_get_link_ksettings,
.set_link_ksettings = qede_set_link_ksettings,
.get_drvinfo = qede_get_drvinfo,
@@ -2303,7 +2322,8 @@ static const struct ethtool_ops qede_ethtool_ops = {
};
static const struct ethtool_ops qede_vf_ethtool_ops = {
- .supported_coalesce_params = ETHTOOL_COALESCE_USECS,
+ .supported_coalesce_params = ETHTOOL_COALESCE_USECS |
+ ETHTOOL_COALESCE_STATS_BLOCK_USECS,
.get_link_ksettings = qede_get_link_ksettings,
.get_drvinfo = qede_get_drvinfo,
.get_msglevel = qede_get_msglevel,
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 4c6c685820e3..4b004a728190 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -307,6 +307,8 @@ void qede_fill_by_demand_stats(struct qede_dev *edev)
edev->ops->get_vport_stats(edev->cdev, &stats);
+ spin_lock(&edev->stats_lock);
+
p_common->no_buff_discards = stats.common.no_buff_discards;
p_common->packet_too_big_discard = stats.common.packet_too_big_discard;
p_common->ttl0_discard = stats.common.ttl0_discard;
@@ -404,6 +406,8 @@ void qede_fill_by_demand_stats(struct qede_dev *edev)
p_ah->tx_1519_to_max_byte_packets =
stats.ah.tx_1519_to_max_byte_packets;
}
+
+ spin_unlock(&edev->stats_lock);
}
static void qede_get_stats64(struct net_device *dev,
@@ -412,9 +416,10 @@ static void qede_get_stats64(struct net_device *dev,
struct qede_dev *edev = netdev_priv(dev);
struct qede_stats_common *p_common;
- qede_fill_by_demand_stats(edev);
p_common = &edev->stats.common;
+ spin_lock(&edev->stats_lock);
+
stats->rx_packets = p_common->rx_ucast_pkts + p_common->rx_mcast_pkts +
p_common->rx_bcast_pkts;
stats->tx_packets = p_common->tx_ucast_pkts + p_common->tx_mcast_pkts +
@@ -434,6 +439,8 @@ static void qede_get_stats64(struct net_device *dev,
stats->collisions = edev->stats.bb.tx_total_collisions;
stats->rx_crc_errors = p_common->rx_crc_errors;
stats->rx_frame_errors = p_common->rx_align_errors;
+
+ spin_unlock(&edev->stats_lock);
}
#ifdef CONFIG_QED_SRIOV
@@ -1063,6 +1070,23 @@ static void qede_unlock(struct qede_dev *edev)
rtnl_unlock();
}
+static void qede_periodic_task(struct work_struct *work)
+{
+ struct qede_dev *edev = container_of(work, struct qede_dev,
+ periodic_task.work);
+
+ qede_fill_by_demand_stats(edev);
+ schedule_delayed_work(&edev->periodic_task, edev->stats_coal_ticks);
+}
+
+static void qede_init_periodic_task(struct qede_dev *edev)
+{
+ INIT_DELAYED_WORK(&edev->periodic_task, qede_periodic_task);
+ spin_lock_init(&edev->stats_lock);
+ edev->stats_coal_usecs = USEC_PER_SEC;
+ edev->stats_coal_ticks = usecs_to_jiffies(USEC_PER_SEC);
+}
+
static void qede_sp_task(struct work_struct *work)
{
struct qede_dev *edev = container_of(work, struct qede_dev,
@@ -1082,6 +1106,7 @@ static void qede_sp_task(struct work_struct *work)
*/
if (test_and_clear_bit(QEDE_SP_RECOVERY, &edev->sp_flags)) {
+ cancel_delayed_work_sync(&edev->periodic_task);
#ifdef CONFIG_QED_SRIOV
/* SRIOV must be disabled outside the lock to avoid a deadlock.
* The recovery of the active VFs is currently not supported.
@@ -1272,6 +1297,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
*/
INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task);
mutex_init(&edev->qede_lock);
+ qede_init_periodic_task(edev);
rc = register_netdev(edev->ndev);
if (rc) {
@@ -1296,6 +1322,11 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
edev->rx_copybreak = QEDE_RX_HDR_SIZE;
qede_log_probe(edev);
+
+ /* retain user config (for example - after recovery) */
+ if (edev->stats_coal_usecs)
+ schedule_delayed_work(&edev->periodic_task, 0);
+
return 0;
err4:
@@ -1364,6 +1395,7 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
unregister_netdev(ndev);
cancel_delayed_work_sync(&edev->sp_task);
+ cancel_delayed_work_sync(&edev->periodic_task);
edev->ops->common->set_power_state(cdev, PCI_D0);
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index c865a4be05ee..4a1b94e5a8ea 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -582,8 +582,7 @@ qcaspi_spi_thread(void *data)
while (!kthread_should_stop()) {
set_current_state(TASK_INTERRUPTIBLE);
if ((qca->intr_req == qca->intr_svc) &&
- (qca->txr.skb[qca->txr.head] == NULL) &&
- (qca->sync == QCASPI_SYNC_READY))
+ !qca->txr.skb[qca->txr.head])
schedule();
set_current_state(TASK_RUNNING);
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index a7e376e7e689..4b19803a7dd0 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -616,10 +616,10 @@ struct rtl8169_private {
struct work_struct work;
} wk;
- spinlock_t config25_lock;
- spinlock_t mac_ocp_lock;
+ raw_spinlock_t config25_lock;
+ raw_spinlock_t mac_ocp_lock;
- spinlock_t cfg9346_usage_lock;
+ raw_spinlock_t cfg9346_usage_lock;
int cfg9346_usage_count;
unsigned supports_gmii:1;
@@ -671,20 +671,20 @@ static void rtl_lock_config_regs(struct rtl8169_private *tp)
{
unsigned long flags;
- spin_lock_irqsave(&tp->cfg9346_usage_lock, flags);
+ raw_spin_lock_irqsave(&tp->cfg9346_usage_lock, flags);
if (!--tp->cfg9346_usage_count)
RTL_W8(tp, Cfg9346, Cfg9346_Lock);
- spin_unlock_irqrestore(&tp->cfg9346_usage_lock, flags);
+ raw_spin_unlock_irqrestore(&tp->cfg9346_usage_lock, flags);
}
static void rtl_unlock_config_regs(struct rtl8169_private *tp)
{
unsigned long flags;
- spin_lock_irqsave(&tp->cfg9346_usage_lock, flags);
+ raw_spin_lock_irqsave(&tp->cfg9346_usage_lock, flags);
if (!tp->cfg9346_usage_count++)
RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
- spin_unlock_irqrestore(&tp->cfg9346_usage_lock, flags);
+ raw_spin_unlock_irqrestore(&tp->cfg9346_usage_lock, flags);
}
static void rtl_pci_commit(struct rtl8169_private *tp)
@@ -698,10 +698,10 @@ static void rtl_mod_config2(struct rtl8169_private *tp, u8 clear, u8 set)
unsigned long flags;
u8 val;
- spin_lock_irqsave(&tp->config25_lock, flags);
+ raw_spin_lock_irqsave(&tp->config25_lock, flags);
val = RTL_R8(tp, Config2);
RTL_W8(tp, Config2, (val & ~clear) | set);
- spin_unlock_irqrestore(&tp->config25_lock, flags);
+ raw_spin_unlock_irqrestore(&tp->config25_lock, flags);
}
static void rtl_mod_config5(struct rtl8169_private *tp, u8 clear, u8 set)
@@ -709,10 +709,10 @@ static void rtl_mod_config5(struct rtl8169_private *tp, u8 clear, u8 set)
unsigned long flags;
u8 val;
- spin_lock_irqsave(&tp->config25_lock, flags);
+ raw_spin_lock_irqsave(&tp->config25_lock, flags);
val = RTL_R8(tp, Config5);
RTL_W8(tp, Config5, (val & ~clear) | set);
- spin_unlock_irqrestore(&tp->config25_lock, flags);
+ raw_spin_unlock_irqrestore(&tp->config25_lock, flags);
}
static bool rtl_is_8125(struct rtl8169_private *tp)
@@ -899,9 +899,9 @@ static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data)
{
unsigned long flags;
- spin_lock_irqsave(&tp->mac_ocp_lock, flags);
+ raw_spin_lock_irqsave(&tp->mac_ocp_lock, flags);
__r8168_mac_ocp_write(tp, reg, data);
- spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
+ raw_spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
}
static u16 __r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg)
@@ -919,9 +919,9 @@ static u16 r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg)
unsigned long flags;
u16 val;
- spin_lock_irqsave(&tp->mac_ocp_lock, flags);
+ raw_spin_lock_irqsave(&tp->mac_ocp_lock, flags);
val = __r8168_mac_ocp_read(tp, reg);
- spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
+ raw_spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
return val;
}
@@ -932,10 +932,10 @@ static void r8168_mac_ocp_modify(struct rtl8169_private *tp, u32 reg, u16 mask,
unsigned long flags;
u16 data;
- spin_lock_irqsave(&tp->mac_ocp_lock, flags);
+ raw_spin_lock_irqsave(&tp->mac_ocp_lock, flags);
data = __r8168_mac_ocp_read(tp, reg);
__r8168_mac_ocp_write(tp, reg, (data & ~mask) | set);
- spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
+ raw_spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
}
/* Work around a hw issue with RTL8168g PHY, the quirk disables
@@ -1420,14 +1420,14 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
r8168_mac_ocp_modify(tp, 0xc0b6, BIT(0), 0);
}
- spin_lock_irqsave(&tp->config25_lock, flags);
+ raw_spin_lock_irqsave(&tp->config25_lock, flags);
for (i = 0; i < tmp; i++) {
options = RTL_R8(tp, cfg[i].reg) & ~cfg[i].mask;
if (wolopts & cfg[i].opt)
options |= cfg[i].mask;
RTL_W8(tp, cfg[i].reg, options);
}
- spin_unlock_irqrestore(&tp->config25_lock, flags);
+ raw_spin_unlock_irqrestore(&tp->config25_lock, flags);
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06:
@@ -5179,9 +5179,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
tp->eee_adv = -1;
tp->ocp_base = OCP_STD_PHY_BASE;
- spin_lock_init(&tp->cfg9346_usage_lock);
- spin_lock_init(&tp->config25_lock);
- spin_lock_init(&tp->mac_ocp_lock);
+ raw_spin_lock_init(&tp->cfg9346_usage_lock);
+ raw_spin_lock_init(&tp->config25_lock);
+ raw_spin_lock_init(&tp->mac_ocp_lock);
dev->tstats = devm_netdev_alloc_pcpu_stats(&pdev->dev,
struct pcpu_sw_netstats);
diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c
index 29afaddb598d..fa6d6202b129 100644
--- a/drivers/net/ethernet/renesas/rswitch.c
+++ b/drivers/net/ethernet/renesas/rswitch.c
@@ -347,17 +347,6 @@ out:
return -ENOMEM;
}
-static int rswitch_gwca_ts_queue_alloc(struct rswitch_private *priv)
-{
- struct rswitch_gwca_queue *gq = &priv->gwca.ts_queue;
-
- gq->ring_size = TS_RING_SIZE;
- gq->ts_ring = dma_alloc_coherent(&priv->pdev->dev,
- sizeof(struct rswitch_ts_desc) *
- (gq->ring_size + 1), &gq->ring_dma, GFP_KERNEL);
- return !gq->ts_ring ? -ENOMEM : 0;
-}
-
static void rswitch_desc_set_dptr(struct rswitch_desc *desc, dma_addr_t addr)
{
desc->dptrl = cpu_to_le32(lower_32_bits(addr));
@@ -533,6 +522,28 @@ static void rswitch_gwca_linkfix_free(struct rswitch_private *priv)
gwca->linkfix_table = NULL;
}
+static int rswitch_gwca_ts_queue_alloc(struct rswitch_private *priv)
+{
+ struct rswitch_gwca_queue *gq = &priv->gwca.ts_queue;
+ struct rswitch_ts_desc *desc;
+
+ gq->ring_size = TS_RING_SIZE;
+ gq->ts_ring = dma_alloc_coherent(&priv->pdev->dev,
+ sizeof(struct rswitch_ts_desc) *
+ (gq->ring_size + 1), &gq->ring_dma, GFP_KERNEL);
+
+ if (!gq->ts_ring)
+ return -ENOMEM;
+
+ rswitch_gwca_ts_queue_fill(priv, 0, TS_RING_SIZE);
+ desc = &gq->ts_ring[gq->ring_size];
+ desc->desc.die_dt = DT_LINKFIX;
+ rswitch_desc_set_dptr(&desc->desc, gq->ring_dma);
+ INIT_LIST_HEAD(&priv->gwca.ts_info_list);
+
+ return 0;
+}
+
static struct rswitch_gwca_queue *rswitch_gwca_get(struct rswitch_private *priv)
{
struct rswitch_gwca_queue *gq;
@@ -1485,7 +1496,7 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd
if (rswitch_get_num_cur_queues(gq) >= gq->ring_size - 1) {
netif_stop_subqueue(ndev, 0);
- return ret;
+ return NETDEV_TX_BUSY;
}
if (skb_put_padto(skb, ETH_ZLEN))
@@ -1780,9 +1791,6 @@ static int rswitch_init(struct rswitch_private *priv)
if (err < 0)
goto err_ts_queue_alloc;
- rswitch_gwca_ts_queue_fill(priv, 0, TS_RING_SIZE);
- INIT_LIST_HEAD(&priv->gwca.ts_info_list);
-
for (i = 0; i < RSWITCH_NUM_PORTS; i++) {
err = rswitch_device_alloc(priv, i);
if (err < 0) {
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index d30459dbfe8f..b63e47af6365 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -2950,7 +2950,7 @@ static u32 efx_ef10_extract_event_ts(efx_qword_t *event)
return tstamp;
}
-static void
+static int
efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
{
struct efx_nic *efx = channel->efx;
@@ -2958,13 +2958,14 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
unsigned int tx_ev_desc_ptr;
unsigned int tx_ev_q_label;
unsigned int tx_ev_type;
+ int work_done;
u64 ts_part;
if (unlikely(READ_ONCE(efx->reset_pending)))
- return;
+ return 0;
if (unlikely(EFX_QWORD_FIELD(*event, ESF_DZ_TX_DROP_EVENT)))
- return;
+ return 0;
/* Get the transmit queue */
tx_ev_q_label = EFX_QWORD_FIELD(*event, ESF_DZ_TX_QLABEL);
@@ -2973,8 +2974,7 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
if (!tx_queue->timestamping) {
/* Transmit completion */
tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, ESF_DZ_TX_DESCR_INDX);
- efx_xmit_done(tx_queue, tx_ev_desc_ptr & tx_queue->ptr_mask);
- return;
+ return efx_xmit_done(tx_queue, tx_ev_desc_ptr & tx_queue->ptr_mask);
}
/* Transmit timestamps are only available for 8XXX series. They result
@@ -3000,6 +3000,7 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
* fields in the event.
*/
tx_ev_type = EFX_QWORD_FIELD(*event, ESF_EZ_TX_SOFT1);
+ work_done = 0;
switch (tx_ev_type) {
case TX_TIMESTAMP_EVENT_TX_EV_COMPLETION:
@@ -3016,6 +3017,7 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
tx_queue->completed_timestamp_major = ts_part;
efx_xmit_done_single(tx_queue);
+ work_done = 1;
break;
default:
@@ -3026,6 +3028,8 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
EFX_QWORD_VAL(*event));
break;
}
+
+ return work_done;
}
static void
@@ -3081,13 +3085,16 @@ static void efx_ef10_handle_driver_generated_event(struct efx_channel *channel,
}
}
+#define EFX_NAPI_MAX_TX 512
+
static int efx_ef10_ev_process(struct efx_channel *channel, int quota)
{
struct efx_nic *efx = channel->efx;
efx_qword_t event, *p_event;
unsigned int read_ptr;
- int ev_code;
+ int spent_tx = 0;
int spent = 0;
+ int ev_code;
if (quota <= 0)
return spent;
@@ -3126,7 +3133,11 @@ static int efx_ef10_ev_process(struct efx_channel *channel, int quota)
}
break;
case ESE_DZ_EV_CODE_TX_EV:
- efx_ef10_handle_tx_event(channel, &event);
+ spent_tx += efx_ef10_handle_tx_event(channel, &event);
+ if (spent_tx >= EFX_NAPI_MAX_TX) {
+ spent = quota;
+ goto out;
+ }
break;
case ESE_DZ_EV_CODE_DRIVER_EV:
efx_ef10_handle_driver_event(channel, &event);
diff --git a/drivers/net/ethernet/sfc/ef100_netdev.c b/drivers/net/ethernet/sfc/ef100_netdev.c
index d916877b5a9a..be395cd8770b 100644
--- a/drivers/net/ethernet/sfc/ef100_netdev.c
+++ b/drivers/net/ethernet/sfc/ef100_netdev.c
@@ -378,7 +378,9 @@ int ef100_probe_netdev(struct efx_probe_data *probe_data)
efx->net_dev = net_dev;
SET_NETDEV_DEV(net_dev, &efx->pci_dev->dev);
- net_dev->features |= efx->type->offload_features;
+ /* enable all supported features except rx-fcs and rx-all */
+ net_dev->features |= efx->type->offload_features &
+ ~(NETIF_F_RXFCS | NETIF_F_RXALL);
net_dev->hw_features |= efx->type->offload_features;
net_dev->hw_enc_features |= efx->type->offload_features;
net_dev->vlan_features |= NETIF_F_HW_CSUM | NETIF_F_SG |
diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c
index 4dc643b0d2db..7adde9639c8a 100644
--- a/drivers/net/ethernet/sfc/ef100_nic.c
+++ b/drivers/net/ethernet/sfc/ef100_nic.c
@@ -253,6 +253,8 @@ static void ef100_ev_read_ack(struct efx_channel *channel)
efx_reg(channel->efx, ER_GZ_EVQ_INT_PRIME));
}
+#define EFX_NAPI_MAX_TX 512
+
static int ef100_ev_process(struct efx_channel *channel, int quota)
{
struct efx_nic *efx = channel->efx;
@@ -260,6 +262,7 @@ static int ef100_ev_process(struct efx_channel *channel, int quota)
bool evq_phase, old_evq_phase;
unsigned int read_ptr;
efx_qword_t *p_event;
+ int spent_tx = 0;
int spent = 0;
bool ev_phase;
int ev_type;
@@ -295,7 +298,9 @@ static int ef100_ev_process(struct efx_channel *channel, int quota)
efx_mcdi_process_event(channel, p_event);
break;
case ESE_GZ_EF100_EV_TX_COMPLETION:
- ef100_ev_tx(channel, p_event);
+ spent_tx += ef100_ev_tx(channel, p_event);
+ if (spent_tx >= EFX_NAPI_MAX_TX)
+ spent = quota;
break;
case ESE_GZ_EF100_EV_DRIVER:
netif_info(efx, drv, efx->net_dev,
diff --git a/drivers/net/ethernet/sfc/ef100_tx.c b/drivers/net/ethernet/sfc/ef100_tx.c
index 29ffaf35559d..849e5555bd12 100644
--- a/drivers/net/ethernet/sfc/ef100_tx.c
+++ b/drivers/net/ethernet/sfc/ef100_tx.c
@@ -346,7 +346,7 @@ void ef100_tx_write(struct efx_tx_queue *tx_queue)
ef100_tx_push_buffers(tx_queue);
}
-void ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event)
+int ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event)
{
unsigned int tx_done =
EFX_QWORD_FIELD(*p_event, ESF_GZ_EV_TXCMPL_NUM_DESC);
@@ -357,7 +357,7 @@ void ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event)
unsigned int tx_index = (tx_queue->read_count + tx_done - 1) &
tx_queue->ptr_mask;
- efx_xmit_done(tx_queue, tx_index);
+ return efx_xmit_done(tx_queue, tx_index);
}
/* Add a socket buffer to a TX queue
diff --git a/drivers/net/ethernet/sfc/ef100_tx.h b/drivers/net/ethernet/sfc/ef100_tx.h
index e9e11540fcde..d9a0819c5a72 100644
--- a/drivers/net/ethernet/sfc/ef100_tx.h
+++ b/drivers/net/ethernet/sfc/ef100_tx.h
@@ -20,7 +20,7 @@ void ef100_tx_init(struct efx_tx_queue *tx_queue);
void ef100_tx_write(struct efx_tx_queue *tx_queue);
unsigned int ef100_tx_max_skb_descs(struct efx_nic *efx);
-void ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event);
+int ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event);
netdev_tx_t ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb);
int __ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c
index fcea3ea809d7..41b33a75333c 100644
--- a/drivers/net/ethernet/sfc/efx_channels.c
+++ b/drivers/net/ethernet/sfc/efx_channels.c
@@ -301,6 +301,7 @@ int efx_probe_interrupts(struct efx_nic *efx)
efx->tx_channel_offset = 0;
efx->n_xdp_channels = 0;
efx->xdp_channel_offset = efx->n_channels;
+ efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED;
rc = pci_enable_msi(efx->pci_dev);
if (rc == 0) {
efx_get_channel(efx, 0)->irq = efx->pci_dev->irq;
@@ -322,6 +323,7 @@ int efx_probe_interrupts(struct efx_nic *efx)
efx->tx_channel_offset = efx_separate_tx_channels ? 1 : 0;
efx->n_xdp_channels = 0;
efx->xdp_channel_offset = efx->n_channels;
+ efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED;
efx->legacy_irq = efx->pci_dev->irq;
}
diff --git a/drivers/net/ethernet/sfc/efx_devlink.c b/drivers/net/ethernet/sfc/efx_devlink.c
index 381b805659d3..ef9971cbb695 100644
--- a/drivers/net/ethernet/sfc/efx_devlink.c
+++ b/drivers/net/ethernet/sfc/efx_devlink.c
@@ -171,9 +171,14 @@ static int efx_devlink_info_nvram_partition(struct efx_nic *efx,
rc = efx_mcdi_nvram_metadata(efx, partition_type, NULL, version, NULL,
0);
+
+ /* If the partition does not exist, that is not an error. */
+ if (rc == -ENOENT)
+ return 0;
+
if (rc) {
- netif_err(efx, drv, efx->net_dev, "mcdi nvram %s: failed\n",
- version_name);
+ netif_err(efx, drv, efx->net_dev, "mcdi nvram %s: failed (rc=%d)\n",
+ version_name, rc);
return rc;
}
@@ -187,36 +192,33 @@ static int efx_devlink_info_nvram_partition(struct efx_nic *efx,
static int efx_devlink_info_stored_versions(struct efx_nic *efx,
struct devlink_info_req *req)
{
- int rc;
-
- rc = efx_devlink_info_nvram_partition(efx, req,
- NVRAM_PARTITION_TYPE_BUNDLE,
- DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID);
- if (rc)
- return rc;
-
- rc = efx_devlink_info_nvram_partition(efx, req,
- NVRAM_PARTITION_TYPE_MC_FIRMWARE,
- DEVLINK_INFO_VERSION_GENERIC_FW_MGMT);
- if (rc)
- return rc;
-
- rc = efx_devlink_info_nvram_partition(efx, req,
- NVRAM_PARTITION_TYPE_SUC_FIRMWARE,
- EFX_DEVLINK_INFO_VERSION_FW_MGMT_SUC);
- if (rc)
- return rc;
-
- rc = efx_devlink_info_nvram_partition(efx, req,
- NVRAM_PARTITION_TYPE_EXPANSION_ROM,
- EFX_DEVLINK_INFO_VERSION_FW_EXPROM);
- if (rc)
- return rc;
+ int err;
- rc = efx_devlink_info_nvram_partition(efx, req,
- NVRAM_PARTITION_TYPE_EXPANSION_UEFI,
- EFX_DEVLINK_INFO_VERSION_FW_UEFI);
- return rc;
+ /* We do not care here about the specific error but just if an error
+ * happened. The specific error will be reported inside the call
+ * through system messages, and if any error happened in any call
+ * below, we report it through extack.
+ */
+ err = efx_devlink_info_nvram_partition(efx, req,
+ NVRAM_PARTITION_TYPE_BUNDLE,
+ DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID);
+
+ err |= efx_devlink_info_nvram_partition(efx, req,
+ NVRAM_PARTITION_TYPE_MC_FIRMWARE,
+ DEVLINK_INFO_VERSION_GENERIC_FW_MGMT);
+
+ err |= efx_devlink_info_nvram_partition(efx, req,
+ NVRAM_PARTITION_TYPE_SUC_FIRMWARE,
+ EFX_DEVLINK_INFO_VERSION_FW_MGMT_SUC);
+
+ err |= efx_devlink_info_nvram_partition(efx, req,
+ NVRAM_PARTITION_TYPE_EXPANSION_ROM,
+ EFX_DEVLINK_INFO_VERSION_FW_EXPROM);
+
+ err |= efx_devlink_info_nvram_partition(efx, req,
+ NVRAM_PARTITION_TYPE_EXPANSION_UEFI,
+ EFX_DEVLINK_INFO_VERSION_FW_UEFI);
+ return err;
}
#define EFX_VER_FLAG(_f) \
@@ -587,27 +589,20 @@ static int efx_devlink_info_get(struct devlink *devlink,
{
struct efx_devlink *devlink_private = devlink_priv(devlink);
struct efx_nic *efx = devlink_private->efx;
- int rc;
+ int err;
- /* Several different MCDI commands are used. We report first error
- * through extack returning at that point. Specific error
- * information via system messages.
+ /* Several different MCDI commands are used. We report if errors
+ * happened through extack. Specific error information via system
+ * messages inside the calls.
*/
- rc = efx_devlink_info_board_cfg(efx, req);
- if (rc) {
- NL_SET_ERR_MSG_MOD(extack, "Getting board info failed");
- return rc;
- }
- rc = efx_devlink_info_stored_versions(efx, req);
- if (rc) {
- NL_SET_ERR_MSG_MOD(extack, "Getting stored versions failed");
- return rc;
- }
- rc = efx_devlink_info_running_versions(efx, req);
- if (rc) {
- NL_SET_ERR_MSG_MOD(extack, "Getting running versions failed");
- return rc;
- }
+ err = efx_devlink_info_board_cfg(efx, req);
+
+ err |= efx_devlink_info_stored_versions(efx, req);
+
+ err |= efx_devlink_info_running_versions(efx, req);
+
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "Errors when getting device info. Check system messages");
return 0;
}
diff --git a/drivers/net/ethernet/sfc/siena/efx_channels.c b/drivers/net/ethernet/sfc/siena/efx_channels.c
index 06ed74994e36..1776f7f8a7a9 100644
--- a/drivers/net/ethernet/sfc/siena/efx_channels.c
+++ b/drivers/net/ethernet/sfc/siena/efx_channels.c
@@ -302,6 +302,7 @@ int efx_siena_probe_interrupts(struct efx_nic *efx)
efx->tx_channel_offset = 0;
efx->n_xdp_channels = 0;
efx->xdp_channel_offset = efx->n_channels;
+ efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED;
rc = pci_enable_msi(efx->pci_dev);
if (rc == 0) {
efx_get_channel(efx, 0)->irq = efx->pci_dev->irq;
@@ -323,6 +324,7 @@ int efx_siena_probe_interrupts(struct efx_nic *efx)
efx->tx_channel_offset = efx_siena_separate_tx_channels ? 1 : 0;
efx->n_xdp_channels = 0;
efx->xdp_channel_offset = efx->n_channels;
+ efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED;
efx->legacy_irq = efx->pci_dev->irq;
}
diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c
index 0327639a628a..c004443c1d58 100644
--- a/drivers/net/ethernet/sfc/tc.c
+++ b/drivers/net/ethernet/sfc/tc.c
@@ -624,13 +624,12 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx,
if (!found) { /* We don't care. */
netif_dbg(efx, drv, efx->net_dev,
"Ignoring foreign filter that doesn't egdev us\n");
- rc = -EOPNOTSUPP;
- goto release;
+ return -EOPNOTSUPP;
}
rc = efx_mae_match_check_caps(efx, &match.mask, NULL);
if (rc)
- goto release;
+ return rc;
if (efx_tc_match_is_encap(&match.mask)) {
enum efx_encap_type type;
@@ -639,8 +638,7 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx,
if (type == EFX_ENCAP_TYPE_NONE) {
NL_SET_ERR_MSG_MOD(extack,
"Egress encap match on unsupported tunnel device");
- rc = -EOPNOTSUPP;
- goto release;
+ return -EOPNOTSUPP;
}
rc = efx_mae_check_encap_type_supported(efx, type);
@@ -648,25 +646,24 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx,
NL_SET_ERR_MSG_FMT_MOD(extack,
"Firmware reports no support for %s encap match",
efx_tc_encap_type_name(type));
- goto release;
+ return rc;
}
rc = efx_tc_flower_record_encap_match(efx, &match, type,
extack);
if (rc)
- goto release;
+ return rc;
} else {
/* This is not a tunnel decap rule, ignore it */
netif_dbg(efx, drv, efx->net_dev,
"Ignoring foreign filter without encap match\n");
- rc = -EOPNOTSUPP;
- goto release;
+ return -EOPNOTSUPP;
}
rule = kzalloc(sizeof(*rule), GFP_USER);
if (!rule) {
rc = -ENOMEM;
- goto release;
+ goto out_free;
}
INIT_LIST_HEAD(&rule->acts.list);
rule->cookie = tc->cookie;
@@ -678,7 +675,7 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx,
"Ignoring already-offloaded rule (cookie %lx)\n",
tc->cookie);
rc = -EEXIST;
- goto release;
+ goto out_free;
}
act = kzalloc(sizeof(*act), GFP_USER);
@@ -843,6 +840,7 @@ release:
efx_tc_match_action_ht_params);
efx_tc_free_action_set_list(efx, &rule->acts, false);
}
+out_free:
kfree(rule);
if (match.encap)
efx_tc_flower_release_encap_match(efx, match.encap);
@@ -899,8 +897,7 @@ static int efx_tc_flower_replace(struct efx_nic *efx,
return rc;
if (efx_tc_match_is_encap(&match.mask)) {
NL_SET_ERR_MSG_MOD(extack, "Ingress enc_key matches not supported");
- rc = -EOPNOTSUPP;
- goto release;
+ return -EOPNOTSUPP;
}
if (tc->common.chain_index) {
@@ -924,9 +921,9 @@ static int efx_tc_flower_replace(struct efx_nic *efx,
if (old) {
netif_dbg(efx, drv, efx->net_dev,
"Already offloaded rule (cookie %lx)\n", tc->cookie);
- rc = -EEXIST;
NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded");
- goto release;
+ kfree(rule);
+ return -EEXIST;
}
/* Parse actions */
diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c
index 67e789b96c43..755aa92bf823 100644
--- a/drivers/net/ethernet/sfc/tx_common.c
+++ b/drivers/net/ethernet/sfc/tx_common.c
@@ -249,7 +249,7 @@ void efx_xmit_done_check_empty(struct efx_tx_queue *tx_queue)
}
}
-void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
+int efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
{
unsigned int fill_level, pkts_compl = 0, bytes_compl = 0;
unsigned int efv_pkts_compl = 0;
@@ -279,6 +279,8 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
}
efx_xmit_done_check_empty(tx_queue);
+
+ return pkts_compl + efv_pkts_compl;
}
/* Remove buffers put into a tx_queue for the current packet.
diff --git a/drivers/net/ethernet/sfc/tx_common.h b/drivers/net/ethernet/sfc/tx_common.h
index d87aecbc7bf1..1e9f42938aac 100644
--- a/drivers/net/ethernet/sfc/tx_common.h
+++ b/drivers/net/ethernet/sfc/tx_common.h
@@ -28,7 +28,7 @@ static inline bool efx_tx_buffer_in_use(struct efx_tx_buffer *buffer)
}
void efx_xmit_done_check_empty(struct efx_tx_queue *tx_queue);
-void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index);
+int efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index);
void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
unsigned int insert_count);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
index 16a8c361283b..f07905f00f98 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -644,7 +644,8 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
plat_dat->fix_mac_speed = ethqos_fix_mac_speed;
plat_dat->dump_debug_regs = rgmii_dump;
plat_dat->has_gmac4 = 1;
- plat_dat->dwmac4_addrs = &data->dwmac4_addrs;
+ if (ethqos->has_emac3)
+ plat_dat->dwmac4_addrs = &data->dwmac4_addrs;
plat_dat->pmt = 1;
plat_dat->tso_en = of_property_read_bool(np, "snps,tso");
if (of_device_is_compatible(np, "qcom,qcs404-ethqos"))
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 0fca81507a77..87510951f4e8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3873,7 +3873,6 @@ irq_error:
stmmac_hw_teardown(dev);
init_error:
- free_dma_desc_resources(priv, &priv->dma_conf);
phylink_disconnect_phy(priv->phylink);
init_phy_error:
pm_runtime_put(priv->device);
@@ -3891,6 +3890,9 @@ static int stmmac_open(struct net_device *dev)
return PTR_ERR(dma_conf);
ret = __stmmac_open(dev, dma_conf);
+ if (ret)
+ free_dma_desc_resources(priv, dma_conf);
+
kfree(dma_conf);
return ret;
}
@@ -5633,12 +5635,15 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu)
stmmac_release(dev);
ret = __stmmac_open(dev, dma_conf);
- kfree(dma_conf);
if (ret) {
+ free_dma_desc_resources(priv, dma_conf);
+ kfree(dma_conf);
netdev_err(priv->dev, "failed reopening the interface after MTU change\n");
return ret;
}
+ kfree(dma_conf);
+
stmmac_set_rx_mode(dev);
}
@@ -7233,8 +7238,7 @@ int stmmac_dvr_probe(struct device *device,
ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
NETIF_F_RXCSUM;
ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
- NETDEV_XDP_ACT_XSK_ZEROCOPY |
- NETDEV_XDP_ACT_NDO_XMIT;
+ NETDEV_XDP_ACT_XSK_ZEROCOPY;
ret = stmmac_tc_init(priv, priv);
if (!ret) {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c
index 9d4d8c3dad0a..aa6f16d3df64 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c
@@ -117,6 +117,9 @@ int stmmac_xdp_set_prog(struct stmmac_priv *priv, struct bpf_prog *prog,
return -EOPNOTSUPP;
}
+ if (!prog)
+ xdp_features_clear_redirect_target(dev);
+
need_update = !!priv->xdp_prog != !!prog;
if (if_running && need_update)
stmmac_xdp_release(dev);
@@ -131,5 +134,8 @@ int stmmac_xdp_set_prog(struct stmmac_priv *priv, struct bpf_prog *prog,
if (if_running && need_update)
stmmac_xdp_open(dev);
+ if (prog)
+ xdp_features_set_redirect_target(dev, false);
+
return 0;
}
diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c
index 4ef05bad4613..d61dfa250feb 100644
--- a/drivers/net/ethernet/sun/cassini.c
+++ b/drivers/net/ethernet/sun/cassini.c
@@ -5077,6 +5077,8 @@ err_out_iounmap:
cas_shutdown(cp);
mutex_unlock(&cp->pm_mutex);
+ vfree(cp->fw_data);
+
pci_iounmap(pdev, cp->regs);
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 11cbcd9e2c72..bebcfd5e6b57 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -2068,7 +2068,7 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common)
/* Initialize the Serdes PHY for the port */
ret = am65_cpsw_init_serdes_phy(dev, port_np, port);
if (ret)
- return ret;
+ goto of_node_put;
port->slave.mac_only =
of_property_read_bool(port_np, "ti,mac-only");
diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c
index f9972b8140f9..a03490ba2e5b 100644
--- a/drivers/net/ieee802154/adf7242.c
+++ b/drivers/net/ieee802154/adf7242.c
@@ -1348,3 +1348,5 @@ module_spi_driver(adf7242_driver);
MODULE_AUTHOR("Michael Hennerich <michael.hennerich@analog.com>");
MODULE_DESCRIPTION("ADF7242 IEEE802.15.4 Transceiver Driver");
MODULE_LICENSE("GPL");
+
+MODULE_FIRMWARE(FIRMWARE);
diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c
index 8445c2189d11..31cba9aa7636 100644
--- a/drivers/net/ieee802154/mac802154_hwsim.c
+++ b/drivers/net/ieee802154/mac802154_hwsim.c
@@ -685,7 +685,7 @@ static int hwsim_del_edge_nl(struct sk_buff *msg, struct genl_info *info)
static int hwsim_set_edge_lqi(struct sk_buff *msg, struct genl_info *info)
{
struct nlattr *edge_attrs[MAC802154_HWSIM_EDGE_ATTR_MAX + 1];
- struct hwsim_edge_info *einfo;
+ struct hwsim_edge_info *einfo, *einfo_old;
struct hwsim_phy *phy_v0;
struct hwsim_edge *e;
u32 v0, v1;
@@ -723,8 +723,10 @@ static int hwsim_set_edge_lqi(struct sk_buff *msg, struct genl_info *info)
list_for_each_entry_rcu(e, &phy_v0->edges, list) {
if (e->endpoint->idx == v1) {
einfo->lqi = lqi;
- rcu_assign_pointer(e->info, einfo);
+ einfo_old = rcu_replace_pointer(e->info, einfo,
+ lockdep_is_held(&hwsim_phys_lock));
rcu_read_unlock();
+ kfree_rcu(einfo_old, rcu);
mutex_unlock(&hwsim_phys_lock);
return 0;
}
diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index 2ee80ed140b7..afa1d56d9095 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -119,7 +119,7 @@ enum ipa_status_field_id {
};
/* Size in bytes of an IPA packet status structure */
-#define IPA_STATUS_SIZE sizeof(__le32[4])
+#define IPA_STATUS_SIZE sizeof(__le32[8])
/* IPA status structure decoder; looks up field values for a structure */
static u32 ipa_status_extract(struct ipa *ipa, const void *data,
diff --git a/drivers/net/ipvlan/ipvlan_l3s.c b/drivers/net/ipvlan/ipvlan_l3s.c
index 71712ea25403..d5b05e803219 100644
--- a/drivers/net/ipvlan/ipvlan_l3s.c
+++ b/drivers/net/ipvlan/ipvlan_l3s.c
@@ -102,6 +102,10 @@ static unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
skb->dev = addr->master->dev;
skb->skb_iif = skb->dev->ifindex;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (addr->atype == IPVL_IPV6)
+ IP6CB(skb)->iif = skb->dev->ifindex;
+#endif
len = skb->len + ETH_HLEN;
ipvlan_count_rx(addr->master, len, true, false);
out:
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 3427993f94f7..984dfa5d6c11 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -3997,17 +3997,15 @@ static int macsec_add_dev(struct net_device *dev, sci_t sci, u8 icv_len)
return -ENOMEM;
secy->tx_sc.stats = netdev_alloc_pcpu_stats(struct pcpu_tx_sc_stats);
- if (!secy->tx_sc.stats) {
- free_percpu(macsec->stats);
+ if (!secy->tx_sc.stats)
return -ENOMEM;
- }
secy->tx_sc.md_dst = metadata_dst_alloc(0, METADATA_MACSEC, GFP_KERNEL);
- if (!secy->tx_sc.md_dst) {
- free_percpu(secy->tx_sc.stats);
- free_percpu(macsec->stats);
+ if (!secy->tx_sc.md_dst)
+ /* macsec and secy percpu stats will be freed when unregistering
+ * net_device in macsec_free_netdev()
+ */
return -ENOMEM;
- }
if (sci == MACSEC_UNDEF_SCI)
sci = dev_to_sci(dev, MACSEC_PORT_ES);
diff --git a/drivers/net/mdio/mdio-i2c.c b/drivers/net/mdio/mdio-i2c.c
index 1e0c206d0f2e..da2001ea1f99 100644
--- a/drivers/net/mdio/mdio-i2c.c
+++ b/drivers/net/mdio/mdio-i2c.c
@@ -291,7 +291,8 @@ static int i2c_rollball_mii_cmd(struct mii_bus *bus, int bus_addr, u8 cmd,
return i2c_transfer_rollball(i2c, msgs, ARRAY_SIZE(msgs));
}
-static int i2c_mii_read_rollball(struct mii_bus *bus, int phy_id, int reg)
+static int i2c_mii_read_rollball(struct mii_bus *bus, int phy_id, int devad,
+ int reg)
{
u8 buf[4], res[6];
int bus_addr, ret;
@@ -302,7 +303,7 @@ static int i2c_mii_read_rollball(struct mii_bus *bus, int phy_id, int reg)
return 0xffff;
buf[0] = ROLLBALL_DATA_ADDR;
- buf[1] = (reg >> 16) & 0x1f;
+ buf[1] = devad;
buf[2] = (reg >> 8) & 0xff;
buf[3] = reg & 0xff;
@@ -322,8 +323,8 @@ static int i2c_mii_read_rollball(struct mii_bus *bus, int phy_id, int reg)
return val;
}
-static int i2c_mii_write_rollball(struct mii_bus *bus, int phy_id, int reg,
- u16 val)
+static int i2c_mii_write_rollball(struct mii_bus *bus, int phy_id, int devad,
+ int reg, u16 val)
{
int bus_addr, ret;
u8 buf[6];
@@ -333,7 +334,7 @@ static int i2c_mii_write_rollball(struct mii_bus *bus, int phy_id, int reg,
return 0;
buf[0] = ROLLBALL_DATA_ADDR;
- buf[1] = (reg >> 16) & 0x1f;
+ buf[1] = devad;
buf[2] = (reg >> 8) & 0xff;
buf[3] = reg & 0xff;
buf[4] = val >> 8;
@@ -405,8 +406,8 @@ struct mii_bus *mdio_i2c_alloc(struct device *parent, struct i2c_adapter *i2c,
return ERR_PTR(ret);
}
- mii->read = i2c_mii_read_rollball;
- mii->write = i2c_mii_write_rollball;
+ mii->read_c45 = i2c_mii_read_rollball;
+ mii->write_c45 = i2c_mii_write_rollball;
break;
default:
mii->read = i2c_mii_read_default_c22;
diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index f19d48c94fe0..72f25e778840 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -873,7 +873,7 @@ int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface,
switch (compat->an_mode) {
case DW_AN_C73:
- if (phylink_autoneg_inband(mode)) {
+ if (test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, advertising)) {
ret = xpcs_config_aneg_c73(xpcs, compat);
if (ret)
return ret;
diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index d75f526a20a4..e397e7d642d9 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -44,6 +44,7 @@
#define DP83867_STRAP_STS1 0x006E
#define DP83867_STRAP_STS2 0x006f
#define DP83867_RGMIIDCTL 0x0086
+#define DP83867_DSP_FFE_CFG 0x012c
#define DP83867_RXFCFG 0x0134
#define DP83867_RXFPMD1 0x0136
#define DP83867_RXFPMD2 0x0137
@@ -935,14 +936,33 @@ static int dp83867_phy_reset(struct phy_device *phydev)
{
int err;
- err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESTART);
+ err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESET);
if (err < 0)
return err;
usleep_range(10, 20);
- return phy_modify(phydev, MII_DP83867_PHYCTRL,
+ err = phy_modify(phydev, MII_DP83867_PHYCTRL,
DP83867_PHYCR_FORCE_LINK_GOOD, 0);
+ if (err < 0)
+ return err;
+
+ /* Configure the DSP Feedforward Equalizer Configuration register to
+ * improve short cable (< 1 meter) performance. This will not affect
+ * long cable performance.
+ */
+ err = phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_DSP_FFE_CFG,
+ 0x0e81);
+ if (err < 0)
+ return err;
+
+ err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESTART);
+ if (err < 0)
+ return err;
+
+ usleep_range(10, 20);
+
+ return 0;
}
static void dp83867_link_change_notify(struct phy_device *phydev)
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 389f33a12534..8b3618d3da4a 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -1287,7 +1287,7 @@ EXPORT_SYMBOL_GPL(mdiobus_modify_changed);
* @mask: bit mask of bits to clear
* @set: bit mask of bits to set
*/
-int mdiobus_c45_modify_changed(struct mii_bus *bus, int devad, int addr,
+int mdiobus_c45_modify_changed(struct mii_bus *bus, int addr, int devad,
u32 regnum, u16 mask, u16 set)
{
int err;
diff --git a/drivers/net/phy/mscc/mscc.h b/drivers/net/phy/mscc/mscc.h
index a50235fdf7d9..defe5cc6d4fc 100644
--- a/drivers/net/phy/mscc/mscc.h
+++ b/drivers/net/phy/mscc/mscc.h
@@ -179,6 +179,7 @@ enum rgmii_clock_delay {
#define VSC8502_RGMII_CNTL 20
#define VSC8502_RGMII_RX_DELAY_MASK 0x0070
#define VSC8502_RGMII_TX_DELAY_MASK 0x0007
+#define VSC8502_RGMII_RX_CLK_DISABLE 0x0800
#define MSCC_PHY_WOL_LOWER_MAC_ADDR 21
#define MSCC_PHY_WOL_MID_MAC_ADDR 22
@@ -276,6 +277,7 @@ enum rgmii_clock_delay {
/* Microsemi PHY ID's
* Code assumes lowest nibble is 0
*/
+#define PHY_ID_VSC8501 0x00070530
#define PHY_ID_VSC8502 0x00070630
#define PHY_ID_VSC8504 0x000704c0
#define PHY_ID_VSC8514 0x00070670
diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c
index 62bf99e45af1..28df8a2e4230 100644
--- a/drivers/net/phy/mscc/mscc_main.c
+++ b/drivers/net/phy/mscc/mscc_main.c
@@ -519,16 +519,27 @@ out_unlock:
* * 2.0 ns (which causes the data to be sampled at exactly half way between
* clock transitions at 1000 Mbps) if delays should be enabled
*/
-static int vsc85xx_rgmii_set_skews(struct phy_device *phydev, u32 rgmii_cntl,
- u16 rgmii_rx_delay_mask,
- u16 rgmii_tx_delay_mask)
+static int vsc85xx_update_rgmii_cntl(struct phy_device *phydev, u32 rgmii_cntl,
+ u16 rgmii_rx_delay_mask,
+ u16 rgmii_tx_delay_mask)
{
u16 rgmii_rx_delay_pos = ffs(rgmii_rx_delay_mask) - 1;
u16 rgmii_tx_delay_pos = ffs(rgmii_tx_delay_mask) - 1;
u16 reg_val = 0;
- int rc;
+ u16 mask = 0;
+ int rc = 0;
- mutex_lock(&phydev->lock);
+ /* For traffic to pass, the VSC8502 family needs the RX_CLK disable bit
+ * to be unset for all PHY modes, so do that as part of the paged
+ * register modification.
+ * For some family members (like VSC8530/31/40/41) this bit is reserved
+ * and read-only, and the RX clock is enabled by default.
+ */
+ if (rgmii_cntl == VSC8502_RGMII_CNTL)
+ mask |= VSC8502_RGMII_RX_CLK_DISABLE;
+
+ if (phy_interface_is_rgmii(phydev))
+ mask |= rgmii_rx_delay_mask | rgmii_tx_delay_mask;
if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID ||
phydev->interface == PHY_INTERFACE_MODE_RGMII_ID)
@@ -537,31 +548,20 @@ static int vsc85xx_rgmii_set_skews(struct phy_device *phydev, u32 rgmii_cntl,
phydev->interface == PHY_INTERFACE_MODE_RGMII_ID)
reg_val |= RGMII_CLK_DELAY_2_0_NS << rgmii_tx_delay_pos;
- rc = phy_modify_paged(phydev, MSCC_PHY_PAGE_EXTENDED_2,
- rgmii_cntl,
- rgmii_rx_delay_mask | rgmii_tx_delay_mask,
- reg_val);
-
- mutex_unlock(&phydev->lock);
+ if (mask)
+ rc = phy_modify_paged(phydev, MSCC_PHY_PAGE_EXTENDED_2,
+ rgmii_cntl, mask, reg_val);
return rc;
}
static int vsc85xx_default_config(struct phy_device *phydev)
{
- int rc;
-
phydev->mdix_ctrl = ETH_TP_MDI_AUTO;
- if (phy_interface_mode_is_rgmii(phydev->interface)) {
- rc = vsc85xx_rgmii_set_skews(phydev, VSC8502_RGMII_CNTL,
- VSC8502_RGMII_RX_DELAY_MASK,
- VSC8502_RGMII_TX_DELAY_MASK);
- if (rc)
- return rc;
- }
-
- return 0;
+ return vsc85xx_update_rgmii_cntl(phydev, VSC8502_RGMII_CNTL,
+ VSC8502_RGMII_RX_DELAY_MASK,
+ VSC8502_RGMII_TX_DELAY_MASK);
}
static int vsc85xx_get_tunable(struct phy_device *phydev,
@@ -1758,13 +1758,11 @@ static int vsc8584_config_init(struct phy_device *phydev)
if (ret)
return ret;
- if (phy_interface_is_rgmii(phydev)) {
- ret = vsc85xx_rgmii_set_skews(phydev, VSC8572_RGMII_CNTL,
- VSC8572_RGMII_RX_DELAY_MASK,
- VSC8572_RGMII_TX_DELAY_MASK);
- if (ret)
- return ret;
- }
+ ret = vsc85xx_update_rgmii_cntl(phydev, VSC8572_RGMII_CNTL,
+ VSC8572_RGMII_RX_DELAY_MASK,
+ VSC8572_RGMII_TX_DELAY_MASK);
+ if (ret)
+ return ret;
ret = genphy_soft_reset(phydev);
if (ret)
@@ -2317,6 +2315,30 @@ static int vsc85xx_probe(struct phy_device *phydev)
/* Microsemi VSC85xx PHYs */
static struct phy_driver vsc85xx_driver[] = {
{
+ .phy_id = PHY_ID_VSC8501,
+ .name = "Microsemi GE VSC8501 SyncE",
+ .phy_id_mask = 0xfffffff0,
+ /* PHY_BASIC_FEATURES */
+ .soft_reset = &genphy_soft_reset,
+ .config_init = &vsc85xx_config_init,
+ .config_aneg = &vsc85xx_config_aneg,
+ .read_status = &vsc85xx_read_status,
+ .handle_interrupt = vsc85xx_handle_interrupt,
+ .config_intr = &vsc85xx_config_intr,
+ .suspend = &genphy_suspend,
+ .resume = &genphy_resume,
+ .probe = &vsc85xx_probe,
+ .set_wol = &vsc85xx_wol_set,
+ .get_wol = &vsc85xx_wol_get,
+ .get_tunable = &vsc85xx_get_tunable,
+ .set_tunable = &vsc85xx_set_tunable,
+ .read_page = &vsc85xx_phy_read_page,
+ .write_page = &vsc85xx_phy_write_page,
+ .get_sset_count = &vsc85xx_get_sset_count,
+ .get_strings = &vsc85xx_get_strings,
+ .get_stats = &vsc85xx_get_stats,
+},
+{
.phy_id = PHY_ID_VSC8502,
.name = "Microsemi GE VSC8502 SyncE",
.phy_id_mask = 0xfffffff0,
@@ -2656,6 +2678,8 @@ static struct phy_driver vsc85xx_driver[] = {
module_phy_driver(vsc85xx_driver);
static struct mdio_device_id __maybe_unused vsc85xx_tbl[] = {
+ { PHY_ID_VSC8501, 0xfffffff0, },
+ { PHY_ID_VSC8502, 0xfffffff0, },
{ PHY_ID_VSC8504, 0xfffffff0, },
{ PHY_ID_VSC8514, 0xfffffff0, },
{ PHY_ID_VSC8530, 0xfffffff0, },
diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c
index 6301a9abfb95..ea1073adc5a1 100644
--- a/drivers/net/phy/mxl-gpy.c
+++ b/drivers/net/phy/mxl-gpy.c
@@ -274,13 +274,6 @@ static int gpy_config_init(struct phy_device *phydev)
return ret < 0 ? ret : 0;
}
-static bool gpy_has_broken_mdint(struct phy_device *phydev)
-{
- /* At least these PHYs are known to have broken interrupt handling */
- return phydev->drv->phy_id == PHY_ID_GPY215B ||
- phydev->drv->phy_id == PHY_ID_GPY215C;
-}
-
static int gpy_probe(struct phy_device *phydev)
{
struct device *dev = &phydev->mdio.dev;
@@ -300,8 +293,7 @@ static int gpy_probe(struct phy_device *phydev)
phydev->priv = priv;
mutex_init(&priv->mbox_lock);
- if (gpy_has_broken_mdint(phydev) &&
- !device_property_present(dev, "maxlinear,use-broken-interrupts"))
+ if (!device_property_present(dev, "maxlinear,use-broken-interrupts"))
phydev->dev_flags |= PHY_F_NO_IRQ;
fw_version = phy_read(phydev, PHY_FWV);
@@ -659,11 +651,9 @@ static irqreturn_t gpy_handle_interrupt(struct phy_device *phydev)
* frame. Therefore, polling is the best we can do and won't do any more
* harm.
* It was observed that this bug happens on link state and link speed
- * changes on a GPY215B and GYP215C independent of the firmware version
- * (which doesn't mean that this list is exhaustive).
+ * changes independent of the firmware version.
*/
- if (gpy_has_broken_mdint(phydev) &&
- (reg & (PHY_IMASK_LSTC | PHY_IMASK_LSPC))) {
+ if (reg & (PHY_IMASK_LSTC | PHY_IMASK_LSPC)) {
reg = gpy_mbox_read(phydev, REG_GPIO0_OUT);
if (reg < 0) {
phy_error(phydev);
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 17d0d0555a79..53598210be6c 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -3021,6 +3021,15 @@ static int phy_led_blink_set(struct led_classdev *led_cdev,
return err;
}
+static void phy_leds_unregister(struct phy_device *phydev)
+{
+ struct phy_led *phyled;
+
+ list_for_each_entry(phyled, &phydev->leds, list) {
+ led_classdev_unregister(&phyled->led_cdev);
+ }
+}
+
static int of_phy_led(struct phy_device *phydev,
struct device_node *led)
{
@@ -3054,7 +3063,7 @@ static int of_phy_led(struct phy_device *phydev,
init_data.fwnode = of_fwnode_handle(led);
init_data.devname_mandatory = true;
- err = devm_led_classdev_register_ext(dev, cdev, &init_data);
+ err = led_classdev_register_ext(dev, cdev, &init_data);
if (err)
return err;
@@ -3083,6 +3092,7 @@ static int of_phy_leds(struct phy_device *phydev)
err = of_phy_led(phydev, led);
if (err) {
of_node_put(led);
+ phy_leds_unregister(phydev);
return err;
}
}
@@ -3305,6 +3315,9 @@ static int phy_remove(struct device *dev)
cancel_delayed_work_sync(&phydev->state_queue);
+ if (IS_ENABLED(CONFIG_PHYLIB_LEDS))
+ phy_leds_unregister(phydev);
+
phydev->state = PHY_DOWN;
sfp_bus_del_upstream(phydev->sfp_bus);
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index a4111f1be375..5efdeb59f4b2 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -188,6 +188,7 @@ static int phylink_interface_max_speed(phy_interface_t interface)
case PHY_INTERFACE_MODE_RGMII_ID:
case PHY_INTERFACE_MODE_RGMII:
case PHY_INTERFACE_MODE_QSGMII:
+ case PHY_INTERFACE_MODE_QUSGMII:
case PHY_INTERFACE_MODE_SGMII:
case PHY_INTERFACE_MODE_GMII:
return SPEED_1000;
@@ -204,7 +205,6 @@ static int phylink_interface_max_speed(phy_interface_t interface)
case PHY_INTERFACE_MODE_10GBASER:
case PHY_INTERFACE_MODE_10GKR:
case PHY_INTERFACE_MODE_USXGMII:
- case PHY_INTERFACE_MODE_QUSGMII:
return SPEED_10000;
case PHY_INTERFACE_MODE_25GBASER:
@@ -2226,6 +2226,12 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
ASSERT_RTNL();
if (pl->phydev) {
+ struct ethtool_link_ksettings phy_kset = *kset;
+
+ linkmode_and(phy_kset.link_modes.advertising,
+ phy_kset.link_modes.advertising,
+ pl->supported);
+
/* We can rely on phylib for this update; we also do not need
* to update the pl->link_config settings:
* - the configuration returned via ksettings_get() will come
@@ -2244,11 +2250,10 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
* the presence of a PHY, this should not be changed as that
* should be determined from the media side advertisement.
*/
- return phy_ethtool_ksettings_set(pl->phydev, kset);
+ return phy_ethtool_ksettings_set(pl->phydev, &phy_kset);
}
config = pl->link_config;
-
/* Mask out unsupported advertisements */
linkmode_and(config.advertising, kset->link_modes.advertising,
pl->supported);
@@ -3294,6 +3299,41 @@ void phylink_decode_usxgmii_word(struct phylink_link_state *state,
EXPORT_SYMBOL_GPL(phylink_decode_usxgmii_word);
/**
+ * phylink_decode_usgmii_word() - decode the USGMII word from a MAC PCS
+ * @state: a pointer to a struct phylink_link_state.
+ * @lpa: a 16 bit value which stores the USGMII auto-negotiation word
+ *
+ * Helper for MAC PCS supporting the USGMII protocol and the auto-negotiation
+ * code word. Decode the USGMII code word and populate the corresponding fields
+ * (speed, duplex) into the phylink_link_state structure. The structure for this
+ * word is the same as the USXGMII word, except it only supports speeds up to
+ * 1Gbps.
+ */
+static void phylink_decode_usgmii_word(struct phylink_link_state *state,
+ uint16_t lpa)
+{
+ switch (lpa & MDIO_USXGMII_SPD_MASK) {
+ case MDIO_USXGMII_10:
+ state->speed = SPEED_10;
+ break;
+ case MDIO_USXGMII_100:
+ state->speed = SPEED_100;
+ break;
+ case MDIO_USXGMII_1000:
+ state->speed = SPEED_1000;
+ break;
+ default:
+ state->link = false;
+ return;
+ }
+
+ if (lpa & MDIO_USXGMII_FULL_DUPLEX)
+ state->duplex = DUPLEX_FULL;
+ else
+ state->duplex = DUPLEX_HALF;
+}
+
+/**
* phylink_mii_c22_pcs_decode_state() - Decode MAC PCS state from MII registers
* @state: a pointer to a &struct phylink_link_state.
* @bmsr: The value of the %MII_BMSR register
@@ -3330,9 +3370,11 @@ void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state,
case PHY_INTERFACE_MODE_SGMII:
case PHY_INTERFACE_MODE_QSGMII:
- case PHY_INTERFACE_MODE_QUSGMII:
phylink_decode_sgmii_word(state, lpa);
break;
+ case PHY_INTERFACE_MODE_QUSGMII:
+ phylink_decode_usgmii_word(state, lpa);
+ break;
default:
state->link = false;
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index d10606f257c4..555b0b1e9a78 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1629,6 +1629,7 @@ static int team_init(struct net_device *dev)
team->dev = dev;
team_set_no_mode(team);
+ team->notifier_ctx = false;
team->pcpu_stats = netdev_alloc_pcpu_stats(struct team_pcpu_stats);
if (!team->pcpu_stats)
@@ -3022,7 +3023,11 @@ static int team_device_event(struct notifier_block *unused,
team_del_slave(port->team->dev, dev);
break;
case NETDEV_FEAT_CHANGE:
- team_compute_features(port->team);
+ if (!port->team->notifier_ctx) {
+ port->team->notifier_ctx = true;
+ team_compute_features(port->team);
+ port->team->notifier_ctx = false;
+ }
break;
case NETDEV_PRECHANGEMTU:
/* Forbid to change mtu of underlaying device */
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index d4d0a41a905a..d75456adc62a 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1977,6 +1977,14 @@ napi_busy:
int queue_len;
spin_lock_bh(&queue->lock);
+
+ if (unlikely(tfile->detached)) {
+ spin_unlock_bh(&queue->lock);
+ rcu_read_unlock();
+ err = -EBUSY;
+ goto free_skb;
+ }
+
__skb_queue_tail(queue, skb);
queue_len = skb_queue_len(queue);
spin_unlock(&queue->lock);
@@ -2512,6 +2520,13 @@ build:
if (tfile->napi_enabled) {
queue = &tfile->sk.sk_write_queue;
spin_lock(&queue->lock);
+
+ if (unlikely(tfile->detached)) {
+ spin_unlock(&queue->lock);
+ kfree_skb(skb);
+ return -EBUSY;
+ }
+
__skb_queue_tail(queue, skb);
spin_unlock(&queue->lock);
ret = 1;
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 6ce8f4f0c70e..db05622f1f70 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -181,9 +181,12 @@ static u32 cdc_ncm_check_tx_max(struct usbnet *dev, u32 new_tx)
else
min = ctx->max_datagram_size + ctx->max_ndp_size + sizeof(struct usb_cdc_ncm_nth32);
- max = min_t(u32, CDC_NCM_NTB_MAX_SIZE_TX, le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize));
- if (max == 0)
+ if (le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize) == 0)
max = CDC_NCM_NTB_MAX_SIZE_TX; /* dwNtbOutMaxSize not set */
+ else
+ max = clamp_t(u32, le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize),
+ USB_CDC_NCM_NTB_MIN_OUT_SIZE,
+ CDC_NCM_NTB_MAX_SIZE_TX);
/* some devices set dwNtbOutMaxSize too low for the above default */
min = min(min, max);
@@ -1244,6 +1247,9 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
* further.
*/
if (skb_out == NULL) {
+ /* If even the smallest allocation fails, abort. */
+ if (ctx->tx_curr_size == USB_CDC_NCM_NTB_MIN_OUT_SIZE)
+ goto alloc_failed;
ctx->tx_low_mem_max_cnt = min(ctx->tx_low_mem_max_cnt + 1,
(unsigned)CDC_NCM_LOW_MEM_MAX_CNT);
ctx->tx_low_mem_val = ctx->tx_low_mem_max_cnt;
@@ -1262,13 +1268,8 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
skb_out = alloc_skb(ctx->tx_curr_size, GFP_ATOMIC);
/* No allocation possible so we will abort */
- if (skb_out == NULL) {
- if (skb != NULL) {
- dev_kfree_skb_any(skb);
- dev->net->stats.tx_dropped++;
- }
- goto exit_no_skb;
- }
+ if (!skb_out)
+ goto alloc_failed;
ctx->tx_low_mem_val--;
}
if (ctx->is_ndp16) {
@@ -1461,6 +1462,11 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
return skb_out;
+alloc_failed:
+ if (skb) {
+ dev_kfree_skb_any(skb);
+ dev->net->stats.tx_dropped++;
+ }
exit_no_skb:
/* Start timer, if there is a remaining non-empty skb */
if (ctx->tx_curr_skb != NULL && n > 0)
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 571e37e67f9c..2e7c7b0cdc54 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1220,7 +1220,9 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x05c6, 0x9080, 8)},
{QMI_FIXED_INTF(0x05c6, 0x9083, 3)},
{QMI_FIXED_INTF(0x05c6, 0x9084, 4)},
+ {QMI_QUIRK_SET_DTR(0x05c6, 0x9091, 2)}, /* Compal RXM-G1 */
{QMI_FIXED_INTF(0x05c6, 0x90b2, 3)}, /* ublox R410M */
+ {QMI_QUIRK_SET_DTR(0x05c6, 0x90db, 2)}, /* Compal RXM-G1 */
{QMI_FIXED_INTF(0x05c6, 0x920d, 0)},
{QMI_FIXED_INTF(0x05c6, 0x920d, 5)},
{QMI_QUIRK_SET_DTR(0x05c6, 0x9625, 4)}, /* YUGA CLM920-NC5 */
@@ -1325,7 +1327,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x2001, 0x7e3d, 4)}, /* D-Link DWM-222 A2 */
{QMI_FIXED_INTF(0x2020, 0x2031, 4)}, /* Olicard 600 */
{QMI_FIXED_INTF(0x2020, 0x2033, 4)}, /* BroadMobi BM806U */
- {QMI_FIXED_INTF(0x2020, 0x2060, 4)}, /* BroadMobi BM818 */
+ {QMI_QUIRK_SET_DTR(0x2020, 0x2060, 4)}, /* BroadMobi BM818 */
{QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)}, /* Sierra Wireless MC7700 */
{QMI_FIXED_INTF(0x114f, 0x68a2, 8)}, /* Sierra Wireless MC7750 */
{QMI_FIXED_INTF(0x1199, 0x68a2, 8)}, /* Sierra Wireless MC7710 in QMI mode */
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index a12ae26db0e2..486b5849033d 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -205,6 +205,8 @@ struct control_buf {
__virtio16 vid;
__virtio64 offloads;
struct virtio_net_ctrl_rss rss;
+ struct virtio_net_ctrl_coal_tx coal_tx;
+ struct virtio_net_ctrl_coal_rx coal_rx;
};
struct virtnet_info {
@@ -1868,6 +1870,38 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
return received;
}
+static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index)
+{
+ virtnet_napi_tx_disable(&vi->sq[qp_index].napi);
+ napi_disable(&vi->rq[qp_index].napi);
+ xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
+}
+
+static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
+{
+ struct net_device *dev = vi->dev;
+ int err;
+
+ err = xdp_rxq_info_reg(&vi->rq[qp_index].xdp_rxq, dev, qp_index,
+ vi->rq[qp_index].napi.napi_id);
+ if (err < 0)
+ return err;
+
+ err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq,
+ MEM_TYPE_PAGE_SHARED, NULL);
+ if (err < 0)
+ goto err_xdp_reg_mem_model;
+
+ virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi);
+ virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi);
+
+ return 0;
+
+err_xdp_reg_mem_model:
+ xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
+ return err;
+}
+
static int virtnet_open(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
@@ -1881,22 +1915,20 @@ static int virtnet_open(struct net_device *dev)
if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
schedule_delayed_work(&vi->refill, 0);
- err = xdp_rxq_info_reg(&vi->rq[i].xdp_rxq, dev, i, vi->rq[i].napi.napi_id);
+ err = virtnet_enable_queue_pair(vi, i);
if (err < 0)
- return err;
-
- err = xdp_rxq_info_reg_mem_model(&vi->rq[i].xdp_rxq,
- MEM_TYPE_PAGE_SHARED, NULL);
- if (err < 0) {
- xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
- return err;
- }
-
- virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
- virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
+ goto err_enable_qp;
}
return 0;
+
+err_enable_qp:
+ disable_delayed_refill(vi);
+ cancel_delayed_work_sync(&vi->refill);
+
+ for (i--; i >= 0; i--)
+ virtnet_disable_queue_pair(vi, i);
+ return err;
}
static int virtnet_poll_tx(struct napi_struct *napi, int budget)
@@ -2305,11 +2337,8 @@ static int virtnet_close(struct net_device *dev)
/* Make sure refill_work doesn't re-enable napi! */
cancel_delayed_work_sync(&vi->refill);
- for (i = 0; i < vi->max_queue_pairs; i++) {
- virtnet_napi_tx_disable(&vi->sq[i].napi);
- napi_disable(&vi->rq[i].napi);
- xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
- }
+ for (i = 0; i < vi->max_queue_pairs; i++)
+ virtnet_disable_queue_pair(vi, i);
return 0;
}
@@ -2907,12 +2936,10 @@ static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
struct ethtool_coalesce *ec)
{
struct scatterlist sgs_tx, sgs_rx;
- struct virtio_net_ctrl_coal_tx coal_tx;
- struct virtio_net_ctrl_coal_rx coal_rx;
- coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs);
- coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames);
- sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx));
+ vi->ctrl->coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs);
+ vi->ctrl->coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames);
+ sg_init_one(&sgs_tx, &vi->ctrl->coal_tx, sizeof(vi->ctrl->coal_tx));
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
VIRTIO_NET_CTRL_NOTF_COAL_TX_SET,
@@ -2923,9 +2950,9 @@ static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
vi->tx_usecs = ec->tx_coalesce_usecs;
vi->tx_max_packets = ec->tx_max_coalesced_frames;
- coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs);
- coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames);
- sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx));
+ vi->ctrl->coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs);
+ vi->ctrl->coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames);
+ sg_init_one(&sgs_rx, &vi->ctrl->coal_rx, sizeof(vi->ctrl->coal_rx));
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c
index d62a904d2e42..56326f38fe8a 100644
--- a/drivers/net/wan/lapbether.c
+++ b/drivers/net/wan/lapbether.c
@@ -384,6 +384,9 @@ static int lapbeth_new_device(struct net_device *dev)
ASSERT_RTNL();
+ if (dev->type != ARPHRD_ETHER)
+ return -EINVAL;
+
ndev = alloc_netdev(sizeof(*lapbeth), "lapb%d", NET_NAME_UNKNOWN,
lapbeth_setup);
if (!ndev)
diff --git a/drivers/net/wireless/broadcom/b43/b43.h b/drivers/net/wireless/broadcom/b43/b43.h
index 9fc7c088a539..67b4bac048e5 100644
--- a/drivers/net/wireless/broadcom/b43/b43.h
+++ b/drivers/net/wireless/broadcom/b43/b43.h
@@ -651,7 +651,7 @@ struct b43_iv {
union {
__be16 d16;
__be32 d32;
- } data __packed;
+ } __packed data;
} __packed;
diff --git a/drivers/net/wireless/broadcom/b43legacy/b43legacy.h b/drivers/net/wireless/broadcom/b43legacy/b43legacy.h
index 6b0cec467938..f49365d14619 100644
--- a/drivers/net/wireless/broadcom/b43legacy/b43legacy.h
+++ b/drivers/net/wireless/broadcom/b43legacy/b43legacy.h
@@ -379,7 +379,7 @@ struct b43legacy_iv {
union {
__be16 d16;
__be32 d32;
- } data __packed;
+ } __packed data;
} __packed;
#define B43legacy_PHYMODE(phytype) (1 << (phytype))
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
index ff710b0b5071..00679a990e3d 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
@@ -1039,6 +1039,11 @@ static int brcmf_ops_sdio_probe(struct sdio_func *func,
struct brcmf_sdio_dev *sdiodev;
struct brcmf_bus *bus_if;
+ if (!id) {
+ dev_err(&func->dev, "Error no sdio_device_id passed for %x:%x\n", func->vendor, func->device);
+ return -ENODEV;
+ }
+
brcmf_dbg(SDIO, "Enter\n");
brcmf_dbg(SDIO, "Class=%x\n", func->class);
brcmf_dbg(SDIO, "sdio vendor ID: 0x%04x\n", func->vendor);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
index 59f3e9c5e139..80220685f5e4 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
@@ -2394,6 +2394,9 @@ static void brcmf_pcie_debugfs_create(struct device *dev)
}
#endif
+/* Forward declaration for pci_match_id() call */
+static const struct pci_device_id brcmf_pcie_devid_table[];
+
static int
brcmf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
@@ -2404,6 +2407,14 @@ brcmf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
struct brcmf_core *core;
struct brcmf_bus *bus;
+ if (!id) {
+ id = pci_match_id(brcmf_pcie_devid_table, pdev);
+ if (!id) {
+ pci_err(pdev, "Error could not find pci_device_id for %x:%x\n", pdev->vendor, pdev->device);
+ return -ENODEV;
+ }
+ }
+
brcmf_dbg(PCIE, "Enter %x:%x\n", pdev->vendor, pdev->device);
ret = -ENOMEM;
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
index 246843aeb696..2178675ae1a4 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
@@ -1331,6 +1331,9 @@ brcmf_usb_disconnect_cb(struct brcmf_usbdev_info *devinfo)
brcmf_usb_detach(devinfo);
}
+/* Forward declaration for usb_match_id() call */
+static const struct usb_device_id brcmf_usb_devid_table[];
+
static int
brcmf_usb_probe(struct usb_interface *intf, const struct usb_device_id *id)
{
@@ -1342,6 +1345,14 @@ brcmf_usb_probe(struct usb_interface *intf, const struct usb_device_id *id)
u32 num_of_eps;
u8 endpoint_num, ep;
+ if (!id) {
+ id = usb_match_id(intf, brcmf_usb_devid_table);
+ if (!id) {
+ dev_err(&intf->dev, "Error could not find matching usb_device_id\n");
+ return -ENODEV;
+ }
+ }
+
brcmf_dbg(USB, "Enter 0x%04x:0x%04x\n", id->idVendor, id->idProduct);
devinfo = kzalloc(sizeof(*devinfo), GFP_ATOMIC);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
index 5f4a51310add..cb9181f05501 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
@@ -38,7 +38,7 @@ static const struct dmi_system_id dmi_ppag_approved_list[] = {
},
{ .ident = "ASUS",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTek COMPUTER INC."),
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
},
},
{}
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index d9faaae01abd..55219974b92b 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -1664,14 +1664,10 @@ static __le32 iwl_get_mon_reg(struct iwl_fw_runtime *fwrt, u32 alloc_id,
}
static void *
-iwl_dump_ini_mon_fill_header(struct iwl_fw_runtime *fwrt,
- struct iwl_dump_ini_region_data *reg_data,
+iwl_dump_ini_mon_fill_header(struct iwl_fw_runtime *fwrt, u32 alloc_id,
struct iwl_fw_ini_monitor_dump *data,
const struct iwl_fw_mon_regs *addrs)
{
- struct iwl_fw_ini_region_tlv *reg = (void *)reg_data->reg_tlv->data;
- u32 alloc_id = le32_to_cpu(reg->dram_alloc_id);
-
if (!iwl_trans_grab_nic_access(fwrt->trans)) {
IWL_ERR(fwrt, "Failed to get monitor header\n");
return NULL;
@@ -1702,8 +1698,10 @@ iwl_dump_ini_mon_dram_fill_header(struct iwl_fw_runtime *fwrt,
void *data, u32 data_len)
{
struct iwl_fw_ini_monitor_dump *mon_dump = (void *)data;
+ struct iwl_fw_ini_region_tlv *reg = (void *)reg_data->reg_tlv->data;
+ u32 alloc_id = le32_to_cpu(reg->dram_alloc_id);
- return iwl_dump_ini_mon_fill_header(fwrt, reg_data, mon_dump,
+ return iwl_dump_ini_mon_fill_header(fwrt, alloc_id, mon_dump,
&fwrt->trans->cfg->mon_dram_regs);
}
@@ -1713,8 +1711,10 @@ iwl_dump_ini_mon_smem_fill_header(struct iwl_fw_runtime *fwrt,
void *data, u32 data_len)
{
struct iwl_fw_ini_monitor_dump *mon_dump = (void *)data;
+ struct iwl_fw_ini_region_tlv *reg = (void *)reg_data->reg_tlv->data;
+ u32 alloc_id = le32_to_cpu(reg->internal_buffer.alloc_id);
- return iwl_dump_ini_mon_fill_header(fwrt, reg_data, mon_dump,
+ return iwl_dump_ini_mon_fill_header(fwrt, alloc_id, mon_dump,
&fwrt->trans->cfg->mon_smem_regs);
}
@@ -1725,7 +1725,10 @@ iwl_dump_ini_mon_dbgi_fill_header(struct iwl_fw_runtime *fwrt,
{
struct iwl_fw_ini_monitor_dump *mon_dump = (void *)data;
- return iwl_dump_ini_mon_fill_header(fwrt, reg_data, mon_dump,
+ return iwl_dump_ini_mon_fill_header(fwrt,
+ /* no offset calculation later */
+ IWL_FW_INI_ALLOCATION_ID_DBGC1,
+ mon_dump,
&fwrt->trans->cfg->mon_dbgi_regs);
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
index 37aa4676dc94..6d1007f24b4a 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
@@ -2732,17 +2732,13 @@ static bool iwl_mvm_wait_d3_notif(struct iwl_notif_wait_data *notif_wait,
if (wowlan_info_ver < 2) {
struct iwl_wowlan_info_notif_v1 *notif_v1 = (void *)pkt->data;
- notif = kmemdup(notif_v1,
- offsetofend(struct iwl_wowlan_info_notif,
- received_beacons),
- GFP_ATOMIC);
-
+ notif = kmemdup(notif_v1, sizeof(*notif), GFP_ATOMIC);
if (!notif)
return false;
notif->tid_tear_down = notif_v1->tid_tear_down;
notif->station_id = notif_v1->station_id;
-
+ memset_after(notif, 0, station_id);
} else {
notif = (void *)pkt->data;
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
index 3963a0d4ed04..652a603c4500 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
@@ -526,6 +526,11 @@ iwl_mvm_ftm_put_target(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
rcu_read_lock();
sta = rcu_dereference(mvm->fw_id_to_mac_id[mvmvif->deflink.ap_sta_id]);
+ if (WARN_ON_ONCE(IS_ERR_OR_NULL(sta))) {
+ rcu_read_unlock();
+ return PTR_ERR_OR_ZERO(sta);
+ }
+
if (sta->mfp && (peer->ftm.trigger_based || peer->ftm.non_trigger_based))
FTM_PUT_FLAG(PMF);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index b35c96cf7ad2..205c09bc9863 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -1091,7 +1091,7 @@ static const struct dmi_system_id dmi_tas_approved_list[] = {
},
{ .ident = "LENOVO",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Lenovo"),
+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
},
},
{ .ident = "DELL",
@@ -1727,8 +1727,7 @@ int iwl_mvm_up(struct iwl_mvm *mvm)
iwl_mvm_tas_init(mvm);
iwl_mvm_leds_sync(mvm);
- if (fw_has_capa(&mvm->fw->ucode_capa,
- IWL_UCODE_TLV_CAPA_RFIM_SUPPORT)) {
+ if (iwl_rfi_supported(mvm)) {
if (iwl_mvm_eval_dsm_rfi(mvm) == DSM_VALUE_RFI_ENABLE)
iwl_rfi_send_config_cmd(mvm, NULL);
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/link.c b/drivers/net/wireless/intel/iwlwifi/mvm/link.c
index eb828de40a3c..3814915cb1a6 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/link.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/link.c
@@ -123,11 +123,13 @@ int iwl_mvm_link_changed(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
if (mvmvif->link[i]->phy_ctxt)
count++;
- /* FIXME: IWL_MVM_FW_MAX_ACTIVE_LINKS_NUM should be
- * defined per HW
- */
- if (count >= IWL_MVM_FW_MAX_ACTIVE_LINKS_NUM)
- return -EINVAL;
+ if (vif->type == NL80211_IFTYPE_AP) {
+ if (count > mvm->fw->ucode_capa.num_beacons)
+ return -EOPNOTSUPP;
+ /* this should be per HW or such */
+ } else if (count >= IWL_MVM_FW_MAX_ACTIVE_LINKS_NUM) {
+ return -EOPNOTSUPP;
+ }
}
/* Catch early if driver tries to activate or deactivate a link
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 0f01b62357c6..17f788a5ff6b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2012-2014, 2018-2022 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2023 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2016-2017 Intel Deutschland GmbH
*/
@@ -3607,7 +3607,8 @@ static bool iwl_mvm_vif_conf_from_sta(struct iwl_mvm *mvm,
struct ieee80211_vif *vif,
struct ieee80211_sta *sta)
{
- unsigned int i;
+ struct ieee80211_link_sta *link_sta;
+ unsigned int link_id;
/* Beacon interval check - firmware will crash if the beacon
* interval is less than 16. We can't avoid connecting at all,
@@ -3616,14 +3617,11 @@ static bool iwl_mvm_vif_conf_from_sta(struct iwl_mvm *mvm,
* wpa_s will blocklist the AP...
*/
- for_each_set_bit(i, (unsigned long *)&sta->valid_links,
- IEEE80211_MLD_MAX_NUM_LINKS) {
- struct ieee80211_link_sta *link_sta =
- link_sta_dereference_protected(sta, i);
+ for_each_sta_active_link(vif, sta, link_sta, link_id) {
struct ieee80211_bss_conf *link_conf =
- link_conf_dereference_protected(vif, i);
+ link_conf_dereference_protected(vif, link_id);
- if (!link_conf || !link_sta)
+ if (!link_conf)
continue;
if (link_conf->beacon_int < IWL_MVM_MIN_BEACON_INTERVAL_TU) {
@@ -3645,24 +3643,23 @@ static void iwl_mvm_vif_set_he_support(struct ieee80211_hw *hw,
bool is_sta)
{
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
- unsigned int i;
+ struct ieee80211_link_sta *link_sta;
+ unsigned int link_id;
- for_each_set_bit(i, (unsigned long *)&sta->valid_links,
- IEEE80211_MLD_MAX_NUM_LINKS) {
- struct ieee80211_link_sta *link_sta =
- link_sta_dereference_protected(sta, i);
+ for_each_sta_active_link(vif, sta, link_sta, link_id) {
struct ieee80211_bss_conf *link_conf =
- link_conf_dereference_protected(vif, i);
+ link_conf_dereference_protected(vif, link_id);
- if (!link_conf || !link_sta || !mvmvif->link[i])
+ if (!link_conf || !mvmvif->link[link_id])
continue;
link_conf->he_support = link_sta->he_cap.has_he;
if (is_sta) {
- mvmvif->link[i]->he_ru_2mhz_block = false;
+ mvmvif->link[link_id]->he_ru_2mhz_block = false;
if (link_sta->he_cap.has_he)
- iwl_mvm_check_he_obss_narrow_bw_ru(hw, vif, i,
+ iwl_mvm_check_he_obss_narrow_bw_ru(hw, vif,
+ link_id,
link_conf);
}
}
@@ -3675,6 +3672,7 @@ iwl_mvm_sta_state_notexist_to_none(struct iwl_mvm *mvm,
struct iwl_mvm_sta_state_ops *callbacks)
{
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+ struct ieee80211_link_sta *link_sta;
unsigned int i;
int ret;
@@ -3699,15 +3697,9 @@ iwl_mvm_sta_state_notexist_to_none(struct iwl_mvm *mvm,
NL80211_TDLS_SETUP);
}
- for (i = 0; i < ARRAY_SIZE(sta->link); i++) {
- struct ieee80211_link_sta *link_sta;
-
- link_sta = link_sta_dereference_protected(sta, i);
- if (!link_sta)
- continue;
-
+ for_each_sta_active_link(vif, sta, link_sta, i)
link_sta->agg.max_rc_amsdu_len = 1;
- }
+
ieee80211_sta_recalc_aggregates(sta);
if (vif->type == NL80211_IFTYPE_STATION && !sta->tdls)
@@ -3725,7 +3717,8 @@ iwl_mvm_sta_state_auth_to_assoc(struct ieee80211_hw *hw,
{
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
struct iwl_mvm_sta *mvm_sta = iwl_mvm_sta_from_mac80211(sta);
- unsigned int i;
+ struct ieee80211_link_sta *link_sta;
+ unsigned int link_id;
lockdep_assert_held(&mvm->mutex);
@@ -3751,14 +3744,13 @@ iwl_mvm_sta_state_auth_to_assoc(struct ieee80211_hw *hw,
if (!mvm->mld_api_is_used)
goto out;
- for_each_set_bit(i, (unsigned long *)&sta->valid_links,
- IEEE80211_MLD_MAX_NUM_LINKS) {
+ for_each_sta_active_link(vif, sta, link_sta, link_id) {
struct ieee80211_bss_conf *link_conf =
- link_conf_dereference_protected(vif, i);
+ link_conf_dereference_protected(vif, link_id);
if (WARN_ON(!link_conf))
return -EINVAL;
- if (!mvmvif->link[i])
+ if (!mvmvif->link[link_id])
continue;
iwl_mvm_link_changed(mvm, vif, link_conf,
@@ -3889,6 +3881,9 @@ int iwl_mvm_mac_sta_state_common(struct ieee80211_hw *hw,
* from the AP now.
*/
iwl_mvm_reset_cca_40mhz_workaround(mvm, vif);
+
+ /* Also free dup data just in case any assertions below fail */
+ kfree(mvm_sta->dup_data);
}
mutex_lock(&mvm->mutex);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c
index fbc2d5ed1006..7fb66c570959 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c
@@ -906,11 +906,12 @@ iwl_mvm_mld_change_vif_links(struct ieee80211_hw *hw,
n_active++;
}
- if (vif->type == NL80211_IFTYPE_AP &&
- n_active > mvm->fw->ucode_capa.num_beacons)
- return -EOPNOTSUPP;
- else if (n_active > 1)
+ if (vif->type == NL80211_IFTYPE_AP) {
+ if (n_active > mvm->fw->ucode_capa.num_beacons)
+ return -EOPNOTSUPP;
+ } else if (n_active > 1) {
return -EOPNOTSUPP;
+ }
}
for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c
index 0bfdf4462755..85a4ce8449ad 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c
@@ -667,15 +667,15 @@ int iwl_mvm_mld_add_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
ret = iwl_mvm_mld_alloc_sta_links(mvm, vif, sta);
if (ret)
return ret;
- }
- spin_lock_init(&mvm_sta->lock);
+ spin_lock_init(&mvm_sta->lock);
- if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status))
- ret = iwl_mvm_alloc_sta_after_restart(mvm, vif, sta);
- else
ret = iwl_mvm_sta_init(mvm, vif, sta, IWL_MVM_INVALID_STA,
STATION_TYPE_PEER);
+ } else {
+ ret = iwl_mvm_alloc_sta_after_restart(mvm, vif, sta);
+ }
+
if (ret)
goto err;
@@ -728,7 +728,7 @@ int iwl_mvm_mld_update_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
struct iwl_mvm_sta *mvm_sta = iwl_mvm_sta_from_mac80211(sta);
struct ieee80211_link_sta *link_sta;
unsigned int link_id;
- int ret = 0;
+ int ret = -EINVAL;
lockdep_assert_held(&mvm->mutex);
@@ -791,8 +791,6 @@ int iwl_mvm_mld_rm_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
lockdep_assert_held(&mvm->mutex);
- kfree(mvm_sta->dup_data);
-
/* flush its queues here since we are freeing mvm_sta */
for_each_sta_active_link(vif, sta, link_sta, link_id) {
struct iwl_mvm_link_sta *mvm_link_sta =
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index 6e7470d3a826..9e5008e0e47f 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -2347,6 +2347,7 @@ int iwl_mvm_mld_update_sta_keys(struct iwl_mvm *mvm,
u32 old_sta_mask,
u32 new_sta_mask);
+bool iwl_rfi_supported(struct iwl_mvm *mvm);
int iwl_rfi_send_config_cmd(struct iwl_mvm *mvm,
struct iwl_rfi_lut_entry *rfi_table);
struct iwl_rfi_freq_table_resp_cmd *iwl_rfi_get_freq_table(struct iwl_mvm *mvm);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
index 6d18a1fd649b..fdf60afb0f3f 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
@@ -445,6 +445,11 @@ iwl_mvm_update_mcc(struct iwl_mvm *mvm, const char *alpha2,
struct iwl_mcc_update_resp *mcc_resp = (void *)pkt->data;
n_channels = __le32_to_cpu(mcc_resp->n_channels);
+ if (iwl_rx_packet_payload_len(pkt) !=
+ struct_size(mcc_resp, channels, n_channels)) {
+ resp_cp = ERR_PTR(-EINVAL);
+ goto exit;
+ }
resp_len = sizeof(struct iwl_mcc_update_resp) +
n_channels * sizeof(__le32);
resp_cp = kmemdup(mcc_resp, resp_len, GFP_KERNEL);
@@ -456,6 +461,11 @@ iwl_mvm_update_mcc(struct iwl_mvm *mvm, const char *alpha2,
struct iwl_mcc_update_resp_v3 *mcc_resp_v3 = (void *)pkt->data;
n_channels = __le32_to_cpu(mcc_resp_v3->n_channels);
+ if (iwl_rx_packet_payload_len(pkt) !=
+ struct_size(mcc_resp_v3, channels, n_channels)) {
+ resp_cp = ERR_PTR(-EINVAL);
+ goto exit;
+ }
resp_len = sizeof(struct iwl_mcc_update_resp) +
n_channels * sizeof(__le32);
resp_cp = kzalloc(resp_len, GFP_KERNEL);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c
index bb77bc9aa821..2ecd32bed752 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2020 - 2021 Intel Corporation
+ * Copyright (C) 2020 - 2022 Intel Corporation
*/
#include "mvm.h"
@@ -70,6 +70,16 @@ static const struct iwl_rfi_lut_entry iwl_rfi_table[IWL_RFI_LUT_SIZE] = {
PHY_BAND_6, PHY_BAND_6,}},
};
+bool iwl_rfi_supported(struct iwl_mvm *mvm)
+{
+ /* The feature depends on a platform bugfix, so for now
+ * it's always disabled.
+ * When the platform support detection is implemented we should
+ * check FW TLV and platform support instead.
+ */
+ return false;
+}
+
int iwl_rfi_send_config_cmd(struct iwl_mvm *mvm, struct iwl_rfi_lut_entry *rfi_table)
{
int ret;
@@ -81,7 +91,7 @@ int iwl_rfi_send_config_cmd(struct iwl_mvm *mvm, struct iwl_rfi_lut_entry *rfi_t
.len[0] = sizeof(cmd),
};
- if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_RFIM_SUPPORT))
+ if (!iwl_rfi_supported(mvm))
return -EOPNOTSUPP;
lockdep_assert_held(&mvm->mutex);
@@ -113,7 +123,7 @@ struct iwl_rfi_freq_table_resp_cmd *iwl_rfi_get_freq_table(struct iwl_mvm *mvm)
.flags = CMD_WANT_SKB,
};
- if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_RFIM_SUPPORT))
+ if (!iwl_rfi_supported(mvm))
return ERR_PTR(-EOPNOTSUPP);
mutex_lock(&mvm->mutex);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
index a4c1e3bf4ff1..9a20468345e4 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
@@ -2691,6 +2691,8 @@ static void rs_drv_get_rate(void *mvm_r, struct ieee80211_sta *sta,
return;
lq_sta = mvm_sta;
+
+ spin_lock_bh(&lq_sta->pers.lock);
iwl_mvm_hwrate_to_tx_rate_v1(lq_sta->last_rate_n_flags,
info->band, &info->control.rates[0]);
info->control.rates[0].count = 1;
@@ -2705,6 +2707,7 @@ static void rs_drv_get_rate(void *mvm_r, struct ieee80211_sta *sta,
iwl_mvm_hwrate_to_tx_rate_v1(last_ucode_rate, info->band,
&txrc->reported_rate);
}
+ spin_unlock_bh(&lq_sta->pers.lock);
}
static void *rs_drv_alloc_sta(void *mvm_rate, struct ieee80211_sta *sta,
@@ -3261,11 +3264,11 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
/* If it's locked we are in middle of init flow
* just wait for next tx status to update the lq_sta data
*/
- if (!spin_trylock(&mvmsta->deflink.lq_sta.rs_drv.pers.lock))
+ if (!spin_trylock_bh(&mvmsta->deflink.lq_sta.rs_drv.pers.lock))
return;
__iwl_mvm_rs_tx_status(mvm, sta, tid, info, ndp);
- spin_unlock(&mvmsta->deflink.lq_sta.rs_drv.pers.lock);
+ spin_unlock_bh(&mvmsta->deflink.lq_sta.rs_drv.pers.lock);
}
#ifdef CONFIG_MAC80211_DEBUGFS
@@ -4114,9 +4117,9 @@ void iwl_mvm_rs_rate_init(struct iwl_mvm *mvm,
} else {
struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
- spin_lock(&mvmsta->deflink.lq_sta.rs_drv.pers.lock);
+ spin_lock_bh(&mvmsta->deflink.lq_sta.rs_drv.pers.lock);
rs_drv_rate_init(mvm, sta, band);
- spin_unlock(&mvmsta->deflink.lq_sta.rs_drv.pers.lock);
+ spin_unlock_bh(&mvmsta->deflink.lq_sta.rs_drv.pers.lock);
}
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index e1d02c260e69..6226e4e54a51 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -691,6 +691,11 @@ void iwl_mvm_reorder_timer_expired(struct timer_list *t)
rcu_read_lock();
sta = rcu_dereference(buf->mvm->fw_id_to_mac_id[sta_id]);
+ if (WARN_ON_ONCE(IS_ERR_OR_NULL(sta))) {
+ rcu_read_unlock();
+ goto out;
+ }
+
mvmsta = iwl_mvm_sta_from_mac80211(sta);
/* SN is set to the last expired frame + 1 */
@@ -712,6 +717,8 @@ void iwl_mvm_reorder_timer_expired(struct timer_list *t)
entries[index].e.reorder_time +
1 + RX_REORDER_BUF_TIMEOUT_MQ);
}
+
+out:
spin_unlock(&buf->lock);
}
@@ -2512,7 +2519,7 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,
RCU_INIT_POINTER(mvm->csa_tx_blocked_vif, NULL);
/* Unblock BCAST / MCAST station */
iwl_mvm_modify_all_sta_disable_tx(mvm, mvmvif, false);
- cancel_delayed_work_sync(&mvm->cs_tx_unblock_dwork);
+ cancel_delayed_work(&mvm->cs_tx_unblock_dwork);
}
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index 5469d634e289..05a54a69c135 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -281,7 +281,7 @@ static void iwl_mvm_rx_agg_session_expired(struct timer_list *t)
* A-MDPU and hence the timer continues to run. Then, the
* timer expires and sta is NULL.
*/
- if (!sta)
+ if (IS_ERR_OR_NULL(sta))
goto unlock;
mvm_sta = iwl_mvm_sta_from_mac80211(sta);
@@ -2089,9 +2089,6 @@ int iwl_mvm_rm_sta(struct iwl_mvm *mvm,
lockdep_assert_held(&mvm->mutex);
- if (iwl_mvm_has_new_rx_api(mvm))
- kfree(mvm_sta->dup_data);
-
ret = iwl_mvm_drain_sta(mvm, mvm_sta, true);
if (ret)
return ret;
@@ -3785,6 +3782,9 @@ static inline u8 *iwl_mvm_get_mac_addr(struct iwl_mvm *mvm,
u8 sta_id = mvmvif->deflink.ap_sta_id;
sta = rcu_dereference_protected(mvm->fw_id_to_mac_id[sta_id],
lockdep_is_held(&mvm->mutex));
+ if (WARN_ON_ONCE(IS_ERR_OR_NULL(sta)))
+ return NULL;
+
return sta->addr;
}
@@ -3822,6 +3822,11 @@ static int __iwl_mvm_set_sta_key(struct iwl_mvm *mvm,
if (keyconf->cipher == WLAN_CIPHER_SUITE_TKIP) {
addr = iwl_mvm_get_mac_addr(mvm, vif, sta);
+ if (!addr) {
+ IWL_ERR(mvm, "Failed to find mac address\n");
+ return -EINVAL;
+ }
+
/* get phase 1 key from mac80211 */
ieee80211_get_key_rx_seq(keyconf, 0, &seq);
ieee80211_get_tkip_rx_p1k(keyconf, addr, seq.tkip.iv32, p1k);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
index 10d7178f1071..00719e130438 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@ -1875,7 +1875,7 @@ static void iwl_mvm_rx_tx_cmd_agg(struct iwl_mvm *mvm,
mvmsta = iwl_mvm_sta_from_staid_rcu(mvm, sta_id);
sta = rcu_dereference(mvm->fw_id_to_mac_id[sta_id]);
- if (WARN_ON_ONCE(!sta || !sta->wme)) {
+ if (WARN_ON_ONCE(IS_ERR_OR_NULL(sta) || !sta->wme)) {
rcu_read_unlock();
return;
}
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index dba112394838..79115eb1c285 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -548,6 +548,8 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
IWL_DEV_INFO(0x54F0, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name),
IWL_DEV_INFO(0x7A70, 0x1691, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690s_name),
IWL_DEV_INFO(0x7A70, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name),
+ IWL_DEV_INFO(0x7AF0, 0x1691, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690s_name),
+ IWL_DEV_INFO(0x7AF0, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name),
IWL_DEV_INFO(0x271C, 0x0214, iwl9260_2ac_cfg, iwl9260_1_name),
IWL_DEV_INFO(0x7E40, 0x1691, iwl_cfg_ma_a0_gf4_a0, iwl_ax411_killer_1690s_name),
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index da1d17b73a25..64002484ccad 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -914,7 +914,10 @@ void mt7615_mac_sta_poll(struct mt7615_dev *dev)
msta = list_first_entry(&sta_poll_list, struct mt7615_sta,
poll_list);
+
+ spin_lock_bh(&dev->sta_poll_lock);
list_del_init(&msta->poll_list);
+ spin_unlock_bh(&dev->sta_poll_lock);
addr = mt7615_mac_wtbl_addr(dev, msta->wcid.idx) + 19 * 4;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac2_mac.h b/drivers/net/wireless/mediatek/mt76/mt76_connac2_mac.h
index a5ec0f631385..fabf637bdf7f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac2_mac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac2_mac.h
@@ -173,7 +173,7 @@ enum {
#define MT_TXS5_MPDU_TX_CNT GENMASK(31, 23)
#define MT_TXS6_MPDU_FAIL_CNT GENMASK(31, 23)
-
+#define MT_TXS7_MPDU_RETRY_BYTE GENMASK(22, 0)
#define MT_TXS7_MPDU_RETRY_CNT GENMASK(31, 23)
/* RXD DW0 */
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
index ee0fbfcd07d6..d39a3cc5e381 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
@@ -608,7 +608,8 @@ bool mt76_connac2_mac_fill_txs(struct mt76_dev *dev, struct mt76_wcid *wcid,
/* PPDU based reporting */
if (FIELD_GET(MT_TXS0_TXS_FORMAT, txs) > 1) {
stats->tx_bytes +=
- le32_get_bits(txs_data[5], MT_TXS5_MPDU_TX_BYTE);
+ le32_get_bits(txs_data[5], MT_TXS5_MPDU_TX_BYTE) -
+ le32_get_bits(txs_data[7], MT_TXS7_MPDU_RETRY_BYTE);
stats->tx_packets +=
le32_get_bits(txs_data[5], MT_TXS5_MPDU_TX_CNT);
stats->tx_failed +=
diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c
index 130eb7b4fd91..9b0f6053e0fa 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c
@@ -1004,10 +1004,10 @@ void mt7996_mac_write_txwi(struct mt7996_dev *dev, __le32 *txwi,
{
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct ieee80211_vif *vif = info->control.vif;
- struct mt7996_vif *mvif = (struct mt7996_vif *)vif->drv_priv;
u8 band_idx = (info->hw_queue & MT_TX_HW_QUEUE_PHY) >> 2;
u8 p_fmt, q_idx, omac_idx = 0, wmm_idx = 0;
bool is_8023 = info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP;
+ struct mt7996_vif *mvif;
u16 tx_count = 15;
u32 val;
bool beacon = !!(changed & (BSS_CHANGED_BEACON |
@@ -1015,7 +1015,8 @@ void mt7996_mac_write_txwi(struct mt7996_dev *dev, __le32 *txwi,
bool inband_disc = !!(changed & (BSS_CHANGED_UNSOL_BCAST_PROBE_RESP |
BSS_CHANGED_FILS_DISCOVERY));
- if (vif) {
+ mvif = vif ? (struct mt7996_vif *)vif->drv_priv : NULL;
+ if (mvif) {
omac_idx = mvif->mt76.omac_idx;
wmm_idx = mvif->mt76.wmm_idx;
band_idx = mvif->mt76.band_idx;
@@ -1081,14 +1082,18 @@ void mt7996_mac_write_txwi(struct mt7996_dev *dev, __le32 *txwi,
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
bool mcast = ieee80211_is_data(hdr->frame_control) &&
is_multicast_ether_addr(hdr->addr1);
- u8 idx = mvif->basic_rates_idx;
+ u8 idx = MT7996_BASIC_RATES_TBL;
- if (mcast && mvif->mcast_rates_idx)
- idx = mvif->mcast_rates_idx;
- else if (beacon && mvif->beacon_rates_idx)
- idx = mvif->beacon_rates_idx;
+ if (mvif) {
+ if (mcast && mvif->mcast_rates_idx)
+ idx = mvif->mcast_rates_idx;
+ else if (beacon && mvif->beacon_rates_idx)
+ idx = mvif->beacon_rates_idx;
+ else
+ idx = mvif->basic_rates_idx;
+ }
- txwi[6] |= FIELD_PREP(MT_TXD6_TX_RATE, idx);
+ txwi[6] |= cpu_to_le32(FIELD_PREP(MT_TXD6_TX_RATE, idx));
txwi[3] |= cpu_to_le32(MT_TXD3_BA_DISABLE);
}
}
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h
index 8eafbf1cee71..808c1c895113 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h
@@ -1803,6 +1803,7 @@ struct rtl8xxxu_priv {
u32 rege9c;
u32 regeb4;
u32 regebc;
+ u32 regrcr;
int next_mbox;
int nr_out_eps;
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
index fd8c8c6d53d6..831639d73657 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
@@ -4171,6 +4171,7 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw)
RCR_ACCEPT_MGMT_FRAME | RCR_HTC_LOC_CTRL |
RCR_APPEND_PHYSTAT | RCR_APPEND_ICV | RCR_APPEND_MIC;
rtl8xxxu_write32(priv, REG_RCR, val32);
+ priv->regrcr = val32;
if (fops->init_reg_rxfltmap) {
/* Accept all data frames */
@@ -6501,7 +6502,7 @@ static void rtl8xxxu_configure_filter(struct ieee80211_hw *hw,
unsigned int *total_flags, u64 multicast)
{
struct rtl8xxxu_priv *priv = hw->priv;
- u32 rcr = rtl8xxxu_read32(priv, REG_RCR);
+ u32 rcr = priv->regrcr;
dev_dbg(&priv->udev->dev, "%s: changed_flags %08x, total_flags %08x\n",
__func__, changed_flags, *total_flags);
@@ -6547,6 +6548,7 @@ static void rtl8xxxu_configure_filter(struct ieee80211_hw *hw,
*/
rtl8xxxu_write32(priv, REG_RCR, rcr);
+ priv->regrcr = rcr;
*total_flags &= (FIF_ALLMULTI | FIF_FCSFAIL | FIF_BCN_PRBRESP_PROMISC |
FIF_CONTROL | FIF_OTHER_BSS | FIF_PSPOLL |
diff --git a/drivers/net/wireless/realtek/rtw88/mac80211.c b/drivers/net/wireless/realtek/rtw88/mac80211.c
index 7aa6edad0d01..144618bb94c8 100644
--- a/drivers/net/wireless/realtek/rtw88/mac80211.c
+++ b/drivers/net/wireless/realtek/rtw88/mac80211.c
@@ -88,15 +88,6 @@ static int rtw_ops_config(struct ieee80211_hw *hw, u32 changed)
}
}
- if (changed & IEEE80211_CONF_CHANGE_PS) {
- if (hw->conf.flags & IEEE80211_CONF_PS) {
- rtwdev->ps_enabled = true;
- } else {
- rtwdev->ps_enabled = false;
- rtw_leave_lps(rtwdev);
- }
- }
-
if (changed & IEEE80211_CONF_CHANGE_CHANNEL)
rtw_set_channel(rtwdev);
@@ -213,6 +204,7 @@ static int rtw_ops_add_interface(struct ieee80211_hw *hw,
config |= PORT_SET_BCN_CTRL;
rtw_vif_port_config(rtwdev, rtwvif, config);
rtw_core_port_switch(rtwdev, vif);
+ rtw_recalc_lps(rtwdev, vif);
mutex_unlock(&rtwdev->mutex);
@@ -244,6 +236,7 @@ static void rtw_ops_remove_interface(struct ieee80211_hw *hw,
config |= PORT_SET_BCN_CTRL;
rtw_vif_port_config(rtwdev, rtwvif, config);
clear_bit(rtwvif->port, rtwdev->hw_port);
+ rtw_recalc_lps(rtwdev, NULL);
mutex_unlock(&rtwdev->mutex);
}
@@ -438,6 +431,9 @@ static void rtw_ops_bss_info_changed(struct ieee80211_hw *hw,
if (changed & BSS_CHANGED_ERP_SLOT)
rtw_conf_tx(rtwdev, rtwvif);
+ if (changed & BSS_CHANGED_PS)
+ rtw_recalc_lps(rtwdev, NULL);
+
rtw_vif_port_config(rtwdev, rtwvif, config);
mutex_unlock(&rtwdev->mutex);
@@ -918,7 +914,7 @@ static void rtw_ops_sta_rc_update(struct ieee80211_hw *hw,
struct rtw_sta_info *si = (struct rtw_sta_info *)sta->drv_priv;
if (changed & IEEE80211_RC_BW_CHANGED)
- rtw_update_sta_info(rtwdev, si, true);
+ ieee80211_queue_work(rtwdev->hw, &si->rc_work);
}
const struct ieee80211_ops rtw_ops = {
diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c
index 5bf6b4581557..9447a3aae3b5 100644
--- a/drivers/net/wireless/realtek/rtw88/main.c
+++ b/drivers/net/wireless/realtek/rtw88/main.c
@@ -271,8 +271,8 @@ static void rtw_watch_dog_work(struct work_struct *work)
* more than two stations associated to the AP, then we can not enter
* lps, because fw does not handle the overlapped beacon interval
*
- * mac80211 should iterate vifs and determine if driver can enter
- * ps by passing IEEE80211_CONF_PS to us, all we need to do is to
+ * rtw_recalc_lps() iterate vifs and determine if driver can enter
+ * ps by vif->type and vif->cfg.ps, all we need to do here is to
* get that vif and check if device is having traffic more than the
* threshold.
*/
@@ -319,6 +319,17 @@ static u8 rtw_acquire_macid(struct rtw_dev *rtwdev)
return mac_id;
}
+static void rtw_sta_rc_work(struct work_struct *work)
+{
+ struct rtw_sta_info *si = container_of(work, struct rtw_sta_info,
+ rc_work);
+ struct rtw_dev *rtwdev = si->rtwdev;
+
+ mutex_lock(&rtwdev->mutex);
+ rtw_update_sta_info(rtwdev, si, true);
+ mutex_unlock(&rtwdev->mutex);
+}
+
int rtw_sta_add(struct rtw_dev *rtwdev, struct ieee80211_sta *sta,
struct ieee80211_vif *vif)
{
@@ -329,12 +340,14 @@ int rtw_sta_add(struct rtw_dev *rtwdev, struct ieee80211_sta *sta,
if (si->mac_id >= RTW_MAX_MAC_ID_NUM)
return -ENOSPC;
+ si->rtwdev = rtwdev;
si->sta = sta;
si->vif = vif;
si->init_ra_lv = 1;
ewma_rssi_init(&si->avg_rssi);
for (i = 0; i < ARRAY_SIZE(sta->txq); i++)
rtw_txq_init(rtwdev, sta->txq[i]);
+ INIT_WORK(&si->rc_work, rtw_sta_rc_work);
rtw_update_sta_info(rtwdev, si, true);
rtw_fw_media_status_report(rtwdev, si->mac_id, true);
@@ -353,6 +366,8 @@ void rtw_sta_remove(struct rtw_dev *rtwdev, struct ieee80211_sta *sta,
struct rtw_sta_info *si = (struct rtw_sta_info *)sta->drv_priv;
int i;
+ cancel_work_sync(&si->rc_work);
+
rtw_release_macid(rtwdev, si->mac_id);
if (fw_exist)
rtw_fw_media_status_report(rtwdev, si->mac_id, false);
diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index a563285e90ed..9e841f6991a9 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -743,6 +743,7 @@ struct rtw_txq {
DECLARE_EWMA(rssi, 10, 16);
struct rtw_sta_info {
+ struct rtw_dev *rtwdev;
struct ieee80211_sta *sta;
struct ieee80211_vif *vif;
@@ -767,6 +768,8 @@ struct rtw_sta_info {
bool use_cfg_mask;
struct cfg80211_bitrate_mask *mask;
+
+ struct work_struct rc_work;
};
enum rtw_bfee_role {
diff --git a/drivers/net/wireless/realtek/rtw88/ps.c b/drivers/net/wireless/realtek/rtw88/ps.c
index 996365575f44..53933fb38a33 100644
--- a/drivers/net/wireless/realtek/rtw88/ps.c
+++ b/drivers/net/wireless/realtek/rtw88/ps.c
@@ -299,3 +299,46 @@ void rtw_leave_lps_deep(struct rtw_dev *rtwdev)
__rtw_leave_lps_deep(rtwdev);
}
+
+struct rtw_vif_recalc_lps_iter_data {
+ struct rtw_dev *rtwdev;
+ struct ieee80211_vif *found_vif;
+ int count;
+};
+
+static void __rtw_vif_recalc_lps(struct rtw_vif_recalc_lps_iter_data *data,
+ struct ieee80211_vif *vif)
+{
+ if (data->count < 0)
+ return;
+
+ if (vif->type != NL80211_IFTYPE_STATION) {
+ data->count = -1;
+ return;
+ }
+
+ data->count++;
+ data->found_vif = vif;
+}
+
+static void rtw_vif_recalc_lps_iter(void *data, u8 *mac,
+ struct ieee80211_vif *vif)
+{
+ __rtw_vif_recalc_lps(data, vif);
+}
+
+void rtw_recalc_lps(struct rtw_dev *rtwdev, struct ieee80211_vif *new_vif)
+{
+ struct rtw_vif_recalc_lps_iter_data data = { .rtwdev = rtwdev };
+
+ if (new_vif)
+ __rtw_vif_recalc_lps(&data, new_vif);
+ rtw_iterate_vifs(rtwdev, rtw_vif_recalc_lps_iter, &data);
+
+ if (data.count == 1 && data.found_vif->cfg.ps) {
+ rtwdev->ps_enabled = true;
+ } else {
+ rtwdev->ps_enabled = false;
+ rtw_leave_lps(rtwdev);
+ }
+}
diff --git a/drivers/net/wireless/realtek/rtw88/ps.h b/drivers/net/wireless/realtek/rtw88/ps.h
index c194386f6db5..5ae83d2526cf 100644
--- a/drivers/net/wireless/realtek/rtw88/ps.h
+++ b/drivers/net/wireless/realtek/rtw88/ps.h
@@ -23,4 +23,6 @@ void rtw_enter_lps(struct rtw_dev *rtwdev, u8 port_id);
void rtw_leave_lps(struct rtw_dev *rtwdev);
void rtw_leave_lps_deep(struct rtw_dev *rtwdev);
enum rtw_lps_deep_mode rtw_get_lps_deep_mode(struct rtw_dev *rtwdev);
+void rtw_recalc_lps(struct rtw_dev *rtwdev, struct ieee80211_vif *new_vif);
+
#endif
diff --git a/drivers/net/wireless/realtek/rtw88/sdio.c b/drivers/net/wireless/realtek/rtw88/sdio.c
index af0459a79899..06fce7c3adda 100644
--- a/drivers/net/wireless/realtek/rtw88/sdio.c
+++ b/drivers/net/wireless/realtek/rtw88/sdio.c
@@ -87,11 +87,6 @@ static void rtw_sdio_writew(struct rtw_dev *rtwdev, u16 val, u32 addr,
u8 buf[2];
int i;
- if (rtw_sdio_use_memcpy_io(rtwdev, addr, 2)) {
- sdio_writew(rtwsdio->sdio_func, val, addr, err_ret);
- return;
- }
-
*(__le16 *)buf = cpu_to_le16(val);
for (i = 0; i < 2; i++) {
@@ -125,9 +120,6 @@ static u16 rtw_sdio_readw(struct rtw_dev *rtwdev, u32 addr, int *err_ret)
u8 buf[2];
int i;
- if (rtw_sdio_use_memcpy_io(rtwdev, addr, 2))
- return sdio_readw(rtwsdio->sdio_func, addr, err_ret);
-
for (i = 0; i < 2; i++) {
buf[i] = sdio_readb(rtwsdio->sdio_func, addr + i, err_ret);
if (*err_ret)
diff --git a/drivers/net/wireless/realtek/rtw88/usb.h b/drivers/net/wireless/realtek/rtw88/usb.h
index 30647f0dd61c..ad1d7955c6a5 100644
--- a/drivers/net/wireless/realtek/rtw88/usb.h
+++ b/drivers/net/wireless/realtek/rtw88/usb.h
@@ -78,7 +78,7 @@ struct rtw_usb {
u8 pipe_interrupt;
u8 pipe_in;
u8 out_ep[RTW_USB_EP_MAX];
- u8 qsel_to_ep[TX_DESC_QSEL_MAX];
+ int qsel_to_ep[TX_DESC_QSEL_MAX];
u8 usb_txagg_num;
struct workqueue_struct *txwq, *rxwq;
diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c
index 7fc0a26a4d73..bad864d56bd5 100644
--- a/drivers/net/wireless/realtek/rtw89/core.c
+++ b/drivers/net/wireless/realtek/rtw89/core.c
@@ -2531,9 +2531,6 @@ static void rtw89_vif_enter_lps(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwv
rtwvif->tdls_peer)
return;
- if (rtwdev->total_sta_assoc > 1)
- return;
-
if (rtwvif->offchan)
return;
diff --git a/drivers/net/wireless/realtek/rtw89/mac.c b/drivers/net/wireless/realtek/rtw89/mac.c
index b8019cfc11b2..512de491a064 100644
--- a/drivers/net/wireless/realtek/rtw89/mac.c
+++ b/drivers/net/wireless/realtek/rtw89/mac.c
@@ -1425,6 +1425,8 @@ const struct rtw89_mac_size_set rtw89_mac_size = {
.wde_size4 = {RTW89_WDE_PG_64, 0, 4096,},
/* PCIE 64 */
.wde_size6 = {RTW89_WDE_PG_64, 512, 0,},
+ /* 8852B PCIE SCC */
+ .wde_size7 = {RTW89_WDE_PG_64, 510, 2,},
/* DLFW */
.wde_size9 = {RTW89_WDE_PG_64, 0, 1024,},
/* 8852C DLFW */
@@ -1449,6 +1451,8 @@ const struct rtw89_mac_size_set rtw89_mac_size = {
.wde_qt4 = {0, 0, 0, 0,},
/* PCIE 64 */
.wde_qt6 = {448, 48, 0, 16,},
+ /* 8852B PCIE SCC */
+ .wde_qt7 = {446, 48, 0, 16,},
/* 8852C DLFW */
.wde_qt17 = {0, 0, 0, 0,},
/* 8852C PCIE SCC */
diff --git a/drivers/net/wireless/realtek/rtw89/mac.h b/drivers/net/wireless/realtek/rtw89/mac.h
index a8d9847ef0b4..6ba633ccdd03 100644
--- a/drivers/net/wireless/realtek/rtw89/mac.h
+++ b/drivers/net/wireless/realtek/rtw89/mac.h
@@ -792,6 +792,7 @@ struct rtw89_mac_size_set {
const struct rtw89_dle_size wde_size0;
const struct rtw89_dle_size wde_size4;
const struct rtw89_dle_size wde_size6;
+ const struct rtw89_dle_size wde_size7;
const struct rtw89_dle_size wde_size9;
const struct rtw89_dle_size wde_size18;
const struct rtw89_dle_size wde_size19;
@@ -804,6 +805,7 @@ struct rtw89_mac_size_set {
const struct rtw89_wde_quota wde_qt0;
const struct rtw89_wde_quota wde_qt4;
const struct rtw89_wde_quota wde_qt6;
+ const struct rtw89_wde_quota wde_qt7;
const struct rtw89_wde_quota wde_qt17;
const struct rtw89_wde_quota wde_qt18;
const struct rtw89_ple_quota ple_qt4;
diff --git a/drivers/net/wireless/realtek/rtw89/mac80211.c b/drivers/net/wireless/realtek/rtw89/mac80211.c
index ee4588b61b8f..c42e31069035 100644
--- a/drivers/net/wireless/realtek/rtw89/mac80211.c
+++ b/drivers/net/wireless/realtek/rtw89/mac80211.c
@@ -89,15 +89,6 @@ static int rtw89_ops_config(struct ieee80211_hw *hw, u32 changed)
!(hw->conf.flags & IEEE80211_CONF_IDLE))
rtw89_leave_ips(rtwdev);
- if (changed & IEEE80211_CONF_CHANGE_PS) {
- if (hw->conf.flags & IEEE80211_CONF_PS) {
- rtwdev->lps_enabled = true;
- } else {
- rtw89_leave_lps(rtwdev);
- rtwdev->lps_enabled = false;
- }
- }
-
if (changed & IEEE80211_CONF_CHANGE_CHANNEL) {
rtw89_config_entity_chandef(rtwdev, RTW89_SUB_ENTITY_0,
&hw->conf.chandef);
@@ -168,6 +159,8 @@ static int rtw89_ops_add_interface(struct ieee80211_hw *hw,
rtw89_core_txq_init(rtwdev, vif->txq);
rtw89_btc_ntfy_role_info(rtwdev, rtwvif, NULL, BTC_ROLE_START);
+
+ rtw89_recalc_lps(rtwdev);
out:
mutex_unlock(&rtwdev->mutex);
@@ -192,6 +185,7 @@ static void rtw89_ops_remove_interface(struct ieee80211_hw *hw,
rtw89_mac_remove_vif(rtwdev, rtwvif);
rtw89_core_release_bit_map(rtwdev->hw_port, rtwvif->port);
list_del_init(&rtwvif->list);
+ rtw89_recalc_lps(rtwdev);
rtw89_enter_ips_by_hwflags(rtwdev);
mutex_unlock(&rtwdev->mutex);
@@ -451,6 +445,9 @@ static void rtw89_ops_bss_info_changed(struct ieee80211_hw *hw,
if (changed & BSS_CHANGED_CQM)
rtw89_fw_h2c_set_bcn_fltr_cfg(rtwdev, vif, true);
+ if (changed & BSS_CHANGED_PS)
+ rtw89_recalc_lps(rtwdev);
+
mutex_unlock(&rtwdev->mutex);
}
diff --git a/drivers/net/wireless/realtek/rtw89/ps.c b/drivers/net/wireless/realtek/rtw89/ps.c
index fa94335f699a..84201ef19c17 100644
--- a/drivers/net/wireless/realtek/rtw89/ps.c
+++ b/drivers/net/wireless/realtek/rtw89/ps.c
@@ -252,3 +252,29 @@ void rtw89_process_p2p_ps(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif)
rtw89_p2p_disable_all_noa(rtwdev, vif);
rtw89_p2p_update_noa(rtwdev, vif);
}
+
+void rtw89_recalc_lps(struct rtw89_dev *rtwdev)
+{
+ struct ieee80211_vif *vif, *found_vif = NULL;
+ struct rtw89_vif *rtwvif;
+ int count = 0;
+
+ rtw89_for_each_rtwvif(rtwdev, rtwvif) {
+ vif = rtwvif_to_vif(rtwvif);
+
+ if (vif->type != NL80211_IFTYPE_STATION) {
+ count = 0;
+ break;
+ }
+
+ count++;
+ found_vif = vif;
+ }
+
+ if (count == 1 && found_vif->cfg.ps) {
+ rtwdev->lps_enabled = true;
+ } else {
+ rtw89_leave_lps(rtwdev);
+ rtwdev->lps_enabled = false;
+ }
+}
diff --git a/drivers/net/wireless/realtek/rtw89/ps.h b/drivers/net/wireless/realtek/rtw89/ps.h
index 73c008db0426..4c18f49204b2 100644
--- a/drivers/net/wireless/realtek/rtw89/ps.h
+++ b/drivers/net/wireless/realtek/rtw89/ps.h
@@ -15,6 +15,7 @@ void rtw89_enter_ips(struct rtw89_dev *rtwdev);
void rtw89_leave_ips(struct rtw89_dev *rtwdev);
void rtw89_set_coex_ctrl_lps(struct rtw89_dev *rtwdev, bool btc_ctrl);
void rtw89_process_p2p_ps(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif);
+void rtw89_recalc_lps(struct rtw89_dev *rtwdev);
static inline void rtw89_leave_ips_by_hwflags(struct rtw89_dev *rtwdev)
{
diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852b.c b/drivers/net/wireless/realtek/rtw89/rtw8852b.c
index eaa2ea0586bc..6da1b603a9a9 100644
--- a/drivers/net/wireless/realtek/rtw89/rtw8852b.c
+++ b/drivers/net/wireless/realtek/rtw89/rtw8852b.c
@@ -18,25 +18,25 @@
RTW8852B_FW_BASENAME "-" __stringify(RTW8852B_FW_FORMAT_MAX) ".bin"
static const struct rtw89_hfc_ch_cfg rtw8852b_hfc_chcfg_pcie[] = {
- {5, 343, grp_0}, /* ACH 0 */
- {5, 343, grp_0}, /* ACH 1 */
- {5, 343, grp_0}, /* ACH 2 */
- {5, 343, grp_0}, /* ACH 3 */
+ {5, 341, grp_0}, /* ACH 0 */
+ {5, 341, grp_0}, /* ACH 1 */
+ {4, 342, grp_0}, /* ACH 2 */
+ {4, 342, grp_0}, /* ACH 3 */
{0, 0, grp_0}, /* ACH 4 */
{0, 0, grp_0}, /* ACH 5 */
{0, 0, grp_0}, /* ACH 6 */
{0, 0, grp_0}, /* ACH 7 */
- {4, 344, grp_0}, /* B0MGQ */
- {4, 344, grp_0}, /* B0HIQ */
+ {4, 342, grp_0}, /* B0MGQ */
+ {4, 342, grp_0}, /* B0HIQ */
{0, 0, grp_0}, /* B1MGQ */
{0, 0, grp_0}, /* B1HIQ */
{40, 0, 0} /* FWCMDQ */
};
static const struct rtw89_hfc_pub_cfg rtw8852b_hfc_pubcfg_pcie = {
- 448, /* Group 0 */
+ 446, /* Group 0 */
0, /* Group 1 */
- 448, /* Public Max */
+ 446, /* Public Max */
0 /* WP threshold */
};
@@ -49,13 +49,13 @@ static const struct rtw89_hfc_param_ini rtw8852b_hfc_param_ini_pcie[] = {
};
static const struct rtw89_dle_mem rtw8852b_dle_mem_pcie[] = {
- [RTW89_QTA_SCC] = {RTW89_QTA_SCC, &rtw89_mac_size.wde_size6,
- &rtw89_mac_size.ple_size6, &rtw89_mac_size.wde_qt6,
- &rtw89_mac_size.wde_qt6, &rtw89_mac_size.ple_qt18,
+ [RTW89_QTA_SCC] = {RTW89_QTA_SCC, &rtw89_mac_size.wde_size7,
+ &rtw89_mac_size.ple_size6, &rtw89_mac_size.wde_qt7,
+ &rtw89_mac_size.wde_qt7, &rtw89_mac_size.ple_qt18,
&rtw89_mac_size.ple_qt58},
- [RTW89_QTA_WOW] = {RTW89_QTA_WOW, &rtw89_mac_size.wde_size6,
- &rtw89_mac_size.ple_size6, &rtw89_mac_size.wde_qt6,
- &rtw89_mac_size.wde_qt6, &rtw89_mac_size.ple_qt18,
+ [RTW89_QTA_WOW] = {RTW89_QTA_WOW, &rtw89_mac_size.wde_size7,
+ &rtw89_mac_size.ple_size6, &rtw89_mac_size.wde_qt7,
+ &rtw89_mac_size.wde_qt7, &rtw89_mac_size.ple_qt18,
&rtw89_mac_size.ple_qt_52b_wow},
[RTW89_QTA_DLFW] = {RTW89_QTA_DLFW, &rtw89_mac_size.wde_size9,
&rtw89_mac_size.ple_size8, &rtw89_mac_size.wde_qt4,
diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c
index 9a8faaf4c6b6..89c7a1420381 100644
--- a/drivers/net/wireless/virtual/mac80211_hwsim.c
+++ b/drivers/net/wireless/virtual/mac80211_hwsim.c
@@ -5964,10 +5964,11 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info)
ret = -ENOMEM;
goto out_free;
}
+ param.pmsr_capa = pmsr_capa;
+
ret = parse_pmsr_capa(info->attrs[HWSIM_ATTR_PMSR_SUPPORT], pmsr_capa, info);
if (ret)
goto out_free;
- param.pmsr_capa = pmsr_capa;
}
ret = mac80211_hwsim_new_radio(info, &param);
diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.c b/drivers/net/wwan/iosm/iosm_ipc_imem.c
index c066b0040a3f..829515a601b3 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_imem.c
+++ b/drivers/net/wwan/iosm/iosm_ipc_imem.c
@@ -565,24 +565,32 @@ static void ipc_imem_run_state_worker(struct work_struct *instance)
struct ipc_mux_config mux_cfg;
struct iosm_imem *ipc_imem;
u8 ctrl_chl_idx = 0;
+ int ret;
ipc_imem = container_of(instance, struct iosm_imem, run_state_worker);
if (ipc_imem->phase != IPC_P_RUN) {
dev_err(ipc_imem->dev,
"Modem link down. Exit run state worker.");
- return;
+ goto err_out;
}
if (test_and_clear_bit(IOSM_DEVLINK_INIT, &ipc_imem->flag))
ipc_devlink_deinit(ipc_imem->ipc_devlink);
- if (!ipc_imem_setup_cp_mux_cap_init(ipc_imem, &mux_cfg))
- ipc_imem->mux = ipc_mux_init(&mux_cfg, ipc_imem);
+ ret = ipc_imem_setup_cp_mux_cap_init(ipc_imem, &mux_cfg);
+ if (ret < 0)
+ goto err_out;
+
+ ipc_imem->mux = ipc_mux_init(&mux_cfg, ipc_imem);
+ if (!ipc_imem->mux)
+ goto err_out;
+
+ ret = ipc_imem_wwan_channel_init(ipc_imem, mux_cfg.protocol);
+ if (ret < 0)
+ goto err_ipc_mux_deinit;
- ipc_imem_wwan_channel_init(ipc_imem, mux_cfg.protocol);
- if (ipc_imem->mux)
- ipc_imem->mux->wwan = ipc_imem->wwan;
+ ipc_imem->mux->wwan = ipc_imem->wwan;
while (ctrl_chl_idx < IPC_MEM_MAX_CHANNELS) {
if (!ipc_chnl_cfg_get(&chnl_cfg_port, ctrl_chl_idx)) {
@@ -622,6 +630,13 @@ static void ipc_imem_run_state_worker(struct work_struct *instance)
/* Complete all memory stores after setting bit */
smp_mb__after_atomic();
+
+ return;
+
+err_ipc_mux_deinit:
+ ipc_mux_deinit(ipc_imem->mux);
+err_out:
+ ipc_uevent_send(ipc_imem->dev, UEVENT_CD_READY_LINK_DOWN);
}
static void ipc_imem_handle_irq(struct iosm_imem *ipc_imem, int irq)
diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c
index 66b90cc4c346..109cf8930488 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c
+++ b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c
@@ -77,8 +77,8 @@ out:
}
/* Initialize wwan channel */
-void ipc_imem_wwan_channel_init(struct iosm_imem *ipc_imem,
- enum ipc_mux_protocol mux_type)
+int ipc_imem_wwan_channel_init(struct iosm_imem *ipc_imem,
+ enum ipc_mux_protocol mux_type)
{
struct ipc_chnl_cfg chnl_cfg = { 0 };
@@ -87,7 +87,7 @@ void ipc_imem_wwan_channel_init(struct iosm_imem *ipc_imem,
/* If modem version is invalid (0xffffffff), do not initialize WWAN. */
if (ipc_imem->cp_version == -1) {
dev_err(ipc_imem->dev, "invalid CP version");
- return;
+ return -EIO;
}
ipc_chnl_cfg_get(&chnl_cfg, ipc_imem->nr_of_channels);
@@ -104,9 +104,13 @@ void ipc_imem_wwan_channel_init(struct iosm_imem *ipc_imem,
/* WWAN registration. */
ipc_imem->wwan = ipc_wwan_init(ipc_imem, ipc_imem->dev);
- if (!ipc_imem->wwan)
+ if (!ipc_imem->wwan) {
dev_err(ipc_imem->dev,
"failed to register the ipc_wwan interfaces");
+ return -ENOMEM;
+ }
+
+ return 0;
}
/* Map SKB to DMA for transfer */
diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.h b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.h
index f8afb217d9e2..026c5bd0f999 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.h
+++ b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.h
@@ -91,9 +91,11 @@ int ipc_imem_sys_wwan_transmit(struct iosm_imem *ipc_imem, int if_id,
* MUX.
* @ipc_imem: Pointer to iosm_imem struct.
* @mux_type: Type of mux protocol.
+ *
+ * Return: 0 on success and failure value on error
*/
-void ipc_imem_wwan_channel_init(struct iosm_imem *ipc_imem,
- enum ipc_mux_protocol mux_type);
+int ipc_imem_wwan_channel_init(struct iosm_imem *ipc_imem,
+ enum ipc_mux_protocol mux_type);
/**
* ipc_imem_sys_devlink_open - Open a Flash/CD Channel link to CP
diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
index d6b166fc5c0e..bff46f7ca59f 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
+++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
@@ -626,14 +626,12 @@ static void mux_dl_adb_decode(struct iosm_mux *ipc_mux,
if (adth->signature != cpu_to_le32(IOSM_AGGR_MUX_SIG_ADTH))
goto adb_decode_err;
- if (le16_to_cpu(adth->table_length) < (sizeof(struct mux_adth) -
- sizeof(struct mux_adth_dg)))
+ if (le16_to_cpu(adth->table_length) < sizeof(struct mux_adth))
goto adb_decode_err;
/* Calculate the number of datagrams. */
nr_of_dg = (le16_to_cpu(adth->table_length) -
- sizeof(struct mux_adth) +
- sizeof(struct mux_adth_dg)) /
+ sizeof(struct mux_adth)) /
sizeof(struct mux_adth_dg);
/* Is the datagram table empty ? */
@@ -649,7 +647,7 @@ static void mux_dl_adb_decode(struct iosm_mux *ipc_mux,
}
/* New aggregated datagram table. */
- dg = &adth->dg;
+ dg = adth->dg;
if (mux_dl_process_dg(ipc_mux, adbh, dg, skb, if_id,
nr_of_dg) < 0)
goto adb_decode_err;
@@ -849,7 +847,7 @@ static void ipc_mux_ul_encode_adth(struct iosm_mux *ipc_mux,
adth->if_id = i;
adth->table_length = cpu_to_le16(adth_dg_size);
adth_dg_size -= offsetof(struct mux_adth, dg);
- memcpy(&adth->dg, ul_adb->dg[i], adth_dg_size);
+ memcpy(adth->dg, ul_adb->dg[i], adth_dg_size);
ul_adb->if_cnt++;
}
@@ -1426,14 +1424,13 @@ static int ipc_mux_get_payload_from_adb(struct iosm_mux *ipc_mux,
if (adth->signature == cpu_to_le32(IOSM_AGGR_MUX_SIG_ADTH)) {
nr_of_dg = (le16_to_cpu(adth->table_length) -
- sizeof(struct mux_adth) +
- sizeof(struct mux_adth_dg)) /
+ sizeof(struct mux_adth)) /
sizeof(struct mux_adth_dg);
if (nr_of_dg <= 0)
return payload_size;
- dg = &adth->dg;
+ dg = adth->dg;
for (i = 0; i < nr_of_dg; i++, dg++) {
if (le32_to_cpu(dg->datagram_index) <
diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h
index 5d4e3b89542c..f8df88f816c4 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h
+++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h
@@ -161,7 +161,7 @@ struct mux_adth {
u8 opt_ipv4v6;
__le32 next_table_index;
__le32 reserved2;
- struct mux_adth_dg dg;
+ struct mux_adth_dg dg[];
};
/**
diff --git a/drivers/net/wwan/t7xx/t7xx_pci.c b/drivers/net/wwan/t7xx/t7xx_pci.c
index 226fc1703e90..91256e005b84 100644
--- a/drivers/net/wwan/t7xx/t7xx_pci.c
+++ b/drivers/net/wwan/t7xx/t7xx_pci.c
@@ -45,6 +45,7 @@
#define T7XX_PCI_IREG_BASE 0
#define T7XX_PCI_EREG_BASE 2
+#define T7XX_INIT_TIMEOUT 20
#define PM_SLEEP_DIS_TIMEOUT_MS 20
#define PM_ACK_TIMEOUT_MS 1500
#define PM_AUTOSUSPEND_MS 20000
@@ -96,6 +97,7 @@ static int t7xx_pci_pm_init(struct t7xx_pci_dev *t7xx_dev)
spin_lock_init(&t7xx_dev->md_pm_lock);
init_completion(&t7xx_dev->sleep_lock_acquire);
init_completion(&t7xx_dev->pm_sr_ack);
+ init_completion(&t7xx_dev->init_done);
atomic_set(&t7xx_dev->md_pm_state, MTK_PM_INIT);
device_init_wakeup(&pdev->dev, true);
@@ -124,6 +126,7 @@ void t7xx_pci_pm_init_late(struct t7xx_pci_dev *t7xx_dev)
pm_runtime_mark_last_busy(&t7xx_dev->pdev->dev);
pm_runtime_allow(&t7xx_dev->pdev->dev);
pm_runtime_put_noidle(&t7xx_dev->pdev->dev);
+ complete_all(&t7xx_dev->init_done);
}
static int t7xx_pci_pm_reinit(struct t7xx_pci_dev *t7xx_dev)
@@ -529,6 +532,20 @@ static void t7xx_pci_shutdown(struct pci_dev *pdev)
__t7xx_pci_pm_suspend(pdev);
}
+static int t7xx_pci_pm_prepare(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct t7xx_pci_dev *t7xx_dev;
+
+ t7xx_dev = pci_get_drvdata(pdev);
+ if (!wait_for_completion_timeout(&t7xx_dev->init_done, T7XX_INIT_TIMEOUT * HZ)) {
+ dev_warn(dev, "Not ready for system sleep.\n");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
static int t7xx_pci_pm_suspend(struct device *dev)
{
return __t7xx_pci_pm_suspend(to_pci_dev(dev));
@@ -555,6 +572,7 @@ static int t7xx_pci_pm_runtime_resume(struct device *dev)
}
static const struct dev_pm_ops t7xx_pci_pm_ops = {
+ .prepare = t7xx_pci_pm_prepare,
.suspend = t7xx_pci_pm_suspend,
.resume = t7xx_pci_pm_resume,
.resume_noirq = t7xx_pci_pm_resume_noirq,
diff --git a/drivers/net/wwan/t7xx/t7xx_pci.h b/drivers/net/wwan/t7xx/t7xx_pci.h
index 112efa534eac..f08f1ab74469 100644
--- a/drivers/net/wwan/t7xx/t7xx_pci.h
+++ b/drivers/net/wwan/t7xx/t7xx_pci.h
@@ -69,6 +69,7 @@ struct t7xx_pci_dev {
struct t7xx_modem *md;
struct t7xx_ccmni_ctrl *ccmni_ctlb;
bool rgu_pci_irq_en;
+ struct completion init_done;
/* Low Power Items */
struct list_head md_pm_entities;
diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c
index f12f903a9dd1..da3e2dce8e70 100644
--- a/drivers/nfc/fdp/fdp.c
+++ b/drivers/nfc/fdp/fdp.c
@@ -762,3 +762,6 @@ EXPORT_SYMBOL(fdp_nci_remove);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("NFC NCI driver for Intel Fields Peak NFC controller");
MODULE_AUTHOR("Robert Dolca <robert.dolca@intel.com>");
+
+MODULE_FIRMWARE(FDP_OTP_PATCH_NAME);
+MODULE_FIRMWARE(FDP_RAM_PATCH_NAME);
diff --git a/drivers/nfc/nfcsim.c b/drivers/nfc/nfcsim.c
index 44eeb17ae48d..a55381f80cd6 100644
--- a/drivers/nfc/nfcsim.c
+++ b/drivers/nfc/nfcsim.c
@@ -336,10 +336,6 @@ static struct dentry *nfcsim_debugfs_root;
static void nfcsim_debugfs_init(void)
{
nfcsim_debugfs_root = debugfs_create_dir("nfcsim", NULL);
-
- if (!nfcsim_debugfs_root)
- pr_err("Could not create debugfs entry\n");
-
}
static void nfcsim_debugfs_remove(void)
diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c
index bc523ca02254..5e4f8848dce0 100644
--- a/drivers/nvme/host/constants.c
+++ b/drivers/nvme/host/constants.c
@@ -21,7 +21,7 @@ static const char * const nvme_ops[] = {
[nvme_cmd_resv_release] = "Reservation Release",
[nvme_cmd_zone_mgmt_send] = "Zone Management Send",
[nvme_cmd_zone_mgmt_recv] = "Zone Management Receive",
- [nvme_cmd_zone_append] = "Zone Management Append",
+ [nvme_cmd_zone_append] = "Zone Append",
};
static const char * const nvme_admin_ops[] = {
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 76e8f8b4098e..fdfcf2781c85 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -397,7 +397,16 @@ void nvme_complete_rq(struct request *req)
trace_nvme_complete_rq(req);
nvme_cleanup_cmd(req);
- if (ctrl->kas)
+ /*
+ * Completions of long-running commands should not be able to
+ * defer sending of periodic keep alives, since the controller
+ * may have completed processing such commands a long time ago
+ * (arbitrarily close to command submission time).
+ * req->deadline - req->timeout is the command submission time
+ * in jiffies.
+ */
+ if (ctrl->kas &&
+ req->deadline - req->timeout >= ctrl->ka_last_check_time)
ctrl->comp_seen = true;
switch (nvme_decide_disposition(req)) {
@@ -1115,7 +1124,7 @@ u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
}
EXPORT_SYMBOL_NS_GPL(nvme_passthru_start, NVME_TARGET_PASSTHRU);
-void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects,
+void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects,
struct nvme_command *cmd, int status)
{
if (effects & NVME_CMD_EFFECTS_CSE_MASK) {
@@ -1132,6 +1141,8 @@ void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects,
nvme_queue_scan(ctrl);
flush_work(&ctrl->scan_work);
}
+ if (ns)
+ return;
switch (cmd->common.opcode) {
case nvme_admin_set_features:
@@ -1161,9 +1172,25 @@ EXPORT_SYMBOL_NS_GPL(nvme_passthru_end, NVME_TARGET_PASSTHRU);
* The host should send Keep Alive commands at half of the Keep Alive Timeout
* accounting for transport roundtrip times [..].
*/
+static unsigned long nvme_keep_alive_work_period(struct nvme_ctrl *ctrl)
+{
+ unsigned long delay = ctrl->kato * HZ / 2;
+
+ /*
+ * When using Traffic Based Keep Alive, we need to run
+ * nvme_keep_alive_work at twice the normal frequency, as one
+ * command completion can postpone sending a keep alive command
+ * by up to twice the delay between runs.
+ */
+ if (ctrl->ctratt & NVME_CTRL_ATTR_TBKAS)
+ delay /= 2;
+ return delay;
+}
+
static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl)
{
- queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ / 2);
+ queue_delayed_work(nvme_wq, &ctrl->ka_work,
+ nvme_keep_alive_work_period(ctrl));
}
static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
@@ -1172,6 +1199,20 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
struct nvme_ctrl *ctrl = rq->end_io_data;
unsigned long flags;
bool startka = false;
+ unsigned long rtt = jiffies - (rq->deadline - rq->timeout);
+ unsigned long delay = nvme_keep_alive_work_period(ctrl);
+
+ /*
+ * Subtract off the keepalive RTT so nvme_keep_alive_work runs
+ * at the desired frequency.
+ */
+ if (rtt <= delay) {
+ delay -= rtt;
+ } else {
+ dev_warn(ctrl->device, "long keepalive RTT (%u ms)\n",
+ jiffies_to_msecs(rtt));
+ delay = 0;
+ }
blk_mq_free_request(rq);
@@ -1182,6 +1223,7 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
return RQ_END_IO_NONE;
}
+ ctrl->ka_last_check_time = jiffies;
ctrl->comp_seen = false;
spin_lock_irqsave(&ctrl->lock, flags);
if (ctrl->state == NVME_CTRL_LIVE ||
@@ -1189,7 +1231,7 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
startka = true;
spin_unlock_irqrestore(&ctrl->lock, flags);
if (startka)
- nvme_queue_keep_alive_work(ctrl);
+ queue_delayed_work(nvme_wq, &ctrl->ka_work, delay);
return RQ_END_IO_NONE;
}
@@ -1200,6 +1242,8 @@ static void nvme_keep_alive_work(struct work_struct *work)
bool comp_seen = ctrl->comp_seen;
struct request *rq;
+ ctrl->ka_last_check_time = jiffies;
+
if ((ctrl->ctratt & NVME_CTRL_ATTR_TBKAS) && comp_seen) {
dev_dbg(ctrl->device,
"reschedule traffic based keep-alive timer\n");
@@ -4402,7 +4446,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl)
* that were missed. We identify persistent discovery controllers by
* checking that they started once before, hence are reconnecting back.
*/
- if (test_and_set_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags) &&
+ if (test_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags) &&
nvme_discovery_ctrl(ctrl))
nvme_change_uevent(ctrl, "NVME_EVENT=rediscover");
@@ -4413,6 +4457,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl)
}
nvme_change_uevent(ctrl, "NVME_EVENT=connected");
+ set_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags);
}
EXPORT_SYMBOL_GPL(nvme_start_ctrl);
diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c
index 9e6e56c20ec9..316f3e4ca7cc 100644
--- a/drivers/nvme/host/hwmon.c
+++ b/drivers/nvme/host/hwmon.c
@@ -163,7 +163,9 @@ static umode_t nvme_hwmon_is_visible(const void *_data,
case hwmon_temp_max:
case hwmon_temp_min:
if ((!channel && data->ctrl->wctemp) ||
- (channel && data->log->temp_sensor[channel - 1])) {
+ (channel && data->log->temp_sensor[channel - 1] &&
+ !(data->ctrl->quirks &
+ NVME_QUIRK_NO_SECONDARY_TEMP_THRESH))) {
if (data->ctrl->quirks &
NVME_QUIRK_NO_TEMP_THRESH_CHANGE)
return 0444;
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 0fd0aa571cc9..2130ad65b58c 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -254,7 +254,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
blk_mq_free_request(req);
if (effects)
- nvme_passthru_end(ctrl, effects, cmd, ret);
+ nvme_passthru_end(ctrl, ns, effects, cmd, ret);
return ret;
}
@@ -521,7 +521,7 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
if (cookie != NULL && blk_rq_is_poll(req))
nvme_uring_task_cb(ioucmd, IO_URING_F_UNLOCKED);
else
- io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb);
+ io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
return RQ_END_IO_FREE;
}
@@ -543,7 +543,7 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req,
if (cookie != NULL && blk_rq_is_poll(req))
nvme_uring_task_meta_cb(ioucmd, IO_URING_F_UNLOCKED);
else
- io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_meta_cb);
+ io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_meta_cb);
return RQ_END_IO_NONE;
}
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 91a9a55227fa..98001eebd275 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -884,7 +884,6 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
if (!head->disk)
return;
- blk_mark_disk_dead(head->disk);
/* make sure all pending bios are cleaned up */
kblockd_schedule_work(&head->requeue_work);
flush_work(&head->requeue_work);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 78308f15e090..9a98c14c552a 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -149,6 +149,11 @@ enum nvme_quirks {
* Reports garbage in the namespace identifiers (eui64, nguid, uuid).
*/
NVME_QUIRK_BOGUS_NID = (1 << 18),
+
+ /*
+ * No temperature thresholds for channels other than 0 (Composite).
+ */
+ NVME_QUIRK_NO_SECONDARY_TEMP_THRESH = (1 << 19),
};
/*
@@ -324,6 +329,7 @@ struct nvme_ctrl {
struct delayed_work ka_work;
struct delayed_work failfast_work;
struct nvme_command ka_cmd;
+ unsigned long ka_last_check_time;
struct work_struct fw_act_work;
unsigned long events;
@@ -1077,7 +1083,7 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
u8 opcode);
u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode);
int nvme_execute_rq(struct request *rq, bool at_head);
-void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects,
+void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects,
struct nvme_command *cmd, int status);
struct nvme_ctrl *nvme_ctrl_from_file(struct file *file);
struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index b027e5e3f4ac..48c60f7fda0b 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2955,7 +2955,7 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
* over a single page.
*/
dev->ctrl.max_hw_sectors = min_t(u32,
- NVME_MAX_KB_SZ << 1, dma_max_mapping_size(&pdev->dev) >> 9);
+ NVME_MAX_KB_SZ << 1, dma_opt_mapping_size(&pdev->dev) >> 9);
dev->ctrl.max_segments = NVME_MAX_SEGS;
/*
@@ -3401,6 +3401,8 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
{ PCI_DEVICE(0x2646, 0x2263), /* KINGSTON A2000 NVMe SSD */
.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
+ { PCI_DEVICE(0x2646, 0x5013), /* Kingston KC3000, Kingston FURY Renegade */
+ .driver_data = NVME_QUIRK_NO_SECONDARY_TEMP_THRESH, },
{ PCI_DEVICE(0x2646, 0x5018), /* KINGSTON OM8SFP4xxxxP OS21012 NVMe SSD */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x2646, 0x5016), /* KINGSTON OM3PGP4xxxxP OS21011 NVMe SSD */
@@ -3421,6 +3423,8 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1e4B, 0x1202), /* MAXIO MAP1202 */
.driver_data = NVME_QUIRK_BOGUS_NID, },
+ { PCI_DEVICE(0x1e4B, 0x1602), /* MAXIO MAP1602 */
+ .driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1cc1, 0x5350), /* ADATA XPG GAMMIX S50 */
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1dbe, 0x5236), /* ADATA XPG GAMMIX S70 */
@@ -3440,6 +3444,10 @@ static const struct pci_device_id nvme_id_table[] = {
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE(0x10ec, 0x5763), /* TEAMGROUP T-FORCE CARDEA ZERO Z330 SSD */
.driver_data = NVME_QUIRK_BOGUS_NID, },
+ { PCI_DEVICE(0x1e4b, 0x1602), /* HS-SSD-FUTURE 2048G */
+ .driver_data = NVME_QUIRK_BOGUS_NID, },
+ { PCI_DEVICE(0x10ec, 0x5765), /* TEAMGROUP MP33 2TB SSD */
+ .driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061),
.driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, },
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065),
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index 511c980d538d..71a9c1cc57f5 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -243,7 +243,7 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w)
blk_mq_free_request(rq);
if (effects)
- nvme_passthru_end(ctrl, effects, req->cmd, status);
+ nvme_passthru_end(ctrl, ns, effects, req->cmd, status);
}
static enum rq_end_io_ret nvmet_passthru_req_done(struct request *rq,
diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c
index 2e01960f1aeb..7feb643f1370 100644
--- a/drivers/of/overlay.c
+++ b/drivers/of/overlay.c
@@ -811,6 +811,7 @@ static int init_overlay_changeset(struct overlay_changeset *ovcs)
if (!fragment->target) {
pr_err("symbols in overlay, but not in live tree\n");
ret = -EINVAL;
+ of_node_put(node);
goto err_out;
}
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index bc32662c6bb7..2d93d0c4f10d 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -489,7 +489,10 @@ struct hv_pcibus_device {
struct fwnode_handle *fwnode;
/* Protocol version negotiated with the host */
enum pci_protocol_version_t protocol_version;
+
+ struct mutex state_lock;
enum hv_pcibus_state state;
+
struct hv_device *hdev;
resource_size_t low_mmio_space;
resource_size_t high_mmio_space;
@@ -545,19 +548,10 @@ struct hv_dr_state {
struct hv_pcidev_description func[];
};
-enum hv_pcichild_state {
- hv_pcichild_init = 0,
- hv_pcichild_requirements,
- hv_pcichild_resourced,
- hv_pcichild_ejecting,
- hv_pcichild_maximum
-};
-
struct hv_pci_dev {
/* List protected by pci_rescan_remove_lock */
struct list_head list_entry;
refcount_t refs;
- enum hv_pcichild_state state;
struct pci_slot *pci_slot;
struct hv_pcidev_description desc;
bool reported_missing;
@@ -635,6 +629,11 @@ static void hv_arch_irq_unmask(struct irq_data *data)
pbus = pdev->bus;
hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);
int_desc = data->chip_data;
+ if (!int_desc) {
+ dev_warn(&hbus->hdev->device, "%s() can not unmask irq %u\n",
+ __func__, data->irq);
+ return;
+ }
local_irq_save(flags);
@@ -2004,12 +2003,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
hv_pci_onchannelcallback(hbus);
spin_unlock_irqrestore(&channel->sched_lock, flags);
- if (hpdev->state == hv_pcichild_ejecting) {
- dev_err_once(&hbus->hdev->device,
- "the device is being ejected\n");
- goto enable_tasklet;
- }
-
udelay(100);
}
@@ -2615,6 +2608,8 @@ static void pci_devices_present_work(struct work_struct *work)
if (!dr)
return;
+ mutex_lock(&hbus->state_lock);
+
/* First, mark all existing children as reported missing. */
spin_lock_irqsave(&hbus->device_list_lock, flags);
list_for_each_entry(hpdev, &hbus->children, list_entry) {
@@ -2696,6 +2691,8 @@ static void pci_devices_present_work(struct work_struct *work)
break;
}
+ mutex_unlock(&hbus->state_lock);
+
kfree(dr);
}
@@ -2844,7 +2841,7 @@ static void hv_eject_device_work(struct work_struct *work)
hpdev = container_of(work, struct hv_pci_dev, wrk);
hbus = hpdev->hbus;
- WARN_ON(hpdev->state != hv_pcichild_ejecting);
+ mutex_lock(&hbus->state_lock);
/*
* Ejection can come before or after the PCI bus has been set up, so
@@ -2882,6 +2879,8 @@ static void hv_eject_device_work(struct work_struct *work)
put_pcichild(hpdev);
put_pcichild(hpdev);
/* hpdev has been freed. Do not use it any more. */
+
+ mutex_unlock(&hbus->state_lock);
}
/**
@@ -2902,7 +2901,6 @@ static void hv_pci_eject_device(struct hv_pci_dev *hpdev)
return;
}
- hpdev->state = hv_pcichild_ejecting;
get_pcichild(hpdev);
INIT_WORK(&hpdev->wrk, hv_eject_device_work);
queue_work(hbus->wq, &hpdev->wrk);
@@ -3331,8 +3329,10 @@ static int hv_pci_enter_d0(struct hv_device *hdev)
struct pci_bus_d0_entry *d0_entry;
struct hv_pci_compl comp_pkt;
struct pci_packet *pkt;
+ bool retry = true;
int ret;
+enter_d0_retry:
/*
* Tell the host that the bus is ready to use, and moved into the
* powered-on state. This includes telling the host which region
@@ -3359,6 +3359,38 @@ static int hv_pci_enter_d0(struct hv_device *hdev)
if (ret)
goto exit;
+ /*
+ * In certain case (Kdump) the pci device of interest was
+ * not cleanly shut down and resource is still held on host
+ * side, the host could return invalid device status.
+ * We need to explicitly request host to release the resource
+ * and try to enter D0 again.
+ */
+ if (comp_pkt.completion_status < 0 && retry) {
+ retry = false;
+
+ dev_err(&hdev->device, "Retrying D0 Entry\n");
+
+ /*
+ * Hv_pci_bus_exit() calls hv_send_resource_released()
+ * to free up resources of its child devices.
+ * In the kdump kernel we need to set the
+ * wslot_res_allocated to 255 so it scans all child
+ * devices to release resources allocated in the
+ * normal kernel before panic happened.
+ */
+ hbus->wslot_res_allocated = 255;
+
+ ret = hv_pci_bus_exit(hdev, true);
+
+ if (ret == 0) {
+ kfree(pkt);
+ goto enter_d0_retry;
+ }
+ dev_err(&hdev->device,
+ "Retrying D0 failed with ret %d\n", ret);
+ }
+
if (comp_pkt.completion_status < 0) {
dev_err(&hdev->device,
"PCI Pass-through VSP failed D0 Entry with status %x\n",
@@ -3401,6 +3433,24 @@ static int hv_pci_query_relations(struct hv_device *hdev)
if (!ret)
ret = wait_for_response(hdev, &comp);
+ /*
+ * In the case of fast device addition/removal, it's possible that
+ * vmbus_sendpacket() or wait_for_response() returns -ENODEV but we
+ * already got a PCI_BUS_RELATIONS* message from the host and the
+ * channel callback already scheduled a work to hbus->wq, which can be
+ * running pci_devices_present_work() -> survey_child_resources() ->
+ * complete(&hbus->survey_event), even after hv_pci_query_relations()
+ * exits and the stack variable 'comp' is no longer valid; as a result,
+ * a hang or a page fault may happen when the complete() calls
+ * raw_spin_lock_irqsave(). Flush hbus->wq before we exit from
+ * hv_pci_query_relations() to avoid the issues. Note: if 'ret' is
+ * -ENODEV, there can't be any more work item scheduled to hbus->wq
+ * after the flush_workqueue(): see vmbus_onoffer_rescind() ->
+ * vmbus_reset_channel_cb(), vmbus_rescind_cleanup() ->
+ * channel->rescind = true.
+ */
+ flush_workqueue(hbus->wq);
+
return ret;
}
@@ -3586,7 +3636,6 @@ static int hv_pci_probe(struct hv_device *hdev,
struct hv_pcibus_device *hbus;
u16 dom_req, dom;
char *name;
- bool enter_d0_retry = true;
int ret;
bridge = devm_pci_alloc_host_bridge(&hdev->device, 0);
@@ -3598,6 +3647,7 @@ static int hv_pci_probe(struct hv_device *hdev,
return -ENOMEM;
hbus->bridge = bridge;
+ mutex_init(&hbus->state_lock);
hbus->state = hv_pcibus_init;
hbus->wslot_res_allocated = -1;
@@ -3703,49 +3753,15 @@ static int hv_pci_probe(struct hv_device *hdev,
if (ret)
goto free_fwnode;
-retry:
ret = hv_pci_query_relations(hdev);
if (ret)
goto free_irq_domain;
- ret = hv_pci_enter_d0(hdev);
- /*
- * In certain case (Kdump) the pci device of interest was
- * not cleanly shut down and resource is still held on host
- * side, the host could return invalid device status.
- * We need to explicitly request host to release the resource
- * and try to enter D0 again.
- * Since the hv_pci_bus_exit() call releases structures
- * of all its child devices, we need to start the retry from
- * hv_pci_query_relations() call, requesting host to send
- * the synchronous child device relations message before this
- * information is needed in hv_send_resources_allocated()
- * call later.
- */
- if (ret == -EPROTO && enter_d0_retry) {
- enter_d0_retry = false;
-
- dev_err(&hdev->device, "Retrying D0 Entry\n");
-
- /*
- * Hv_pci_bus_exit() calls hv_send_resources_released()
- * to free up resources of its child devices.
- * In the kdump kernel we need to set the
- * wslot_res_allocated to 255 so it scans all child
- * devices to release resources allocated in the
- * normal kernel before panic happened.
- */
- hbus->wslot_res_allocated = 255;
- ret = hv_pci_bus_exit(hdev, true);
-
- if (ret == 0)
- goto retry;
+ mutex_lock(&hbus->state_lock);
- dev_err(&hdev->device,
- "Retrying D0 failed with ret %d\n", ret);
- }
+ ret = hv_pci_enter_d0(hdev);
if (ret)
- goto free_irq_domain;
+ goto release_state_lock;
ret = hv_pci_allocate_bridge_windows(hbus);
if (ret)
@@ -3763,12 +3779,15 @@ retry:
if (ret)
goto free_windows;
+ mutex_unlock(&hbus->state_lock);
return 0;
free_windows:
hv_pci_free_bridge_windows(hbus);
exit_d0:
(void) hv_pci_bus_exit(hdev, true);
+release_state_lock:
+ mutex_unlock(&hbus->state_lock);
free_irq_domain:
irq_domain_remove(hbus->irq_domain);
free_fwnode:
@@ -4018,20 +4037,26 @@ static int hv_pci_resume(struct hv_device *hdev)
if (ret)
goto out;
+ mutex_lock(&hbus->state_lock);
+
ret = hv_pci_enter_d0(hdev);
if (ret)
- goto out;
+ goto release_state_lock;
ret = hv_send_resources_allocated(hdev);
if (ret)
- goto out;
+ goto release_state_lock;
prepopulate_bars(hbus);
hv_pci_restore_msi_state(hbus);
hbus->state = hv_pcibus_installed;
+ mutex_unlock(&hbus->state_lock);
return 0;
+
+release_state_lock:
+ mutex_unlock(&hbus->state_lock);
out:
vmbus_close(hdev->channel);
return ret;
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index f4e2a88729fd..c525867760bf 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -6003,8 +6003,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x56c1, aspm_l1_acceptable_latency
#ifdef CONFIG_PCIE_DPC
/*
- * Intel Tiger Lake and Alder Lake BIOS has a bug that clears the DPC
- * RP PIO Log Size of the integrated Thunderbolt PCIe Root Ports.
+ * Intel Ice Lake, Tiger Lake and Alder Lake BIOS has a bug that clears
+ * the DPC RP PIO Log Size of the integrated Thunderbolt PCIe Root
+ * Ports.
*/
static void dpc_log_size(struct pci_dev *dev)
{
@@ -6027,6 +6028,10 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x461f, dpc_log_size);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x462f, dpc_log_size);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x463f, dpc_log_size);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x466e, dpc_log_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x8a1d, dpc_log_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x8a1f, dpc_log_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x8a21, dpc_log_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x8a23, dpc_log_size);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a23, dpc_log_size);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a25, dpc_log_size);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a27, dpc_log_size);
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index c98e4039386d..93b7edb5f1e7 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -677,9 +677,25 @@ static inline u32 armv8pmu_getreset_flags(void)
return value;
}
+static void update_pmuserenr(u64 val)
+{
+ lockdep_assert_irqs_disabled();
+
+ /*
+ * The current PMUSERENR_EL0 value might be the value for the guest.
+ * If that's the case, have KVM keep tracking of the register value
+ * for the host EL0 so that KVM can restore it before returning to
+ * the host EL0. Otherwise, update the register now.
+ */
+ if (kvm_set_pmuserenr(val))
+ return;
+
+ write_pmuserenr(val);
+}
+
static void armv8pmu_disable_user_access(void)
{
- write_pmuserenr(0);
+ update_pmuserenr(0);
}
static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu)
@@ -695,8 +711,7 @@ static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu)
armv8pmu_write_evcntr(i, 0);
}
- write_pmuserenr(0);
- write_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR);
+ update_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR);
}
static void armv8pmu_enable_event(struct perf_event *event)
diff --git a/drivers/phy/amlogic/phy-meson-g12a-mipi-dphy-analog.c b/drivers/phy/amlogic/phy-meson-g12a-mipi-dphy-analog.c
index c14089fa7db4..cabdddbbabfd 100644
--- a/drivers/phy/amlogic/phy-meson-g12a-mipi-dphy-analog.c
+++ b/drivers/phy/amlogic/phy-meson-g12a-mipi-dphy-analog.c
@@ -70,7 +70,7 @@ static int phy_g12a_mipi_dphy_analog_power_on(struct phy *phy)
HHI_MIPI_CNTL1_BANDGAP);
regmap_write(priv->regmap, HHI_MIPI_CNTL2,
- FIELD_PREP(HHI_MIPI_CNTL2_DIF_TX_CTL0, 0x459) |
+ FIELD_PREP(HHI_MIPI_CNTL2_DIF_TX_CTL0, 0x45a) |
FIELD_PREP(HHI_MIPI_CNTL2_DIF_TX_CTL1, 0x2680));
reg = DSI_LANE_CLK;
diff --git a/drivers/phy/mediatek/phy-mtk-hdmi-mt8195.c b/drivers/phy/mediatek/phy-mtk-hdmi-mt8195.c
index caa953780bee..8aa7251de4a9 100644
--- a/drivers/phy/mediatek/phy-mtk-hdmi-mt8195.c
+++ b/drivers/phy/mediatek/phy-mtk-hdmi-mt8195.c
@@ -237,11 +237,11 @@ static int mtk_hdmi_pll_calc(struct mtk_hdmi_phy *hdmi_phy, struct clk_hw *hw,
*/
if (tmds_clk < 54 * MEGA)
txposdiv = 8;
- else if (tmds_clk >= 54 * MEGA && tmds_clk < 148.35 * MEGA)
+ else if (tmds_clk >= 54 * MEGA && (tmds_clk * 100) < 14835 * MEGA)
txposdiv = 4;
- else if (tmds_clk >= 148.35 * MEGA && tmds_clk < 296.7 * MEGA)
+ else if ((tmds_clk * 100) >= 14835 * MEGA && (tmds_clk * 10) < 2967 * MEGA)
txposdiv = 2;
- else if (tmds_clk >= 296.7 * MEGA && tmds_clk <= 594 * MEGA)
+ else if ((tmds_clk * 10) >= 2967 * MEGA && tmds_clk <= 594 * MEGA)
txposdiv = 1;
else
return -EINVAL;
@@ -324,12 +324,12 @@ static int mtk_hdmi_pll_drv_setting(struct clk_hw *hw)
clk_channel_bias = 0x34; /* 20mA */
impedance_en = 0xf;
impedance = 0x36; /* 100ohm */
- } else if (pixel_clk >= 74.175 * MEGA && pixel_clk <= 300 * MEGA) {
+ } else if (((u64)pixel_clk * 1000) >= 74175 * MEGA && pixel_clk <= 300 * MEGA) {
data_channel_bias = 0x34; /* 20mA */
clk_channel_bias = 0x2c; /* 16mA */
impedance_en = 0xf;
impedance = 0x36; /* 100ohm */
- } else if (pixel_clk >= 27 * MEGA && pixel_clk < 74.175 * MEGA) {
+ } else if (pixel_clk >= 27 * MEGA && ((u64)pixel_clk * 1000) < 74175 * MEGA) {
data_channel_bias = 0x14; /* 10mA */
clk_channel_bias = 0x14; /* 10mA */
impedance_en = 0x0;
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 6850e04c329b..87b17e5877ab 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -2472,7 +2472,7 @@ static int qmp_combo_com_init(struct qmp_combo *qmp)
ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs);
if (ret) {
dev_err(qmp->dev, "failed to enable regulators, err=%d\n", ret);
- goto err_unlock;
+ goto err_decrement_count;
}
ret = reset_control_bulk_assert(cfg->num_resets, qmp->resets);
@@ -2522,7 +2522,8 @@ err_assert_reset:
reset_control_bulk_assert(cfg->num_resets, qmp->resets);
err_disable_regulators:
regulator_bulk_disable(cfg->num_vregs, qmp->vregs);
-err_unlock:
+err_decrement_count:
+ qmp->init_count--;
mutex_unlock(&qmp->phy_mutex);
return ret;
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
index 09824be088c9..0c603bc06e09 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
@@ -379,7 +379,7 @@ static int qmp_pcie_msm8996_com_init(struct qmp_phy *qphy)
ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs);
if (ret) {
dev_err(qmp->dev, "failed to enable regulators, err=%d\n", ret);
- goto err_unlock;
+ goto err_decrement_count;
}
ret = reset_control_bulk_assert(cfg->num_resets, qmp->resets);
@@ -409,7 +409,8 @@ err_assert_reset:
reset_control_bulk_assert(cfg->num_resets, qmp->resets);
err_disable_regulators:
regulator_bulk_disable(cfg->num_vregs, qmp->vregs);
-err_unlock:
+err_decrement_count:
+ qmp->init_count--;
mutex_unlock(&qmp->phy_mutex);
return ret;
diff --git a/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c b/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c
index a59063596214..6c237f3cc66d 100644
--- a/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c
+++ b/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c
@@ -115,11 +115,11 @@ struct phy_override_seq {
*
* @cfg_ahb_clk: AHB2PHY interface clock
* @ref_clk: phy reference clock
- * @iface_clk: phy interface clock
* @phy_reset: phy reset control
* @vregs: regulator supplies bulk data
* @phy_initialized: if PHY has been initialized correctly
* @mode: contains the current mode the PHY is in
+ * @update_seq_cfg: tuning parameters for phy init
*/
struct qcom_snps_hsphy {
struct phy *phy;
diff --git a/drivers/pinctrl/meson/pinctrl-meson-axg.c b/drivers/pinctrl/meson/pinctrl-meson-axg.c
index 7bfecdfba177..d249a035c2b9 100644
--- a/drivers/pinctrl/meson/pinctrl-meson-axg.c
+++ b/drivers/pinctrl/meson/pinctrl-meson-axg.c
@@ -400,6 +400,7 @@ static struct meson_pmx_group meson_axg_periphs_groups[] = {
GPIO_GROUP(GPIOA_15),
GPIO_GROUP(GPIOA_16),
GPIO_GROUP(GPIOA_17),
+ GPIO_GROUP(GPIOA_18),
GPIO_GROUP(GPIOA_19),
GPIO_GROUP(GPIOA_20),
diff --git a/drivers/platform/mellanox/mlxbf-pmc.c b/drivers/platform/mellanox/mlxbf-pmc.c
index c2c9b0d3244c..be967d797c28 100644
--- a/drivers/platform/mellanox/mlxbf-pmc.c
+++ b/drivers/platform/mellanox/mlxbf-pmc.c
@@ -1348,9 +1348,8 @@ static int mlxbf_pmc_map_counters(struct device *dev)
for (i = 0; i < pmc->total_blocks; ++i) {
if (strstr(pmc->block_name[i], "tile")) {
- ret = sscanf(pmc->block_name[i], "tile%d", &tile_num);
- if (ret < 0)
- return ret;
+ if (sscanf(pmc->block_name[i], "tile%d", &tile_num) != 1)
+ return -EINVAL;
if (tile_num >= pmc->tile_count)
continue;
diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c
index 535581c0471c..7fc602e01487 100644
--- a/drivers/platform/surface/aggregator/controller.c
+++ b/drivers/platform/surface/aggregator/controller.c
@@ -825,7 +825,7 @@ static int ssam_cplt_init(struct ssam_cplt *cplt, struct device *dev)
cplt->dev = dev;
- cplt->wq = create_workqueue(SSAM_CPLT_WQ_NAME);
+ cplt->wq = alloc_workqueue(SSAM_CPLT_WQ_NAME, WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
if (!cplt->wq)
return -ENOMEM;
diff --git a/drivers/platform/surface/surface_aggregator_tabletsw.c b/drivers/platform/surface/surface_aggregator_tabletsw.c
index 8f52b62d1c19..c0a1a5869246 100644
--- a/drivers/platform/surface/surface_aggregator_tabletsw.c
+++ b/drivers/platform/surface/surface_aggregator_tabletsw.c
@@ -210,6 +210,7 @@ enum ssam_kip_cover_state {
SSAM_KIP_COVER_STATE_LAPTOP = 0x03,
SSAM_KIP_COVER_STATE_FOLDED_CANVAS = 0x04,
SSAM_KIP_COVER_STATE_FOLDED_BACK = 0x05,
+ SSAM_KIP_COVER_STATE_BOOK = 0x06,
};
static const char *ssam_kip_cover_state_name(struct ssam_tablet_sw *sw,
@@ -231,6 +232,9 @@ static const char *ssam_kip_cover_state_name(struct ssam_tablet_sw *sw,
case SSAM_KIP_COVER_STATE_FOLDED_BACK:
return "folded-back";
+ case SSAM_KIP_COVER_STATE_BOOK:
+ return "book";
+
default:
dev_warn(&sw->sdev->dev, "unknown KIP cover state: %u\n", state->state);
return "<unknown>";
@@ -244,6 +248,7 @@ static bool ssam_kip_cover_state_is_tablet_mode(struct ssam_tablet_sw *sw,
case SSAM_KIP_COVER_STATE_DISCONNECTED:
case SSAM_KIP_COVER_STATE_FOLDED_CANVAS:
case SSAM_KIP_COVER_STATE_FOLDED_BACK:
+ case SSAM_KIP_COVER_STATE_BOOK:
return true;
case SSAM_KIP_COVER_STATE_CLOSED:
@@ -335,6 +340,7 @@ enum ssam_pos_state_cover {
SSAM_POS_COVER_LAPTOP = 0x03,
SSAM_POS_COVER_FOLDED_CANVAS = 0x04,
SSAM_POS_COVER_FOLDED_BACK = 0x05,
+ SSAM_POS_COVER_BOOK = 0x06,
};
enum ssam_pos_state_sls {
@@ -367,6 +373,9 @@ static const char *ssam_pos_state_name_cover(struct ssam_tablet_sw *sw, u32 stat
case SSAM_POS_COVER_FOLDED_BACK:
return "folded-back";
+ case SSAM_POS_COVER_BOOK:
+ return "book";
+
default:
dev_warn(&sw->sdev->dev, "unknown device posture for type-cover: %u\n", state);
return "<unknown>";
@@ -416,6 +425,7 @@ static bool ssam_pos_state_is_tablet_mode_cover(struct ssam_tablet_sw *sw, u32 s
case SSAM_POS_COVER_DISCONNECTED:
case SSAM_POS_COVER_FOLDED_CANVAS:
case SSAM_POS_COVER_FOLDED_BACK:
+ case SSAM_POS_COVER_BOOK:
return true;
case SSAM_POS_COVER_CLOSED:
diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c
index d5bb775dadcf..7780705917b7 100644
--- a/drivers/platform/x86/amd/pmf/core.c
+++ b/drivers/platform/x86/amd/pmf/core.c
@@ -245,24 +245,29 @@ static const struct pci_device_id pmf_pci_ids[] = {
{ }
};
-int amd_pmf_init_metrics_table(struct amd_pmf_dev *dev)
+static void amd_pmf_set_dram_addr(struct amd_pmf_dev *dev)
{
u64 phys_addr;
u32 hi, low;
- INIT_DELAYED_WORK(&dev->work_buffer, amd_pmf_get_metrics);
+ phys_addr = virt_to_phys(dev->buf);
+ hi = phys_addr >> 32;
+ low = phys_addr & GENMASK(31, 0);
+
+ amd_pmf_send_cmd(dev, SET_DRAM_ADDR_HIGH, 0, hi, NULL);
+ amd_pmf_send_cmd(dev, SET_DRAM_ADDR_LOW, 0, low, NULL);
+}
+int amd_pmf_init_metrics_table(struct amd_pmf_dev *dev)
+{
/* Get Metrics Table Address */
dev->buf = kzalloc(sizeof(dev->m_table), GFP_KERNEL);
if (!dev->buf)
return -ENOMEM;
- phys_addr = virt_to_phys(dev->buf);
- hi = phys_addr >> 32;
- low = phys_addr & GENMASK(31, 0);
+ INIT_DELAYED_WORK(&dev->work_buffer, amd_pmf_get_metrics);
- amd_pmf_send_cmd(dev, SET_DRAM_ADDR_HIGH, 0, hi, NULL);
- amd_pmf_send_cmd(dev, SET_DRAM_ADDR_LOW, 0, low, NULL);
+ amd_pmf_set_dram_addr(dev);
/*
* Start collecting the metrics data after a small delay
@@ -273,6 +278,18 @@ int amd_pmf_init_metrics_table(struct amd_pmf_dev *dev)
return 0;
}
+static int amd_pmf_resume_handler(struct device *dev)
+{
+ struct amd_pmf_dev *pdev = dev_get_drvdata(dev);
+
+ if (pdev->buf)
+ amd_pmf_set_dram_addr(pdev);
+
+ return 0;
+}
+
+static DEFINE_SIMPLE_DEV_PM_OPS(amd_pmf_pm, NULL, amd_pmf_resume_handler);
+
static void amd_pmf_init_features(struct amd_pmf_dev *dev)
{
int ret;
@@ -280,6 +297,8 @@ static void amd_pmf_init_features(struct amd_pmf_dev *dev)
/* Enable Static Slider */
if (is_apmf_func_supported(dev, APMF_FUNC_STATIC_SLIDER_GRANULAR)) {
amd_pmf_init_sps(dev);
+ dev->pwr_src_notifier.notifier_call = amd_pmf_pwr_src_notify_call;
+ power_supply_reg_notifier(&dev->pwr_src_notifier);
dev_dbg(dev->dev, "SPS enabled and Platform Profiles registered\n");
}
@@ -298,8 +317,10 @@ static void amd_pmf_init_features(struct amd_pmf_dev *dev)
static void amd_pmf_deinit_features(struct amd_pmf_dev *dev)
{
- if (is_apmf_func_supported(dev, APMF_FUNC_STATIC_SLIDER_GRANULAR))
+ if (is_apmf_func_supported(dev, APMF_FUNC_STATIC_SLIDER_GRANULAR)) {
+ power_supply_unreg_notifier(&dev->pwr_src_notifier);
amd_pmf_deinit_sps(dev);
+ }
if (is_apmf_func_supported(dev, APMF_FUNC_AUTO_MODE)) {
amd_pmf_deinit_auto_mode(dev);
@@ -382,9 +403,6 @@ static int amd_pmf_probe(struct platform_device *pdev)
apmf_install_handler(dev);
amd_pmf_dbgfs_register(dev);
- dev->pwr_src_notifier.notifier_call = amd_pmf_pwr_src_notify_call;
- power_supply_reg_notifier(&dev->pwr_src_notifier);
-
dev_info(dev->dev, "registered PMF device successfully\n");
return 0;
@@ -394,7 +412,6 @@ static void amd_pmf_remove(struct platform_device *pdev)
{
struct amd_pmf_dev *dev = platform_get_drvdata(pdev);
- power_supply_unreg_notifier(&dev->pwr_src_notifier);
amd_pmf_deinit_features(dev);
apmf_acpi_deinit(dev);
amd_pmf_dbgfs_unregister(dev);
@@ -413,6 +430,7 @@ static struct platform_driver amd_pmf_driver = {
.name = "amd-pmf",
.acpi_match_table = amd_pmf_acpi_ids,
.dev_groups = amd_pmf_driver_groups,
+ .pm = pm_sleep_ptr(&amd_pmf_pm),
},
.probe = amd_pmf_probe,
.remove_new = amd_pmf_remove,
diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index e2c9a68d12df..fdf7da06af30 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -555,6 +555,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = {
{ KE_KEY, 0x71, { KEY_F13 } }, /* General-purpose button */
{ KE_IGNORE, 0x79, }, /* Charger type dectection notification */
{ KE_KEY, 0x7a, { KEY_ALS_TOGGLE } }, /* Ambient Light Sensor Toggle */
+ { KE_IGNORE, 0x7B, }, /* Charger connect/disconnect notification */
{ KE_KEY, 0x7c, { KEY_MICMUTE } },
{ KE_KEY, 0x7D, { KEY_BLUETOOTH } }, /* Bluetooth Enable */
{ KE_KEY, 0x7E, { KEY_BLUETOOTH } }, /* Bluetooth Disable */
@@ -584,6 +585,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = {
{ KE_KEY, 0xAE, { KEY_FN_F5 } }, /* Fn+F5 fan mode on 2020+ */
{ KE_KEY, 0xB3, { KEY_PROG4 } }, /* AURA */
{ KE_KEY, 0xB5, { KEY_CALC } },
+ { KE_IGNORE, 0xC0, }, /* External display connect/disconnect notification */
{ KE_KEY, 0xC4, { KEY_KBDILLUMUP } },
{ KE_KEY, 0xC5, { KEY_KBDILLUMDOWN } },
{ KE_IGNORE, 0xC6, }, /* Ambient Light Sensor notification */
diff --git a/drivers/platform/x86/intel/ifs/load.c b/drivers/platform/x86/intel/ifs/load.c
index 61dffb4c8a1d..e6ae8265f3a3 100644
--- a/drivers/platform/x86/intel/ifs/load.c
+++ b/drivers/platform/x86/intel/ifs/load.c
@@ -208,7 +208,7 @@ static int scan_chunks_sanity_check(struct device *dev)
continue;
reinit_completion(&ifs_done);
local_work.dev = dev;
- INIT_WORK(&local_work.w, copy_hashes_authenticate_chunks);
+ INIT_WORK_ONSTACK(&local_work.w, copy_hashes_authenticate_chunks);
schedule_work_on(cpu, &local_work.w);
wait_for_completion(&ifs_done);
if (ifsd->loading_error) {
diff --git a/drivers/platform/x86/intel/int3472/clk_and_regulator.c b/drivers/platform/x86/intel/int3472/clk_and_regulator.c
index 1086c3d83494..399f0623ca1b 100644
--- a/drivers/platform/x86/intel/int3472/clk_and_regulator.c
+++ b/drivers/platform/x86/intel/int3472/clk_and_regulator.c
@@ -101,9 +101,11 @@ int skl_int3472_register_clock(struct int3472_discrete_device *int3472,
int3472->clock.ena_gpio = acpi_get_and_request_gpiod(path, agpio->pin_table[0],
"int3472,clk-enable");
- if (IS_ERR(int3472->clock.ena_gpio))
- return dev_err_probe(int3472->dev, PTR_ERR(int3472->clock.ena_gpio),
- "getting clk-enable GPIO\n");
+ if (IS_ERR(int3472->clock.ena_gpio)) {
+ ret = PTR_ERR(int3472->clock.ena_gpio);
+ int3472->clock.ena_gpio = NULL;
+ return dev_err_probe(int3472->dev, ret, "getting clk-enable GPIO\n");
+ }
if (polarity == GPIO_ACTIVE_LOW)
gpiod_toggle_active_low(int3472->clock.ena_gpio);
@@ -199,8 +201,9 @@ int skl_int3472_register_regulator(struct int3472_discrete_device *int3472,
int3472->regulator.gpio = acpi_get_and_request_gpiod(path, agpio->pin_table[0],
"int3472,regulator");
if (IS_ERR(int3472->regulator.gpio)) {
- dev_err(int3472->dev, "Failed to get regulator GPIO line\n");
- return PTR_ERR(int3472->regulator.gpio);
+ ret = PTR_ERR(int3472->regulator.gpio);
+ int3472->regulator.gpio = NULL;
+ return dev_err_probe(int3472->dev, ret, "getting regulator GPIO\n");
}
/* Ensure the pin is in output mode and non-active state */
diff --git a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
index e0572a29212e..02fe360a59c7 100644
--- a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
+++ b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
@@ -304,14 +304,13 @@ struct isst_if_pkg_info {
static struct isst_if_cpu_info *isst_cpu_info;
static struct isst_if_pkg_info *isst_pkg_info;
-#define ISST_MAX_PCI_DOMAINS 8
-
static struct pci_dev *_isst_if_get_pci_dev(int cpu, int bus_no, int dev, int fn)
{
struct pci_dev *matched_pci_dev = NULL;
struct pci_dev *pci_dev = NULL;
+ struct pci_dev *_pci_dev = NULL;
int no_matches = 0, pkg_id;
- int i, bus_number;
+ int bus_number;
if (bus_no < 0 || bus_no >= ISST_MAX_BUS_NUMBER || cpu < 0 ||
cpu >= nr_cpu_ids || cpu >= num_possible_cpus())
@@ -323,12 +322,11 @@ static struct pci_dev *_isst_if_get_pci_dev(int cpu, int bus_no, int dev, int fn
if (bus_number < 0)
return NULL;
- for (i = 0; i < ISST_MAX_PCI_DOMAINS; ++i) {
- struct pci_dev *_pci_dev;
+ for_each_pci_dev(_pci_dev) {
int node;
- _pci_dev = pci_get_domain_bus_and_slot(i, bus_number, PCI_DEVFN(dev, fn));
- if (!_pci_dev)
+ if (_pci_dev->bus->number != bus_number ||
+ _pci_dev->devfn != PCI_DEVFN(dev, fn))
continue;
++no_matches;
diff --git a/drivers/power/supply/ab8500_btemp.c b/drivers/power/supply/ab8500_btemp.c
index 307ee6f71042..6f83e99d2eb7 100644
--- a/drivers/power/supply/ab8500_btemp.c
+++ b/drivers/power/supply/ab8500_btemp.c
@@ -624,10 +624,8 @@ static int ab8500_btemp_get_ext_psy_data(struct device *dev, void *data)
*/
static void ab8500_btemp_external_power_changed(struct power_supply *psy)
{
- struct ab8500_btemp *di = power_supply_get_drvdata(psy);
-
- class_for_each_device(power_supply_class, NULL,
- di->btemp_psy, ab8500_btemp_get_ext_psy_data);
+ class_for_each_device(power_supply_class, NULL, psy,
+ ab8500_btemp_get_ext_psy_data);
}
/* ab8500 btemp driver interrupts and their respective isr */
diff --git a/drivers/power/supply/ab8500_fg.c b/drivers/power/supply/ab8500_fg.c
index 41a7bff9ac37..53560fbb6dcd 100644
--- a/drivers/power/supply/ab8500_fg.c
+++ b/drivers/power/supply/ab8500_fg.c
@@ -2407,10 +2407,8 @@ out:
*/
static void ab8500_fg_external_power_changed(struct power_supply *psy)
{
- struct ab8500_fg *di = power_supply_get_drvdata(psy);
-
- class_for_each_device(power_supply_class, NULL,
- di->fg_psy, ab8500_fg_get_ext_psy_data);
+ class_for_each_device(power_supply_class, NULL, psy,
+ ab8500_fg_get_ext_psy_data);
}
/**
diff --git a/drivers/power/supply/axp288_fuel_gauge.c b/drivers/power/supply/axp288_fuel_gauge.c
index 05f413178462..3be6f3b10ea4 100644
--- a/drivers/power/supply/axp288_fuel_gauge.c
+++ b/drivers/power/supply/axp288_fuel_gauge.c
@@ -507,7 +507,7 @@ static void fuel_gauge_external_power_changed(struct power_supply *psy)
mutex_lock(&info->lock);
info->valid = 0; /* Force updating of the cached registers */
mutex_unlock(&info->lock);
- power_supply_changed(info->bat);
+ power_supply_changed(psy);
}
static struct power_supply_desc fuel_gauge_desc = {
diff --git a/drivers/power/supply/bq24190_charger.c b/drivers/power/supply/bq24190_charger.c
index de67b985f0a9..dc33f00fcc06 100644
--- a/drivers/power/supply/bq24190_charger.c
+++ b/drivers/power/supply/bq24190_charger.c
@@ -1262,6 +1262,7 @@ static void bq24190_input_current_limit_work(struct work_struct *work)
bq24190_charger_set_property(bdi->charger,
POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT,
&val);
+ power_supply_changed(bdi->charger);
}
/* Sync the input-current-limit with our parent supply (if we have one) */
diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c
index 22cde35eb144..f8636cf86505 100644
--- a/drivers/power/supply/bq25890_charger.c
+++ b/drivers/power/supply/bq25890_charger.c
@@ -750,7 +750,7 @@ static void bq25890_charger_external_power_changed(struct power_supply *psy)
if (bq->chip_version != BQ25892)
return;
- ret = power_supply_get_property_from_supplier(bq->charger,
+ ret = power_supply_get_property_from_supplier(psy,
POWER_SUPPLY_PROP_USB_TYPE,
&val);
if (ret)
@@ -775,6 +775,7 @@ static void bq25890_charger_external_power_changed(struct power_supply *psy)
}
bq25890_field_write(bq, F_IINLIM, input_current_limit);
+ power_supply_changed(psy);
}
static int bq25890_get_chip_state(struct bq25890_device *bq,
@@ -1106,6 +1107,8 @@ static void bq25890_pump_express_work(struct work_struct *data)
dev_info(bq->dev, "Hi-voltage charging requested, input voltage is %d mV\n",
voltage);
+ power_supply_changed(bq->charger);
+
return;
error_print:
bq25890_field_write(bq, F_PUMPX_EN, 0);
diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c
index 5ff6f44fd47b..4296600e8912 100644
--- a/drivers/power/supply/bq27xxx_battery.c
+++ b/drivers/power/supply/bq27xxx_battery.c
@@ -1083,10 +1083,8 @@ static int poll_interval_param_set(const char *val, const struct kernel_param *k
return ret;
mutex_lock(&bq27xxx_list_lock);
- list_for_each_entry(di, &bq27xxx_battery_devices, list) {
- cancel_delayed_work_sync(&di->work);
- schedule_delayed_work(&di->work, 0);
- }
+ list_for_each_entry(di, &bq27xxx_battery_devices, list)
+ mod_delayed_work(system_wq, &di->work, 0);
mutex_unlock(&bq27xxx_list_lock);
return ret;
@@ -1761,60 +1759,6 @@ static int bq27xxx_battery_read_health(struct bq27xxx_device_info *di)
return POWER_SUPPLY_HEALTH_GOOD;
}
-void bq27xxx_battery_update(struct bq27xxx_device_info *di)
-{
- struct bq27xxx_reg_cache cache = {0, };
- bool has_singe_flag = di->opts & BQ27XXX_O_ZERO;
-
- cache.flags = bq27xxx_read(di, BQ27XXX_REG_FLAGS, has_singe_flag);
- if ((cache.flags & 0xff) == 0xff)
- cache.flags = -1; /* read error */
- if (cache.flags >= 0) {
- cache.temperature = bq27xxx_battery_read_temperature(di);
- if (di->regs[BQ27XXX_REG_TTE] != INVALID_REG_ADDR)
- cache.time_to_empty = bq27xxx_battery_read_time(di, BQ27XXX_REG_TTE);
- if (di->regs[BQ27XXX_REG_TTECP] != INVALID_REG_ADDR)
- cache.time_to_empty_avg = bq27xxx_battery_read_time(di, BQ27XXX_REG_TTECP);
- if (di->regs[BQ27XXX_REG_TTF] != INVALID_REG_ADDR)
- cache.time_to_full = bq27xxx_battery_read_time(di, BQ27XXX_REG_TTF);
-
- cache.charge_full = bq27xxx_battery_read_fcc(di);
- cache.capacity = bq27xxx_battery_read_soc(di);
- if (di->regs[BQ27XXX_REG_AE] != INVALID_REG_ADDR)
- cache.energy = bq27xxx_battery_read_energy(di);
- di->cache.flags = cache.flags;
- cache.health = bq27xxx_battery_read_health(di);
- if (di->regs[BQ27XXX_REG_CYCT] != INVALID_REG_ADDR)
- cache.cycle_count = bq27xxx_battery_read_cyct(di);
-
- /* We only have to read charge design full once */
- if (di->charge_design_full <= 0)
- di->charge_design_full = bq27xxx_battery_read_dcap(di);
- }
-
- if ((di->cache.capacity != cache.capacity) ||
- (di->cache.flags != cache.flags))
- power_supply_changed(di->bat);
-
- if (memcmp(&di->cache, &cache, sizeof(cache)) != 0)
- di->cache = cache;
-
- di->last_update = jiffies;
-}
-EXPORT_SYMBOL_GPL(bq27xxx_battery_update);
-
-static void bq27xxx_battery_poll(struct work_struct *work)
-{
- struct bq27xxx_device_info *di =
- container_of(work, struct bq27xxx_device_info,
- work.work);
-
- bq27xxx_battery_update(di);
-
- if (poll_interval > 0)
- schedule_delayed_work(&di->work, poll_interval * HZ);
-}
-
static bool bq27xxx_battery_is_full(struct bq27xxx_device_info *di, int flags)
{
if (di->opts & BQ27XXX_O_ZERO)
@@ -1833,7 +1777,8 @@ static bool bq27xxx_battery_is_full(struct bq27xxx_device_info *di, int flags)
static int bq27xxx_battery_current_and_status(
struct bq27xxx_device_info *di,
union power_supply_propval *val_curr,
- union power_supply_propval *val_status)
+ union power_supply_propval *val_status,
+ struct bq27xxx_reg_cache *cache)
{
bool single_flags = (di->opts & BQ27XXX_O_ZERO);
int curr;
@@ -1845,10 +1790,14 @@ static int bq27xxx_battery_current_and_status(
return curr;
}
- flags = bq27xxx_read(di, BQ27XXX_REG_FLAGS, single_flags);
- if (flags < 0) {
- dev_err(di->dev, "error reading flags\n");
- return flags;
+ if (cache) {
+ flags = cache->flags;
+ } else {
+ flags = bq27xxx_read(di, BQ27XXX_REG_FLAGS, single_flags);
+ if (flags < 0) {
+ dev_err(di->dev, "error reading flags\n");
+ return flags;
+ }
}
if (di->opts & BQ27XXX_O_ZERO) {
@@ -1883,6 +1832,78 @@ static int bq27xxx_battery_current_and_status(
return 0;
}
+static void bq27xxx_battery_update_unlocked(struct bq27xxx_device_info *di)
+{
+ union power_supply_propval status = di->last_status;
+ struct bq27xxx_reg_cache cache = {0, };
+ bool has_singe_flag = di->opts & BQ27XXX_O_ZERO;
+
+ cache.flags = bq27xxx_read(di, BQ27XXX_REG_FLAGS, has_singe_flag);
+ if ((cache.flags & 0xff) == 0xff)
+ cache.flags = -1; /* read error */
+ if (cache.flags >= 0) {
+ cache.temperature = bq27xxx_battery_read_temperature(di);
+ if (di->regs[BQ27XXX_REG_TTE] != INVALID_REG_ADDR)
+ cache.time_to_empty = bq27xxx_battery_read_time(di, BQ27XXX_REG_TTE);
+ if (di->regs[BQ27XXX_REG_TTECP] != INVALID_REG_ADDR)
+ cache.time_to_empty_avg = bq27xxx_battery_read_time(di, BQ27XXX_REG_TTECP);
+ if (di->regs[BQ27XXX_REG_TTF] != INVALID_REG_ADDR)
+ cache.time_to_full = bq27xxx_battery_read_time(di, BQ27XXX_REG_TTF);
+
+ cache.charge_full = bq27xxx_battery_read_fcc(di);
+ cache.capacity = bq27xxx_battery_read_soc(di);
+ if (di->regs[BQ27XXX_REG_AE] != INVALID_REG_ADDR)
+ cache.energy = bq27xxx_battery_read_energy(di);
+ di->cache.flags = cache.flags;
+ cache.health = bq27xxx_battery_read_health(di);
+ if (di->regs[BQ27XXX_REG_CYCT] != INVALID_REG_ADDR)
+ cache.cycle_count = bq27xxx_battery_read_cyct(di);
+
+ /*
+ * On gauges with signed current reporting the current must be
+ * checked to detect charging <-> discharging status changes.
+ */
+ if (!(di->opts & BQ27XXX_O_ZERO))
+ bq27xxx_battery_current_and_status(di, NULL, &status, &cache);
+
+ /* We only have to read charge design full once */
+ if (di->charge_design_full <= 0)
+ di->charge_design_full = bq27xxx_battery_read_dcap(di);
+ }
+
+ if ((di->cache.capacity != cache.capacity) ||
+ (di->cache.flags != cache.flags) ||
+ (di->last_status.intval != status.intval)) {
+ di->last_status.intval = status.intval;
+ power_supply_changed(di->bat);
+ }
+
+ if (memcmp(&di->cache, &cache, sizeof(cache)) != 0)
+ di->cache = cache;
+
+ di->last_update = jiffies;
+
+ if (!di->removed && poll_interval > 0)
+ mod_delayed_work(system_wq, &di->work, poll_interval * HZ);
+}
+
+void bq27xxx_battery_update(struct bq27xxx_device_info *di)
+{
+ mutex_lock(&di->lock);
+ bq27xxx_battery_update_unlocked(di);
+ mutex_unlock(&di->lock);
+}
+EXPORT_SYMBOL_GPL(bq27xxx_battery_update);
+
+static void bq27xxx_battery_poll(struct work_struct *work)
+{
+ struct bq27xxx_device_info *di =
+ container_of(work, struct bq27xxx_device_info,
+ work.work);
+
+ bq27xxx_battery_update(di);
+}
+
/*
* Get the average power in µW
* Return < 0 if something fails.
@@ -1985,10 +2006,8 @@ static int bq27xxx_battery_get_property(struct power_supply *psy,
struct bq27xxx_device_info *di = power_supply_get_drvdata(psy);
mutex_lock(&di->lock);
- if (time_is_before_jiffies(di->last_update + 5 * HZ)) {
- cancel_delayed_work_sync(&di->work);
- bq27xxx_battery_poll(&di->work.work);
- }
+ if (time_is_before_jiffies(di->last_update + 5 * HZ))
+ bq27xxx_battery_update_unlocked(di);
mutex_unlock(&di->lock);
if (psp != POWER_SUPPLY_PROP_PRESENT && di->cache.flags < 0)
@@ -1996,7 +2015,7 @@ static int bq27xxx_battery_get_property(struct power_supply *psy,
switch (psp) {
case POWER_SUPPLY_PROP_STATUS:
- ret = bq27xxx_battery_current_and_status(di, NULL, val);
+ ret = bq27xxx_battery_current_and_status(di, NULL, val, NULL);
break;
case POWER_SUPPLY_PROP_VOLTAGE_NOW:
ret = bq27xxx_battery_voltage(di, val);
@@ -2005,7 +2024,7 @@ static int bq27xxx_battery_get_property(struct power_supply *psy,
val->intval = di->cache.flags < 0 ? 0 : 1;
break;
case POWER_SUPPLY_PROP_CURRENT_NOW:
- ret = bq27xxx_battery_current_and_status(di, val, NULL);
+ ret = bq27xxx_battery_current_and_status(di, val, NULL, NULL);
break;
case POWER_SUPPLY_PROP_CAPACITY:
ret = bq27xxx_simple_value(di->cache.capacity, val);
@@ -2078,8 +2097,8 @@ static void bq27xxx_external_power_changed(struct power_supply *psy)
{
struct bq27xxx_device_info *di = power_supply_get_drvdata(psy);
- cancel_delayed_work_sync(&di->work);
- schedule_delayed_work(&di->work, 0);
+ /* After charger plug in/out wait 0.5s for things to stabilize */
+ mod_delayed_work(system_wq, &di->work, HZ / 2);
}
int bq27xxx_battery_setup(struct bq27xxx_device_info *di)
@@ -2127,22 +2146,18 @@ EXPORT_SYMBOL_GPL(bq27xxx_battery_setup);
void bq27xxx_battery_teardown(struct bq27xxx_device_info *di)
{
- /*
- * power_supply_unregister call bq27xxx_battery_get_property which
- * call bq27xxx_battery_poll.
- * Make sure that bq27xxx_battery_poll will not call
- * schedule_delayed_work again after unregister (which cause OOPS).
- */
- poll_interval = 0;
-
- cancel_delayed_work_sync(&di->work);
-
- power_supply_unregister(di->bat);
-
mutex_lock(&bq27xxx_list_lock);
list_del(&di->list);
mutex_unlock(&bq27xxx_list_lock);
+ /* Set removed to avoid bq27xxx_battery_update() re-queuing the work */
+ mutex_lock(&di->lock);
+ di->removed = true;
+ mutex_unlock(&di->lock);
+
+ cancel_delayed_work_sync(&di->work);
+
+ power_supply_unregister(di->bat);
mutex_destroy(&di->lock);
}
EXPORT_SYMBOL_GPL(bq27xxx_battery_teardown);
diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c
index f8768997333b..6d3c74876339 100644
--- a/drivers/power/supply/bq27xxx_battery_i2c.c
+++ b/drivers/power/supply/bq27xxx_battery_i2c.c
@@ -179,7 +179,7 @@ static int bq27xxx_battery_i2c_probe(struct i2c_client *client)
i2c_set_clientdata(client, di);
if (client->irq) {
- ret = devm_request_threaded_irq(&client->dev, client->irq,
+ ret = request_threaded_irq(client->irq,
NULL, bq27xxx_battery_irq_handler_thread,
IRQF_ONESHOT,
di->name, di);
@@ -209,6 +209,7 @@ static void bq27xxx_battery_i2c_remove(struct i2c_client *client)
{
struct bq27xxx_device_info *di = i2c_get_clientdata(client);
+ free_irq(client->irq, di);
bq27xxx_battery_teardown(di);
mutex_lock(&battery_mutex);
diff --git a/drivers/power/supply/mt6360_charger.c b/drivers/power/supply/mt6360_charger.c
index 92e48e3a4853..1305cba61edd 100644
--- a/drivers/power/supply/mt6360_charger.c
+++ b/drivers/power/supply/mt6360_charger.c
@@ -796,7 +796,9 @@ static int mt6360_charger_probe(struct platform_device *pdev)
mci->vinovp = 6500000;
mutex_init(&mci->chgdet_lock);
platform_set_drvdata(pdev, mci);
- devm_work_autocancel(&pdev->dev, &mci->chrdet_work, mt6360_chrdet_work);
+ ret = devm_work_autocancel(&pdev->dev, &mci->chrdet_work, mt6360_chrdet_work);
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret, "Failed to set delayed work\n");
ret = device_property_read_u32(&pdev->dev, "richtek,vinovp-microvolt", &mci->vinovp);
if (ret)
diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c
index ab986dbace16..3791aec69ddc 100644
--- a/drivers/power/supply/power_supply_core.c
+++ b/drivers/power/supply/power_supply_core.c
@@ -348,6 +348,10 @@ static int __power_supply_is_system_supplied(struct device *dev, void *data)
struct power_supply *psy = dev_get_drvdata(dev);
unsigned int *count = data;
+ if (!psy->desc->get_property(psy, POWER_SUPPLY_PROP_SCOPE, &ret))
+ if (ret.intval == POWER_SUPPLY_SCOPE_DEVICE)
+ return 0;
+
(*count)++;
if (psy->desc->type != POWER_SUPPLY_TYPE_BATTERY)
if (!psy->desc->get_property(psy, POWER_SUPPLY_PROP_ONLINE,
@@ -366,8 +370,8 @@ int power_supply_is_system_supplied(void)
__power_supply_is_system_supplied);
/*
- * If no power class device was found at all, most probably we are
- * running on a desktop system, so assume we are on mains power.
+ * If no system scope power class device was found at all, most probably we
+ * are running on a desktop system, so assume we are on mains power.
*/
if (count == 0)
return 1;
@@ -573,7 +577,7 @@ int power_supply_get_battery_info(struct power_supply *psy,
struct power_supply_battery_info *info;
struct device_node *battery_np = NULL;
struct fwnode_reference_args args;
- struct fwnode_handle *fwnode;
+ struct fwnode_handle *fwnode = NULL;
const char *value;
int err, len, index;
const __be32 *list;
@@ -585,7 +589,7 @@ int power_supply_get_battery_info(struct power_supply *psy,
return -ENODEV;
fwnode = fwnode_handle_get(of_fwnode_handle(battery_np));
- } else {
+ } else if (psy->dev.parent) {
err = fwnode_property_get_reference_args(
dev_fwnode(psy->dev.parent),
"monitored-battery", NULL, 0, 0, &args);
@@ -595,6 +599,9 @@ int power_supply_get_battery_info(struct power_supply *psy,
fwnode = args.fwnode;
}
+ if (!fwnode)
+ return -ENOENT;
+
err = fwnode_property_read_string(fwnode, "compatible", &value);
if (err)
goto out_put_node;
diff --git a/drivers/power/supply/power_supply_leds.c b/drivers/power/supply/power_supply_leds.c
index 702bf83f6e6d..0674483279d7 100644
--- a/drivers/power/supply/power_supply_leds.c
+++ b/drivers/power/supply/power_supply_leds.c
@@ -35,8 +35,9 @@ static void power_supply_update_bat_leds(struct power_supply *psy)
led_trigger_event(psy->charging_full_trig, LED_FULL);
led_trigger_event(psy->charging_trig, LED_OFF);
led_trigger_event(psy->full_trig, LED_FULL);
- led_trigger_event(psy->charging_blink_full_solid_trig,
- LED_FULL);
+ /* Going from blink to LED on requires a LED_OFF event to stop blink */
+ led_trigger_event(psy->charging_blink_full_solid_trig, LED_OFF);
+ led_trigger_event(psy->charging_blink_full_solid_trig, LED_FULL);
break;
case POWER_SUPPLY_STATUS_CHARGING:
led_trigger_event(psy->charging_full_trig, LED_FULL);
diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c
index ba3b125cd66e..06e5b6b0e255 100644
--- a/drivers/power/supply/power_supply_sysfs.c
+++ b/drivers/power/supply/power_supply_sysfs.c
@@ -286,7 +286,8 @@ static ssize_t power_supply_show_property(struct device *dev,
if (ret < 0) {
if (ret == -ENODATA)
- dev_dbg(dev, "driver has no data for `%s' property\n",
+ dev_dbg_ratelimited(dev,
+ "driver has no data for `%s' property\n",
attr->attr.name);
else if (ret != -ENODEV && ret != -EAGAIN)
dev_err_ratelimited(dev,
diff --git a/drivers/power/supply/rt9467-charger.c b/drivers/power/supply/rt9467-charger.c
index 73f744a3155d..ea33693b6977 100644
--- a/drivers/power/supply/rt9467-charger.c
+++ b/drivers/power/supply/rt9467-charger.c
@@ -1023,7 +1023,7 @@ static int rt9467_request_interrupt(struct rt9467_chg_data *data)
for (i = 0; i < num_chg_irqs; i++) {
virq = regmap_irq_get_virq(data->irq_chip_data, chg_irqs[i].hwirq);
if (virq <= 0)
- return dev_err_probe(dev, virq, "Failed to get (%s) irq\n",
+ return dev_err_probe(dev, -EINVAL, "Failed to get (%s) irq\n",
chg_irqs[i].name);
ret = devm_request_threaded_irq(dev, virq, NULL, chg_irqs[i].handler,
diff --git a/drivers/power/supply/sbs-charger.c b/drivers/power/supply/sbs-charger.c
index 75ebcbf0a788..a14e89ac0369 100644
--- a/drivers/power/supply/sbs-charger.c
+++ b/drivers/power/supply/sbs-charger.c
@@ -24,7 +24,7 @@
#define SBS_CHARGER_REG_STATUS 0x13
#define SBS_CHARGER_REG_ALARM_WARNING 0x16
-#define SBS_CHARGER_STATUS_CHARGE_INHIBITED BIT(1)
+#define SBS_CHARGER_STATUS_CHARGE_INHIBITED BIT(0)
#define SBS_CHARGER_STATUS_RES_COLD BIT(9)
#define SBS_CHARGER_STATUS_RES_HOT BIT(10)
#define SBS_CHARGER_STATUS_BATTERY_PRESENT BIT(14)
diff --git a/drivers/power/supply/sc27xx_fuel_gauge.c b/drivers/power/supply/sc27xx_fuel_gauge.c
index 632977f84b95..bd23c4d9fed4 100644
--- a/drivers/power/supply/sc27xx_fuel_gauge.c
+++ b/drivers/power/supply/sc27xx_fuel_gauge.c
@@ -733,13 +733,6 @@ static int sc27xx_fgu_set_property(struct power_supply *psy,
return ret;
}
-static void sc27xx_fgu_external_power_changed(struct power_supply *psy)
-{
- struct sc27xx_fgu_data *data = power_supply_get_drvdata(psy);
-
- power_supply_changed(data->battery);
-}
-
static int sc27xx_fgu_property_is_writeable(struct power_supply *psy,
enum power_supply_property psp)
{
@@ -774,7 +767,7 @@ static const struct power_supply_desc sc27xx_fgu_desc = {
.num_properties = ARRAY_SIZE(sc27xx_fgu_props),
.get_property = sc27xx_fgu_get_property,
.set_property = sc27xx_fgu_set_property,
- .external_power_changed = sc27xx_fgu_external_power_changed,
+ .external_power_changed = power_supply_changed,
.property_is_writeable = sc27xx_fgu_property_is_writeable,
.no_thermal = true,
};
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index dc741ac156c3..698ab7f5004b 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -5256,7 +5256,7 @@ static void rdev_init_debugfs(struct regulator_dev *rdev)
}
rdev->debugfs = debugfs_create_dir(rname, debugfs_root);
- if (!rdev->debugfs) {
+ if (IS_ERR(rdev->debugfs)) {
rdev_warn(rdev, "Failed to create debugfs directory\n");
return;
}
@@ -6178,7 +6178,7 @@ static int __init regulator_init(void)
ret = class_register(&regulator_class);
debugfs_root = debugfs_create_dir("regulator", NULL);
- if (!debugfs_root)
+ if (IS_ERR(debugfs_root))
pr_warn("regulator: Failed to create debugfs directory\n");
#ifdef CONFIG_DEBUG_FS
diff --git a/drivers/regulator/mt6359-regulator.c b/drivers/regulator/mt6359-regulator.c
index 1849566784ab..3eb86ec21d08 100644
--- a/drivers/regulator/mt6359-regulator.c
+++ b/drivers/regulator/mt6359-regulator.c
@@ -951,9 +951,12 @@ static int mt6359_regulator_probe(struct platform_device *pdev)
struct regulator_config config = {};
struct regulator_dev *rdev;
struct mt6359_regulator_info *mt6359_info;
- int i, hw_ver;
+ int i, hw_ver, ret;
+
+ ret = regmap_read(mt6397->regmap, MT6359P_HWCID, &hw_ver);
+ if (ret)
+ return ret;
- regmap_read(mt6397->regmap, MT6359P_HWCID, &hw_ver);
if (hw_ver >= MT6359P_CHIP_VER)
mt6359_info = mt6359p_regulators;
else
diff --git a/drivers/regulator/pca9450-regulator.c b/drivers/regulator/pca9450-regulator.c
index 87a746dcb516..e75dd92f86ca 100644
--- a/drivers/regulator/pca9450-regulator.c
+++ b/drivers/regulator/pca9450-regulator.c
@@ -264,7 +264,7 @@ static const struct pca9450_regulator_desc pca9450a_regulators[] = {
.vsel_reg = PCA9450_REG_BUCK2OUT_DVS0,
.vsel_mask = BUCK2OUT_DVS0_MASK,
.enable_reg = PCA9450_REG_BUCK2CTRL,
- .enable_mask = BUCK1_ENMODE_MASK,
+ .enable_mask = BUCK2_ENMODE_MASK,
.ramp_reg = PCA9450_REG_BUCK2CTRL,
.ramp_mask = BUCK2_RAMP_MASK,
.ramp_delay_table = pca9450_dvs_buck_ramp_table,
@@ -502,7 +502,7 @@ static const struct pca9450_regulator_desc pca9450bc_regulators[] = {
.vsel_reg = PCA9450_REG_BUCK2OUT_DVS0,
.vsel_mask = BUCK2OUT_DVS0_MASK,
.enable_reg = PCA9450_REG_BUCK2CTRL,
- .enable_mask = BUCK1_ENMODE_MASK,
+ .enable_mask = BUCK2_ENMODE_MASK,
.ramp_reg = PCA9450_REG_BUCK2CTRL,
.ramp_mask = BUCK2_RAMP_MASK,
.ramp_delay_table = pca9450_dvs_buck_ramp_table,
diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c
index b0a58c62b1e2..f3b280af0773 100644
--- a/drivers/regulator/qcom-rpmh-regulator.c
+++ b/drivers/regulator/qcom-rpmh-regulator.c
@@ -1057,21 +1057,21 @@ static const struct rpmh_vreg_init_data pm8450_vreg_data[] = {
};
static const struct rpmh_vreg_init_data pm8550_vreg_data[] = {
- RPMH_VREG("ldo1", "ldo%s1", &pmic5_pldo, "vdd-l1-l4-l10"),
+ RPMH_VREG("ldo1", "ldo%s1", &pmic5_nldo515, "vdd-l1-l4-l10"),
RPMH_VREG("ldo2", "ldo%s2", &pmic5_pldo, "vdd-l2-l13-l14"),
- RPMH_VREG("ldo3", "ldo%s3", &pmic5_nldo, "vdd-l3"),
- RPMH_VREG("ldo4", "ldo%s4", &pmic5_nldo, "vdd-l1-l4-l10"),
+ RPMH_VREG("ldo3", "ldo%s3", &pmic5_nldo515, "vdd-l3"),
+ RPMH_VREG("ldo4", "ldo%s4", &pmic5_nldo515, "vdd-l1-l4-l10"),
RPMH_VREG("ldo5", "ldo%s5", &pmic5_pldo, "vdd-l5-l16"),
- RPMH_VREG("ldo6", "ldo%s6", &pmic5_pldo_lv, "vdd-l6-l7"),
- RPMH_VREG("ldo7", "ldo%s7", &pmic5_pldo_lv, "vdd-l6-l7"),
- RPMH_VREG("ldo8", "ldo%s8", &pmic5_pldo_lv, "vdd-l8-l9"),
+ RPMH_VREG("ldo6", "ldo%s6", &pmic5_pldo, "vdd-l6-l7"),
+ RPMH_VREG("ldo7", "ldo%s7", &pmic5_pldo, "vdd-l6-l7"),
+ RPMH_VREG("ldo8", "ldo%s8", &pmic5_pldo, "vdd-l8-l9"),
RPMH_VREG("ldo9", "ldo%s9", &pmic5_pldo, "vdd-l8-l9"),
- RPMH_VREG("ldo10", "ldo%s10", &pmic5_nldo, "vdd-l1-l4-l10"),
- RPMH_VREG("ldo11", "ldo%s11", &pmic5_nldo, "vdd-l11"),
+ RPMH_VREG("ldo10", "ldo%s10", &pmic5_nldo515, "vdd-l1-l4-l10"),
+ RPMH_VREG("ldo11", "ldo%s11", &pmic5_nldo515, "vdd-l11"),
RPMH_VREG("ldo12", "ldo%s12", &pmic5_pldo, "vdd-l12"),
RPMH_VREG("ldo13", "ldo%s13", &pmic5_pldo, "vdd-l2-l13-l14"),
RPMH_VREG("ldo14", "ldo%s14", &pmic5_pldo, "vdd-l2-l13-l14"),
- RPMH_VREG("ldo15", "ldo%s15", &pmic5_pldo, "vdd-l15"),
+ RPMH_VREG("ldo15", "ldo%s15", &pmic5_nldo515, "vdd-l15"),
RPMH_VREG("ldo16", "ldo%s16", &pmic5_pldo, "vdd-l5-l16"),
RPMH_VREG("ldo17", "ldo%s17", &pmic5_pldo, "vdd-l17"),
RPMH_VREG("bob1", "bob%s1", &pmic5_bob, "vdd-bob1"),
@@ -1086,9 +1086,9 @@ static const struct rpmh_vreg_init_data pm8550vs_vreg_data[] = {
RPMH_VREG("smps4", "smp%s4", &pmic5_ftsmps525_lv, "vdd-s4"),
RPMH_VREG("smps5", "smp%s5", &pmic5_ftsmps525_lv, "vdd-s5"),
RPMH_VREG("smps6", "smp%s6", &pmic5_ftsmps525_mv, "vdd-s6"),
- RPMH_VREG("ldo1", "ldo%s1", &pmic5_nldo, "vdd-l1"),
- RPMH_VREG("ldo2", "ldo%s2", &pmic5_nldo, "vdd-l2"),
- RPMH_VREG("ldo3", "ldo%s3", &pmic5_nldo, "vdd-l3"),
+ RPMH_VREG("ldo1", "ldo%s1", &pmic5_nldo515, "vdd-l1"),
+ RPMH_VREG("ldo2", "ldo%s2", &pmic5_nldo515, "vdd-l2"),
+ RPMH_VREG("ldo3", "ldo%s3", &pmic5_nldo515, "vdd-l3"),
{}
};
@@ -1101,9 +1101,9 @@ static const struct rpmh_vreg_init_data pm8550ve_vreg_data[] = {
RPMH_VREG("smps6", "smp%s6", &pmic5_ftsmps525_lv, "vdd-s6"),
RPMH_VREG("smps7", "smp%s7", &pmic5_ftsmps525_lv, "vdd-s7"),
RPMH_VREG("smps8", "smp%s8", &pmic5_ftsmps525_lv, "vdd-s8"),
- RPMH_VREG("ldo1", "ldo%s1", &pmic5_nldo, "vdd-l1"),
- RPMH_VREG("ldo2", "ldo%s2", &pmic5_nldo, "vdd-l2"),
- RPMH_VREG("ldo3", "ldo%s3", &pmic5_nldo, "vdd-l3"),
+ RPMH_VREG("ldo1", "ldo%s1", &pmic5_nldo515, "vdd-l1"),
+ RPMH_VREG("ldo2", "ldo%s2", &pmic5_nldo515, "vdd-l2"),
+ RPMH_VREG("ldo3", "ldo%s3", &pmic5_nldo515, "vdd-l3"),
{}
};
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index ade1369fe5ed..113c509bf6d0 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -127,6 +127,8 @@ static int prepare_itcw(struct itcw *, unsigned int, unsigned int, int,
struct dasd_device *, struct dasd_device *,
unsigned int, int, unsigned int, unsigned int,
unsigned int, unsigned int);
+static int dasd_eckd_query_pprc_status(struct dasd_device *,
+ struct dasd_pprc_data_sc4 *);
/* initial attempt at a probe function. this can be simplified once
* the other detection code is gone */
@@ -3733,6 +3735,26 @@ static int count_exts(unsigned int from, unsigned int to, int trks_per_ext)
return count;
}
+static int dasd_in_copy_relation(struct dasd_device *device)
+{
+ struct dasd_pprc_data_sc4 *temp;
+ int rc;
+
+ if (!dasd_eckd_pprc_enabled(device))
+ return 0;
+
+ temp = kzalloc(sizeof(*temp), GFP_KERNEL);
+ if (!temp)
+ return -ENOMEM;
+
+ rc = dasd_eckd_query_pprc_status(device, temp);
+ if (!rc)
+ rc = temp->dev_info[0].state;
+
+ kfree(temp);
+ return rc;
+}
+
/*
* Release allocated space for a given range or an entire volume.
*/
@@ -3749,6 +3771,7 @@ dasd_eckd_dso_ras(struct dasd_device *device, struct dasd_block *block,
int cur_to_trk, cur_from_trk;
struct dasd_ccw_req *cqr;
u32 beg_cyl, end_cyl;
+ int copy_relation;
struct ccw1 *ccw;
int trks_per_ext;
size_t ras_size;
@@ -3760,6 +3783,10 @@ dasd_eckd_dso_ras(struct dasd_device *device, struct dasd_block *block,
if (dasd_eckd_ras_sanity_checks(device, first_trk, last_trk))
return ERR_PTR(-EINVAL);
+ copy_relation = dasd_in_copy_relation(device);
+ if (copy_relation < 0)
+ return ERR_PTR(copy_relation);
+
rq = req ? blk_mq_rq_to_pdu(req) : NULL;
features = &private->features;
@@ -3788,9 +3815,11 @@ dasd_eckd_dso_ras(struct dasd_device *device, struct dasd_block *block,
/*
* This bit guarantees initialisation of tracks within an extent that is
* not fully specified, but is only supported with a certain feature
- * subset.
+ * subset and for devices not in a copy relation.
*/
- ras_data->op_flags.guarantee_init = !!(features->feature[56] & 0x01);
+ if (features->feature[56] & 0x01 && !copy_relation)
+ ras_data->op_flags.guarantee_init = 1;
+
ras_data->lss = private->conf.ned->ID;
ras_data->dev_addr = private->conf.ned->unit_addr;
ras_data->nr_exts = nr_exts;
diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c
index 838c9f5313e6..513a7e6eee63 100644
--- a/drivers/s390/block/dasd_ioctl.c
+++ b/drivers/s390/block/dasd_ioctl.c
@@ -552,10 +552,10 @@ static int __dasd_ioctl_information(struct dasd_block *block,
memcpy(dasd_info->type, base->discipline->name, 4);
- spin_lock_irqsave(&block->queue_lock, flags);
+ spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags);
list_for_each(l, &base->ccw_queue)
dasd_info->chanq_len++;
- spin_unlock_irqrestore(&block->queue_lock, flags);
+ spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags);
return 0;
}
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index 8eb089b99cde..c0d620ffea61 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -1111,6 +1111,8 @@ static void io_subchannel_verify(struct subchannel *sch)
cdev = sch_get_cdev(sch);
if (cdev)
dev_fsm_event(cdev, DEV_EVENT_VERIFY);
+ else
+ css_schedule_eval(sch->schid);
}
static void io_subchannel_terminate_path(struct subchannel *sch, u8 mask)
@@ -1374,6 +1376,7 @@ void ccw_device_set_notoper(struct ccw_device *cdev)
enum io_sch_action {
IO_SCH_UNREG,
IO_SCH_ORPH_UNREG,
+ IO_SCH_UNREG_CDEV,
IO_SCH_ATTACH,
IO_SCH_UNREG_ATTACH,
IO_SCH_ORPH_ATTACH,
@@ -1406,7 +1409,7 @@ static enum io_sch_action sch_get_action(struct subchannel *sch)
}
if ((sch->schib.pmcw.pam & sch->opm) == 0) {
if (ccw_device_notify(cdev, CIO_NO_PATH) != NOTIFY_OK)
- return IO_SCH_UNREG;
+ return IO_SCH_UNREG_CDEV;
return IO_SCH_DISC;
}
if (device_is_disconnected(cdev))
@@ -1468,6 +1471,7 @@ static int io_subchannel_sch_event(struct subchannel *sch, int process)
case IO_SCH_ORPH_ATTACH:
ccw_device_set_disconnected(cdev);
break;
+ case IO_SCH_UNREG_CDEV:
case IO_SCH_UNREG_ATTACH:
case IO_SCH_UNREG:
if (!cdev)
@@ -1501,6 +1505,7 @@ static int io_subchannel_sch_event(struct subchannel *sch, int process)
if (rc)
goto out;
break;
+ case IO_SCH_UNREG_CDEV:
case IO_SCH_UNREG_ATTACH:
spin_lock_irqsave(sch->lock, flags);
sch_set_cdev(sch, NULL);
diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h
index 5ea6249d8180..641f0dbb65a9 100644
--- a/drivers/s390/cio/qdio.h
+++ b/drivers/s390/cio/qdio.h
@@ -95,7 +95,7 @@ static inline int do_sqbs(u64 token, unsigned char state, int queue,
" lgr 1,%[token]\n"
" .insn rsy,0xeb000000008a,%[qs],%[ccq],0(%[state])"
: [ccq] "+&d" (_ccq), [qs] "+&d" (_queuestart)
- : [state] "d" ((unsigned long)state), [token] "d" (token)
+ : [state] "a" ((unsigned long)state), [token] "d" (token)
: "memory", "cc", "1");
*count = _ccq & 0xff;
*start = _queuestart & 0xff;
diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
index 5a05d1cdfec2..a8def50c149b 100644
--- a/drivers/s390/crypto/pkey_api.c
+++ b/drivers/s390/crypto/pkey_api.c
@@ -1293,6 +1293,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
return PTR_ERR(kkey);
rc = pkey_keyblob2pkey(kkey, ktp.keylen, &ktp.protkey);
DEBUG_DBG("%s pkey_keyblob2pkey()=%d\n", __func__, rc);
+ memzero_explicit(kkey, ktp.keylen);
kfree(kkey);
if (rc)
break;
@@ -1426,6 +1427,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
kkey, ktp.keylen, &ktp.protkey);
DEBUG_DBG("%s pkey_keyblob2pkey2()=%d\n", __func__, rc);
kfree(apqns);
+ memzero_explicit(kkey, ktp.keylen);
kfree(kkey);
if (rc)
break;
@@ -1552,6 +1554,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
protkey, &protkeylen);
DEBUG_DBG("%s pkey_keyblob2pkey3()=%d\n", __func__, rc);
kfree(apqns);
+ memzero_explicit(kkey, ktp.keylen);
kfree(kkey);
if (rc) {
kfree(protkey);
diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c
index 8acb9eba691b..c2096e4bba31 100644
--- a/drivers/s390/net/ism_drv.c
+++ b/drivers/s390/net/ism_drv.c
@@ -771,14 +771,6 @@ static int __init ism_init(void)
static void __exit ism_exit(void)
{
- struct ism_dev *ism;
-
- mutex_lock(&ism_dev_list.mutex);
- list_for_each_entry(ism, &ism_dev_list.list, list) {
- ism_dev_exit(ism);
- }
- mutex_unlock(&ism_dev_list.mutex);
-
pci_unregister_driver(&ism_driver);
debug_unregister(ism_debug_info);
}
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index 5e115e8b2ba4..7c6efde75da6 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -1678,6 +1678,7 @@ struct aac_dev
u32 handle_pci_error;
bool init_reset;
u8 soft_reset_support;
+ u8 use_map_queue;
};
#define aac_adapter_interrupt(dev) \
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index deb32c9f4b3e..3f062e4013ab 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -223,8 +223,12 @@ int aac_fib_setup(struct aac_dev * dev)
struct fib *aac_fib_alloc_tag(struct aac_dev *dev, struct scsi_cmnd *scmd)
{
struct fib *fibptr;
+ u32 blk_tag;
+ int i;
- fibptr = &dev->fibs[scsi_cmd_to_rq(scmd)->tag];
+ blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
+ i = blk_mq_unique_tag_to_tag(blk_tag);
+ fibptr = &dev->fibs[i];
/*
* Null out fields that depend on being zero at the start of
* each I/O
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index 68f4dbcfff49..c4a36c0be527 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -19,6 +19,7 @@
#include <linux/compat.h>
#include <linux/blkdev.h>
+#include <linux/blk-mq-pci.h>
#include <linux/completion.h>
#include <linux/init.h>
#include <linux/interrupt.h>
@@ -504,6 +505,15 @@ common_config:
return 0;
}
+static void aac_map_queues(struct Scsi_Host *shost)
+{
+ struct aac_dev *aac = (struct aac_dev *)shost->hostdata;
+
+ blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT],
+ aac->pdev, 0);
+ aac->use_map_queue = true;
+}
+
/**
* aac_change_queue_depth - alter queue depths
* @sdev: SCSI device we are considering
@@ -1488,6 +1498,7 @@ static const struct scsi_host_template aac_driver_template = {
.bios_param = aac_biosparm,
.shost_groups = aac_host_groups,
.slave_configure = aac_slave_configure,
+ .map_queues = aac_map_queues,
.change_queue_depth = aac_change_queue_depth,
.sdev_groups = aac_dev_groups,
.eh_abort_handler = aac_eh_abort,
@@ -1775,6 +1786,8 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
shost->max_lun = AAC_MAX_LUN;
pci_set_drvdata(pdev, shost);
+ shost->nr_hw_queues = aac->max_msix;
+ shost->host_tagset = 1;
error = scsi_add_host(shost, &pdev->dev);
if (error)
@@ -1906,6 +1919,7 @@ static void aac_remove_one(struct pci_dev *pdev)
struct aac_dev *aac = (struct aac_dev *)shost->hostdata;
aac_cancel_rescan_worker(aac);
+ aac->use_map_queue = false;
scsi_remove_host(shost);
__aac_shutdown(aac);
diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c
index 11ef58204e96..61949f374188 100644
--- a/drivers/scsi/aacraid/src.c
+++ b/drivers/scsi/aacraid/src.c
@@ -493,6 +493,10 @@ static int aac_src_deliver_message(struct fib *fib)
#endif
u16 vector_no;
+ struct scsi_cmnd *scmd;
+ u32 blk_tag;
+ struct Scsi_Host *shost = dev->scsi_host_ptr;
+ struct blk_mq_queue_map *qmap;
atomic_inc(&q->numpending);
@@ -505,8 +509,25 @@ static int aac_src_deliver_message(struct fib *fib)
if ((dev->comm_interface == AAC_COMM_MESSAGE_TYPE3)
&& dev->sa_firmware)
vector_no = aac_get_vector(dev);
- else
- vector_no = fib->vector_no;
+ else {
+ if (!fib->vector_no || !fib->callback_data) {
+ if (shost && dev->use_map_queue) {
+ qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT];
+ vector_no = qmap->mq_map[raw_smp_processor_id()];
+ }
+ /*
+ * We hardcode the vector_no for
+ * reserved commands as a valid shost is
+ * absent during the init
+ */
+ else
+ vector_no = 0;
+ } else {
+ scmd = (struct scsi_cmnd *)fib->callback_data;
+ blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
+ vector_no = blk_mq_unique_tag_to_hwq(blk_tag);
+ }
+ }
if (native_hba) {
if (fib->flags & FIB_CONTEXT_FLAG_NATIVE_HBA_TMF) {
diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c
index 9a322a3a2150..595dca92e8db 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.c
+++ b/drivers/scsi/lpfc/lpfc_bsg.c
@@ -889,7 +889,7 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
struct lpfc_iocbq *piocbq)
{
uint32_t evt_req_id = 0;
- uint32_t cmd;
+ u16 cmd;
struct lpfc_dmabuf *dmabuf = NULL;
struct lpfc_bsg_event *evt;
struct event_data *evt_dat = NULL;
@@ -915,7 +915,7 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
ct_req = (struct lpfc_sli_ct_request *)bdeBuf1->virt;
evt_req_id = ct_req->FsType;
- cmd = ct_req->CommandResponse.bits.CmdRsp;
+ cmd = be16_to_cpu(ct_req->CommandResponse.bits.CmdRsp);
spin_lock_irqsave(&phba->ct_ev_lock, flags);
list_for_each_entry(evt, &phba->ct_ev_waiters, node) {
@@ -3186,8 +3186,8 @@ lpfc_bsg_diag_loopback_run(struct bsg_job *job)
ctreq->RevisionId.bits.InId = 0;
ctreq->FsType = SLI_CT_ELX_LOOPBACK;
ctreq->FsSubType = 0;
- ctreq->CommandResponse.bits.CmdRsp = ELX_LOOPBACK_DATA;
- ctreq->CommandResponse.bits.Size = size;
+ ctreq->CommandResponse.bits.CmdRsp = cpu_to_be16(ELX_LOOPBACK_DATA);
+ ctreq->CommandResponse.bits.Size = cpu_to_be16(size);
segment_offset = ELX_LOOPBACK_HEADER_SZ;
} else
segment_offset = 0;
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index df5e5b7fdcfe..84aa3571be6d 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -3796,6 +3796,7 @@ struct qla_qpair {
uint64_t retry_term_jiff;
struct qla_tgt_counters tgt_counters;
uint16_t cpuid;
+ bool cpu_mapped;
struct qla_fw_resources fwres ____cacheline_aligned;
struct qla_buf_pool buf_pool;
u32 cmd_cnt;
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index ec0423ec6681..1a955c3ff3d6 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -9426,6 +9426,9 @@ struct qla_qpair *qla2xxx_create_qpair(struct scsi_qla_host *vha, int qos,
qpair->rsp->req = qpair->req;
qpair->rsp->qpair = qpair;
+ if (!qpair->cpu_mapped)
+ qla_cpu_update(qpair, raw_smp_processor_id());
+
if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif) {
if (ha->fw_attributes & BIT_4)
qpair->difdix_supported = 1;
diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h
index cce6e425c121..7b42558a8839 100644
--- a/drivers/scsi/qla2xxx/qla_inline.h
+++ b/drivers/scsi/qla2xxx/qla_inline.h
@@ -539,11 +539,14 @@ qla_mapq_init_qp_cpu_map(struct qla_hw_data *ha,
if (!ha->qp_cpu_map)
return;
mask = pci_irq_get_affinity(ha->pdev, msix->vector_base0);
+ if (!mask)
+ return;
qpair->cpuid = cpumask_first(mask);
for_each_cpu(cpu, mask) {
ha->qp_cpu_map[cpu] = qpair;
}
msix->cpuid = qpair->cpuid;
+ qpair->cpu_mapped = true;
}
static inline void
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 71feda2cdb63..245e3a5d81fd 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -3770,6 +3770,9 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
if (rsp->qpair->cpuid != smp_processor_id() || !rsp->qpair->rcv_intr) {
rsp->qpair->rcv_intr = 1;
+
+ if (!rsp->qpair->cpu_mapped)
+ qla_cpu_update(rsp->qpair, raw_smp_processor_id());
}
#define __update_rsp_in(_is_shadow_hba, _rsp, _rsp_in) \
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index b7c569a42aa4..0226c9279cef 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1463,6 +1463,8 @@ static int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
struct Scsi_Host *host = cmd->device->host;
int rtn = 0;
+ atomic_inc(&cmd->device->iorequest_cnt);
+
/* check if the device is still usable */
if (unlikely(cmd->device->sdev_state == SDEV_DEL)) {
/* in SDEV_DEL we error all commands. DID_NO_CONNECT
@@ -1483,6 +1485,7 @@ static int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
*/
SCSI_LOG_MLQUEUE(3, scmd_printk(KERN_INFO, cmd,
"queuecommand : device blocked\n"));
+ atomic_dec(&cmd->device->iorequest_cnt);
return SCSI_MLQUEUE_DEVICE_BUSY;
}
@@ -1515,6 +1518,7 @@ static int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
trace_scsi_dispatch_cmd_start(cmd);
rtn = host->hostt->queuecommand(host, cmd);
if (rtn) {
+ atomic_dec(&cmd->device->iorequest_cnt);
trace_scsi_dispatch_cmd_error(cmd, rtn);
if (rtn != SCSI_MLQUEUE_DEVICE_BUSY &&
rtn != SCSI_MLQUEUE_TARGET_BUSY)
@@ -1761,7 +1765,6 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
goto out_dec_host_busy;
}
- atomic_inc(&cmd->device->iorequest_cnt);
return BLK_STS_OK;
out_dec_host_busy:
diff --git a/drivers/scsi/stex.c b/drivers/scsi/stex.c
index 5b230e149c3d..8ffb75be99bc 100644
--- a/drivers/scsi/stex.c
+++ b/drivers/scsi/stex.c
@@ -109,7 +109,9 @@ enum {
TASK_ATTRIBUTE_HEADOFQUEUE = 0x1,
TASK_ATTRIBUTE_ORDERED = 0x2,
TASK_ATTRIBUTE_ACA = 0x4,
+};
+enum {
SS_STS_NORMAL = 0x80000000,
SS_STS_DONE = 0x40000000,
SS_STS_HANDSHAKE = 0x20000000,
@@ -121,7 +123,9 @@ enum {
SS_I2H_REQUEST_RESET = 0x2000,
SS_MU_OPERATIONAL = 0x80000000,
+};
+enum {
STEX_CDB_LENGTH = 16,
STATUS_VAR_LEN = 128,
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index d9ce379c4d2e..659196a2f63a 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1567,6 +1567,8 @@ static int storvsc_device_configure(struct scsi_device *sdevice)
{
blk_queue_rq_timeout(sdevice->request_queue, (storvsc_timeout * HZ));
+ /* storvsc devices don't support MAINTENANCE_IN SCSI cmd */
+ sdevice->no_report_opcodes = 1;
sdevice->no_write_same = 1;
/*
@@ -1780,7 +1782,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
length = scsi_bufflen(scmnd);
payload = (struct vmbus_packet_mpb_array *)&cmd_request->mpb;
- payload_sz = sizeof(cmd_request->mpb);
+ payload_sz = 0;
if (scsi_sg_count(scmnd)) {
unsigned long offset_in_hvpg = offset_in_hvpage(sgl->offset);
@@ -1789,10 +1791,10 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
unsigned long hvpfn, hvpfns_to_add;
int j, i = 0, sg_count;
- if (hvpg_count > MAX_PAGE_BUFFER_COUNT) {
+ payload_sz = (hvpg_count * sizeof(u64) +
+ sizeof(struct vmbus_packet_mpb_array));
- payload_sz = (hvpg_count * sizeof(u64) +
- sizeof(struct vmbus_packet_mpb_array));
+ if (hvpg_count > MAX_PAGE_BUFFER_COUNT) {
payload = kzalloc(payload_sz, GFP_ATOMIC);
if (!payload)
return SCSI_MLQUEUE_DEVICE_BUSY;
diff --git a/drivers/soc/fsl/qe/Kconfig b/drivers/soc/fsl/qe/Kconfig
index 7268c2fbcbc1..e0d096607fef 100644
--- a/drivers/soc/fsl/qe/Kconfig
+++ b/drivers/soc/fsl/qe/Kconfig
@@ -36,7 +36,7 @@ config UCC
config CPM_TSA
tristate "CPM TSA support"
depends on OF && HAS_IOMEM
- depends on CPM1 || COMPILE_TEST
+ depends on CPM1 || (CPM && COMPILE_TEST)
help
Freescale CPM Time Slot Assigner (TSA)
controller.
@@ -47,7 +47,7 @@ config CPM_TSA
config CPM_QMC
tristate "CPM QMC support"
depends on OF && HAS_IOMEM
- depends on CPM1 || (FSL_SOC && COMPILE_TEST)
+ depends on CPM1 || (FSL_SOC && CPM && COMPILE_TEST)
depends on CPM_TSA
help
Freescale CPM QUICC Multichannel Controller
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index 0f43a88b4894..89b775512bef 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -32,4 +32,5 @@ obj-$(CONFIG_QCOM_RPMHPD) += rpmhpd.o
obj-$(CONFIG_QCOM_RPMPD) += rpmpd.o
obj-$(CONFIG_QCOM_KRYO_L2_ACCESSORS) += kryo-l2-accessors.o
obj-$(CONFIG_QCOM_ICC_BWMON) += icc-bwmon.o
-obj-$(CONFIG_QCOM_INLINE_CRYPTO_ENGINE) += ice.o
+qcom_ice-objs += ice.o
+obj-$(CONFIG_QCOM_INLINE_CRYPTO_ENGINE) += qcom_ice.o
diff --git a/drivers/soc/qcom/icc-bwmon.c b/drivers/soc/qcom/icc-bwmon.c
index fd58c5b69897..f65bfeca7ed6 100644
--- a/drivers/soc/qcom/icc-bwmon.c
+++ b/drivers/soc/qcom/icc-bwmon.c
@@ -773,12 +773,12 @@ static int bwmon_probe(struct platform_device *pdev)
bwmon->max_bw_kbps = UINT_MAX;
opp = dev_pm_opp_find_bw_floor(dev, &bwmon->max_bw_kbps, 0);
if (IS_ERR(opp))
- return dev_err_probe(dev, ret, "failed to find max peak bandwidth\n");
+ return dev_err_probe(dev, PTR_ERR(opp), "failed to find max peak bandwidth\n");
bwmon->min_bw_kbps = 0;
opp = dev_pm_opp_find_bw_ceil(dev, &bwmon->min_bw_kbps, 0);
if (IS_ERR(opp))
- return dev_err_probe(dev, ret, "failed to find min peak bandwidth\n");
+ return dev_err_probe(dev, PTR_ERR(opp), "failed to find min peak bandwidth\n");
bwmon->dev = dev;
diff --git a/drivers/soc/qcom/ramp_controller.c b/drivers/soc/qcom/ramp_controller.c
index dc74d2a19de2..5e3ba0be0903 100644
--- a/drivers/soc/qcom/ramp_controller.c
+++ b/drivers/soc/qcom/ramp_controller.c
@@ -296,7 +296,7 @@ static int qcom_ramp_controller_probe(struct platform_device *pdev)
return -ENOMEM;
qrc->desc = device_get_match_data(&pdev->dev);
- if (!qrc)
+ if (!qrc->desc)
return -EINVAL;
qrc->regmap = devm_regmap_init_mmio(&pdev->dev, base, &qrc_regmap_config);
diff --git a/drivers/soc/qcom/rmtfs_mem.c b/drivers/soc/qcom/rmtfs_mem.c
index ce48a9f3b4c8..f83811f51175 100644
--- a/drivers/soc/qcom/rmtfs_mem.c
+++ b/drivers/soc/qcom/rmtfs_mem.c
@@ -233,6 +233,7 @@ static int qcom_rmtfs_mem_probe(struct platform_device *pdev)
num_vmids = 0;
} else if (num_vmids < 0) {
dev_err(&pdev->dev, "failed to count qcom,vmid elements: %d\n", num_vmids);
+ ret = num_vmids;
goto remove_cdev;
} else if (num_vmids > NUM_MAX_VMIDS) {
dev_warn(&pdev->dev,
diff --git a/drivers/soc/qcom/rpmh-rsc.c b/drivers/soc/qcom/rpmh-rsc.c
index f93544f6d796..0dd4363ebac8 100644
--- a/drivers/soc/qcom/rpmh-rsc.c
+++ b/drivers/soc/qcom/rpmh-rsc.c
@@ -1073,7 +1073,7 @@ static int rpmh_rsc_probe(struct platform_device *pdev)
drv->ver.minor = rsc_id & (MINOR_VER_MASK << MINOR_VER_SHIFT);
drv->ver.minor >>= MINOR_VER_SHIFT;
- if (drv->ver.major == 3 && drv->ver.minor >= 0)
+ if (drv->ver.major == 3)
drv->regs = rpmh_rsc_reg_offset_ver_3_0;
else
drv->regs = rpmh_rsc_reg_offset_ver_2_7;
diff --git a/drivers/soc/qcom/rpmhpd.c b/drivers/soc/qcom/rpmhpd.c
index f20e2a49a669..63c35a32065b 100644
--- a/drivers/soc/qcom/rpmhpd.c
+++ b/drivers/soc/qcom/rpmhpd.c
@@ -342,6 +342,21 @@ static const struct rpmhpd_desc sm8150_desc = {
.num_pds = ARRAY_SIZE(sm8150_rpmhpds),
};
+static struct rpmhpd *sa8155p_rpmhpds[] = {
+ [SA8155P_CX] = &cx_w_mx_parent,
+ [SA8155P_CX_AO] = &cx_ao_w_mx_parent,
+ [SA8155P_EBI] = &ebi,
+ [SA8155P_GFX] = &gfx,
+ [SA8155P_MSS] = &mss,
+ [SA8155P_MX] = &mx,
+ [SA8155P_MX_AO] = &mx_ao,
+};
+
+static const struct rpmhpd_desc sa8155p_desc = {
+ .rpmhpds = sa8155p_rpmhpds,
+ .num_pds = ARRAY_SIZE(sa8155p_rpmhpds),
+};
+
/* SM8250 RPMH powerdomains */
static struct rpmhpd *sm8250_rpmhpds[] = {
[SM8250_CX] = &cx_w_mx_parent,
@@ -519,6 +534,7 @@ static const struct rpmhpd_desc sc8280xp_desc = {
static const struct of_device_id rpmhpd_match_table[] = {
{ .compatible = "qcom,qdu1000-rpmhpd", .data = &qdu1000_desc },
+ { .compatible = "qcom,sa8155p-rpmhpd", .data = &sa8155p_desc },
{ .compatible = "qcom,sa8540p-rpmhpd", .data = &sa8540p_desc },
{ .compatible = "qcom,sa8775p-rpmhpd", .data = &sa8775p_desc },
{ .compatible = "qcom,sc7180-rpmhpd", .data = &sc7180_desc },
diff --git a/drivers/soundwire/dmi-quirks.c b/drivers/soundwire/dmi-quirks.c
index 58ea013fa918..2a1096dab63d 100644
--- a/drivers/soundwire/dmi-quirks.c
+++ b/drivers/soundwire/dmi-quirks.c
@@ -100,6 +100,13 @@ static const struct dmi_system_id adr_remap_quirk_table[] = {
.driver_data = (void *)intel_tgl_bios,
},
{
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+ DMI_MATCH(DMI_BOARD_NAME, "8709"),
+ },
+ .driver_data = (void *)intel_tgl_bios,
+ },
+ {
/* quirk used for NUC15 'Bishop County' LAPBC510 and LAPBC710 skews */
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Intel(R) Client Systems"),
diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c
index c296e0bf897b..280455f047a3 100644
--- a/drivers/soundwire/qcom.c
+++ b/drivers/soundwire/qcom.c
@@ -1099,8 +1099,10 @@ static int qcom_swrm_startup(struct snd_pcm_substream *substream,
}
sruntime = sdw_alloc_stream(dai->name);
- if (!sruntime)
- return -ENOMEM;
+ if (!sruntime) {
+ ret = -ENOMEM;
+ goto err_alloc;
+ }
ctrl->sruntime[dai->id] = sruntime;
@@ -1110,12 +1112,19 @@ static int qcom_swrm_startup(struct snd_pcm_substream *substream,
if (ret < 0 && ret != -ENOTSUPP) {
dev_err(dai->dev, "Failed to set sdw stream on %s\n",
codec_dai->name);
- sdw_release_stream(sruntime);
- return ret;
+ goto err_set_stream;
}
}
return 0;
+
+err_set_stream:
+ sdw_release_stream(sruntime);
+err_alloc:
+ pm_runtime_mark_last_busy(ctrl->dev);
+ pm_runtime_put_autosuspend(ctrl->dev);
+
+ return ret;
}
static void qcom_swrm_shutdown(struct snd_pcm_substream *substream,
diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c
index c2191c07442b..379228f22186 100644
--- a/drivers/soundwire/stream.c
+++ b/drivers/soundwire/stream.c
@@ -2021,8 +2021,10 @@ int sdw_stream_add_slave(struct sdw_slave *slave,
skip_alloc_master_rt:
s_rt = sdw_slave_rt_find(slave, stream);
- if (s_rt)
+ if (s_rt) {
+ alloc_slave_rt = false;
goto skip_alloc_slave_rt;
+ }
s_rt = sdw_slave_rt_alloc(slave, m_rt);
if (!s_rt) {
diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c
index 6ddb2dfc0f00..32449bef4415 100644
--- a/drivers/spi/spi-cadence-quadspi.c
+++ b/drivers/spi/spi-cadence-quadspi.c
@@ -1756,8 +1756,11 @@ static int cqspi_probe(struct platform_device *pdev)
cqspi->slow_sram = true;
if (of_device_is_compatible(pdev->dev.of_node,
- "xlnx,versal-ospi-1.0"))
- dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
+ "xlnx,versal-ospi-1.0")) {
+ ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
+ if (ret)
+ goto probe_reset_failed;
+ }
}
ret = devm_request_irq(dev, irq, cqspi_irq_handler, 0,
diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c
index ac85d5562212..26e663369319 100644
--- a/drivers/spi/spi-cadence.c
+++ b/drivers/spi/spi-cadence.c
@@ -12,6 +12,7 @@
#include <linux/gpio/consumer.h>
#include <linux/interrupt.h>
#include <linux/io.h>
+#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of_irq.h>
#include <linux/of_address.h>
@@ -301,49 +302,43 @@ static int cdns_spi_setup_transfer(struct spi_device *spi,
}
/**
- * cdns_spi_fill_tx_fifo - Fills the TX FIFO with as many bytes as possible
+ * cdns_spi_process_fifo - Fills the TX FIFO, and drain the RX FIFO
* @xspi: Pointer to the cdns_spi structure
+ * @ntx: Number of bytes to pack into the TX FIFO
+ * @nrx: Number of bytes to drain from the RX FIFO
*/
-static void cdns_spi_fill_tx_fifo(struct cdns_spi *xspi)
+static void cdns_spi_process_fifo(struct cdns_spi *xspi, int ntx, int nrx)
{
- unsigned long trans_cnt = 0;
+ ntx = clamp(ntx, 0, xspi->tx_bytes);
+ nrx = clamp(nrx, 0, xspi->rx_bytes);
- while ((trans_cnt < xspi->tx_fifo_depth) &&
- (xspi->tx_bytes > 0)) {
+ xspi->tx_bytes -= ntx;
+ xspi->rx_bytes -= nrx;
+ while (ntx || nrx) {
/* When xspi in busy condition, bytes may send failed,
* then spi control did't work thoroughly, add one byte delay
*/
- if (cdns_spi_read(xspi, CDNS_SPI_ISR) &
- CDNS_SPI_IXR_TXFULL)
+ if (cdns_spi_read(xspi, CDNS_SPI_ISR) & CDNS_SPI_IXR_TXFULL)
udelay(10);
- if (xspi->txbuf)
- cdns_spi_write(xspi, CDNS_SPI_TXD, *xspi->txbuf++);
- else
- cdns_spi_write(xspi, CDNS_SPI_TXD, 0);
+ if (ntx) {
+ if (xspi->txbuf)
+ cdns_spi_write(xspi, CDNS_SPI_TXD, *xspi->txbuf++);
+ else
+ cdns_spi_write(xspi, CDNS_SPI_TXD, 0);
- xspi->tx_bytes--;
- trans_cnt++;
- }
-}
+ ntx--;
+ }
-/**
- * cdns_spi_read_rx_fifo - Reads the RX FIFO with as many bytes as possible
- * @xspi: Pointer to the cdns_spi structure
- * @count: Read byte count
- */
-static void cdns_spi_read_rx_fifo(struct cdns_spi *xspi, unsigned long count)
-{
- u8 data;
-
- /* Read out the data from the RX FIFO */
- while (count > 0) {
- data = cdns_spi_read(xspi, CDNS_SPI_RXD);
- if (xspi->rxbuf)
- *xspi->rxbuf++ = data;
- xspi->rx_bytes--;
- count--;
+ if (nrx) {
+ u8 data = cdns_spi_read(xspi, CDNS_SPI_RXD);
+
+ if (xspi->rxbuf)
+ *xspi->rxbuf++ = data;
+
+ nrx--;
+ }
}
}
@@ -381,33 +376,22 @@ static irqreturn_t cdns_spi_irq(int irq, void *dev_id)
spi_finalize_current_transfer(ctlr);
status = IRQ_HANDLED;
} else if (intr_status & CDNS_SPI_IXR_TXOW) {
- int trans_cnt = cdns_spi_read(xspi, CDNS_SPI_THLD);
+ int threshold = cdns_spi_read(xspi, CDNS_SPI_THLD);
+ int trans_cnt = xspi->rx_bytes - xspi->tx_bytes;
+
+ if (threshold > 1)
+ trans_cnt -= threshold;
+
/* Set threshold to one if number of pending are
* less than half fifo
*/
if (xspi->tx_bytes < xspi->tx_fifo_depth >> 1)
cdns_spi_write(xspi, CDNS_SPI_THLD, 1);
- while (trans_cnt) {
- cdns_spi_read_rx_fifo(xspi, 1);
-
- if (xspi->tx_bytes) {
- if (xspi->txbuf)
- cdns_spi_write(xspi, CDNS_SPI_TXD,
- *xspi->txbuf++);
- else
- cdns_spi_write(xspi, CDNS_SPI_TXD, 0);
- xspi->tx_bytes--;
- }
- trans_cnt--;
- }
- if (!xspi->tx_bytes) {
- /* Fixed delay due to controller limitation with
- * RX_NEMPTY incorrect status
- * Xilinx AR:65885 contains more details
- */
- udelay(10);
- cdns_spi_read_rx_fifo(xspi, xspi->rx_bytes);
+ if (xspi->tx_bytes) {
+ cdns_spi_process_fifo(xspi, trans_cnt, trans_cnt);
+ } else {
+ cdns_spi_process_fifo(xspi, 0, trans_cnt);
cdns_spi_write(xspi, CDNS_SPI_IDR,
CDNS_SPI_IXR_DEFAULT);
spi_finalize_current_transfer(ctlr);
@@ -450,16 +434,17 @@ static int cdns_transfer_one(struct spi_controller *ctlr,
xspi->tx_bytes = transfer->len;
xspi->rx_bytes = transfer->len;
- if (!spi_controller_is_slave(ctlr))
+ if (!spi_controller_is_slave(ctlr)) {
cdns_spi_setup_transfer(spi, transfer);
+ } else {
+ /* Set TX empty threshold to half of FIFO depth
+ * only if TX bytes are more than half FIFO depth.
+ */
+ if (xspi->tx_bytes > xspi->tx_fifo_depth)
+ cdns_spi_write(xspi, CDNS_SPI_THLD, xspi->tx_fifo_depth >> 1);
+ }
- /* Set TX empty threshold to half of FIFO depth
- * only if TX bytes are more than half FIFO depth.
- */
- if (xspi->tx_bytes > (xspi->tx_fifo_depth >> 1))
- cdns_spi_write(xspi, CDNS_SPI_THLD, xspi->tx_fifo_depth >> 1);
-
- cdns_spi_fill_tx_fifo(xspi);
+ cdns_spi_process_fifo(xspi, xspi->tx_fifo_depth, 0);
spi_transfer_delay_exec(transfer);
cdns_spi_write(xspi, CDNS_SPI_IER, CDNS_SPI_IXR_DEFAULT);
diff --git a/drivers/spi/spi-dw-mmio.c b/drivers/spi/spi-dw-mmio.c
index 5e6faa98aa85..15f5e9cb54ad 100644
--- a/drivers/spi/spi-dw-mmio.c
+++ b/drivers/spi/spi-dw-mmio.c
@@ -264,17 +264,17 @@ static void dw_spi_elba_set_cs(struct spi_device *spi, bool enable)
struct regmap *syscon = dwsmmio->priv;
u8 cs;
- cs = spi->chip_select;
+ cs = spi_get_chipselect(spi, 0);
if (cs < 2)
- dw_spi_elba_override_cs(syscon, spi->chip_select, enable);
+ dw_spi_elba_override_cs(syscon, spi_get_chipselect(spi, 0), enable);
/*
* The DW SPI controller needs a native CS bit selected to start
* the serial engine.
*/
- spi->chip_select = 0;
+ spi_set_chipselect(spi, 0, 0);
dw_spi_set_cs(spi, enable);
- spi->chip_select = cs;
+ spi_set_chipselect(spi, 0, cs);
}
static int dw_spi_elba_init(struct platform_device *pdev,
diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index 4339485d202c..674cfe05f411 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -1002,7 +1002,9 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr,
static int dspi_setup(struct spi_device *spi)
{
struct fsl_dspi *dspi = spi_controller_get_devdata(spi->controller);
+ u32 period_ns = DIV_ROUND_UP(NSEC_PER_SEC, spi->max_speed_hz);
unsigned char br = 0, pbr = 0, pcssck = 0, cssck = 0;
+ u32 quarter_period_ns = DIV_ROUND_UP(period_ns, 4);
u32 cs_sck_delay = 0, sck_cs_delay = 0;
struct fsl_dspi_platform_data *pdata;
unsigned char pasc = 0, asc = 0;
@@ -1031,6 +1033,19 @@ static int dspi_setup(struct spi_device *spi)
sck_cs_delay = pdata->sck_cs_delay;
}
+ /* Since tCSC and tASC apply to continuous transfers too, avoid SCK
+ * glitches of half a cycle by never allowing tCSC + tASC to go below
+ * half a SCK period.
+ */
+ if (cs_sck_delay < quarter_period_ns)
+ cs_sck_delay = quarter_period_ns;
+ if (sck_cs_delay < quarter_period_ns)
+ sck_cs_delay = quarter_period_ns;
+
+ dev_dbg(&spi->dev,
+ "DSPI controller timing params: CS-to-SCK delay %u ns, SCK-to-CS delay %u ns\n",
+ cs_sck_delay, sck_cs_delay);
+
clkrate = clk_get_rate(dspi->clk);
hz_to_spi_baud(&pbr, &br, spi->max_speed_hz, clkrate);
diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c
index f2341ab99556..4b70038ceb6b 100644
--- a/drivers/spi/spi-fsl-lpspi.c
+++ b/drivers/spi/spi-fsl-lpspi.c
@@ -910,9 +910,14 @@ static int fsl_lpspi_probe(struct platform_device *pdev)
ret = fsl_lpspi_dma_init(&pdev->dev, fsl_lpspi, controller);
if (ret == -EPROBE_DEFER)
goto out_pm_get;
-
if (ret < 0)
dev_err(&pdev->dev, "dma setup error %d, use pio\n", ret);
+ else
+ /*
+ * disable LPSPI module IRQ when enable DMA mode successfully,
+ * to prevent the unexpected LPSPI module IRQ events.
+ */
+ disable_irq(irq);
ret = devm_spi_register_controller(&pdev->dev, controller);
if (ret < 0) {
diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
index ba7be505ec4e..b293428760bc 100644
--- a/drivers/spi/spi-geni-qcom.c
+++ b/drivers/spi/spi-geni-qcom.c
@@ -294,6 +294,8 @@ static void spi_geni_set_cs(struct spi_device *slv, bool set_flag)
mas->cs_flag = set_flag;
/* set xfer_mode to FIFO to complete cs_done in isr */
mas->cur_xfer_mode = GENI_SE_FIFO;
+ geni_se_select_mode(se, mas->cur_xfer_mode);
+
reinit_completion(&mas->cs_done);
if (set_flag)
geni_se_setup_m_cmd(se, SPI_CS_ASSERT, 0);
@@ -644,6 +646,8 @@ static int spi_geni_init(struct spi_geni_master *mas)
geni_se_select_mode(se, GENI_GPI_DMA);
dev_dbg(mas->dev, "Using GPI DMA mode for SPI\n");
break;
+ } else if (ret == -EPROBE_DEFER) {
+ goto out_pm;
}
/*
* in case of failure to get gpi dma channel, we can still do the
diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c
index 21c321f43766..d7432e2219d8 100644
--- a/drivers/spi/spi-mt65xx.c
+++ b/drivers/spi/spi-mt65xx.c
@@ -1275,6 +1275,9 @@ static int mtk_spi_remove(struct platform_device *pdev)
struct mtk_spi *mdata = spi_master_get_devdata(master);
int ret;
+ if (mdata->use_spimem && !completion_done(&mdata->spimem_done))
+ complete(&mdata->spimem_done);
+
ret = pm_runtime_resume_and_get(&pdev->dev);
if (ret < 0)
return ret;
diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c
index 944ef6b42bce..00e5e88e72c4 100644
--- a/drivers/spi/spi-qup.c
+++ b/drivers/spi/spi-qup.c
@@ -1028,23 +1028,8 @@ static int spi_qup_probe(struct platform_device *pdev)
return -ENXIO;
}
- ret = clk_prepare_enable(cclk);
- if (ret) {
- dev_err(dev, "cannot enable core clock\n");
- return ret;
- }
-
- ret = clk_prepare_enable(iclk);
- if (ret) {
- clk_disable_unprepare(cclk);
- dev_err(dev, "cannot enable iface clock\n");
- return ret;
- }
-
master = spi_alloc_master(dev, sizeof(struct spi_qup));
if (!master) {
- clk_disable_unprepare(cclk);
- clk_disable_unprepare(iclk);
dev_err(dev, "cannot allocate master\n");
return -ENOMEM;
}
@@ -1092,6 +1077,19 @@ static int spi_qup_probe(struct platform_device *pdev)
spin_lock_init(&controller->lock);
init_completion(&controller->done);
+ ret = clk_prepare_enable(cclk);
+ if (ret) {
+ dev_err(dev, "cannot enable core clock\n");
+ goto error_dma;
+ }
+
+ ret = clk_prepare_enable(iclk);
+ if (ret) {
+ clk_disable_unprepare(cclk);
+ dev_err(dev, "cannot enable iface clock\n");
+ goto error_dma;
+ }
+
iomode = readl_relaxed(base + QUP_IO_M_MODES);
size = QUP_IO_M_OUTPUT_BLOCK_SIZE(iomode);
@@ -1121,7 +1119,7 @@ static int spi_qup_probe(struct platform_device *pdev)
ret = spi_qup_set_state(controller, QUP_STATE_RESET);
if (ret) {
dev_err(dev, "cannot set RESET state\n");
- goto error_dma;
+ goto error_clk;
}
writel_relaxed(0, base + QUP_OPERATIONAL);
@@ -1145,7 +1143,7 @@ static int spi_qup_probe(struct platform_device *pdev)
ret = devm_request_irq(dev, irq, spi_qup_qup_irq,
IRQF_TRIGGER_HIGH, pdev->name, controller);
if (ret)
- goto error_dma;
+ goto error_clk;
pm_runtime_set_autosuspend_delay(dev, MSEC_PER_SEC);
pm_runtime_use_autosuspend(dev);
@@ -1160,11 +1158,12 @@ static int spi_qup_probe(struct platform_device *pdev)
disable_pm:
pm_runtime_disable(&pdev->dev);
+error_clk:
+ clk_disable_unprepare(cclk);
+ clk_disable_unprepare(iclk);
error_dma:
spi_qup_release_dma(master);
error:
- clk_disable_unprepare(cclk);
- clk_disable_unprepare(iclk);
spi_master_put(master);
return ret;
}
diff --git a/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c b/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c
index 63de214916f5..c079368019e8 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c
@@ -373,7 +373,7 @@ static int ov2680_get_fmt(struct v4l2_subdev *sd,
static int ov2680_detect(struct i2c_client *client)
{
struct i2c_adapter *adapter = client->adapter;
- u32 high, low;
+ u32 high = 0, low = 0;
int ret;
u16 id;
u8 revision;
@@ -383,7 +383,7 @@ static int ov2680_detect(struct i2c_client *client)
ret = ov_read_reg8(client, OV2680_SC_CMMN_CHIP_ID_H, &high);
if (ret) {
- dev_err(&client->dev, "sensor_id_high = 0x%x\n", high);
+ dev_err(&client->dev, "sensor_id_high read failed (%d)\n", ret);
return -ENODEV;
}
ret = ov_read_reg8(client, OV2680_SC_CMMN_CHIP_ID_L, &low);
diff --git a/drivers/staging/media/imx/imx8mq-mipi-csi2.c b/drivers/staging/media/imx/imx8mq-mipi-csi2.c
index 32700cb8bc4d..ca2efcc21efe 100644
--- a/drivers/staging/media/imx/imx8mq-mipi-csi2.c
+++ b/drivers/staging/media/imx/imx8mq-mipi-csi2.c
@@ -354,7 +354,7 @@ static int imx8mq_mipi_csi_start_stream(struct csi_state *state,
struct v4l2_subdev_state *sd_state)
{
int ret;
- u32 hs_settle;
+ u32 hs_settle = 0;
ret = imx8mq_mipi_csi_sw_reset(state);
if (ret)
diff --git a/drivers/staging/octeon/TODO b/drivers/staging/octeon/TODO
index 67a0a1f6b922..044e48e3d65f 100644
--- a/drivers/staging/octeon/TODO
+++ b/drivers/staging/octeon/TODO
@@ -6,4 +6,3 @@ TODO:
- make driver self-contained instead of being split between staging and
arch/mips/cavium-octeon.
-Contact: Aaro Koskinen <aaro.koskinen@iki.fi>
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 834cce50f9b0..b516c2893420 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -364,8 +364,6 @@ struct iscsi_np *iscsit_add_np(
init_completion(&np->np_restart_comp);
INIT_LIST_HEAD(&np->np_list);
- timer_setup(&np->np_login_timer, iscsi_handle_login_thread_timeout, 0);
-
ret = iscsi_target_setup_login_socket(np, sockaddr);
if (ret != 0) {
kfree(np);
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index 274bdd7845ca..90b870f234f0 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -811,59 +811,6 @@ void iscsi_post_login_handler(
iscsit_dec_conn_usage_count(conn);
}
-void iscsi_handle_login_thread_timeout(struct timer_list *t)
-{
- struct iscsi_np *np = from_timer(np, t, np_login_timer);
-
- spin_lock_bh(&np->np_thread_lock);
- pr_err("iSCSI Login timeout on Network Portal %pISpc\n",
- &np->np_sockaddr);
-
- if (np->np_login_timer_flags & ISCSI_TF_STOP) {
- spin_unlock_bh(&np->np_thread_lock);
- return;
- }
-
- if (np->np_thread)
- send_sig(SIGINT, np->np_thread, 1);
-
- np->np_login_timer_flags &= ~ISCSI_TF_RUNNING;
- spin_unlock_bh(&np->np_thread_lock);
-}
-
-static void iscsi_start_login_thread_timer(struct iscsi_np *np)
-{
- /*
- * This used the TA_LOGIN_TIMEOUT constant because at this
- * point we do not have access to ISCSI_TPG_ATTRIB(tpg)->login_timeout
- */
- spin_lock_bh(&np->np_thread_lock);
- np->np_login_timer_flags &= ~ISCSI_TF_STOP;
- np->np_login_timer_flags |= ISCSI_TF_RUNNING;
- mod_timer(&np->np_login_timer, jiffies + TA_LOGIN_TIMEOUT * HZ);
-
- pr_debug("Added timeout timer to iSCSI login request for"
- " %u seconds.\n", TA_LOGIN_TIMEOUT);
- spin_unlock_bh(&np->np_thread_lock);
-}
-
-static void iscsi_stop_login_thread_timer(struct iscsi_np *np)
-{
- spin_lock_bh(&np->np_thread_lock);
- if (!(np->np_login_timer_flags & ISCSI_TF_RUNNING)) {
- spin_unlock_bh(&np->np_thread_lock);
- return;
- }
- np->np_login_timer_flags |= ISCSI_TF_STOP;
- spin_unlock_bh(&np->np_thread_lock);
-
- del_timer_sync(&np->np_login_timer);
-
- spin_lock_bh(&np->np_thread_lock);
- np->np_login_timer_flags &= ~ISCSI_TF_RUNNING;
- spin_unlock_bh(&np->np_thread_lock);
-}
-
int iscsit_setup_np(
struct iscsi_np *np,
struct sockaddr_storage *sockaddr)
@@ -1123,10 +1070,13 @@ static struct iscsit_conn *iscsit_alloc_conn(struct iscsi_np *np)
spin_lock_init(&conn->nopin_timer_lock);
spin_lock_init(&conn->response_queue_lock);
spin_lock_init(&conn->state_lock);
+ spin_lock_init(&conn->login_worker_lock);
+ spin_lock_init(&conn->login_timer_lock);
timer_setup(&conn->nopin_response_timer,
iscsit_handle_nopin_response_timeout, 0);
timer_setup(&conn->nopin_timer, iscsit_handle_nopin_timeout, 0);
+ timer_setup(&conn->login_timer, iscsit_login_timeout, 0);
if (iscsit_conn_set_transport(conn, np->np_transport) < 0)
goto free_conn;
@@ -1304,7 +1254,7 @@ static int __iscsi_target_login_thread(struct iscsi_np *np)
goto new_sess_out;
}
- iscsi_start_login_thread_timer(np);
+ iscsit_start_login_timer(conn, current);
pr_debug("Moving to TARG_CONN_STATE_XPT_UP.\n");
conn->conn_state = TARG_CONN_STATE_XPT_UP;
@@ -1417,8 +1367,6 @@ static int __iscsi_target_login_thread(struct iscsi_np *np)
if (ret < 0)
goto new_sess_out;
- iscsi_stop_login_thread_timer(np);
-
if (ret == 1) {
tpg_np = conn->tpg_np;
@@ -1434,7 +1382,7 @@ static int __iscsi_target_login_thread(struct iscsi_np *np)
new_sess_out:
new_sess = true;
old_sess_out:
- iscsi_stop_login_thread_timer(np);
+ iscsit_stop_login_timer(conn);
tpg_np = conn->tpg_np;
iscsi_target_login_sess_out(conn, zero_tsih, new_sess);
new_sess = false;
@@ -1448,7 +1396,6 @@ old_sess_out:
return 1;
exit:
- iscsi_stop_login_thread_timer(np);
spin_lock_bh(&np->np_thread_lock);
np->np_thread_state = ISCSI_NP_THREAD_EXIT;
spin_unlock_bh(&np->np_thread_lock);
diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
index 24040c118e49..fa3fb5f4e6bc 100644
--- a/drivers/target/iscsi/iscsi_target_nego.c
+++ b/drivers/target/iscsi/iscsi_target_nego.c
@@ -535,25 +535,6 @@ static void iscsi_target_login_drop(struct iscsit_conn *conn, struct iscsi_login
iscsi_target_login_sess_out(conn, zero_tsih, true);
}
-struct conn_timeout {
- struct timer_list timer;
- struct iscsit_conn *conn;
-};
-
-static void iscsi_target_login_timeout(struct timer_list *t)
-{
- struct conn_timeout *timeout = from_timer(timeout, t, timer);
- struct iscsit_conn *conn = timeout->conn;
-
- pr_debug("Entering iscsi_target_login_timeout >>>>>>>>>>>>>>>>>>>\n");
-
- if (conn->login_kworker) {
- pr_debug("Sending SIGINT to conn->login_kworker %s/%d\n",
- conn->login_kworker->comm, conn->login_kworker->pid);
- send_sig(SIGINT, conn->login_kworker, 1);
- }
-}
-
static void iscsi_target_do_login_rx(struct work_struct *work)
{
struct iscsit_conn *conn = container_of(work,
@@ -562,12 +543,15 @@ static void iscsi_target_do_login_rx(struct work_struct *work)
struct iscsi_np *np = login->np;
struct iscsi_portal_group *tpg = conn->tpg;
struct iscsi_tpg_np *tpg_np = conn->tpg_np;
- struct conn_timeout timeout;
int rc, zero_tsih = login->zero_tsih;
bool state;
pr_debug("entering iscsi_target_do_login_rx, conn: %p, %s:%d\n",
conn, current->comm, current->pid);
+
+ spin_lock(&conn->login_worker_lock);
+ set_bit(LOGIN_FLAGS_WORKER_RUNNING, &conn->login_flags);
+ spin_unlock(&conn->login_worker_lock);
/*
* If iscsi_target_do_login_rx() has been invoked by ->sk_data_ready()
* before initial PDU processing in iscsi_target_start_negotiation()
@@ -597,19 +581,16 @@ static void iscsi_target_do_login_rx(struct work_struct *work)
goto err;
}
- conn->login_kworker = current;
allow_signal(SIGINT);
-
- timeout.conn = conn;
- timer_setup_on_stack(&timeout.timer, iscsi_target_login_timeout, 0);
- mod_timer(&timeout.timer, jiffies + TA_LOGIN_TIMEOUT * HZ);
- pr_debug("Starting login timer for %s/%d\n", current->comm, current->pid);
+ rc = iscsit_set_login_timer_kworker(conn, current);
+ if (rc < 0) {
+ /* The login timer has already expired */
+ pr_debug("iscsi_target_do_login_rx, login failed\n");
+ goto err;
+ }
rc = conn->conn_transport->iscsit_get_login_rx(conn, login);
- del_timer_sync(&timeout.timer);
- destroy_timer_on_stack(&timeout.timer);
flush_signals(current);
- conn->login_kworker = NULL;
if (rc < 0)
goto err;
@@ -646,7 +627,17 @@ static void iscsi_target_do_login_rx(struct work_struct *work)
if (iscsi_target_sk_check_and_clear(conn,
LOGIN_FLAGS_WRITE_ACTIVE))
goto err;
+
+ /*
+ * Set the login timer thread pointer to NULL to prevent the
+ * login process from getting stuck if the initiator
+ * stops sending data.
+ */
+ rc = iscsit_set_login_timer_kworker(conn, NULL);
+ if (rc < 0)
+ goto err;
} else if (rc == 1) {
+ iscsit_stop_login_timer(conn);
cancel_delayed_work(&conn->login_work);
iscsi_target_nego_release(conn);
iscsi_post_login_handler(np, conn, zero_tsih);
@@ -656,6 +647,7 @@ static void iscsi_target_do_login_rx(struct work_struct *work)
err:
iscsi_target_restore_sock_callbacks(conn);
+ iscsit_stop_login_timer(conn);
cancel_delayed_work(&conn->login_work);
iscsi_target_login_drop(conn, login);
iscsit_deaccess_np(np, tpg, tpg_np);
@@ -1130,6 +1122,7 @@ int iscsi_target_locate_portal(
iscsi_target_set_sock_callbacks(conn);
login->np = np;
+ conn->tpg = NULL;
login_req = (struct iscsi_login_req *) login->req;
payload_length = ntoh24(login_req->dlength);
@@ -1197,7 +1190,6 @@ int iscsi_target_locate_portal(
*/
sessiontype = strncmp(s_buf, DISCOVERY, 9);
if (!sessiontype) {
- conn->tpg = iscsit_global->discovery_tpg;
if (!login->leading_connection)
goto get_target;
@@ -1214,9 +1206,11 @@ int iscsi_target_locate_portal(
* Serialize access across the discovery struct iscsi_portal_group to
* process login attempt.
*/
+ conn->tpg = iscsit_global->discovery_tpg;
if (iscsit_access_np(np, conn->tpg) < 0) {
iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE);
+ conn->tpg = NULL;
ret = -1;
goto out;
}
@@ -1368,14 +1362,30 @@ int iscsi_target_start_negotiation(
* and perform connection cleanup now.
*/
ret = iscsi_target_do_login(conn, login);
- if (!ret && iscsi_target_sk_check_and_clear(conn, LOGIN_FLAGS_INITIAL_PDU))
- ret = -1;
+ if (!ret) {
+ spin_lock(&conn->login_worker_lock);
+
+ if (iscsi_target_sk_check_and_clear(conn, LOGIN_FLAGS_INITIAL_PDU))
+ ret = -1;
+ else if (!test_bit(LOGIN_FLAGS_WORKER_RUNNING, &conn->login_flags)) {
+ if (iscsit_set_login_timer_kworker(conn, NULL) < 0) {
+ /*
+ * The timeout has expired already.
+ * Schedule login_work to perform the cleanup.
+ */
+ schedule_delayed_work(&conn->login_work, 0);
+ }
+ }
+
+ spin_unlock(&conn->login_worker_lock);
+ }
if (ret < 0) {
iscsi_target_restore_sock_callbacks(conn);
iscsi_remove_failed_auth_entry(conn);
}
if (ret != 0) {
+ iscsit_stop_login_timer(conn);
cancel_delayed_work_sync(&conn->login_work);
iscsi_target_nego_release(conn);
}
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index 26dc8ed3045b..b14835fcb033 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -1040,6 +1040,57 @@ void iscsit_stop_nopin_timer(struct iscsit_conn *conn)
spin_unlock_bh(&conn->nopin_timer_lock);
}
+void iscsit_login_timeout(struct timer_list *t)
+{
+ struct iscsit_conn *conn = from_timer(conn, t, login_timer);
+ struct iscsi_login *login = conn->login;
+
+ pr_debug("Entering iscsi_target_login_timeout >>>>>>>>>>>>>>>>>>>\n");
+
+ spin_lock_bh(&conn->login_timer_lock);
+ login->login_failed = 1;
+
+ if (conn->login_kworker) {
+ pr_debug("Sending SIGINT to conn->login_kworker %s/%d\n",
+ conn->login_kworker->comm, conn->login_kworker->pid);
+ send_sig(SIGINT, conn->login_kworker, 1);
+ } else {
+ schedule_delayed_work(&conn->login_work, 0);
+ }
+ spin_unlock_bh(&conn->login_timer_lock);
+}
+
+void iscsit_start_login_timer(struct iscsit_conn *conn, struct task_struct *kthr)
+{
+ pr_debug("Login timer started\n");
+
+ conn->login_kworker = kthr;
+ mod_timer(&conn->login_timer, jiffies + TA_LOGIN_TIMEOUT * HZ);
+}
+
+int iscsit_set_login_timer_kworker(struct iscsit_conn *conn, struct task_struct *kthr)
+{
+ struct iscsi_login *login = conn->login;
+ int ret = 0;
+
+ spin_lock_bh(&conn->login_timer_lock);
+ if (login->login_failed) {
+ /* The timer has already expired */
+ ret = -1;
+ } else {
+ conn->login_kworker = kthr;
+ }
+ spin_unlock_bh(&conn->login_timer_lock);
+
+ return ret;
+}
+
+void iscsit_stop_login_timer(struct iscsit_conn *conn)
+{
+ pr_debug("Login timer stopped\n");
+ timer_delete_sync(&conn->login_timer);
+}
+
int iscsit_send_tx_data(
struct iscsit_cmd *cmd,
struct iscsit_conn *conn,
diff --git a/drivers/target/iscsi/iscsi_target_util.h b/drivers/target/iscsi/iscsi_target_util.h
index 33ea799a0850..24b8e577575a 100644
--- a/drivers/target/iscsi/iscsi_target_util.h
+++ b/drivers/target/iscsi/iscsi_target_util.h
@@ -56,6 +56,10 @@ extern void iscsit_handle_nopin_timeout(struct timer_list *t);
extern void __iscsit_start_nopin_timer(struct iscsit_conn *);
extern void iscsit_start_nopin_timer(struct iscsit_conn *);
extern void iscsit_stop_nopin_timer(struct iscsit_conn *);
+extern void iscsit_login_timeout(struct timer_list *t);
+extern void iscsit_start_login_timer(struct iscsit_conn *, struct task_struct *kthr);
+extern void iscsit_stop_login_timer(struct iscsit_conn *);
+extern int iscsit_set_login_timer_kworker(struct iscsit_conn *, struct task_struct *kthr);
extern int iscsit_send_tx_data(struct iscsit_cmd *, struct iscsit_conn *, int);
extern int iscsit_fe_sendpage_sg(struct iscsit_cmd *, struct iscsit_conn *);
extern int iscsit_tx_login_rsp(struct iscsit_conn *, u8, u8);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 86adff2a86ed..687adc9e086c 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -504,6 +504,8 @@ target_setup_session(struct se_portal_group *tpg,
free_sess:
transport_free_session(sess);
+ return ERR_PTR(rc);
+
free_cnt:
target_free_cmd_counter(cmd_cnt);
return ERR_PTR(rc);
diff --git a/drivers/tee/amdtee/amdtee_if.h b/drivers/tee/amdtee/amdtee_if.h
index ff48c3e47375..e2014e21530a 100644
--- a/drivers/tee/amdtee/amdtee_if.h
+++ b/drivers/tee/amdtee/amdtee_if.h
@@ -118,16 +118,18 @@ struct tee_cmd_unmap_shared_mem {
/**
* struct tee_cmd_load_ta - load Trusted Application (TA) binary into TEE
- * @low_addr: [in] bits [31:0] of the physical address of the TA binary
- * @hi_addr: [in] bits [63:32] of the physical address of the TA binary
- * @size: [in] size of TA binary in bytes
- * @ta_handle: [out] return handle of the loaded TA
+ * @low_addr: [in] bits [31:0] of the physical address of the TA binary
+ * @hi_addr: [in] bits [63:32] of the physical address of the TA binary
+ * @size: [in] size of TA binary in bytes
+ * @ta_handle: [out] return handle of the loaded TA
+ * @return_origin: [out] origin of return code after TEE processing
*/
struct tee_cmd_load_ta {
u32 low_addr;
u32 hi_addr;
u32 size;
u32 ta_handle;
+ u32 return_origin;
};
/**
diff --git a/drivers/tee/amdtee/call.c b/drivers/tee/amdtee/call.c
index e8cd9aaa3467..e9b63dcb3194 100644
--- a/drivers/tee/amdtee/call.c
+++ b/drivers/tee/amdtee/call.c
@@ -423,19 +423,23 @@ int handle_load_ta(void *data, u32 size, struct tee_ioctl_open_session_arg *arg)
if (ret) {
arg->ret_origin = TEEC_ORIGIN_COMMS;
arg->ret = TEEC_ERROR_COMMUNICATION;
- } else if (arg->ret == TEEC_SUCCESS) {
- ret = get_ta_refcount(load_cmd.ta_handle);
- if (!ret) {
- arg->ret_origin = TEEC_ORIGIN_COMMS;
- arg->ret = TEEC_ERROR_OUT_OF_MEMORY;
-
- /* Unload the TA on error */
- unload_cmd.ta_handle = load_cmd.ta_handle;
- psp_tee_process_cmd(TEE_CMD_ID_UNLOAD_TA,
- (void *)&unload_cmd,
- sizeof(unload_cmd), &ret);
- } else {
- set_session_id(load_cmd.ta_handle, 0, &arg->session);
+ } else {
+ arg->ret_origin = load_cmd.return_origin;
+
+ if (arg->ret == TEEC_SUCCESS) {
+ ret = get_ta_refcount(load_cmd.ta_handle);
+ if (!ret) {
+ arg->ret_origin = TEEC_ORIGIN_COMMS;
+ arg->ret = TEEC_ERROR_OUT_OF_MEMORY;
+
+ /* Unload the TA on error */
+ unload_cmd.ta_handle = load_cmd.ta_handle;
+ psp_tee_process_cmd(TEE_CMD_ID_UNLOAD_TA,
+ (void *)&unload_cmd,
+ sizeof(unload_cmd), &ret);
+ } else {
+ set_session_id(load_cmd.ta_handle, 0, &arg->session);
+ }
}
}
mutex_unlock(&ta_refcount_mutex);
diff --git a/drivers/tee/optee/smc_abi.c b/drivers/tee/optee/smc_abi.c
index 49702cb08f4f..3861ae06d902 100644
--- a/drivers/tee/optee/smc_abi.c
+++ b/drivers/tee/optee/smc_abi.c
@@ -1004,8 +1004,10 @@ static u32 get_async_notif_value(optee_invoke_fn *invoke_fn, bool *value_valid,
invoke_fn(OPTEE_SMC_GET_ASYNC_NOTIF_VALUE, 0, 0, 0, 0, 0, 0, 0, &res);
- if (res.a0)
+ if (res.a0) {
+ *value_valid = false;
return 0;
+ }
*value_valid = (res.a2 & OPTEE_SMC_ASYNC_NOTIF_VALUE_VALID);
*value_pending = (res.a2 & OPTEE_SMC_ASYNC_NOTIF_VALUE_PENDING);
return res.a1;
diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
index 810231b59dcd..5e1164226ada 100644
--- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
+++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
@@ -131,7 +131,7 @@ static ssize_t available_uuids_show(struct device *dev,
for (i = 0; i < INT3400_THERMAL_MAXIMUM_UUID; i++) {
if (priv->uuid_bitmap & (1 << i))
- length += sysfs_emit_at(buf, length, int3400_thermal_uuids[i]);
+ length += sysfs_emit_at(buf, length, "%s\n", int3400_thermal_uuids[i]);
}
return length;
@@ -149,7 +149,7 @@ static ssize_t current_uuid_show(struct device *dev,
for (i = 0; i <= INT3400_THERMAL_CRITICAL; i++) {
if (priv->os_uuid_mask & BIT(i))
- length += sysfs_emit_at(buf, length, int3400_thermal_uuids[i]);
+ length += sysfs_emit_at(buf, length, "%s\n", int3400_thermal_uuids[i]);
}
if (length)
diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.c b/drivers/thermal/intel/intel_soc_dts_iosf.c
index f99dc7e4ae89..db97499f4f0a 100644
--- a/drivers/thermal/intel/intel_soc_dts_iosf.c
+++ b/drivers/thermal/intel/intel_soc_dts_iosf.c
@@ -398,7 +398,7 @@ struct intel_soc_dts_sensors *intel_soc_dts_iosf_init(
spin_lock_init(&sensors->intr_notify_lock);
mutex_init(&sensors->dts_update_lock);
sensors->intr_type = intr_type;
- sensors->tj_max = tj_max;
+ sensors->tj_max = tj_max * 1000;
if (intr_type == INTEL_SOC_DTS_INTERRUPT_NONE)
notification = false;
else
diff --git a/drivers/thunderbolt/dma_test.c b/drivers/thunderbolt/dma_test.c
index 3bedecb236e0..14bb6dec6c4b 100644
--- a/drivers/thunderbolt/dma_test.c
+++ b/drivers/thunderbolt/dma_test.c
@@ -192,9 +192,9 @@ static int dma_test_start_rings(struct dma_test *dt)
}
ret = tb_xdomain_enable_paths(dt->xd, dt->tx_hopid,
- dt->tx_ring ? dt->tx_ring->hop : 0,
+ dt->tx_ring ? dt->tx_ring->hop : -1,
dt->rx_hopid,
- dt->rx_ring ? dt->rx_ring->hop : 0);
+ dt->rx_ring ? dt->rx_ring->hop : -1);
if (ret) {
dma_test_free_rings(dt);
return ret;
@@ -218,9 +218,9 @@ static void dma_test_stop_rings(struct dma_test *dt)
tb_ring_stop(dt->tx_ring);
ret = tb_xdomain_disable_paths(dt->xd, dt->tx_hopid,
- dt->tx_ring ? dt->tx_ring->hop : 0,
+ dt->tx_ring ? dt->tx_ring->hop : -1,
dt->rx_hopid,
- dt->rx_ring ? dt->rx_ring->hop : 0);
+ dt->rx_ring ? dt->rx_ring->hop : -1);
if (ret)
dev_warn(&dt->svc->dev, "failed to disable DMA paths\n");
diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c
index d76e923fbc6a..e58beac44295 100644
--- a/drivers/thunderbolt/nhi.c
+++ b/drivers/thunderbolt/nhi.c
@@ -54,6 +54,26 @@ static int ring_interrupt_index(const struct tb_ring *ring)
return bit;
}
+static void nhi_mask_interrupt(struct tb_nhi *nhi, int mask, int ring)
+{
+ if (nhi->quirks & QUIRK_AUTO_CLEAR_INT) {
+ u32 val;
+
+ val = ioread32(nhi->iobase + REG_RING_INTERRUPT_BASE + ring);
+ iowrite32(val & ~mask, nhi->iobase + REG_RING_INTERRUPT_BASE + ring);
+ } else {
+ iowrite32(mask, nhi->iobase + REG_RING_INTERRUPT_MASK_CLEAR_BASE + ring);
+ }
+}
+
+static void nhi_clear_interrupt(struct tb_nhi *nhi, int ring)
+{
+ if (nhi->quirks & QUIRK_AUTO_CLEAR_INT)
+ ioread32(nhi->iobase + REG_RING_NOTIFY_BASE + ring);
+ else
+ iowrite32(~0, nhi->iobase + REG_RING_INT_CLEAR + ring);
+}
+
/*
* ring_interrupt_active() - activate/deactivate interrupts for a single ring
*
@@ -61,8 +81,8 @@ static int ring_interrupt_index(const struct tb_ring *ring)
*/
static void ring_interrupt_active(struct tb_ring *ring, bool active)
{
- int reg = REG_RING_INTERRUPT_BASE +
- ring_interrupt_index(ring) / 32 * 4;
+ int index = ring_interrupt_index(ring) / 32 * 4;
+ int reg = REG_RING_INTERRUPT_BASE + index;
int interrupt_bit = ring_interrupt_index(ring) & 31;
int mask = 1 << interrupt_bit;
u32 old, new;
@@ -123,7 +143,11 @@ static void ring_interrupt_active(struct tb_ring *ring, bool active)
"interrupt for %s %d is already %s\n",
RING_TYPE(ring), ring->hop,
active ? "enabled" : "disabled");
- iowrite32(new, ring->nhi->iobase + reg);
+
+ if (active)
+ iowrite32(new, ring->nhi->iobase + reg);
+ else
+ nhi_mask_interrupt(ring->nhi, mask, index);
}
/*
@@ -136,11 +160,11 @@ static void nhi_disable_interrupts(struct tb_nhi *nhi)
int i = 0;
/* disable interrupts */
for (i = 0; i < RING_INTERRUPT_REG_COUNT(nhi); i++)
- iowrite32(0, nhi->iobase + REG_RING_INTERRUPT_BASE + 4 * i);
+ nhi_mask_interrupt(nhi, ~0, 4 * i);
/* clear interrupt status bits */
for (i = 0; i < RING_NOTIFY_REG_COUNT(nhi); i++)
- ioread32(nhi->iobase + REG_RING_NOTIFY_BASE + 4 * i);
+ nhi_clear_interrupt(nhi, 4 * i);
}
/* ring helper methods */
diff --git a/drivers/thunderbolt/nhi_regs.h b/drivers/thunderbolt/nhi_regs.h
index faef165a919c..6ba295815477 100644
--- a/drivers/thunderbolt/nhi_regs.h
+++ b/drivers/thunderbolt/nhi_regs.h
@@ -93,6 +93,8 @@ struct ring_desc {
#define REG_RING_INTERRUPT_BASE 0x38200
#define RING_INTERRUPT_REG_COUNT(nhi) ((31 + 2 * nhi->hop_count) / 32)
+#define REG_RING_INTERRUPT_MASK_CLEAR_BASE 0x38208
+
#define REG_INT_THROTTLING_RATE 0x38c00
/* Interrupt Vector Allocation */
diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c
index 7bfbc9ca9ba4..c1af712ca728 100644
--- a/drivers/thunderbolt/tb.c
+++ b/drivers/thunderbolt/tb.c
@@ -737,6 +737,7 @@ static void tb_scan_port(struct tb_port *port)
{
struct tb_cm *tcm = tb_priv(port->sw->tb);
struct tb_port *upstream_port;
+ bool discovery = false;
struct tb_switch *sw;
int ret;
@@ -804,8 +805,10 @@ static void tb_scan_port(struct tb_port *port)
* tunnels and know which switches were authorized already by
* the boot firmware.
*/
- if (!tcm->hotplug_active)
+ if (!tcm->hotplug_active) {
dev_set_uevent_suppress(&sw->dev, true);
+ discovery = true;
+ }
/*
* At the moment Thunderbolt 2 and beyond (devices with LC) we
@@ -835,10 +838,14 @@ static void tb_scan_port(struct tb_port *port)
* CL0s and CL1 are enabled and supported together.
* Silently ignore CLx enabling in case CLx is not supported.
*/
- ret = tb_switch_enable_clx(sw, TB_CL1);
- if (ret && ret != -EOPNOTSUPP)
- tb_sw_warn(sw, "failed to enable %s on upstream port\n",
- tb_switch_clx_name(TB_CL1));
+ if (discovery) {
+ tb_sw_dbg(sw, "discovery, not touching CL states\n");
+ } else {
+ ret = tb_switch_enable_clx(sw, TB_CL1);
+ if (ret && ret != -EOPNOTSUPP)
+ tb_sw_warn(sw, "failed to enable %s on upstream port\n",
+ tb_switch_clx_name(TB_CL1));
+ }
if (tb_switch_is_clx_enabled(sw, TB_CL1))
/*
diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c
index 9099ae73e78f..4f222673d651 100644
--- a/drivers/thunderbolt/tunnel.c
+++ b/drivers/thunderbolt/tunnel.c
@@ -526,7 +526,7 @@ static int tb_dp_xchg_caps(struct tb_tunnel *tunnel)
* Perform connection manager handshake between IN and OUT ports
* before capabilities exchange can take place.
*/
- ret = tb_dp_cm_handshake(in, out, 1500);
+ ret = tb_dp_cm_handshake(in, out, 3000);
if (ret)
return ret;
diff --git a/drivers/tty/serial/8250/8250_bcm7271.c b/drivers/tty/serial/8250/8250_bcm7271.c
index f801b1f5b46c..af0e1c070187 100644
--- a/drivers/tty/serial/8250/8250_bcm7271.c
+++ b/drivers/tty/serial/8250/8250_bcm7271.c
@@ -1012,7 +1012,7 @@ static int brcmuart_probe(struct platform_device *pdev)
of_property_read_u32(np, "clock-frequency", &clk_rate);
/* See if a Baud clock has been specified */
- baud_mux_clk = of_clk_get_by_name(np, "sw_baud");
+ baud_mux_clk = devm_clk_get(dev, "sw_baud");
if (IS_ERR(baud_mux_clk)) {
if (PTR_ERR(baud_mux_clk) == -EPROBE_DEFER) {
ret = -EPROBE_DEFER;
@@ -1032,7 +1032,7 @@ static int brcmuart_probe(struct platform_device *pdev)
if (clk_rate == 0) {
dev_err(dev, "clock-frequency or clk not defined\n");
ret = -EINVAL;
- goto release_dma;
+ goto err_clk_disable;
}
dev_dbg(dev, "DMA is %senabled\n", priv->dma_enabled ? "" : "not ");
@@ -1119,6 +1119,8 @@ err1:
serial8250_unregister_port(priv->line);
err:
brcmuart_free_bufs(dev, priv);
+err_clk_disable:
+ clk_disable_unprepare(baud_mux_clk);
release_dma:
if (priv->dma_enabled)
brcmuart_arbitration(priv, 0);
@@ -1133,6 +1135,7 @@ static int brcmuart_remove(struct platform_device *pdev)
hrtimer_cancel(&priv->hrt);
serial8250_unregister_port(priv->line);
brcmuart_free_bufs(&pdev->dev, priv);
+ clk_disable_unprepare(priv->baud_mux_clk);
if (priv->dma_enabled)
brcmuart_arbitration(priv, 0);
return 0;
diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c
index 64770c62bbec..b406cba10b0e 100644
--- a/drivers/tty/serial/8250/8250_exar.c
+++ b/drivers/tty/serial/8250/8250_exar.c
@@ -40,9 +40,13 @@
#define PCI_DEVICE_ID_COMMTECH_4224PCIE 0x0020
#define PCI_DEVICE_ID_COMMTECH_4228PCIE 0x0021
#define PCI_DEVICE_ID_COMMTECH_4222PCIE 0x0022
+
#define PCI_DEVICE_ID_EXAR_XR17V4358 0x4358
#define PCI_DEVICE_ID_EXAR_XR17V8358 0x8358
+#define PCI_SUBDEVICE_ID_USR_2980 0x0128
+#define PCI_SUBDEVICE_ID_USR_2981 0x0129
+
#define PCI_DEVICE_ID_SEALEVEL_710xC 0x1001
#define PCI_DEVICE_ID_SEALEVEL_720xC 0x1002
#define PCI_DEVICE_ID_SEALEVEL_740xC 0x1004
@@ -829,6 +833,15 @@ static const struct exar8250_board pbn_exar_XR17V8358 = {
(kernel_ulong_t)&bd \
}
+#define USR_DEVICE(devid, sdevid, bd) { \
+ PCI_DEVICE_SUB( \
+ PCI_VENDOR_ID_USR, \
+ PCI_DEVICE_ID_EXAR_##devid, \
+ PCI_VENDOR_ID_EXAR, \
+ PCI_SUBDEVICE_ID_USR_##sdevid), 0, 0, \
+ (kernel_ulong_t)&bd \
+ }
+
static const struct pci_device_id exar_pci_tbl[] = {
EXAR_DEVICE(ACCESSIO, COM_2S, pbn_exar_XR17C15x),
EXAR_DEVICE(ACCESSIO, COM_4S, pbn_exar_XR17C15x),
@@ -853,6 +866,10 @@ static const struct pci_device_id exar_pci_tbl[] = {
IBM_DEVICE(XR17C152, SATURN_SERIAL_ONE_PORT, pbn_exar_ibm_saturn),
+ /* USRobotics USR298x-OEM PCI Modems */
+ USR_DEVICE(XR17C152, 2980, pbn_exar_XR17C15x),
+ USR_DEVICE(XR17C152, 2981, pbn_exar_XR17C15x),
+
/* Exar Corp. XR17C15[248] Dual/Quad/Octal UART */
EXAR_DEVICE(EXAR, XR17C152, pbn_exar_XR17C15x),
EXAR_DEVICE(EXAR, XR17C154, pbn_exar_XR17C15x),
diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index c55be6fda0ca..e80c4f6551a1 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -1920,6 +1920,8 @@ pci_moxa_setup(struct serial_private *priv,
#define PCI_SUBDEVICE_ID_SIIG_DUAL_30 0x2530
#define PCI_VENDOR_ID_ADVANTECH 0x13fe
#define PCI_DEVICE_ID_INTEL_CE4100_UART 0x2e66
+#define PCI_DEVICE_ID_ADVANTECH_PCI1600 0x1600
+#define PCI_DEVICE_ID_ADVANTECH_PCI1600_1611 0x1611
#define PCI_DEVICE_ID_ADVANTECH_PCI3620 0x3620
#define PCI_DEVICE_ID_ADVANTECH_PCI3618 0x3618
#define PCI_DEVICE_ID_ADVANTECH_PCIf618 0xf618
@@ -4085,6 +4087,9 @@ static SIMPLE_DEV_PM_OPS(pciserial_pm_ops, pciserial_suspend_one,
pciserial_resume_one);
static const struct pci_device_id serial_pci_tbl[] = {
+ { PCI_VENDOR_ID_ADVANTECH, PCI_DEVICE_ID_ADVANTECH_PCI1600,
+ PCI_DEVICE_ID_ADVANTECH_PCI1600_1611, PCI_ANY_ID, 0, 0,
+ pbn_b0_4_921600 },
/* Advantech use PCI_DEVICE_ID_ADVANTECH_PCI3620 (0x3620) as 'PCI_SUBVENDOR_ID' */
{ PCI_VENDOR_ID_ADVANTECH, PCI_DEVICE_ID_ADVANTECH_PCI3620,
PCI_DEVICE_ID_ADVANTECH_PCI3620, 0x0001, 0, 0,
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index fe8d79c4ae95..c153ba3a018a 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -669,6 +669,7 @@ EXPORT_SYMBOL_GPL(serial8250_em485_supported);
/**
* serial8250_em485_config() - generic ->rs485_config() callback
* @port: uart port
+ * @termios: termios structure
* @rs485: rs485 settings
*
* Generic callback usable by 8250 uart drivers to activate rs485 settings
diff --git a/drivers/tty/serial/8250/8250_tegra.c b/drivers/tty/serial/8250/8250_tegra.c
index 2509e7f74ccf..89956bbf34d9 100644
--- a/drivers/tty/serial/8250/8250_tegra.c
+++ b/drivers/tty/serial/8250/8250_tegra.c
@@ -113,13 +113,15 @@ static int tegra_uart_probe(struct platform_device *pdev)
ret = serial8250_register_8250_port(&port8250);
if (ret < 0)
- goto err_clkdisable;
+ goto err_ctrl_assert;
platform_set_drvdata(pdev, uart);
uart->line = ret;
return 0;
+err_ctrl_assert:
+ reset_control_assert(uart->rst);
err_clkdisable:
clk_disable_unprepare(uart->clk);
diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 398e5aac2e77..3e3fb377d90d 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -762,7 +762,7 @@ config SERIAL_PMACZILOG_CONSOLE
config SERIAL_CPM
tristate "CPM SCC/SMC serial port support"
- depends on CPM2 || CPM1 || (PPC32 && COMPILE_TEST)
+ depends on CPM2 || CPM1
select SERIAL_CORE
help
This driver supports the SCC and SMC serial ports on Motorola
diff --git a/drivers/tty/serial/arc_uart.c b/drivers/tty/serial/arc_uart.c
index 59e25f2b6632..4b2512eef577 100644
--- a/drivers/tty/serial/arc_uart.c
+++ b/drivers/tty/serial/arc_uart.c
@@ -606,10 +606,11 @@ static int arc_serial_probe(struct platform_device *pdev)
}
uart->baud = val;
- port->membase = of_iomap(np, 0);
- if (!port->membase)
+ port->membase = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(port->membase)) {
/* No point of dev_err since UART itself is hosed here */
- return -ENXIO;
+ return PTR_ERR(port->membase);
+ }
port->irq = irq_of_parse_and_map(np, 0);
diff --git a/drivers/tty/serial/cpm_uart/cpm_uart.h b/drivers/tty/serial/cpm_uart/cpm_uart.h
index 0577618e78c0..46c03ed71c31 100644
--- a/drivers/tty/serial/cpm_uart/cpm_uart.h
+++ b/drivers/tty/serial/cpm_uart/cpm_uart.h
@@ -19,8 +19,6 @@ struct gpio_desc;
#include "cpm_uart_cpm2.h"
#elif defined(CONFIG_CPM1)
#include "cpm_uart_cpm1.h"
-#elif defined(CONFIG_COMPILE_TEST)
-#include "cpm_uart_cpm2.h"
#endif
#define SERIAL_CPM_MAJOR 204
diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index c91916e13648..7fd30fcc10c6 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -310,7 +310,7 @@ static const struct lpuart_soc_data ls1021a_data = {
static const struct lpuart_soc_data ls1028a_data = {
.devtype = LS1028A_LPUART,
.iotype = UPIO_MEM32,
- .rx_watermark = 1,
+ .rx_watermark = 0,
};
static struct lpuart_soc_data imx7ulp_data = {
@@ -1495,34 +1495,36 @@ static void lpuart_break_ctl(struct uart_port *port, int break_state)
static void lpuart32_break_ctl(struct uart_port *port, int break_state)
{
- unsigned long temp, modem;
- struct tty_struct *tty;
- unsigned int cflag = 0;
-
- tty = tty_port_tty_get(&port->state->port);
- if (tty) {
- cflag = tty->termios.c_cflag;
- tty_kref_put(tty);
- }
+ unsigned long temp;
- temp = lpuart32_read(port, UARTCTRL) & ~UARTCTRL_SBK;
- modem = lpuart32_read(port, UARTMODIR);
+ temp = lpuart32_read(port, UARTCTRL);
+ /*
+ * LPUART IP now has two known bugs, one is CTS has higher priority than the
+ * break signal, which causes the break signal sending through UARTCTRL_SBK
+ * may impacted by the CTS input if the HW flow control is enabled. It
+ * exists on all platforms we support in this driver.
+ * Another bug is i.MX8QM LPUART may have an additional break character
+ * being sent after SBK was cleared.
+ * To avoid above two bugs, we use Transmit Data Inversion function to send
+ * the break signal instead of UARTCTRL_SBK.
+ */
if (break_state != 0) {
- temp |= UARTCTRL_SBK;
/*
- * LPUART CTS has higher priority than SBK, need to disable CTS before
- * asserting SBK to avoid any interference if flow control is enabled.
+ * Disable the transmitter to prevent any data from being sent out
+ * during break, then invert the TX line to send break.
*/
- if (cflag & CRTSCTS && modem & UARTMODIR_TXCTSE)
- lpuart32_write(port, modem & ~UARTMODIR_TXCTSE, UARTMODIR);
+ temp &= ~UARTCTRL_TE;
+ lpuart32_write(port, temp, UARTCTRL);
+ temp |= UARTCTRL_TXINV;
+ lpuart32_write(port, temp, UARTCTRL);
} else {
- /* Re-enable the CTS when break off. */
- if (cflag & CRTSCTS && !(modem & UARTMODIR_TXCTSE))
- lpuart32_write(port, modem | UARTMODIR_TXCTSE, UARTMODIR);
+ /* Disable the TXINV to turn off break and re-enable transmitter. */
+ temp &= ~UARTCTRL_TXINV;
+ lpuart32_write(port, temp, UARTCTRL);
+ temp |= UARTCTRL_TE;
+ lpuart32_write(port, temp, UARTCTRL);
}
-
- lpuart32_write(port, temp, UARTCTRL);
}
static void lpuart_setup_watermark(struct lpuart_port *sport)
diff --git a/drivers/tty/serial/lantiq.c b/drivers/tty/serial/lantiq.c
index a58e9277dfad..f1387f1024db 100644
--- a/drivers/tty/serial/lantiq.c
+++ b/drivers/tty/serial/lantiq.c
@@ -250,6 +250,7 @@ lqasc_err_int(int irq, void *_port)
struct ltq_uart_port *ltq_port = to_ltq_uart_port(port);
spin_lock_irqsave(&ltq_port->lock, flags);
+ __raw_writel(ASC_IRNCR_EIR, port->membase + LTQ_ASC_IRNCR);
/* clear any pending interrupts */
asc_update_bits(0, ASCWHBSTATE_CLRPE | ASCWHBSTATE_CLRFE |
ASCWHBSTATE_CLRROE, port->membase + LTQ_ASC_WHBSTATE);
diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 08dc3e2a729c..8582479f0211 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -1664,19 +1664,18 @@ static int qcom_geni_serial_probe(struct platform_device *pdev)
uport->private_data = &port->private_data;
platform_set_drvdata(pdev, port);
- ret = uart_add_one_port(drv, uport);
- if (ret)
- return ret;
-
irq_set_status_flags(uport->irq, IRQ_NOAUTOEN);
ret = devm_request_irq(uport->dev, uport->irq, qcom_geni_serial_isr,
IRQF_TRIGGER_HIGH, port->name, uport);
if (ret) {
dev_err(uport->dev, "Failed to get IRQ ret %d\n", ret);
- uart_remove_one_port(drv, uport);
return ret;
}
+ ret = uart_add_one_port(drv, uport);
+ if (ret)
+ return ret;
+
/*
* Set pm_runtime status as ACTIVE so that wakeup_irq gets
* enabled/disabled from dev_pm_arm_wake_irq during system
diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c
index 498ba9c0ee93..829c4be66f3b 100644
--- a/drivers/tty/vt/vc_screen.c
+++ b/drivers/tty/vt/vc_screen.c
@@ -656,10 +656,17 @@ vcs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
}
}
- /* The vcs_size might have changed while we slept to grab
- * the user buffer, so recheck.
+ /* The vc might have been freed or vcs_size might have changed
+ * while we slept to grab the user buffer, so recheck.
* Return data written up to now on failure.
*/
+ vc = vcs_vc(inode, &viewed);
+ if (!vc) {
+ if (written)
+ break;
+ ret = -ENXIO;
+ goto unlock_out;
+ }
size = vcs_size(vc, attr, false);
if (size < 0) {
if (written)
diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c
index 202ff71e1b58..51b3c6ae781d 100644
--- a/drivers/ufs/core/ufs-mcq.c
+++ b/drivers/ufs/core/ufs-mcq.c
@@ -150,7 +150,8 @@ static int ufshcd_mcq_config_nr_queues(struct ufs_hba *hba)
u32 hba_maxq, rem, tot_queues;
struct Scsi_Host *host = hba->host;
- hba_maxq = FIELD_GET(MAX_QUEUE_SUP, hba->mcq_capabilities);
+ /* maxq is 0 based value */
+ hba_maxq = FIELD_GET(MAX_QUEUE_SUP, hba->mcq_capabilities) + 1;
tot_queues = UFS_MCQ_NUM_DEV_CMD_QUEUES + read_queues + poll_queues +
rw_queues;
@@ -265,7 +266,7 @@ static int ufshcd_mcq_get_tag(struct ufs_hba *hba,
addr = (le64_to_cpu(cqe->command_desc_base_addr) & CQE_UCD_BA) -
hba->ucdl_dma_addr;
- return div_u64(addr, sizeof(struct utp_transfer_cmd_desc));
+ return div_u64(addr, ufshcd_get_ucd_size(hba));
}
static void ufshcd_mcq_process_cqe(struct ufs_hba *hba,
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 45fd374fe56c..e7e79f515e14 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -2849,10 +2849,10 @@ static void ufshcd_map_queues(struct Scsi_Host *shost)
static void ufshcd_init_lrb(struct ufs_hba *hba, struct ufshcd_lrb *lrb, int i)
{
struct utp_transfer_cmd_desc *cmd_descp = (void *)hba->ucdl_base_addr +
- i * sizeof_utp_transfer_cmd_desc(hba);
+ i * ufshcd_get_ucd_size(hba);
struct utp_transfer_req_desc *utrdlp = hba->utrdl_base_addr;
dma_addr_t cmd_desc_element_addr = hba->ucdl_dma_addr +
- i * sizeof_utp_transfer_cmd_desc(hba);
+ i * ufshcd_get_ucd_size(hba);
u16 response_offset = offsetof(struct utp_transfer_cmd_desc,
response_upiu);
u16 prdt_offset = offsetof(struct utp_transfer_cmd_desc, prd_table);
@@ -3761,7 +3761,7 @@ static int ufshcd_memory_alloc(struct ufs_hba *hba)
size_t utmrdl_size, utrdl_size, ucdl_size;
/* Allocate memory for UTP command descriptors */
- ucdl_size = sizeof_utp_transfer_cmd_desc(hba) * hba->nutrs;
+ ucdl_size = ufshcd_get_ucd_size(hba) * hba->nutrs;
hba->ucdl_base_addr = dmam_alloc_coherent(hba->dev,
ucdl_size,
&hba->ucdl_dma_addr,
@@ -3861,7 +3861,7 @@ static void ufshcd_host_memory_configure(struct ufs_hba *hba)
prdt_offset =
offsetof(struct utp_transfer_cmd_desc, prd_table);
- cmd_desc_size = sizeof_utp_transfer_cmd_desc(hba);
+ cmd_desc_size = ufshcd_get_ucd_size(hba);
cmd_desc_dma_addr = hba->ucdl_dma_addr;
for (i = 0; i < hba->nutrs; i++) {
@@ -8452,7 +8452,7 @@ static void ufshcd_release_sdb_queue(struct ufs_hba *hba, int nutrs)
{
size_t ucdl_size, utrdl_size;
- ucdl_size = sizeof(struct utp_transfer_cmd_desc) * nutrs;
+ ucdl_size = ufshcd_get_ucd_size(hba) * nutrs;
dmam_free_coherent(hba->dev, ucdl_size, hba->ucdl_base_addr,
hba->ucdl_dma_addr);
diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c
index ccfaebca6faa..1dcadef933e3 100644
--- a/drivers/usb/cdns3/cdns3-gadget.c
+++ b/drivers/usb/cdns3/cdns3-gadget.c
@@ -2097,6 +2097,19 @@ int cdns3_ep_config(struct cdns3_endpoint *priv_ep, bool enable)
else
priv_ep->trb_burst_size = 16;
+ /*
+ * In versions preceding DEV_VER_V2, for example, iMX8QM, there exit the bugs
+ * in the DMA. These bugs occur when the trb_burst_size exceeds 16 and the
+ * address is not aligned to 128 Bytes (which is a product of the 64-bit AXI
+ * and AXI maximum burst length of 16 or 0xF+1, dma_axi_ctrl0[3:0]). This
+ * results in data corruption when it crosses the 4K border. The corruption
+ * specifically occurs from the position (4K - (address & 0x7F)) to 4K.
+ *
+ * So force trb_burst_size to 16 at such platform.
+ */
+ if (priv_dev->dev_ver < DEV_VER_V2)
+ priv_ep->trb_burst_size = 16;
+
mult = min_t(u8, mult, EP_CFG_MULT_MAX);
buffering = min_t(u8, buffering, EP_CFG_BUFFERING_MAX);
maxburst = min_t(u8, maxburst, EP_CFG_MAXBURST_MAX);
diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index 4bb6d304eb4b..311007b1d904 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -1928,6 +1928,8 @@ static int usbtmc_ioctl_request(struct usbtmc_device_data *data,
if (request.req.wLength > USBTMC_BUFSIZE)
return -EMSGSIZE;
+ if (request.req.wLength == 0) /* Length-0 requests are never IN */
+ request.req.bRequestType &= ~USB_DIR_IN;
is_in = request.req.bRequestType & USB_DIR_IN;
diff --git a/drivers/usb/core/buffer.c b/drivers/usb/core/buffer.c
index fbb087b728dc..268ccbec88f9 100644
--- a/drivers/usb/core/buffer.c
+++ b/drivers/usb/core/buffer.c
@@ -172,3 +172,44 @@ void hcd_buffer_free(
}
dma_free_coherent(hcd->self.sysdev, size, addr, dma);
}
+
+void *hcd_buffer_alloc_pages(struct usb_hcd *hcd,
+ size_t size, gfp_t mem_flags, dma_addr_t *dma)
+{
+ if (size == 0)
+ return NULL;
+
+ if (hcd->localmem_pool)
+ return gen_pool_dma_alloc_align(hcd->localmem_pool,
+ size, dma, PAGE_SIZE);
+
+ /* some USB hosts just use PIO */
+ if (!hcd_uses_dma(hcd)) {
+ *dma = DMA_MAPPING_ERROR;
+ return (void *)__get_free_pages(mem_flags,
+ get_order(size));
+ }
+
+ return dma_alloc_coherent(hcd->self.sysdev,
+ size, dma, mem_flags);
+}
+
+void hcd_buffer_free_pages(struct usb_hcd *hcd,
+ size_t size, void *addr, dma_addr_t dma)
+{
+ if (!addr)
+ return;
+
+ if (hcd->localmem_pool) {
+ gen_pool_free(hcd->localmem_pool,
+ (unsigned long)addr, size);
+ return;
+ }
+
+ if (!hcd_uses_dma(hcd)) {
+ free_pages((unsigned long)addr, get_order(size));
+ return;
+ }
+
+ dma_free_coherent(hcd->self.sysdev, size, addr, dma);
+}
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index e501a03d6c70..fcf68818e999 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -186,6 +186,7 @@ static int connected(struct usb_dev_state *ps)
static void dec_usb_memory_use_count(struct usb_memory *usbm, int *count)
{
struct usb_dev_state *ps = usbm->ps;
+ struct usb_hcd *hcd = bus_to_hcd(ps->dev->bus);
unsigned long flags;
spin_lock_irqsave(&ps->lock, flags);
@@ -194,8 +195,8 @@ static void dec_usb_memory_use_count(struct usb_memory *usbm, int *count)
list_del(&usbm->memlist);
spin_unlock_irqrestore(&ps->lock, flags);
- usb_free_coherent(ps->dev, usbm->size, usbm->mem,
- usbm->dma_handle);
+ hcd_buffer_free_pages(hcd, usbm->size,
+ usbm->mem, usbm->dma_handle);
usbfs_decrease_memory_usage(
usbm->size + sizeof(struct usb_memory));
kfree(usbm);
@@ -234,7 +235,7 @@ static int usbdev_mmap(struct file *file, struct vm_area_struct *vma)
size_t size = vma->vm_end - vma->vm_start;
void *mem;
unsigned long flags;
- dma_addr_t dma_handle;
+ dma_addr_t dma_handle = DMA_MAPPING_ERROR;
int ret;
ret = usbfs_increase_memory_usage(size + sizeof(struct usb_memory));
@@ -247,8 +248,8 @@ static int usbdev_mmap(struct file *file, struct vm_area_struct *vma)
goto error_decrease_mem;
}
- mem = usb_alloc_coherent(ps->dev, size, GFP_USER | __GFP_NOWARN,
- &dma_handle);
+ mem = hcd_buffer_alloc_pages(hcd,
+ size, GFP_USER | __GFP_NOWARN, &dma_handle);
if (!mem) {
ret = -ENOMEM;
goto error_free_usbm;
@@ -264,7 +265,14 @@ static int usbdev_mmap(struct file *file, struct vm_area_struct *vma)
usbm->vma_use_count = 1;
INIT_LIST_HEAD(&usbm->memlist);
- if (hcd->localmem_pool || !hcd_uses_dma(hcd)) {
+ /*
+ * In DMA-unavailable cases, hcd_buffer_alloc_pages allocates
+ * normal pages and assigns DMA_MAPPING_ERROR to dma_handle. Check
+ * whether we are in such cases, and then use remap_pfn_range (or
+ * dma_mmap_coherent) to map normal (or DMA) pages into the user
+ * space, respectively.
+ */
+ if (dma_handle == DMA_MAPPING_ERROR) {
if (remap_pfn_range(vma, vma->vm_start,
virt_to_phys(usbm->mem) >> PAGE_SHIFT,
size, vma->vm_page_prot) < 0) {
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 0beaab932e7d..d68958e151a7 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -1137,7 +1137,7 @@ static int dwc3_core_init(struct dwc3 *dwc)
dwc3_set_incr_burst_type(dwc);
- dwc3_phy_power_on(dwc);
+ ret = dwc3_phy_power_on(dwc);
if (ret)
goto err_exit_phy;
@@ -1929,6 +1929,11 @@ static int dwc3_remove(struct platform_device *pdev)
pm_runtime_disable(&pdev->dev);
pm_runtime_dont_use_autosuspend(&pdev->dev);
pm_runtime_put_noidle(&pdev->dev);
+ /*
+ * HACK: Clear the driver data, which is currently accessed by parent
+ * glue drivers, before allowing the parent to suspend.
+ */
+ platform_set_drvdata(pdev, NULL);
pm_runtime_set_suspended(&pdev->dev);
dwc3_free_event_buffers(dwc);
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index d56457c02996..1f043c31a096 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -1116,6 +1116,7 @@ struct dwc3_scratchpad_array {
* @dis_metastability_quirk: set to disable metastability quirk.
* @dis_split_quirk: set to disable split boundary.
* @wakeup_configured: set if the device is configured for remote wakeup.
+ * @suspended: set to track suspend event due to U3/L2.
* @imod_interval: set the interrupt moderation interval in 250ns
* increments or 0 to disable.
* @max_cfg_eps: current max number of IN eps used across all USB configs.
@@ -1332,6 +1333,7 @@ struct dwc3 {
unsigned dis_split_quirk:1;
unsigned async_callbacks:1;
unsigned wakeup_configured:1;
+ unsigned suspended:1;
u16 imod_interval;
diff --git a/drivers/usb/dwc3/debugfs.c b/drivers/usb/dwc3/debugfs.c
index e4a2560b9dc0..ebf03468fac4 100644
--- a/drivers/usb/dwc3/debugfs.c
+++ b/drivers/usb/dwc3/debugfs.c
@@ -332,6 +332,11 @@ static int dwc3_lsp_show(struct seq_file *s, void *unused)
unsigned int current_mode;
unsigned long flags;
u32 reg;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dwc->dev);
+ if (ret < 0)
+ return ret;
spin_lock_irqsave(&dwc->lock, flags);
reg = dwc3_readl(dwc->regs, DWC3_GSTS);
@@ -350,6 +355,8 @@ static int dwc3_lsp_show(struct seq_file *s, void *unused)
}
spin_unlock_irqrestore(&dwc->lock, flags);
+ pm_runtime_put_sync(dwc->dev);
+
return 0;
}
@@ -395,6 +402,11 @@ static int dwc3_mode_show(struct seq_file *s, void *unused)
struct dwc3 *dwc = s->private;
unsigned long flags;
u32 reg;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dwc->dev);
+ if (ret < 0)
+ return ret;
spin_lock_irqsave(&dwc->lock, flags);
reg = dwc3_readl(dwc->regs, DWC3_GCTL);
@@ -414,6 +426,8 @@ static int dwc3_mode_show(struct seq_file *s, void *unused)
seq_printf(s, "UNKNOWN %08x\n", DWC3_GCTL_PRTCAP(reg));
}
+ pm_runtime_put_sync(dwc->dev);
+
return 0;
}
@@ -463,6 +477,11 @@ static int dwc3_testmode_show(struct seq_file *s, void *unused)
struct dwc3 *dwc = s->private;
unsigned long flags;
u32 reg;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dwc->dev);
+ if (ret < 0)
+ return ret;
spin_lock_irqsave(&dwc->lock, flags);
reg = dwc3_readl(dwc->regs, DWC3_DCTL);
@@ -493,6 +512,8 @@ static int dwc3_testmode_show(struct seq_file *s, void *unused)
seq_printf(s, "UNKNOWN %d\n", reg);
}
+ pm_runtime_put_sync(dwc->dev);
+
return 0;
}
@@ -509,6 +530,7 @@ static ssize_t dwc3_testmode_write(struct file *file,
unsigned long flags;
u32 testmode = 0;
char buf[32];
+ int ret;
if (copy_from_user(&buf, ubuf, min_t(size_t, sizeof(buf) - 1, count)))
return -EFAULT;
@@ -526,10 +548,16 @@ static ssize_t dwc3_testmode_write(struct file *file,
else
testmode = 0;
+ ret = pm_runtime_resume_and_get(dwc->dev);
+ if (ret < 0)
+ return ret;
+
spin_lock_irqsave(&dwc->lock, flags);
dwc3_gadget_set_test_mode(dwc, testmode);
spin_unlock_irqrestore(&dwc->lock, flags);
+ pm_runtime_put_sync(dwc->dev);
+
return count;
}
@@ -548,12 +576,18 @@ static int dwc3_link_state_show(struct seq_file *s, void *unused)
enum dwc3_link_state state;
u32 reg;
u8 speed;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dwc->dev);
+ if (ret < 0)
+ return ret;
spin_lock_irqsave(&dwc->lock, flags);
reg = dwc3_readl(dwc->regs, DWC3_GSTS);
if (DWC3_GSTS_CURMOD(reg) != DWC3_GSTS_CURMOD_DEVICE) {
seq_puts(s, "Not available\n");
spin_unlock_irqrestore(&dwc->lock, flags);
+ pm_runtime_put_sync(dwc->dev);
return 0;
}
@@ -566,6 +600,8 @@ static int dwc3_link_state_show(struct seq_file *s, void *unused)
dwc3_gadget_hs_link_string(state));
spin_unlock_irqrestore(&dwc->lock, flags);
+ pm_runtime_put_sync(dwc->dev);
+
return 0;
}
@@ -584,6 +620,7 @@ static ssize_t dwc3_link_state_write(struct file *file,
char buf[32];
u32 reg;
u8 speed;
+ int ret;
if (copy_from_user(&buf, ubuf, min_t(size_t, sizeof(buf) - 1, count)))
return -EFAULT;
@@ -603,10 +640,15 @@ static ssize_t dwc3_link_state_write(struct file *file,
else
return -EINVAL;
+ ret = pm_runtime_resume_and_get(dwc->dev);
+ if (ret < 0)
+ return ret;
+
spin_lock_irqsave(&dwc->lock, flags);
reg = dwc3_readl(dwc->regs, DWC3_GSTS);
if (DWC3_GSTS_CURMOD(reg) != DWC3_GSTS_CURMOD_DEVICE) {
spin_unlock_irqrestore(&dwc->lock, flags);
+ pm_runtime_put_sync(dwc->dev);
return -EINVAL;
}
@@ -616,12 +658,15 @@ static ssize_t dwc3_link_state_write(struct file *file,
if (speed < DWC3_DSTS_SUPERSPEED &&
state != DWC3_LINK_STATE_RECOV) {
spin_unlock_irqrestore(&dwc->lock, flags);
+ pm_runtime_put_sync(dwc->dev);
return -EINVAL;
}
dwc3_gadget_set_link_state(dwc, state);
spin_unlock_irqrestore(&dwc->lock, flags);
+ pm_runtime_put_sync(dwc->dev);
+
return count;
}
@@ -645,6 +690,11 @@ static int dwc3_tx_fifo_size_show(struct seq_file *s, void *unused)
unsigned long flags;
u32 mdwidth;
u32 val;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dwc->dev);
+ if (ret < 0)
+ return ret;
spin_lock_irqsave(&dwc->lock, flags);
val = dwc3_core_fifo_space(dep, DWC3_TXFIFO);
@@ -657,6 +707,8 @@ static int dwc3_tx_fifo_size_show(struct seq_file *s, void *unused)
seq_printf(s, "%u\n", val);
spin_unlock_irqrestore(&dwc->lock, flags);
+ pm_runtime_put_sync(dwc->dev);
+
return 0;
}
@@ -667,6 +719,11 @@ static int dwc3_rx_fifo_size_show(struct seq_file *s, void *unused)
unsigned long flags;
u32 mdwidth;
u32 val;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dwc->dev);
+ if (ret < 0)
+ return ret;
spin_lock_irqsave(&dwc->lock, flags);
val = dwc3_core_fifo_space(dep, DWC3_RXFIFO);
@@ -679,6 +736,8 @@ static int dwc3_rx_fifo_size_show(struct seq_file *s, void *unused)
seq_printf(s, "%u\n", val);
spin_unlock_irqrestore(&dwc->lock, flags);
+ pm_runtime_put_sync(dwc->dev);
+
return 0;
}
@@ -688,12 +747,19 @@ static int dwc3_tx_request_queue_show(struct seq_file *s, void *unused)
struct dwc3 *dwc = dep->dwc;
unsigned long flags;
u32 val;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dwc->dev);
+ if (ret < 0)
+ return ret;
spin_lock_irqsave(&dwc->lock, flags);
val = dwc3_core_fifo_space(dep, DWC3_TXREQQ);
seq_printf(s, "%u\n", val);
spin_unlock_irqrestore(&dwc->lock, flags);
+ pm_runtime_put_sync(dwc->dev);
+
return 0;
}
@@ -703,12 +769,19 @@ static int dwc3_rx_request_queue_show(struct seq_file *s, void *unused)
struct dwc3 *dwc = dep->dwc;
unsigned long flags;
u32 val;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dwc->dev);
+ if (ret < 0)
+ return ret;
spin_lock_irqsave(&dwc->lock, flags);
val = dwc3_core_fifo_space(dep, DWC3_RXREQQ);
seq_printf(s, "%u\n", val);
spin_unlock_irqrestore(&dwc->lock, flags);
+ pm_runtime_put_sync(dwc->dev);
+
return 0;
}
@@ -718,12 +791,19 @@ static int dwc3_rx_info_queue_show(struct seq_file *s, void *unused)
struct dwc3 *dwc = dep->dwc;
unsigned long flags;
u32 val;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dwc->dev);
+ if (ret < 0)
+ return ret;
spin_lock_irqsave(&dwc->lock, flags);
val = dwc3_core_fifo_space(dep, DWC3_RXINFOQ);
seq_printf(s, "%u\n", val);
spin_unlock_irqrestore(&dwc->lock, flags);
+ pm_runtime_put_sync(dwc->dev);
+
return 0;
}
@@ -733,12 +813,19 @@ static int dwc3_descriptor_fetch_queue_show(struct seq_file *s, void *unused)
struct dwc3 *dwc = dep->dwc;
unsigned long flags;
u32 val;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dwc->dev);
+ if (ret < 0)
+ return ret;
spin_lock_irqsave(&dwc->lock, flags);
val = dwc3_core_fifo_space(dep, DWC3_DESCFETCHQ);
seq_printf(s, "%u\n", val);
spin_unlock_irqrestore(&dwc->lock, flags);
+ pm_runtime_put_sync(dwc->dev);
+
return 0;
}
@@ -748,12 +835,19 @@ static int dwc3_event_queue_show(struct seq_file *s, void *unused)
struct dwc3 *dwc = dep->dwc;
unsigned long flags;
u32 val;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dwc->dev);
+ if (ret < 0)
+ return ret;
spin_lock_irqsave(&dwc->lock, flags);
val = dwc3_core_fifo_space(dep, DWC3_EVENTQ);
seq_printf(s, "%u\n", val);
spin_unlock_irqrestore(&dwc->lock, flags);
+ pm_runtime_put_sync(dwc->dev);
+
return 0;
}
@@ -798,6 +892,11 @@ static int dwc3_trb_ring_show(struct seq_file *s, void *unused)
struct dwc3 *dwc = dep->dwc;
unsigned long flags;
int i;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dwc->dev);
+ if (ret < 0)
+ return ret;
spin_lock_irqsave(&dwc->lock, flags);
if (dep->number <= 1) {
@@ -827,6 +926,8 @@ static int dwc3_trb_ring_show(struct seq_file *s, void *unused)
out:
spin_unlock_irqrestore(&dwc->lock, flags);
+ pm_runtime_put_sync(dwc->dev);
+
return 0;
}
@@ -839,6 +940,11 @@ static int dwc3_ep_info_register_show(struct seq_file *s, void *unused)
u32 lower_32_bits;
u32 upper_32_bits;
u32 reg;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dwc->dev);
+ if (ret < 0)
+ return ret;
spin_lock_irqsave(&dwc->lock, flags);
reg = DWC3_GDBGLSPMUX_EPSELECT(dep->number);
@@ -851,6 +957,8 @@ static int dwc3_ep_info_register_show(struct seq_file *s, void *unused)
seq_printf(s, "0x%016llx\n", ep_info);
spin_unlock_irqrestore(&dwc->lock, flags);
+ pm_runtime_put_sync(dwc->dev);
+
return 0;
}
@@ -910,6 +1018,7 @@ void dwc3_debugfs_init(struct dwc3 *dwc)
dwc->regset->regs = dwc3_regs;
dwc->regset->nregs = ARRAY_SIZE(dwc3_regs);
dwc->regset->base = dwc->regs - DWC3_GLOBALS_REGS_START;
+ dwc->regset->dev = dwc->dev;
root = debugfs_create_dir(dev_name(dwc->dev), usb_debug_root);
dwc->debug_root = root;
diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c
index 959fc925ca7c..79b22abf9727 100644
--- a/drivers/usb/dwc3/dwc3-qcom.c
+++ b/drivers/usb/dwc3/dwc3-qcom.c
@@ -308,7 +308,16 @@ static void dwc3_qcom_interconnect_exit(struct dwc3_qcom *qcom)
/* Only usable in contexts where the role can not change. */
static bool dwc3_qcom_is_host(struct dwc3_qcom *qcom)
{
- struct dwc3 *dwc = platform_get_drvdata(qcom->dwc3);
+ struct dwc3 *dwc;
+
+ /*
+ * FIXME: Fix this layering violation.
+ */
+ dwc = platform_get_drvdata(qcom->dwc3);
+
+ /* Core driver may not have probed yet. */
+ if (!dwc)
+ return false;
return dwc->xhci;
}
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index c0ca4d12f95d..b78599dd705c 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -198,6 +198,7 @@ static void dwc3_gadget_del_and_unmap_request(struct dwc3_ep *dep,
list_del(&req->list);
req->remaining = 0;
req->needs_extra_trb = false;
+ req->num_trbs = 0;
if (req->request.status == -EINPROGRESS)
req->request.status = status;
@@ -2440,6 +2441,7 @@ static int dwc3_gadget_func_wakeup(struct usb_gadget *g, int intf_id)
return -EINVAL;
}
dwc3_resume_gadget(dwc);
+ dwc->suspended = false;
dwc->link_state = DWC3_LINK_STATE_U0;
}
@@ -2699,6 +2701,21 @@ static int dwc3_gadget_soft_disconnect(struct dwc3 *dwc)
return ret;
}
+static int dwc3_gadget_soft_connect(struct dwc3 *dwc)
+{
+ /*
+ * In the Synopsys DWC_usb31 1.90a programming guide section
+ * 4.1.9, it specifies that for a reconnect after a
+ * device-initiated disconnect requires a core soft reset
+ * (DCTL.CSftRst) before enabling the run/stop bit.
+ */
+ dwc3_core_soft_reset(dwc);
+
+ dwc3_event_buffers_setup(dwc);
+ __dwc3_gadget_start(dwc);
+ return dwc3_gadget_run_stop(dwc, true);
+}
+
static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
{
struct dwc3 *dwc = gadget_to_dwc(g);
@@ -2737,21 +2754,10 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
synchronize_irq(dwc->irq_gadget);
- if (!is_on) {
+ if (!is_on)
ret = dwc3_gadget_soft_disconnect(dwc);
- } else {
- /*
- * In the Synopsys DWC_usb31 1.90a programming guide section
- * 4.1.9, it specifies that for a reconnect after a
- * device-initiated disconnect requires a core soft reset
- * (DCTL.CSftRst) before enabling the run/stop bit.
- */
- dwc3_core_soft_reset(dwc);
-
- dwc3_event_buffers_setup(dwc);
- __dwc3_gadget_start(dwc);
- ret = dwc3_gadget_run_stop(dwc, true);
- }
+ else
+ ret = dwc3_gadget_soft_connect(dwc);
pm_runtime_put(dwc->dev);
@@ -3938,6 +3944,8 @@ static void dwc3_gadget_disconnect_interrupt(struct dwc3 *dwc)
{
int reg;
+ dwc->suspended = false;
+
dwc3_gadget_set_link_state(dwc, DWC3_LINK_STATE_RX_DET);
reg = dwc3_readl(dwc->regs, DWC3_DCTL);
@@ -3962,6 +3970,8 @@ static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc)
{
u32 reg;
+ dwc->suspended = false;
+
/*
* Ideally, dwc3_reset_gadget() would trigger the function
* drivers to stop any active transfers through ep disable.
@@ -4180,6 +4190,8 @@ static void dwc3_gadget_conndone_interrupt(struct dwc3 *dwc)
static void dwc3_gadget_wakeup_interrupt(struct dwc3 *dwc, unsigned int evtinfo)
{
+ dwc->suspended = false;
+
/*
* TODO take core out of low power mode when that's
* implemented.
@@ -4277,6 +4289,7 @@ static void dwc3_gadget_linksts_change_interrupt(struct dwc3 *dwc,
if (dwc->gadget->wakeup_armed) {
dwc3_gadget_enable_linksts_evts(dwc, false);
dwc3_resume_gadget(dwc);
+ dwc->suspended = false;
}
break;
case DWC3_LINK_STATE_U1:
@@ -4303,8 +4316,10 @@ static void dwc3_gadget_suspend_interrupt(struct dwc3 *dwc,
{
enum dwc3_link_state next = evtinfo & DWC3_LINK_STATE_MASK;
- if (dwc->link_state != next && next == DWC3_LINK_STATE_U3)
+ if (!dwc->suspended && next == DWC3_LINK_STATE_U3) {
+ dwc->suspended = true;
dwc3_suspend_gadget(dwc);
+ }
dwc->link_state = next;
}
@@ -4655,42 +4670,39 @@ void dwc3_gadget_exit(struct dwc3 *dwc)
int dwc3_gadget_suspend(struct dwc3 *dwc)
{
unsigned long flags;
+ int ret;
if (!dwc->gadget_driver)
return 0;
- dwc3_gadget_run_stop(dwc, false);
+ ret = dwc3_gadget_soft_disconnect(dwc);
+ if (ret)
+ goto err;
spin_lock_irqsave(&dwc->lock, flags);
dwc3_disconnect_gadget(dwc);
- __dwc3_gadget_stop(dwc);
spin_unlock_irqrestore(&dwc->lock, flags);
return 0;
+
+err:
+ /*
+ * Attempt to reset the controller's state. Likely no
+ * communication can be established until the host
+ * performs a port reset.
+ */
+ if (dwc->softconnect)
+ dwc3_gadget_soft_connect(dwc);
+
+ return ret;
}
int dwc3_gadget_resume(struct dwc3 *dwc)
{
- int ret;
-
if (!dwc->gadget_driver || !dwc->softconnect)
return 0;
- ret = __dwc3_gadget_start(dwc);
- if (ret < 0)
- goto err0;
-
- ret = dwc3_gadget_run_stop(dwc, true);
- if (ret < 0)
- goto err1;
-
- return 0;
-
-err1:
- __dwc3_gadget_stop(dwc);
-
-err0:
- return ret;
+ return dwc3_gadget_soft_connect(dwc);
}
void dwc3_gadget_process_pending_events(struct dwc3 *dwc)
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index a13c946e0663..f41a385a5c42 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -3535,6 +3535,7 @@ static void ffs_func_unbind(struct usb_configuration *c,
/* Drain any pending AIO completions */
drain_workqueue(ffs->io_completion_wq);
+ ffs_event_add(ffs, FUNCTIONFS_UNBIND);
if (!--opts->refcnt)
functionfs_unbind(ffs);
@@ -3559,7 +3560,6 @@ static void ffs_func_unbind(struct usb_configuration *c,
func->function.ssp_descriptors = NULL;
func->interfaces_nums = NULL;
- ffs_event_add(ffs, FUNCTIONFS_UNBIND);
}
static struct usb_function *ffs_alloc(struct usb_function_instance *fi)
diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c
index 6956ad8ba8dd..a366abb45623 100644
--- a/drivers/usb/gadget/function/u_ether.c
+++ b/drivers/usb/gadget/function/u_ether.c
@@ -17,6 +17,7 @@
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
+#include <linux/string_helpers.h>
#include <linux/usb/composite.h>
#include "u_ether.h"
@@ -965,6 +966,8 @@ int gether_get_host_addr_cdc(struct net_device *net, char *host_addr, int len)
dev = netdev_priv(net);
snprintf(host_addr, len, "%pm", dev->host_mac);
+ string_upper(host_addr, host_addr);
+
return strlen(host_addr);
}
EXPORT_SYMBOL_GPL(gether_get_host_addr_cdc);
diff --git a/drivers/usb/gadget/udc/amd5536udc_pci.c b/drivers/usb/gadget/udc/amd5536udc_pci.c
index c80f9bd51b75..a36913ae31f9 100644
--- a/drivers/usb/gadget/udc/amd5536udc_pci.c
+++ b/drivers/usb/gadget/udc/amd5536udc_pci.c
@@ -170,6 +170,9 @@ static int udc_pci_probe(
retval = -ENODEV;
goto err_probe;
}
+
+ udc = dev;
+
return 0;
err_probe:
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index 4641153e9706..83fd1de14784 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -37,10 +37,14 @@ static const struct bus_type gadget_bus_type;
* @vbus: for udcs who care about vbus status, this value is real vbus status;
* for udcs who do not care about vbus status, this value is always true
* @started: the UDC's started state. True if the UDC had started.
- * @connect_lock: protects udc->vbus, udc->started, gadget->connect, gadget->deactivate related
- * functions. usb_gadget_connect_locked, usb_gadget_disconnect_locked,
- * usb_udc_connect_control_locked, usb_gadget_udc_start_locked, usb_gadget_udc_stop_locked are
- * called with this lock held.
+ * @allow_connect: Indicates whether UDC is allowed to be pulled up.
+ * Set/cleared by gadget_(un)bind_driver() after gadget driver is bound or
+ * unbound.
+ * @connect_lock: protects udc->started, gadget->connect,
+ * gadget->allow_connect and gadget->deactivate. The routines
+ * usb_gadget_connect_locked(), usb_gadget_disconnect_locked(),
+ * usb_udc_connect_control_locked(), usb_gadget_udc_start_locked() and
+ * usb_gadget_udc_stop_locked() are called with this lock held.
*
* This represents the internal data structure which is used by the UDC-class
* to hold information about udc driver and gadget together.
@@ -52,6 +56,8 @@ struct usb_udc {
struct list_head list;
bool vbus;
bool started;
+ bool allow_connect;
+ struct work_struct vbus_work;
struct mutex connect_lock;
};
@@ -692,7 +698,6 @@ out:
}
EXPORT_SYMBOL_GPL(usb_gadget_vbus_disconnect);
-/* Internal version of usb_gadget_connect needs to be called with connect_lock held. */
static int usb_gadget_connect_locked(struct usb_gadget *gadget)
__must_hold(&gadget->udc->connect_lock)
{
@@ -703,15 +708,12 @@ static int usb_gadget_connect_locked(struct usb_gadget *gadget)
goto out;
}
- if (gadget->connected)
- goto out;
-
- if (gadget->deactivated || !gadget->udc->started) {
+ if (gadget->deactivated || !gadget->udc->allow_connect || !gadget->udc->started) {
/*
- * If gadget is deactivated we only save new state.
- * Gadget will be connected automatically after activation.
- *
- * udc first needs to be started before gadget can be pulled up.
+ * If the gadget isn't usable (because it is deactivated,
+ * unbound, or not yet started), we only save the new state.
+ * The gadget will be connected automatically when it is
+ * activated/bound/started.
*/
gadget->connected = true;
goto out;
@@ -749,7 +751,6 @@ int usb_gadget_connect(struct usb_gadget *gadget)
}
EXPORT_SYMBOL_GPL(usb_gadget_connect);
-/* Internal version of usb_gadget_disconnect needs to be called with connect_lock held. */
static int usb_gadget_disconnect_locked(struct usb_gadget *gadget)
__must_hold(&gadget->udc->connect_lock)
{
@@ -767,8 +768,6 @@ static int usb_gadget_disconnect_locked(struct usb_gadget *gadget)
/*
* If gadget is deactivated we only save new state.
* Gadget will stay disconnected after activation.
- *
- * udc should have been started before gadget being pulled down.
*/
gadget->connected = false;
goto out;
@@ -829,10 +828,10 @@ int usb_gadget_deactivate(struct usb_gadget *gadget)
{
int ret = 0;
+ mutex_lock(&gadget->udc->connect_lock);
if (gadget->deactivated)
- goto out;
+ goto unlock;
- mutex_lock(&gadget->udc->connect_lock);
if (gadget->connected) {
ret = usb_gadget_disconnect_locked(gadget);
if (ret)
@@ -848,7 +847,6 @@ int usb_gadget_deactivate(struct usb_gadget *gadget)
unlock:
mutex_unlock(&gadget->udc->connect_lock);
-out:
trace_usb_gadget_deactivate(gadget, ret);
return ret;
@@ -868,10 +866,10 @@ int usb_gadget_activate(struct usb_gadget *gadget)
{
int ret = 0;
+ mutex_lock(&gadget->udc->connect_lock);
if (!gadget->deactivated)
- goto out;
+ goto unlock;
- mutex_lock(&gadget->udc->connect_lock);
gadget->deactivated = false;
/*
@@ -882,7 +880,8 @@ int usb_gadget_activate(struct usb_gadget *gadget)
ret = usb_gadget_connect_locked(gadget);
mutex_unlock(&gadget->udc->connect_lock);
-out:
+unlock:
+ mutex_unlock(&gadget->udc->connect_lock);
trace_usb_gadget_activate(gadget, ret);
return ret;
@@ -1124,12 +1123,21 @@ EXPORT_SYMBOL_GPL(usb_gadget_set_state);
/* Acquire connect_lock before calling this function. */
static void usb_udc_connect_control_locked(struct usb_udc *udc) __must_hold(&udc->connect_lock)
{
- if (udc->vbus && udc->started)
+ if (udc->vbus)
usb_gadget_connect_locked(udc->gadget);
else
usb_gadget_disconnect_locked(udc->gadget);
}
+static void vbus_event_work(struct work_struct *work)
+{
+ struct usb_udc *udc = container_of(work, struct usb_udc, vbus_work);
+
+ mutex_lock(&udc->connect_lock);
+ usb_udc_connect_control_locked(udc);
+ mutex_unlock(&udc->connect_lock);
+}
+
/**
* usb_udc_vbus_handler - updates the udc core vbus status, and try to
* connect or disconnect gadget
@@ -1138,17 +1146,23 @@ static void usb_udc_connect_control_locked(struct usb_udc *udc) __must_hold(&udc
*
* The udc driver calls it when it wants to connect or disconnect gadget
* according to vbus status.
+ *
+ * This function can be invoked from interrupt context by irq handlers of
+ * the gadget drivers, however, usb_udc_connect_control() has to run in
+ * non-atomic context due to the following:
+ * a. Some of the gadget driver implementations expect the ->pullup
+ * callback to be invoked in non-atomic context.
+ * b. usb_gadget_disconnect() acquires udc_lock which is a mutex.
+ * Hence offload invocation of usb_udc_connect_control() to workqueue.
*/
void usb_udc_vbus_handler(struct usb_gadget *gadget, bool status)
{
struct usb_udc *udc = gadget->udc;
- mutex_lock(&udc->connect_lock);
if (udc) {
udc->vbus = status;
- usb_udc_connect_control_locked(udc);
+ schedule_work(&udc->vbus_work);
}
- mutex_unlock(&udc->connect_lock);
}
EXPORT_SYMBOL_GPL(usb_udc_vbus_handler);
@@ -1381,6 +1395,7 @@ int usb_add_gadget(struct usb_gadget *gadget)
mutex_lock(&udc_lock);
list_add_tail(&udc->list, &udc_list);
mutex_unlock(&udc_lock);
+ INIT_WORK(&udc->vbus_work, vbus_event_work);
ret = device_add(&udc->dev);
if (ret)
@@ -1512,6 +1527,7 @@ void usb_del_gadget(struct usb_gadget *gadget)
flush_work(&gadget->work);
device_del(&gadget->dev);
ida_free(&gadget_id_numbers, gadget->id_number);
+ cancel_work_sync(&udc->vbus_work);
device_unregister(&udc->dev);
}
EXPORT_SYMBOL_GPL(usb_del_gadget);
@@ -1583,6 +1599,7 @@ static int gadget_bind_driver(struct device *dev)
goto err_start;
}
usb_gadget_enable_async_callbacks(udc);
+ udc->allow_connect = true;
usb_udc_connect_control_locked(udc);
mutex_unlock(&udc->connect_lock);
@@ -1615,6 +1632,8 @@ static void gadget_unbind_driver(struct device *dev)
kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE);
+ udc->allow_connect = false;
+ cancel_work_sync(&udc->vbus_work);
mutex_lock(&udc->connect_lock);
usb_gadget_disconnect_locked(gadget);
usb_gadget_disable_async_callbacks(udc);
diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c
index aac8bc185afa..eb008e873361 100644
--- a/drivers/usb/gadget/udc/renesas_usb3.c
+++ b/drivers/usb/gadget/udc/renesas_usb3.c
@@ -2877,9 +2877,9 @@ static int renesas_usb3_probe(struct platform_device *pdev)
struct rzv2m_usb3drd *ddata = dev_get_drvdata(pdev->dev.parent);
usb3->drd_reg = ddata->reg;
- ret = devm_request_irq(ddata->dev, ddata->drd_irq,
+ ret = devm_request_irq(&pdev->dev, ddata->drd_irq,
renesas_usb3_otg_irq, 0,
- dev_name(ddata->dev), usb3);
+ dev_name(&pdev->dev), usb3);
if (ret < 0)
return ret;
}
diff --git a/drivers/usb/host/uhci-pci.c b/drivers/usb/host/uhci-pci.c
index 3592f757fe05..7bd2fddde770 100644
--- a/drivers/usb/host/uhci-pci.c
+++ b/drivers/usb/host/uhci-pci.c
@@ -119,11 +119,13 @@ static int uhci_pci_init(struct usb_hcd *hcd)
uhci->rh_numports = uhci_count_ports(hcd);
- /* Intel controllers report the OverCurrent bit active on.
- * VIA controllers report it active off, so we'll adjust the
- * bit value. (It's not standardized in the UHCI spec.)
+ /*
+ * Intel controllers report the OverCurrent bit active on. VIA
+ * and ZHAOXIN controllers report it active off, so we'll adjust
+ * the bit value. (It's not standardized in the UHCI spec.)
*/
- if (to_pci_dev(uhci_dev(uhci))->vendor == PCI_VENDOR_ID_VIA)
+ if (to_pci_dev(uhci_dev(uhci))->vendor == PCI_VENDOR_ID_VIA ||
+ to_pci_dev(uhci_dev(uhci))->vendor == PCI_VENDOR_ID_ZHAOXIN)
uhci->oc_low = 1;
/* HP's server management chip requires a longer port reset delay. */
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index ddb79f23fb3b..79b3691f373f 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/acpi.h>
#include <linux/reset.h>
+#include <linux/suspend.h>
#include "xhci.h"
#include "xhci-trace.h"
@@ -387,7 +388,7 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
if (pdev->vendor == PCI_VENDOR_ID_AMD &&
pdev->device == PCI_DEVICE_ID_AMD_RENOIR_XHCI)
- xhci->quirks |= XHCI_BROKEN_D3COLD;
+ xhci->quirks |= XHCI_BROKEN_D3COLD_S2I;
if (pdev->vendor == PCI_VENDOR_ID_INTEL) {
xhci->quirks |= XHCI_LPM_SUPPORT;
@@ -801,9 +802,16 @@ static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup)
* Systems with the TI redriver that loses port status change events
* need to have the registers polled during D3, so avoid D3cold.
*/
- if (xhci->quirks & (XHCI_COMP_MODE_QUIRK | XHCI_BROKEN_D3COLD))
+ if (xhci->quirks & XHCI_COMP_MODE_QUIRK)
pci_d3cold_disable(pdev);
+#ifdef CONFIG_SUSPEND
+ /* d3cold is broken, but only when s2idle is used */
+ if (pm_suspend_target_state == PM_SUSPEND_TO_IDLE &&
+ xhci->quirks & (XHCI_BROKEN_D3COLD_S2I))
+ pci_d3cold_disable(pdev);
+#endif
+
if (xhci->quirks & XHCI_PME_STUCK_QUIRK)
xhci_pme_quirk(hcd);
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 1ad12d5a4857..2bc82b3a2f98 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -276,6 +276,26 @@ static void inc_enq(struct xhci_hcd *xhci, struct xhci_ring *ring,
trace_xhci_inc_enq(ring);
}
+static int xhci_num_trbs_to(struct xhci_segment *start_seg, union xhci_trb *start,
+ struct xhci_segment *end_seg, union xhci_trb *end,
+ unsigned int num_segs)
+{
+ union xhci_trb *last_on_seg;
+ int num = 0;
+ int i = 0;
+
+ do {
+ if (start_seg == end_seg && end >= start)
+ return num + (end - start);
+ last_on_seg = &start_seg->trbs[TRBS_PER_SEGMENT - 1];
+ num += last_on_seg - start;
+ start_seg = start_seg->next;
+ start = start_seg->trbs;
+ } while (i++ <= num_segs);
+
+ return -EINVAL;
+}
+
/*
* Check to see if there's room to enqueue num_trbs on the ring and make sure
* enqueue pointer will not advance into dequeue segment. See rules above.
@@ -2140,6 +2160,7 @@ static int finish_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
u32 trb_comp_code)
{
struct xhci_ep_ctx *ep_ctx;
+ int trbs_freed;
ep_ctx = xhci_get_ep_ctx(xhci, ep->vdev->out_ctx, ep->ep_index);
@@ -2209,9 +2230,15 @@ static int finish_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
}
/* Update ring dequeue pointer */
+ trbs_freed = xhci_num_trbs_to(ep_ring->deq_seg, ep_ring->dequeue,
+ td->last_trb_seg, td->last_trb,
+ ep_ring->num_segs);
+ if (trbs_freed < 0)
+ xhci_dbg(xhci, "Failed to count freed trbs at TD finish\n");
+ else
+ ep_ring->num_trbs_free += trbs_freed;
ep_ring->dequeue = td->last_trb;
ep_ring->deq_seg = td->last_trb_seg;
- ep_ring->num_trbs_free += td->num_trbs - 1;
inc_deq(xhci, ep_ring);
return xhci_td_cleanup(xhci, td, ep_ring, td->status);
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 08d721921b7b..6b690ec91ff3 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1901,7 +1901,7 @@ struct xhci_hcd {
#define XHCI_DISABLE_SPARSE BIT_ULL(38)
#define XHCI_SG_TRB_CACHE_SIZE_QUIRK BIT_ULL(39)
#define XHCI_NO_SOFT_RETRY BIT_ULL(40)
-#define XHCI_BROKEN_D3COLD BIT_ULL(41)
+#define XHCI_BROKEN_D3COLD_S2I BIT_ULL(41)
#define XHCI_EP_CTX_BROKEN_DCS BIT_ULL(42)
#define XHCI_SUSPEND_RESUME_CLKS BIT_ULL(43)
#define XHCI_RESET_TO_DEFAULT BIT_ULL(44)
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 644a55447fd7..fd42e3a0bd18 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -248,6 +248,8 @@ static void option_instat_callback(struct urb *urb);
#define QUECTEL_VENDOR_ID 0x2c7c
/* These Quectel products use Quectel's vendor ID */
#define QUECTEL_PRODUCT_EC21 0x0121
+#define QUECTEL_PRODUCT_EM061K_LTA 0x0123
+#define QUECTEL_PRODUCT_EM061K_LMS 0x0124
#define QUECTEL_PRODUCT_EC25 0x0125
#define QUECTEL_PRODUCT_EG91 0x0191
#define QUECTEL_PRODUCT_EG95 0x0195
@@ -266,6 +268,8 @@ static void option_instat_callback(struct urb *urb);
#define QUECTEL_PRODUCT_RM520N 0x0801
#define QUECTEL_PRODUCT_EC200U 0x0901
#define QUECTEL_PRODUCT_EC200S_CN 0x6002
+#define QUECTEL_PRODUCT_EM061K_LWW 0x6008
+#define QUECTEL_PRODUCT_EM061K_LCN 0x6009
#define QUECTEL_PRODUCT_EC200T 0x6026
#define QUECTEL_PRODUCT_RM500K 0x7001
@@ -1189,6 +1193,18 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0x00, 0x40) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x30) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x30) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0x00, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LMS, 0xff, 0xff, 0x30) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LMS, 0xff, 0x00, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LMS, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LTA, 0xff, 0xff, 0x30) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LTA, 0xff, 0x00, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LTA, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LWW, 0xff, 0xff, 0x30) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LWW, 0xff, 0x00, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LWW, 0xff, 0xff, 0x40) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0xff, 0xff),
.driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0, 0) },
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index 8931df5a85fd..c54e9805da53 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -406,22 +406,25 @@ static DEF_SCSI_QCMD(queuecommand)
***********************************************************************/
/* Command timeout and abort */
-static int command_abort(struct scsi_cmnd *srb)
+static int command_abort_matching(struct us_data *us, struct scsi_cmnd *srb_match)
{
- struct us_data *us = host_to_us(srb->device->host);
-
- usb_stor_dbg(us, "%s called\n", __func__);
-
/*
* us->srb together with the TIMED_OUT, RESETTING, and ABORTING
* bits are protected by the host lock.
*/
scsi_lock(us_to_host(us));
- /* Is this command still active? */
- if (us->srb != srb) {
+ /* is there any active pending command to abort ? */
+ if (!us->srb) {
scsi_unlock(us_to_host(us));
usb_stor_dbg(us, "-- nothing to abort\n");
+ return SUCCESS;
+ }
+
+ /* Does the command match the passed srb if any ? */
+ if (srb_match && us->srb != srb_match) {
+ scsi_unlock(us_to_host(us));
+ usb_stor_dbg(us, "-- pending command mismatch\n");
return FAILED;
}
@@ -444,6 +447,14 @@ static int command_abort(struct scsi_cmnd *srb)
return SUCCESS;
}
+static int command_abort(struct scsi_cmnd *srb)
+{
+ struct us_data *us = host_to_us(srb->device->host);
+
+ usb_stor_dbg(us, "%s called\n", __func__);
+ return command_abort_matching(us, srb);
+}
+
/*
* This invokes the transport reset mechanism to reset the state of the
* device
@@ -455,6 +466,9 @@ static int device_reset(struct scsi_cmnd *srb)
usb_stor_dbg(us, "%s called\n", __func__);
+ /* abort any pending command before reset */
+ command_abort_matching(us, NULL);
+
/* lock the device pointers and do the reset */
mutex_lock(&(us->dev_mutex));
result = us->transport_reset(us);
diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c
index 8f3e884222ad..66de880b28d0 100644
--- a/drivers/usb/typec/altmodes/displayport.c
+++ b/drivers/usb/typec/altmodes/displayport.c
@@ -516,6 +516,10 @@ static ssize_t pin_assignment_show(struct device *dev,
mutex_unlock(&dp->lock);
+ /* get_current_pin_assignments can return 0 when no matching pin assignments are found */
+ if (len == 0)
+ len++;
+
buf[len - 1] = '\n';
return len;
}
diff --git a/drivers/usb/typec/pd.c b/drivers/usb/typec/pd.c
index 0bcde1ff4d39..8cc66e4467c4 100644
--- a/drivers/usb/typec/pd.c
+++ b/drivers/usb/typec/pd.c
@@ -95,7 +95,7 @@ peak_current_show(struct device *dev, struct device_attribute *attr, char *buf)
static ssize_t
fast_role_swap_current_show(struct device *dev, struct device_attribute *attr, char *buf)
{
- return sysfs_emit(buf, "%u\n", to_pdo(dev)->pdo >> PDO_FIXED_FRS_CURR_SHIFT) & 3;
+ return sysfs_emit(buf, "%u\n", (to_pdo(dev)->pdo >> PDO_FIXED_FRS_CURR_SHIFT) & 3);
}
static DEVICE_ATTR_RO(fast_role_swap_current);
diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c
index 8b075ca82ef6..603dbd44deba 100644
--- a/drivers/usb/typec/tipd/core.c
+++ b/drivers/usb/typec/tipd/core.c
@@ -886,6 +886,9 @@ static void tps6598x_remove(struct i2c_client *client)
{
struct tps6598x *tps = i2c_get_clientdata(client);
+ if (!client->irq)
+ cancel_delayed_work_sync(&tps->wq_poll);
+
tps6598x_disconnect(tps, 0);
typec_unregister_port(tps->port);
usb_role_switch_put(tps->role_sw);
@@ -917,7 +920,7 @@ static int __maybe_unused tps6598x_resume(struct device *dev)
enable_irq(client->irq);
}
- if (client->irq)
+ if (!client->irq)
queue_delayed_work(system_power_efficient_wq, &tps->wq_poll,
msecs_to_jiffies(POLL_INTERVAL));
diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
index 2b472ec01dc4..b664ecbb798b 100644
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -132,10 +132,8 @@ static int ucsi_exec_command(struct ucsi *ucsi, u64 cmd)
if (ret)
return ret;
- if (cci & UCSI_CCI_BUSY) {
- ucsi->ops->async_write(ucsi, UCSI_CANCEL, NULL, 0);
- return -EBUSY;
- }
+ if (cmd != UCSI_CANCEL && cci & UCSI_CCI_BUSY)
+ return ucsi_exec_command(ucsi, UCSI_CANCEL);
if (!(cci & UCSI_CCI_COMMAND_COMPLETE))
return -EIO;
@@ -149,6 +147,11 @@ static int ucsi_exec_command(struct ucsi *ucsi, u64 cmd)
return ucsi_read_error(ucsi);
}
+ if (cmd == UCSI_CANCEL && cci & UCSI_CCI_CANCEL_COMPLETE) {
+ ret = ucsi_acknowledge_command(ucsi);
+ return ret ? ret : -EBUSY;
+ }
+
return UCSI_CCI_LENGTH(cci);
}
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index e29e32b306ad..279ac6a558d2 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -3349,10 +3349,10 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
mlx5_vdpa_remove_debugfs(ndev->debugfs);
ndev->debugfs = NULL;
unregister_link_notifier(ndev);
+ _vdpa_unregister_device(dev);
wq = mvdev->wq;
mvdev->wq = NULL;
destroy_workqueue(wq);
- _vdpa_unregister_device(dev);
mgtdev->ndev = NULL;
}
diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index de97e38c3b82..5f5c21674fdc 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -1685,6 +1685,9 @@ static bool vduse_validate_config(struct vduse_dev_config *config)
if (config->vq_num > 0xffff)
return false;
+ if (!config->name[0])
+ return false;
+
if (!device_is_allowed(config->device_id))
return false;
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 3d4dd9420c30..0d2f805468e1 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -860,6 +860,11 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
if (ret)
goto pin_unwind;
+ if (!pfn_valid(phys_pfn)) {
+ ret = -EINVAL;
+ goto pin_unwind;
+ }
+
ret = vfio_add_to_pfn_list(dma, iova, phys_pfn);
if (ret) {
if (put_pfn(phys_pfn, dma->prot) && do_accounting)
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 07181cd8d52e..ae2273196b0c 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -935,13 +935,18 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
err = sock->ops->sendmsg(sock, &msg, len);
if (unlikely(err < 0)) {
+ bool retry = err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS;
+
if (zcopy_used) {
if (vq->heads[ubuf->desc].len == VHOST_DMA_IN_PROGRESS)
vhost_net_ubuf_put(ubufs);
- nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
- % UIO_MAXIOV;
+ if (retry)
+ nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
+ % UIO_MAXIOV;
+ else
+ vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN;
}
- if (err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS) {
+ if (retry) {
vhost_discard_vq_desc(vq, 1);
vhost_net_enable_vq(net, vq);
break;
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 8c1aefc865f0..bf77924d5b60 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -407,7 +407,10 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
+ struct vhost_dev *d = &v->vdev;
+ u64 actual_features;
u64 features;
+ int i;
/*
* It's not allowed to change the features after they have
@@ -422,6 +425,16 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
if (vdpa_set_features(vdpa, features))
return -EINVAL;
+ /* let the vqs know what has been configured */
+ actual_features = ops->get_driver_features(vdpa);
+ for (i = 0; i < d->nvqs; ++i) {
+ struct vhost_virtqueue *vq = d->vqs[i];
+
+ mutex_lock(&vq->mutex);
+ vq->acked_features = actual_features;
+ mutex_unlock(&vq->mutex);
+ }
+
return 0;
}
@@ -594,7 +607,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
if (r)
return r;
- vq->last_avail_idx = vq_state.split.avail_index;
+ if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
+ vq->last_avail_idx = vq_state.packed.last_avail_idx |
+ (vq_state.packed.last_avail_counter << 15);
+ vq->last_used_idx = vq_state.packed.last_used_idx |
+ (vq_state.packed.last_used_counter << 15);
+ } else {
+ vq->last_avail_idx = vq_state.split.avail_index;
+ }
break;
}
@@ -612,9 +632,15 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
break;
case VHOST_SET_VRING_BASE:
- vq_state.split.avail_index = vq->last_avail_idx;
- if (ops->set_vq_state(vdpa, idx, &vq_state))
- r = -EINVAL;
+ if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
+ vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff;
+ vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000);
+ vq_state.packed.last_used_idx = vq->last_used_idx & 0x7fff;
+ vq_state.packed.last_used_counter = !!(vq->last_used_idx & 0x8000);
+ } else {
+ vq_state.split.avail_index = vq->last_avail_idx;
+ }
+ r = ops->set_vq_state(vdpa, idx, &vq_state);
break;
case VHOST_SET_VRING_CALL:
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index a92af08e7864..60c9ebd629dd 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -235,7 +235,7 @@ void vhost_dev_flush(struct vhost_dev *dev)
{
struct vhost_flush_struct flush;
- if (dev->worker) {
+ if (dev->worker.vtsk) {
init_completion(&flush.wait_event);
vhost_work_init(&flush.work, vhost_flush_work);
@@ -247,7 +247,7 @@ EXPORT_SYMBOL_GPL(vhost_dev_flush);
void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
{
- if (!dev->worker)
+ if (!dev->worker.vtsk)
return;
if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
@@ -255,8 +255,8 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
* sure it was not in the list.
* test_and_set_bit() implies a memory barrier.
*/
- llist_add(&work->node, &dev->worker->work_list);
- wake_up_process(dev->worker->vtsk->task);
+ llist_add(&work->node, &dev->worker.work_list);
+ vhost_task_wake(dev->worker.vtsk);
}
}
EXPORT_SYMBOL_GPL(vhost_work_queue);
@@ -264,7 +264,7 @@ EXPORT_SYMBOL_GPL(vhost_work_queue);
/* A lockless hint for busy polling code to exit the loop */
bool vhost_has_work(struct vhost_dev *dev)
{
- return dev->worker && !llist_empty(&dev->worker->work_list);
+ return !llist_empty(&dev->worker.work_list);
}
EXPORT_SYMBOL_GPL(vhost_has_work);
@@ -333,31 +333,21 @@ static void vhost_vq_reset(struct vhost_dev *dev,
__vhost_vq_meta_reset(vq);
}
-static int vhost_worker(void *data)
+static bool vhost_worker(void *data)
{
struct vhost_worker *worker = data;
struct vhost_work *work, *work_next;
struct llist_node *node;
- for (;;) {
- /* mb paired w/ kthread_stop */
- set_current_state(TASK_INTERRUPTIBLE);
-
- if (vhost_task_should_stop(worker->vtsk)) {
- __set_current_state(TASK_RUNNING);
- break;
- }
-
- node = llist_del_all(&worker->work_list);
- if (!node)
- schedule();
+ node = llist_del_all(&worker->work_list);
+ if (node) {
+ __set_current_state(TASK_RUNNING);
node = llist_reverse_order(node);
/* make sure flag is seen after deletion */
smp_wmb();
llist_for_each_entry_safe(work, work_next, node, node) {
clear_bit(VHOST_WORK_QUEUED, &work->flags);
- __set_current_state(TASK_RUNNING);
kcov_remote_start_common(worker->kcov_handle);
work->fn(work);
kcov_remote_stop();
@@ -365,7 +355,7 @@ static int vhost_worker(void *data)
}
}
- return 0;
+ return !!node;
}
static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
@@ -468,7 +458,8 @@ void vhost_dev_init(struct vhost_dev *dev,
dev->umem = NULL;
dev->iotlb = NULL;
dev->mm = NULL;
- dev->worker = NULL;
+ memset(&dev->worker, 0, sizeof(dev->worker));
+ init_llist_head(&dev->worker.work_list);
dev->iov_limit = iov_limit;
dev->weight = weight;
dev->byte_weight = byte_weight;
@@ -542,47 +533,30 @@ static void vhost_detach_mm(struct vhost_dev *dev)
static void vhost_worker_free(struct vhost_dev *dev)
{
- struct vhost_worker *worker = dev->worker;
-
- if (!worker)
+ if (!dev->worker.vtsk)
return;
- dev->worker = NULL;
- WARN_ON(!llist_empty(&worker->work_list));
- vhost_task_stop(worker->vtsk);
- kfree(worker);
+ WARN_ON(!llist_empty(&dev->worker.work_list));
+ vhost_task_stop(dev->worker.vtsk);
+ dev->worker.kcov_handle = 0;
+ dev->worker.vtsk = NULL;
}
static int vhost_worker_create(struct vhost_dev *dev)
{
- struct vhost_worker *worker;
struct vhost_task *vtsk;
char name[TASK_COMM_LEN];
- int ret;
-
- worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
- if (!worker)
- return -ENOMEM;
- dev->worker = worker;
- worker->kcov_handle = kcov_common_handle();
- init_llist_head(&worker->work_list);
snprintf(name, sizeof(name), "vhost-%d", current->pid);
- vtsk = vhost_task_create(vhost_worker, worker, name);
- if (!vtsk) {
- ret = -ENOMEM;
- goto free_worker;
- }
+ vtsk = vhost_task_create(vhost_worker, &dev->worker, name);
+ if (!vtsk)
+ return -ENOMEM;
- worker->vtsk = vtsk;
+ dev->worker.kcov_handle = kcov_common_handle();
+ dev->worker.vtsk = vtsk;
vhost_task_start(vtsk);
return 0;
-
-free_worker:
- kfree(worker);
- dev->worker = NULL;
- return ret;
}
/* Caller should have device mutex */
@@ -1626,17 +1600,25 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
r = -EFAULT;
break;
}
- if (s.num > 0xffff) {
- r = -EINVAL;
- break;
+ if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
+ vq->last_avail_idx = s.num & 0xffff;
+ vq->last_used_idx = (s.num >> 16) & 0xffff;
+ } else {
+ if (s.num > 0xffff) {
+ r = -EINVAL;
+ break;
+ }
+ vq->last_avail_idx = s.num;
}
- vq->last_avail_idx = s.num;
/* Forget the cached index value. */
vq->avail_idx = vq->last_avail_idx;
break;
case VHOST_GET_VRING_BASE:
s.index = idx;
- s.num = vq->last_avail_idx;
+ if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED))
+ s.num = (u32)vq->last_avail_idx | ((u32)vq->last_used_idx << 16);
+ else
+ s.num = vq->last_avail_idx;
if (copy_to_user(argp, &s, sizeof s))
r = -EFAULT;
break;
@@ -2575,12 +2557,11 @@ EXPORT_SYMBOL_GPL(vhost_disable_notify);
/* Create a new message. */
struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type)
{
- struct vhost_msg_node *node = kmalloc(sizeof *node, GFP_KERNEL);
+ /* Make sure all padding within the structure is initialized. */
+ struct vhost_msg_node *node = kzalloc(sizeof(*node), GFP_KERNEL);
if (!node)
return NULL;
- /* Make sure all padding within the structure is initialized. */
- memset(&node->msg, 0, sizeof node->msg);
node->vq = vq;
node->msg.type = type;
return node;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 0308638cdeee..fc900be504b3 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -92,13 +92,17 @@ struct vhost_virtqueue {
/* The routine to call when the Guest pings us, or timeout. */
vhost_work_fn_t handle_kick;
- /* Last available index we saw. */
+ /* Last available index we saw.
+ * Values are limited to 0x7fff, and the high bit is used as
+ * a wrap counter when using VIRTIO_F_RING_PACKED. */
u16 last_avail_idx;
/* Caches available index value from user. */
u16 avail_idx;
- /* Last index we used. */
+ /* Last index we used.
+ * Values are limited to 0x7fff, and the high bit is used as
+ * a wrap counter when using VIRTIO_F_RING_PACKED. */
u16 last_used_idx;
/* Used flags */
@@ -154,7 +158,7 @@ struct vhost_dev {
struct vhost_virtqueue **vqs;
int nvqs;
struct eventfd_ctx *log_ctx;
- struct vhost_worker *worker;
+ struct vhost_worker worker;
struct vhost_iotlb *umem;
struct vhost_iotlb *iotlb;
spinlock_t iotlb_lock;
diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 96e91570cdd3..0fdf5f46802c 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -124,7 +124,7 @@ config FB_PROVIDE_GET_FB_UNMAPPED_AREA
depends on FB
help
Allow generic frame-buffer to provide get_fb_unmapped_area
- function.
+ function to provide shareable character device support on nommu.
menuconfig FB_FOREIGN_ENDIAN
bool "Framebuffer foreign endianness support"
diff --git a/drivers/video/fbdev/arcfb.c b/drivers/video/fbdev/arcfb.c
index 024d0ee4f04f..08d15e408413 100644
--- a/drivers/video/fbdev/arcfb.c
+++ b/drivers/video/fbdev/arcfb.c
@@ -590,7 +590,7 @@ err_fb_alloc:
return retval;
}
-static int arcfb_remove(struct platform_device *dev)
+static void arcfb_remove(struct platform_device *dev)
{
struct fb_info *info = platform_get_drvdata(dev);
@@ -601,12 +601,11 @@ static int arcfb_remove(struct platform_device *dev)
vfree((void __force *)info->screen_base);
framebuffer_release(info);
}
- return 0;
}
static struct platform_driver arcfb_driver = {
.probe = arcfb_probe,
- .remove = arcfb_remove,
+ .remove_new = arcfb_remove,
.driver = {
.name = "arcfb",
},
diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c
index b02e4e645035..cba2b113b28b 100644
--- a/drivers/video/fbdev/aty/atyfb_base.c
+++ b/drivers/video/fbdev/aty/atyfb_base.c
@@ -3498,11 +3498,6 @@ static int atyfb_setup_generic(struct pci_dev *pdev, struct fb_info *info,
if (ret)
goto atyfb_setup_generic_fail;
#endif
- if (!(aty_ld_le32(CRTC_GEN_CNTL, par) & CRTC_EXT_DISP_EN))
- par->clk_wr_offset = (inb(R_GENMO) & 0x0CU) >> 2;
- else
- par->clk_wr_offset = aty_ld_8(CLOCK_CNTL, par) & 0x03U;
-
/* according to ATI, we should use clock 3 for acelerated mode */
par->clk_wr_offset = 3;
diff --git a/drivers/video/fbdev/au1100fb.c b/drivers/video/fbdev/au1100fb.c
index 519313b8bb00..648d6cac86e8 100644
--- a/drivers/video/fbdev/au1100fb.c
+++ b/drivers/video/fbdev/au1100fb.c
@@ -520,13 +520,10 @@ failed:
return -ENODEV;
}
-int au1100fb_drv_remove(struct platform_device *dev)
+void au1100fb_drv_remove(struct platform_device *dev)
{
struct au1100fb_device *fbdev = NULL;
- if (!dev)
- return -ENODEV;
-
fbdev = platform_get_drvdata(dev);
#if !defined(CONFIG_FRAMEBUFFER_CONSOLE) && defined(CONFIG_LOGO)
@@ -543,8 +540,6 @@ int au1100fb_drv_remove(struct platform_device *dev)
clk_disable_unprepare(fbdev->lcdclk);
clk_put(fbdev->lcdclk);
}
-
- return 0;
}
#ifdef CONFIG_PM
@@ -593,9 +588,9 @@ static struct platform_driver au1100fb_driver = {
.name = "au1100-lcd",
},
.probe = au1100fb_drv_probe,
- .remove = au1100fb_drv_remove,
+ .remove_new = au1100fb_drv_remove,
.suspend = au1100fb_drv_suspend,
- .resume = au1100fb_drv_resume,
+ .resume = au1100fb_drv_resume,
};
module_platform_driver(au1100fb_driver);
diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c
index b6b22fa4a8a0..aed88ce45bf0 100644
--- a/drivers/video/fbdev/au1200fb.c
+++ b/drivers/video/fbdev/au1200fb.c
@@ -1765,7 +1765,7 @@ failed:
return ret;
}
-static int au1200fb_drv_remove(struct platform_device *dev)
+static void au1200fb_drv_remove(struct platform_device *dev)
{
struct au1200fb_platdata *pd = platform_get_drvdata(dev);
struct fb_info *fbi;
@@ -1788,8 +1788,6 @@ static int au1200fb_drv_remove(struct platform_device *dev)
}
free_irq(platform_get_irq(dev, 0), (void *)dev);
-
- return 0;
}
#ifdef CONFIG_PM
@@ -1840,7 +1838,7 @@ static struct platform_driver au1200fb_driver = {
.pm = AU1200FB_PMOPS,
},
.probe = au1200fb_drv_probe,
- .remove = au1200fb_drv_remove,
+ .remove_new = au1200fb_drv_remove,
};
module_platform_driver(au1200fb_driver);
diff --git a/drivers/video/fbdev/broadsheetfb.c b/drivers/video/fbdev/broadsheetfb.c
index 55e62dd96f9b..b518cacbf7cd 100644
--- a/drivers/video/fbdev/broadsheetfb.c
+++ b/drivers/video/fbdev/broadsheetfb.c
@@ -1193,7 +1193,7 @@ err:
}
-static int broadsheetfb_remove(struct platform_device *dev)
+static void broadsheetfb_remove(struct platform_device *dev)
{
struct fb_info *info = platform_get_drvdata(dev);
@@ -1209,12 +1209,11 @@ static int broadsheetfb_remove(struct platform_device *dev)
module_put(par->board->owner);
framebuffer_release(info);
}
- return 0;
}
static struct platform_driver broadsheetfb_driver = {
.probe = broadsheetfb_probe,
- .remove = broadsheetfb_remove,
+ .remove_new = broadsheetfb_remove,
.driver = {
.name = "broadsheetfb",
},
diff --git a/drivers/video/fbdev/bw2.c b/drivers/video/fbdev/bw2.c
index 9cbadcd18b25..025d663dc6fd 100644
--- a/drivers/video/fbdev/bw2.c
+++ b/drivers/video/fbdev/bw2.c
@@ -352,7 +352,7 @@ out_err:
return err;
}
-static int bw2_remove(struct platform_device *op)
+static void bw2_remove(struct platform_device *op)
{
struct fb_info *info = dev_get_drvdata(&op->dev);
struct bw2_par *par = info->par;
@@ -363,8 +363,6 @@ static int bw2_remove(struct platform_device *op)
of_iounmap(&op->resource[0], info->screen_base, info->fix.smem_len);
framebuffer_release(info);
-
- return 0;
}
static const struct of_device_id bw2_match[] = {
@@ -381,7 +379,7 @@ static struct platform_driver bw2_driver = {
.of_match_table = bw2_match,
},
.probe = bw2_probe,
- .remove = bw2_remove,
+ .remove_new = bw2_remove,
};
static int __init bw2_init(void)
diff --git a/drivers/video/fbdev/core/bitblit.c b/drivers/video/fbdev/core/bitblit.c
index f98e8f298bc1..8587c9da0670 100644
--- a/drivers/video/fbdev/core/bitblit.c
+++ b/drivers/video/fbdev/core/bitblit.c
@@ -247,6 +247,9 @@ static void bit_cursor(struct vc_data *vc, struct fb_info *info, int mode,
cursor.set = 0;
+ if (!vc->vc_font.data)
+ return;
+
c = scr_readw((u16 *) vc->vc_pos);
attribute = get_attribute(info, c);
src = vc->vc_font.data + ((c & charmask) * (w * vc->vc_font.height));
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index e808dc86001c..28739f1cb5e7 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -1468,7 +1468,7 @@ __releases(&info->lock)
}
#if defined(CONFIG_FB_PROVIDE_GET_FB_UNMAPPED_AREA) && !defined(CONFIG_MMU)
-unsigned long get_fb_unmapped_area(struct file *filp,
+static unsigned long get_fb_unmapped_area(struct file *filp,
unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags)
{
diff --git a/drivers/video/fbdev/i810/i810_dvt.c b/drivers/video/fbdev/i810/i810_dvt.c
index b4b3670667ab..2082b5c92e8f 100644
--- a/drivers/video/fbdev/i810/i810_dvt.c
+++ b/drivers/video/fbdev/i810/i810_dvt.c
@@ -14,6 +14,7 @@
#include "i810_regs.h"
#include "i810.h"
+#include "i810_main.h"
struct mode_registers std_modes[] = {
/* 640x480 @ 60Hz */
@@ -276,7 +277,7 @@ void i810fb_fill_var_timings(struct fb_var_screeninfo *var)
var->upper_margin = total - (yres + var->lower_margin + var->vsync_len);
}
-u32 i810_get_watermark(struct fb_var_screeninfo *var,
+u32 i810_get_watermark(const struct fb_var_screeninfo *var,
struct i810fb_par *par)
{
struct mode_registers *params = &par->regs;
diff --git a/drivers/video/fbdev/imsttfb.c b/drivers/video/fbdev/imsttfb.c
index 975dd682fae4..ee7d01ad1406 100644
--- a/drivers/video/fbdev/imsttfb.c
+++ b/drivers/video/fbdev/imsttfb.c
@@ -1452,9 +1452,13 @@ static int init_imstt(struct fb_info *info)
FBINFO_HWACCEL_FILLRECT |
FBINFO_HWACCEL_YPAN;
- fb_alloc_cmap(&info->cmap, 0, 0);
+ if (fb_alloc_cmap(&info->cmap, 0, 0)) {
+ framebuffer_release(info);
+ return -ENODEV;
+ }
if (register_framebuffer(info) < 0) {
+ fb_dealloc_cmap(&info->cmap);
framebuffer_release(info);
return -ENODEV;
}
@@ -1531,8 +1535,10 @@ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto error;
info->pseudo_palette = par->palette;
ret = init_imstt(info);
- if (!ret)
- pci_set_drvdata(pdev, info);
+ if (ret)
+ goto error;
+
+ pci_set_drvdata(pdev, info);
return ret;
error:
diff --git a/drivers/video/fbdev/matrox/matroxfb_maven.c b/drivers/video/fbdev/matrox/matroxfb_maven.c
index 727a10a59811..b15a8ad92ba7 100644
--- a/drivers/video/fbdev/matrox/matroxfb_maven.c
+++ b/drivers/video/fbdev/matrox/matroxfb_maven.c
@@ -1291,7 +1291,7 @@ static struct i2c_driver maven_driver={
.driver = {
.name = "maven",
},
- .probe_new = maven_probe,
+ .probe = maven_probe,
.remove = maven_remove,
.id_table = maven_id,
};
diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c
index 1eaa35c27835..477789cff8e0 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c
@@ -491,7 +491,8 @@ static int tpo_td043_probe(struct spi_device *spi)
ddata->vcc_reg = devm_regulator_get(&spi->dev, "vcc");
if (IS_ERR(ddata->vcc_reg)) {
- r = dev_err_probe(&spi->dev, r, "failed to get LCD VCC regulator\n");
+ r = dev_err_probe(&spi->dev, PTR_ERR(ddata->vcc_reg),
+ "failed to get LCD VCC regulator\n");
goto err_regulator;
}
diff --git a/drivers/video/fbdev/ssd1307fb.c b/drivers/video/fbdev/ssd1307fb.c
index 046b9990d27c..132d1a205011 100644
--- a/drivers/video/fbdev/ssd1307fb.c
+++ b/drivers/video/fbdev/ssd1307fb.c
@@ -844,7 +844,7 @@ static const struct i2c_device_id ssd1307fb_i2c_id[] = {
MODULE_DEVICE_TABLE(i2c, ssd1307fb_i2c_id);
static struct i2c_driver ssd1307fb_driver = {
- .probe_new = ssd1307fb_probe,
+ .probe = ssd1307fb_probe,
.remove = ssd1307fb_remove,
.id_table = ssd1307fb_i2c_id,
.driver = {
diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c
index 14c9215284c5..686a234f3899 100644
--- a/drivers/video/fbdev/stifb.c
+++ b/drivers/video/fbdev/stifb.c
@@ -741,7 +741,7 @@ ngleClearOverlayPlanes(struct stifb_info *fb, int mask, int data)
packed_len = (fb->info.var.xres << 16) | fb->info.var.yres;
NGLE_SET_DSTXY(fb, packed_dst);
- /* Write zeroes to overlay planes */
+ /* Write zeroes to overlay planes */
NGLE_QUICK_SET_IMAGE_BITMAP_OP(fb,
IBOvals(RopSrc, MaskAddrOffset(0),
BitmapExtent08, StaticReg(0),
@@ -1297,14 +1297,14 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref)
break;
default:
#ifdef FALLBACK_TO_1BPP
- printk(KERN_WARNING
+ printk(KERN_WARNING
"stifb: Unsupported graphics card (id=0x%08x) "
"- now trying 1bpp mode instead\n",
fb->id);
bpp = 1; /* default to 1 bpp */
break;
#else
- printk(KERN_WARNING
+ printk(KERN_WARNING
"stifb: Unsupported graphics card (id=0x%08x) "
"- skipping.\n",
fb->id);
diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c
index 216d49c9d47e..dabc30a09f96 100644
--- a/drivers/video/fbdev/udlfb.c
+++ b/drivers/video/fbdev/udlfb.c
@@ -27,6 +27,8 @@
#include <video/udlfb.h>
#include "edid.h"
+#define OUT_EP_NUM 1 /* The endpoint number we will use */
+
static const struct fb_fix_screeninfo dlfb_fix = {
.id = "udlfb",
.type = FB_TYPE_PACKED_PIXELS,
@@ -1541,24 +1543,16 @@ static const struct device_attribute fb_device_attrs[] = {
static int dlfb_select_std_channel(struct dlfb_data *dlfb)
{
int ret;
- void *buf;
static const u8 set_def_chn[] = {
0x57, 0xCD, 0xDC, 0xA7,
0x1C, 0x88, 0x5E, 0x15,
0x60, 0xFE, 0xC6, 0x97,
0x16, 0x3D, 0x47, 0xF2 };
- buf = kmemdup(set_def_chn, sizeof(set_def_chn), GFP_KERNEL);
-
- if (!buf)
- return -ENOMEM;
-
- ret = usb_control_msg(dlfb->udev, usb_sndctrlpipe(dlfb->udev, 0),
- NR_USB_REQUEST_CHANNEL,
+ ret = usb_control_msg_send(dlfb->udev, 0, NR_USB_REQUEST_CHANNEL,
(USB_DIR_OUT | USB_TYPE_VENDOR), 0, 0,
- buf, sizeof(set_def_chn), USB_CTRL_SET_TIMEOUT);
-
- kfree(buf);
+ &set_def_chn, sizeof(set_def_chn), USB_CTRL_SET_TIMEOUT,
+ GFP_KERNEL);
return ret;
}
@@ -1652,7 +1646,7 @@ static int dlfb_usb_probe(struct usb_interface *intf,
struct fb_info *info;
int retval;
struct usb_device *usbdev = interface_to_usbdev(intf);
- struct usb_endpoint_descriptor *out;
+ static u8 out_ep[] = {OUT_EP_NUM + USB_DIR_OUT, 0};
/* usb initialization */
dlfb = kzalloc(sizeof(*dlfb), GFP_KERNEL);
@@ -1666,9 +1660,9 @@ static int dlfb_usb_probe(struct usb_interface *intf,
dlfb->udev = usb_get_dev(usbdev);
usb_set_intfdata(intf, dlfb);
- retval = usb_find_common_endpoints(intf->cur_altsetting, NULL, &out, NULL, NULL);
- if (retval) {
- dev_err(&intf->dev, "Device should have at lease 1 bulk endpoint!\n");
+ if (!usb_check_bulk_endpoints(intf, out_ep)) {
+ dev_err(&intf->dev, "Invalid DisplayLink device!\n");
+ retval = -EINVAL;
goto error;
}
@@ -1927,7 +1921,8 @@ retry:
}
/* urb->transfer_buffer_length set to actual before submit */
- usb_fill_bulk_urb(urb, dlfb->udev, usb_sndbulkpipe(dlfb->udev, 1),
+ usb_fill_bulk_urb(urb, dlfb->udev,
+ usb_sndbulkpipe(dlfb->udev, OUT_EP_NUM),
buf, size, dlfb_urb_completion, unode);
urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
index 1f5219e12cc3..7beaf2c41fbb 100644
--- a/drivers/xen/pvcalls-back.c
+++ b/drivers/xen/pvcalls-back.c
@@ -325,8 +325,10 @@ static struct sock_mapping *pvcalls_new_active_socket(
void *page;
map = kzalloc(sizeof(*map), GFP_KERNEL);
- if (map == NULL)
+ if (map == NULL) {
+ sock_release(sock);
return NULL;
+ }
map->fedata = fedata;
map->sock = sock;
@@ -418,10 +420,8 @@ static int pvcalls_back_connect(struct xenbus_device *dev,
req->u.connect.ref,
req->u.connect.evtchn,
sock);
- if (!map) {
+ if (!map)
ret = -EFAULT;
- sock_release(sock);
- }
out:
rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
@@ -561,7 +561,6 @@ static void __pvcalls_back_accept(struct work_struct *work)
sock);
if (!map) {
ret = -EFAULT;
- sock_release(sock);
goto out_error;
}
diff --git a/fs/Kconfig b/fs/Kconfig
index cc07a0cd3172..18d034ec7953 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -368,14 +368,7 @@ config NFS_V4_2_SSC_HELPER
source "net/sunrpc/Kconfig"
source "fs/ceph/Kconfig"
-source "fs/cifs/Kconfig"
-source "fs/ksmbd/Kconfig"
-
-config SMBFS_COMMON
- tristate
- default y if CIFS=y || SMB_SERVER=y
- default m if CIFS=m || SMB_SERVER=m
-
+source "fs/smb/Kconfig"
source "fs/coda/Kconfig"
source "fs/afs/Kconfig"
source "fs/9p/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index 4709eba1303c..e513aaee0603 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -89,9 +89,7 @@ obj-$(CONFIG_LOCKD) += lockd/
obj-$(CONFIG_NLS) += nls/
obj-y += unicode/
obj-$(CONFIG_SYSV_FS) += sysv/
-obj-$(CONFIG_SMBFS_COMMON) += smbfs_common/
-obj-$(CONFIG_CIFS) += cifs/
-obj-$(CONFIG_SMB_SERVER) += ksmbd/
+obj-$(CONFIG_SMBFS) += smb/
obj-$(CONFIG_HPFS_FS) += hpfs/
obj-$(CONFIG_NTFS_FS) += ntfs/
obj-$(CONFIG_NTFS3_FS) += ntfs3/
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 4dd97afa536c..5219182e52e1 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -1358,6 +1358,7 @@ static int afs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
op->dentry = dentry;
op->create.mode = S_IFDIR | mode;
op->create.reason = afs_edit_dir_for_mkdir;
+ op->mtime = current_time(dir);
op->ops = &afs_mkdir_operation;
return afs_do_sync_operation(op);
}
@@ -1661,6 +1662,7 @@ static int afs_create(struct mnt_idmap *idmap, struct inode *dir,
op->dentry = dentry;
op->create.mode = S_IFREG | mode;
op->create.reason = afs_edit_dir_for_create;
+ op->mtime = current_time(dir);
op->ops = &afs_create_operation;
return afs_do_sync_operation(op);
@@ -1796,6 +1798,7 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
op->ops = &afs_symlink_operation;
op->create.reason = afs_edit_dir_for_symlink;
op->create.symlink = content;
+ op->mtime = current_time(dir);
return afs_do_sync_operation(op);
error:
diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c
index d1c7068b4346..58452b86e672 100644
--- a/fs/afs/vl_probe.c
+++ b/fs/afs/vl_probe.c
@@ -115,8 +115,8 @@ responded:
}
}
- if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) &&
- rtt_us < server->probe.rtt) {
+ rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us);
+ if (rtt_us < server->probe.rtt) {
server->probe.rtt = rtt_us;
server->rtt = rtt_us;
alist->preferred = index;
diff --git a/fs/afs/write.c b/fs/afs/write.c
index c822d6006033..8750b99c3f56 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -731,6 +731,7 @@ static int afs_writepages_region(struct address_space *mapping,
* (changing page->mapping to NULL), or even swizzled
* back from swapper_space to tmpfs file mapping
*/
+try_again:
if (wbc->sync_mode != WB_SYNC_NONE) {
ret = folio_lock_killable(folio);
if (ret < 0) {
@@ -757,12 +758,14 @@ static int afs_writepages_region(struct address_space *mapping,
#ifdef CONFIG_AFS_FSCACHE
folio_wait_fscache(folio);
#endif
- } else {
- start += folio_size(folio);
+ goto try_again;
}
+
+ start += folio_size(folio);
if (wbc->sync_mode == WB_SYNC_NONE) {
if (skips >= 5 || need_resched()) {
*_next = start;
+ folio_batch_release(&fbatch);
_leave(" = 0 [%llx]", *_next);
return 0;
}
diff --git a/fs/aio.c b/fs/aio.c
index b0b17bd098bb..77e33619de40 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -530,7 +530,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
for (i = 0; i < nr_pages; i++) {
struct page *page;
page = find_or_create_page(file->f_mapping,
- i, GFP_HIGHUSER | __GFP_ZERO);
+ i, GFP_USER | __GFP_ZERO);
if (!page)
break;
pr_debug("pid(%d) page[%d]->count=%d\n",
@@ -571,7 +571,7 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
ctx->user_id = ctx->mmap_base;
ctx->nr_events = nr_events; /* trusted copy */
- ring = kmap_atomic(ctx->ring_pages[0]);
+ ring = page_address(ctx->ring_pages[0]);
ring->nr = nr_events; /* user copy */
ring->id = ~0U;
ring->head = ring->tail = 0;
@@ -579,7 +579,6 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
ring->compat_features = AIO_RING_COMPAT_FEATURES;
ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
ring->header_length = sizeof(struct aio_ring);
- kunmap_atomic(ring);
flush_dcache_page(ctx->ring_pages[0]);
return 0;
@@ -682,9 +681,8 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
* we are protected from page migration
* changes ring_pages by ->ring_lock.
*/
- ring = kmap_atomic(ctx->ring_pages[0]);
+ ring = page_address(ctx->ring_pages[0]);
ring->id = ctx->id;
- kunmap_atomic(ring);
return 0;
}
@@ -1025,9 +1023,8 @@ static void user_refill_reqs_available(struct kioctx *ctx)
* against ctx->completed_events below will make sure we do the
* safe/right thing.
*/
- ring = kmap_atomic(ctx->ring_pages[0]);
+ ring = page_address(ctx->ring_pages[0]);
head = ring->head;
- kunmap_atomic(ring);
refill_reqs_available(ctx, head, ctx->tail);
}
@@ -1133,12 +1130,11 @@ static void aio_complete(struct aio_kiocb *iocb)
if (++tail >= ctx->nr_events)
tail = 0;
- ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
+ ev_page = page_address(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
event = ev_page + pos % AIO_EVENTS_PER_PAGE;
*event = iocb->ki_res;
- kunmap_atomic(ev_page);
flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb,
@@ -1152,10 +1148,9 @@ static void aio_complete(struct aio_kiocb *iocb)
ctx->tail = tail;
- ring = kmap_atomic(ctx->ring_pages[0]);
+ ring = page_address(ctx->ring_pages[0]);
head = ring->head;
ring->tail = tail;
- kunmap_atomic(ring);
flush_dcache_page(ctx->ring_pages[0]);
ctx->completed_events++;
@@ -1215,10 +1210,9 @@ static long aio_read_events_ring(struct kioctx *ctx,
mutex_lock(&ctx->ring_lock);
/* Access to ->ring_pages here is protected by ctx->ring_lock. */
- ring = kmap_atomic(ctx->ring_pages[0]);
+ ring = page_address(ctx->ring_pages[0]);
head = ring->head;
tail = ring->tail;
- kunmap_atomic(ring);
/*
* Ensure that once we've read the current tail pointer, that
@@ -1250,10 +1244,9 @@ static long aio_read_events_ring(struct kioctx *ctx,
avail = min(avail, nr - ret);
avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos);
- ev = kmap(page);
+ ev = page_address(page);
copy_ret = copy_to_user(event + ret, ev + pos,
sizeof(*ev) * avail);
- kunmap(page);
if (unlikely(copy_ret)) {
ret = -EFAULT;
@@ -1265,9 +1258,8 @@ static long aio_read_events_ring(struct kioctx *ctx,
head %= ctx->nr_events;
}
- ring = kmap_atomic(ctx->ring_pages[0]);
+ ring = page_address(ctx->ring_pages[0]);
ring->head = head;
- kunmap_atomic(ring);
flush_dcache_page(ctx->ring_pages[0]);
pr_debug("%li h%u t%u\n", ret, head, tail);
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index 6baf90b08e0e..93046c9dc461 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -600,7 +600,7 @@ static int autofs_dir_symlink(struct mnt_idmap *idmap,
p_ino = autofs_dentry_ino(dentry->d_parent);
p_ino->count++;
- dir->i_mtime = current_time(dir);
+ dir->i_mtime = dir->i_ctime = current_time(dir);
return 0;
}
@@ -633,7 +633,7 @@ static int autofs_dir_unlink(struct inode *dir, struct dentry *dentry)
d_inode(dentry)->i_size = 0;
clear_nlink(d_inode(dentry));
- dir->i_mtime = current_time(dir);
+ dir->i_mtime = dir->i_ctime = current_time(dir);
spin_lock(&sbi->lookup_lock);
__autofs_add_expiring(dentry);
@@ -749,7 +749,7 @@ static int autofs_dir_mkdir(struct mnt_idmap *idmap,
p_ino = autofs_dentry_ino(dentry->d_parent);
p_ino->count++;
inc_nlink(dir);
- dir->i_mtime = current_time(dir);
+ dir->i_mtime = dir->i_ctime = current_time(dir);
return 0;
}
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index aac240430efe..ce083e99ef68 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -71,6 +71,16 @@ bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq)
return atomic_read(&wq->pending) > wq->thresh * 2;
}
+static void btrfs_init_workqueue(struct btrfs_workqueue *wq,
+ struct btrfs_fs_info *fs_info)
+{
+ wq->fs_info = fs_info;
+ atomic_set(&wq->pending, 0);
+ INIT_LIST_HEAD(&wq->ordered_list);
+ spin_lock_init(&wq->list_lock);
+ spin_lock_init(&wq->thres_lock);
+}
+
struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
const char *name, unsigned int flags,
int limit_active, int thresh)
@@ -80,9 +90,9 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
if (!ret)
return NULL;
- ret->fs_info = fs_info;
+ btrfs_init_workqueue(ret, fs_info);
+
ret->limit_active = limit_active;
- atomic_set(&ret->pending, 0);
if (thresh == 0)
thresh = DFT_THRESHOLD;
/* For low threshold, disabling threshold is a better choice */
@@ -106,9 +116,33 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
return NULL;
}
- INIT_LIST_HEAD(&ret->ordered_list);
- spin_lock_init(&ret->list_lock);
- spin_lock_init(&ret->thres_lock);
+ trace_btrfs_workqueue_alloc(ret, name);
+ return ret;
+}
+
+struct btrfs_workqueue *btrfs_alloc_ordered_workqueue(
+ struct btrfs_fs_info *fs_info, const char *name,
+ unsigned int flags)
+{
+ struct btrfs_workqueue *ret;
+
+ ret = kzalloc(sizeof(*ret), GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ btrfs_init_workqueue(ret, fs_info);
+
+ /* Ordered workqueues don't allow @max_active adjustments. */
+ ret->limit_active = 1;
+ ret->current_active = 1;
+ ret->thresh = NO_THRESHOLD;
+
+ ret->normal_wq = alloc_ordered_workqueue("btrfs-%s", flags, name);
+ if (!ret->normal_wq) {
+ kfree(ret);
+ return NULL;
+ }
+
trace_btrfs_workqueue_alloc(ret, name);
return ret;
}
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 6e2596ddae10..30f66c5e2e6e 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -31,6 +31,9 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
unsigned int flags,
int limit_active,
int thresh);
+struct btrfs_workqueue *btrfs_alloc_ordered_workqueue(
+ struct btrfs_fs_info *fs_info, const char *name,
+ unsigned int flags);
void btrfs_init_work(struct btrfs_work *work, btrfs_func_t func,
btrfs_func_t ordered_func, btrfs_func_t ordered_free);
void btrfs_queue_work(struct btrfs_workqueue *wq,
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 5379c4714905..12b12443efaa 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -27,6 +27,17 @@ struct btrfs_failed_bio {
atomic_t repair_count;
};
+/* Is this a data path I/O that needs storage layer checksum and repair? */
+static inline bool is_data_bbio(struct btrfs_bio *bbio)
+{
+ return bbio->inode && is_data_inode(&bbio->inode->vfs_inode);
+}
+
+static bool bbio_has_ordered_extent(struct btrfs_bio *bbio)
+{
+ return is_data_bbio(bbio) && btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE;
+}
+
/*
* Initialize a btrfs_bio structure. This skips the embedded bio itself as it
* is already initialized by the block layer.
@@ -61,20 +72,6 @@ struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
return bbio;
}
-static blk_status_t btrfs_bio_extract_ordered_extent(struct btrfs_bio *bbio)
-{
- struct btrfs_ordered_extent *ordered;
- int ret;
-
- ordered = btrfs_lookup_ordered_extent(bbio->inode, bbio->file_offset);
- if (WARN_ON_ONCE(!ordered))
- return BLK_STS_IOERR;
- ret = btrfs_extract_ordered_extent(bbio, ordered);
- btrfs_put_ordered_extent(ordered);
-
- return errno_to_blk_status(ret);
-}
-
static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
struct btrfs_bio *orig_bbio,
u64 map_length, bool use_append)
@@ -95,13 +92,41 @@ static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
btrfs_bio_init(bbio, fs_info, NULL, orig_bbio);
bbio->inode = orig_bbio->inode;
bbio->file_offset = orig_bbio->file_offset;
- if (!(orig_bbio->bio.bi_opf & REQ_BTRFS_ONE_ORDERED))
- orig_bbio->file_offset += map_length;
-
+ orig_bbio->file_offset += map_length;
+ if (bbio_has_ordered_extent(bbio)) {
+ refcount_inc(&orig_bbio->ordered->refs);
+ bbio->ordered = orig_bbio->ordered;
+ }
atomic_inc(&orig_bbio->pending_ios);
return bbio;
}
+/* Free a bio that was never submitted to the underlying device. */
+static void btrfs_cleanup_bio(struct btrfs_bio *bbio)
+{
+ if (bbio_has_ordered_extent(bbio))
+ btrfs_put_ordered_extent(bbio->ordered);
+ bio_put(&bbio->bio);
+}
+
+static void __btrfs_bio_end_io(struct btrfs_bio *bbio)
+{
+ if (bbio_has_ordered_extent(bbio)) {
+ struct btrfs_ordered_extent *ordered = bbio->ordered;
+
+ bbio->end_io(bbio);
+ btrfs_put_ordered_extent(ordered);
+ } else {
+ bbio->end_io(bbio);
+ }
+}
+
+void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
+{
+ bbio->bio.bi_status = status;
+ __btrfs_bio_end_io(bbio);
+}
+
static void btrfs_orig_write_end_io(struct bio *bio);
static void btrfs_bbio_propagate_error(struct btrfs_bio *bbio,
@@ -130,12 +155,12 @@ static void btrfs_orig_bbio_end_io(struct btrfs_bio *bbio)
if (bbio->bio.bi_status)
btrfs_bbio_propagate_error(bbio, orig_bbio);
- bio_put(&bbio->bio);
+ btrfs_cleanup_bio(bbio);
bbio = orig_bbio;
}
if (atomic_dec_and_test(&bbio->pending_ios))
- bbio->end_io(bbio);
+ __btrfs_bio_end_io(bbio);
}
static int next_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror)
@@ -327,10 +352,10 @@ static void btrfs_end_bio_work(struct work_struct *work)
struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);
/* Metadata reads are checked and repaired by the submitter. */
- if (bbio->inode && !(bbio->bio.bi_opf & REQ_META))
+ if (is_data_bbio(bbio))
btrfs_check_read_bio(bbio, bbio->bio.bi_private);
else
- bbio->end_io(bbio);
+ btrfs_orig_bbio_end_io(bbio);
}
static void btrfs_simple_end_io(struct bio *bio)
@@ -348,7 +373,7 @@ static void btrfs_simple_end_io(struct bio *bio)
INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
} else {
- if (bio_op(bio) == REQ_OP_ZONE_APPEND)
+ if (bio_op(bio) == REQ_OP_ZONE_APPEND && !bio->bi_status)
btrfs_record_physical_zoned(bbio);
btrfs_orig_bbio_end_io(bbio);
}
@@ -361,8 +386,7 @@ static void btrfs_raid56_end_io(struct bio *bio)
btrfs_bio_counter_dec(bioc->fs_info);
bbio->mirror_num = bioc->mirror_num;
- if (bio_op(bio) == REQ_OP_READ && bbio->inode &&
- !(bbio->bio.bi_opf & REQ_META))
+ if (bio_op(bio) == REQ_OP_READ && is_data_bbio(bbio))
btrfs_check_read_bio(bbio, NULL);
else
btrfs_orig_bbio_end_io(bbio);
@@ -472,13 +496,12 @@ static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
static void __btrfs_submit_bio(struct bio *bio, struct btrfs_io_context *bioc,
struct btrfs_io_stripe *smap, int mirror_num)
{
- /* Do not leak our private flag into the block layer. */
- bio->bi_opf &= ~REQ_BTRFS_ONE_ORDERED;
-
if (!bioc) {
/* Single mirror read/write fast path. */
btrfs_bio(bio)->mirror_num = mirror_num;
bio->bi_iter.bi_sector = smap->physical >> SECTOR_SHIFT;
+ if (bio_op(bio) != REQ_OP_READ)
+ btrfs_bio(bio)->orig_physical = smap->physical;
bio->bi_private = smap->dev;
bio->bi_end_io = btrfs_simple_end_io;
btrfs_submit_dev_bio(smap->dev, bio);
@@ -574,27 +597,20 @@ static void run_one_async_free(struct btrfs_work *work)
static bool should_async_write(struct btrfs_bio *bbio)
{
- /*
- * If the I/O is not issued by fsync and friends, (->sync_writers != 0),
- * then try to defer the submission to a workqueue to parallelize the
- * checksum calculation.
- */
- if (atomic_read(&bbio->inode->sync_writers))
+ /* Submit synchronously if the checksum implementation is fast. */
+ if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &bbio->fs_info->flags))
return false;
/*
- * Submit metadata writes synchronously if the checksum implementation
- * is fast, or we are on a zoned device that wants I/O to be submitted
- * in order.
+ * Try to defer the submission to a workqueue to parallelize the
+ * checksum calculation unless the I/O is issued synchronously.
*/
- if (bbio->bio.bi_opf & REQ_META) {
- struct btrfs_fs_info *fs_info = bbio->fs_info;
+ if (op_is_sync(bbio->bio.bi_opf))
+ return false;
- if (btrfs_is_zoned(fs_info))
- return false;
- if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags))
- return false;
- }
+ /* Zoned devices require I/O to be submitted in order. */
+ if ((bbio->bio.bi_opf & REQ_META) && btrfs_is_zoned(bbio->fs_info))
+ return false;
return true;
}
@@ -622,10 +638,7 @@ static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,
btrfs_init_work(&async->work, run_one_async_start, run_one_async_done,
run_one_async_free);
- if (op_is_sync(bbio->bio.bi_opf))
- btrfs_queue_work(fs_info->hipri_workers, &async->work);
- else
- btrfs_queue_work(fs_info->workers, &async->work);
+ btrfs_queue_work(fs_info->workers, &async->work);
return true;
}
@@ -635,7 +648,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
struct btrfs_fs_info *fs_info = bbio->fs_info;
struct btrfs_bio *orig_bbio = bbio;
struct bio *bio = &bbio->bio;
- u64 logical = bio->bi_iter.bi_sector << 9;
+ u64 logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
u64 length = bio->bi_iter.bi_size;
u64 map_length = length;
bool use_append = btrfs_use_zone_append(bbio);
@@ -645,8 +658,8 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
int error;
btrfs_bio_counter_inc_blocked(fs_info);
- error = __btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
- &bioc, &smap, &mirror_num, 1);
+ error = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
+ &bioc, &smap, &mirror_num, 1);
if (error) {
ret = errno_to_blk_status(error);
goto fail;
@@ -665,7 +678,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
* Save the iter for the end_io handler and preload the checksums for
* data reads.
*/
- if (bio_op(bio) == REQ_OP_READ && inode && !(bio->bi_opf & REQ_META)) {
+ if (bio_op(bio) == REQ_OP_READ && is_data_bbio(bbio)) {
bbio->saved_iter = bio->bi_iter;
ret = btrfs_lookup_bio_sums(bbio);
if (ret)
@@ -676,9 +689,6 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
if (use_append) {
bio->bi_opf &= ~REQ_OP_WRITE;
bio->bi_opf |= REQ_OP_ZONE_APPEND;
- ret = btrfs_bio_extract_ordered_extent(bbio);
- if (ret)
- goto fail_put_bio;
}
/*
@@ -695,6 +705,10 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
ret = btrfs_bio_csum(bbio);
if (ret)
goto fail_put_bio;
+ } else if (use_append) {
+ ret = btrfs_alloc_dummy_sum(bbio);
+ if (ret)
+ goto fail_put_bio;
}
}
@@ -704,7 +718,7 @@ done:
fail_put_bio:
if (map_length < length)
- bio_put(bio);
+ btrfs_cleanup_bio(bbio);
fail:
btrfs_bio_counter_dec(fs_info);
btrfs_bio_end_io(orig_bbio, ret);
@@ -811,10 +825,6 @@ void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_
goto fail;
if (dev_replace) {
- if (btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE && btrfs_is_zoned(fs_info)) {
- bbio->bio.bi_opf &= ~REQ_OP_WRITE;
- bbio->bio.bi_opf |= REQ_OP_ZONE_APPEND;
- }
ASSERT(smap.dev == fs_info->dev_replace.srcdev);
smap.dev = fs_info->dev_replace.tgtdev;
}
diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h
index a8eca3a65673..ca79decee060 100644
--- a/fs/btrfs/bio.h
+++ b/fs/btrfs/bio.h
@@ -39,8 +39,8 @@ struct btrfs_bio {
union {
/*
- * Data checksumming and original I/O information for internal
- * use in the btrfs_submit_bio machinery.
+ * For data reads: checksumming and original I/O information.
+ * (for internal use in the btrfs_submit_bio machinery only)
*/
struct {
u8 *csum;
@@ -48,7 +48,20 @@ struct btrfs_bio {
struct bvec_iter saved_iter;
};
- /* For metadata parentness verification. */
+ /*
+ * For data writes:
+ * - ordered extent covering the bio
+ * - pointer to the checksums for this bio
+ * - original physical address from the allocator
+ * (for zone append only)
+ */
+ struct {
+ struct btrfs_ordered_extent *ordered;
+ struct btrfs_ordered_sum *sums;
+ u64 orig_physical;
+ };
+
+ /* For metadata reads: parentness verification. */
struct btrfs_tree_parent_check parent_check;
};
@@ -84,15 +97,7 @@ void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,
struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
struct btrfs_fs_info *fs_info,
btrfs_bio_end_io_t end_io, void *private);
-
-static inline void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
-{
- bbio->bio.bi_status = status;
- bbio->end_io(bbio);
-}
-
-/* Bio only refers to one ordered extent. */
-#define REQ_BTRFS_ONE_ORDERED REQ_DRV
+void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status);
/* Submit using blkcg_punt_bio_submit. */
#define REQ_BTRFS_CGROUP_PUNT REQ_FS_PRIVATE
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 957ad1c31c4f..48ae509f2ac2 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -95,14 +95,21 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags)
}
allowed &= flags;
- if (allowed & BTRFS_BLOCK_GROUP_RAID6)
+ /* Select the highest-redundancy RAID level. */
+ if (allowed & BTRFS_BLOCK_GROUP_RAID1C4)
+ allowed = BTRFS_BLOCK_GROUP_RAID1C4;
+ else if (allowed & BTRFS_BLOCK_GROUP_RAID6)
allowed = BTRFS_BLOCK_GROUP_RAID6;
+ else if (allowed & BTRFS_BLOCK_GROUP_RAID1C3)
+ allowed = BTRFS_BLOCK_GROUP_RAID1C3;
else if (allowed & BTRFS_BLOCK_GROUP_RAID5)
allowed = BTRFS_BLOCK_GROUP_RAID5;
else if (allowed & BTRFS_BLOCK_GROUP_RAID10)
allowed = BTRFS_BLOCK_GROUP_RAID10;
else if (allowed & BTRFS_BLOCK_GROUP_RAID1)
allowed = BTRFS_BLOCK_GROUP_RAID1;
+ else if (allowed & BTRFS_BLOCK_GROUP_DUP)
+ allowed = BTRFS_BLOCK_GROUP_DUP;
else if (allowed & BTRFS_BLOCK_GROUP_RAID0)
allowed = BTRFS_BLOCK_GROUP_RAID0;
@@ -1633,11 +1640,14 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
+ trace_btrfs_add_unused_block_group(bg);
spin_lock(&fs_info->unused_bgs_lock);
if (list_empty(&bg->bg_list)) {
btrfs_get_block_group(bg);
- trace_btrfs_add_unused_block_group(bg);
list_add_tail(&bg->bg_list, &fs_info->unused_bgs);
+ } else {
+ /* Pull out the block group from the reclaim_bgs list. */
+ list_move_tail(&bg->bg_list, &fs_info->unused_bgs);
}
spin_unlock(&fs_info->unused_bgs_lock);
}
@@ -1791,8 +1801,15 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
}
spin_unlock(&bg->lock);
- /* Get out fast, in case we're unmounting the filesystem */
- if (btrfs_fs_closing(fs_info)) {
+ /*
+ * Get out fast, in case we're read-only or unmounting the
+ * filesystem. It is OK to drop block groups from the list even
+ * for the read-only case. As we did sb_start_write(),
+ * "mount -o remount,ro" won't happen and read-only filesystem
+ * means it is forced read-only due to a fatal error. So, it
+ * never gets back to read-write to let us reclaim again.
+ */
+ if (btrfs_need_cleaner_sleep(fs_info)) {
up_write(&space_info->groups_sem);
goto next;
}
@@ -1823,11 +1840,27 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
}
next:
+ if (ret)
+ btrfs_mark_bg_to_reclaim(bg);
btrfs_put_block_group(bg);
+
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
+ /*
+ * Reclaiming all the block groups in the list can take really
+ * long. Prioritize cleaning up unused block groups.
+ */
+ btrfs_delete_unused_bgs(fs_info);
+ /*
+ * If we are interrupted by a balance, we can just bail out. The
+ * cleaner thread restart again if necessary.
+ */
+ if (!mutex_trylock(&fs_info->reclaim_bgs_lock))
+ goto end;
spin_lock(&fs_info->unused_bgs_lock);
}
spin_unlock(&fs_info->unused_bgs_lock);
mutex_unlock(&fs_info->reclaim_bgs_lock);
+end:
btrfs_exclop_finish(fs_info);
sb_end_write(fs_info->sb);
}
@@ -1973,7 +2006,7 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
/* For RAID5/6 adjust to a full IO stripe length */
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
- io_stripe_size = nr_data_stripes(map) << BTRFS_STRIPE_LEN_SHIFT;
+ io_stripe_size = btrfs_stripe_nr_to_offset(nr_data_stripes(map));
buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS);
if (!buf) {
@@ -2818,10 +2851,20 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
}
ret = inc_block_group_ro(cache, 0);
- if (!do_chunk_alloc || ret == -ETXTBSY)
- goto unlock_out;
if (!ret)
goto out;
+ if (ret == -ETXTBSY)
+ goto unlock_out;
+
+ /*
+ * Skip chunk alloction if the bg is SYSTEM, this is to avoid system
+ * chunk allocation storm to exhaust the system chunk array. Otherwise
+ * we still want to try our best to mark the block group read-only.
+ */
+ if (!do_chunk_alloc && ret == -ENOSPC &&
+ (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM))
+ goto unlock_out;
+
alloc_flags = btrfs_get_alloc_profile(fs_info, cache->space_info->flags);
ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
if (ret < 0)
@@ -3511,9 +3554,9 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
- set_extent_dirty(&trans->transaction->pinned_extents,
- bytenr, bytenr + num_bytes - 1,
- GFP_NOFS | __GFP_NOFAIL);
+ set_extent_bit(&trans->transaction->pinned_extents,
+ bytenr, bytenr + num_bytes - 1,
+ EXTENT_DIRTY, NULL);
}
spin_lock(&trans->transaction->dirty_bgs_lock);
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index cc0e4b37db2d..f204addc3fe8 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -162,7 +162,14 @@ struct btrfs_block_group {
*/
struct list_head cluster_list;
- /* For delayed block group creation or deletion of empty block groups */
+ /*
+ * Used for several lists:
+ *
+ * 1) struct btrfs_fs_info::unused_bgs
+ * 2) struct btrfs_fs_info::reclaim_bgs
+ * 3) struct btrfs_transaction::deleted_bgs
+ * 4) struct btrfs_trans_handle::new_bgs
+ */
struct list_head bg_list;
/* For read-only block groups */
diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c
index ac18c43fadad..6279d200cf83 100644
--- a/fs/btrfs/block-rsv.c
+++ b/fs/btrfs/block-rsv.c
@@ -541,3 +541,22 @@ try_reserve:
return ERR_PTR(ret);
}
+
+int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *rsv)
+{
+ u64 needed_bytes;
+ int ret;
+
+ /* 1 for slack space, 1 for updating the inode */
+ needed_bytes = btrfs_calc_insert_metadata_size(fs_info, 1) +
+ btrfs_calc_metadata_size(fs_info, 1);
+
+ spin_lock(&rsv->lock);
+ if (rsv->reserved < needed_bytes)
+ ret = -ENOSPC;
+ else
+ ret = 0;
+ spin_unlock(&rsv->lock);
+ return ret;
+}
diff --git a/fs/btrfs/block-rsv.h b/fs/btrfs/block-rsv.h
index 6dc781709aca..b0bd12b8652f 100644
--- a/fs/btrfs/block-rsv.h
+++ b/fs/btrfs/block-rsv.h
@@ -82,6 +82,8 @@ void btrfs_release_global_block_rsv(struct btrfs_fs_info *fs_info);
struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u32 blocksize);
+int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *rsv);
static inline void btrfs_unuse_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv,
u32 blocksize)
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index ec2ae4406c16..d47a927b3504 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -116,9 +116,6 @@ struct btrfs_inode {
unsigned long runtime_flags;
- /* Keep track of who's O_SYNC/fsyncing currently */
- atomic_t sync_writers;
-
/* full 64 bit generation number, struct vfs_inode doesn't have a big
* enough field for this.
*/
@@ -335,7 +332,7 @@ static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
if (btrfs_is_free_space_inode(inode))
return;
trace_btrfs_inode_mod_outstanding_extents(inode->root, btrfs_ino(inode),
- mod);
+ mod, inode->outstanding_extents);
}
/*
@@ -407,30 +404,12 @@ static inline bool btrfs_inode_can_compress(const struct btrfs_inode *inode)
return true;
}
-/*
- * btrfs_inode_item stores flags in a u64, btrfs_inode stores them in two
- * separate u32s. These two functions convert between the two representations.
- */
-static inline u64 btrfs_inode_combine_flags(u32 flags, u32 ro_flags)
-{
- return (flags | ((u64)ro_flags << 32));
-}
-
-static inline void btrfs_inode_split_flags(u64 inode_item_flags,
- u32 *flags, u32 *ro_flags)
-{
- *flags = (u32)inode_item_flags;
- *ro_flags = (u32)(inode_item_flags >> 32);
-}
-
/* Array of bytes with variable length, hexadecimal format 0x1234 */
#define CSUM_FMT "0x%*phN"
#define CSUM_FMT_VALUE(size, bytes) size, bytes
int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page,
u32 pgoff, u8 *csum, const u8 * const csum_expected);
-int btrfs_extract_ordered_extent(struct btrfs_bio *bbio,
- struct btrfs_ordered_extent *ordered);
bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
u32 bio_offset, struct bio_vec *bv);
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 82e49d985019..3caf339c4bb3 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -1459,13 +1459,13 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
struct btrfs_fs_info *fs_info = state->fs_info;
int ret;
u64 length;
- struct btrfs_io_context *multi = NULL;
+ struct btrfs_io_context *bioc = NULL;
+ struct btrfs_io_stripe smap, *map;
struct btrfs_device *device;
length = len;
- ret = btrfs_map_block(fs_info, BTRFS_MAP_READ,
- bytenr, &length, &multi, mirror_num);
-
+ ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, bytenr, &length, &bioc,
+ NULL, &mirror_num, 0);
if (ret) {
block_ctx_out->start = 0;
block_ctx_out->dev_bytenr = 0;
@@ -1478,21 +1478,26 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
return ret;
}
- device = multi->stripes[0].dev;
+ if (bioc)
+ map = &bioc->stripes[0];
+ else
+ map = &smap;
+
+ device = map->dev;
if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state) ||
!device->bdev || !device->name)
block_ctx_out->dev = NULL;
else
block_ctx_out->dev = btrfsic_dev_state_lookup(
device->bdev->bd_dev);
- block_ctx_out->dev_bytenr = multi->stripes[0].physical;
+ block_ctx_out->dev_bytenr = map->physical;
block_ctx_out->start = bytenr;
block_ctx_out->len = len;
block_ctx_out->datav = NULL;
block_ctx_out->pagev = NULL;
block_ctx_out->mem_to_free = NULL;
- kfree(multi);
+ kfree(bioc);
if (NULL == block_ctx_out->dev) {
ret = -ENXIO;
pr_info("btrfsic: error, cannot lookup dev (#1)!\n");
@@ -1565,7 +1570,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
bio = bio_alloc(block_ctx->dev->bdev, num_pages - i,
REQ_OP_READ, GFP_NOFS);
- bio->bi_iter.bi_sector = dev_bytenr >> 9;
+ bio->bi_iter.bi_sector = dev_bytenr >> SECTOR_SHIFT;
for (j = i; j < num_pages; j++) {
ret = bio_add_page(bio, block_ctx->pagev[j],
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 2d0493f0a184..8818ed5c390f 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -37,7 +37,7 @@
#include "file-item.h"
#include "super.h"
-struct bio_set btrfs_compressed_bioset;
+static struct bio_set btrfs_compressed_bioset;
static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };
@@ -211,8 +211,6 @@ static noinline void end_compressed_writeback(const struct compressed_bio *cb)
for (i = 0; i < ret; i++) {
struct folio *folio = fbatch.folios[i];
- if (errno)
- folio_set_error(folio);
btrfs_page_clamp_clear_writeback(fs_info, &folio->page,
cb->start, cb->len);
}
@@ -226,13 +224,8 @@ static void btrfs_finish_compressed_write_work(struct work_struct *work)
struct compressed_bio *cb =
container_of(work, struct compressed_bio, write_end_work);
- /*
- * Ok, we're the last bio for this extent, step one is to call back
- * into the FS and do all the end_io operations.
- */
- btrfs_writepage_endio_finish_ordered(cb->bbio.inode, NULL,
- cb->start, cb->start + cb->len - 1,
- cb->bbio.bio.bi_status == BLK_STS_OK);
+ btrfs_finish_ordered_extent(cb->bbio.ordered, NULL, cb->start, cb->len,
+ cb->bbio.bio.bi_status == BLK_STS_OK);
if (cb->writeback)
end_compressed_writeback(cb);
@@ -281,32 +274,31 @@ static void btrfs_add_compressed_bio_pages(struct compressed_bio *cb)
* This also checksums the file bytes and gets things ready for
* the end io hooks.
*/
-void btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
- unsigned int len, u64 disk_start,
- unsigned int compressed_len,
- struct page **compressed_pages,
- unsigned int nr_pages,
- blk_opf_t write_flags,
- bool writeback)
+void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
+ struct page **compressed_pages,
+ unsigned int nr_pages,
+ blk_opf_t write_flags,
+ bool writeback)
{
+ struct btrfs_inode *inode = BTRFS_I(ordered->inode);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct compressed_bio *cb;
- ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
- IS_ALIGNED(len, fs_info->sectorsize));
-
- write_flags |= REQ_BTRFS_ONE_ORDERED;
+ ASSERT(IS_ALIGNED(ordered->file_offset, fs_info->sectorsize));
+ ASSERT(IS_ALIGNED(ordered->num_bytes, fs_info->sectorsize));
- cb = alloc_compressed_bio(inode, start, REQ_OP_WRITE | write_flags,
+ cb = alloc_compressed_bio(inode, ordered->file_offset,
+ REQ_OP_WRITE | write_flags,
end_compressed_bio_write);
- cb->start = start;
- cb->len = len;
+ cb->start = ordered->file_offset;
+ cb->len = ordered->num_bytes;
cb->compressed_pages = compressed_pages;
- cb->compressed_len = compressed_len;
+ cb->compressed_len = ordered->disk_num_bytes;
cb->writeback = writeback;
INIT_WORK(&cb->write_end_work, btrfs_finish_compressed_write_work);
cb->nr_pages = nr_pages;
- cb->bbio.bio.bi_iter.bi_sector = disk_start >> SECTOR_SHIFT;
+ cb->bbio.bio.bi_iter.bi_sector = ordered->disk_bytenr >> SECTOR_SHIFT;
+ cb->bbio.ordered = ordered;
btrfs_add_compressed_bio_pages(cb);
btrfs_submit_bio(&cb->bbio, 0);
@@ -421,7 +413,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
*/
if (!em || cur < em->start ||
(cur + fs_info->sectorsize > extent_map_end(em)) ||
- (em->block_start >> 9) != orig_bio->bi_iter.bi_sector) {
+ (em->block_start >> SECTOR_SHIFT) != orig_bio->bi_iter.bi_sector) {
free_extent_map(em);
unlock_extent(tree, cur, page_end, NULL);
unlock_page(page);
@@ -472,7 +464,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
* After the compressed pages are read, we copy the bytes into the
* bio we were passed and then call the bio end_io calls
*/
-void btrfs_submit_compressed_read(struct btrfs_bio *bbio, int mirror_num)
+void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
{
struct btrfs_inode *inode = bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
@@ -538,7 +530,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio, int mirror_num)
if (memstall)
psi_memstall_leave(&pflags);
- btrfs_submit_bio(&cb->bbio, mirror_num);
+ btrfs_submit_bio(&cb->bbio, 0);
return;
out_free_compressed_pages:
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 19ab2abeddc0..03bb9d143fa7 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -10,6 +10,7 @@
#include "bio.h"
struct btrfs_inode;
+struct btrfs_ordered_extent;
/*
* We want to make sure that amount of RAM required to uncompress an extent is
@@ -86,14 +87,12 @@ int btrfs_decompress(int type, const u8 *data_in, struct page *dest_page,
int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
struct compressed_bio *cb, u32 decompressed);
-void btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
- unsigned int len, u64 disk_start,
- unsigned int compressed_len,
+void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
struct page **compressed_pages,
unsigned int nr_pages,
blk_opf_t write_flags,
bool writeback);
-void btrfs_submit_compressed_read(struct btrfs_bio *bbio, int mirror_num);
+void btrfs_submit_compressed_read(struct btrfs_bio *bbio);
unsigned int btrfs_compress_str2level(unsigned int type, const char *str);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 2ff2961b1183..a4cb4b642987 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -37,8 +37,6 @@ static int push_node_left(struct btrfs_trans_handle *trans,
static int balance_node_right(struct btrfs_trans_handle *trans,
struct extent_buffer *dst_buf,
struct extent_buffer *src_buf);
-static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
- int level, int slot);
static const struct btrfs_csums {
u16 size;
@@ -150,13 +148,19 @@ static inline void copy_leaf_items(const struct extent_buffer *dst,
nr_items * sizeof(struct btrfs_item));
}
+/* This exists for btrfs-progs usages. */
+u16 btrfs_csum_type_size(u16 type)
+{
+ return btrfs_csums[type].size;
+}
+
int btrfs_super_csum_size(const struct btrfs_super_block *s)
{
u16 t = btrfs_super_csum_type(s);
/*
* csum type is validated at mount time
*/
- return btrfs_csums[t].size;
+ return btrfs_csum_type_size(t);
}
const char *btrfs_super_csum_name(u16 csum_type)
@@ -417,9 +421,13 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
&refs, &flags);
if (ret)
return ret;
- if (refs == 0) {
- ret = -EROFS;
- btrfs_handle_fs_error(fs_info, ret, NULL);
+ if (unlikely(refs == 0)) {
+ btrfs_crit(fs_info,
+ "found 0 references for tree block at bytenr %llu level %d root %llu",
+ buf->start, btrfs_header_level(buf),
+ btrfs_root_id(root));
+ ret = -EUCLEAN;
+ btrfs_abort_transaction(trans, ret);
return ret;
}
} else {
@@ -464,10 +472,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
return ret;
}
if (new_flags != 0) {
- int level = btrfs_header_level(buf);
-
- ret = btrfs_set_disk_extent_flags(trans, buf,
- new_flags, level);
+ ret = btrfs_set_disk_extent_flags(trans, buf, new_flags);
if (ret)
return ret;
}
@@ -583,9 +588,14 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
parent_start = buf->start;
- atomic_inc(&cow->refs);
ret = btrfs_tree_mod_log_insert_root(root->node, cow, true);
- BUG_ON(ret < 0);
+ if (ret < 0) {
+ btrfs_tree_unlock(cow);
+ free_extent_buffer(cow);
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+ atomic_inc(&cow->refs);
rcu_assign_pointer(root->node, cow);
btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
@@ -594,8 +604,14 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
add_root_to_dirty_list(root);
} else {
WARN_ON(trans->transid != btrfs_header_generation(parent));
- btrfs_tree_mod_log_insert_key(parent, parent_slot,
- BTRFS_MOD_LOG_KEY_REPLACE);
+ ret = btrfs_tree_mod_log_insert_key(parent, parent_slot,
+ BTRFS_MOD_LOG_KEY_REPLACE);
+ if (ret) {
+ btrfs_tree_unlock(cow);
+ free_extent_buffer(cow);
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
btrfs_set_node_blockptr(parent, parent_slot,
cow->start);
btrfs_set_node_ptr_generation(parent, parent_slot,
@@ -1028,8 +1044,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
child = btrfs_read_node_slot(mid, 0);
if (IS_ERR(child)) {
ret = PTR_ERR(child);
- btrfs_handle_fs_error(fs_info, ret, NULL);
- goto enospc;
+ goto out;
}
btrfs_tree_lock(child);
@@ -1038,11 +1053,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (ret) {
btrfs_tree_unlock(child);
free_extent_buffer(child);
- goto enospc;
+ goto out;
}
ret = btrfs_tree_mod_log_insert_root(root->node, child, true);
- BUG_ON(ret < 0);
+ if (ret < 0) {
+ btrfs_tree_unlock(child);
+ free_extent_buffer(child);
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
rcu_assign_pointer(root->node, child);
add_root_to_dirty_list(root);
@@ -1070,7 +1090,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (IS_ERR(left)) {
ret = PTR_ERR(left);
left = NULL;
- goto enospc;
+ goto out;
}
__btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
@@ -1079,7 +1099,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
BTRFS_NESTING_LEFT_COW);
if (wret) {
ret = wret;
- goto enospc;
+ goto out;
}
}
@@ -1088,7 +1108,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (IS_ERR(right)) {
ret = PTR_ERR(right);
right = NULL;
- goto enospc;
+ goto out;
}
__btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
@@ -1097,7 +1117,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
BTRFS_NESTING_RIGHT_COW);
if (wret) {
ret = wret;
- goto enospc;
+ goto out;
}
}
@@ -1119,7 +1139,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (btrfs_header_nritems(right) == 0) {
btrfs_clear_buffer_dirty(trans, right);
btrfs_tree_unlock(right);
- del_ptr(root, path, level + 1, pslot + 1);
+ ret = btrfs_del_ptr(trans, root, path, level + 1, pslot + 1);
+ if (ret < 0) {
+ free_extent_buffer_stale(right);
+ right = NULL;
+ goto out;
+ }
root_sub_used(root, right->len);
btrfs_free_tree_block(trans, btrfs_root_id(root), right,
0, 1);
@@ -1130,7 +1155,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_node_key(right, &right_key, 0);
ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
BTRFS_MOD_LOG_KEY_REPLACE);
- BUG_ON(ret < 0);
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
btrfs_set_node_key(parent, &right_key, pslot + 1);
btrfs_mark_buffer_dirty(parent);
}
@@ -1145,15 +1173,19 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
* otherwise we would have pulled some pointers from the
* right
*/
- if (!left) {
- ret = -EROFS;
- btrfs_handle_fs_error(fs_info, ret, NULL);
- goto enospc;
+ if (unlikely(!left)) {
+ btrfs_crit(fs_info,
+"missing left child when middle child only has 1 item, parent bytenr %llu level %d mid bytenr %llu root %llu",
+ parent->start, btrfs_header_level(parent),
+ mid->start, btrfs_root_id(root));
+ ret = -EUCLEAN;
+ btrfs_abort_transaction(trans, ret);
+ goto out;
}
wret = balance_node_right(trans, mid, left);
if (wret < 0) {
ret = wret;
- goto enospc;
+ goto out;
}
if (wret == 1) {
wret = push_node_left(trans, left, mid, 1);
@@ -1165,7 +1197,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (btrfs_header_nritems(mid) == 0) {
btrfs_clear_buffer_dirty(trans, mid);
btrfs_tree_unlock(mid);
- del_ptr(root, path, level + 1, pslot);
+ ret = btrfs_del_ptr(trans, root, path, level + 1, pslot);
+ if (ret < 0) {
+ free_extent_buffer_stale(mid);
+ mid = NULL;
+ goto out;
+ }
root_sub_used(root, mid->len);
btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
free_extent_buffer_stale(mid);
@@ -1176,7 +1213,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_node_key(mid, &mid_key, 0);
ret = btrfs_tree_mod_log_insert_key(parent, pslot,
BTRFS_MOD_LOG_KEY_REPLACE);
- BUG_ON(ret < 0);
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
btrfs_set_node_key(parent, &mid_key, pslot);
btrfs_mark_buffer_dirty(parent);
}
@@ -1202,7 +1242,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (orig_ptr !=
btrfs_node_blockptr(path->nodes[level], path->slots[level]))
BUG();
-enospc:
+out:
if (right) {
btrfs_tree_unlock(right);
free_extent_buffer(right);
@@ -1278,7 +1318,12 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
btrfs_node_key(mid, &disk_key, 0);
ret = btrfs_tree_mod_log_insert_key(parent, pslot,
BTRFS_MOD_LOG_KEY_REPLACE);
- BUG_ON(ret < 0);
+ if (ret < 0) {
+ btrfs_tree_unlock(left);
+ free_extent_buffer(left);
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
btrfs_set_node_key(parent, &disk_key, pslot);
btrfs_mark_buffer_dirty(parent);
if (btrfs_header_nritems(left) > orig_slot) {
@@ -1333,7 +1378,12 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
btrfs_node_key(right, &disk_key, 0);
ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
BTRFS_MOD_LOG_KEY_REPLACE);
- BUG_ON(ret < 0);
+ if (ret < 0) {
+ btrfs_tree_unlock(right);
+ free_extent_buffer(right);
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
btrfs_set_node_key(parent, &disk_key, pslot + 1);
btrfs_mark_buffer_dirty(parent);
@@ -2379,6 +2429,87 @@ done:
}
/*
+ * Search the tree again to find a leaf with smaller keys.
+ * Returns 0 if it found something.
+ * Returns 1 if there are no smaller keys.
+ * Returns < 0 on error.
+ *
+ * This may release the path, and so you may lose any locks held at the
+ * time you call it.
+ */
+static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
+{
+ struct btrfs_key key;
+ struct btrfs_key orig_key;
+ struct btrfs_disk_key found_key;
+ int ret;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
+ orig_key = key;
+
+ if (key.offset > 0) {
+ key.offset--;
+ } else if (key.type > 0) {
+ key.type--;
+ key.offset = (u64)-1;
+ } else if (key.objectid > 0) {
+ key.objectid--;
+ key.type = (u8)-1;
+ key.offset = (u64)-1;
+ } else {
+ return 1;
+ }
+
+ btrfs_release_path(path);
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret <= 0)
+ return ret;
+
+ /*
+ * Previous key not found. Even if we were at slot 0 of the leaf we had
+ * before releasing the path and calling btrfs_search_slot(), we now may
+ * be in a slot pointing to the same original key - this can happen if
+ * after we released the path, one of more items were moved from a
+ * sibling leaf into the front of the leaf we had due to an insertion
+ * (see push_leaf_right()).
+ * If we hit this case and our slot is > 0 and just decrement the slot
+ * so that the caller does not process the same key again, which may or
+ * may not break the caller, depending on its logic.
+ */
+ if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) {
+ btrfs_item_key(path->nodes[0], &found_key, path->slots[0]);
+ ret = comp_keys(&found_key, &orig_key);
+ if (ret == 0) {
+ if (path->slots[0] > 0) {
+ path->slots[0]--;
+ return 0;
+ }
+ /*
+ * At slot 0, same key as before, it means orig_key is
+ * the lowest, leftmost, key in the tree. We're done.
+ */
+ return 1;
+ }
+ }
+
+ btrfs_item_key(path->nodes[0], &found_key, 0);
+ ret = comp_keys(&found_key, &key);
+ /*
+ * We might have had an item with the previous key in the tree right
+ * before we released our path. And after we released our path, that
+ * item might have been pushed to the first slot (0) of the leaf we
+ * were holding due to a tree balance. Alternatively, an item with the
+ * previous key can exist as the only element of a leaf (big fat item).
+ * Therefore account for these 2 cases, so that our callers (like
+ * btrfs_previous_item) don't miss an existing item with a key matching
+ * the previous key we computed above.
+ */
+ if (ret <= 0)
+ return 0;
+ return 1;
+}
+
+/*
* helper to use instead of search slot if no exact match is needed but
* instead the next or previous item should be returned.
* When find_higher is true, the next higher item is returned, the next lower
@@ -2552,6 +2683,7 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
if (slot > 0) {
btrfs_item_key(eb, &disk_key, slot - 1);
if (unlikely(comp_keys(&disk_key, new_key) >= 0)) {
+ btrfs_print_leaf(eb);
btrfs_crit(fs_info,
"slot %u key (%llu %u %llu) new key (%llu %u %llu)",
slot, btrfs_disk_key_objectid(&disk_key),
@@ -2559,13 +2691,13 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
btrfs_disk_key_offset(&disk_key),
new_key->objectid, new_key->type,
new_key->offset);
- btrfs_print_leaf(eb);
BUG();
}
}
if (slot < btrfs_header_nritems(eb) - 1) {
btrfs_item_key(eb, &disk_key, slot + 1);
if (unlikely(comp_keys(&disk_key, new_key) <= 0)) {
+ btrfs_print_leaf(eb);
btrfs_crit(fs_info,
"slot %u key (%llu %u %llu) new key (%llu %u %llu)",
slot, btrfs_disk_key_objectid(&disk_key),
@@ -2573,7 +2705,6 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
btrfs_disk_key_offset(&disk_key),
new_key->objectid, new_key->type,
new_key->offset);
- btrfs_print_leaf(eb);
BUG();
}
}
@@ -2626,7 +2757,7 @@ static bool check_sibling_keys(struct extent_buffer *left,
btrfs_item_key_to_cpu(right, &right_first, 0);
}
- if (btrfs_comp_cpu_keys(&left_last, &right_first) >= 0) {
+ if (unlikely(btrfs_comp_cpu_keys(&left_last, &right_first) >= 0)) {
btrfs_crit(left->fs_info, "left extent buffer:");
btrfs_print_tree(left, false);
btrfs_crit(left->fs_info, "right extent buffer:");
@@ -2703,8 +2834,8 @@ static int push_node_left(struct btrfs_trans_handle *trans,
if (push_items < src_nritems) {
/*
- * Don't call btrfs_tree_mod_log_insert_move() here, key removal
- * was already fully logged by btrfs_tree_mod_log_eb_copy() above.
+ * btrfs_tree_mod_log_eb_copy handles logging the move, so we
+ * don't need to do an explicit tree mod log operation for it.
*/
memmove_extent_buffer(src, btrfs_node_key_ptr_offset(src, 0),
btrfs_node_key_ptr_offset(src, push_items),
@@ -2765,8 +2896,11 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
btrfs_abort_transaction(trans, ret);
return ret;
}
- ret = btrfs_tree_mod_log_insert_move(dst, push_items, 0, dst_nritems);
- BUG_ON(ret < 0);
+
+ /*
+ * btrfs_tree_mod_log_eb_copy handles logging the move, so we don't
+ * need to do an explicit tree mod log operation for it.
+ */
memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(dst, push_items),
btrfs_node_key_ptr_offset(dst, 0),
(dst_nritems) *
@@ -2840,7 +2974,12 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
old = root->node;
ret = btrfs_tree_mod_log_insert_root(root->node, c, false);
- BUG_ON(ret < 0);
+ if (ret < 0) {
+ btrfs_free_tree_block(trans, btrfs_root_id(root), c, 0, 1);
+ btrfs_tree_unlock(c);
+ free_extent_buffer(c);
+ return ret;
+ }
rcu_assign_pointer(root->node, c);
/* the super has an extra ref to root->node */
@@ -2861,10 +3000,10 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
* slot and level indicate where you want the key to go, and
* blocknr is the block the key points to.
*/
-static void insert_ptr(struct btrfs_trans_handle *trans,
- struct btrfs_path *path,
- struct btrfs_disk_key *key, u64 bytenr,
- int slot, int level)
+static int insert_ptr(struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
+ struct btrfs_disk_key *key, u64 bytenr,
+ int slot, int level)
{
struct extent_buffer *lower;
int nritems;
@@ -2880,7 +3019,10 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
if (level) {
ret = btrfs_tree_mod_log_insert_move(lower, slot + 1,
slot, nritems - slot);
- BUG_ON(ret < 0);
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
}
memmove_extent_buffer(lower,
btrfs_node_key_ptr_offset(lower, slot + 1),
@@ -2890,7 +3032,10 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
if (level) {
ret = btrfs_tree_mod_log_insert_key(lower, slot,
BTRFS_MOD_LOG_KEY_ADD);
- BUG_ON(ret < 0);
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
}
btrfs_set_node_key(lower, key, slot);
btrfs_set_node_blockptr(lower, slot, bytenr);
@@ -2898,6 +3043,8 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
btrfs_set_node_ptr_generation(lower, slot, trans->transid);
btrfs_set_header_nritems(lower, nritems + 1);
btrfs_mark_buffer_dirty(lower);
+
+ return 0;
}
/*
@@ -2962,6 +3109,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
ret = btrfs_tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid);
if (ret) {
+ btrfs_tree_unlock(split);
+ free_extent_buffer(split);
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -2975,8 +3124,13 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(c);
btrfs_mark_buffer_dirty(split);
- insert_ptr(trans, path, &disk_key, split->start,
- path->slots[level + 1] + 1, level + 1);
+ ret = insert_ptr(trans, path, &disk_key, split->start,
+ path->slots[level + 1] + 1, level + 1);
+ if (ret < 0) {
+ btrfs_tree_unlock(split);
+ free_extent_buffer(split);
+ return ret;
+ }
if (path->slots[level] >= mid) {
path->slots[level] -= mid;
@@ -2996,7 +3150,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
* and nr indicate which items in the leaf to check. This totals up the
* space used both by the item structs and the item data
*/
-static int leaf_space_used(struct extent_buffer *l, int start, int nr)
+static int leaf_space_used(const struct extent_buffer *l, int start, int nr)
{
int data_len;
int nritems = btrfs_header_nritems(l);
@@ -3016,7 +3170,7 @@ static int leaf_space_used(struct extent_buffer *l, int start, int nr)
* the start of the leaf data. IOW, how much room
* the leaf has left for both items and data
*/
-noinline int btrfs_leaf_free_space(struct extent_buffer *leaf)
+int btrfs_leaf_free_space(const struct extent_buffer *leaf)
{
struct btrfs_fs_info *fs_info = leaf->fs_info;
int nritems = btrfs_header_nritems(leaf);
@@ -3453,16 +3607,17 @@ out:
* split the path's leaf in two, making sure there is at least data_size
* available for the resulting leaf level of the path.
*/
-static noinline void copy_for_split(struct btrfs_trans_handle *trans,
- struct btrfs_path *path,
- struct extent_buffer *l,
- struct extent_buffer *right,
- int slot, int mid, int nritems)
+static noinline int copy_for_split(struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
+ struct extent_buffer *l,
+ struct extent_buffer *right,
+ int slot, int mid, int nritems)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
int data_copy_size;
int rt_data_off;
int i;
+ int ret;
struct btrfs_disk_key disk_key;
struct btrfs_map_token token;
@@ -3487,7 +3642,9 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
btrfs_set_header_nritems(l, mid);
btrfs_item_key(right, &disk_key, 0);
- insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1);
+ ret = insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1);
+ if (ret < 0)
+ return ret;
btrfs_mark_buffer_dirty(right);
btrfs_mark_buffer_dirty(l);
@@ -3505,6 +3662,8 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
}
BUG_ON(path->slots[0] < 0);
+
+ return 0;
}
/*
@@ -3703,8 +3862,13 @@ again:
if (split == 0) {
if (mid <= slot) {
btrfs_set_header_nritems(right, 0);
- insert_ptr(trans, path, &disk_key,
- right->start, path->slots[1] + 1, 1);
+ ret = insert_ptr(trans, path, &disk_key,
+ right->start, path->slots[1] + 1, 1);
+ if (ret < 0) {
+ btrfs_tree_unlock(right);
+ free_extent_buffer(right);
+ return ret;
+ }
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = right;
@@ -3712,8 +3876,13 @@ again:
path->slots[1] += 1;
} else {
btrfs_set_header_nritems(right, 0);
- insert_ptr(trans, path, &disk_key,
- right->start, path->slots[1], 1);
+ ret = insert_ptr(trans, path, &disk_key,
+ right->start, path->slots[1], 1);
+ if (ret < 0) {
+ btrfs_tree_unlock(right);
+ free_extent_buffer(right);
+ return ret;
+ }
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = right;
@@ -3729,7 +3898,12 @@ again:
return ret;
}
- copy_for_split(trans, path, l, right, slot, mid, nritems);
+ ret = copy_for_split(trans, path, l, right, slot, mid, nritems);
+ if (ret < 0) {
+ btrfs_tree_unlock(right);
+ free_extent_buffer(right);
+ return ret;
+ }
if (split == 2) {
BUG_ON(num_doubles != 0);
@@ -3826,7 +4000,12 @@ static noinline int split_item(struct btrfs_path *path,
struct btrfs_disk_key disk_key;
leaf = path->nodes[0];
- BUG_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item));
+ /*
+ * Shouldn't happen because the caller must have previously called
+ * setup_leaf_for_split() to make room for the new item in the leaf.
+ */
+ if (WARN_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item)))
+ return -ENOSPC;
orig_slot = path->slots[0];
orig_offset = btrfs_item_offset(leaf, path->slots[0]);
@@ -4273,9 +4452,11 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
*
* the tree should have been previously balanced so the deletion does not
* empty a node.
+ *
+ * This is exported for use inside btrfs-progs, don't un-export it.
*/
-static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
- int level, int slot)
+int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ struct btrfs_path *path, int level, int slot)
{
struct extent_buffer *parent = path->nodes[level];
u32 nritems;
@@ -4286,7 +4467,10 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
if (level) {
ret = btrfs_tree_mod_log_insert_move(parent, slot,
slot + 1, nritems - slot - 1);
- BUG_ON(ret < 0);
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
}
memmove_extent_buffer(parent,
btrfs_node_key_ptr_offset(parent, slot),
@@ -4296,7 +4480,10 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
} else if (level) {
ret = btrfs_tree_mod_log_insert_key(parent, slot,
BTRFS_MOD_LOG_KEY_REMOVE);
- BUG_ON(ret < 0);
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
}
nritems--;
@@ -4312,6 +4499,7 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
fixup_low_keys(path, &disk_key, level + 1);
}
btrfs_mark_buffer_dirty(parent);
+ return 0;
}
/*
@@ -4324,13 +4512,17 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
* The path must have already been setup for deleting the leaf, including
* all the proper balancing. path->nodes[1] must be locked.
*/
-static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct extent_buffer *leaf)
+static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct extent_buffer *leaf)
{
+ int ret;
+
WARN_ON(btrfs_header_generation(leaf) != trans->transid);
- del_ptr(root, path, 1, path->slots[1]);
+ ret = btrfs_del_ptr(trans, root, path, 1, path->slots[1]);
+ if (ret < 0)
+ return ret;
/*
* btrfs_free_extent is expensive, we want to make sure we
@@ -4343,6 +4535,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
atomic_inc(&leaf->refs);
btrfs_free_tree_block(trans, btrfs_root_id(root), leaf, 0, 1);
free_extent_buffer_stale(leaf);
+ return 0;
}
/*
* delete the item at the leaf level in path. If that empties
@@ -4392,7 +4585,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
btrfs_set_header_level(leaf, 0);
} else {
btrfs_clear_buffer_dirty(trans, leaf);
- btrfs_del_leaf(trans, root, path, leaf);
+ ret = btrfs_del_leaf(trans, root, path, leaf);
+ if (ret < 0)
+ return ret;
}
} else {
int used = leaf_space_used(leaf, 0, nritems);
@@ -4416,7 +4611,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
/* push_leaf_left fixes the path.
* make sure the path still points to our leaf
- * for possible call to del_ptr below
+ * for possible call to btrfs_del_ptr below
*/
slot = path->slots[1];
atomic_inc(&leaf->refs);
@@ -4453,7 +4648,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (btrfs_header_nritems(leaf) == 0) {
path->slots[1] = slot;
- btrfs_del_leaf(trans, root, path, leaf);
+ ret = btrfs_del_leaf(trans, root, path, leaf);
+ if (ret < 0)
+ return ret;
free_extent_buffer(leaf);
ret = 0;
} else {
@@ -4474,86 +4671,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
}
/*
- * search the tree again to find a leaf with lesser keys
- * returns 0 if it found something or 1 if there are no lesser leaves.
- * returns < 0 on io errors.
- *
- * This may release the path, and so you may lose any locks held at the
- * time you call it.
- */
-int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
-{
- struct btrfs_key key;
- struct btrfs_key orig_key;
- struct btrfs_disk_key found_key;
- int ret;
-
- btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
- orig_key = key;
-
- if (key.offset > 0) {
- key.offset--;
- } else if (key.type > 0) {
- key.type--;
- key.offset = (u64)-1;
- } else if (key.objectid > 0) {
- key.objectid--;
- key.type = (u8)-1;
- key.offset = (u64)-1;
- } else {
- return 1;
- }
-
- btrfs_release_path(path);
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret <= 0)
- return ret;
-
- /*
- * Previous key not found. Even if we were at slot 0 of the leaf we had
- * before releasing the path and calling btrfs_search_slot(), we now may
- * be in a slot pointing to the same original key - this can happen if
- * after we released the path, one of more items were moved from a
- * sibling leaf into the front of the leaf we had due to an insertion
- * (see push_leaf_right()).
- * If we hit this case and our slot is > 0 and just decrement the slot
- * so that the caller does not process the same key again, which may or
- * may not break the caller, depending on its logic.
- */
- if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) {
- btrfs_item_key(path->nodes[0], &found_key, path->slots[0]);
- ret = comp_keys(&found_key, &orig_key);
- if (ret == 0) {
- if (path->slots[0] > 0) {
- path->slots[0]--;
- return 0;
- }
- /*
- * At slot 0, same key as before, it means orig_key is
- * the lowest, leftmost, key in the tree. We're done.
- */
- return 1;
- }
- }
-
- btrfs_item_key(path->nodes[0], &found_key, 0);
- ret = comp_keys(&found_key, &key);
- /*
- * We might have had an item with the previous key in the tree right
- * before we released our path. And after we released our path, that
- * item might have been pushed to the first slot (0) of the leaf we
- * were holding due to a tree balance. Alternatively, an item with the
- * previous key can exist as the only element of a leaf (big fat item).
- * Therefore account for these 2 cases, so that our callers (like
- * btrfs_previous_item) don't miss an existing item with a key matching
- * the previous key we computed above.
- */
- if (ret <= 0)
- return 0;
- return 1;
-}
-
-/*
* A helper function to walk down the tree starting at min_key, and looking
* for nodes or leaves that are have a minimum transaction id.
* This is used by the btree defrag code, and tree logging
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4c1986cd5bed..f2d2b313bde5 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -541,6 +541,8 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
struct extent_buffer **cow_ret, u64 new_root_objectid);
int btrfs_block_can_be_shared(struct btrfs_root *root,
struct extent_buffer *buf);
+int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ struct btrfs_path *path, int level, int slot);
void btrfs_extend_item(struct btrfs_path *path, u32 data_size);
void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end);
int btrfs_split_item(struct btrfs_trans_handle *trans,
@@ -633,7 +635,6 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
return btrfs_insert_empty_items(trans, root, path, &batch);
}
-int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
u64 time_seq);
@@ -686,7 +687,7 @@ static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
{
return btrfs_next_old_item(root, p, 0);
}
-int btrfs_leaf_free_space(struct extent_buffer *leaf);
+int btrfs_leaf_free_space(const struct extent_buffer *leaf);
static inline int is_fstree(u64 rootid)
{
@@ -702,6 +703,7 @@ static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root)
return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID;
}
+u16 btrfs_csum_type_size(u16 type);
int btrfs_super_csum_size(const struct btrfs_super_block *s);
const char *btrfs_super_csum_name(u16 csum_type);
const char *btrfs_super_csum_driver(u16 csum_type);
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
index 8065341d831a..f2ff4cbe8656 100644
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -1040,7 +1040,8 @@ static int defrag_one_locked_target(struct btrfs_inode *inode,
clear_extent_bit(&inode->io_tree, start, start + len - 1,
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
EXTENT_DEFRAG, cached_state);
- set_extent_defrag(&inode->io_tree, start, start + len - 1, cached_state);
+ set_extent_bit(&inode->io_tree, start, start + len - 1,
+ EXTENT_DELALLOC | EXTENT_DEFRAG, cached_state);
/* Update the page status */
for (i = start_index - first_index; i <= last_index - first_index; i++) {
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 0b32432d7d56..6a13cf00218b 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -407,7 +407,6 @@ static inline void drop_delayed_ref(struct btrfs_delayed_ref_root *delayed_refs,
RB_CLEAR_NODE(&ref->ref_node);
if (!list_empty(&ref->add_list))
list_del(&ref->add_list);
- ref->in_tree = 0;
btrfs_put_delayed_ref(ref);
atomic_dec(&delayed_refs->num_entries);
}
@@ -507,6 +506,7 @@ struct btrfs_delayed_ref_head *btrfs_select_ref_head(
{
struct btrfs_delayed_ref_head *head;
+ lockdep_assert_held(&delayed_refs->lock);
again:
head = find_ref_head(delayed_refs, delayed_refs->run_delayed_start,
true);
@@ -531,7 +531,7 @@ again:
href_node);
}
- head->processing = 1;
+ head->processing = true;
WARN_ON(delayed_refs->num_heads_ready == 0);
delayed_refs->num_heads_ready--;
delayed_refs->run_delayed_start = head->bytenr +
@@ -549,31 +549,35 @@ void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
RB_CLEAR_NODE(&head->href_node);
atomic_dec(&delayed_refs->num_entries);
delayed_refs->num_heads--;
- if (head->processing == 0)
+ if (!head->processing)
delayed_refs->num_heads_ready--;
}
/*
* Helper to insert the ref_node to the tail or merge with tail.
*
- * Return 0 for insert.
- * Return >0 for merge.
+ * Return false if the ref was inserted.
+ * Return true if the ref was merged into an existing one (and therefore can be
+ * freed by the caller).
*/
-static int insert_delayed_ref(struct btrfs_delayed_ref_root *root,
- struct btrfs_delayed_ref_head *href,
- struct btrfs_delayed_ref_node *ref)
+static bool insert_delayed_ref(struct btrfs_delayed_ref_root *root,
+ struct btrfs_delayed_ref_head *href,
+ struct btrfs_delayed_ref_node *ref)
{
struct btrfs_delayed_ref_node *exist;
int mod;
- int ret = 0;
spin_lock(&href->lock);
exist = tree_insert(&href->ref_tree, ref);
- if (!exist)
- goto inserted;
+ if (!exist) {
+ if (ref->action == BTRFS_ADD_DELAYED_REF)
+ list_add_tail(&ref->add_list, &href->ref_add_list);
+ atomic_inc(&root->num_entries);
+ spin_unlock(&href->lock);
+ return false;
+ }
/* Now we are sure we can merge */
- ret = 1;
if (exist->action == ref->action) {
mod = ref->ref_mod;
} else {
@@ -600,13 +604,7 @@ static int insert_delayed_ref(struct btrfs_delayed_ref_root *root,
if (exist->ref_mod == 0)
drop_delayed_ref(root, href, exist);
spin_unlock(&href->lock);
- return ret;
-inserted:
- if (ref->action == BTRFS_ADD_DELAYED_REF)
- list_add_tail(&ref->add_list, &href->ref_add_list);
- atomic_inc(&root->num_entries);
- spin_unlock(&href->lock);
- return ret;
+ return true;
}
/*
@@ -699,34 +697,38 @@ static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
bool is_system)
{
int count_mod = 1;
- int must_insert_reserved = 0;
+ bool must_insert_reserved = false;
/* If reserved is provided, it must be a data extent. */
BUG_ON(!is_data && reserved);
- /*
- * The head node stores the sum of all the mods, so dropping a ref
- * should drop the sum in the head node by one.
- */
- if (action == BTRFS_UPDATE_DELAYED_HEAD)
+ switch (action) {
+ case BTRFS_UPDATE_DELAYED_HEAD:
count_mod = 0;
- else if (action == BTRFS_DROP_DELAYED_REF)
+ break;
+ case BTRFS_DROP_DELAYED_REF:
+ /*
+ * The head node stores the sum of all the mods, so dropping a ref
+ * should drop the sum in the head node by one.
+ */
count_mod = -1;
-
- /*
- * BTRFS_ADD_DELAYED_EXTENT means that we need to update the reserved
- * accounting when the extent is finally added, or if a later
- * modification deletes the delayed ref without ever inserting the
- * extent into the extent allocation tree. ref->must_insert_reserved
- * is the flag used to record that accounting mods are required.
- *
- * Once we record must_insert_reserved, switch the action to
- * BTRFS_ADD_DELAYED_REF because other special casing is not required.
- */
- if (action == BTRFS_ADD_DELAYED_EXTENT)
- must_insert_reserved = 1;
- else
- must_insert_reserved = 0;
+ break;
+ case BTRFS_ADD_DELAYED_EXTENT:
+ /*
+ * BTRFS_ADD_DELAYED_EXTENT means that we need to update the
+ * reserved accounting when the extent is finally added, or if a
+ * later modification deletes the delayed ref without ever
+ * inserting the extent into the extent allocation tree.
+ * ref->must_insert_reserved is the flag used to record that
+ * accounting mods are required.
+ *
+ * Once we record must_insert_reserved, switch the action to
+ * BTRFS_ADD_DELAYED_REF because other special casing is not
+ * required.
+ */
+ must_insert_reserved = true;
+ break;
+ }
refcount_set(&head_ref->refs, 1);
head_ref->bytenr = bytenr;
@@ -738,7 +740,7 @@ static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
head_ref->ref_tree = RB_ROOT_CACHED;
INIT_LIST_HEAD(&head_ref->ref_add_list);
RB_CLEAR_NODE(&head_ref->href_node);
- head_ref->processing = 0;
+ head_ref->processing = false;
head_ref->total_ref_mod = count_mod;
spin_lock_init(&head_ref->lock);
mutex_init(&head_ref->mutex);
@@ -763,11 +765,11 @@ static noinline struct btrfs_delayed_ref_head *
add_delayed_ref_head(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head_ref,
struct btrfs_qgroup_extent_record *qrecord,
- int action, int *qrecord_inserted_ret)
+ int action, bool *qrecord_inserted_ret)
{
struct btrfs_delayed_ref_head *existing;
struct btrfs_delayed_ref_root *delayed_refs;
- int qrecord_inserted = 0;
+ bool qrecord_inserted = false;
delayed_refs = &trans->transaction->delayed_refs;
@@ -777,7 +779,7 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
delayed_refs, qrecord))
kfree(qrecord);
else
- qrecord_inserted = 1;
+ qrecord_inserted = true;
}
trace_add_delayed_ref_head(trans->fs_info, head_ref, action);
@@ -853,8 +855,6 @@ static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
ref->num_bytes = num_bytes;
ref->ref_mod = 1;
ref->action = action;
- ref->is_head = 0;
- ref->in_tree = 1;
ref->seq = seq;
ref->type = ref_type;
RB_CLEAR_NODE(&ref->ref_node);
@@ -875,11 +875,11 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
- int qrecord_inserted;
+ bool qrecord_inserted;
bool is_system;
+ bool merged;
int action = generic_ref->action;
int level = generic_ref->tree_ref.level;
- int ret;
u64 bytenr = generic_ref->bytenr;
u64 num_bytes = generic_ref->len;
u64 parent = generic_ref->parent;
@@ -935,7 +935,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
head_ref = add_delayed_ref_head(trans, head_ref, record,
action, &qrecord_inserted);
- ret = insert_delayed_ref(delayed_refs, head_ref, &ref->node);
+ merged = insert_delayed_ref(delayed_refs, head_ref, &ref->node);
spin_unlock(&delayed_refs->lock);
/*
@@ -947,7 +947,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
trace_add_delayed_tree_ref(fs_info, &ref->node, ref,
action == BTRFS_ADD_DELAYED_EXTENT ?
BTRFS_ADD_DELAYED_REF : action);
- if (ret > 0)
+ if (merged)
kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
if (qrecord_inserted)
@@ -968,9 +968,9 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
- int qrecord_inserted;
+ bool qrecord_inserted;
int action = generic_ref->action;
- int ret;
+ bool merged;
u64 bytenr = generic_ref->bytenr;
u64 num_bytes = generic_ref->len;
u64 parent = generic_ref->parent;
@@ -1027,7 +1027,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
head_ref = add_delayed_ref_head(trans, head_ref, record,
action, &qrecord_inserted);
- ret = insert_delayed_ref(delayed_refs, head_ref, &ref->node);
+ merged = insert_delayed_ref(delayed_refs, head_ref, &ref->node);
spin_unlock(&delayed_refs->lock);
/*
@@ -1039,7 +1039,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
trace_add_delayed_data_ref(trans->fs_info, &ref->node, ref,
action == BTRFS_ADD_DELAYED_EXTENT ?
BTRFS_ADD_DELAYED_REF : action);
- if (ret > 0)
+ if (merged)
kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index b54261fe509b..b8e14b0ba5f1 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -48,9 +48,6 @@ struct btrfs_delayed_ref_node {
unsigned int action:8;
unsigned int type:8;
- /* is this node still in the rbtree? */
- unsigned int is_head:1;
- unsigned int in_tree:1;
};
struct btrfs_delayed_extent_op {
@@ -70,20 +67,26 @@ struct btrfs_delayed_extent_op {
struct btrfs_delayed_ref_head {
u64 bytenr;
u64 num_bytes;
- refcount_t refs;
+ /*
+ * For insertion into struct btrfs_delayed_ref_root::href_root.
+ * Keep it in the same cache line as 'bytenr' for more efficient
+ * searches in the rbtree.
+ */
+ struct rb_node href_node;
/*
* the mutex is held while running the refs, and it is also
* held when checking the sum of reference modifications.
*/
struct mutex mutex;
+ refcount_t refs;
+
+ /* Protects 'ref_tree' and 'ref_add_list'. */
spinlock_t lock;
struct rb_root_cached ref_tree;
/* accumulate add BTRFS_ADD_DELAYED_REF nodes to this ref_add_list. */
struct list_head ref_add_list;
- struct rb_node href_node;
-
struct btrfs_delayed_extent_op *extent_op;
/*
@@ -113,10 +116,10 @@ struct btrfs_delayed_ref_head {
* we need to update the in ram accounting to properly reflect
* the free has happened.
*/
- unsigned int must_insert_reserved:1;
- unsigned int is_data:1;
- unsigned int is_system:1;
- unsigned int processing:1;
+ bool must_insert_reserved;
+ bool is_data;
+ bool is_system;
+ bool processing;
};
struct btrfs_delayed_tree_ref {
@@ -337,7 +340,7 @@ static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
{
WARN_ON(refcount_read(&ref->refs) == 0);
if (refcount_dec_and_test(&ref->refs)) {
- WARN_ON(ref->in_tree);
+ WARN_ON(!RB_EMPTY_NODE(&ref->ref_node));
switch (ref->type) {
case BTRFS_TREE_BLOCK_REF_KEY:
case BTRFS_SHARED_BLOCK_REF_KEY:
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 2d00600ff413..5f10965fd72b 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -41,7 +41,7 @@
* All new writes will be written to both target and source devices, so even
* if replace gets canceled, sources device still contains up-to-date data.
*
- * Location: handle_ops_on_dev_replace() from __btrfs_map_block()
+ * Location: handle_ops_on_dev_replace() from btrfs_map_block()
* Start: btrfs_dev_replace_start()
* End: btrfs_dev_replace_finishing()
* Content: Latest data/metadata
@@ -795,8 +795,8 @@ static int btrfs_set_target_alloc_state(struct btrfs_device *srcdev,
while (!find_first_extent_bit(&srcdev->alloc_state, start,
&found_start, &found_end,
CHUNK_ALLOCATED, &cached_state)) {
- ret = set_extent_bits(&tgtdev->alloc_state, found_start,
- found_end, CHUNK_ALLOCATED);
+ ret = set_extent_bit(&tgtdev->alloc_state, found_start,
+ found_end, CHUNK_ALLOCATED, NULL);
if (ret)
break;
start = found_end + 1;
diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c
index a6d77fe41e1a..944a7340f6a4 100644
--- a/fs/btrfs/discard.c
+++ b/fs/btrfs/discard.c
@@ -73,6 +73,23 @@ static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
return &discard_ctl->discard_list[block_group->discard_index];
}
+/*
+ * Determine if async discard should be running.
+ *
+ * @discard_ctl: discard control
+ *
+ * Check if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
+ */
+static bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
+{
+ struct btrfs_fs_info *fs_info = container_of(discard_ctl,
+ struct btrfs_fs_info,
+ discard_ctl);
+
+ return (!(fs_info->sb->s_flags & SB_RDONLY) &&
+ test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
+}
+
static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
struct btrfs_block_group *block_group)
{
@@ -545,23 +562,6 @@ static void btrfs_discard_workfn(struct work_struct *work)
}
/*
- * Determine if async discard should be running.
- *
- * @discard_ctl: discard control
- *
- * Check if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
- */
-bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
-{
- struct btrfs_fs_info *fs_info = container_of(discard_ctl,
- struct btrfs_fs_info,
- discard_ctl);
-
- return (!(fs_info->sb->s_flags & SB_RDONLY) &&
- test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
-}
-
-/*
* Recalculate the base delay.
*
* @discard_ctl: discard control
diff --git a/fs/btrfs/discard.h b/fs/btrfs/discard.h
index 57b9202f427f..dddb0f9101ba 100644
--- a/fs/btrfs/discard.h
+++ b/fs/btrfs/discard.h
@@ -24,7 +24,6 @@ void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
struct btrfs_block_group *block_group);
void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
bool override);
-bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl);
/* Update operations */
void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index fbf9006c6234..7513388b0567 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -60,15 +60,6 @@
BTRFS_SUPER_FLAG_METADUMP |\
BTRFS_SUPER_FLAG_METADUMP_V2)
-static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
-static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
- struct btrfs_fs_info *fs_info);
-static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
-static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
- struct extent_io_tree *dirty_pages,
- int mark);
-static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
- struct extent_io_tree *pinned_extents);
static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info);
static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info);
@@ -96,7 +87,7 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result)
crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE,
first_page_part - BTRFS_CSUM_SIZE);
- for (i = 1; i < num_pages; i++) {
+ for (i = 1; i < num_pages && INLINE_EXTENT_BUFFER_PAGES > 1; i++) {
kaddr = page_address(buf->pages[i]);
crypto_shash_update(shash, kaddr, PAGE_SIZE);
}
@@ -110,35 +101,27 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result)
* detect blocks that either didn't get written at all or got written
* in the wrong place.
*/
-static int verify_parent_transid(struct extent_io_tree *io_tree,
- struct extent_buffer *eb, u64 parent_transid,
- int atomic)
+int btrfs_buffer_uptodate(struct extent_buffer *eb, u64 parent_transid, int atomic)
{
- struct extent_state *cached_state = NULL;
- int ret;
+ if (!extent_buffer_uptodate(eb))
+ return 0;
if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
- return 0;
+ return 1;
if (atomic)
return -EAGAIN;
- lock_extent(io_tree, eb->start, eb->start + eb->len - 1, &cached_state);
- if (extent_buffer_uptodate(eb) &&
- btrfs_header_generation(eb) == parent_transid) {
- ret = 0;
- goto out;
- }
- btrfs_err_rl(eb->fs_info,
+ if (!extent_buffer_uptodate(eb) ||
+ btrfs_header_generation(eb) != parent_transid) {
+ btrfs_err_rl(eb->fs_info,
"parent transid verify failed on logical %llu mirror %u wanted %llu found %llu",
eb->start, eb->read_mirror,
parent_transid, btrfs_header_generation(eb));
- ret = 1;
- clear_extent_buffer_uptodate(eb);
-out:
- unlock_extent(io_tree, eb->start, eb->start + eb->len - 1,
- &cached_state);
- return ret;
+ clear_extent_buffer_uptodate(eb);
+ return 0;
+ }
+ return 1;
}
static bool btrfs_supported_super_csum(u16 csum_type)
@@ -180,69 +163,10 @@ int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
return 0;
}
-int btrfs_verify_level_key(struct extent_buffer *eb, int level,
- struct btrfs_key *first_key, u64 parent_transid)
-{
- struct btrfs_fs_info *fs_info = eb->fs_info;
- int found_level;
- struct btrfs_key found_key;
- int ret;
-
- found_level = btrfs_header_level(eb);
- if (found_level != level) {
- WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
- KERN_ERR "BTRFS: tree level check failed\n");
- btrfs_err(fs_info,
-"tree level mismatch detected, bytenr=%llu level expected=%u has=%u",
- eb->start, level, found_level);
- return -EIO;
- }
-
- if (!first_key)
- return 0;
-
- /*
- * For live tree block (new tree blocks in current transaction),
- * we need proper lock context to avoid race, which is impossible here.
- * So we only checks tree blocks which is read from disk, whose
- * generation <= fs_info->last_trans_committed.
- */
- if (btrfs_header_generation(eb) > fs_info->last_trans_committed)
- return 0;
-
- /* We have @first_key, so this @eb must have at least one item */
- if (btrfs_header_nritems(eb) == 0) {
- btrfs_err(fs_info,
- "invalid tree nritems, bytenr=%llu nritems=0 expect >0",
- eb->start);
- WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
- return -EUCLEAN;
- }
-
- if (found_level)
- btrfs_node_key_to_cpu(eb, &found_key, 0);
- else
- btrfs_item_key_to_cpu(eb, &found_key, 0);
- ret = btrfs_comp_cpu_keys(first_key, &found_key);
-
- if (ret) {
- WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
- KERN_ERR "BTRFS: tree first key check failed\n");
- btrfs_err(fs_info,
-"tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)",
- eb->start, parent_transid, first_key->objectid,
- first_key->type, first_key->offset,
- found_key.objectid, found_key.type,
- found_key.offset);
- }
- return ret;
-}
-
static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb,
int mirror_num)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
- u64 start = eb->start;
int i, num_pages = num_extent_pages(eb);
int ret = 0;
@@ -251,12 +175,14 @@ static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb,
for (i = 0; i < num_pages; i++) {
struct page *p = eb->pages[i];
+ u64 start = max_t(u64, eb->start, page_offset(p));
+ u64 end = min_t(u64, eb->start + eb->len, page_offset(p) + PAGE_SIZE);
+ u32 len = end - start;
- ret = btrfs_repair_io_failure(fs_info, 0, start, PAGE_SIZE,
- start, p, start - page_offset(p), mirror_num);
+ ret = btrfs_repair_io_failure(fs_info, 0, start, len,
+ start, p, offset_in_page(start), mirror_num);
if (ret)
break;
- start += PAGE_SIZE;
}
return ret;
@@ -311,12 +237,34 @@ int btrfs_read_extent_buffer(struct extent_buffer *eb,
return ret;
}
-static int csum_one_extent_buffer(struct extent_buffer *eb)
+/*
+ * Checksum a dirty tree block before IO.
+ */
+blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
{
+ struct extent_buffer *eb = bbio->private;
struct btrfs_fs_info *fs_info = eb->fs_info;
+ u64 found_start = btrfs_header_bytenr(eb);
u8 result[BTRFS_CSUM_SIZE];
int ret;
+ /* Btree blocks are always contiguous on disk. */
+ if (WARN_ON_ONCE(bbio->file_offset != eb->start))
+ return BLK_STS_IOERR;
+ if (WARN_ON_ONCE(bbio->bio.bi_iter.bi_size != eb->len))
+ return BLK_STS_IOERR;
+
+ if (test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags)) {
+ WARN_ON_ONCE(found_start != 0);
+ return BLK_STS_OK;
+ }
+
+ if (WARN_ON_ONCE(found_start != eb->start))
+ return BLK_STS_IOERR;
+ if (WARN_ON(!btrfs_page_test_uptodate(fs_info, eb->pages[0], eb->start,
+ eb->len)))
+ return BLK_STS_IOERR;
+
ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid,
offsetof(struct btrfs_header, fsid),
BTRFS_FSID_SIZE) == 0);
@@ -325,7 +273,7 @@ static int csum_one_extent_buffer(struct extent_buffer *eb)
if (btrfs_header_level(eb))
ret = btrfs_check_node(eb);
else
- ret = btrfs_check_leaf_full(eb);
+ ret = btrfs_check_leaf(eb);
if (ret < 0)
goto error;
@@ -343,8 +291,7 @@ static int csum_one_extent_buffer(struct extent_buffer *eb)
goto error;
}
write_extent_buffer(eb, result, 0, fs_info->csum_size);
-
- return 0;
+ return BLK_STS_OK;
error:
btrfs_print_tree(eb, 0);
@@ -358,103 +305,10 @@ error:
*/
WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG) ||
btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID);
- return ret;
-}
-
-/* Checksum all dirty extent buffers in one bio_vec */
-static int csum_dirty_subpage_buffers(struct btrfs_fs_info *fs_info,
- struct bio_vec *bvec)
-{
- struct page *page = bvec->bv_page;
- u64 bvec_start = page_offset(page) + bvec->bv_offset;
- u64 cur;
- int ret = 0;
-
- for (cur = bvec_start; cur < bvec_start + bvec->bv_len;
- cur += fs_info->nodesize) {
- struct extent_buffer *eb;
- bool uptodate;
-
- eb = find_extent_buffer(fs_info, cur);
- uptodate = btrfs_subpage_test_uptodate(fs_info, page, cur,
- fs_info->nodesize);
-
- /* A dirty eb shouldn't disappear from buffer_radix */
- if (WARN_ON(!eb))
- return -EUCLEAN;
-
- if (WARN_ON(cur != btrfs_header_bytenr(eb))) {
- free_extent_buffer(eb);
- return -EUCLEAN;
- }
- if (WARN_ON(!uptodate)) {
- free_extent_buffer(eb);
- return -EUCLEAN;
- }
-
- ret = csum_one_extent_buffer(eb);
- free_extent_buffer(eb);
- if (ret < 0)
- return ret;
- }
- return ret;
-}
-
-/*
- * Checksum a dirty tree block before IO. This has extra checks to make sure
- * we only fill in the checksum field in the first page of a multi-page block.
- * For subpage extent buffers we need bvec to also read the offset in the page.
- */
-static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct bio_vec *bvec)
-{
- struct page *page = bvec->bv_page;
- u64 start = page_offset(page);
- u64 found_start;
- struct extent_buffer *eb;
-
- if (fs_info->nodesize < PAGE_SIZE)
- return csum_dirty_subpage_buffers(fs_info, bvec);
-
- eb = (struct extent_buffer *)page->private;
- if (page != eb->pages[0])
- return 0;
-
- found_start = btrfs_header_bytenr(eb);
-
- if (test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags)) {
- WARN_ON(found_start != 0);
- return 0;
- }
-
- /*
- * Please do not consolidate these warnings into a single if.
- * It is useful to know what went wrong.
- */
- if (WARN_ON(found_start != start))
- return -EUCLEAN;
- if (WARN_ON(!PageUptodate(page)))
- return -EUCLEAN;
-
- return csum_one_extent_buffer(eb);
-}
-
-blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
-{
- struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
- struct bvec_iter iter;
- struct bio_vec bv;
- int ret = 0;
-
- bio_for_each_segment(bv, &bbio->bio, iter) {
- ret = csum_dirty_buffer(fs_info, &bv);
- if (ret)
- break;
- }
-
return errno_to_blk_status(ret);
}
-static int check_tree_block_fsid(struct extent_buffer *eb)
+static bool check_tree_block_fsid(struct extent_buffer *eb)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs;
@@ -474,18 +328,18 @@ static int check_tree_block_fsid(struct extent_buffer *eb)
metadata_uuid = fs_devices->fsid;
if (!memcmp(fsid, metadata_uuid, BTRFS_FSID_SIZE))
- return 0;
+ return false;
list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list)
if (!memcmp(fsid, seed_devs->fsid, BTRFS_FSID_SIZE))
- return 0;
+ return false;
- return 1;
+ return true;
}
/* Do basic extent buffer checks at read time */
-static int validate_extent_buffer(struct extent_buffer *eb,
- struct btrfs_tree_parent_check *check)
+int btrfs_validate_extent_buffer(struct extent_buffer *eb,
+ struct btrfs_tree_parent_check *check)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
u64 found_start;
@@ -582,7 +436,7 @@ static int validate_extent_buffer(struct extent_buffer *eb,
* that we don't try and read the other copies of this block, just
* return -EIO.
*/
- if (found_level == 0 && btrfs_check_leaf_full(eb)) {
+ if (found_level == 0 && btrfs_check_leaf(eb)) {
set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
ret = -EIO;
}
@@ -590,9 +444,7 @@ static int validate_extent_buffer(struct extent_buffer *eb,
if (found_level > 0 && btrfs_check_node(eb))
ret = -EIO;
- if (!ret)
- set_extent_buffer_uptodate(eb);
- else
+ if (ret)
btrfs_err(fs_info,
"read time tree block corruption detected on logical %llu mirror %u",
eb->start, eb->read_mirror);
@@ -600,105 +452,6 @@ out:
return ret;
}
-static int validate_subpage_buffer(struct page *page, u64 start, u64 end,
- int mirror, struct btrfs_tree_parent_check *check)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
- struct extent_buffer *eb;
- bool reads_done;
- int ret = 0;
-
- ASSERT(check);
-
- /*
- * We don't allow bio merge for subpage metadata read, so we should
- * only get one eb for each endio hook.
- */
- ASSERT(end == start + fs_info->nodesize - 1);
- ASSERT(PagePrivate(page));
-
- eb = find_extent_buffer(fs_info, start);
- /*
- * When we are reading one tree block, eb must have been inserted into
- * the radix tree. If not, something is wrong.
- */
- ASSERT(eb);
-
- reads_done = atomic_dec_and_test(&eb->io_pages);
- /* Subpage read must finish in page read */
- ASSERT(reads_done);
-
- eb->read_mirror = mirror;
- if (test_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags)) {
- ret = -EIO;
- goto err;
- }
- ret = validate_extent_buffer(eb, check);
- if (ret < 0)
- goto err;
-
- set_extent_buffer_uptodate(eb);
-
- free_extent_buffer(eb);
- return ret;
-err:
- /*
- * end_bio_extent_readpage decrements io_pages in case of error,
- * make sure it has something to decrement.
- */
- atomic_inc(&eb->io_pages);
- clear_extent_buffer_uptodate(eb);
- free_extent_buffer(eb);
- return ret;
-}
-
-int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
- struct page *page, u64 start, u64 end,
- int mirror)
-{
- struct extent_buffer *eb;
- int ret = 0;
- int reads_done;
-
- ASSERT(page->private);
-
- if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE)
- return validate_subpage_buffer(page, start, end, mirror,
- &bbio->parent_check);
-
- eb = (struct extent_buffer *)page->private;
-
- /*
- * The pending IO might have been the only thing that kept this buffer
- * in memory. Make sure we have a ref for all this other checks
- */
- atomic_inc(&eb->refs);
-
- reads_done = atomic_dec_and_test(&eb->io_pages);
- if (!reads_done)
- goto err;
-
- eb->read_mirror = mirror;
- if (test_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags)) {
- ret = -EIO;
- goto err;
- }
- ret = validate_extent_buffer(eb, &bbio->parent_check);
-err:
- if (ret) {
- /*
- * our io error hook is going to dec the io pages
- * again, we have to make sure it has something
- * to decrement
- */
- atomic_inc(&eb->io_pages);
- clear_extent_buffer_uptodate(eb);
- }
- free_extent_buffer(eb);
-
- return ret;
-}
-
#ifdef CONFIG_MIGRATION
static int btree_migrate_folio(struct address_space *mapping,
struct folio *dst, struct folio *src, enum migrate_mode mode)
@@ -995,13 +748,18 @@ int btrfs_global_root_insert(struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct rb_node *tmp;
+ int ret = 0;
write_lock(&fs_info->global_root_lock);
tmp = rb_find_add(&root->rb_node, &fs_info->global_root_tree, global_root_cmp);
write_unlock(&fs_info->global_root_lock);
- ASSERT(!tmp);
- return tmp ? -EEXIST : 0;
+ if (tmp) {
+ ret = -EEXIST;
+ btrfs_warn(fs_info, "global root %llu %llu already exists",
+ root->root_key.objectid, root->root_key.offset);
+ }
+ return ret;
}
void btrfs_global_root_delete(struct btrfs_root *root)
@@ -1390,8 +1148,7 @@ static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
spin_lock(&fs_info->fs_roots_radix_lock);
root = radix_tree_lookup(&fs_info->fs_roots_radix,
(unsigned long)root_id);
- if (root)
- root = btrfs_grab_root(root);
+ root = btrfs_grab_root(root);
spin_unlock(&fs_info->fs_roots_radix_lock);
return root;
}
@@ -1405,31 +1162,28 @@ static struct btrfs_root *btrfs_get_global_root(struct btrfs_fs_info *fs_info,
.offset = 0,
};
- if (objectid == BTRFS_ROOT_TREE_OBJECTID)
+ switch (objectid) {
+ case BTRFS_ROOT_TREE_OBJECTID:
return btrfs_grab_root(fs_info->tree_root);
- if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
+ case BTRFS_EXTENT_TREE_OBJECTID:
return btrfs_grab_root(btrfs_global_root(fs_info, &key));
- if (objectid == BTRFS_CHUNK_TREE_OBJECTID)
+ case BTRFS_CHUNK_TREE_OBJECTID:
return btrfs_grab_root(fs_info->chunk_root);
- if (objectid == BTRFS_DEV_TREE_OBJECTID)
+ case BTRFS_DEV_TREE_OBJECTID:
return btrfs_grab_root(fs_info->dev_root);
- if (objectid == BTRFS_CSUM_TREE_OBJECTID)
+ case BTRFS_CSUM_TREE_OBJECTID:
return btrfs_grab_root(btrfs_global_root(fs_info, &key));
- if (objectid == BTRFS_QUOTA_TREE_OBJECTID)
- return btrfs_grab_root(fs_info->quota_root) ?
- fs_info->quota_root : ERR_PTR(-ENOENT);
- if (objectid == BTRFS_UUID_TREE_OBJECTID)
- return btrfs_grab_root(fs_info->uuid_root) ?
- fs_info->uuid_root : ERR_PTR(-ENOENT);
- if (objectid == BTRFS_BLOCK_GROUP_TREE_OBJECTID)
- return btrfs_grab_root(fs_info->block_group_root) ?
- fs_info->block_group_root : ERR_PTR(-ENOENT);
- if (objectid == BTRFS_FREE_SPACE_TREE_OBJECTID) {
- struct btrfs_root *root = btrfs_global_root(fs_info, &key);
-
- return btrfs_grab_root(root) ? root : ERR_PTR(-ENOENT);
- }
- return NULL;
+ case BTRFS_QUOTA_TREE_OBJECTID:
+ return btrfs_grab_root(fs_info->quota_root);
+ case BTRFS_UUID_TREE_OBJECTID:
+ return btrfs_grab_root(fs_info->uuid_root);
+ case BTRFS_BLOCK_GROUP_TREE_OBJECTID:
+ return btrfs_grab_root(fs_info->block_group_root);
+ case BTRFS_FREE_SPACE_TREE_OBJECTID:
+ return btrfs_grab_root(btrfs_global_root(fs_info, &key));
+ default:
+ return NULL;
+ }
}
int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
@@ -1985,7 +1739,6 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
{
btrfs_destroy_workqueue(fs_info->fixup_workers);
btrfs_destroy_workqueue(fs_info->delalloc_workers);
- btrfs_destroy_workqueue(fs_info->hipri_workers);
btrfs_destroy_workqueue(fs_info->workers);
if (fs_info->endio_workers)
destroy_workqueue(fs_info->endio_workers);
@@ -2177,12 +1930,10 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
{
u32 max_active = fs_info->thread_pool_size;
unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
+ unsigned int ordered_flags = WQ_MEM_RECLAIM | WQ_FREEZABLE;
fs_info->workers =
btrfs_alloc_workqueue(fs_info, "worker", flags, max_active, 16);
- fs_info->hipri_workers =
- btrfs_alloc_workqueue(fs_info, "worker-high",
- flags | WQ_HIGHPRI, max_active, 16);
fs_info->delalloc_workers =
btrfs_alloc_workqueue(fs_info, "delalloc",
@@ -2196,7 +1947,7 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
btrfs_alloc_workqueue(fs_info, "cache", flags, max_active, 0);
fs_info->fixup_workers =
- btrfs_alloc_workqueue(fs_info, "fixup", flags, 1, 0);
+ btrfs_alloc_ordered_workqueue(fs_info, "fixup", ordered_flags);
fs_info->endio_workers =
alloc_workqueue("btrfs-endio", flags, max_active);
@@ -2215,11 +1966,12 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
btrfs_alloc_workqueue(fs_info, "delayed-meta", flags,
max_active, 0);
fs_info->qgroup_rescan_workers =
- btrfs_alloc_workqueue(fs_info, "qgroup-rescan", flags, 1, 0);
+ btrfs_alloc_ordered_workqueue(fs_info, "qgroup-rescan",
+ ordered_flags);
fs_info->discard_ctl.discard_workers =
- alloc_workqueue("btrfs_discard", WQ_UNBOUND | WQ_FREEZABLE, 1);
+ alloc_ordered_workqueue("btrfs_discard", WQ_FREEZABLE);
- if (!(fs_info->workers && fs_info->hipri_workers &&
+ if (!(fs_info->workers &&
fs_info->delalloc_workers && fs_info->flush_workers &&
fs_info->endio_workers && fs_info->endio_meta_workers &&
fs_info->compressed_write_workers &&
@@ -2259,6 +2011,9 @@ static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type)
if (!strstr(crypto_shash_driver_name(csum_shash), "generic"))
set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
break;
+ case BTRFS_CSUM_TYPE_XXHASH:
+ set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
+ break;
default:
break;
}
@@ -2636,6 +2391,14 @@ int btrfs_validate_super(struct btrfs_fs_info *fs_info,
ret = -EINVAL;
}
+ if (memcmp(fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid,
+ BTRFS_FSID_SIZE) != 0) {
+ btrfs_err(fs_info,
+ "dev_item UUID does not match metadata fsid: %pU != %pU",
+ fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid);
+ ret = -EINVAL;
+ }
+
/*
* Artificial requirement for block-group-tree to force newer features
* (free-space-tree, no-holes) so the test matrix is smaller.
@@ -2648,14 +2411,6 @@ int btrfs_validate_super(struct btrfs_fs_info *fs_info,
ret = -EINVAL;
}
- if (memcmp(fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid,
- BTRFS_FSID_SIZE) != 0) {
- btrfs_err(fs_info,
- "dev_item UUID does not match metadata fsid: %pU != %pU",
- fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid);
- ret = -EINVAL;
- }
-
/*
* Hint to catch really bogus numbers, bitflips or so, more exact checks are
* done later
@@ -2841,6 +2596,7 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
/* We can't trust the free space cache either */
btrfs_set_opt(fs_info->mount_opt, CLEAR_CACHE);
+ btrfs_warn(fs_info, "try to load backup roots slot %d", i);
ret = read_backup_root(fs_info, i);
backup_index = ret;
if (ret < 0)
@@ -4655,28 +4411,10 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
btrfs_close_devices(fs_info->fs_devices);
}
-int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
- int atomic)
-{
- int ret;
- struct inode *btree_inode = buf->pages[0]->mapping->host;
-
- ret = extent_buffer_uptodate(buf);
- if (!ret)
- return ret;
-
- ret = verify_parent_transid(&BTRFS_I(btree_inode)->io_tree, buf,
- parent_transid, atomic);
- if (ret == -EAGAIN)
- return ret;
- return !ret;
-}
-
void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
{
struct btrfs_fs_info *fs_info = buf->fs_info;
u64 transid = btrfs_header_generation(buf);
- int was_dirty;
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
@@ -4691,19 +4429,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
if (transid != fs_info->generation)
WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, found %llu running %llu\n",
buf->start, transid, fs_info->generation);
- was_dirty = set_extent_buffer_dirty(buf);
- if (!was_dirty)
- percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
- buf->len,
- fs_info->dirty_metadata_batch);
+ set_extent_buffer_dirty(buf);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
/*
- * Since btrfs_mark_buffer_dirty() can be called with item pointer set
- * but item data not updated.
- * So here we should only check item pointers, not item data.
+ * btrfs_check_leaf() won't check item data if we don't have WRITTEN
+ * set, so this will only validate the basic structure of the items.
*/
- if (btrfs_header_level(buf) == 0 &&
- btrfs_check_leaf_relaxed(buf)) {
+ if (btrfs_header_level(buf) == 0 && btrfs_check_leaf(buf)) {
btrfs_print_leaf(buf);
ASSERT(0);
}
@@ -4833,13 +4565,12 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
}
-static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
- struct btrfs_fs_info *fs_info)
+static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
+ struct btrfs_fs_info *fs_info)
{
struct rb_node *node;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_delayed_ref_node *ref;
- int ret = 0;
delayed_refs = &trans->delayed_refs;
@@ -4847,7 +4578,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
if (atomic_read(&delayed_refs->num_entries) == 0) {
spin_unlock(&delayed_refs->lock);
btrfs_debug(fs_info, "delayed_refs has NO entry");
- return ret;
+ return;
}
while ((node = rb_first_cached(&delayed_refs->href_root)) != NULL) {
@@ -4864,7 +4595,6 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
while ((n = rb_first_cached(&head->ref_tree)) != NULL) {
ref = rb_entry(n, struct btrfs_delayed_ref_node,
ref_node);
- ref->in_tree = 0;
rb_erase_cached(&ref->ref_node, &head->ref_tree);
RB_CLEAR_NODE(&ref->ref_node);
if (!list_empty(&ref->add_list))
@@ -4909,8 +4639,6 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
btrfs_qgroup_destroy_extent_records(trans);
spin_unlock(&delayed_refs->lock);
-
- return ret;
}
static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
@@ -4936,7 +4664,11 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
*/
inode = igrab(&btrfs_inode->vfs_inode);
if (inode) {
+ unsigned int nofs_flag;
+
+ nofs_flag = memalloc_nofs_save();
invalidate_inode_pages2(inode->i_mapping);
+ memalloc_nofs_restore(nofs_flag);
iput(inode);
}
spin_lock(&root->delalloc_lock);
@@ -5042,7 +4774,12 @@ static void btrfs_cleanup_bg_io(struct btrfs_block_group *cache)
inode = cache->io_ctl.inode;
if (inode) {
+ unsigned int nofs_flag;
+
+ nofs_flag = memalloc_nofs_save();
invalidate_inode_pages2(inode->i_mapping);
+ memalloc_nofs_restore(nofs_flag);
+
BTRFS_I(inode)->generation = 0;
cache->io_ctl.inode = NULL;
iput(inode);
@@ -5126,8 +4863,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
EXTENT_DIRTY);
btrfs_destroy_pinned_extent(fs_info, &cur_trans->pinned_extents);
- btrfs_free_redirty_list(cur_trans);
-
cur_trans->state =TRANS_STATE_COMPLETED;
wake_up(&cur_trans->commit_wait);
}
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 4d5772330110..b03767f4d7ed 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -31,8 +31,6 @@ struct btrfs_tree_parent_check;
void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info);
void btrfs_init_fs_info(struct btrfs_fs_info *fs_info);
-int btrfs_verify_level_key(struct extent_buffer *eb, int level,
- struct btrfs_key *first_key, u64 parent_transid);
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
struct btrfs_tree_parent_check *check);
struct extent_buffer *btrfs_find_create_tree_block(
@@ -84,9 +82,8 @@ void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info);
void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info);
void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
-int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
- struct page *page, u64 start, u64 end,
- int mirror);
+int btrfs_validate_extent_buffer(struct extent_buffer *eb,
+ struct btrfs_tree_parent_check *check);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info);
#endif
diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c
index 29a225836e28..a2315a4b8b75 100644
--- a/fs/btrfs/extent-io-tree.c
+++ b/fs/btrfs/extent-io-tree.c
@@ -533,6 +533,16 @@ static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
}
/*
+ * Detect if extent bits request NOWAIT semantics and set the gfp mask accordingly,
+ * unset the EXTENT_NOWAIT bit.
+ */
+static void set_gfp_mask_from_bits(u32 *bits, gfp_t *mask)
+{
+ *mask = (*bits & EXTENT_NOWAIT ? GFP_NOWAIT : GFP_NOFS);
+ *bits &= EXTENT_NOWAIT - 1;
+}
+
+/*
* Clear some bits on a range in the tree. This may require splitting or
* inserting elements in the tree, so the gfp mask is used to indicate which
* allocations or sleeping are allowed.
@@ -546,7 +556,7 @@ static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
*/
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, struct extent_state **cached_state,
- gfp_t mask, struct extent_changeset *changeset)
+ struct extent_changeset *changeset)
{
struct extent_state *state;
struct extent_state *cached;
@@ -556,7 +566,9 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int clear = 0;
int wake;
int delete = (bits & EXTENT_CLEAR_ALL_BITS);
+ gfp_t mask;
+ set_gfp_mask_from_bits(&bits, &mask);
btrfs_debug_check_extent_io_range(tree, start, end);
trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);
@@ -953,7 +965,8 @@ out:
/*
* Set some bits on a range in the tree. This may require allocations or
- * sleeping, so the gfp mask is used to indicate what is allowed.
+ * sleeping. By default all allocations use GFP_NOFS, use EXTENT_NOWAIT for
+ * GFP_NOWAIT.
*
* If any of the exclusive bits are set, this will fail with -EEXIST if some
* part of the range already has the desired bits set. The extent_state of the
@@ -968,7 +981,7 @@ static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, u64 *failed_start,
struct extent_state **failed_state,
struct extent_state **cached_state,
- struct extent_changeset *changeset, gfp_t mask)
+ struct extent_changeset *changeset)
{
struct extent_state *state;
struct extent_state *prealloc = NULL;
@@ -978,7 +991,9 @@ static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u64 last_start;
u64 last_end;
u32 exclusive_bits = (bits & EXTENT_LOCKED);
+ gfp_t mask;
+ set_gfp_mask_from_bits(&bits, &mask);
btrfs_debug_check_extent_io_range(tree, start, end);
trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);
@@ -1188,10 +1203,10 @@ out:
}
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- u32 bits, struct extent_state **cached_state, gfp_t mask)
+ u32 bits, struct extent_state **cached_state)
{
return __set_extent_bit(tree, start, end, bits, NULL, NULL,
- cached_state, NULL, mask);
+ cached_state, NULL);
}
/*
@@ -1687,8 +1702,7 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
*/
ASSERT(!(bits & EXTENT_LOCKED));
- return __set_extent_bit(tree, start, end, bits, NULL, NULL, NULL,
- changeset, GFP_NOFS);
+ return __set_extent_bit(tree, start, end, bits, NULL, NULL, NULL, changeset);
}
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
@@ -1700,8 +1714,7 @@ int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
*/
ASSERT(!(bits & EXTENT_LOCKED));
- return __clear_extent_bit(tree, start, end, bits, NULL, GFP_NOFS,
- changeset);
+ return __clear_extent_bit(tree, start, end, bits, NULL, changeset);
}
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
@@ -1711,7 +1724,7 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
u64 failed_start;
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
- NULL, cached, NULL, GFP_NOFS);
+ NULL, cached, NULL);
if (err == -EEXIST) {
if (failed_start > start)
clear_extent_bit(tree, start, failed_start - 1,
@@ -1733,7 +1746,7 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
u64 failed_start;
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
- &failed_state, cached_state, NULL, GFP_NOFS);
+ &failed_state, cached_state, NULL);
while (err == -EEXIST) {
if (failed_start != start)
clear_extent_bit(tree, start, failed_start - 1,
@@ -1743,7 +1756,7 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
&failed_state);
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
&failed_start, &failed_state,
- cached_state, NULL, GFP_NOFS);
+ cached_state, NULL);
}
return err;
}
diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h
index 21766e49ec02..fbd3b275ab1c 100644
--- a/fs/btrfs/extent-io-tree.h
+++ b/fs/btrfs/extent-io-tree.h
@@ -43,6 +43,15 @@ enum {
* want the extent states to go away.
*/
ENUM_BIT(EXTENT_CLEAR_ALL_BITS),
+
+ /*
+ * This must be last.
+ *
+ * Bit not representing a state but a request for NOWAIT semantics,
+ * e.g. when allocating memory, and must be masked out from the other
+ * bits.
+ */
+ ENUM_BIT(EXTENT_NOWAIT)
};
#define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \
@@ -127,22 +136,20 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, struct extent_changeset *changeset);
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- u32 bits, struct extent_state **cached, gfp_t mask,
+ u32 bits, struct extent_state **cached,
struct extent_changeset *changeset);
static inline int clear_extent_bit(struct extent_io_tree *tree, u64 start,
u64 end, u32 bits,
struct extent_state **cached)
{
- return __clear_extent_bit(tree, start, end, bits, cached,
- GFP_NOFS, NULL);
+ return __clear_extent_bit(tree, start, end, bits, cached, NULL);
}
static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached)
{
- return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, cached,
- GFP_NOFS, NULL);
+ return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, cached, NULL);
}
static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
@@ -154,31 +161,13 @@ static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, struct extent_changeset *changeset);
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- u32 bits, struct extent_state **cached_state, gfp_t mask);
-
-static inline int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start,
- u64 end, u32 bits)
-{
- return set_extent_bit(tree, start, end, bits, NULL, GFP_NOWAIT);
-}
-
-static inline int set_extent_bits(struct extent_io_tree *tree, u64 start,
- u64 end, u32 bits)
-{
- return set_extent_bit(tree, start, end, bits, NULL, GFP_NOFS);
-}
+ u32 bits, struct extent_state **cached_state);
static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached_state)
{
return __clear_extent_bit(tree, start, end, EXTENT_UPTODATE,
- cached_state, GFP_NOFS, NULL);
-}
-
-static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start,
- u64 end, gfp_t mask)
-{
- return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL, mask);
+ cached_state, NULL);
}
static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
@@ -193,29 +182,6 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, u32 clear_bits,
struct extent_state **cached_state);
-static inline int set_extent_delalloc(struct extent_io_tree *tree, u64 start,
- u64 end, u32 extra_bits,
- struct extent_state **cached_state)
-{
- return set_extent_bit(tree, start, end,
- EXTENT_DELALLOC | extra_bits,
- cached_state, GFP_NOFS);
-}
-
-static inline int set_extent_defrag(struct extent_io_tree *tree, u64 start,
- u64 end, struct extent_state **cached_state)
-{
- return set_extent_bit(tree, start, end,
- EXTENT_DELALLOC | EXTENT_DEFRAG,
- cached_state, GFP_NOFS);
-}
-
-static inline int set_extent_new(struct extent_io_tree *tree, u64 start,
- u64 end)
-{
- return set_extent_bit(tree, start, end, EXTENT_NEW, NULL, GFP_NOFS);
-}
-
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, u32 bits,
struct extent_state **cached_state);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5cd289de4e92..911908ea5f6f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -73,8 +73,8 @@ int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
u64 start, u64 num_bytes)
{
u64 end = start + num_bytes - 1;
- set_extent_bits(&fs_info->excluded_extents, start, end,
- EXTENT_UPTODATE);
+ set_extent_bit(&fs_info->excluded_extents, start, end,
+ EXTENT_UPTODATE, NULL);
return 0;
}
@@ -402,7 +402,7 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
}
}
- btrfs_print_leaf((struct extent_buffer *)eb);
+ btrfs_print_leaf(eb);
btrfs_err(eb->fs_info,
"eb %llu iref 0x%lx invalid extent inline ref type %d",
eb->start, (unsigned long)iref, type);
@@ -1164,15 +1164,10 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
* should not happen at all.
*/
if (owner < BTRFS_FIRST_FREE_OBJECTID) {
+ btrfs_print_leaf(path->nodes[0]);
btrfs_crit(trans->fs_info,
-"adding refs to an existing tree ref, bytenr %llu num_bytes %llu root_objectid %llu",
- bytenr, num_bytes, root_objectid);
- if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) {
- WARN_ON(1);
- btrfs_crit(trans->fs_info,
- "path->slots[0]=%d path->nodes[0]:", path->slots[0]);
- btrfs_print_leaf(path->nodes[0]);
- }
+"adding refs to an existing tree ref, bytenr %llu num_bytes %llu root_objectid %llu slot %u",
+ bytenr, num_bytes, root_objectid, path->slots[0]);
return -EUCLEAN;
}
update_inline_extent_backref(path, iref, refs_to_add, extent_op);
@@ -1208,11 +1203,11 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
{
int j, ret = 0;
u64 bytes_left, end;
- u64 aligned_start = ALIGN(start, 1 << 9);
+ u64 aligned_start = ALIGN(start, 1 << SECTOR_SHIFT);
if (WARN_ON(start != aligned_start)) {
len -= aligned_start - start;
- len = round_down(len, 1 << 9);
+ len = round_down(len, 1 << SECTOR_SHIFT);
start = aligned_start;
}
@@ -1250,7 +1245,8 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
}
if (size) {
- ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
+ ret = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
+ size >> SECTOR_SHIFT,
GFP_NOFS);
if (!ret)
*discarded_bytes += size;
@@ -1267,7 +1263,8 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
}
if (bytes_left) {
- ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
+ ret = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
+ bytes_left >> SECTOR_SHIFT,
GFP_NOFS);
if (!ret)
*discarded_bytes += bytes_left;
@@ -1500,7 +1497,7 @@ out:
static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op,
- int insert_reserved)
+ bool insert_reserved)
{
int ret = 0;
struct btrfs_delayed_data_ref *ref;
@@ -1650,7 +1647,7 @@ out:
static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op,
- int insert_reserved)
+ bool insert_reserved)
{
int ret = 0;
struct btrfs_delayed_tree_ref *ref;
@@ -1690,7 +1687,7 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op,
- int insert_reserved)
+ bool insert_reserved)
{
int ret = 0;
@@ -1748,7 +1745,7 @@ static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_ref
struct btrfs_delayed_ref_head *head)
{
spin_lock(&delayed_refs->lock);
- head->processing = 0;
+ head->processing = false;
delayed_refs->num_heads_ready++;
spin_unlock(&delayed_refs->lock);
btrfs_delayed_ref_unlock(head);
@@ -1900,7 +1897,7 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_delayed_extent_op *extent_op;
struct btrfs_delayed_ref_node *ref;
- int must_insert_reserved = 0;
+ bool must_insert_reserved;
int ret;
delayed_refs = &trans->transaction->delayed_refs;
@@ -1916,7 +1913,6 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
return -EAGAIN;
}
- ref->in_tree = 0;
rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree);
RB_CLEAR_NODE(&ref->ref_node);
if (!list_empty(&ref->add_list))
@@ -1943,7 +1939,7 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
* spin lock.
*/
must_insert_reserved = locked_ref->must_insert_reserved;
- locked_ref->must_insert_reserved = 0;
+ locked_ref->must_insert_reserved = false;
extent_op = locked_ref->extent_op;
locked_ref->extent_op = NULL;
@@ -2155,10 +2151,10 @@ out:
}
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
- struct extent_buffer *eb, u64 flags,
- int level)
+ struct extent_buffer *eb, u64 flags)
{
struct btrfs_delayed_extent_op *extent_op;
+ int level = btrfs_header_level(eb);
int ret;
extent_op = btrfs_alloc_delayed_extent_op();
@@ -2510,8 +2506,8 @@ static int pin_down_extent(struct btrfs_trans_handle *trans,
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
- set_extent_dirty(&trans->transaction->pinned_extents, bytenr,
- bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
+ set_extent_bit(&trans->transaction->pinned_extents, bytenr,
+ bytenr + num_bytes - 1, EXTENT_DIRTY, NULL);
return 0;
}
@@ -2838,6 +2834,13 @@ static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
return ret;
}
+#define abort_and_dump(trans, path, fmt, args...) \
+({ \
+ btrfs_abort_transaction(trans, -EUCLEAN); \
+ btrfs_print_leaf(path->nodes[0]); \
+ btrfs_crit(trans->fs_info, fmt, ##args); \
+})
+
/*
* Drop one or more refs of @node.
*
@@ -2978,10 +2981,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
if (!found_extent) {
if (iref) {
- btrfs_crit(info,
-"invalid iref, no EXTENT/METADATA_ITEM found but has inline extent ref");
- btrfs_abort_transaction(trans, -EUCLEAN);
- goto err_dump;
+ abort_and_dump(trans, path,
+"invalid iref slot %u, no EXTENT/METADATA_ITEM found but has inline extent ref",
+ path->slots[0]);
+ ret = -EUCLEAN;
+ goto out;
}
/* Must be SHARED_* item, remove the backref first */
ret = remove_extent_backref(trans, extent_root, path,
@@ -3029,11 +3033,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
if (ret) {
- btrfs_err(info,
- "umm, got %d back from search, was looking for %llu",
- ret, bytenr);
if (ret > 0)
btrfs_print_leaf(path->nodes[0]);
+ btrfs_err(info,
+ "umm, got %d back from search, was looking for %llu, slot %d",
+ ret, bytenr, path->slots[0]);
}
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
@@ -3042,12 +3046,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
extent_slot = path->slots[0];
}
} else if (WARN_ON(ret == -ENOENT)) {
- btrfs_print_leaf(path->nodes[0]);
- btrfs_err(info,
- "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
- bytenr, parent, root_objectid, owner_objectid,
- owner_offset);
- btrfs_abort_transaction(trans, ret);
+ abort_and_dump(trans, path,
+"unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu slot %d",
+ bytenr, parent, root_objectid, owner_objectid,
+ owner_offset, path->slots[0]);
goto out;
} else {
btrfs_abort_transaction(trans, ret);
@@ -3067,14 +3069,15 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
key.type == BTRFS_EXTENT_ITEM_KEY) {
struct btrfs_tree_block_info *bi;
+
if (item_size < sizeof(*ei) + sizeof(*bi)) {
- btrfs_crit(info,
-"invalid extent item size for key (%llu, %u, %llu) owner %llu, has %u expect >= %zu",
- key.objectid, key.type, key.offset,
- owner_objectid, item_size,
- sizeof(*ei) + sizeof(*bi));
- btrfs_abort_transaction(trans, -EUCLEAN);
- goto err_dump;
+ abort_and_dump(trans, path,
+"invalid extent item size for key (%llu, %u, %llu) slot %u owner %llu, has %u expect >= %zu",
+ key.objectid, key.type, key.offset,
+ path->slots[0], owner_objectid, item_size,
+ sizeof(*ei) + sizeof(*bi));
+ ret = -EUCLEAN;
+ goto out;
}
bi = (struct btrfs_tree_block_info *)(ei + 1);
WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
@@ -3082,11 +3085,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
refs = btrfs_extent_refs(leaf, ei);
if (refs < refs_to_drop) {
- btrfs_crit(info,
- "trying to drop %d refs but we only have %llu for bytenr %llu",
- refs_to_drop, refs, bytenr);
- btrfs_abort_transaction(trans, -EUCLEAN);
- goto err_dump;
+ abort_and_dump(trans, path,
+ "trying to drop %d refs but we only have %llu for bytenr %llu slot %u",
+ refs_to_drop, refs, bytenr, path->slots[0]);
+ ret = -EUCLEAN;
+ goto out;
}
refs -= refs_to_drop;
@@ -3099,10 +3102,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
*/
if (iref) {
if (!found_extent) {
- btrfs_crit(info,
-"invalid iref, got inlined extent ref but no EXTENT/METADATA_ITEM found");
- btrfs_abort_transaction(trans, -EUCLEAN);
- goto err_dump;
+ abort_and_dump(trans, path,
+"invalid iref, got inlined extent ref but no EXTENT/METADATA_ITEM found, slot %u",
+ path->slots[0]);
+ ret = -EUCLEAN;
+ goto out;
}
} else {
btrfs_set_extent_refs(leaf, ei, refs);
@@ -3121,21 +3125,21 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
if (found_extent) {
if (is_data && refs_to_drop !=
extent_data_ref_count(path, iref)) {
- btrfs_crit(info,
- "invalid refs_to_drop, current refs %u refs_to_drop %u",
- extent_data_ref_count(path, iref),
- refs_to_drop);
- btrfs_abort_transaction(trans, -EUCLEAN);
- goto err_dump;
+ abort_and_dump(trans, path,
+ "invalid refs_to_drop, current refs %u refs_to_drop %u slot %u",
+ extent_data_ref_count(path, iref),
+ refs_to_drop, path->slots[0]);
+ ret = -EUCLEAN;
+ goto out;
}
if (iref) {
if (path->slots[0] != extent_slot) {
- btrfs_crit(info,
-"invalid iref, extent item key (%llu %u %llu) doesn't have wanted iref",
- key.objectid, key.type,
- key.offset);
- btrfs_abort_transaction(trans, -EUCLEAN);
- goto err_dump;
+ abort_and_dump(trans, path,
+"invalid iref, extent item key (%llu %u %llu) slot %u doesn't have wanted iref",
+ key.objectid, key.type,
+ key.offset, path->slots[0]);
+ ret = -EUCLEAN;
+ goto out;
}
} else {
/*
@@ -3145,10 +3149,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
* [ EXTENT/METADATA_ITEM ][ SHARED_* ITEM ]
*/
if (path->slots[0] != extent_slot + 1) {
- btrfs_crit(info,
- "invalid SHARED_* item, previous item is not EXTENT/METADATA_ITEM");
- btrfs_abort_transaction(trans, -EUCLEAN);
- goto err_dump;
+ abort_and_dump(trans, path,
+ "invalid SHARED_* item slot %u, previous item is not EXTENT/METADATA_ITEM",
+ path->slots[0]);
+ ret = -EUCLEAN;
+ goto out;
}
path->slots[0] = extent_slot;
num_to_del = 2;
@@ -3170,19 +3175,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
out:
btrfs_free_path(path);
return ret;
-err_dump:
- /*
- * Leaf dump can take up a lot of log buffer, so we only do full leaf
- * dump for debug build.
- */
- if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) {
- btrfs_crit(info, "path->slots[0]=%d extent_slot=%d",
- path->slots[0], extent_slot);
- btrfs_print_leaf(path->nodes[0]);
- }
-
- btrfs_free_path(path);
- return -EUCLEAN;
}
/*
@@ -3219,7 +3211,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
goto out;
btrfs_delete_ref_head(delayed_refs, head);
- head->processing = 0;
+ head->processing = false;
spin_unlock(&head->lock);
spin_unlock(&delayed_refs->lock);
@@ -4804,7 +4796,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
!test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
lockdep_owner = BTRFS_FS_TREE_OBJECTID;
- /* btrfs_clean_tree_block() accesses generation field. */
+ /* btrfs_clear_buffer_dirty() accesses generation field. */
btrfs_set_header_generation(buf, trans->transid);
/*
@@ -4836,15 +4828,17 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
* EXTENT bit to differentiate dirty pages.
*/
if (buf->log_index == 0)
- set_extent_dirty(&root->dirty_log_pages, buf->start,
- buf->start + buf->len - 1, GFP_NOFS);
+ set_extent_bit(&root->dirty_log_pages, buf->start,
+ buf->start + buf->len - 1,
+ EXTENT_DIRTY, NULL);
else
- set_extent_new(&root->dirty_log_pages, buf->start,
- buf->start + buf->len - 1);
+ set_extent_bit(&root->dirty_log_pages, buf->start,
+ buf->start + buf->len - 1,
+ EXTENT_NEW, NULL);
} else {
buf->log_index = -1;
- set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
- buf->start + buf->len - 1, GFP_NOFS);
+ set_extent_bit(&trans->transaction->dirty_pages, buf->start,
+ buf->start + buf->len - 1, EXTENT_DIRTY, NULL);
}
/* this returns a buffer locked for blocking */
return buf;
@@ -5102,8 +5096,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
BUG_ON(ret); /* -ENOMEM */
ret = btrfs_dec_ref(trans, root, eb, 0);
BUG_ON(ret); /* -ENOMEM */
- ret = btrfs_set_disk_extent_flags(trans, eb, flag,
- btrfs_header_level(eb));
+ ret = btrfs_set_disk_extent_flags(trans, eb, flag);
BUG_ON(ret); /* -ENOMEM */
wc->flags[level] |= flag;
}
@@ -5985,9 +5978,8 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
ret = btrfs_issue_discard(device->bdev, start, len,
&bytes);
if (!ret)
- set_extent_bits(&device->alloc_state, start,
- start + bytes - 1,
- CHUNK_TRIMMED);
+ set_extent_bit(&device->alloc_state, start,
+ start + bytes - 1, CHUNK_TRIMMED, NULL);
mutex_unlock(&fs_info->chunk_mutex);
if (ret)
diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
index 0c958fc1b3b8..429d5c570061 100644
--- a/fs/btrfs/extent-tree.h
+++ b/fs/btrfs/extent-tree.h
@@ -141,7 +141,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref);
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
- struct extent_buffer *eb, u64 flags, int level);
+ struct extent_buffer *eb, u64 flags);
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref);
int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a1adadd5d25d..a91d5ad27984 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -98,33 +98,16 @@ void btrfs_extent_buffer_leak_debug_check(struct btrfs_fs_info *fs_info)
*/
struct btrfs_bio_ctrl {
struct btrfs_bio *bbio;
- int mirror_num;
enum btrfs_compression_type compress_type;
u32 len_to_oe_boundary;
blk_opf_t opf;
btrfs_bio_end_io_t end_io_func;
struct writeback_control *wbc;
-
- /*
- * This is for metadata read, to provide the extra needed verification
- * info. This has to be provided for submit_one_bio(), as
- * submit_one_bio() can submit a bio if it ends at stripe boundary. If
- * no such parent_check is provided, the metadata can hit false alert at
- * endio time.
- */
- struct btrfs_tree_parent_check *parent_check;
-
- /*
- * Tell writepage not to lock the state bits for this range, it still
- * does the unlocking.
- */
- bool extent_locked;
};
static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
{
struct btrfs_bio *bbio = bio_ctrl->bbio;
- int mirror_num = bio_ctrl->mirror_num;
if (!bbio)
return;
@@ -132,25 +115,11 @@ static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
/* Caller should ensure the bio has at least some range added */
ASSERT(bbio->bio.bi_iter.bi_size);
- if (!is_data_inode(&bbio->inode->vfs_inode)) {
- if (btrfs_op(&bbio->bio) != BTRFS_MAP_WRITE) {
- /*
- * For metadata read, we should have the parent_check,
- * and copy it to bbio for metadata verification.
- */
- ASSERT(bio_ctrl->parent_check);
- memcpy(&bbio->parent_check,
- bio_ctrl->parent_check,
- sizeof(struct btrfs_tree_parent_check));
- }
- bbio->bio.bi_opf |= REQ_META;
- }
-
if (btrfs_op(&bbio->bio) == BTRFS_MAP_READ &&
bio_ctrl->compress_type != BTRFS_COMPRESS_NONE)
- btrfs_submit_compressed_read(bbio, mirror_num);
+ btrfs_submit_compressed_read(bbio);
else
- btrfs_submit_bio(bbio, mirror_num);
+ btrfs_submit_bio(bbio, 0);
/* The bbio is owned by the end_io handler now */
bio_ctrl->bbio = NULL;
@@ -248,8 +217,6 @@ static int process_one_page(struct btrfs_fs_info *fs_info,
if (page_ops & PAGE_SET_ORDERED)
btrfs_page_clamp_set_ordered(fs_info, page, start, len);
- if (page_ops & PAGE_SET_ERROR)
- btrfs_page_clamp_set_error(fs_info, page, start, len);
if (page_ops & PAGE_START_WRITEBACK) {
btrfs_page_clamp_clear_dirty(fs_info, page, start, len);
btrfs_page_clamp_set_writeback(fs_info, page, start, len);
@@ -295,9 +262,6 @@ static int __process_pages_contig(struct address_space *mapping,
ASSERT(processed_end && *processed_end == start);
}
- if ((page_ops & PAGE_SET_ERROR) && start_index <= end_index)
- mapping_set_error(mapping, -EIO);
-
folio_batch_init(&fbatch);
while (index <= end_index) {
int found_folios;
@@ -506,6 +470,15 @@ void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
start, end, page_ops, NULL);
}
+static bool btrfs_verify_page(struct page *page, u64 start)
+{
+ if (!fsverity_active(page->mapping->host) ||
+ PageUptodate(page) ||
+ start >= i_size_read(page->mapping->host))
+ return true;
+ return fsverity_verify_page(page);
+}
+
static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
{
struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
@@ -513,20 +486,10 @@ static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
ASSERT(page_offset(page) <= start &&
start + len <= page_offset(page) + PAGE_SIZE);
- if (uptodate) {
- if (fsverity_active(page->mapping->host) &&
- !PageError(page) &&
- !PageUptodate(page) &&
- start < i_size_read(page->mapping->host) &&
- !fsverity_verify_page(page)) {
- btrfs_page_set_error(fs_info, page, start, len);
- } else {
- btrfs_page_set_uptodate(fs_info, page, start, len);
- }
- } else {
+ if (uptodate && btrfs_verify_page(page, start))
+ btrfs_page_set_uptodate(fs_info, page, start, len);
+ else
btrfs_page_clear_uptodate(fs_info, page, start, len);
- btrfs_page_set_error(fs_info, page, start, len);
- }
if (!btrfs_is_subpage(fs_info, page))
unlock_page(page);
@@ -554,7 +517,6 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
len = end + 1 - start;
btrfs_page_clear_uptodate(fs_info, page, start, len);
- btrfs_page_set_error(fs_info, page, start, len);
ret = err < 0 ? err : -EIO;
mapping_set_error(page->mapping, ret);
}
@@ -574,8 +536,6 @@ static void end_bio_extent_writepage(struct btrfs_bio *bbio)
struct bio *bio = &bbio->bio;
int error = blk_status_to_errno(bio->bi_status);
struct bio_vec *bvec;
- u64 start;
- u64 end;
struct bvec_iter_all iter_all;
ASSERT(!bio_flagged(bio, BIO_CLONED));
@@ -584,6 +544,8 @@ static void end_bio_extent_writepage(struct btrfs_bio *bbio)
struct inode *inode = page->mapping->host;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
const u32 sectorsize = fs_info->sectorsize;
+ u64 start = page_offset(page) + bvec->bv_offset;
+ u32 len = bvec->bv_len;
/* Our read/write should always be sector aligned. */
if (!IS_ALIGNED(bvec->bv_offset, sectorsize))
@@ -595,12 +557,12 @@ static void end_bio_extent_writepage(struct btrfs_bio *bbio)
"incomplete page write with offset %u and length %u",
bvec->bv_offset, bvec->bv_len);
- start = page_offset(page) + bvec->bv_offset;
- end = start + bvec->bv_len - 1;
-
- end_extent_writepage(page, error, start, end);
-
- btrfs_page_clear_writeback(fs_info, page, start, bvec->bv_len);
+ btrfs_finish_ordered_extent(bbio->ordered, page, start, len, !error);
+ if (error) {
+ btrfs_page_clear_uptodate(fs_info, page, start, len);
+ mapping_set_error(page->mapping, error);
+ }
+ btrfs_page_clear_writeback(fs_info, page, start, len);
}
bio_put(bio);
@@ -686,35 +648,6 @@ static void begin_page_read(struct btrfs_fs_info *fs_info, struct page *page)
}
/*
- * Find extent buffer for a givne bytenr.
- *
- * This is for end_bio_extent_readpage(), thus we can't do any unsafe locking
- * in endio context.
- */
-static struct extent_buffer *find_extent_buffer_readpage(
- struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
-{
- struct extent_buffer *eb;
-
- /*
- * For regular sectorsize, we can use page->private to grab extent
- * buffer
- */
- if (fs_info->nodesize >= PAGE_SIZE) {
- ASSERT(PagePrivate(page) && page->private);
- return (struct extent_buffer *)page->private;
- }
-
- /* For subpage case, we need to lookup buffer radix tree */
- rcu_read_lock();
- eb = radix_tree_lookup(&fs_info->buffer_radix,
- bytenr >> fs_info->sectorsize_bits);
- rcu_read_unlock();
- ASSERT(eb);
- return eb;
-}
-
-/*
* after a readpage IO is done, we need to:
* clear the uptodate bits on error
* set the uptodate bits if things worked
@@ -735,7 +668,6 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
* larger than UINT_MAX, u32 here is enough.
*/
u32 bio_offset = 0;
- int mirror;
struct bvec_iter_all iter_all;
ASSERT(!bio_flagged(bio, BIO_CLONED));
@@ -775,11 +707,6 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
end = start + bvec->bv_len - 1;
len = bvec->bv_len;
- mirror = bbio->mirror_num;
- if (uptodate && !is_data_inode(inode) &&
- btrfs_validate_metadata_buffer(bbio, page, start, end, mirror))
- uptodate = false;
-
if (likely(uptodate)) {
loff_t i_size = i_size_read(inode);
pgoff_t end_index = i_size >> PAGE_SHIFT;
@@ -800,19 +727,12 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
zero_user_segment(page, zero_start,
offset_in_page(end) + 1);
}
- } else if (!is_data_inode(inode)) {
- struct extent_buffer *eb;
-
- eb = find_extent_buffer_readpage(fs_info, page, start);
- set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
- eb->read_mirror = mirror;
- atomic_dec(&eb->io_pages);
}
/* Update page status and unlock. */
end_page_read(page, uptodate, start, len);
endio_readpage_release_extent(&processed, BTRFS_I(inode),
- start, end, PageUptodate(page));
+ start, end, uptodate);
ASSERT(bio_offset + len > bio_offset);
bio_offset += len;
@@ -906,13 +826,8 @@ static void alloc_new_bio(struct btrfs_inode *inode,
bio_ctrl->bbio = bbio;
bio_ctrl->len_to_oe_boundary = U32_MAX;
- /*
- * Limit the extent to the ordered boundary for Zone Append.
- * Compressed bios aren't submitted directly, so it doesn't apply to
- * them.
- */
- if (bio_ctrl->compress_type == BTRFS_COMPRESS_NONE &&
- btrfs_use_zone_append(bbio)) {
+ /* Limit data write bios to the ordered boundary. */
+ if (bio_ctrl->wbc) {
struct btrfs_ordered_extent *ordered;
ordered = btrfs_lookup_ordered_extent(inode, file_offset);
@@ -920,11 +835,9 @@ static void alloc_new_bio(struct btrfs_inode *inode,
bio_ctrl->len_to_oe_boundary = min_t(u32, U32_MAX,
ordered->file_offset +
ordered->disk_num_bytes - file_offset);
- btrfs_put_ordered_extent(ordered);
+ bbio->ordered = ordered;
}
- }
- if (bio_ctrl->wbc) {
/*
* Pick the last added device to support cgroup writeback. For
* multi-device file systems this means blk-cgroup policies have
@@ -1125,7 +1038,6 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
ret = set_page_extent_mapped(page);
if (ret < 0) {
unlock_extent(tree, start, end, NULL);
- btrfs_page_set_error(fs_info, page, start, PAGE_SIZE);
unlock_page(page);
return ret;
}
@@ -1329,11 +1241,9 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
}
ret = btrfs_run_delalloc_range(inode, page, delalloc_start,
delalloc_end, &page_started, &nr_written, wbc);
- if (ret) {
- btrfs_page_set_error(inode->root->fs_info, page,
- page_offset(page), PAGE_SIZE);
+ if (ret)
return ret;
- }
+
/*
* delalloc_end is already one less than the total length, so
* we don't subtract one from PAGE_SIZE
@@ -1438,7 +1348,6 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
struct extent_map *em;
int ret = 0;
int nr = 0;
- bool compressed;
ret = btrfs_writepage_cow_fixup(page);
if (ret) {
@@ -1448,12 +1357,6 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
return 1;
}
- /*
- * we don't want to touch the inode after unlocking the page,
- * so we update the mapping writeback index now
- */
- bio_ctrl->wbc->nr_to_write--;
-
bio_ctrl->end_io_func = end_bio_extent_writepage;
while (cur <= end) {
u64 disk_bytenr;
@@ -1486,7 +1389,6 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
em = btrfs_get_extent(inode, NULL, 0, cur, end - cur + 1);
if (IS_ERR(em)) {
- btrfs_page_set_error(fs_info, page, cur, end - cur + 1);
ret = PTR_ERR_OR_ZERO(em);
goto out_error;
}
@@ -1497,10 +1399,14 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
ASSERT(cur < end);
ASSERT(IS_ALIGNED(em->start, fs_info->sectorsize));
ASSERT(IS_ALIGNED(em->len, fs_info->sectorsize));
+
block_start = em->block_start;
- compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
disk_bytenr = em->block_start + extent_offset;
+ ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
+ ASSERT(block_start != EXTENT_MAP_HOLE);
+ ASSERT(block_start != EXTENT_MAP_INLINE);
+
/*
* Note that em_end from extent_map_end() and dirty_range_end from
* find_next_dirty_byte() are all exclusive
@@ -1509,22 +1415,6 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
free_extent_map(em);
em = NULL;
- /*
- * compressed and inline extents are written through other
- * paths in the FS
- */
- if (compressed || block_start == EXTENT_MAP_HOLE ||
- block_start == EXTENT_MAP_INLINE) {
- if (compressed)
- nr++;
- else
- btrfs_writepage_endio_finish_ordered(inode,
- page, cur, cur + iosize - 1, true);
- btrfs_page_clear_dirty(fs_info, page, cur, iosize);
- cur += iosize;
- continue;
- }
-
btrfs_set_range_writeback(inode, cur, cur + iosize - 1);
if (!PageWriteback(page)) {
btrfs_err(inode->root->fs_info,
@@ -1572,7 +1462,6 @@ static int __extent_writepage(struct page *page, struct btrfs_bio_ctrl *bio_ctrl
{
struct folio *folio = page_folio(page);
struct inode *inode = page->mapping->host;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
const u64 page_start = page_offset(page);
const u64 page_end = page_start + PAGE_SIZE - 1;
int ret;
@@ -1585,9 +1474,6 @@ static int __extent_writepage(struct page *page, struct btrfs_bio_ctrl *bio_ctrl
WARN_ON(!PageLocked(page));
- btrfs_page_clear_error(btrfs_sb(inode->i_sb), page,
- page_offset(page), PAGE_SIZE);
-
pg_offset = offset_in_page(i_size);
if (page->index > end_index ||
(page->index == end_index && !pg_offset)) {
@@ -1600,77 +1486,30 @@ static int __extent_writepage(struct page *page, struct btrfs_bio_ctrl *bio_ctrl
memzero_page(page, pg_offset, PAGE_SIZE - pg_offset);
ret = set_page_extent_mapped(page);
- if (ret < 0) {
- SetPageError(page);
+ if (ret < 0)
goto done;
- }
- if (!bio_ctrl->extent_locked) {
- ret = writepage_delalloc(BTRFS_I(inode), page, bio_ctrl->wbc);
- if (ret == 1)
- return 0;
- if (ret)
- goto done;
- }
+ ret = writepage_delalloc(BTRFS_I(inode), page, bio_ctrl->wbc);
+ if (ret == 1)
+ return 0;
+ if (ret)
+ goto done;
ret = __extent_writepage_io(BTRFS_I(inode), page, bio_ctrl, i_size, &nr);
if (ret == 1)
return 0;
+ bio_ctrl->wbc->nr_to_write--;
+
done:
if (nr == 0) {
/* make sure the mapping tag for page dirty gets cleared */
set_page_writeback(page);
end_page_writeback(page);
}
- /*
- * Here we used to have a check for PageError() and then set @ret and
- * call end_extent_writepage().
- *
- * But in fact setting @ret here will cause different error paths
- * between subpage and regular sectorsize.
- *
- * For regular page size, we never submit current page, but only add
- * current page to current bio.
- * The bio submission can only happen in next page.
- * Thus if we hit the PageError() branch, @ret is already set to
- * non-zero value and will not get updated for regular sectorsize.
- *
- * But for subpage case, it's possible we submit part of current page,
- * thus can get PageError() set by submitted bio of the same page,
- * while our @ret is still 0.
- *
- * So here we unify the behavior and don't set @ret.
- * Error can still be properly passed to higher layer as page will
- * be set error, here we just don't handle the IO failure.
- *
- * NOTE: This is just a hotfix for subpage.
- * The root fix will be properly ending ordered extent when we hit
- * an error during writeback.
- *
- * But that needs a bigger refactoring, as we not only need to grab the
- * submitted OE, but also need to know exactly at which bytenr we hit
- * the error.
- * Currently the full page based __extent_writepage_io() is not
- * capable of that.
- */
- if (PageError(page))
+ if (ret)
end_extent_writepage(page, ret, page_start, page_end);
- if (bio_ctrl->extent_locked) {
- struct writeback_control *wbc = bio_ctrl->wbc;
-
- /*
- * If bio_ctrl->extent_locked, it's from extent_write_locked_range(),
- * the page can either be locked by lock_page() or
- * process_one_page().
- * Let btrfs_page_unlock_writer() handle both cases.
- */
- ASSERT(wbc);
- btrfs_page_unlock_writer(fs_info, page, wbc->range_start,
- wbc->range_end + 1 - wbc->range_start);
- } else {
- unlock_page(page);
- }
+ unlock_page(page);
ASSERT(ret <= 0);
return ret;
}
@@ -1681,52 +1520,26 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
TASK_UNINTERRUPTIBLE);
}
-static void end_extent_buffer_writeback(struct extent_buffer *eb)
-{
- clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
- smp_mb__after_atomic();
- wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
-}
-
/*
* Lock extent buffer status and pages for writeback.
*
- * May try to flush write bio if we can't get the lock.
- *
- * Return 0 if the extent buffer doesn't need to be submitted.
- * (E.g. the extent buffer is not dirty)
- * Return >0 is the extent buffer is submitted to bio.
- * Return <0 if something went wrong, no page is locked.
+ * Return %false if the extent buffer doesn't need to be submitted (e.g. the
+ * extent buffer is not dirty)
+ * Return %true is the extent buffer is submitted to bio.
*/
-static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb,
- struct btrfs_bio_ctrl *bio_ctrl)
+static noinline_for_stack bool lock_extent_buffer_for_io(struct extent_buffer *eb,
+ struct writeback_control *wbc)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
- int i, num_pages;
- int flush = 0;
- int ret = 0;
+ bool ret = false;
- if (!btrfs_try_tree_write_lock(eb)) {
- submit_write_bio(bio_ctrl, 0);
- flush = 1;
- btrfs_tree_lock(eb);
- }
-
- if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
+ btrfs_tree_lock(eb);
+ while (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
btrfs_tree_unlock(eb);
- if (bio_ctrl->wbc->sync_mode != WB_SYNC_ALL)
- return 0;
- if (!flush) {
- submit_write_bio(bio_ctrl, 0);
- flush = 1;
- }
- while (1) {
- wait_on_extent_buffer_writeback(eb);
- btrfs_tree_lock(eb);
- if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags))
- break;
- btrfs_tree_unlock(eb);
- }
+ if (wbc->sync_mode != WB_SYNC_ALL)
+ return false;
+ wait_on_extent_buffer_writeback(eb);
+ btrfs_tree_lock(eb);
}
/*
@@ -1742,45 +1555,19 @@ static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb
percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
-eb->len,
fs_info->dirty_metadata_batch);
- ret = 1;
+ ret = true;
} else {
spin_unlock(&eb->refs_lock);
}
-
btrfs_tree_unlock(eb);
-
- /*
- * Either we don't need to submit any tree block, or we're submitting
- * subpage eb.
- * Subpage metadata doesn't use page locking at all, so we can skip
- * the page locking.
- */
- if (!ret || fs_info->nodesize < PAGE_SIZE)
- return ret;
-
- num_pages = num_extent_pages(eb);
- for (i = 0; i < num_pages; i++) {
- struct page *p = eb->pages[i];
-
- if (!trylock_page(p)) {
- if (!flush) {
- submit_write_bio(bio_ctrl, 0);
- flush = 1;
- }
- lock_page(p);
- }
- }
-
return ret;
}
-static void set_btree_ioerr(struct page *page, struct extent_buffer *eb)
+static void set_btree_ioerr(struct extent_buffer *eb)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
- btrfs_page_set_error(fs_info, page, eb->start, eb->len);
- if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
- return;
+ set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
/*
* A read may stumble upon this buffer later, make sure that it gets an
@@ -1794,7 +1581,7 @@ static void set_btree_ioerr(struct page *page, struct extent_buffer *eb)
* return a 0 because we are readonly if we don't modify the err seq for
* the superblock.
*/
- mapping_set_error(page->mapping, -EIO);
+ mapping_set_error(eb->fs_info->btree_inode->i_mapping, -EIO);
/*
* If writeback for a btree extent that doesn't belong to a log tree
@@ -1869,101 +1656,34 @@ static struct extent_buffer *find_extent_buffer_nolock(
return NULL;
}
-/*
- * The endio function for subpage extent buffer write.
- *
- * Unlike end_bio_extent_buffer_writepage(), we only call end_page_writeback()
- * after all extent buffers in the page has finished their writeback.
- */
-static void end_bio_subpage_eb_writepage(struct btrfs_bio *bbio)
+static void extent_buffer_write_end_io(struct btrfs_bio *bbio)
{
- struct bio *bio = &bbio->bio;
- struct btrfs_fs_info *fs_info;
- struct bio_vec *bvec;
+ struct extent_buffer *eb = bbio->private;
+ struct btrfs_fs_info *fs_info = eb->fs_info;
+ bool uptodate = !bbio->bio.bi_status;
struct bvec_iter_all iter_all;
+ struct bio_vec *bvec;
+ u32 bio_offset = 0;
- fs_info = btrfs_sb(bio_first_page_all(bio)->mapping->host->i_sb);
- ASSERT(fs_info->nodesize < PAGE_SIZE);
+ if (!uptodate)
+ set_btree_ioerr(eb);
- ASSERT(!bio_flagged(bio, BIO_CLONED));
- bio_for_each_segment_all(bvec, bio, iter_all) {
+ bio_for_each_segment_all(bvec, &bbio->bio, iter_all) {
+ u64 start = eb->start + bio_offset;
struct page *page = bvec->bv_page;
- u64 bvec_start = page_offset(page) + bvec->bv_offset;
- u64 bvec_end = bvec_start + bvec->bv_len - 1;
- u64 cur_bytenr = bvec_start;
-
- ASSERT(IS_ALIGNED(bvec->bv_len, fs_info->nodesize));
-
- /* Iterate through all extent buffers in the range */
- while (cur_bytenr <= bvec_end) {
- struct extent_buffer *eb;
- int done;
-
- /*
- * Here we can't use find_extent_buffer(), as it may
- * try to lock eb->refs_lock, which is not safe in endio
- * context.
- */
- eb = find_extent_buffer_nolock(fs_info, cur_bytenr);
- ASSERT(eb);
-
- cur_bytenr = eb->start + eb->len;
-
- ASSERT(test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags));
- done = atomic_dec_and_test(&eb->io_pages);
- ASSERT(done);
-
- if (bio->bi_status ||
- test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
- ClearPageUptodate(page);
- set_btree_ioerr(page, eb);
- }
+ u32 len = bvec->bv_len;
- btrfs_subpage_clear_writeback(fs_info, page, eb->start,
- eb->len);
- end_extent_buffer_writeback(eb);
- /*
- * free_extent_buffer() will grab spinlock which is not
- * safe in endio context. Thus here we manually dec
- * the ref.
- */
- atomic_dec(&eb->refs);
- }
+ if (!uptodate)
+ btrfs_page_clear_uptodate(fs_info, page, start, len);
+ btrfs_page_clear_writeback(fs_info, page, start, len);
+ bio_offset += len;
}
- bio_put(bio);
-}
-static void end_bio_extent_buffer_writepage(struct btrfs_bio *bbio)
-{
- struct bio *bio = &bbio->bio;
- struct bio_vec *bvec;
- struct extent_buffer *eb;
- int done;
- struct bvec_iter_all iter_all;
-
- ASSERT(!bio_flagged(bio, BIO_CLONED));
- bio_for_each_segment_all(bvec, bio, iter_all) {
- struct page *page = bvec->bv_page;
-
- eb = (struct extent_buffer *)page->private;
- BUG_ON(!eb);
- done = atomic_dec_and_test(&eb->io_pages);
-
- if (bio->bi_status ||
- test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
- ClearPageUptodate(page);
- set_btree_ioerr(page, eb);
- }
-
- end_page_writeback(page);
-
- if (!done)
- continue;
-
- end_extent_buffer_writeback(eb);
- }
+ clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
+ smp_mb__after_atomic();
+ wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
- bio_put(bio);
+ bio_put(&bbio->bio);
}
static void prepare_eb_write(struct extent_buffer *eb)
@@ -1973,7 +1693,6 @@ static void prepare_eb_write(struct extent_buffer *eb)
unsigned long end;
clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
- atomic_set(&eb->io_pages, num_extent_pages(eb));
/* Set btree blocks beyond nritems with 0 to avoid stale content */
nritems = btrfs_header_nritems(eb);
@@ -1995,63 +1714,49 @@ static void prepare_eb_write(struct extent_buffer *eb)
}
}
-/*
- * Unlike the work in write_one_eb(), we rely completely on extent locking.
- * Page locking is only utilized at minimum to keep the VMM code happy.
- */
-static void write_one_subpage_eb(struct extent_buffer *eb,
- struct btrfs_bio_ctrl *bio_ctrl)
-{
- struct btrfs_fs_info *fs_info = eb->fs_info;
- struct page *page = eb->pages[0];
- bool no_dirty_ebs = false;
-
- prepare_eb_write(eb);
-
- /* clear_page_dirty_for_io() in subpage helper needs page locked */
- lock_page(page);
- btrfs_subpage_set_writeback(fs_info, page, eb->start, eb->len);
-
- /* Check if this is the last dirty bit to update nr_written */
- no_dirty_ebs = btrfs_subpage_clear_and_test_dirty(fs_info, page,
- eb->start, eb->len);
- if (no_dirty_ebs)
- clear_page_dirty_for_io(page);
-
- bio_ctrl->end_io_func = end_bio_subpage_eb_writepage;
-
- submit_extent_page(bio_ctrl, eb->start, page, eb->len,
- eb->start - page_offset(page));
- unlock_page(page);
- /*
- * Submission finished without problem, if no range of the page is
- * dirty anymore, we have submitted a page. Update nr_written in wbc.
- */
- if (no_dirty_ebs)
- bio_ctrl->wbc->nr_to_write--;
-}
-
static noinline_for_stack void write_one_eb(struct extent_buffer *eb,
- struct btrfs_bio_ctrl *bio_ctrl)
+ struct writeback_control *wbc)
{
- u64 disk_bytenr = eb->start;
- int i, num_pages;
+ struct btrfs_fs_info *fs_info = eb->fs_info;
+ struct btrfs_bio *bbio;
prepare_eb_write(eb);
- bio_ctrl->end_io_func = end_bio_extent_buffer_writepage;
-
- num_pages = num_extent_pages(eb);
- for (i = 0; i < num_pages; i++) {
- struct page *p = eb->pages[i];
-
- clear_page_dirty_for_io(p);
- set_page_writeback(p);
- submit_extent_page(bio_ctrl, disk_bytenr, p, PAGE_SIZE, 0);
- disk_bytenr += PAGE_SIZE;
- bio_ctrl->wbc->nr_to_write--;
+ bbio = btrfs_bio_alloc(INLINE_EXTENT_BUFFER_PAGES,
+ REQ_OP_WRITE | REQ_META | wbc_to_write_flags(wbc),
+ eb->fs_info, extent_buffer_write_end_io, eb);
+ bbio->bio.bi_iter.bi_sector = eb->start >> SECTOR_SHIFT;
+ bio_set_dev(&bbio->bio, fs_info->fs_devices->latest_dev->bdev);
+ wbc_init_bio(wbc, &bbio->bio);
+ bbio->inode = BTRFS_I(eb->fs_info->btree_inode);
+ bbio->file_offset = eb->start;
+ if (fs_info->nodesize < PAGE_SIZE) {
+ struct page *p = eb->pages[0];
+
+ lock_page(p);
+ btrfs_subpage_set_writeback(fs_info, p, eb->start, eb->len);
+ if (btrfs_subpage_clear_and_test_dirty(fs_info, p, eb->start,
+ eb->len)) {
+ clear_page_dirty_for_io(p);
+ wbc->nr_to_write--;
+ }
+ __bio_add_page(&bbio->bio, p, eb->len, eb->start - page_offset(p));
+ wbc_account_cgroup_owner(wbc, p, eb->len);
unlock_page(p);
+ } else {
+ for (int i = 0; i < num_extent_pages(eb); i++) {
+ struct page *p = eb->pages[i];
+
+ lock_page(p);
+ clear_page_dirty_for_io(p);
+ set_page_writeback(p);
+ __bio_add_page(&bbio->bio, p, PAGE_SIZE, 0);
+ wbc_account_cgroup_owner(wbc, p, PAGE_SIZE);
+ wbc->nr_to_write--;
+ unlock_page(p);
+ }
}
+ btrfs_submit_bio(bbio, 0);
}
/*
@@ -2068,14 +1773,13 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb,
* Return >=0 for the number of submitted extent buffers.
* Return <0 for fatal error.
*/
-static int submit_eb_subpage(struct page *page, struct btrfs_bio_ctrl *bio_ctrl)
+static int submit_eb_subpage(struct page *page, struct writeback_control *wbc)
{
struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
int submitted = 0;
u64 page_start = page_offset(page);
int bit_start = 0;
int sectors_per_node = fs_info->nodesize >> fs_info->sectorsize_bits;
- int ret;
/* Lock and write each dirty extent buffers in the range */
while (bit_start < fs_info->subpage_info->bitmap_nr_bits) {
@@ -2121,25 +1825,13 @@ static int submit_eb_subpage(struct page *page, struct btrfs_bio_ctrl *bio_ctrl)
if (!eb)
continue;
- ret = lock_extent_buffer_for_io(eb, bio_ctrl);
- if (ret == 0) {
- free_extent_buffer(eb);
- continue;
+ if (lock_extent_buffer_for_io(eb, wbc)) {
+ write_one_eb(eb, wbc);
+ submitted++;
}
- if (ret < 0) {
- free_extent_buffer(eb);
- goto cleanup;
- }
- write_one_subpage_eb(eb, bio_ctrl);
free_extent_buffer(eb);
- submitted++;
}
return submitted;
-
-cleanup:
- /* We hit error, end bio for the submitted extent buffers */
- submit_write_bio(bio_ctrl, ret);
- return ret;
}
/*
@@ -2162,7 +1854,7 @@ cleanup:
* previous call.
* Return <0 for fatal error.
*/
-static int submit_eb_page(struct page *page, struct btrfs_bio_ctrl *bio_ctrl,
+static int submit_eb_page(struct page *page, struct writeback_control *wbc,
struct extent_buffer **eb_context)
{
struct address_space *mapping = page->mapping;
@@ -2174,7 +1866,7 @@ static int submit_eb_page(struct page *page, struct btrfs_bio_ctrl *bio_ctrl,
return 0;
if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE)
- return submit_eb_subpage(page, bio_ctrl);
+ return submit_eb_subpage(page, wbc);
spin_lock(&mapping->private_lock);
if (!PagePrivate(page)) {
@@ -2207,8 +1899,7 @@ static int submit_eb_page(struct page *page, struct btrfs_bio_ctrl *bio_ctrl,
* If for_sync, this hole will be filled with
* trasnsaction commit.
*/
- if (bio_ctrl->wbc->sync_mode == WB_SYNC_ALL &&
- !bio_ctrl->wbc->for_sync)
+ if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)
ret = -EAGAIN;
else
ret = 0;
@@ -2218,13 +1909,12 @@ static int submit_eb_page(struct page *page, struct btrfs_bio_ctrl *bio_ctrl,
*eb_context = eb;
- ret = lock_extent_buffer_for_io(eb, bio_ctrl);
- if (ret <= 0) {
+ if (!lock_extent_buffer_for_io(eb, wbc)) {
btrfs_revert_meta_write_pointer(cache, eb);
if (cache)
btrfs_put_block_group(cache);
free_extent_buffer(eb);
- return ret;
+ return 0;
}
if (cache) {
/*
@@ -2233,7 +1923,7 @@ static int submit_eb_page(struct page *page, struct btrfs_bio_ctrl *bio_ctrl,
btrfs_schedule_zone_finish_bg(cache, eb);
btrfs_put_block_group(cache);
}
- write_one_eb(eb, bio_ctrl);
+ write_one_eb(eb, wbc);
free_extent_buffer(eb);
return 1;
}
@@ -2242,11 +1932,6 @@ int btree_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct extent_buffer *eb_context = NULL;
- struct btrfs_bio_ctrl bio_ctrl = {
- .wbc = wbc,
- .opf = REQ_OP_WRITE | wbc_to_write_flags(wbc),
- .extent_locked = 0,
- };
struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
int ret = 0;
int done = 0;
@@ -2288,7 +1973,7 @@ retry:
for (i = 0; i < nr_folios; i++) {
struct folio *folio = fbatch.folios[i];
- ret = submit_eb_page(&folio->page, &bio_ctrl, &eb_context);
+ ret = submit_eb_page(&folio->page, wbc, &eb_context);
if (ret == 0)
continue;
if (ret < 0) {
@@ -2349,8 +2034,6 @@ retry:
ret = 0;
if (!ret && BTRFS_FS_ERROR(fs_info))
ret = -EROFS;
- submit_write_bio(&bio_ctrl, ret);
-
btrfs_zoned_meta_io_unlock(fs_info);
return ret;
}
@@ -2520,38 +2203,31 @@ retry:
* already been ran (aka, ordered extent inserted) and all pages are still
* locked.
*/
-int extent_write_locked_range(struct inode *inode, u64 start, u64 end)
+int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
+ struct writeback_control *wbc)
{
bool found_error = false;
int first_error = 0;
int ret = 0;
struct address_space *mapping = inode->i_mapping;
- struct page *page;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ const u32 sectorsize = fs_info->sectorsize;
+ loff_t i_size = i_size_read(inode);
u64 cur = start;
- unsigned long nr_pages;
- const u32 sectorsize = btrfs_sb(inode->i_sb)->sectorsize;
- struct writeback_control wbc_writepages = {
- .sync_mode = WB_SYNC_ALL,
- .range_start = start,
- .range_end = end + 1,
- .no_cgroup_owner = 1,
- };
struct btrfs_bio_ctrl bio_ctrl = {
- .wbc = &wbc_writepages,
- /* We're called from an async helper function */
- .opf = REQ_OP_WRITE | REQ_BTRFS_CGROUP_PUNT |
- wbc_to_write_flags(&wbc_writepages),
- .extent_locked = 1,
+ .wbc = wbc,
+ .opf = REQ_OP_WRITE | wbc_to_write_flags(wbc),
};
+ if (wbc->no_cgroup_owner)
+ bio_ctrl.opf |= REQ_BTRFS_CGROUP_PUNT;
+
ASSERT(IS_ALIGNED(start, sectorsize) && IS_ALIGNED(end + 1, sectorsize));
- nr_pages = (round_up(end, PAGE_SIZE) - round_down(start, PAGE_SIZE)) >>
- PAGE_SHIFT;
- wbc_writepages.nr_to_write = nr_pages * 2;
- wbc_attach_fdatawrite_inode(&wbc_writepages, inode);
while (cur <= end) {
u64 cur_end = min(round_down(cur, PAGE_SIZE) + PAGE_SIZE - 1, end);
+ struct page *page;
+ int nr = 0;
page = find_get_page(mapping, cur >> PAGE_SHIFT);
/*
@@ -2562,19 +2238,31 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end)
ASSERT(PageLocked(page));
ASSERT(PageDirty(page));
clear_page_dirty_for_io(page);
- ret = __extent_writepage(page, &bio_ctrl);
- ASSERT(ret <= 0);
+
+ ret = __extent_writepage_io(BTRFS_I(inode), page, &bio_ctrl,
+ i_size, &nr);
+ if (ret == 1)
+ goto next_page;
+
+ /* Make sure the mapping tag for page dirty gets cleared. */
+ if (nr == 0) {
+ set_page_writeback(page);
+ end_page_writeback(page);
+ }
+ if (ret)
+ end_extent_writepage(page, ret, cur, cur_end);
+ btrfs_page_unlock_writer(fs_info, page, cur, cur_end + 1 - cur);
if (ret < 0) {
found_error = true;
first_error = ret;
}
+next_page:
put_page(page);
cur = cur_end + 1;
}
submit_write_bio(&bio_ctrl, found_error ? ret : 0);
- wbc_detach_inode(&wbc_writepages);
if (found_error)
return first_error;
return ret;
@@ -2588,7 +2276,6 @@ int extent_writepages(struct address_space *mapping,
struct btrfs_bio_ctrl bio_ctrl = {
.wbc = wbc,
.opf = REQ_OP_WRITE | wbc_to_write_flags(wbc),
- .extent_locked = 0,
};
/*
@@ -2679,8 +2366,7 @@ static int try_release_extent_state(struct extent_io_tree *tree,
* The delalloc new bit will be cleared by ordered extent
* completion.
*/
- ret = __clear_extent_bit(tree, start, end, clear_bits, NULL,
- mask, NULL);
+ ret = __clear_extent_bit(tree, start, end, clear_bits, NULL, NULL);
/* if clear_extent_bit failed for enomem reasons,
* we can't allow the release to continue.
@@ -3421,10 +3107,9 @@ static void __free_extent_buffer(struct extent_buffer *eb)
kmem_cache_free(extent_buffer_cache, eb);
}
-int extent_buffer_under_io(const struct extent_buffer *eb)
+static int extent_buffer_under_io(const struct extent_buffer *eb)
{
- return (atomic_read(&eb->io_pages) ||
- test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
+ return (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
}
@@ -3557,11 +3242,9 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
init_rwsem(&eb->lock);
btrfs_leak_debug_add_eb(eb);
- INIT_LIST_HEAD(&eb->release_list);
spin_lock_init(&eb->refs_lock);
atomic_set(&eb->refs, 1);
- atomic_set(&eb->io_pages, 0);
ASSERT(len <= BTRFS_MAX_METADATA_BLOCKSIZE);
@@ -3678,9 +3361,9 @@ static void check_buffer_tree_ref(struct extent_buffer *eb)
* adequately protected by the refcount, but the TREE_REF bit and
* its corresponding reference are not. To protect against this
* class of races, we call check_buffer_tree_ref from the codepaths
- * which trigger io after they set eb->io_pages. Note that once io is
- * initiated, TREE_REF can no longer be cleared, so that is the
- * moment at which any such race is best fixed.
+ * which trigger io. Note that once io is initiated, TREE_REF can no
+ * longer be cleared, so that is the moment at which any such race is
+ * best fixed.
*/
refs = atomic_read(&eb->refs);
if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
@@ -3939,7 +3622,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
WARN_ON(btrfs_page_test_dirty(fs_info, p, eb->start, eb->len));
eb->pages[i] = p;
- if (!PageUptodate(p))
+ if (!btrfs_page_test_uptodate(fs_info, p, eb->start, eb->len))
uptodate = 0;
/*
@@ -4142,13 +3825,12 @@ void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
continue;
lock_page(page);
btree_clear_page_dirty(page);
- ClearPageError(page);
unlock_page(page);
}
WARN_ON(atomic_read(&eb->refs) == 0);
}
-bool set_extent_buffer_dirty(struct extent_buffer *eb)
+void set_extent_buffer_dirty(struct extent_buffer *eb)
{
int i;
int num_pages;
@@ -4183,13 +3865,14 @@ bool set_extent_buffer_dirty(struct extent_buffer *eb)
eb->start, eb->len);
if (subpage)
unlock_page(eb->pages[0]);
+ percpu_counter_add_batch(&eb->fs_info->dirty_metadata_bytes,
+ eb->len,
+ eb->fs_info->dirty_metadata_batch);
}
#ifdef CONFIG_BTRFS_DEBUG
for (i = 0; i < num_pages; i++)
ASSERT(PageDirty(eb->pages[i]));
#endif
-
- return was_dirty;
}
void clear_extent_buffer_uptodate(struct extent_buffer *eb)
@@ -4242,84 +3925,54 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
}
}
-static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
- int mirror_num,
- struct btrfs_tree_parent_check *check)
+static void extent_buffer_read_end_io(struct btrfs_bio *bbio)
{
+ struct extent_buffer *eb = bbio->private;
struct btrfs_fs_info *fs_info = eb->fs_info;
- struct extent_io_tree *io_tree;
- struct page *page = eb->pages[0];
- struct extent_state *cached_state = NULL;
- struct btrfs_bio_ctrl bio_ctrl = {
- .opf = REQ_OP_READ,
- .mirror_num = mirror_num,
- .parent_check = check,
- };
- int ret;
+ bool uptodate = !bbio->bio.bi_status;
+ struct bvec_iter_all iter_all;
+ struct bio_vec *bvec;
+ u32 bio_offset = 0;
- ASSERT(!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags));
- ASSERT(PagePrivate(page));
- ASSERT(check);
- io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
+ eb->read_mirror = bbio->mirror_num;
- if (wait == WAIT_NONE) {
- if (!try_lock_extent(io_tree, eb->start, eb->start + eb->len - 1,
- &cached_state))
- return -EAGAIN;
- } else {
- ret = lock_extent(io_tree, eb->start, eb->start + eb->len - 1,
- &cached_state);
- if (ret < 0)
- return ret;
- }
+ if (uptodate &&
+ btrfs_validate_extent_buffer(eb, &bbio->parent_check) < 0)
+ uptodate = false;
- if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags) ||
- PageUptodate(page) ||
- btrfs_subpage_test_uptodate(fs_info, page, eb->start, eb->len)) {
- set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
- unlock_extent(io_tree, eb->start, eb->start + eb->len - 1,
- &cached_state);
- return 0;
+ if (uptodate) {
+ set_extent_buffer_uptodate(eb);
+ } else {
+ clear_extent_buffer_uptodate(eb);
+ set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
}
- clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
- eb->read_mirror = 0;
- atomic_set(&eb->io_pages, 1);
- check_buffer_tree_ref(eb);
- bio_ctrl.end_io_func = end_bio_extent_readpage;
+ bio_for_each_segment_all(bvec, &bbio->bio, iter_all) {
+ u64 start = eb->start + bio_offset;
+ struct page *page = bvec->bv_page;
+ u32 len = bvec->bv_len;
- btrfs_subpage_clear_error(fs_info, page, eb->start, eb->len);
+ if (uptodate)
+ btrfs_page_set_uptodate(fs_info, page, start, len);
+ else
+ btrfs_page_clear_uptodate(fs_info, page, start, len);
- btrfs_subpage_start_reader(fs_info, page, eb->start, eb->len);
- submit_extent_page(&bio_ctrl, eb->start, page, eb->len,
- eb->start - page_offset(page));
- submit_one_bio(&bio_ctrl);
- if (wait != WAIT_COMPLETE) {
- free_extent_state(cached_state);
- return 0;
+ bio_offset += len;
}
- wait_extent_bit(io_tree, eb->start, eb->start + eb->len - 1,
- EXTENT_LOCKED, &cached_state);
- if (!test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
- return -EIO;
- return 0;
+ clear_bit(EXTENT_BUFFER_READING, &eb->bflags);
+ smp_mb__after_atomic();
+ wake_up_bit(&eb->bflags, EXTENT_BUFFER_READING);
+ free_extent_buffer(eb);
+
+ bio_put(&bbio->bio);
}
int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
struct btrfs_tree_parent_check *check)
{
- int i;
- struct page *page;
- int locked_pages = 0;
- int all_uptodate = 1;
- int num_pages;
- unsigned long num_reads = 0;
- struct btrfs_bio_ctrl bio_ctrl = {
- .opf = REQ_OP_READ,
- .mirror_num = mirror_num,
- .parent_check = check,
- };
+ int num_pages = num_extent_pages(eb), i;
+ struct btrfs_bio *bbio;
if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
return 0;
@@ -4332,87 +3985,39 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
if (unlikely(test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)))
return -EIO;
- if (eb->fs_info->nodesize < PAGE_SIZE)
- return read_extent_buffer_subpage(eb, wait, mirror_num, check);
-
- num_pages = num_extent_pages(eb);
- for (i = 0; i < num_pages; i++) {
- page = eb->pages[i];
- if (wait == WAIT_NONE) {
- /*
- * WAIT_NONE is only utilized by readahead. If we can't
- * acquire the lock atomically it means either the eb
- * is being read out or under modification.
- * Either way the eb will be or has been cached,
- * readahead can exit safely.
- */
- if (!trylock_page(page))
- goto unlock_exit;
- } else {
- lock_page(page);
- }
- locked_pages++;
- }
- /*
- * We need to firstly lock all pages to make sure that
- * the uptodate bit of our pages won't be affected by
- * clear_extent_buffer_uptodate().
- */
- for (i = 0; i < num_pages; i++) {
- page = eb->pages[i];
- if (!PageUptodate(page)) {
- num_reads++;
- all_uptodate = 0;
- }
- }
-
- if (all_uptodate) {
- set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
- goto unlock_exit;
- }
+ /* Someone else is already reading the buffer, just wait for it. */
+ if (test_and_set_bit(EXTENT_BUFFER_READING, &eb->bflags))
+ goto done;
clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
eb->read_mirror = 0;
- atomic_set(&eb->io_pages, num_reads);
- /*
- * It is possible for release_folio to clear the TREE_REF bit before we
- * set io_pages. See check_buffer_tree_ref for a more detailed comment.
- */
check_buffer_tree_ref(eb);
- bio_ctrl.end_io_func = end_bio_extent_readpage;
- for (i = 0; i < num_pages; i++) {
- page = eb->pages[i];
-
- if (!PageUptodate(page)) {
- ClearPageError(page);
- submit_extent_page(&bio_ctrl, page_offset(page), page,
- PAGE_SIZE, 0);
- } else {
- unlock_page(page);
- }
+ atomic_inc(&eb->refs);
+
+ bbio = btrfs_bio_alloc(INLINE_EXTENT_BUFFER_PAGES,
+ REQ_OP_READ | REQ_META, eb->fs_info,
+ extent_buffer_read_end_io, eb);
+ bbio->bio.bi_iter.bi_sector = eb->start >> SECTOR_SHIFT;
+ bbio->inode = BTRFS_I(eb->fs_info->btree_inode);
+ bbio->file_offset = eb->start;
+ memcpy(&bbio->parent_check, check, sizeof(*check));
+ if (eb->fs_info->nodesize < PAGE_SIZE) {
+ __bio_add_page(&bbio->bio, eb->pages[0], eb->len,
+ eb->start - page_offset(eb->pages[0]));
+ } else {
+ for (i = 0; i < num_pages; i++)
+ __bio_add_page(&bbio->bio, eb->pages[i], PAGE_SIZE, 0);
}
+ btrfs_submit_bio(bbio, mirror_num);
- submit_one_bio(&bio_ctrl);
-
- if (wait != WAIT_COMPLETE)
- return 0;
-
- for (i = 0; i < num_pages; i++) {
- page = eb->pages[i];
- wait_on_page_locked(page);
- if (!PageUptodate(page))
+done:
+ if (wait == WAIT_COMPLETE) {
+ wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_READING, TASK_UNINTERRUPTIBLE);
+ if (!test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
return -EIO;
}
return 0;
-
-unlock_exit:
- while (locked_pages > 0) {
- locked_pages--;
- page = eb->pages[locked_pages];
- unlock_page(page);
- }
- return 0;
}
static bool report_eb_range(const struct extent_buffer *eb, unsigned long start,
@@ -4561,18 +4166,17 @@ static void assert_eb_page_uptodate(const struct extent_buffer *eb,
* looked up. We don't want to complain in this case, as the page was
* valid before, we just didn't write it out. Instead we want to catch
* the case where we didn't actually read the block properly, which
- * would have !PageUptodate && !PageError, as we clear PageError before
- * reading.
+ * would have !PageUptodate and !EXTENT_BUFFER_WRITE_ERR.
*/
- if (fs_info->nodesize < PAGE_SIZE) {
- bool uptodate, error;
+ if (test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
+ return;
- uptodate = btrfs_subpage_test_uptodate(fs_info, page,
- eb->start, eb->len);
- error = btrfs_subpage_test_error(fs_info, page, eb->start, eb->len);
- WARN_ON(!uptodate && !error);
+ if (fs_info->nodesize < PAGE_SIZE) {
+ if (WARN_ON(!btrfs_subpage_test_uptodate(fs_info, page,
+ eb->start, eb->len)))
+ btrfs_subpage_dump_bitmap(fs_info, page, eb->start, eb->len);
} else {
- WARN_ON(!PageUptodate(page) && !PageError(page));
+ WARN_ON(!PageUptodate(page));
}
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 4341ad978fb8..c5fae3a7d911 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -29,6 +29,8 @@ enum {
/* write IO error */
EXTENT_BUFFER_WRITE_ERR,
EXTENT_BUFFER_NO_CHECK,
+ /* Indicate that extent buffer pages a being read */
+ EXTENT_BUFFER_READING,
};
/* these are flags for __process_pages_contig */
@@ -38,7 +40,6 @@ enum {
ENUM_BIT(PAGE_START_WRITEBACK),
ENUM_BIT(PAGE_END_WRITEBACK),
ENUM_BIT(PAGE_SET_ORDERED),
- ENUM_BIT(PAGE_SET_ERROR),
ENUM_BIT(PAGE_LOCK),
};
@@ -79,7 +80,6 @@ struct extent_buffer {
struct btrfs_fs_info *fs_info;
spinlock_t refs_lock;
atomic_t refs;
- atomic_t io_pages;
int read_mirror;
struct rcu_head rcu_head;
pid_t lock_owner;
@@ -89,7 +89,6 @@ struct extent_buffer {
struct rw_semaphore lock;
struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
- struct list_head release_list;
#ifdef CONFIG_BTRFS_DEBUG
struct list_head leak_list;
#endif
@@ -179,7 +178,8 @@ int try_release_extent_mapping(struct page *page, gfp_t mask);
int try_release_extent_buffer(struct page *page);
int btrfs_read_folio(struct file *file, struct folio *folio);
-int extent_write_locked_range(struct inode *inode, u64 start, u64 end);
+int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
+ struct writeback_control *wbc);
int extent_writepages(struct address_space *mapping,
struct writeback_control *wbc);
int btree_write_cache_pages(struct address_space *mapping,
@@ -262,10 +262,9 @@ void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long star
void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
unsigned long start, unsigned long pos,
unsigned long len);
-bool set_extent_buffer_dirty(struct extent_buffer *eb);
+void set_extent_buffer_dirty(struct extent_buffer *eb);
void set_extent_buffer_uptodate(struct extent_buffer *eb);
void clear_extent_buffer_uptodate(struct extent_buffer *eb);
-int extent_buffer_under_io(const struct extent_buffer *eb);
void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 138afa955370..0cdb3e86f29b 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -364,8 +364,9 @@ static void extent_map_device_set_bits(struct extent_map *em, unsigned bits)
struct btrfs_io_stripe *stripe = &map->stripes[i];
struct btrfs_device *device = stripe->dev;
- set_extent_bits_nowait(&device->alloc_state, stripe->physical,
- stripe->physical + stripe_size - 1, bits);
+ set_extent_bit(&device->alloc_state, stripe->physical,
+ stripe->physical + stripe_size - 1,
+ bits | EXTENT_NOWAIT, NULL);
}
}
@@ -380,8 +381,9 @@ static void extent_map_device_clear_bits(struct extent_map *em, unsigned bits)
struct btrfs_device *device = stripe->dev;
__clear_extent_bit(&device->alloc_state, stripe->physical,
- stripe->physical + stripe_size - 1, bits,
- NULL, GFP_NOWAIT, NULL);
+ stripe->physical + stripe_size - 1,
+ bits | EXTENT_NOWAIT,
+ NULL, NULL);
}
}
@@ -502,10 +504,10 @@ void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
RB_CLEAR_NODE(&em->rb_node);
}
-void replace_extent_mapping(struct extent_map_tree *tree,
- struct extent_map *cur,
- struct extent_map *new,
- int modified)
+static void replace_extent_mapping(struct extent_map_tree *tree,
+ struct extent_map *cur,
+ struct extent_map *new,
+ int modified)
{
lockdep_assert_held_write(&tree->lock);
@@ -959,3 +961,95 @@ int btrfs_replace_extent_map_range(struct btrfs_inode *inode,
return ret;
}
+
+/*
+ * Split off the first pre bytes from the extent_map at [start, start + len],
+ * and set the block_start for it to new_logical.
+ *
+ * This function is used when an ordered_extent needs to be split.
+ */
+int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
+ u64 new_logical)
+{
+ struct extent_map_tree *em_tree = &inode->extent_tree;
+ struct extent_map *em;
+ struct extent_map *split_pre = NULL;
+ struct extent_map *split_mid = NULL;
+ int ret = 0;
+ unsigned long flags;
+
+ ASSERT(pre != 0);
+ ASSERT(pre < len);
+
+ split_pre = alloc_extent_map();
+ if (!split_pre)
+ return -ENOMEM;
+ split_mid = alloc_extent_map();
+ if (!split_mid) {
+ ret = -ENOMEM;
+ goto out_free_pre;
+ }
+
+ lock_extent(&inode->io_tree, start, start + len - 1, NULL);
+ write_lock(&em_tree->lock);
+ em = lookup_extent_mapping(em_tree, start, len);
+ if (!em) {
+ ret = -EIO;
+ goto out_unlock;
+ }
+
+ ASSERT(em->len == len);
+ ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
+ ASSERT(em->block_start < EXTENT_MAP_LAST_BYTE);
+ ASSERT(test_bit(EXTENT_FLAG_PINNED, &em->flags));
+ ASSERT(!test_bit(EXTENT_FLAG_LOGGING, &em->flags));
+ ASSERT(!list_empty(&em->list));
+
+ flags = em->flags;
+ clear_bit(EXTENT_FLAG_PINNED, &em->flags);
+
+ /* First, replace the em with a new extent_map starting from * em->start */
+ split_pre->start = em->start;
+ split_pre->len = pre;
+ split_pre->orig_start = split_pre->start;
+ split_pre->block_start = new_logical;
+ split_pre->block_len = split_pre->len;
+ split_pre->orig_block_len = split_pre->block_len;
+ split_pre->ram_bytes = split_pre->len;
+ split_pre->flags = flags;
+ split_pre->compress_type = em->compress_type;
+ split_pre->generation = em->generation;
+
+ replace_extent_mapping(em_tree, em, split_pre, 1);
+
+ /*
+ * Now we only have an extent_map at:
+ * [em->start, em->start + pre]
+ */
+
+ /* Insert the middle extent_map. */
+ split_mid->start = em->start + pre;
+ split_mid->len = em->len - pre;
+ split_mid->orig_start = split_mid->start;
+ split_mid->block_start = em->block_start + pre;
+ split_mid->block_len = split_mid->len;
+ split_mid->orig_block_len = split_mid->block_len;
+ split_mid->ram_bytes = split_mid->len;
+ split_mid->flags = flags;
+ split_mid->compress_type = em->compress_type;
+ split_mid->generation = em->generation;
+ add_extent_mapping(em_tree, split_mid, 1);
+
+ /* Once for us */
+ free_extent_map(em);
+ /* Once for the tree */
+ free_extent_map(em);
+
+out_unlock:
+ write_unlock(&em_tree->lock);
+ unlock_extent(&inode->io_tree, start, start + len - 1, NULL);
+ free_extent_map(split_mid);
+out_free_pre:
+ free_extent_map(split_pre);
+ return ret;
+}
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index ad311864272a..35d27c756e08 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -90,10 +90,8 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
int add_extent_mapping(struct extent_map_tree *tree,
struct extent_map *em, int modified);
void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em);
-void replace_extent_mapping(struct extent_map_tree *tree,
- struct extent_map *cur,
- struct extent_map *new,
- int modified);
+int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
+ u64 new_logical);
struct extent_map *alloc_extent_map(void);
void free_extent_map(struct extent_map *em);
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index cd4cce9ba443..696bf695d8eb 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -94,8 +94,8 @@ int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES))
return 0;
- return set_extent_bits(&inode->file_extent_tree, start, start + len - 1,
- EXTENT_DIRTY);
+ return set_extent_bit(&inode->file_extent_tree, start, start + len - 1,
+ EXTENT_DIRTY, NULL);
}
/*
@@ -438,9 +438,9 @@ blk_status_t btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
BTRFS_DATA_RELOC_TREE_OBJECTID) {
u64 file_offset = bbio->file_offset + bio_offset;
- set_extent_bits(&inode->io_tree, file_offset,
- file_offset + sectorsize - 1,
- EXTENT_NODATASUM);
+ set_extent_bit(&inode->io_tree, file_offset,
+ file_offset + sectorsize - 1,
+ EXTENT_NODATASUM, NULL);
} else {
btrfs_warn_rl(fs_info,
"csum hole found for disk bytenr range [%llu, %llu)",
@@ -560,8 +560,8 @@ int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end,
goto fail;
}
- sums->bytenr = start;
- sums->len = (int)size;
+ sums->logical = start;
+ sums->len = size;
offset = bytes_to_csum_size(fs_info, start - key.offset);
@@ -721,20 +721,17 @@ fail:
*/
blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio)
{
+ struct btrfs_ordered_extent *ordered = bbio->ordered;
struct btrfs_inode *inode = bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
struct bio *bio = &bbio->bio;
- u64 offset = bbio->file_offset;
struct btrfs_ordered_sum *sums;
- struct btrfs_ordered_extent *ordered = NULL;
char *data;
struct bvec_iter iter;
struct bio_vec bvec;
int index;
unsigned int blockcount;
- unsigned long total_bytes = 0;
- unsigned long this_sum_bytes = 0;
int i;
unsigned nofs_flag;
@@ -749,59 +746,17 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio)
sums->len = bio->bi_iter.bi_size;
INIT_LIST_HEAD(&sums->list);
- sums->bytenr = bio->bi_iter.bi_sector << 9;
+ sums->logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
index = 0;
shash->tfm = fs_info->csum_shash;
bio_for_each_segment(bvec, bio, iter) {
- if (!ordered) {
- ordered = btrfs_lookup_ordered_extent(inode, offset);
- /*
- * The bio range is not covered by any ordered extent,
- * must be a code logic error.
- */
- if (unlikely(!ordered)) {
- WARN(1, KERN_WARNING
- "no ordered extent for root %llu ino %llu offset %llu\n",
- inode->root->root_key.objectid,
- btrfs_ino(inode), offset);
- kvfree(sums);
- return BLK_STS_IOERR;
- }
- }
-
blockcount = BTRFS_BYTES_TO_BLKS(fs_info,
bvec.bv_len + fs_info->sectorsize
- 1);
for (i = 0; i < blockcount; i++) {
- if (!(bio->bi_opf & REQ_BTRFS_ONE_ORDERED) &&
- !in_range(offset, ordered->file_offset,
- ordered->num_bytes)) {
- unsigned long bytes_left;
-
- sums->len = this_sum_bytes;
- this_sum_bytes = 0;
- btrfs_add_ordered_sum(ordered, sums);
- btrfs_put_ordered_extent(ordered);
-
- bytes_left = bio->bi_iter.bi_size - total_bytes;
-
- nofs_flag = memalloc_nofs_save();
- sums = kvzalloc(btrfs_ordered_sum_size(fs_info,
- bytes_left), GFP_KERNEL);
- memalloc_nofs_restore(nofs_flag);
- BUG_ON(!sums); /* -ENOMEM */
- sums->len = bytes_left;
- ordered = btrfs_lookup_ordered_extent(inode,
- offset);
- ASSERT(ordered); /* Logic error */
- sums->bytenr = (bio->bi_iter.bi_sector << 9)
- + total_bytes;
- index = 0;
- }
-
data = bvec_kmap_local(&bvec);
crypto_shash_digest(shash,
data + (i * fs_info->sectorsize),
@@ -809,15 +764,28 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio)
sums->sums + index);
kunmap_local(data);
index += fs_info->csum_size;
- offset += fs_info->sectorsize;
- this_sum_bytes += fs_info->sectorsize;
- total_bytes += fs_info->sectorsize;
}
}
- this_sum_bytes = 0;
+
+ bbio->sums = sums;
btrfs_add_ordered_sum(ordered, sums);
- btrfs_put_ordered_extent(ordered);
+ return 0;
+}
+
+/*
+ * Nodatasum I/O on zoned file systems still requires an btrfs_ordered_sum to
+ * record the updated logical address on Zone Append completion.
+ * Allocate just the structure with an empty sums array here for that case.
+ */
+blk_status_t btrfs_alloc_dummy_sum(struct btrfs_bio *bbio)
+{
+ bbio->sums = kmalloc(sizeof(*bbio->sums), GFP_NOFS);
+ if (!bbio->sums)
+ return BLK_STS_RESOURCE;
+ bbio->sums->len = bbio->bio.bi_iter.bi_size;
+ bbio->sums->logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
+ btrfs_add_ordered_sum(bbio->ordered, bbio->sums);
return 0;
}
@@ -1084,7 +1052,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
again:
next_offset = (u64)-1;
found_next = 0;
- bytenr = sums->bytenr + total_bytes;
+ bytenr = sums->logical + total_bytes;
file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
file_key.offset = bytenr;
file_key.type = BTRFS_EXTENT_CSUM_KEY;
diff --git a/fs/btrfs/file-item.h b/fs/btrfs/file-item.h
index 6be8725cd574..4ec669b69008 100644
--- a/fs/btrfs/file-item.h
+++ b/fs/btrfs/file-item.h
@@ -50,6 +50,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_ordered_sum *sums);
blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio);
+blk_status_t btrfs_alloc_dummy_sum(struct btrfs_bio *bbio);
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit,
bool nowait);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 71426c6408fa..ba5b0c9f2bbd 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1651,7 +1651,6 @@ ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
struct file *file = iocb->ki_filp;
struct btrfs_inode *inode = BTRFS_I(file_inode(file));
ssize_t num_written, num_sync;
- const bool sync = iocb_is_dsync(iocb);
/*
* If the fs flips readonly due to some impossible error, although we
@@ -1664,9 +1663,6 @@ ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
if (encoded && (iocb->ki_flags & IOCB_NOWAIT))
return -EOPNOTSUPP;
- if (sync)
- atomic_inc(&inode->sync_writers);
-
if (encoded) {
num_written = btrfs_encoded_write(iocb, from, encoded);
num_sync = encoded->len;
@@ -1686,9 +1682,6 @@ ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
num_written = num_sync;
}
- if (sync)
- atomic_dec(&inode->sync_writers);
-
current->backing_dev_info = NULL;
return num_written;
}
@@ -1733,9 +1726,7 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
* several segments of stripe length (currently 64K).
*/
blk_start_plug(&plug);
- atomic_inc(&BTRFS_I(inode)->sync_writers);
ret = btrfs_fdatawrite_range(inode, start, end);
- atomic_dec(&BTRFS_I(inode)->sync_writers);
blk_finish_plug(&plug);
return ret;
@@ -3709,7 +3700,8 @@ static int btrfs_file_open(struct inode *inode, struct file *filp)
{
int ret;
- filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC;
+ filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC |
+ FMODE_CAN_ODIRECT;
ret = fsverity_file_open(inode, filp);
if (ret)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index cf98a3c05480..880800418075 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -292,25 +292,6 @@ out:
return ret;
}
-int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *rsv)
-{
- u64 needed_bytes;
- int ret;
-
- /* 1 for slack space, 1 for updating the inode */
- needed_bytes = btrfs_calc_insert_metadata_size(fs_info, 1) +
- btrfs_calc_metadata_size(fs_info, 1);
-
- spin_lock(&rsv->lock);
- if (rsv->reserved < needed_bytes)
- ret = -ENOSPC;
- else
- ret = 0;
- spin_unlock(&rsv->lock);
- return ret;
-}
-
int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group,
struct inode *vfs_inode)
@@ -923,27 +904,31 @@ static int copy_free_space_cache(struct btrfs_block_group *block_group,
while (!ret && (n = rb_first(&ctl->free_space_offset)) != NULL) {
info = rb_entry(n, struct btrfs_free_space, offset_index);
if (!info->bitmap) {
+ const u64 offset = info->offset;
+ const u64 bytes = info->bytes;
+
unlink_free_space(ctl, info, true);
- ret = btrfs_add_free_space(block_group, info->offset,
- info->bytes);
+ spin_unlock(&ctl->tree_lock);
kmem_cache_free(btrfs_free_space_cachep, info);
+ ret = btrfs_add_free_space(block_group, offset, bytes);
+ spin_lock(&ctl->tree_lock);
} else {
u64 offset = info->offset;
u64 bytes = ctl->unit;
- while (search_bitmap(ctl, info, &offset, &bytes,
- false) == 0) {
+ ret = search_bitmap(ctl, info, &offset, &bytes, false);
+ if (ret == 0) {
+ bitmap_clear_bits(ctl, info, offset, bytes, true);
+ spin_unlock(&ctl->tree_lock);
ret = btrfs_add_free_space(block_group, offset,
bytes);
- if (ret)
- break;
- bitmap_clear_bits(ctl, info, offset, bytes, true);
- offset = info->offset;
- bytes = ctl->unit;
+ spin_lock(&ctl->tree_lock);
+ } else {
+ free_bitmap(ctl, info);
+ ret = 0;
}
- free_bitmap(ctl, info);
}
- cond_resched();
+ cond_resched_lock(&ctl->tree_lock);
}
return ret;
}
@@ -1037,7 +1022,9 @@ int load_free_space_cache(struct btrfs_block_group *block_group)
block_group->bytes_super));
if (matched) {
+ spin_lock(&tmp_ctl.tree_lock);
ret = copy_free_space_cache(block_group, &tmp_ctl);
+ spin_unlock(&tmp_ctl.tree_lock);
/*
* ret == 1 means we successfully loaded the free space cache,
* so we need to re-set it here.
@@ -1596,20 +1583,34 @@ static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
return bitmap_start;
}
-static int tree_insert_offset(struct rb_root *root, u64 offset,
- struct rb_node *node, int bitmap)
+static int tree_insert_offset(struct btrfs_free_space_ctl *ctl,
+ struct btrfs_free_cluster *cluster,
+ struct btrfs_free_space *new_entry)
{
- struct rb_node **p = &root->rb_node;
+ struct rb_root *root;
+ struct rb_node **p;
struct rb_node *parent = NULL;
- struct btrfs_free_space *info;
+
+ lockdep_assert_held(&ctl->tree_lock);
+
+ if (cluster) {
+ lockdep_assert_held(&cluster->lock);
+ root = &cluster->root;
+ } else {
+ root = &ctl->free_space_offset;
+ }
+
+ p = &root->rb_node;
while (*p) {
+ struct btrfs_free_space *info;
+
parent = *p;
info = rb_entry(parent, struct btrfs_free_space, offset_index);
- if (offset < info->offset) {
+ if (new_entry->offset < info->offset) {
p = &(*p)->rb_left;
- } else if (offset > info->offset) {
+ } else if (new_entry->offset > info->offset) {
p = &(*p)->rb_right;
} else {
/*
@@ -1625,7 +1626,7 @@ static int tree_insert_offset(struct rb_root *root, u64 offset,
* found a bitmap, we want to go left, or before
* logically.
*/
- if (bitmap) {
+ if (new_entry->bitmap) {
if (info->bitmap) {
WARN_ON_ONCE(1);
return -EEXIST;
@@ -1641,8 +1642,8 @@ static int tree_insert_offset(struct rb_root *root, u64 offset,
}
}
- rb_link_node(node, parent, p);
- rb_insert_color(node, root);
+ rb_link_node(&new_entry->offset_index, parent, p);
+ rb_insert_color(&new_entry->offset_index, root);
return 0;
}
@@ -1705,6 +1706,8 @@ tree_search_offset(struct btrfs_free_space_ctl *ctl,
struct rb_node *n = ctl->free_space_offset.rb_node;
struct btrfs_free_space *entry = NULL, *prev = NULL;
+ lockdep_assert_held(&ctl->tree_lock);
+
/* find entry that is closest to the 'offset' */
while (n) {
entry = rb_entry(n, struct btrfs_free_space, offset_index);
@@ -1814,6 +1817,8 @@ static inline void unlink_free_space(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info,
bool update_stat)
{
+ lockdep_assert_held(&ctl->tree_lock);
+
rb_erase(&info->offset_index, &ctl->free_space_offset);
rb_erase_cached(&info->bytes_index, &ctl->free_space_bytes);
ctl->free_extents--;
@@ -1832,9 +1837,10 @@ static int link_free_space(struct btrfs_free_space_ctl *ctl,
{
int ret = 0;
+ lockdep_assert_held(&ctl->tree_lock);
+
ASSERT(info->bytes || info->bitmap);
- ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
- &info->offset_index, (info->bitmap != NULL));
+ ret = tree_insert_offset(ctl, NULL, info);
if (ret)
return ret;
@@ -1862,6 +1868,8 @@ static void relink_bitmap_entry(struct btrfs_free_space_ctl *ctl,
if (RB_EMPTY_NODE(&info->bytes_index))
return;
+ lockdep_assert_held(&ctl->tree_lock);
+
rb_erase_cached(&info->bytes_index, &ctl->free_space_bytes);
rb_add_cached(&info->bytes_index, &ctl->free_space_bytes, entry_less);
}
@@ -2447,6 +2455,7 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
u64 offset = info->offset;
u64 bytes = info->bytes;
const bool is_trimmed = btrfs_free_space_trimmed(info);
+ struct rb_node *right_prev = NULL;
/*
* first we want to see if there is free space adjacent to the range we
@@ -2454,9 +2463,11 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
* cover the entire range
*/
right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
- if (right_info && rb_prev(&right_info->offset_index))
- left_info = rb_entry(rb_prev(&right_info->offset_index),
- struct btrfs_free_space, offset_index);
+ if (right_info)
+ right_prev = rb_prev(&right_info->offset_index);
+
+ if (right_prev)
+ left_info = rb_entry(right_prev, struct btrfs_free_space, offset_index);
else if (!right_info)
left_info = tree_search_offset(ctl, offset - 1, 0, 0);
@@ -2969,9 +2980,10 @@ static void __btrfs_return_cluster_to_free_space(
struct btrfs_free_cluster *cluster)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- struct btrfs_free_space *entry;
struct rb_node *node;
+ lockdep_assert_held(&ctl->tree_lock);
+
spin_lock(&cluster->lock);
if (cluster->block_group != block_group) {
spin_unlock(&cluster->lock);
@@ -2984,15 +2996,14 @@ static void __btrfs_return_cluster_to_free_space(
node = rb_first(&cluster->root);
while (node) {
- bool bitmap;
+ struct btrfs_free_space *entry;
entry = rb_entry(node, struct btrfs_free_space, offset_index);
node = rb_next(&entry->offset_index);
rb_erase(&entry->offset_index, &cluster->root);
RB_CLEAR_NODE(&entry->offset_index);
- bitmap = (entry->bitmap != NULL);
- if (!bitmap) {
+ if (!entry->bitmap) {
/* Merging treats extents as if they were new */
if (!btrfs_free_space_trimmed(entry)) {
ctl->discardable_extents[BTRFS_STAT_CURR]--;
@@ -3010,8 +3021,7 @@ static void __btrfs_return_cluster_to_free_space(
entry->bytes;
}
}
- tree_insert_offset(&ctl->free_space_offset,
- entry->offset, &entry->offset_index, bitmap);
+ tree_insert_offset(ctl, NULL, entry);
rb_add_cached(&entry->bytes_index, &ctl->free_space_bytes,
entry_less);
}
@@ -3324,6 +3334,8 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group *block_group,
unsigned long total_found = 0;
int ret;
+ lockdep_assert_held(&ctl->tree_lock);
+
i = offset_to_bit(entry->offset, ctl->unit,
max_t(u64, offset, entry->offset));
want_bits = bytes_to_bits(bytes, ctl->unit);
@@ -3385,8 +3397,7 @@ again:
*/
RB_CLEAR_NODE(&entry->bytes_index);
- ret = tree_insert_offset(&cluster->root, entry->offset,
- &entry->offset_index, 1);
+ ret = tree_insert_offset(ctl, cluster, entry);
ASSERT(!ret); /* -EEXIST; Logic error */
trace_btrfs_setup_cluster(block_group, cluster,
@@ -3414,6 +3425,8 @@ setup_cluster_no_bitmap(struct btrfs_block_group *block_group,
u64 max_extent;
u64 total_size = 0;
+ lockdep_assert_held(&ctl->tree_lock);
+
entry = tree_search_offset(ctl, offset, 0, 1);
if (!entry)
return -ENOSPC;
@@ -3476,8 +3489,7 @@ setup_cluster_no_bitmap(struct btrfs_block_group *block_group,
rb_erase(&entry->offset_index, &ctl->free_space_offset);
rb_erase_cached(&entry->bytes_index, &ctl->free_space_bytes);
- ret = tree_insert_offset(&cluster->root, entry->offset,
- &entry->offset_index, 0);
+ ret = tree_insert_offset(ctl, cluster, entry);
total_size += entry->bytes;
ASSERT(!ret); /* -EEXIST; Logic error */
} while (node && entry != last);
@@ -3671,7 +3683,7 @@ static int do_trimming(struct btrfs_block_group *block_group,
__btrfs_add_free_space(block_group, reserved_start,
start - reserved_start,
reserved_trim_state);
- if (start + bytes < reserved_start + reserved_bytes)
+ if (end < reserved_end)
__btrfs_add_free_space(block_group, end, reserved_end - end,
reserved_trim_state);
__btrfs_add_free_space(block_group, start, bytes, trim_state);
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index a855e0483e03..33b4da3271b1 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -101,8 +101,6 @@ int btrfs_remove_free_space_inode(struct btrfs_trans_handle *trans,
struct inode *inode,
struct btrfs_block_group *block_group);
-int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *rsv);
int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group,
struct inode *inode);
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index b21da1446f2a..045ddce32eca 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1280,7 +1280,10 @@ int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
goto abort;
btrfs_global_root_delete(free_space_root);
+
+ spin_lock(&fs_info->trans_lock);
list_del(&free_space_root->dirty_list);
+ spin_unlock(&fs_info->trans_lock);
btrfs_tree_lock(free_space_root->node);
btrfs_clear_buffer_dirty(trans, free_space_root->node);
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index 0d98fc5f6f44..203d2a267828 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -543,7 +543,6 @@ struct btrfs_fs_info {
* A third pool does submit_bio to avoid deadlocking with the other two.
*/
struct btrfs_workqueue *workers;
- struct btrfs_workqueue *hipri_workers;
struct btrfs_workqueue *delalloc_workers;
struct btrfs_workqueue *flush_workers;
struct workqueue_struct *endio_workers;
@@ -577,6 +576,7 @@ struct btrfs_fs_info {
s32 dirty_metadata_batch;
s32 delalloc_batch;
+ /* Protected by 'trans_lock'. */
struct list_head dirty_cowonly_roots;
struct btrfs_fs_devices *fs_devices;
@@ -643,7 +643,6 @@ struct btrfs_fs_info {
*/
refcount_t scrub_workers_refcnt;
struct workqueue_struct *scrub_workers;
- struct workqueue_struct *scrub_wr_completion_workers;
struct btrfs_subpage_info *subpage_info;
struct btrfs_discard_ctl discard_ctl;
@@ -854,7 +853,7 @@ static inline u64 btrfs_calc_metadata_size(const struct btrfs_fs_info *fs_info,
static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info)
{
- return fs_info->zone_size > 0;
+ return IS_ENABLED(CONFIG_BLK_DEV_ZONED) && fs_info->zone_size > 0;
}
/*
diff --git a/fs/btrfs/inode-item.h b/fs/btrfs/inode-item.h
index b80aeb715701..ede43b6c6559 100644
--- a/fs/btrfs/inode-item.h
+++ b/fs/btrfs/inode-item.h
@@ -60,6 +60,22 @@ struct btrfs_truncate_control {
bool clear_extent_range;
};
+/*
+ * btrfs_inode_item stores flags in a u64, btrfs_inode stores them in two
+ * separate u32s. These two functions convert between the two representations.
+ */
+static inline u64 btrfs_inode_combine_flags(u32 flags, u32 ro_flags)
+{
+ return (flags | ((u64)ro_flags << 32));
+}
+
+static inline void btrfs_inode_split_flags(u64 inode_item_flags,
+ u32 *flags, u32 *ro_flags)
+{
+ *flags = (u32)inode_item_flags;
+ *ro_flags = (u32)(inode_item_flags >> 32);
+}
+
int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_truncate_control *control);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 19c707bc8801..dbbb67293e34 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -70,6 +70,7 @@
#include "verity.h"
#include "super.h"
#include "orphan.h"
+#include "backref.h"
struct btrfs_iget_args {
u64 ino;
@@ -100,6 +101,18 @@ struct btrfs_rename_ctx {
u64 index;
};
+/*
+ * Used by data_reloc_print_warning_inode() to pass needed info for filename
+ * resolution and output of error message.
+ */
+struct data_reloc_warn {
+ struct btrfs_path path;
+ struct btrfs_fs_info *fs_info;
+ u64 extent_item_size;
+ u64 logical;
+ int mirror_num;
+};
+
static const struct inode_operations btrfs_dir_inode_operations;
static const struct inode_operations btrfs_symlink_inode_operations;
static const struct inode_operations btrfs_special_inode_operations;
@@ -122,12 +135,198 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
u64 ram_bytes, int compress_type,
int type);
+static int data_reloc_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
+ u64 root, void *warn_ctx)
+{
+ struct data_reloc_warn *warn = warn_ctx;
+ struct btrfs_fs_info *fs_info = warn->fs_info;
+ struct extent_buffer *eb;
+ struct btrfs_inode_item *inode_item;
+ struct inode_fs_paths *ipath = NULL;
+ struct btrfs_root *local_root;
+ struct btrfs_key key;
+ unsigned int nofs_flag;
+ u32 nlink;
+ int ret;
+
+ local_root = btrfs_get_fs_root(fs_info, root, true);
+ if (IS_ERR(local_root)) {
+ ret = PTR_ERR(local_root);
+ goto err;
+ }
+
+ /* This makes the path point to (inum INODE_ITEM ioff). */
+ key.objectid = inum;
+ key.type = BTRFS_INODE_ITEM_KEY;
+ key.offset = 0;
+
+ ret = btrfs_search_slot(NULL, local_root, &key, &warn->path, 0, 0);
+ if (ret) {
+ btrfs_put_root(local_root);
+ btrfs_release_path(&warn->path);
+ goto err;
+ }
+
+ eb = warn->path.nodes[0];
+ inode_item = btrfs_item_ptr(eb, warn->path.slots[0], struct btrfs_inode_item);
+ nlink = btrfs_inode_nlink(eb, inode_item);
+ btrfs_release_path(&warn->path);
+
+ nofs_flag = memalloc_nofs_save();
+ ipath = init_ipath(4096, local_root, &warn->path);
+ memalloc_nofs_restore(nofs_flag);
+ if (IS_ERR(ipath)) {
+ btrfs_put_root(local_root);
+ ret = PTR_ERR(ipath);
+ ipath = NULL;
+ /*
+ * -ENOMEM, not a critical error, just output an generic error
+ * without filename.
+ */
+ btrfs_warn(fs_info,
+"checksum error at logical %llu mirror %u root %llu, inode %llu offset %llu",
+ warn->logical, warn->mirror_num, root, inum, offset);
+ return ret;
+ }
+ ret = paths_from_inode(inum, ipath);
+ if (ret < 0)
+ goto err;
+
+ /*
+ * We deliberately ignore the bit ipath might have been too small to
+ * hold all of the paths here
+ */
+ for (int i = 0; i < ipath->fspath->elem_cnt; i++) {
+ btrfs_warn(fs_info,
+"checksum error at logical %llu mirror %u root %llu inode %llu offset %llu length %u links %u (path: %s)",
+ warn->logical, warn->mirror_num, root, inum, offset,
+ fs_info->sectorsize, nlink,
+ (char *)(unsigned long)ipath->fspath->val[i]);
+ }
+
+ btrfs_put_root(local_root);
+ free_ipath(ipath);
+ return 0;
+
+err:
+ btrfs_warn(fs_info,
+"checksum error at logical %llu mirror %u root %llu inode %llu offset %llu, path resolving failed with ret=%d",
+ warn->logical, warn->mirror_num, root, inum, offset, ret);
+
+ free_ipath(ipath);
+ return ret;
+}
+
+/*
+ * Do extra user-friendly error output (e.g. lookup all the affected files).
+ *
+ * Return true if we succeeded doing the backref lookup.
+ * Return false if such lookup failed, and has to fallback to the old error message.
+ */
+static void print_data_reloc_error(const struct btrfs_inode *inode, u64 file_off,
+ const u8 *csum, const u8 *csum_expected,
+ int mirror_num)
+{
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct btrfs_path path = { 0 };
+ struct btrfs_key found_key = { 0 };
+ struct extent_buffer *eb;
+ struct btrfs_extent_item *ei;
+ const u32 csum_size = fs_info->csum_size;
+ u64 logical;
+ u64 flags;
+ u32 item_size;
+ int ret;
+
+ mutex_lock(&fs_info->reloc_mutex);
+ logical = btrfs_get_reloc_bg_bytenr(fs_info);
+ mutex_unlock(&fs_info->reloc_mutex);
+
+ if (logical == U64_MAX) {
+ btrfs_warn_rl(fs_info, "has data reloc tree but no running relocation");
+ btrfs_warn_rl(fs_info,
+"csum failed root %lld ino %llu off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d",
+ inode->root->root_key.objectid, btrfs_ino(inode), file_off,
+ CSUM_FMT_VALUE(csum_size, csum),
+ CSUM_FMT_VALUE(csum_size, csum_expected),
+ mirror_num);
+ return;
+ }
+
+ logical += file_off;
+ btrfs_warn_rl(fs_info,
+"csum failed root %lld ino %llu off %llu logical %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d",
+ inode->root->root_key.objectid,
+ btrfs_ino(inode), file_off, logical,
+ CSUM_FMT_VALUE(csum_size, csum),
+ CSUM_FMT_VALUE(csum_size, csum_expected),
+ mirror_num);
+
+ ret = extent_from_logical(fs_info, logical, &path, &found_key, &flags);
+ if (ret < 0) {
+ btrfs_err_rl(fs_info, "failed to lookup extent item for logical %llu: %d",
+ logical, ret);
+ return;
+ }
+ eb = path.nodes[0];
+ ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
+ item_size = btrfs_item_size(eb, path.slots[0]);
+ if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+ unsigned long ptr = 0;
+ u64 ref_root;
+ u8 ref_level;
+
+ while (true) {
+ ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
+ item_size, &ref_root,
+ &ref_level);
+ if (ret < 0) {
+ btrfs_warn_rl(fs_info,
+ "failed to resolve tree backref for logical %llu: %d",
+ logical, ret);
+ break;
+ }
+ if (ret > 0)
+ break;
+
+ btrfs_warn_rl(fs_info,
+"csum error at logical %llu mirror %u: metadata %s (level %d) in tree %llu",
+ logical, mirror_num,
+ (ref_level ? "node" : "leaf"),
+ ref_level, ref_root);
+ }
+ btrfs_release_path(&path);
+ } else {
+ struct btrfs_backref_walk_ctx ctx = { 0 };
+ struct data_reloc_warn reloc_warn = { 0 };
+
+ btrfs_release_path(&path);
+
+ ctx.bytenr = found_key.objectid;
+ ctx.extent_item_pos = logical - found_key.objectid;
+ ctx.fs_info = fs_info;
+
+ reloc_warn.logical = logical;
+ reloc_warn.extent_item_size = found_key.offset;
+ reloc_warn.mirror_num = mirror_num;
+ reloc_warn.fs_info = fs_info;
+
+ iterate_extent_inodes(&ctx, true,
+ data_reloc_print_warning_inode, &reloc_warn);
+ }
+}
+
static void __cold btrfs_print_data_csum_error(struct btrfs_inode *inode,
u64 logical_start, u8 *csum, u8 *csum_expected, int mirror_num)
{
struct btrfs_root *root = inode->root;
const u32 csum_size = root->fs_info->csum_size;
+ /* For data reloc tree, it's better to do a backref lookup instead. */
+ if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
+ return print_data_reloc_error(inode, logical_start, csum,
+ csum_expected, mirror_num);
+
/* Output without objectid, which is more meaningful */
if (root->root_key.objectid >= BTRFS_LAST_FREE_OBJECTID) {
btrfs_warn_rl(root->fs_info,
@@ -636,6 +835,7 @@ static noinline int compress_file_range(struct async_chunk *async_chunk)
{
struct btrfs_inode *inode = async_chunk->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct address_space *mapping = inode->vfs_inode.i_mapping;
u64 blocksize = fs_info->sectorsize;
u64 start = async_chunk->start;
u64 end = async_chunk->end;
@@ -750,7 +950,7 @@ again:
/* Compression level is applied here and only here */
ret = btrfs_compress_pages(
compress_type | (fs_info->compress_level << 4),
- inode->vfs_inode.i_mapping, start,
+ mapping, start,
pages,
&nr_pages,
&total_in,
@@ -793,9 +993,9 @@ cont:
unsigned long clear_flags = EXTENT_DELALLOC |
EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
EXTENT_DO_ACCOUNTING;
- unsigned long page_error_op;
- page_error_op = ret < 0 ? PAGE_SET_ERROR : 0;
+ if (ret < 0)
+ mapping_set_error(mapping, -EIO);
/*
* inline extent creation worked or returned error,
@@ -812,7 +1012,6 @@ cont:
clear_flags,
PAGE_UNLOCK |
PAGE_START_WRITEBACK |
- page_error_op |
PAGE_END_WRITEBACK);
/*
@@ -934,6 +1133,12 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
unsigned long nr_written = 0;
int page_started = 0;
int ret;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .range_start = start,
+ .range_end = end,
+ .no_cgroup_owner = 1,
+ };
/*
* Call cow_file_range() to run the delalloc range directly, since we
@@ -954,8 +1159,6 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
const u64 page_start = page_offset(locked_page);
const u64 page_end = page_start + PAGE_SIZE - 1;
- btrfs_page_set_error(inode->root->fs_info, locked_page,
- page_start, PAGE_SIZE);
set_page_writeback(locked_page);
end_page_writeback(locked_page);
end_extent_writepage(locked_page, ret, page_start, page_end);
@@ -965,7 +1168,10 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
}
/* All pages will be unlocked, including @locked_page */
- return extent_write_locked_range(&inode->vfs_inode, start, end);
+ wbc_attach_fdatawrite_inode(&wbc, &inode->vfs_inode);
+ ret = extent_write_locked_range(&inode->vfs_inode, start, end, &wbc);
+ wbc_detach_inode(&wbc);
+ return ret;
}
static int submit_one_async_extent(struct btrfs_inode *inode,
@@ -976,6 +1182,7 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
struct extent_io_tree *io_tree = &inode->io_tree;
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_ordered_extent *ordered;
struct btrfs_key ins;
struct page *locked_page = NULL;
struct extent_map *em;
@@ -1037,7 +1244,7 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
}
free_extent_map(em);
- ret = btrfs_add_ordered_extent(inode, start, /* file_offset */
+ ordered = btrfs_alloc_ordered_extent(inode, start, /* file_offset */
async_extent->ram_size, /* num_bytes */
async_extent->ram_size, /* ram_bytes */
ins.objectid, /* disk_bytenr */
@@ -1045,8 +1252,9 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
0, /* offset */
1 << BTRFS_ORDERED_COMPRESSED,
async_extent->compress_type);
- if (ret) {
+ if (IS_ERR(ordered)) {
btrfs_drop_extent_map_range(inode, start, end, false);
+ ret = PTR_ERR(ordered);
goto out_free_reserve;
}
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
@@ -1055,11 +1263,7 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
extent_clear_unlock_delalloc(inode, start, end,
NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
PAGE_UNLOCK | PAGE_START_WRITEBACK);
-
- btrfs_submit_compressed_write(inode, start, /* file_offset */
- async_extent->ram_size, /* num_bytes */
- ins.objectid, /* disk_bytenr */
- ins.offset, /* compressed_len */
+ btrfs_submit_compressed_write(ordered,
async_extent->pages, /* compressed_pages */
async_extent->nr_pages,
async_chunk->write_flags, true);
@@ -1074,12 +1278,13 @@ out_free_reserve:
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
out_free:
+ mapping_set_error(inode->vfs_inode.i_mapping, -EIO);
extent_clear_unlock_delalloc(inode, start, end,
NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
EXTENT_DELALLOC_NEW |
EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
PAGE_UNLOCK | PAGE_START_WRITEBACK |
- PAGE_END_WRITEBACK | PAGE_SET_ERROR);
+ PAGE_END_WRITEBACK);
free_async_extent_pages(async_extent);
goto done;
}
@@ -1287,6 +1492,8 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
min_alloc_size = fs_info->sectorsize;
while (num_bytes > 0) {
+ struct btrfs_ordered_extent *ordered;
+
cur_alloc_size = num_bytes;
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
min_alloc_size, 0, alloc_hint,
@@ -1311,16 +1518,18 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
}
free_extent_map(em);
- ret = btrfs_add_ordered_extent(inode, start, ram_size, ram_size,
- ins.objectid, cur_alloc_size, 0,
- 1 << BTRFS_ORDERED_REGULAR,
- BTRFS_COMPRESS_NONE);
- if (ret)
+ ordered = btrfs_alloc_ordered_extent(inode, start, ram_size,
+ ram_size, ins.objectid, cur_alloc_size,
+ 0, 1 << BTRFS_ORDERED_REGULAR,
+ BTRFS_COMPRESS_NONE);
+ if (IS_ERR(ordered)) {
+ ret = PTR_ERR(ordered);
goto out_drop_extent_cache;
+ }
if (btrfs_is_data_reloc_root(root)) {
- ret = btrfs_reloc_clone_csums(inode, start,
- cur_alloc_size);
+ ret = btrfs_reloc_clone_csums(ordered);
+
/*
* Only drop cache here, and process as normal.
*
@@ -1337,6 +1546,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
start + ram_size - 1,
false);
}
+ btrfs_put_ordered_extent(ordered);
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
@@ -1494,7 +1704,7 @@ static noinline void async_cow_submit(struct btrfs_work *work)
* ->inode could be NULL if async_chunk_start has failed to compress,
* in which case we don't have anything to submit, yet we need to
* always adjust ->async_delalloc_pages as its paired with the init
- * happening in cow_file_range_async
+ * happening in run_delalloc_compressed
*/
if (async_chunk->inode)
submit_compressed_extents(async_chunk);
@@ -1521,58 +1731,36 @@ static noinline void async_cow_free(struct btrfs_work *work)
kvfree(async_cow);
}
-static int cow_file_range_async(struct btrfs_inode *inode,
- struct writeback_control *wbc,
- struct page *locked_page,
- u64 start, u64 end, int *page_started,
- unsigned long *nr_written)
+static bool run_delalloc_compressed(struct btrfs_inode *inode,
+ struct writeback_control *wbc,
+ struct page *locked_page,
+ u64 start, u64 end, int *page_started,
+ unsigned long *nr_written)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct cgroup_subsys_state *blkcg_css = wbc_blkcg_css(wbc);
struct async_cow *ctx;
struct async_chunk *async_chunk;
unsigned long nr_pages;
- u64 cur_end;
u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
int i;
- bool should_compress;
unsigned nofs_flag;
const blk_opf_t write_flags = wbc_to_write_flags(wbc);
- unlock_extent(&inode->io_tree, start, end, NULL);
-
- if (inode->flags & BTRFS_INODE_NOCOMPRESS &&
- !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
- num_chunks = 1;
- should_compress = false;
- } else {
- should_compress = true;
- }
-
nofs_flag = memalloc_nofs_save();
ctx = kvmalloc(struct_size(ctx, chunks, num_chunks), GFP_KERNEL);
memalloc_nofs_restore(nofs_flag);
+ if (!ctx)
+ return false;
- if (!ctx) {
- unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC |
- EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
- EXTENT_DO_ACCOUNTING;
- unsigned long page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK |
- PAGE_END_WRITEBACK | PAGE_SET_ERROR;
-
- extent_clear_unlock_delalloc(inode, start, end, locked_page,
- clear_bits, page_ops);
- return -ENOMEM;
- }
+ unlock_extent(&inode->io_tree, start, end, NULL);
+ set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
async_chunk = ctx->chunks;
atomic_set(&ctx->num_chunks, num_chunks);
for (i = 0; i < num_chunks; i++) {
- if (should_compress)
- cur_end = min(end, start + SZ_512K - 1);
- else
- cur_end = end;
+ u64 cur_end = min(end, start + SZ_512K - 1);
/*
* igrab is called higher up in the call chain, take only the
@@ -1633,13 +1821,14 @@ static int cow_file_range_async(struct btrfs_inode *inode,
start = cur_end + 1;
}
*page_started = 1;
- return 0;
+ return true;
}
static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
struct page *locked_page, u64 start,
u64 end, int *page_started,
- unsigned long *nr_written)
+ unsigned long *nr_written,
+ struct writeback_control *wbc)
{
u64 done_offset = end;
int ret;
@@ -1671,8 +1860,8 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
account_page_redirty(locked_page);
}
locked_page_done = true;
- extent_write_locked_range(&inode->vfs_inode, start, done_offset);
-
+ extent_write_locked_range(&inode->vfs_inode, start, done_offset,
+ wbc);
start = done_offset + 1;
}
@@ -1864,7 +2053,7 @@ static int can_nocow_file_extent(struct btrfs_path *path,
ret = btrfs_cross_ref_exist(root, btrfs_ino(inode),
key->offset - args->extent_offset,
- args->disk_bytenr, false, path);
+ args->disk_bytenr, args->strict, path);
WARN_ON_ONCE(ret > 0 && is_freespace_inode);
if (ret != 0)
goto out;
@@ -1947,6 +2136,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
nocow_args.writeback_path = true;
while (1) {
+ struct btrfs_ordered_extent *ordered;
struct btrfs_key found_key;
struct btrfs_file_extent_item *fi;
struct extent_buffer *leaf;
@@ -1954,6 +2144,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
u64 ram_bytes;
u64 nocow_end;
int extent_type;
+ bool is_prealloc;
nocow = false;
@@ -2092,8 +2283,8 @@ out_check:
}
nocow_end = cur_offset + nocow_args.num_bytes - 1;
-
- if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
+ is_prealloc = extent_type == BTRFS_FILE_EXTENT_PREALLOC;
+ if (is_prealloc) {
u64 orig_start = found_key.offset - nocow_args.extent_offset;
struct extent_map *em;
@@ -2109,29 +2300,22 @@ out_check:
goto error;
}
free_extent_map(em);
- ret = btrfs_add_ordered_extent(inode,
- cur_offset, nocow_args.num_bytes,
- nocow_args.num_bytes,
- nocow_args.disk_bytenr,
- nocow_args.num_bytes, 0,
- 1 << BTRFS_ORDERED_PREALLOC,
- BTRFS_COMPRESS_NONE);
- if (ret) {
+ }
+
+ ordered = btrfs_alloc_ordered_extent(inode, cur_offset,
+ nocow_args.num_bytes, nocow_args.num_bytes,
+ nocow_args.disk_bytenr, nocow_args.num_bytes, 0,
+ is_prealloc
+ ? (1 << BTRFS_ORDERED_PREALLOC)
+ : (1 << BTRFS_ORDERED_NOCOW),
+ BTRFS_COMPRESS_NONE);
+ if (IS_ERR(ordered)) {
+ if (is_prealloc) {
btrfs_drop_extent_map_range(inode, cur_offset,
nocow_end, false);
- goto error;
}
- } else {
- ret = btrfs_add_ordered_extent(inode, cur_offset,
- nocow_args.num_bytes,
- nocow_args.num_bytes,
- nocow_args.disk_bytenr,
- nocow_args.num_bytes,
- 0,
- 1 << BTRFS_ORDERED_NOCOW,
- BTRFS_COMPRESS_NONE);
- if (ret)
- goto error;
+ ret = PTR_ERR(ordered);
+ goto error;
}
if (nocow) {
@@ -2145,8 +2329,8 @@ out_check:
* extent_clear_unlock_delalloc() in error handler
* from freeing metadata of created ordered extent.
*/
- ret = btrfs_reloc_clone_csums(inode, cur_offset,
- nocow_args.num_bytes);
+ ret = btrfs_reloc_clone_csums(ordered);
+ btrfs_put_ordered_extent(ordered);
extent_clear_unlock_delalloc(inode, cur_offset, nocow_end,
locked_page, EXTENT_LOCKED |
@@ -2214,7 +2398,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
u64 start, u64 end, int *page_started, unsigned long *nr_written,
struct writeback_control *wbc)
{
- int ret;
+ int ret = 0;
const bool zoned = btrfs_is_zoned(inode->root->fs_info);
/*
@@ -2235,19 +2419,23 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
ASSERT(!zoned || btrfs_is_data_reloc_root(inode->root));
ret = run_delalloc_nocow(inode, locked_page, start, end,
page_started, nr_written);
- } else if (!btrfs_inode_can_compress(inode) ||
- !inode_need_compress(inode, start, end)) {
- if (zoned)
- ret = run_delalloc_zoned(inode, locked_page, start, end,
- page_started, nr_written);
- else
- ret = cow_file_range(inode, locked_page, start, end,
- page_started, nr_written, 1, NULL);
- } else {
- set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
- ret = cow_file_range_async(inode, wbc, locked_page, start, end,
- page_started, nr_written);
+ goto out;
}
+
+ if (btrfs_inode_can_compress(inode) &&
+ inode_need_compress(inode, start, end) &&
+ run_delalloc_compressed(inode, wbc, locked_page, start,
+ end, page_started, nr_written))
+ goto out;
+
+ if (zoned)
+ ret = run_delalloc_zoned(inode, locked_page, start, end,
+ page_started, nr_written, wbc);
+ else
+ ret = cow_file_range(inode, locked_page, start, end,
+ page_started, nr_written, 1, NULL);
+
+out:
ASSERT(ret <= 0);
if (ret)
btrfs_cleanup_ordered_extents(inode, locked_page, start,
@@ -2515,125 +2703,42 @@ void btrfs_clear_delalloc_extent(struct btrfs_inode *inode,
}
}
-/*
- * Split off the first pre bytes from the extent_map at [start, start + len]
- *
- * This function is intended to be used only for extract_ordered_extent().
- */
-static int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre)
-{
- struct extent_map_tree *em_tree = &inode->extent_tree;
- struct extent_map *em;
- struct extent_map *split_pre = NULL;
- struct extent_map *split_mid = NULL;
- int ret = 0;
- unsigned long flags;
-
- ASSERT(pre != 0);
- ASSERT(pre < len);
-
- split_pre = alloc_extent_map();
- if (!split_pre)
- return -ENOMEM;
- split_mid = alloc_extent_map();
- if (!split_mid) {
- ret = -ENOMEM;
- goto out_free_pre;
- }
-
- lock_extent(&inode->io_tree, start, start + len - 1, NULL);
- write_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, start, len);
- if (!em) {
- ret = -EIO;
- goto out_unlock;
- }
-
- ASSERT(em->len == len);
- ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
- ASSERT(em->block_start < EXTENT_MAP_LAST_BYTE);
- ASSERT(test_bit(EXTENT_FLAG_PINNED, &em->flags));
- ASSERT(!test_bit(EXTENT_FLAG_LOGGING, &em->flags));
- ASSERT(!list_empty(&em->list));
-
- flags = em->flags;
- clear_bit(EXTENT_FLAG_PINNED, &em->flags);
-
- /* First, replace the em with a new extent_map starting from * em->start */
- split_pre->start = em->start;
- split_pre->len = pre;
- split_pre->orig_start = split_pre->start;
- split_pre->block_start = em->block_start;
- split_pre->block_len = split_pre->len;
- split_pre->orig_block_len = split_pre->block_len;
- split_pre->ram_bytes = split_pre->len;
- split_pre->flags = flags;
- split_pre->compress_type = em->compress_type;
- split_pre->generation = em->generation;
-
- replace_extent_mapping(em_tree, em, split_pre, 1);
-
- /*
- * Now we only have an extent_map at:
- * [em->start, em->start + pre]
- */
-
- /* Insert the middle extent_map. */
- split_mid->start = em->start + pre;
- split_mid->len = em->len - pre;
- split_mid->orig_start = split_mid->start;
- split_mid->block_start = em->block_start + pre;
- split_mid->block_len = split_mid->len;
- split_mid->orig_block_len = split_mid->block_len;
- split_mid->ram_bytes = split_mid->len;
- split_mid->flags = flags;
- split_mid->compress_type = em->compress_type;
- split_mid->generation = em->generation;
- add_extent_mapping(em_tree, split_mid, 1);
-
- /* Once for us */
- free_extent_map(em);
- /* Once for the tree */
- free_extent_map(em);
-
-out_unlock:
- write_unlock(&em_tree->lock);
- unlock_extent(&inode->io_tree, start, start + len - 1, NULL);
- free_extent_map(split_mid);
-out_free_pre:
- free_extent_map(split_pre);
- return ret;
-}
-
-int btrfs_extract_ordered_extent(struct btrfs_bio *bbio,
- struct btrfs_ordered_extent *ordered)
+static int btrfs_extract_ordered_extent(struct btrfs_bio *bbio,
+ struct btrfs_ordered_extent *ordered)
{
u64 start = (u64)bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
u64 len = bbio->bio.bi_iter.bi_size;
- struct btrfs_inode *inode = bbio->inode;
- u64 ordered_len = ordered->num_bytes;
- int ret = 0;
+ struct btrfs_ordered_extent *new;
+ int ret;
/* Must always be called for the beginning of an ordered extent. */
if (WARN_ON_ONCE(start != ordered->disk_bytenr))
return -EINVAL;
/* No need to split if the ordered extent covers the entire bio. */
- if (ordered->disk_num_bytes == len)
+ if (ordered->disk_num_bytes == len) {
+ refcount_inc(&ordered->refs);
+ bbio->ordered = ordered;
return 0;
-
- ret = btrfs_split_ordered_extent(ordered, len);
- if (ret)
- return ret;
+ }
/*
* Don't split the extent_map for NOCOW extents, as we're writing into
* a pre-existing one.
*/
- if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
- return 0;
+ if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
+ ret = split_extent_map(bbio->inode, bbio->file_offset,
+ ordered->num_bytes, len,
+ ordered->disk_bytenr);
+ if (ret)
+ return ret;
+ }
- return split_extent_map(inode, bbio->file_offset, ordered_len, len);
+ new = btrfs_split_ordered_extent(ordered, len);
+ if (IS_ERR(new))
+ return PTR_ERR(new);
+ bbio->ordered = new;
+ return 0;
}
/*
@@ -2651,7 +2756,7 @@ static int add_pending_csums(struct btrfs_trans_handle *trans,
trans->adding_csums = true;
if (!csum_root)
csum_root = btrfs_csum_root(trans->fs_info,
- sum->bytenr);
+ sum->logical);
ret = btrfs_csum_file_blocks(trans, csum_root, sum);
trans->adding_csums = false;
if (ret)
@@ -2689,8 +2794,7 @@ static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
ret = set_extent_bit(&inode->io_tree, search_start,
search_start + em_len - 1,
- EXTENT_DELALLOC_NEW, cached_state,
- GFP_NOFS);
+ EXTENT_DELALLOC_NEW, cached_state);
next:
search_start = extent_map_end(em);
free_extent_map(em);
@@ -2723,8 +2827,8 @@ int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
return ret;
}
- return set_extent_delalloc(&inode->io_tree, start, end, extra_bits,
- cached_state);
+ return set_extent_bit(&inode->io_tree, start, end,
+ EXTENT_DELALLOC | extra_bits, cached_state);
}
/* see btrfs_writepage_start_hook for details on why this is required */
@@ -2847,7 +2951,6 @@ out_page:
mapping_set_error(page->mapping, ret);
end_extent_writepage(page, ret, page_start, page_end);
clear_page_dirty_for_io(page);
- SetPageError(page);
}
btrfs_page_clear_checked(inode->root->fs_info, page, page_start, PAGE_SIZE);
unlock_page(page);
@@ -3068,7 +3171,7 @@ static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans,
* an ordered extent if the range of bytes in the file it covers are
* fully written.
*/
-int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
+int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
{
struct btrfs_inode *inode = BTRFS_I(ordered_extent->inode);
struct btrfs_root *root = inode->root;
@@ -3103,15 +3206,9 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
goto out;
}
- /* A valid ->physical implies a write on a sequential zone. */
- if (ordered_extent->physical != (u64)-1) {
- btrfs_rewrite_logical_zoned(ordered_extent);
- btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
- ordered_extent->disk_num_bytes);
- } else if (btrfs_is_data_reloc_root(inode->root)) {
+ if (btrfs_is_zoned(fs_info))
btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
ordered_extent->disk_num_bytes);
- }
if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
truncated = true;
@@ -3279,6 +3376,14 @@ out:
return ret;
}
+int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered)
+{
+ if (btrfs_is_zoned(btrfs_sb(ordered->inode->i_sb)) &&
+ !test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))
+ btrfs_finish_ordered_zoned(ordered);
+ return btrfs_finish_one_ordered(ordered);
+}
+
void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
struct page *page, u64 start,
u64 end, bool uptodate)
@@ -4226,7 +4331,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
}
btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
- 0);
+ false);
ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
&fname.disk_name);
@@ -4801,7 +4906,7 @@ again:
if (only_release_metadata)
set_extent_bit(&inode->io_tree, block_start, block_end,
- EXTENT_NORESERVE, NULL, GFP_NOFS);
+ EXTENT_NORESERVE, NULL);
out_unlock:
if (ret) {
@@ -7264,7 +7369,7 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
static int btrfs_get_blocks_direct_write(struct extent_map **map,
struct inode *inode,
struct btrfs_dio_data *dio_data,
- u64 start, u64 len,
+ u64 start, u64 *lenp,
unsigned int iomap_flags)
{
const bool nowait = (iomap_flags & IOMAP_NOWAIT);
@@ -7275,6 +7380,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
struct btrfs_block_group *bg;
bool can_nocow = false;
bool space_reserved = false;
+ u64 len = *lenp;
u64 prev_len;
int ret = 0;
@@ -7345,15 +7451,19 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
free_extent_map(em);
*map = NULL;
- if (nowait)
- return -EAGAIN;
+ if (nowait) {
+ ret = -EAGAIN;
+ goto out;
+ }
/*
* If we could not allocate data space before locking the file
* range and we can't do a NOCOW write, then we have to fail.
*/
- if (!dio_data->data_space_reserved)
- return -ENOSPC;
+ if (!dio_data->data_space_reserved) {
+ ret = -ENOSPC;
+ goto out;
+ }
/*
* We have to COW and we have already reserved data space before,
@@ -7394,6 +7504,7 @@ out:
btrfs_delalloc_release_extents(BTRFS_I(inode), len);
btrfs_delalloc_release_metadata(BTRFS_I(inode), len, true);
}
+ *lenp = len;
return ret;
}
@@ -7570,7 +7681,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
if (write) {
ret = btrfs_get_blocks_direct_write(&em, inode, dio_data,
- start, len, flags);
+ start, &len, flags);
if (ret < 0)
goto unlock_err;
unlock_extents = true;
@@ -7664,8 +7775,8 @@ static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
pos += submitted;
length -= submitted;
if (write)
- btrfs_mark_ordered_io_finished(BTRFS_I(inode), NULL,
- pos, length, false);
+ btrfs_finish_ordered_extent(dio_data->ordered, NULL,
+ pos, length, false);
else
unlock_extent(&BTRFS_I(inode)->io_tree, pos,
pos + length - 1, NULL);
@@ -7695,12 +7806,14 @@ static void btrfs_dio_end_io(struct btrfs_bio *bbio)
dip->file_offset, dip->bytes, bio->bi_status);
}
- if (btrfs_op(bio) == BTRFS_MAP_WRITE)
- btrfs_mark_ordered_io_finished(inode, NULL, dip->file_offset,
- dip->bytes, !bio->bi_status);
- else
+ if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
+ btrfs_finish_ordered_extent(bbio->ordered, NULL,
+ dip->file_offset, dip->bytes,
+ !bio->bi_status);
+ } else {
unlock_extent(&inode->io_tree, dip->file_offset,
dip->file_offset + dip->bytes - 1, NULL);
+ }
bbio->bio.bi_private = bbio->private;
iomap_dio_bio_end_io(bio);
@@ -7736,7 +7849,8 @@ static void btrfs_dio_submit_io(const struct iomap_iter *iter, struct bio *bio,
ret = btrfs_extract_ordered_extent(bbio, dio_data->ordered);
if (ret) {
- btrfs_bio_end_io(bbio, errno_to_blk_status(ret));
+ bbio->bio.bi_status = errno_to_blk_status(ret);
+ btrfs_dio_end_io(bbio);
return;
}
}
@@ -8230,7 +8344,7 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback)
int ret;
struct btrfs_trans_handle *trans;
u64 mask = fs_info->sectorsize - 1;
- u64 min_size = btrfs_calc_metadata_size(fs_info, 1);
+ const u64 min_size = btrfs_calc_metadata_size(fs_info, 1);
if (!skip_writeback) {
ret = btrfs_wait_ordered_range(&inode->vfs_inode,
@@ -8287,7 +8401,15 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback)
/* Migrate the slack space for the truncate to our reserve */
ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv,
min_size, false);
- BUG_ON(ret);
+ /*
+ * We have reserved 2 metadata units when we started the transaction and
+ * min_size matches 1 unit, so this should never fail, but if it does,
+ * it's not critical we just fail truncation.
+ */
+ if (WARN_ON(ret)) {
+ btrfs_end_transaction(trans);
+ goto out;
+ }
trans->block_rsv = rsv;
@@ -8335,7 +8457,14 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback)
btrfs_block_rsv_release(fs_info, rsv, -1, NULL);
ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv,
rsv, min_size, false);
- BUG_ON(ret); /* shouldn't happen */
+ /*
+ * We have reserved 2 metadata units when we started the
+ * transaction and min_size matches 1 unit, so this should never
+ * fail, but if it does, it's not critical we just fail truncation.
+ */
+ if (WARN_ON(ret))
+ break;
+
trans->block_rsv = rsv;
}
@@ -8462,7 +8591,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->io_tree.inode = ei;
extent_io_tree_init(fs_info, &ei->file_extent_tree,
IO_TREE_INODE_FILE_EXTENT);
- atomic_set(&ei->sync_writers, 0);
mutex_init(&ei->log_mutex);
btrfs_ordered_inode_tree_init(&ei->ordered_tree);
INIT_LIST_HEAD(&ei->delalloc_inodes);
@@ -8633,7 +8761,7 @@ static int btrfs_getattr(struct mnt_idmap *idmap,
inode_bytes = inode_get_bytes(inode);
spin_unlock(&BTRFS_I(inode)->lock);
stat->blocks = (ALIGN(inode_bytes, blocksize) +
- ALIGN(delalloc_bytes, blocksize)) >> 9;
+ ALIGN(delalloc_bytes, blocksize)) >> SECTOR_SHIFT;
return 0;
}
@@ -8789,9 +8917,9 @@ static int btrfs_rename_exchange(struct inode *old_dir,
if (old_dentry->d_parent != new_dentry->d_parent) {
btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
- BTRFS_I(old_inode), 1);
+ BTRFS_I(old_inode), true);
btrfs_record_unlink_dir(trans, BTRFS_I(new_dir),
- BTRFS_I(new_inode), 1);
+ BTRFS_I(new_inode), true);
}
/* src is a subvolume */
@@ -9057,7 +9185,7 @@ static int btrfs_rename(struct mnt_idmap *idmap,
if (old_dentry->d_parent != new_dentry->d_parent)
btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
- BTRFS_I(old_inode), 1);
+ BTRFS_I(old_inode), true);
if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
ret = btrfs_unlink_subvol(trans, BTRFS_I(old_dir), old_dentry);
@@ -10164,6 +10292,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
struct extent_io_tree *io_tree = &inode->io_tree;
struct extent_changeset *data_reserved = NULL;
struct extent_state *cached_state = NULL;
+ struct btrfs_ordered_extent *ordered;
int compression;
size_t orig_count;
u64 start, end;
@@ -10340,14 +10469,15 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
}
free_extent_map(em);
- ret = btrfs_add_ordered_extent(inode, start, num_bytes, ram_bytes,
+ ordered = btrfs_alloc_ordered_extent(inode, start, num_bytes, ram_bytes,
ins.objectid, ins.offset,
encoded->unencoded_offset,
(1 << BTRFS_ORDERED_ENCODED) |
(1 << BTRFS_ORDERED_COMPRESSED),
compression);
- if (ret) {
+ if (IS_ERR(ordered)) {
btrfs_drop_extent_map_range(inode, start, end, false);
+ ret = PTR_ERR(ordered);
goto out_free_reserved;
}
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
@@ -10359,8 +10489,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
btrfs_delalloc_release_extents(inode, num_bytes);
- btrfs_submit_compressed_write(inode, start, num_bytes, ins.objectid,
- ins.offset, pages, nr_pages, 0, false);
+ btrfs_submit_compressed_write(ordered, pages, nr_pages, 0, false);
ret = orig_count;
goto out;
@@ -10897,7 +11026,6 @@ static const struct address_space_operations btrfs_aops = {
.read_folio = btrfs_read_folio,
.writepages = btrfs_writepages,
.readahead = btrfs_readahead,
- .direct_IO = noop_direct_IO,
.invalidate_folio = btrfs_invalidate_folio,
.release_folio = btrfs_release_folio,
.migrate_folio = btrfs_migrate_folio,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d99376a79ef4..a895d105464b 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -649,6 +649,8 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
}
trans->block_rsv = &block_rsv;
trans->bytes_reserved = block_rsv.size;
+ /* Tree log can't currently deal with an inode which is a new root. */
+ btrfs_set_log_full_commit(trans);
ret = btrfs_qgroup_inherit(trans, 0, objectid, inherit);
if (ret)
@@ -757,10 +759,7 @@ out:
trans->bytes_reserved = 0;
btrfs_subvolume_release_metadata(root, &block_rsv);
- if (ret)
- btrfs_end_transaction(trans);
- else
- ret = btrfs_commit_transaction(trans);
+ btrfs_end_transaction(trans);
out_new_inode_args:
btrfs_new_inode_args_destroy(&new_inode_args);
out_inode:
@@ -3113,6 +3112,13 @@ static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root,
struct btrfs_trans_handle *trans;
u64 transid;
+ /*
+ * Start orphan cleanup here for the given root in case it hasn't been
+ * started already by other means. Errors are handled in the other
+ * functions during transaction commit.
+ */
+ btrfs_orphan_cleanup(root);
+
trans = btrfs_attach_transaction_barrier(root);
if (IS_ERR(trans)) {
if (PTR_ERR(trans) != -ENOENT)
@@ -3134,14 +3140,13 @@ out:
static noinline long btrfs_ioctl_wait_sync(struct btrfs_fs_info *fs_info,
void __user *argp)
{
- u64 transid;
+ /* By default wait for the current transaction. */
+ u64 transid = 0;
- if (argp) {
+ if (argp)
if (copy_from_user(&transid, argp, sizeof(transid)))
return -EFAULT;
- } else {
- transid = 0; /* current trans */
- }
+
return btrfs_wait_for_commit(fs_info, transid);
}
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 3a496b0d3d2b..7979449a58d6 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -57,8 +57,8 @@
static struct btrfs_lockdep_keyset {
u64 id; /* root objectid */
- /* Longest entry: btrfs-free-space-00 */
- char names[BTRFS_MAX_LEVEL][20];
+ /* Longest entry: btrfs-block-group-00 */
+ char names[BTRFS_MAX_LEVEL][24];
struct lock_class_key keys[BTRFS_MAX_LEVEL];
} btrfs_lockdep_keysets[] = {
{ .id = BTRFS_ROOT_TREE_OBJECTID, DEFINE_NAME("root") },
@@ -72,6 +72,7 @@ static struct btrfs_lockdep_keyset {
{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID, DEFINE_NAME("dreloc") },
{ .id = BTRFS_UUID_TREE_OBJECTID, DEFINE_NAME("uuid") },
{ .id = BTRFS_FREE_SPACE_TREE_OBJECTID, DEFINE_NAME("free-space") },
+ { .id = BTRFS_BLOCK_GROUP_TREE_OBJECTID, DEFINE_NAME("block-group") },
{ .id = 0, DEFINE_NAME("tree") },
};
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index 3a095b9c6373..d3fcfc628a4f 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -88,9 +88,9 @@ struct list_head *lzo_alloc_workspace(unsigned int level)
if (!workspace)
return ERR_PTR(-ENOMEM);
- workspace->mem = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
- workspace->buf = kvmalloc(WORKSPACE_BUF_LENGTH, GFP_KERNEL);
- workspace->cbuf = kvmalloc(WORKSPACE_CBUF_LENGTH, GFP_KERNEL);
+ workspace->mem = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL | __GFP_NOWARN);
+ workspace->buf = kvmalloc(WORKSPACE_BUF_LENGTH, GFP_KERNEL | __GFP_NOWARN);
+ workspace->cbuf = kvmalloc(WORKSPACE_CBUF_LENGTH, GFP_KERNEL | __GFP_NOWARN);
if (!workspace->mem || !workspace->buf || !workspace->cbuf)
goto fail;
diff --git a/fs/btrfs/messages.c b/fs/btrfs/messages.c
index 310a05cf95ef..23fc11af498a 100644
--- a/fs/btrfs/messages.c
+++ b/fs/btrfs/messages.c
@@ -252,14 +252,6 @@ void __cold _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt,
}
#endif
-#ifdef CONFIG_BTRFS_ASSERT
-void __cold __noreturn btrfs_assertfail(const char *expr, const char *file, int line)
-{
- pr_err("assertion failed: %s, in %s:%d\n", expr, file, line);
- BUG();
-}
-#endif
-
void __cold btrfs_print_v0_err(struct btrfs_fs_info *fs_info)
{
btrfs_err(fs_info,
diff --git a/fs/btrfs/messages.h b/fs/btrfs/messages.h
index ac2d1982ba3d..deedc1a168e2 100644
--- a/fs/btrfs/messages.h
+++ b/fs/btrfs/messages.h
@@ -4,14 +4,23 @@
#define BTRFS_MESSAGES_H
#include <linux/types.h>
+#include <linux/printk.h>
+#include <linux/bug.h>
struct btrfs_fs_info;
+/*
+ * We want to be able to override this in btrfs-progs.
+ */
+#ifdef __KERNEL__
+
static inline __printf(2, 3) __cold
void btrfs_no_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
{
}
+#endif
+
#ifdef CONFIG_PRINTK
#define btrfs_printk(fs_info, fmt, args...) \
@@ -160,7 +169,11 @@ do { \
} while (0)
#ifdef CONFIG_BTRFS_ASSERT
-void __cold __noreturn btrfs_assertfail(const char *expr, const char *file, int line);
+
+#define btrfs_assertfail(expr, file, line) ({ \
+ pr_err("assertion failed: %s, in %s:%d\n", (expr), (file), (line)); \
+ BUG(); \
+})
#define ASSERT(expr) \
(likely(expr) ? (void)0 : btrfs_assertfail(#expr, __FILE__, __LINE__))
diff --git a/fs/btrfs/misc.h b/fs/btrfs/misc.h
index 768583a440e1..005751a12911 100644
--- a/fs/btrfs/misc.h
+++ b/fs/btrfs/misc.h
@@ -143,4 +143,24 @@ static inline struct rb_node *rb_simple_insert(struct rb_root *root, u64 bytenr,
return NULL;
}
+static inline bool bitmap_test_range_all_set(const unsigned long *addr,
+ unsigned long start,
+ unsigned long nbits)
+{
+ unsigned long found_zero;
+
+ found_zero = find_next_zero_bit(addr, start + nbits, start);
+ return (found_zero == start + nbits);
+}
+
+static inline bool bitmap_test_range_all_zero(const unsigned long *addr,
+ unsigned long start,
+ unsigned long nbits)
+{
+ unsigned long found_set;
+
+ found_set = find_next_bit(addr, start + nbits, start);
+ return (found_set == start + nbits);
+}
+
#endif
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index a9778a91511e..a629532283bc 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -146,35 +146,11 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
return ret;
}
-/*
- * Add an ordered extent to the per-inode tree.
- *
- * @inode: Inode that this extent is for.
- * @file_offset: Logical offset in file where the extent starts.
- * @num_bytes: Logical length of extent in file.
- * @ram_bytes: Full length of unencoded data.
- * @disk_bytenr: Offset of extent on disk.
- * @disk_num_bytes: Size of extent on disk.
- * @offset: Offset into unencoded data where file data starts.
- * @flags: Flags specifying type of extent (1 << BTRFS_ORDERED_*).
- * @compress_type: Compression algorithm used for data.
- *
- * Most of these parameters correspond to &struct btrfs_file_extent_item. The
- * tree is given a single reference on the ordered extent that was inserted, and
- * the returned pointer is given a second reference.
- *
- * Return: the new ordered extent or error pointer.
- */
-struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
- struct btrfs_inode *inode, u64 file_offset,
- u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
- u64 disk_num_bytes, u64 offset, unsigned long flags,
- int compress_type)
+static struct btrfs_ordered_extent *alloc_ordered_extent(
+ struct btrfs_inode *inode, u64 file_offset, u64 num_bytes,
+ u64 ram_bytes, u64 disk_bytenr, u64 disk_num_bytes,
+ u64 offset, unsigned long flags, int compress_type)
{
- struct btrfs_root *root = inode->root;
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
- struct rb_node *node;
struct btrfs_ordered_extent *entry;
int ret;
@@ -184,7 +160,6 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes);
if (ret < 0)
return ERR_PTR(ret);
- ret = 0;
} else {
/*
* The ordered extent has reserved qgroup space, release now
@@ -209,15 +184,7 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
entry->compress_type = compress_type;
entry->truncated_len = (u64)-1;
entry->qgroup_rsv = ret;
- entry->physical = (u64)-1;
-
- ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0);
entry->flags = flags;
-
- percpu_counter_add_batch(&fs_info->ordered_bytes, num_bytes,
- fs_info->delalloc_batch);
-
- /* one ref for the tree */
refcount_set(&entry->refs, 1);
init_waitqueue_head(&entry->wait);
INIT_LIST_HEAD(&entry->list);
@@ -226,15 +193,40 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
INIT_LIST_HEAD(&entry->work_list);
init_completion(&entry->completion);
+ /*
+ * We don't need the count_max_extents here, we can assume that all of
+ * that work has been done at higher layers, so this is truly the
+ * smallest the extent is going to get.
+ */
+ spin_lock(&inode->lock);
+ btrfs_mod_outstanding_extents(inode, 1);
+ spin_unlock(&inode->lock);
+
+ return entry;
+}
+
+static void insert_ordered_extent(struct btrfs_ordered_extent *entry)
+{
+ struct btrfs_inode *inode = BTRFS_I(entry->inode);
+ struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
+ struct btrfs_root *root = inode->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct rb_node *node;
+
trace_btrfs_ordered_extent_add(inode, entry);
+ percpu_counter_add_batch(&fs_info->ordered_bytes, entry->num_bytes,
+ fs_info->delalloc_batch);
+
+ /* One ref for the tree. */
+ refcount_inc(&entry->refs);
+
spin_lock_irq(&tree->lock);
- node = tree_insert(&tree->tree, file_offset,
- &entry->rb_node);
+ node = tree_insert(&tree->tree, entry->file_offset, &entry->rb_node);
if (node)
btrfs_panic(fs_info, -EEXIST,
"inconsistency in ordered tree at offset %llu",
- file_offset);
+ entry->file_offset);
spin_unlock_irq(&tree->lock);
spin_lock(&root->ordered_extent_lock);
@@ -248,43 +240,43 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
spin_unlock(&fs_info->ordered_root_lock);
}
spin_unlock(&root->ordered_extent_lock);
-
- /*
- * We don't need the count_max_extents here, we can assume that all of
- * that work has been done at higher layers, so this is truly the
- * smallest the extent is going to get.
- */
- spin_lock(&inode->lock);
- btrfs_mod_outstanding_extents(inode, 1);
- spin_unlock(&inode->lock);
-
- /* One ref for the returned entry to match semantics of lookup. */
- refcount_inc(&entry->refs);
-
- return entry;
}
/*
- * Add a new btrfs_ordered_extent for the range, but drop the reference instead
- * of returning it to the caller.
+ * Add an ordered extent to the per-inode tree.
+ *
+ * @inode: Inode that this extent is for.
+ * @file_offset: Logical offset in file where the extent starts.
+ * @num_bytes: Logical length of extent in file.
+ * @ram_bytes: Full length of unencoded data.
+ * @disk_bytenr: Offset of extent on disk.
+ * @disk_num_bytes: Size of extent on disk.
+ * @offset: Offset into unencoded data where file data starts.
+ * @flags: Flags specifying type of extent (1 << BTRFS_ORDERED_*).
+ * @compress_type: Compression algorithm used for data.
+ *
+ * Most of these parameters correspond to &struct btrfs_file_extent_item. The
+ * tree is given a single reference on the ordered extent that was inserted, and
+ * the returned pointer is given a second reference.
+ *
+ * Return: the new ordered extent or error pointer.
*/
-int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
- u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
- u64 disk_num_bytes, u64 offset, unsigned long flags,
- int compress_type)
+struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
+ struct btrfs_inode *inode, u64 file_offset,
+ u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
+ u64 disk_num_bytes, u64 offset, unsigned long flags,
+ int compress_type)
{
- struct btrfs_ordered_extent *ordered;
-
- ordered = btrfs_alloc_ordered_extent(inode, file_offset, num_bytes,
- ram_bytes, disk_bytenr,
- disk_num_bytes, offset, flags,
- compress_type);
+ struct btrfs_ordered_extent *entry;
- if (IS_ERR(ordered))
- return PTR_ERR(ordered);
- btrfs_put_ordered_extent(ordered);
+ ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0);
- return 0;
+ entry = alloc_ordered_extent(inode, file_offset, num_bytes, ram_bytes,
+ disk_bytenr, disk_num_bytes, offset, flags,
+ compress_type);
+ if (!IS_ERR(entry))
+ insert_ordered_extent(entry);
+ return entry;
}
/*
@@ -311,6 +303,90 @@ static void finish_ordered_fn(struct btrfs_work *work)
btrfs_finish_ordered_io(ordered_extent);
}
+static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
+ struct page *page, u64 file_offset,
+ u64 len, bool uptodate)
+{
+ struct btrfs_inode *inode = BTRFS_I(ordered->inode);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+
+ lockdep_assert_held(&inode->ordered_tree.lock);
+
+ if (page) {
+ ASSERT(page->mapping);
+ ASSERT(page_offset(page) <= file_offset);
+ ASSERT(file_offset + len <= page_offset(page) + PAGE_SIZE);
+
+ /*
+ * Ordered (Private2) bit indicates whether we still have
+ * pending io unfinished for the ordered extent.
+ *
+ * If there's no such bit, we need to skip to next range.
+ */
+ if (!btrfs_page_test_ordered(fs_info, page, file_offset, len))
+ return false;
+ btrfs_page_clear_ordered(fs_info, page, file_offset, len);
+ }
+
+ /* Now we're fine to update the accounting. */
+ if (WARN_ON_ONCE(len > ordered->bytes_left)) {
+ btrfs_crit(fs_info,
+"bad ordered extent accounting, root=%llu ino=%llu OE offset=%llu OE len=%llu to_dec=%llu left=%llu",
+ inode->root->root_key.objectid, btrfs_ino(inode),
+ ordered->file_offset, ordered->num_bytes,
+ len, ordered->bytes_left);
+ ordered->bytes_left = 0;
+ } else {
+ ordered->bytes_left -= len;
+ }
+
+ if (!uptodate)
+ set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
+
+ if (ordered->bytes_left)
+ return false;
+
+ /*
+ * All the IO of the ordered extent is finished, we need to queue
+ * the finish_func to be executed.
+ */
+ set_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags);
+ cond_wake_up(&ordered->wait);
+ refcount_inc(&ordered->refs);
+ trace_btrfs_ordered_extent_mark_finished(inode, ordered);
+ return true;
+}
+
+static void btrfs_queue_ordered_fn(struct btrfs_ordered_extent *ordered)
+{
+ struct btrfs_inode *inode = BTRFS_I(ordered->inode);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct btrfs_workqueue *wq = btrfs_is_free_space_inode(inode) ?
+ fs_info->endio_freespace_worker : fs_info->endio_write_workers;
+
+ btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL);
+ btrfs_queue_work(wq, &ordered->work);
+}
+
+bool btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
+ struct page *page, u64 file_offset, u64 len,
+ bool uptodate)
+{
+ struct btrfs_inode *inode = BTRFS_I(ordered->inode);
+ unsigned long flags;
+ bool ret;
+
+ trace_btrfs_finish_ordered_extent(inode, file_offset, len, uptodate);
+
+ spin_lock_irqsave(&inode->ordered_tree.lock, flags);
+ ret = can_finish_ordered_extent(ordered, page, file_offset, len, uptodate);
+ spin_unlock_irqrestore(&inode->ordered_tree.lock, flags);
+
+ if (ret)
+ btrfs_queue_ordered_fn(ordered);
+ return ret;
+}
+
/*
* Mark all ordered extents io inside the specified range finished.
*
@@ -329,22 +405,11 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
u64 num_bytes, bool uptodate)
{
struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct btrfs_workqueue *wq;
struct rb_node *node;
struct btrfs_ordered_extent *entry = NULL;
unsigned long flags;
u64 cur = file_offset;
- if (btrfs_is_free_space_inode(inode))
- wq = fs_info->endio_freespace_worker;
- else
- wq = fs_info->endio_write_workers;
-
- if (page)
- ASSERT(page->mapping && page_offset(page) <= file_offset &&
- file_offset + num_bytes <= page_offset(page) + PAGE_SIZE);
-
spin_lock_irqsave(&tree->lock, flags);
while (cur < file_offset + num_bytes) {
u64 entry_end;
@@ -397,50 +462,9 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
ASSERT(end + 1 - cur < U32_MAX);
len = end + 1 - cur;
- if (page) {
- /*
- * Ordered (Private2) bit indicates whether we still
- * have pending io unfinished for the ordered extent.
- *
- * If there's no such bit, we need to skip to next range.
- */
- if (!btrfs_page_test_ordered(fs_info, page, cur, len)) {
- cur += len;
- continue;
- }
- btrfs_page_clear_ordered(fs_info, page, cur, len);
- }
-
- /* Now we're fine to update the accounting */
- if (unlikely(len > entry->bytes_left)) {
- WARN_ON(1);
- btrfs_crit(fs_info,
-"bad ordered extent accounting, root=%llu ino=%llu OE offset=%llu OE len=%llu to_dec=%u left=%llu",
- inode->root->root_key.objectid,
- btrfs_ino(inode),
- entry->file_offset,
- entry->num_bytes,
- len, entry->bytes_left);
- entry->bytes_left = 0;
- } else {
- entry->bytes_left -= len;
- }
-
- if (!uptodate)
- set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
-
- /*
- * All the IO of the ordered extent is finished, we need to queue
- * the finish_func to be executed.
- */
- if (entry->bytes_left == 0) {
- set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
- cond_wake_up(&entry->wait);
- refcount_inc(&entry->refs);
- trace_btrfs_ordered_extent_mark_finished(inode, entry);
+ if (can_finish_ordered_extent(entry, page, cur, len, uptodate)) {
spin_unlock_irqrestore(&tree->lock, flags);
- btrfs_init_work(&entry->work, finish_ordered_fn, NULL, NULL);
- btrfs_queue_work(wq, &entry->work);
+ btrfs_queue_ordered_fn(entry);
spin_lock_irqsave(&tree->lock, flags);
}
cur += len;
@@ -564,7 +588,7 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
freespace_inode = btrfs_is_free_space_inode(btrfs_inode);
btrfs_lockdep_acquire(fs_info, btrfs_trans_pending_ordered);
- /* This is paired with btrfs_add_ordered_extent. */
+ /* This is paired with btrfs_alloc_ordered_extent. */
spin_lock(&btrfs_inode->lock);
btrfs_mod_outstanding_extents(btrfs_inode, -1);
spin_unlock(&btrfs_inode->lock);
@@ -1117,17 +1141,22 @@ bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end,
}
/* Split out a new ordered extent for this first @len bytes of @ordered. */
-int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 len)
+struct btrfs_ordered_extent *btrfs_split_ordered_extent(
+ struct btrfs_ordered_extent *ordered, u64 len)
{
- struct inode *inode = ordered->inode;
- struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_inode *inode = BTRFS_I(ordered->inode);
+ struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
+ struct btrfs_root *root = inode->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
u64 file_offset = ordered->file_offset;
u64 disk_bytenr = ordered->disk_bytenr;
- unsigned long flags = ordered->flags & BTRFS_ORDERED_TYPE_FLAGS;
+ unsigned long flags = ordered->flags;
+ struct btrfs_ordered_sum *sum, *tmpsum;
+ struct btrfs_ordered_extent *new;
struct rb_node *node;
+ u64 offset = 0;
- trace_btrfs_ordered_extent_split(BTRFS_I(inode), ordered);
+ trace_btrfs_ordered_extent_split(inode, ordered);
ASSERT(!(flags & (1U << BTRFS_ORDERED_COMPRESSED)));
@@ -1136,18 +1165,27 @@ int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 len)
* reduce the original extent to a zero length either.
*/
if (WARN_ON_ONCE(len >= ordered->num_bytes))
- return -EINVAL;
- /* We cannot split once ordered extent is past end_bio. */
- if (WARN_ON_ONCE(ordered->bytes_left != ordered->disk_num_bytes))
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
+ /* We cannot split partially completed ordered extents. */
+ if (ordered->bytes_left) {
+ ASSERT(!(flags & ~BTRFS_ORDERED_TYPE_FLAGS));
+ if (WARN_ON_ONCE(ordered->bytes_left != ordered->disk_num_bytes))
+ return ERR_PTR(-EINVAL);
+ }
/* We cannot split a compressed ordered extent. */
if (WARN_ON_ONCE(ordered->disk_num_bytes != ordered->num_bytes))
- return -EINVAL;
- /* Checksum list should be empty. */
- if (WARN_ON_ONCE(!list_empty(&ordered->list)))
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
- spin_lock_irq(&tree->lock);
+ new = alloc_ordered_extent(inode, file_offset, len, len, disk_bytenr,
+ len, 0, flags, ordered->compress_type);
+ if (IS_ERR(new))
+ return new;
+
+ /* One ref for the tree. */
+ refcount_inc(&new->refs);
+
+ spin_lock_irq(&root->ordered_extent_lock);
+ spin_lock(&tree->lock);
/* Remove from tree once */
node = &ordered->rb_node;
rb_erase(node, &tree->tree);
@@ -1159,26 +1197,48 @@ int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 len)
ordered->disk_bytenr += len;
ordered->num_bytes -= len;
ordered->disk_num_bytes -= len;
- ordered->bytes_left -= len;
+
+ if (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)) {
+ ASSERT(ordered->bytes_left == 0);
+ new->bytes_left = 0;
+ } else {
+ ordered->bytes_left -= len;
+ }
+
+ if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags)) {
+ if (ordered->truncated_len > len) {
+ ordered->truncated_len -= len;
+ } else {
+ new->truncated_len = ordered->truncated_len;
+ ordered->truncated_len = 0;
+ }
+ }
+
+ list_for_each_entry_safe(sum, tmpsum, &ordered->list, list) {
+ if (offset == len)
+ break;
+ list_move_tail(&sum->list, &new->list);
+ offset += sum->len;
+ }
/* Re-insert the node */
node = tree_insert(&tree->tree, ordered->file_offset, &ordered->rb_node);
if (node)
btrfs_panic(fs_info, -EEXIST,
"zoned: inconsistency in ordered tree at offset %llu",
- ordered->file_offset);
+ ordered->file_offset);
- spin_unlock_irq(&tree->lock);
-
- /*
- * The splitting extent is already counted and will be added again in
- * btrfs_add_ordered_extent(). Subtract len to avoid double counting.
- */
- percpu_counter_add_batch(&fs_info->ordered_bytes, -len, fs_info->delalloc_batch);
+ node = tree_insert(&tree->tree, new->file_offset, &new->rb_node);
+ if (node)
+ btrfs_panic(fs_info, -EEXIST,
+ "zoned: inconsistency in ordered tree at offset %llu",
+ new->file_offset);
+ spin_unlock(&tree->lock);
- return btrfs_add_ordered_extent(BTRFS_I(inode), file_offset, len, len,
- disk_bytenr, len, 0, flags,
- ordered->compress_type);
+ list_add_tail(&new->root_extent_list, &root->ordered_extents);
+ root->nr_ordered_extents++;
+ spin_unlock_irq(&root->ordered_extent_lock);
+ return new;
}
int __init ordered_data_init(void)
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index f0f1138d23c3..173bd5c5df26 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -14,13 +14,13 @@ struct btrfs_ordered_inode_tree {
};
struct btrfs_ordered_sum {
- /* bytenr is the start of this extent on disk */
- u64 bytenr;
-
/*
- * this is the length in bytes covered by the sums array below.
+ * Logical start address and length for of the blocks covered by
+ * the sums array.
*/
- int len;
+ u64 logical;
+ u32 len;
+
struct list_head list;
/* last field is a variable length array of csums */
u8 sums[];
@@ -151,12 +151,6 @@ struct btrfs_ordered_extent {
struct completion completion;
struct btrfs_work flush_work;
struct list_head work_list;
-
- /*
- * Used to reverse-map physical address returned from ZONE_APPEND write
- * command in a workqueue context
- */
- u64 physical;
};
static inline void
@@ -167,11 +161,15 @@ btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t)
t->last = NULL;
}
+int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent);
int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry);
void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
struct btrfs_ordered_extent *entry);
+bool btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
+ struct page *page, u64 file_offset, u64 len,
+ bool uptodate);
void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
struct page *page, u64 file_offset,
u64 num_bytes, bool uptodate);
@@ -183,10 +181,6 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
u64 disk_num_bytes, u64 offset, unsigned long flags,
int compress_type);
-int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
- u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
- u64 disk_num_bytes, u64 offset, unsigned long flags,
- int compress_type);
void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
struct btrfs_ordered_sum *sum);
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode,
@@ -212,7 +206,8 @@ void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
struct extent_state **cached_state);
bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end,
struct extent_state **cached_state);
-int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 len);
+struct btrfs_ordered_extent *btrfs_split_ordered_extent(
+ struct btrfs_ordered_extent *ordered, u64 len);
int __init ordered_data_init(void);
void __cold ordered_data_exit(void);
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 497b9dbd8a13..aa06d9ca911d 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -49,7 +49,7 @@ const char *btrfs_root_name(const struct btrfs_key *key, char *buf)
return buf;
}
-static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk)
+static void print_chunk(const struct extent_buffer *eb, struct btrfs_chunk *chunk)
{
int num_stripes = btrfs_chunk_num_stripes(eb, chunk);
int i;
@@ -62,7 +62,7 @@ static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk)
btrfs_stripe_offset_nr(eb, chunk, i));
}
}
-static void print_dev_item(struct extent_buffer *eb,
+static void print_dev_item(const struct extent_buffer *eb,
struct btrfs_dev_item *dev_item)
{
pr_info("\t\tdev item devid %llu total_bytes %llu bytes used %llu\n",
@@ -70,7 +70,7 @@ static void print_dev_item(struct extent_buffer *eb,
btrfs_device_total_bytes(eb, dev_item),
btrfs_device_bytes_used(eb, dev_item));
}
-static void print_extent_data_ref(struct extent_buffer *eb,
+static void print_extent_data_ref(const struct extent_buffer *eb,
struct btrfs_extent_data_ref *ref)
{
pr_cont("extent data backref root %llu objectid %llu offset %llu count %u\n",
@@ -80,7 +80,7 @@ static void print_extent_data_ref(struct extent_buffer *eb,
btrfs_extent_data_ref_count(eb, ref));
}
-static void print_extent_item(struct extent_buffer *eb, int slot, int type)
+static void print_extent_item(const struct extent_buffer *eb, int slot, int type)
{
struct btrfs_extent_item *ei;
struct btrfs_extent_inline_ref *iref;
@@ -169,7 +169,7 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
WARN_ON(ptr > end);
}
-static void print_uuid_item(struct extent_buffer *l, unsigned long offset,
+static void print_uuid_item(const struct extent_buffer *l, unsigned long offset,
u32 item_size)
{
if (!IS_ALIGNED(item_size, sizeof(u64))) {
@@ -191,7 +191,7 @@ static void print_uuid_item(struct extent_buffer *l, unsigned long offset,
* Helper to output refs and locking status of extent buffer. Useful to debug
* race condition related problems.
*/
-static void print_eb_refs_lock(struct extent_buffer *eb)
+static void print_eb_refs_lock(const struct extent_buffer *eb)
{
#ifdef CONFIG_BTRFS_DEBUG
btrfs_info(eb->fs_info, "refs %u lock_owner %u current %u",
@@ -199,7 +199,7 @@ static void print_eb_refs_lock(struct extent_buffer *eb)
#endif
}
-void btrfs_print_leaf(struct extent_buffer *l)
+void btrfs_print_leaf(const struct extent_buffer *l)
{
struct btrfs_fs_info *fs_info;
int i;
@@ -355,7 +355,7 @@ void btrfs_print_leaf(struct extent_buffer *l)
}
}
-void btrfs_print_tree(struct extent_buffer *c, bool follow)
+void btrfs_print_tree(const struct extent_buffer *c, bool follow)
{
struct btrfs_fs_info *fs_info;
int i; u32 nr;
diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h
index 8c3e9319ec4e..c42bc666d5ee 100644
--- a/fs/btrfs/print-tree.h
+++ b/fs/btrfs/print-tree.h
@@ -9,8 +9,8 @@
/* Buffer size to contain tree name and possibly additional data (offset) */
#define BTRFS_ROOT_NAME_BUF_LEN 48
-void btrfs_print_leaf(struct extent_buffer *l);
-void btrfs_print_tree(struct extent_buffer *c, bool follow);
+void btrfs_print_leaf(const struct extent_buffer *l);
+void btrfs_print_tree(const struct extent_buffer *c, bool follow);
const char *btrfs_root_name(const struct btrfs_key *key, char *buf);
#endif
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index f41da7ac360d..da1f84a0eb29 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1232,12 +1232,23 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
int ret = 0;
/*
- * We need to have subvol_sem write locked, to prevent races between
- * concurrent tasks trying to disable quotas, because we will unlock
- * and relock qgroup_ioctl_lock across BTRFS_FS_QUOTA_ENABLED changes.
+ * We need to have subvol_sem write locked to prevent races with
+ * snapshot creation.
*/
lockdep_assert_held_write(&fs_info->subvol_sem);
+ /*
+ * Lock the cleaner mutex to prevent races with concurrent relocation,
+ * because relocation may be building backrefs for blocks of the quota
+ * root while we are deleting the root. This is like dropping fs roots
+ * of deleted snapshots/subvolumes, we need the same protection.
+ *
+ * This also prevents races between concurrent tasks trying to disable
+ * quotas, because we will unlock and relock qgroup_ioctl_lock across
+ * BTRFS_FS_QUOTA_ENABLED changes.
+ */
+ mutex_lock(&fs_info->cleaner_mutex);
+
mutex_lock(&fs_info->qgroup_ioctl_lock);
if (!fs_info->quota_root)
goto out;
@@ -1301,7 +1312,9 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
goto out;
}
+ spin_lock(&fs_info->trans_lock);
list_del(&quota_root->dirty_list);
+ spin_unlock(&fs_info->trans_lock);
btrfs_tree_lock(quota_root->node);
btrfs_clear_buffer_dirty(trans, quota_root->node);
@@ -1317,6 +1330,7 @@ out:
btrfs_end_transaction(trans);
else if (trans)
ret = btrfs_end_transaction(trans);
+ mutex_unlock(&fs_info->cleaner_mutex);
return ret;
}
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 2fab37f062de..f37b925d587f 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1079,7 +1079,7 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
/* see if we can add this page onto our existing bio */
if (last) {
- u64 last_end = last->bi_iter.bi_sector << 9;
+ u64 last_end = last->bi_iter.bi_sector << SECTOR_SHIFT;
last_end += last->bi_iter.bi_size;
/*
@@ -1099,7 +1099,7 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
bio = bio_alloc(stripe->dev->bdev,
max(BTRFS_STRIPE_LEN >> PAGE_SHIFT, 1),
op, GFP_NOFS);
- bio->bi_iter.bi_sector = disk_start >> 9;
+ bio->bi_iter.bi_sector = disk_start >> SECTOR_SHIFT;
bio->bi_private = rbio;
__bio_add_page(bio, sector->page, sectorsize, sector->pgoff);
@@ -2747,3 +2747,48 @@ void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
if (!lock_stripe_add(rbio))
start_async_work(rbio, scrub_rbio_work_locked);
}
+
+/*
+ * This is for scrub call sites where we already have correct data contents.
+ * This allows us to avoid reading data stripes again.
+ *
+ * Unfortunately here we have to do page copy, other than reusing the pages.
+ * This is due to the fact rbio has its own page management for its cache.
+ */
+void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
+ struct page **data_pages, u64 data_logical)
+{
+ const u64 offset_in_full_stripe = data_logical -
+ rbio->bioc->full_stripe_logical;
+ const int page_index = offset_in_full_stripe >> PAGE_SHIFT;
+ const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
+ const u32 sectors_per_page = PAGE_SIZE / sectorsize;
+ int ret;
+
+ /*
+ * If we hit ENOMEM temporarily, but later at
+ * raid56_parity_submit_scrub_rbio() time it succeeded, we just do
+ * the extra read, not a big deal.
+ *
+ * If we hit ENOMEM later at raid56_parity_submit_scrub_rbio() time,
+ * the bio would got proper error number set.
+ */
+ ret = alloc_rbio_data_pages(rbio);
+ if (ret < 0)
+ return;
+
+ /* data_logical must be at stripe boundary and inside the full stripe. */
+ ASSERT(IS_ALIGNED(offset_in_full_stripe, BTRFS_STRIPE_LEN));
+ ASSERT(offset_in_full_stripe < (rbio->nr_data << BTRFS_STRIPE_LEN_SHIFT));
+
+ for (int page_nr = 0; page_nr < (BTRFS_STRIPE_LEN >> PAGE_SHIFT); page_nr++) {
+ struct page *dst = rbio->stripe_pages[page_nr + page_index];
+ struct page *src = data_pages[page_nr];
+
+ memcpy_page(dst, 0, src, 0, PAGE_SIZE);
+ for (int sector_nr = sectors_per_page * page_index;
+ sector_nr < sectors_per_page * (page_index + 1);
+ sector_nr++)
+ rbio->stripe_sectors[sector_nr].uptodate = true;
+ }
+}
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
index 0f7f31c8cb98..0e84c9c9293f 100644
--- a/fs/btrfs/raid56.h
+++ b/fs/btrfs/raid56.h
@@ -193,6 +193,9 @@ struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
unsigned long *dbitmap, int stripe_nsectors);
void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio);
+void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
+ struct page **data_pages, u64 data_logical);
+
int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 59a06499c647..25a3361caedc 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -174,8 +174,8 @@ static void mark_block_processed(struct reloc_control *rc,
in_range(node->bytenr, rc->block_group->start,
rc->block_group->length)) {
blocksize = rc->extent_root->fs_info->nodesize;
- set_extent_bits(&rc->processed_blocks, node->bytenr,
- node->bytenr + blocksize - 1, EXTENT_DIRTY);
+ set_extent_bit(&rc->processed_blocks, node->bytenr,
+ node->bytenr + blocksize - 1, EXTENT_DIRTY, NULL);
}
node->processed = 1;
}
@@ -3051,9 +3051,9 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
u64 boundary_end = boundary_start +
fs_info->sectorsize - 1;
- set_extent_bits(&BTRFS_I(inode)->io_tree,
- boundary_start, boundary_end,
- EXTENT_BOUNDARY);
+ set_extent_bit(&BTRFS_I(inode)->io_tree,
+ boundary_start, boundary_end,
+ EXTENT_BOUNDARY, NULL);
}
unlock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end,
&cached_state);
@@ -4342,29 +4342,25 @@ out:
* cloning checksum properly handles the nodatasum extents.
* it also saves CPU time to re-calculate the checksum.
*/
-int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len)
+int btrfs_reloc_clone_csums(struct btrfs_ordered_extent *ordered)
{
+ struct btrfs_inode *inode = BTRFS_I(ordered->inode);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct btrfs_root *csum_root;
- struct btrfs_ordered_sum *sums;
- struct btrfs_ordered_extent *ordered;
- int ret;
- u64 disk_bytenr;
- u64 new_bytenr;
+ u64 disk_bytenr = ordered->file_offset + inode->index_cnt;
+ struct btrfs_root *csum_root = btrfs_csum_root(fs_info, disk_bytenr);
LIST_HEAD(list);
+ int ret;
- ordered = btrfs_lookup_ordered_extent(inode, file_pos);
- BUG_ON(ordered->file_offset != file_pos || ordered->num_bytes != len);
-
- disk_bytenr = file_pos + inode->index_cnt;
- csum_root = btrfs_csum_root(fs_info, disk_bytenr);
ret = btrfs_lookup_csums_list(csum_root, disk_bytenr,
- disk_bytenr + len - 1, &list, 0, false);
+ disk_bytenr + ordered->num_bytes - 1,
+ &list, 0, false);
if (ret)
- goto out;
+ return ret;
while (!list_empty(&list)) {
- sums = list_entry(list.next, struct btrfs_ordered_sum, list);
+ struct btrfs_ordered_sum *sums =
+ list_entry(list.next, struct btrfs_ordered_sum, list);
+
list_del_init(&sums->list);
/*
@@ -4379,14 +4375,11 @@ int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len)
* disk_len vs real len like with real inodes since it's all
* disk length.
*/
- new_bytenr = ordered->disk_bytenr + sums->bytenr - disk_bytenr;
- sums->bytenr = new_bytenr;
-
+ sums->logical = ordered->disk_bytenr + sums->logical - disk_bytenr;
btrfs_add_ordered_sum(ordered, sums);
}
-out:
- btrfs_put_ordered_extent(ordered);
- return ret;
+
+ return 0;
}
int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
@@ -4523,3 +4516,19 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
ret = clone_backref_node(trans, rc, root, reloc_root);
return ret;
}
+
+/*
+ * Get the current bytenr for the block group which is being relocated.
+ *
+ * Return U64_MAX if no running relocation.
+ */
+u64 btrfs_get_reloc_bg_bytenr(struct btrfs_fs_info *fs_info)
+{
+ u64 logical = U64_MAX;
+
+ lockdep_assert_held(&fs_info->reloc_mutex);
+
+ if (fs_info->reloc_ctl && fs_info->reloc_ctl->block_group)
+ logical = fs_info->reloc_ctl->block_group->start;
+ return logical;
+}
diff --git a/fs/btrfs/relocation.h b/fs/btrfs/relocation.h
index 2041a86186de..77d69f6ae967 100644
--- a/fs/btrfs/relocation.h
+++ b/fs/btrfs/relocation.h
@@ -8,7 +8,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, struct btrfs_root *r
int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_recover_relocation(struct btrfs_fs_info *fs_info);
-int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len);
+int btrfs_reloc_clone_csums(struct btrfs_ordered_extent *ordered);
int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *buf,
struct extent_buffer *cow);
@@ -19,5 +19,6 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info);
struct btrfs_root *find_reloc_root(struct btrfs_fs_info *fs_info, u64 bytenr);
int btrfs_should_ignore_reloc_root(struct btrfs_root *root);
+u64 btrfs_get_reloc_bg_bytenr(struct btrfs_fs_info *fs_info);
#endif
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 836725a19661..4cae41bd6de0 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -134,8 +134,14 @@ struct scrub_stripe {
* The errors hit during the initial read of the stripe.
*
* Would be utilized for error reporting and repair.
+ *
+ * The remaining init_nr_* records the number of errors hit, only used
+ * by error reporting.
*/
unsigned long init_error_bitmap;
+ unsigned int init_nr_io_errors;
+ unsigned int init_nr_csum_errors;
+ unsigned int init_nr_meta_errors;
/*
* The following error bitmaps are all for the current status.
@@ -171,7 +177,6 @@ struct scrub_ctx {
struct btrfs_fs_info *fs_info;
int first_free;
int cur_stripe;
- struct list_head csum_list;
atomic_t cancel_req;
int readonly;
int sectors_per_bio;
@@ -303,17 +308,6 @@ static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
scrub_pause_off(fs_info);
}
-static void scrub_free_csums(struct scrub_ctx *sctx)
-{
- while (!list_empty(&sctx->csum_list)) {
- struct btrfs_ordered_sum *sum;
- sum = list_first_entry(&sctx->csum_list,
- struct btrfs_ordered_sum, list);
- list_del(&sum->list);
- kfree(sum);
- }
-}
-
static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
{
int i;
@@ -324,7 +318,6 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++)
release_scrub_stripe(&sctx->stripes[i]);
- scrub_free_csums(sctx);
kfree(sctx);
}
@@ -346,7 +339,6 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
refcount_set(&sctx->refs, 1);
sctx->is_dev_replace = is_dev_replace;
sctx->fs_info = fs_info;
- INIT_LIST_HEAD(&sctx->csum_list);
for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++) {
int ret;
@@ -473,11 +465,8 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
struct extent_buffer *eb;
struct btrfs_extent_item *ei;
struct scrub_warning swarn;
- unsigned long ptr = 0;
u64 flags = 0;
- u64 ref_root;
u32 item_size;
- u8 ref_level = 0;
int ret;
/* Super block error, no need to search extent tree. */
@@ -507,19 +496,28 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
item_size = btrfs_item_size(eb, path->slots[0]);
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
- do {
+ unsigned long ptr = 0;
+ u8 ref_level;
+ u64 ref_root;
+
+ while (true) {
ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
item_size, &ref_root,
&ref_level);
+ if (ret < 0) {
+ btrfs_warn(fs_info,
+ "failed to resolve tree backref for logical %llu: %d",
+ swarn.logical, ret);
+ break;
+ }
+ if (ret > 0)
+ break;
btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
- errstr, swarn.logical,
- btrfs_dev_name(dev),
- swarn.physical,
- ref_level ? "node" : "leaf",
- ret < 0 ? -1 : ref_level,
- ret < 0 ? -1 : ref_root);
- } while (ret != 1);
+ errstr, swarn.logical, btrfs_dev_name(dev),
+ swarn.physical, (ref_level ? "node" : "leaf"),
+ ref_level, ref_root);
+ }
btrfs_release_path(path);
} else {
struct btrfs_backref_walk_ctx ctx = { 0 };
@@ -540,48 +538,6 @@ out:
btrfs_free_path(path);
}
-static inline int scrub_nr_raid_mirrors(struct btrfs_io_context *bioc)
-{
- if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID5)
- return 2;
- else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID6)
- return 3;
- else
- return (int)bioc->num_stripes;
-}
-
-static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
- u64 full_stripe_logical,
- int nstripes, int mirror,
- int *stripe_index,
- u64 *stripe_offset)
-{
- int i;
-
- if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
- const int nr_data_stripes = (map_type & BTRFS_BLOCK_GROUP_RAID5) ?
- nstripes - 1 : nstripes - 2;
-
- /* RAID5/6 */
- for (i = 0; i < nr_data_stripes; i++) {
- const u64 data_stripe_start = full_stripe_logical +
- (i * BTRFS_STRIPE_LEN);
-
- if (logical >= data_stripe_start &&
- logical < data_stripe_start + BTRFS_STRIPE_LEN)
- break;
- }
-
- *stripe_index = i;
- *stripe_offset = (logical - full_stripe_logical) &
- BTRFS_STRIPE_LEN_MASK;
- } else {
- /* The other RAID type */
- *stripe_index = mirror;
- *stripe_offset = 0;
- }
-}
-
static int fill_writer_pointer_gap(struct scrub_ctx *sctx, u64 physical)
{
int ret = 0;
@@ -918,8 +874,9 @@ static void scrub_stripe_report_errors(struct scrub_ctx *sctx,
/* For scrub, our mirror_num should always start at 1. */
ASSERT(stripe->mirror_num >= 1);
- ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
- stripe->logical, &mapped_len, &bioc);
+ ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
+ stripe->logical, &mapped_len, &bioc,
+ NULL, NULL, 1);
/*
* If we failed, dev will be NULL, and later detailed reports
* will just be skipped.
@@ -1003,12 +960,9 @@ skip:
sctx->stat.data_bytes_scrubbed += nr_data_sectors << fs_info->sectorsize_bits;
sctx->stat.tree_bytes_scrubbed += nr_meta_sectors << fs_info->sectorsize_bits;
sctx->stat.no_csum += nr_nodatacsum_sectors;
- sctx->stat.read_errors +=
- bitmap_weight(&stripe->io_error_bitmap, stripe->nr_sectors);
- sctx->stat.csum_errors +=
- bitmap_weight(&stripe->csum_error_bitmap, stripe->nr_sectors);
- sctx->stat.verify_errors +=
- bitmap_weight(&stripe->meta_error_bitmap, stripe->nr_sectors);
+ sctx->stat.read_errors += stripe->init_nr_io_errors;
+ sctx->stat.csum_errors += stripe->init_nr_csum_errors;
+ sctx->stat.verify_errors += stripe->init_nr_meta_errors;
sctx->stat.uncorrectable_errors +=
bitmap_weight(&stripe->error_bitmap, stripe->nr_sectors);
sctx->stat.corrected_errors += nr_repaired_sectors;
@@ -1041,6 +995,12 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
scrub_verify_one_stripe(stripe, stripe->extent_sector_bitmap);
/* Save the initial failed bitmap for later repair and report usage. */
stripe->init_error_bitmap = stripe->error_bitmap;
+ stripe->init_nr_io_errors = bitmap_weight(&stripe->io_error_bitmap,
+ stripe->nr_sectors);
+ stripe->init_nr_csum_errors = bitmap_weight(&stripe->csum_error_bitmap,
+ stripe->nr_sectors);
+ stripe->init_nr_meta_errors = bitmap_weight(&stripe->meta_error_bitmap,
+ stripe->nr_sectors);
if (bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors))
goto out;
@@ -1137,6 +1097,35 @@ static void scrub_write_endio(struct btrfs_bio *bbio)
wake_up(&stripe->io_wait);
}
+static void scrub_submit_write_bio(struct scrub_ctx *sctx,
+ struct scrub_stripe *stripe,
+ struct btrfs_bio *bbio, bool dev_replace)
+{
+ struct btrfs_fs_info *fs_info = sctx->fs_info;
+ u32 bio_len = bbio->bio.bi_iter.bi_size;
+ u32 bio_off = (bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT) -
+ stripe->logical;
+
+ fill_writer_pointer_gap(sctx, stripe->physical + bio_off);
+ atomic_inc(&stripe->pending_io);
+ btrfs_submit_repair_write(bbio, stripe->mirror_num, dev_replace);
+ if (!btrfs_is_zoned(fs_info))
+ return;
+ /*
+ * For zoned writeback, queue depth must be 1, thus we must wait for
+ * the write to finish before the next write.
+ */
+ wait_scrub_stripe_io(stripe);
+
+ /*
+ * And also need to update the write pointer if write finished
+ * successfully.
+ */
+ if (!test_bit(bio_off >> fs_info->sectorsize_bits,
+ &stripe->write_error_bitmap))
+ sctx->write_pointer += bio_len;
+}
+
/*
* Submit the write bio(s) for the sectors specified by @write_bitmap.
*
@@ -1155,7 +1144,6 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str
{
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
struct btrfs_bio *bbio = NULL;
- const bool zoned = btrfs_is_zoned(fs_info);
int sector_nr;
for_each_set_bit(sector_nr, &write_bitmap, stripe->nr_sectors) {
@@ -1168,13 +1156,7 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str
/* Cannot merge with previous sector, submit the current one. */
if (bbio && sector_nr && !test_bit(sector_nr - 1, &write_bitmap)) {
- fill_writer_pointer_gap(sctx, stripe->physical +
- (sector_nr << fs_info->sectorsize_bits));
- atomic_inc(&stripe->pending_io);
- btrfs_submit_repair_write(bbio, stripe->mirror_num, dev_replace);
- /* For zoned writeback, queue depth must be 1. */
- if (zoned)
- wait_scrub_stripe_io(stripe);
+ scrub_submit_write_bio(sctx, stripe, bbio, dev_replace);
bbio = NULL;
}
if (!bbio) {
@@ -1187,14 +1169,8 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str
ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff);
ASSERT(ret == fs_info->sectorsize);
}
- if (bbio) {
- fill_writer_pointer_gap(sctx, bbio->bio.bi_iter.bi_sector <<
- SECTOR_SHIFT);
- atomic_inc(&stripe->pending_io);
- btrfs_submit_repair_write(bbio, stripe->mirror_num, dev_replace);
- if (zoned)
- wait_scrub_stripe_io(stripe);
- }
+ if (bbio)
+ scrub_submit_write_bio(sctx, stripe, bbio, dev_replace);
}
/*
@@ -1279,7 +1255,7 @@ static int get_raid56_logic_offset(u64 physical, int num,
u32 stripe_index;
u32 rot;
- *offset = last_offset + (i << BTRFS_STRIPE_LEN_SHIFT);
+ *offset = last_offset + btrfs_stripe_nr_to_offset(i);
stripe_nr = (u32)(*offset >> BTRFS_STRIPE_LEN_SHIFT) / data_stripes;
@@ -1294,7 +1270,7 @@ static int get_raid56_logic_offset(u64 physical, int num,
if (stripe_index < num)
j++;
}
- *offset = last_offset + (j << BTRFS_STRIPE_LEN_SHIFT);
+ *offset = last_offset + btrfs_stripe_nr_to_offset(j);
return 1;
}
@@ -1474,6 +1450,9 @@ static void scrub_stripe_reset_bitmaps(struct scrub_stripe *stripe)
{
stripe->extent_sector_bitmap = 0;
stripe->init_error_bitmap = 0;
+ stripe->init_nr_io_errors = 0;
+ stripe->init_nr_csum_errors = 0;
+ stripe->init_nr_meta_errors = 0;
stripe->error_bitmap = 0;
stripe->io_error_bitmap = 0;
stripe->csum_error_bitmap = 0;
@@ -1687,7 +1666,7 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx)
ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &sctx->stripes[0].state));
scrub_throttle_dev_io(sctx, sctx->stripes[0].dev,
- nr_stripes << BTRFS_STRIPE_LEN_SHIFT);
+ btrfs_stripe_nr_to_offset(nr_stripes));
for (int i = 0; i < nr_stripes; i++) {
stripe = &sctx->stripes[i];
scrub_submit_initial_read(sctx, stripe);
@@ -1714,7 +1693,7 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx)
break;
}
}
- } else {
+ } else if (!sctx->readonly) {
for (int i = 0; i < nr_stripes; i++) {
unsigned long repaired;
@@ -1810,7 +1789,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
bool all_empty = true;
const int data_stripes = nr_data_stripes(map);
unsigned long extent_bitmap = 0;
- u64 length = data_stripes << BTRFS_STRIPE_LEN_SHIFT;
+ u64 length = btrfs_stripe_nr_to_offset(data_stripes);
int ret;
ASSERT(sctx->raid56_data_stripes);
@@ -1825,13 +1804,13 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
data_stripes) >> BTRFS_STRIPE_LEN_SHIFT;
stripe_index = (i + rot) % map->num_stripes;
physical = map->stripes[stripe_index].physical +
- (rot << BTRFS_STRIPE_LEN_SHIFT);
+ btrfs_stripe_nr_to_offset(rot);
scrub_reset_stripe(stripe);
set_bit(SCRUB_STRIPE_FLAG_NO_REPORT, &stripe->state);
ret = scrub_find_fill_first_stripe(bg,
map->stripes[stripe_index].dev, physical, 1,
- full_stripe_start + (i << BTRFS_STRIPE_LEN_SHIFT),
+ full_stripe_start + btrfs_stripe_nr_to_offset(i),
BTRFS_STRIPE_LEN, stripe);
if (ret < 0)
goto out;
@@ -1841,7 +1820,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
*/
if (ret > 0) {
stripe->logical = full_stripe_start +
- (i << BTRFS_STRIPE_LEN_SHIFT);
+ btrfs_stripe_nr_to_offset(i);
stripe->dev = map->stripes[stripe_index].dev;
stripe->mirror_num = 1;
set_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state);
@@ -1929,8 +1908,8 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
bio->bi_end_io = raid56_scrub_wait_endio;
btrfs_bio_counter_inc_blocked(fs_info);
- ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, full_stripe_start,
- &length, &bioc);
+ ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, full_stripe_start,
+ &length, &bioc, NULL, NULL, 1);
if (ret < 0) {
btrfs_put_bioc(bioc);
btrfs_bio_counter_dec(fs_info);
@@ -1944,6 +1923,13 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
btrfs_bio_counter_dec(fs_info);
goto out;
}
+ /* Use the recovered stripes as cache to avoid read them from disk again. */
+ for (int i = 0; i < data_stripes; i++) {
+ stripe = &sctx->raid56_data_stripes[i];
+
+ raid56_parity_cache_data_pages(rbio, stripe->pages,
+ full_stripe_start + (i << BTRFS_STRIPE_LEN_SHIFT));
+ }
raid56_parity_submit_scrub_rbio(rbio);
wait_for_completion_io(&io_done);
ret = blk_status_to_errno(bio->bi_status);
@@ -2034,7 +2020,7 @@ static u64 simple_stripe_full_stripe_len(const struct map_lookup *map)
ASSERT(map->type & (BTRFS_BLOCK_GROUP_RAID0 |
BTRFS_BLOCK_GROUP_RAID10));
- return (map->num_stripes / map->sub_stripes) << BTRFS_STRIPE_LEN_SHIFT;
+ return btrfs_stripe_nr_to_offset(map->num_stripes / map->sub_stripes);
}
/* Get the logical bytenr for the stripe */
@@ -2050,7 +2036,7 @@ static u64 simple_stripe_get_logical(struct map_lookup *map,
* (stripe_index / sub_stripes) gives how many data stripes we need to
* skip.
*/
- return ((stripe_index / map->sub_stripes) << BTRFS_STRIPE_LEN_SHIFT) +
+ return btrfs_stripe_nr_to_offset(stripe_index / map->sub_stripes) +
bg->start;
}
@@ -2176,7 +2162,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
}
if (profile & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) {
ret = scrub_simple_stripe(sctx, bg, map, scrub_dev, stripe_index);
- offset = (stripe_index / map->sub_stripes) << BTRFS_STRIPE_LEN_SHIFT;
+ offset = btrfs_stripe_nr_to_offset(stripe_index / map->sub_stripes);
goto out;
}
@@ -2191,7 +2177,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
/* Initialize @offset in case we need to go to out: label */
get_raid56_logic_offset(physical, stripe_index, map, &offset, NULL);
- increment = nr_data_stripes(map) << BTRFS_STRIPE_LEN_SHIFT;
+ increment = btrfs_stripe_nr_to_offset(nr_data_stripes(map));
/*
* Due to the rotation, for RAID56 it's better to iterate each stripe
@@ -2238,7 +2224,7 @@ next:
}
out:
ret2 = flush_scrub_stripes(sctx);
- if (!ret2)
+ if (!ret)
ret = ret2;
if (sctx->raid56_data_stripes) {
for (int i = 0; i < nr_data_stripes(map); i++)
@@ -2518,13 +2504,20 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
if (ret == 0) {
ro_set = 1;
- } else if (ret == -ENOSPC && !sctx->is_dev_replace) {
+ } else if (ret == -ENOSPC && !sctx->is_dev_replace &&
+ !(cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK)) {
/*
* btrfs_inc_block_group_ro return -ENOSPC when it
* failed in creating new chunk for metadata.
* It is not a problem for scrub, because
* metadata are always cowed, and our scrub paused
* commit_transactions.
+ *
+ * For RAID56 chunks, we have to mark them read-only
+ * for scrub, as later we would use our own cache
+ * out of RAID56 realm.
+ * Thus we want the RAID56 bg to be marked RO to
+ * prevent RMW from screwing up out cache.
*/
ro_set = 0;
} else if (ret == -ETXTBSY) {
@@ -2705,17 +2698,12 @@ static void scrub_workers_put(struct btrfs_fs_info *fs_info)
if (refcount_dec_and_mutex_lock(&fs_info->scrub_workers_refcnt,
&fs_info->scrub_lock)) {
struct workqueue_struct *scrub_workers = fs_info->scrub_workers;
- struct workqueue_struct *scrub_wr_comp =
- fs_info->scrub_wr_completion_workers;
fs_info->scrub_workers = NULL;
- fs_info->scrub_wr_completion_workers = NULL;
mutex_unlock(&fs_info->scrub_lock);
if (scrub_workers)
destroy_workqueue(scrub_workers);
- if (scrub_wr_comp)
- destroy_workqueue(scrub_wr_comp);
}
}
@@ -2726,7 +2714,6 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
int is_dev_replace)
{
struct workqueue_struct *scrub_workers = NULL;
- struct workqueue_struct *scrub_wr_comp = NULL;
unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
int max_active = fs_info->thread_pool_size;
int ret = -ENOMEM;
@@ -2734,21 +2721,17 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
if (refcount_inc_not_zero(&fs_info->scrub_workers_refcnt))
return 0;
- scrub_workers = alloc_workqueue("btrfs-scrub", flags,
- is_dev_replace ? 1 : max_active);
+ if (is_dev_replace)
+ scrub_workers = alloc_ordered_workqueue("btrfs-scrub", flags);
+ else
+ scrub_workers = alloc_workqueue("btrfs-scrub", flags, max_active);
if (!scrub_workers)
- goto fail_scrub_workers;
-
- scrub_wr_comp = alloc_workqueue("btrfs-scrubwrc", flags, max_active);
- if (!scrub_wr_comp)
- goto fail_scrub_wr_completion_workers;
+ return -ENOMEM;
mutex_lock(&fs_info->scrub_lock);
if (refcount_read(&fs_info->scrub_workers_refcnt) == 0) {
- ASSERT(fs_info->scrub_workers == NULL &&
- fs_info->scrub_wr_completion_workers == NULL);
+ ASSERT(fs_info->scrub_workers == NULL);
fs_info->scrub_workers = scrub_workers;
- fs_info->scrub_wr_completion_workers = scrub_wr_comp;
refcount_set(&fs_info->scrub_workers_refcnt, 1);
mutex_unlock(&fs_info->scrub_lock);
return 0;
@@ -2759,10 +2742,7 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
ret = 0;
- destroy_workqueue(scrub_wr_comp);
-fail_scrub_wr_completion_workers:
destroy_workqueue(scrub_workers);
-fail_scrub_workers:
return ret;
}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index af2e153543a5..8bfd44750efe 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1774,9 +1774,21 @@ static int read_symlink(struct btrfs_root *root,
ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_file_extent_item);
type = btrfs_file_extent_type(path->nodes[0], ei);
+ if (unlikely(type != BTRFS_FILE_EXTENT_INLINE)) {
+ ret = -EUCLEAN;
+ btrfs_crit(root->fs_info,
+"send: found symlink extent that is not inline, ino %llu root %llu extent type %d",
+ ino, btrfs_root_id(root), type);
+ goto out;
+ }
compression = btrfs_file_extent_compression(path->nodes[0], ei);
- BUG_ON(type != BTRFS_FILE_EXTENT_INLINE);
- BUG_ON(compression);
+ if (unlikely(compression != BTRFS_COMPRESS_NONE)) {
+ ret = -EUCLEAN;
+ btrfs_crit(root->fs_info,
+"send: found symlink extent with compression, ino %llu root %llu compression type %d",
+ ino, btrfs_root_id(root), compression);
+ goto out;
+ }
off = btrfs_file_extent_inline_start(ei);
len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index dd46b978ac2c..1b999c6e4193 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -100,9 +100,6 @@ void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info, u32 sector
subpage_info->uptodate_offset = cur;
cur += nr_bits;
- subpage_info->error_offset = cur;
- cur += nr_bits;
-
subpage_info->dirty_offset = cur;
cur += nr_bits;
@@ -367,28 +364,6 @@ void btrfs_page_end_writer_lock(const struct btrfs_fs_info *fs_info,
unlock_page(page);
}
-static bool bitmap_test_range_all_set(unsigned long *addr, unsigned int start,
- unsigned int nbits)
-{
- unsigned int found_zero;
-
- found_zero = find_next_zero_bit(addr, start + nbits, start);
- if (found_zero == start + nbits)
- return true;
- return false;
-}
-
-static bool bitmap_test_range_all_zero(unsigned long *addr, unsigned int start,
- unsigned int nbits)
-{
- unsigned int found_set;
-
- found_set = find_next_bit(addr, start + nbits, start);
- if (found_set == start + nbits)
- return true;
- return false;
-}
-
#define subpage_calc_start_bit(fs_info, page, name, start, len) \
({ \
unsigned int start_bit; \
@@ -438,35 +413,6 @@ void btrfs_subpage_clear_uptodate(const struct btrfs_fs_info *fs_info,
spin_unlock_irqrestore(&subpage->lock, flags);
}
-void btrfs_subpage_set_error(const struct btrfs_fs_info *fs_info,
- struct page *page, u64 start, u32 len)
-{
- struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
- unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
- error, start, len);
- unsigned long flags;
-
- spin_lock_irqsave(&subpage->lock, flags);
- bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
- SetPageError(page);
- spin_unlock_irqrestore(&subpage->lock, flags);
-}
-
-void btrfs_subpage_clear_error(const struct btrfs_fs_info *fs_info,
- struct page *page, u64 start, u32 len)
-{
- struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
- unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
- error, start, len);
- unsigned long flags;
-
- spin_lock_irqsave(&subpage->lock, flags);
- bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
- if (subpage_test_bitmap_all_zero(fs_info, subpage, error))
- ClearPageError(page);
- spin_unlock_irqrestore(&subpage->lock, flags);
-}
-
void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info,
struct page *page, u64 start, u32 len)
{
@@ -628,7 +574,6 @@ bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info, \
return ret; \
}
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(uptodate);
-IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(error);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(dirty);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(writeback);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(ordered);
@@ -696,7 +641,6 @@ bool btrfs_page_clamp_test_##name(const struct btrfs_fs_info *fs_info, \
}
IMPLEMENT_BTRFS_PAGE_OPS(uptodate, SetPageUptodate, ClearPageUptodate,
PageUptodate);
-IMPLEMENT_BTRFS_PAGE_OPS(error, SetPageError, ClearPageError, PageError);
IMPLEMENT_BTRFS_PAGE_OPS(dirty, set_page_dirty, clear_page_dirty_for_io,
PageDirty);
IMPLEMENT_BTRFS_PAGE_OPS(writeback, set_page_writeback, end_page_writeback,
@@ -767,3 +711,44 @@ void btrfs_page_unlock_writer(struct btrfs_fs_info *fs_info, struct page *page,
/* Have writers, use proper subpage helper to end it */
btrfs_page_end_writer_lock(fs_info, page, start, len);
}
+
+#define GET_SUBPAGE_BITMAP(subpage, subpage_info, name, dst) \
+ bitmap_cut(dst, subpage->bitmaps, 0, \
+ subpage_info->name##_offset, subpage_info->bitmap_nr_bits)
+
+void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
+ struct page *page, u64 start, u32 len)
+{
+ struct btrfs_subpage_info *subpage_info = fs_info->subpage_info;
+ struct btrfs_subpage *subpage;
+ unsigned long uptodate_bitmap;
+ unsigned long error_bitmap;
+ unsigned long dirty_bitmap;
+ unsigned long writeback_bitmap;
+ unsigned long ordered_bitmap;
+ unsigned long checked_bitmap;
+ unsigned long flags;
+
+ ASSERT(PagePrivate(page) && page->private);
+ ASSERT(subpage_info);
+ subpage = (struct btrfs_subpage *)page->private;
+
+ spin_lock_irqsave(&subpage->lock, flags);
+ GET_SUBPAGE_BITMAP(subpage, subpage_info, uptodate, &uptodate_bitmap);
+ GET_SUBPAGE_BITMAP(subpage, subpage_info, dirty, &dirty_bitmap);
+ GET_SUBPAGE_BITMAP(subpage, subpage_info, writeback, &writeback_bitmap);
+ GET_SUBPAGE_BITMAP(subpage, subpage_info, ordered, &ordered_bitmap);
+ GET_SUBPAGE_BITMAP(subpage, subpage_info, checked, &checked_bitmap);
+ spin_unlock_irqrestore(&subpage->lock, flags);
+
+ dump_page(page, "btrfs subpage dump");
+ btrfs_warn(fs_info,
+"start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl error=%*pbl dirty=%*pbl writeback=%*pbl ordered=%*pbl checked=%*pbl",
+ start, len, page_offset(page),
+ subpage_info->bitmap_nr_bits, &uptodate_bitmap,
+ subpage_info->bitmap_nr_bits, &error_bitmap,
+ subpage_info->bitmap_nr_bits, &dirty_bitmap,
+ subpage_info->bitmap_nr_bits, &writeback_bitmap,
+ subpage_info->bitmap_nr_bits, &ordered_bitmap,
+ subpage_info->bitmap_nr_bits, &checked_bitmap);
+}
diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h
index 0e80ad336904..5cbf67ccbdeb 100644
--- a/fs/btrfs/subpage.h
+++ b/fs/btrfs/subpage.h
@@ -8,17 +8,17 @@
/*
* Extra info for subpapge bitmap.
*
- * For subpage we pack all uptodate/error/dirty/writeback/ordered bitmaps into
+ * For subpage we pack all uptodate/dirty/writeback/ordered bitmaps into
* one larger bitmap.
*
* This structure records how they are organized in the bitmap:
*
- * /- uptodate_offset /- error_offset /- dirty_offset
+ * /- uptodate_offset /- dirty_offset /- ordered_offset
* | | |
* v v v
- * |u|u|u|u|........|u|u|e|e|.......|e|e| ... |o|o|
+ * |u|u|u|u|........|u|u|d|d|.......|d|d|o|o|.......|o|o|
* |<- bitmap_nr_bits ->|
- * |<--------------- total_nr_bits ---------------->|
+ * |<----------------- total_nr_bits ------------------>|
*/
struct btrfs_subpage_info {
/* Number of bits for each bitmap */
@@ -32,7 +32,6 @@ struct btrfs_subpage_info {
* @bitmap_size, which is calculated from PAGE_SIZE / sectorsize.
*/
unsigned int uptodate_offset;
- unsigned int error_offset;
unsigned int dirty_offset;
unsigned int writeback_offset;
unsigned int ordered_offset;
@@ -141,7 +140,6 @@ bool btrfs_page_clamp_test_##name(const struct btrfs_fs_info *fs_info, \
struct page *page, u64 start, u32 len);
DECLARE_BTRFS_SUBPAGE_OPS(uptodate);
-DECLARE_BTRFS_SUBPAGE_OPS(error);
DECLARE_BTRFS_SUBPAGE_OPS(dirty);
DECLARE_BTRFS_SUBPAGE_OPS(writeback);
DECLARE_BTRFS_SUBPAGE_OPS(ordered);
@@ -154,5 +152,7 @@ void btrfs_page_assert_not_dirty(const struct btrfs_fs_info *fs_info,
struct page *page);
void btrfs_page_unlock_writer(struct btrfs_fs_info *fs_info, struct page *page,
u64 start, u32 len);
+void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
+ struct page *page, u64 start, u32 len);
#endif
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 1c3c1d7ad68c..f1dd172d8d5b 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1626,7 +1626,6 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
old_pool_size, new_pool_size);
btrfs_workqueue_set_max(fs_info->workers, new_pool_size);
- btrfs_workqueue_set_max(fs_info->hipri_workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
workqueue_set_max_active(fs_info->endio_workers, new_pool_size);
@@ -1836,6 +1835,12 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
btrfs_clear_sb_rdonly(sb);
set_bit(BTRFS_FS_OPEN, &fs_info->flags);
+
+ /*
+ * If we've gone from readonly -> read/write, we need to get
+ * our sync/async discard lists in the right state.
+ */
+ btrfs_discard_resume(fs_info);
}
out:
/*
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index dfc5c7fa6038..f6bc6d738555 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -159,7 +159,7 @@ static int test_find_delalloc(u32 sectorsize)
* |--- delalloc ---|
* |--- search ---|
*/
- set_extent_delalloc(tmp, 0, sectorsize - 1, 0, NULL);
+ set_extent_bit(tmp, 0, sectorsize - 1, EXTENT_DELALLOC, NULL);
start = 0;
end = start + PAGE_SIZE - 1;
found = find_lock_delalloc_range(inode, locked_page, &start,
@@ -190,7 +190,7 @@ static int test_find_delalloc(u32 sectorsize)
test_err("couldn't find the locked page");
goto out_bits;
}
- set_extent_delalloc(tmp, sectorsize, max_bytes - 1, 0, NULL);
+ set_extent_bit(tmp, sectorsize, max_bytes - 1, EXTENT_DELALLOC, NULL);
start = test_start;
end = start + PAGE_SIZE - 1;
found = find_lock_delalloc_range(inode, locked_page, &start,
@@ -245,7 +245,7 @@ static int test_find_delalloc(u32 sectorsize)
*
* We are re-using our test_start from above since it works out well.
*/
- set_extent_delalloc(tmp, max_bytes, total_dirty - 1, 0, NULL);
+ set_extent_bit(tmp, max_bytes, total_dirty - 1, EXTENT_DELALLOC, NULL);
start = test_start;
end = start + PAGE_SIZE - 1;
found = find_lock_delalloc_range(inode, locked_page, &start,
@@ -503,8 +503,8 @@ static int test_find_first_clear_extent_bit(void)
* Set 1M-4M alloc/discard and 32M-64M thus leaving a hole between
* 4M-32M
*/
- set_extent_bits(&tree, SZ_1M, SZ_4M - 1,
- CHUNK_TRIMMED | CHUNK_ALLOCATED);
+ set_extent_bit(&tree, SZ_1M, SZ_4M - 1,
+ CHUNK_TRIMMED | CHUNK_ALLOCATED, NULL);
find_first_clear_extent_bit(&tree, SZ_512K, &start, &end,
CHUNK_TRIMMED | CHUNK_ALLOCATED);
@@ -516,8 +516,8 @@ static int test_find_first_clear_extent_bit(void)
}
/* Now add 32M-64M so that we have a hole between 4M-32M */
- set_extent_bits(&tree, SZ_32M, SZ_64M - 1,
- CHUNK_TRIMMED | CHUNK_ALLOCATED);
+ set_extent_bit(&tree, SZ_32M, SZ_64M - 1,
+ CHUNK_TRIMMED | CHUNK_ALLOCATED, NULL);
/*
* Request first hole starting at 12M, we should get 4M-32M
@@ -548,7 +548,7 @@ static int test_find_first_clear_extent_bit(void)
* Set 64M-72M with CHUNK_ALLOC flag, then search for CHUNK_TRIMMED flag
* being unset in this range, we should get the entry in range 64M-72M
*/
- set_extent_bits(&tree, SZ_64M, SZ_64M + SZ_8M - 1, CHUNK_ALLOCATED);
+ set_extent_bit(&tree, SZ_64M, SZ_64M + SZ_8M - 1, CHUNK_ALLOCATED, NULL);
find_first_clear_extent_bit(&tree, SZ_64M + SZ_1M, &start, &end,
CHUNK_TRIMMED);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 8b6a99b8d7f6..cf306351b148 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -374,8 +374,6 @@ loop:
spin_lock_init(&cur_trans->dirty_bgs_lock);
INIT_LIST_HEAD(&cur_trans->deleted_bgs);
spin_lock_init(&cur_trans->dropped_roots_lock);
- INIT_LIST_HEAD(&cur_trans->releasing_ebs);
- spin_lock_init(&cur_trans->releasing_ebs_lock);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
extent_io_tree_init(fs_info, &cur_trans->dirty_pages,
IO_TREE_TRANS_DIRTY_PAGES);
@@ -1056,7 +1054,6 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
u64 start = 0;
u64 end;
- atomic_inc(&BTRFS_I(fs_info->btree_inode)->sync_writers);
while (!find_first_extent_bit(dirty_pages, start, &start, &end,
mark, &cached_state)) {
bool wait_writeback = false;
@@ -1092,7 +1089,6 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
cond_resched();
start = end + 1;
}
- atomic_dec(&BTRFS_I(fs_info->btree_inode)->sync_writers);
return werr;
}
@@ -1688,7 +1684,10 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
* insert the directory item
*/
ret = btrfs_set_inode_index(BTRFS_I(parent_inode), &index);
- BUG_ON(ret); /* -ENOMEM */
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto fail;
+ }
/* check if there is a file/dir which has the same name. */
dir_item = btrfs_lookup_dir_item(NULL, parent_root, path,
@@ -2484,13 +2483,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
goto scrub_continue;
}
- /*
- * At this point, we should have written all the tree blocks allocated
- * in this transaction. So it's now safe to free the redirtyied extent
- * buffers.
- */
- btrfs_free_redirty_list(cur_trans);
-
ret = write_all_supers(fs_info, 0);
/*
* the super is written, we can safely allow the tree-loggers
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index fa728ab80826..8e9fa23bd7fe 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -94,9 +94,6 @@ struct btrfs_transaction {
*/
atomic_t pending_ordered;
wait_queue_head_t pending_wait;
-
- spinlock_t releasing_ebs_lock;
- struct list_head releasing_ebs;
};
enum {
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index e2b54793bf0c..038dfa8f1788 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -25,10 +25,10 @@
#include "compression.h"
#include "volumes.h"
#include "misc.h"
-#include "btrfs_inode.h"
#include "fs.h"
#include "accessors.h"
#include "file-item.h"
+#include "inode-item.h"
/*
* Error message should follow the following format:
@@ -857,10 +857,10 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf,
*
* Thus it should be a good way to catch obvious bitflips.
*/
- if (unlikely(length >= ((u64)U32_MAX << BTRFS_STRIPE_LEN_SHIFT))) {
+ if (unlikely(length >= btrfs_stripe_nr_to_offset(U32_MAX))) {
chunk_err(leaf, chunk, logical,
"chunk length too large: have %llu limit %llu",
- length, (u64)U32_MAX << BTRFS_STRIPE_LEN_SHIFT);
+ length, btrfs_stripe_nr_to_offset(U32_MAX));
return -EUCLEAN;
}
if (unlikely(type & ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
@@ -1620,9 +1620,10 @@ static int check_inode_ref(struct extent_buffer *leaf,
/*
* Common point to switch the item-specific validation.
*/
-static int check_leaf_item(struct extent_buffer *leaf,
- struct btrfs_key *key, int slot,
- struct btrfs_key *prev_key)
+static enum btrfs_tree_block_status check_leaf_item(struct extent_buffer *leaf,
+ struct btrfs_key *key,
+ int slot,
+ struct btrfs_key *prev_key)
{
int ret = 0;
struct btrfs_chunk *chunk;
@@ -1671,10 +1672,13 @@ static int check_leaf_item(struct extent_buffer *leaf,
ret = check_extent_data_ref(leaf, key, slot);
break;
}
- return ret;
+
+ if (ret)
+ return BTRFS_TREE_BLOCK_INVALID_ITEM;
+ return BTRFS_TREE_BLOCK_CLEAN;
}
-static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
+enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf)
{
struct btrfs_fs_info *fs_info = leaf->fs_info;
/* No valid key type is 0, so all key should be larger than this key */
@@ -1687,7 +1691,7 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
generic_err(leaf, 0,
"invalid level for leaf, have %d expect 0",
btrfs_header_level(leaf));
- return -EUCLEAN;
+ return BTRFS_TREE_BLOCK_INVALID_LEVEL;
}
/*
@@ -1710,32 +1714,32 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
generic_err(leaf, 0,
"invalid root, root %llu must never be empty",
owner);
- return -EUCLEAN;
+ return BTRFS_TREE_BLOCK_INVALID_NRITEMS;
}
/* Unknown tree */
if (unlikely(owner == 0)) {
generic_err(leaf, 0,
"invalid owner, root 0 is not defined");
- return -EUCLEAN;
+ return BTRFS_TREE_BLOCK_INVALID_OWNER;
}
/* EXTENT_TREE_V2 can have empty extent trees. */
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
- return 0;
+ return BTRFS_TREE_BLOCK_CLEAN;
if (unlikely(owner == BTRFS_EXTENT_TREE_OBJECTID)) {
generic_err(leaf, 0,
"invalid root, root %llu must never be empty",
owner);
- return -EUCLEAN;
+ return BTRFS_TREE_BLOCK_INVALID_NRITEMS;
}
- return 0;
+ return BTRFS_TREE_BLOCK_CLEAN;
}
if (unlikely(nritems == 0))
- return 0;
+ return BTRFS_TREE_BLOCK_CLEAN;
/*
* Check the following things to make sure this is a good leaf, and
@@ -1751,7 +1755,6 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
for (slot = 0; slot < nritems; slot++) {
u32 item_end_expected;
u64 item_data_end;
- int ret;
btrfs_item_key_to_cpu(leaf, &key, slot);
@@ -1762,7 +1765,7 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
prev_key.objectid, prev_key.type,
prev_key.offset, key.objectid, key.type,
key.offset);
- return -EUCLEAN;
+ return BTRFS_TREE_BLOCK_BAD_KEY_ORDER;
}
item_data_end = (u64)btrfs_item_offset(leaf, slot) +
@@ -1781,7 +1784,7 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
generic_err(leaf, slot,
"unexpected item end, have %llu expect %u",
item_data_end, item_end_expected);
- return -EUCLEAN;
+ return BTRFS_TREE_BLOCK_INVALID_OFFSETS;
}
/*
@@ -1793,7 +1796,7 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
generic_err(leaf, slot,
"slot end outside of leaf, have %llu expect range [0, %u]",
item_data_end, BTRFS_LEAF_DATA_SIZE(fs_info));
- return -EUCLEAN;
+ return BTRFS_TREE_BLOCK_INVALID_OFFSETS;
}
/* Also check if the item pointer overlaps with btrfs item. */
@@ -1804,16 +1807,22 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
btrfs_item_nr_offset(leaf, slot) +
sizeof(struct btrfs_item),
btrfs_item_ptr_offset(leaf, slot));
- return -EUCLEAN;
+ return BTRFS_TREE_BLOCK_INVALID_OFFSETS;
}
- if (check_item_data) {
+ /*
+ * We only want to do this if WRITTEN is set, otherwise the leaf
+ * may be in some intermediate state and won't appear valid.
+ */
+ if (btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_WRITTEN)) {
+ enum btrfs_tree_block_status ret;
+
/*
* Check if the item size and content meet other
* criteria
*/
ret = check_leaf_item(leaf, &key, slot, &prev_key);
- if (unlikely(ret < 0))
+ if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN))
return ret;
}
@@ -1822,21 +1831,21 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
prev_key.offset = key.offset;
}
- return 0;
+ return BTRFS_TREE_BLOCK_CLEAN;
}
-int btrfs_check_leaf_full(struct extent_buffer *leaf)
+int btrfs_check_leaf(struct extent_buffer *leaf)
{
- return check_leaf(leaf, true);
-}
-ALLOW_ERROR_INJECTION(btrfs_check_leaf_full, ERRNO);
+ enum btrfs_tree_block_status ret;
-int btrfs_check_leaf_relaxed(struct extent_buffer *leaf)
-{
- return check_leaf(leaf, false);
+ ret = __btrfs_check_leaf(leaf);
+ if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN))
+ return -EUCLEAN;
+ return 0;
}
+ALLOW_ERROR_INJECTION(btrfs_check_leaf, ERRNO);
-int btrfs_check_node(struct extent_buffer *node)
+enum btrfs_tree_block_status __btrfs_check_node(struct extent_buffer *node)
{
struct btrfs_fs_info *fs_info = node->fs_info;
unsigned long nr = btrfs_header_nritems(node);
@@ -1844,13 +1853,12 @@ int btrfs_check_node(struct extent_buffer *node)
int slot;
int level = btrfs_header_level(node);
u64 bytenr;
- int ret = 0;
if (unlikely(level <= 0 || level >= BTRFS_MAX_LEVEL)) {
generic_err(node, 0,
"invalid level for node, have %d expect [1, %d]",
level, BTRFS_MAX_LEVEL - 1);
- return -EUCLEAN;
+ return BTRFS_TREE_BLOCK_INVALID_LEVEL;
}
if (unlikely(nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(fs_info))) {
btrfs_crit(fs_info,
@@ -1858,7 +1866,7 @@ int btrfs_check_node(struct extent_buffer *node)
btrfs_header_owner(node), node->start,
nr == 0 ? "small" : "large", nr,
BTRFS_NODEPTRS_PER_BLOCK(fs_info));
- return -EUCLEAN;
+ return BTRFS_TREE_BLOCK_INVALID_NRITEMS;
}
for (slot = 0; slot < nr - 1; slot++) {
@@ -1869,15 +1877,13 @@ int btrfs_check_node(struct extent_buffer *node)
if (unlikely(!bytenr)) {
generic_err(node, slot,
"invalid NULL node pointer");
- ret = -EUCLEAN;
- goto out;
+ return BTRFS_TREE_BLOCK_INVALID_BLOCKPTR;
}
if (unlikely(!IS_ALIGNED(bytenr, fs_info->sectorsize))) {
generic_err(node, slot,
"unaligned pointer, have %llu should be aligned to %u",
bytenr, fs_info->sectorsize);
- ret = -EUCLEAN;
- goto out;
+ return BTRFS_TREE_BLOCK_INVALID_BLOCKPTR;
}
if (unlikely(btrfs_comp_cpu_keys(&key, &next_key) >= 0)) {
@@ -1886,12 +1892,20 @@ int btrfs_check_node(struct extent_buffer *node)
key.objectid, key.type, key.offset,
next_key.objectid, next_key.type,
next_key.offset);
- ret = -EUCLEAN;
- goto out;
+ return BTRFS_TREE_BLOCK_BAD_KEY_ORDER;
}
}
-out:
- return ret;
+ return BTRFS_TREE_BLOCK_CLEAN;
+}
+
+int btrfs_check_node(struct extent_buffer *node)
+{
+ enum btrfs_tree_block_status ret;
+
+ ret = __btrfs_check_node(node);
+ if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN))
+ return -EUCLEAN;
+ return 0;
}
ALLOW_ERROR_INJECTION(btrfs_check_node, ERRNO);
@@ -1949,3 +1963,61 @@ int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner)
}
return 0;
}
+
+int btrfs_verify_level_key(struct extent_buffer *eb, int level,
+ struct btrfs_key *first_key, u64 parent_transid)
+{
+ struct btrfs_fs_info *fs_info = eb->fs_info;
+ int found_level;
+ struct btrfs_key found_key;
+ int ret;
+
+ found_level = btrfs_header_level(eb);
+ if (found_level != level) {
+ WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
+ KERN_ERR "BTRFS: tree level check failed\n");
+ btrfs_err(fs_info,
+"tree level mismatch detected, bytenr=%llu level expected=%u has=%u",
+ eb->start, level, found_level);
+ return -EIO;
+ }
+
+ if (!first_key)
+ return 0;
+
+ /*
+ * For live tree block (new tree blocks in current transaction),
+ * we need proper lock context to avoid race, which is impossible here.
+ * So we only checks tree blocks which is read from disk, whose
+ * generation <= fs_info->last_trans_committed.
+ */
+ if (btrfs_header_generation(eb) > fs_info->last_trans_committed)
+ return 0;
+
+ /* We have @first_key, so this @eb must have at least one item */
+ if (btrfs_header_nritems(eb) == 0) {
+ btrfs_err(fs_info,
+ "invalid tree nritems, bytenr=%llu nritems=0 expect >0",
+ eb->start);
+ WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+ return -EUCLEAN;
+ }
+
+ if (found_level)
+ btrfs_node_key_to_cpu(eb, &found_key, 0);
+ else
+ btrfs_item_key_to_cpu(eb, &found_key, 0);
+ ret = btrfs_comp_cpu_keys(first_key, &found_key);
+
+ if (ret) {
+ WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
+ KERN_ERR "BTRFS: tree first key check failed\n");
+ btrfs_err(fs_info,
+"tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)",
+ eb->start, parent_transid, first_key->objectid,
+ first_key->type, first_key->offset,
+ found_key.objectid, found_key.type,
+ found_key.offset);
+ }
+ return ret;
+}
diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h
index bfb5efa4e01f..3c2a02a72f64 100644
--- a/fs/btrfs/tree-checker.h
+++ b/fs/btrfs/tree-checker.h
@@ -40,22 +40,33 @@ struct btrfs_tree_parent_check {
u8 level;
};
-/*
- * Comprehensive leaf checker.
- * Will check not only the item pointers, but also every possible member
- * in item data.
- */
-int btrfs_check_leaf_full(struct extent_buffer *leaf);
+enum btrfs_tree_block_status {
+ BTRFS_TREE_BLOCK_CLEAN,
+ BTRFS_TREE_BLOCK_INVALID_NRITEMS,
+ BTRFS_TREE_BLOCK_INVALID_PARENT_KEY,
+ BTRFS_TREE_BLOCK_BAD_KEY_ORDER,
+ BTRFS_TREE_BLOCK_INVALID_LEVEL,
+ BTRFS_TREE_BLOCK_INVALID_FREE_SPACE,
+ BTRFS_TREE_BLOCK_INVALID_OFFSETS,
+ BTRFS_TREE_BLOCK_INVALID_BLOCKPTR,
+ BTRFS_TREE_BLOCK_INVALID_ITEM,
+ BTRFS_TREE_BLOCK_INVALID_OWNER,
+};
/*
- * Less strict leaf checker.
- * Will only check item pointers, not reading item data.
+ * Exported simply for btrfs-progs which wants to have the
+ * btrfs_tree_block_status return codes.
*/
-int btrfs_check_leaf_relaxed(struct extent_buffer *leaf);
+enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf);
+enum btrfs_tree_block_status __btrfs_check_node(struct extent_buffer *node);
+
+int btrfs_check_leaf(struct extent_buffer *leaf);
int btrfs_check_node(struct extent_buffer *node);
int btrfs_check_chunk_valid(struct extent_buffer *leaf,
struct btrfs_chunk *chunk, u64 logical);
int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner);
+int btrfs_verify_level_key(struct extent_buffer *eb, int level,
+ struct btrfs_key *first_key, u64 parent_transid);
#endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9b212e8c70cc..365a1cc0a3c3 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -859,10 +859,10 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum,
list);
csum_root = btrfs_csum_root(fs_info,
- sums->bytenr);
+ sums->logical);
if (!ret)
ret = btrfs_del_csums(trans, csum_root,
- sums->bytenr,
+ sums->logical,
sums->len);
if (!ret)
ret = btrfs_csum_file_blocks(trans,
@@ -3252,7 +3252,7 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
* Returns 1 if the inode was logged before in the transaction, 0 if it was not,
* and < 0 on error.
*/
-static int inode_logged(struct btrfs_trans_handle *trans,
+static int inode_logged(const struct btrfs_trans_handle *trans,
struct btrfs_inode *inode,
struct btrfs_path *path_in)
{
@@ -4056,14 +4056,14 @@ static int drop_inode_items(struct btrfs_trans_handle *trans,
while (1) {
ret = btrfs_search_slot(trans, log, &key, path, -1, 1);
- BUG_ON(ret == 0); /* Logic error */
- if (ret < 0)
- break;
-
- if (path->slots[0] == 0)
+ if (ret < 0) {
break;
+ } else if (ret > 0) {
+ if (path->slots[0] == 0)
+ break;
+ path->slots[0]--;
+ }
- path->slots[0]--;
btrfs_item_key_to_cpu(path->nodes[0], &found_key,
path->slots[0]);
@@ -4221,7 +4221,7 @@ static int log_csums(struct btrfs_trans_handle *trans,
struct btrfs_root *log_root,
struct btrfs_ordered_sum *sums)
{
- const u64 lock_end = sums->bytenr + sums->len - 1;
+ const u64 lock_end = sums->logical + sums->len - 1;
struct extent_state *cached_state = NULL;
int ret;
@@ -4239,7 +4239,7 @@ static int log_csums(struct btrfs_trans_handle *trans,
* file which happens to refer to the same extent as well. Such races
* can leave checksum items in the log with overlapping ranges.
*/
- ret = lock_extent(&log_root->log_csum_range, sums->bytenr, lock_end,
+ ret = lock_extent(&log_root->log_csum_range, sums->logical, lock_end,
&cached_state);
if (ret)
return ret;
@@ -4252,11 +4252,11 @@ static int log_csums(struct btrfs_trans_handle *trans,
* some checksums missing in the fs/subvolume tree. So just delete (or
* trim and adjust) any existing csum items in the log for this range.
*/
- ret = btrfs_del_csums(trans, log_root, sums->bytenr, sums->len);
+ ret = btrfs_del_csums(trans, log_root, sums->logical, sums->len);
if (!ret)
ret = btrfs_csum_file_blocks(trans, log_root, sums);
- unlock_extent(&log_root->log_csum_range, sums->bytenr, lock_end,
+ unlock_extent(&log_root->log_csum_range, sums->logical, lock_end,
&cached_state);
return ret;
@@ -5303,7 +5303,7 @@ out:
* multiple times when multiple tasks have joined the same log transaction.
*/
static bool need_log_inode(const struct btrfs_trans_handle *trans,
- const struct btrfs_inode *inode)
+ struct btrfs_inode *inode)
{
/*
* If a directory was not modified, no dentries added or removed, we can
@@ -5321,7 +5321,7 @@ static bool need_log_inode(const struct btrfs_trans_handle *trans,
* logged_trans will be 0, in which case we have to fully log it since
* logged_trans is a transient field, not persisted.
*/
- if (inode->logged_trans == trans->transid &&
+ if (inode_logged(trans, inode, NULL) == 1 &&
!test_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags))
return false;
@@ -6158,7 +6158,7 @@ static int log_delayed_deletions_incremental(struct btrfs_trans_handle *trans,
{
struct btrfs_root *log = inode->root->log_root;
const struct btrfs_delayed_item *curr;
- u64 last_range_start;
+ u64 last_range_start = 0;
u64 last_range_end = 0;
struct btrfs_key key;
@@ -7309,7 +7309,7 @@ error:
*/
void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
struct btrfs_inode *dir, struct btrfs_inode *inode,
- int for_rename)
+ bool for_rename)
{
/*
* when we're logging a file, if it hasn't been renamed
@@ -7325,18 +7325,25 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
inode->last_unlink_trans = trans->transid;
mutex_unlock(&inode->log_mutex);
+ if (!for_rename)
+ return;
+
/*
- * if this directory was already logged any new
- * names for this file/dir will get recorded
+ * If this directory was already logged, any new names will be logged
+ * with btrfs_log_new_name() and old names will be deleted from the log
+ * tree with btrfs_del_dir_entries_in_log() or with
+ * btrfs_del_inode_ref_in_log().
*/
- if (dir->logged_trans == trans->transid)
+ if (inode_logged(trans, dir, NULL) == 1)
return;
/*
- * if the inode we're about to unlink was logged,
- * the log will be properly updated for any new names
+ * If the inode we're about to unlink was logged before, the log will be
+ * properly updated with the new name with btrfs_log_new_name() and the
+ * old name removed with btrfs_del_dir_entries_in_log() or with
+ * btrfs_del_inode_ref_in_log().
*/
- if (inode->logged_trans == trans->transid)
+ if (inode_logged(trans, inode, NULL) == 1)
return;
/*
@@ -7346,13 +7353,6 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
* properly. So, we have to be conservative and force commits
* so the new name gets discovered.
*/
- if (for_rename)
- goto record;
-
- /* we can safely do the unlink without any special recording */
- return;
-
-record:
mutex_lock(&dir->log_mutex);
dir->last_unlink_trans = trans->transid;
mutex_unlock(&dir->log_mutex);
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index bdeb5216718f..a550a8a375cd 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -100,7 +100,7 @@ void btrfs_end_log_trans(struct btrfs_root *root);
void btrfs_pin_log_trans(struct btrfs_root *root);
void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
struct btrfs_inode *dir, struct btrfs_inode *inode,
- int for_rename);
+ bool for_rename);
void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
struct btrfs_inode *dir);
void btrfs_log_new_name(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/tree-mod-log.c b/fs/btrfs/tree-mod-log.c
index a555baa0143a..3df6153d5d5a 100644
--- a/fs/btrfs/tree-mod-log.c
+++ b/fs/btrfs/tree-mod-log.c
@@ -226,21 +226,32 @@ int btrfs_tree_mod_log_insert_key(struct extent_buffer *eb, int slot,
enum btrfs_mod_log_op op)
{
struct tree_mod_elem *tm;
- int ret;
+ int ret = 0;
if (!tree_mod_need_log(eb->fs_info, eb))
return 0;
tm = alloc_tree_mod_elem(eb, slot, op);
if (!tm)
- return -ENOMEM;
+ ret = -ENOMEM;
if (tree_mod_dont_log(eb->fs_info, eb)) {
kfree(tm);
+ /*
+ * Don't error if we failed to allocate memory because we don't
+ * need to log.
+ */
return 0;
+ } else if (ret != 0) {
+ /*
+ * We previously failed to allocate memory and we need to log,
+ * so we have to fail.
+ */
+ goto out_unlock;
}
ret = tree_mod_log_insert(eb->fs_info, tm);
+out_unlock:
write_unlock(&eb->fs_info->tree_mod_log_lock);
if (ret)
kfree(tm);
@@ -248,6 +259,26 @@ int btrfs_tree_mod_log_insert_key(struct extent_buffer *eb, int slot,
return ret;
}
+static struct tree_mod_elem *tree_mod_log_alloc_move(struct extent_buffer *eb,
+ int dst_slot, int src_slot,
+ int nr_items)
+{
+ struct tree_mod_elem *tm;
+
+ tm = kzalloc(sizeof(*tm), GFP_NOFS);
+ if (!tm)
+ return ERR_PTR(-ENOMEM);
+
+ tm->logical = eb->start;
+ tm->slot = src_slot;
+ tm->move.dst_slot = dst_slot;
+ tm->move.nr_items = nr_items;
+ tm->op = BTRFS_MOD_LOG_MOVE_KEYS;
+ RB_CLEAR_NODE(&tm->node);
+
+ return tm;
+}
+
int btrfs_tree_mod_log_insert_move(struct extent_buffer *eb,
int dst_slot, int src_slot,
int nr_items)
@@ -262,35 +293,46 @@ int btrfs_tree_mod_log_insert_move(struct extent_buffer *eb,
return 0;
tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), GFP_NOFS);
- if (!tm_list)
- return -ENOMEM;
-
- tm = kzalloc(sizeof(*tm), GFP_NOFS);
- if (!tm) {
+ if (!tm_list) {
ret = -ENOMEM;
- goto free_tms;
+ goto lock;
}
- tm->logical = eb->start;
- tm->slot = src_slot;
- tm->move.dst_slot = dst_slot;
- tm->move.nr_items = nr_items;
- tm->op = BTRFS_MOD_LOG_MOVE_KEYS;
+ tm = tree_mod_log_alloc_move(eb, dst_slot, src_slot, nr_items);
+ if (IS_ERR(tm)) {
+ ret = PTR_ERR(tm);
+ tm = NULL;
+ goto lock;
+ }
for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
tm_list[i] = alloc_tree_mod_elem(eb, i + dst_slot,
BTRFS_MOD_LOG_KEY_REMOVE_WHILE_MOVING);
if (!tm_list[i]) {
ret = -ENOMEM;
- goto free_tms;
+ goto lock;
}
}
- if (tree_mod_dont_log(eb->fs_info, eb))
+lock:
+ if (tree_mod_dont_log(eb->fs_info, eb)) {
+ /*
+ * Don't error if we failed to allocate memory because we don't
+ * need to log.
+ */
+ ret = 0;
goto free_tms;
+ }
locked = true;
/*
+ * We previously failed to allocate memory and we need to log, so we
+ * have to fail.
+ */
+ if (ret != 0)
+ goto free_tms;
+
+ /*
* When we override something during the move, we log these removals.
* This can only happen when we move towards the beginning of the
* buffer, i.e. dst_slot < src_slot.
@@ -310,10 +352,12 @@ int btrfs_tree_mod_log_insert_move(struct extent_buffer *eb,
return 0;
free_tms:
- for (i = 0; i < nr_items; i++) {
- if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
- rb_erase(&tm_list[i]->node, &eb->fs_info->tree_mod_log);
- kfree(tm_list[i]);
+ if (tm_list) {
+ for (i = 0; i < nr_items; i++) {
+ if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
+ rb_erase(&tm_list[i]->node, &eb->fs_info->tree_mod_log);
+ kfree(tm_list[i]);
+ }
}
if (locked)
write_unlock(&eb->fs_info->tree_mod_log_lock);
@@ -363,14 +407,14 @@ int btrfs_tree_mod_log_insert_root(struct extent_buffer *old_root,
GFP_NOFS);
if (!tm_list) {
ret = -ENOMEM;
- goto free_tms;
+ goto lock;
}
for (i = 0; i < nritems; i++) {
tm_list[i] = alloc_tree_mod_elem(old_root, i,
BTRFS_MOD_LOG_KEY_REMOVE_WHILE_FREEING);
if (!tm_list[i]) {
ret = -ENOMEM;
- goto free_tms;
+ goto lock;
}
}
}
@@ -378,7 +422,7 @@ int btrfs_tree_mod_log_insert_root(struct extent_buffer *old_root,
tm = kzalloc(sizeof(*tm), GFP_NOFS);
if (!tm) {
ret = -ENOMEM;
- goto free_tms;
+ goto lock;
}
tm->logical = new_root->start;
@@ -387,14 +431,28 @@ int btrfs_tree_mod_log_insert_root(struct extent_buffer *old_root,
tm->generation = btrfs_header_generation(old_root);
tm->op = BTRFS_MOD_LOG_ROOT_REPLACE;
- if (tree_mod_dont_log(fs_info, NULL))
+lock:
+ if (tree_mod_dont_log(fs_info, NULL)) {
+ /*
+ * Don't error if we failed to allocate memory because we don't
+ * need to log.
+ */
+ ret = 0;
goto free_tms;
+ } else if (ret != 0) {
+ /*
+ * We previously failed to allocate memory and we need to log,
+ * so we have to fail.
+ */
+ goto out_unlock;
+ }
if (tm_list)
ret = tree_mod_log_free_eb(fs_info, tm_list, nritems);
if (!ret)
ret = tree_mod_log_insert(fs_info, tm);
+out_unlock:
write_unlock(&fs_info->tree_mod_log_lock);
if (ret)
goto free_tms;
@@ -486,9 +544,14 @@ int btrfs_tree_mod_log_eb_copy(struct extent_buffer *dst,
struct btrfs_fs_info *fs_info = dst->fs_info;
int ret = 0;
struct tree_mod_elem **tm_list = NULL;
- struct tree_mod_elem **tm_list_add, **tm_list_rem;
+ struct tree_mod_elem **tm_list_add = NULL;
+ struct tree_mod_elem **tm_list_rem = NULL;
int i;
bool locked = false;
+ struct tree_mod_elem *dst_move_tm = NULL;
+ struct tree_mod_elem *src_move_tm = NULL;
+ u32 dst_move_nr_items = btrfs_header_nritems(dst) - dst_offset;
+ u32 src_move_nr_items = btrfs_header_nritems(src) - (src_offset + nr_items);
if (!tree_mod_need_log(fs_info, NULL))
return 0;
@@ -498,8 +561,30 @@ int btrfs_tree_mod_log_eb_copy(struct extent_buffer *dst,
tm_list = kcalloc(nr_items * 2, sizeof(struct tree_mod_elem *),
GFP_NOFS);
- if (!tm_list)
- return -ENOMEM;
+ if (!tm_list) {
+ ret = -ENOMEM;
+ goto lock;
+ }
+
+ if (dst_move_nr_items) {
+ dst_move_tm = tree_mod_log_alloc_move(dst, dst_offset + nr_items,
+ dst_offset, dst_move_nr_items);
+ if (IS_ERR(dst_move_tm)) {
+ ret = PTR_ERR(dst_move_tm);
+ dst_move_tm = NULL;
+ goto lock;
+ }
+ }
+ if (src_move_nr_items) {
+ src_move_tm = tree_mod_log_alloc_move(src, src_offset,
+ src_offset + nr_items,
+ src_move_nr_items);
+ if (IS_ERR(src_move_tm)) {
+ ret = PTR_ERR(src_move_tm);
+ src_move_tm = NULL;
+ goto lock;
+ }
+ }
tm_list_add = tm_list;
tm_list_rem = tm_list + nr_items;
@@ -508,21 +593,40 @@ int btrfs_tree_mod_log_eb_copy(struct extent_buffer *dst,
BTRFS_MOD_LOG_KEY_REMOVE);
if (!tm_list_rem[i]) {
ret = -ENOMEM;
- goto free_tms;
+ goto lock;
}
tm_list_add[i] = alloc_tree_mod_elem(dst, i + dst_offset,
BTRFS_MOD_LOG_KEY_ADD);
if (!tm_list_add[i]) {
ret = -ENOMEM;
- goto free_tms;
+ goto lock;
}
}
- if (tree_mod_dont_log(fs_info, NULL))
+lock:
+ if (tree_mod_dont_log(fs_info, NULL)) {
+ /*
+ * Don't error if we failed to allocate memory because we don't
+ * need to log.
+ */
+ ret = 0;
goto free_tms;
+ }
locked = true;
+ /*
+ * We previously failed to allocate memory and we need to log, so we
+ * have to fail.
+ */
+ if (ret != 0)
+ goto free_tms;
+
+ if (dst_move_tm) {
+ ret = tree_mod_log_insert(fs_info, dst_move_tm);
+ if (ret)
+ goto free_tms;
+ }
for (i = 0; i < nr_items; i++) {
ret = tree_mod_log_insert(fs_info, tm_list_rem[i]);
if (ret)
@@ -531,6 +635,11 @@ int btrfs_tree_mod_log_eb_copy(struct extent_buffer *dst,
if (ret)
goto free_tms;
}
+ if (src_move_tm) {
+ ret = tree_mod_log_insert(fs_info, src_move_tm);
+ if (ret)
+ goto free_tms;
+ }
write_unlock(&fs_info->tree_mod_log_lock);
kfree(tm_list);
@@ -538,10 +647,18 @@ int btrfs_tree_mod_log_eb_copy(struct extent_buffer *dst,
return 0;
free_tms:
- for (i = 0; i < nr_items * 2; i++) {
- if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
- rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
- kfree(tm_list[i]);
+ if (dst_move_tm && !RB_EMPTY_NODE(&dst_move_tm->node))
+ rb_erase(&dst_move_tm->node, &fs_info->tree_mod_log);
+ kfree(dst_move_tm);
+ if (src_move_tm && !RB_EMPTY_NODE(&src_move_tm->node))
+ rb_erase(&src_move_tm->node, &fs_info->tree_mod_log);
+ kfree(src_move_tm);
+ if (tm_list) {
+ for (i = 0; i < nr_items * 2; i++) {
+ if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
+ rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
+ kfree(tm_list[i]);
+ }
}
if (locked)
write_unlock(&fs_info->tree_mod_log_lock);
@@ -562,22 +679,38 @@ int btrfs_tree_mod_log_free_eb(struct extent_buffer *eb)
nritems = btrfs_header_nritems(eb);
tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *), GFP_NOFS);
- if (!tm_list)
- return -ENOMEM;
+ if (!tm_list) {
+ ret = -ENOMEM;
+ goto lock;
+ }
for (i = 0; i < nritems; i++) {
tm_list[i] = alloc_tree_mod_elem(eb, i,
BTRFS_MOD_LOG_KEY_REMOVE_WHILE_FREEING);
if (!tm_list[i]) {
ret = -ENOMEM;
- goto free_tms;
+ goto lock;
}
}
- if (tree_mod_dont_log(eb->fs_info, eb))
+lock:
+ if (tree_mod_dont_log(eb->fs_info, eb)) {
+ /*
+ * Don't error if we failed to allocate memory because we don't
+ * need to log.
+ */
+ ret = 0;
goto free_tms;
+ } else if (ret != 0) {
+ /*
+ * We previously failed to allocate memory and we need to log,
+ * so we have to fail.
+ */
+ goto out_unlock;
+ }
ret = tree_mod_log_free_eb(eb->fs_info, tm_list, nritems);
+out_unlock:
write_unlock(&eb->fs_info->tree_mod_log_lock);
if (ret)
goto free_tms;
@@ -586,9 +719,11 @@ int btrfs_tree_mod_log_free_eb(struct extent_buffer *eb)
return 0;
free_tms:
- for (i = 0; i < nritems; i++)
- kfree(tm_list[i]);
- kfree(tm_list);
+ if (tm_list) {
+ for (i = 0; i < nritems; i++)
+ kfree(tm_list[i]);
+ kfree(tm_list);
+ }
return ret;
}
@@ -664,10 +799,27 @@ static void tree_mod_log_rewind(struct btrfs_fs_info *fs_info,
unsigned long o_dst;
unsigned long o_src;
unsigned long p_size = sizeof(struct btrfs_key_ptr);
+ /*
+ * max_slot tracks the maximum valid slot of the rewind eb at every
+ * step of the rewind. This is in contrast with 'n' which eventually
+ * matches the number of items, but can be wrong during moves or if
+ * removes overlap on already valid slots (which is probably separately
+ * a bug). We do this to validate the offsets of memmoves for rewinding
+ * moves and detect invalid memmoves.
+ *
+ * Since a rewind eb can start empty, max_slot is a signed integer with
+ * a special meaning for -1, which is that no slot is valid to move out
+ * of. Any other negative value is invalid.
+ */
+ int max_slot;
+ int move_src_end_slot;
+ int move_dst_end_slot;
n = btrfs_header_nritems(eb);
+ max_slot = n - 1;
read_lock(&fs_info->tree_mod_log_lock);
while (tm && tm->seq >= time_seq) {
+ ASSERT(max_slot >= -1);
/*
* All the operations are recorded with the operator used for
* the modification. As we're going backwards, we do the
@@ -684,6 +836,8 @@ static void tree_mod_log_rewind(struct btrfs_fs_info *fs_info,
btrfs_set_node_ptr_generation(eb, tm->slot,
tm->generation);
n++;
+ if (tm->slot > max_slot)
+ max_slot = tm->slot;
break;
case BTRFS_MOD_LOG_KEY_REPLACE:
BUG_ON(tm->slot >= n);
@@ -693,14 +847,37 @@ static void tree_mod_log_rewind(struct btrfs_fs_info *fs_info,
tm->generation);
break;
case BTRFS_MOD_LOG_KEY_ADD:
+ /*
+ * It is possible we could have already removed keys
+ * behind the known max slot, so this will be an
+ * overestimate. In practice, the copy operation
+ * inserts them in increasing order, and overestimating
+ * just means we miss some warnings, so it's OK. It
+ * isn't worth carefully tracking the full array of
+ * valid slots to check against when moving.
+ */
+ if (tm->slot == max_slot)
+ max_slot--;
/* if a move operation is needed it's in the log */
n--;
break;
case BTRFS_MOD_LOG_MOVE_KEYS:
+ ASSERT(tm->move.nr_items > 0);
+ move_src_end_slot = tm->move.dst_slot + tm->move.nr_items - 1;
+ move_dst_end_slot = tm->slot + tm->move.nr_items - 1;
o_dst = btrfs_node_key_ptr_offset(eb, tm->slot);
o_src = btrfs_node_key_ptr_offset(eb, tm->move.dst_slot);
+ if (WARN_ON(move_src_end_slot > max_slot ||
+ tm->move.nr_items <= 0)) {
+ btrfs_warn(fs_info,
+"move from invalid tree mod log slot eb %llu slot %d dst_slot %d nr_items %d seq %llu n %u max_slot %d",
+ eb->start, tm->slot,
+ tm->move.dst_slot, tm->move.nr_items,
+ tm->seq, n, max_slot);
+ }
memmove_extent_buffer(eb, o_dst, o_src,
tm->move.nr_items * p_size);
+ max_slot = move_dst_end_slot;
break;
case BTRFS_MOD_LOG_ROOT_REPLACE:
/*
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c85e54f86035..73f9ea7672db 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -370,6 +370,8 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid,
{
struct btrfs_fs_devices *fs_devs;
+ ASSERT(fsid || !metadata_fsid);
+
fs_devs = kzalloc(sizeof(*fs_devs), GFP_KERNEL);
if (!fs_devs)
return ERR_PTR(-ENOMEM);
@@ -380,18 +382,17 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid,
INIT_LIST_HEAD(&fs_devs->alloc_list);
INIT_LIST_HEAD(&fs_devs->fs_list);
INIT_LIST_HEAD(&fs_devs->seed_list);
- if (fsid)
- memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE);
- if (metadata_fsid)
- memcpy(fs_devs->metadata_uuid, metadata_fsid, BTRFS_FSID_SIZE);
- else if (fsid)
- memcpy(fs_devs->metadata_uuid, fsid, BTRFS_FSID_SIZE);
+ if (fsid) {
+ memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE);
+ memcpy(fs_devs->metadata_uuid,
+ metadata_fsid ?: fsid, BTRFS_FSID_SIZE);
+ }
return fs_devs;
}
-void btrfs_free_device(struct btrfs_device *device)
+static void btrfs_free_device(struct btrfs_device *device)
{
WARN_ON(!list_empty(&device->post_commit_list));
rcu_string_free(device->name);
@@ -426,6 +427,21 @@ void __exit btrfs_cleanup_fs_uuids(void)
}
}
+static bool match_fsid_fs_devices(const struct btrfs_fs_devices *fs_devices,
+ const u8 *fsid, const u8 *metadata_fsid)
+{
+ if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) != 0)
+ return false;
+
+ if (!metadata_fsid)
+ return true;
+
+ if (memcmp(metadata_fsid, fs_devices->metadata_uuid, BTRFS_FSID_SIZE) != 0)
+ return false;
+
+ return true;
+}
+
static noinline struct btrfs_fs_devices *find_fsid(
const u8 *fsid, const u8 *metadata_fsid)
{
@@ -435,19 +451,25 @@ static noinline struct btrfs_fs_devices *find_fsid(
/* Handle non-split brain cases */
list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
- if (metadata_fsid) {
- if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0
- && memcmp(metadata_fsid, fs_devices->metadata_uuid,
- BTRFS_FSID_SIZE) == 0)
- return fs_devices;
- } else {
- if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
- return fs_devices;
- }
+ if (match_fsid_fs_devices(fs_devices, fsid, metadata_fsid))
+ return fs_devices;
}
return NULL;
}
+/*
+ * First check if the metadata_uuid is different from the fsid in the given
+ * fs_devices. Then check if the given fsid is the same as the metadata_uuid
+ * in the fs_devices. If it is, return true; otherwise, return false.
+ */
+static inline bool check_fsid_changed(const struct btrfs_fs_devices *fs_devices,
+ const u8 *fsid)
+{
+ return memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
+ BTRFS_FSID_SIZE) != 0 &&
+ memcmp(fs_devices->metadata_uuid, fsid, BTRFS_FSID_SIZE) == 0;
+}
+
static struct btrfs_fs_devices *find_fsid_with_metadata_uuid(
struct btrfs_super_block *disk_super)
{
@@ -461,14 +483,14 @@ static struct btrfs_fs_devices *find_fsid_with_metadata_uuid(
* at all and the CHANGING_FSID_V2 flag set.
*/
list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
- if (fs_devices->fsid_change &&
- memcmp(disk_super->metadata_uuid, fs_devices->fsid,
- BTRFS_FSID_SIZE) == 0 &&
- memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
- BTRFS_FSID_SIZE) == 0) {
+ if (!fs_devices->fsid_change)
+ continue;
+
+ if (match_fsid_fs_devices(fs_devices, disk_super->metadata_uuid,
+ fs_devices->fsid))
return fs_devices;
- }
}
+
/*
* Handle scanned device having completed its fsid change but
* belonging to a fs_devices that was created by a device that
@@ -476,13 +498,11 @@ static struct btrfs_fs_devices *find_fsid_with_metadata_uuid(
* CHANGING_FSID_V2 flag set.
*/
list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
- if (fs_devices->fsid_change &&
- memcmp(fs_devices->metadata_uuid,
- fs_devices->fsid, BTRFS_FSID_SIZE) != 0 &&
- memcmp(disk_super->metadata_uuid, fs_devices->metadata_uuid,
- BTRFS_FSID_SIZE) == 0) {
+ if (!fs_devices->fsid_change)
+ continue;
+
+ if (check_fsid_changed(fs_devices, disk_super->metadata_uuid))
return fs_devices;
- }
}
return find_fsid(disk_super->fsid, disk_super->metadata_uuid);
@@ -673,18 +693,16 @@ static struct btrfs_fs_devices *find_fsid_inprogress(
struct btrfs_fs_devices *fs_devices;
list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
- if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
- BTRFS_FSID_SIZE) != 0 &&
- memcmp(fs_devices->metadata_uuid, disk_super->fsid,
- BTRFS_FSID_SIZE) == 0 && !fs_devices->fsid_change) {
+ if (fs_devices->fsid_change)
+ continue;
+
+ if (check_fsid_changed(fs_devices, disk_super->fsid))
return fs_devices;
- }
}
return find_fsid(disk_super->fsid, NULL);
}
-
static struct btrfs_fs_devices *find_fsid_changed(
struct btrfs_super_block *disk_super)
{
@@ -701,10 +719,7 @@ static struct btrfs_fs_devices *find_fsid_changed(
*/
list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
/* Changed UUIDs */
- if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
- BTRFS_FSID_SIZE) != 0 &&
- memcmp(fs_devices->metadata_uuid, disk_super->metadata_uuid,
- BTRFS_FSID_SIZE) == 0 &&
+ if (check_fsid_changed(fs_devices, disk_super->metadata_uuid) &&
memcmp(fs_devices->fsid, disk_super->fsid,
BTRFS_FSID_SIZE) != 0)
return fs_devices;
@@ -735,11 +750,10 @@ static struct btrfs_fs_devices *find_fsid_reverted_metadata(
* fs_devices equal to the FSID of the disk.
*/
list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
- if (memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
- BTRFS_FSID_SIZE) != 0 &&
- memcmp(fs_devices->metadata_uuid, disk_super->fsid,
- BTRFS_FSID_SIZE) == 0 &&
- fs_devices->fsid_change)
+ if (!fs_devices->fsid_change)
+ continue;
+
+ if (check_fsid_changed(fs_devices, disk_super->fsid))
return fs_devices;
}
@@ -790,12 +804,8 @@ static noinline struct btrfs_device *device_list_add(const char *path,
if (!fs_devices) {
- if (has_metadata_uuid)
- fs_devices = alloc_fs_devices(disk_super->fsid,
- disk_super->metadata_uuid);
- else
- fs_devices = alloc_fs_devices(disk_super->fsid, NULL);
-
+ fs_devices = alloc_fs_devices(disk_super->fsid,
+ has_metadata_uuid ? disk_super->metadata_uuid : NULL);
if (IS_ERR(fs_devices))
return ERR_CAST(fs_devices);
@@ -1915,7 +1925,7 @@ static void update_dev_time(const char *device_path)
return;
now = current_time(d_inode(path.dentry));
- inode_update_time(d_inode(path.dentry), &now, S_MTIME | S_CTIME);
+ inode_update_time(d_inode(path.dentry), &now, S_MTIME | S_CTIME | S_VERSION);
path_put(&path);
}
@@ -5122,7 +5132,7 @@ static void init_alloc_chunk_ctl_policy_regular(
/* We don't want a chunk larger than 10% of writable space */
ctl->max_chunk_size = min(mult_perc(fs_devices->total_rw_bytes, 10),
ctl->max_chunk_size);
- ctl->dev_extent_min = ctl->dev_stripes << BTRFS_STRIPE_LEN_SHIFT;
+ ctl->dev_extent_min = btrfs_stripe_nr_to_offset(ctl->dev_stripes);
}
static void init_alloc_chunk_ctl_policy_zoned(
@@ -5798,7 +5808,7 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
if (!WARN_ON(IS_ERR(em))) {
map = em->map_lookup;
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
- len = nr_data_stripes(map) << BTRFS_STRIPE_LEN_SHIFT;
+ len = btrfs_stripe_nr_to_offset(nr_data_stripes(map));
free_extent_map(em);
}
return len;
@@ -5972,12 +5982,12 @@ struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info,
stripe_nr = offset >> BTRFS_STRIPE_LEN_SHIFT;
/* stripe_offset is the offset of this block in its stripe */
- stripe_offset = offset - (stripe_nr << BTRFS_STRIPE_LEN_SHIFT);
+ stripe_offset = offset - btrfs_stripe_nr_to_offset(stripe_nr);
stripe_nr_end = round_up(offset + length, BTRFS_STRIPE_LEN) >>
BTRFS_STRIPE_LEN_SHIFT;
stripe_cnt = stripe_nr_end - stripe_nr;
- stripe_end_offset = (stripe_nr_end << BTRFS_STRIPE_LEN_SHIFT) -
+ stripe_end_offset = btrfs_stripe_nr_to_offset(stripe_nr_end) -
(offset + length);
/*
* after this, stripe_nr is the number of stripes on this
@@ -6020,12 +6030,12 @@ struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info,
for (i = 0; i < *num_stripes; i++) {
stripes[i].physical =
map->stripes[stripe_index].physical +
- stripe_offset + (stripe_nr << BTRFS_STRIPE_LEN_SHIFT);
+ stripe_offset + btrfs_stripe_nr_to_offset(stripe_nr);
stripes[i].dev = map->stripes[stripe_index].dev;
if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
BTRFS_BLOCK_GROUP_RAID10)) {
- stripes[i].length = stripes_per_dev << BTRFS_STRIPE_LEN_SHIFT;
+ stripes[i].length = btrfs_stripe_nr_to_offset(stripes_per_dev);
if (i / sub_stripes < remaining_stripes)
stripes[i].length += BTRFS_STRIPE_LEN;
@@ -6160,17 +6170,10 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op,
bioc->replace_nr_stripes = nr_extra_stripes;
}
-static bool need_full_stripe(enum btrfs_map_op op)
-{
- return (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS);
-}
-
static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op,
u64 offset, u32 *stripe_nr, u64 *stripe_offset,
u64 *full_stripe_start)
{
- ASSERT(op != BTRFS_MAP_DISCARD);
-
/*
* Stripe_nr is the stripe where this block falls. stripe_offset is
* the offset of this block in its stripe.
@@ -6180,8 +6183,8 @@ static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op,
ASSERT(*stripe_offset < U32_MAX);
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
- unsigned long full_stripe_len = nr_data_stripes(map) <<
- BTRFS_STRIPE_LEN_SHIFT;
+ unsigned long full_stripe_len =
+ btrfs_stripe_nr_to_offset(nr_data_stripes(map));
/*
* For full stripe start, we use previously calculated
@@ -6193,9 +6196,11 @@ static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op,
* not ensured to be power of 2.
*/
*full_stripe_start =
- rounddown(*stripe_nr, nr_data_stripes(map)) <<
- BTRFS_STRIPE_LEN_SHIFT;
+ btrfs_stripe_nr_to_offset(
+ rounddown(*stripe_nr, nr_data_stripes(map)));
+ ASSERT(*full_stripe_start + full_stripe_len > offset);
+ ASSERT(*full_stripe_start <= offset);
/*
* For writes to RAID56, allow to write a full stripe set, but
* no straddling of stripe sets.
@@ -6218,14 +6223,14 @@ static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup *
{
dst->dev = map->stripes[stripe_index].dev;
dst->physical = map->stripes[stripe_index].physical +
- stripe_offset + (stripe_nr << BTRFS_STRIPE_LEN_SHIFT);
+ stripe_offset + btrfs_stripe_nr_to_offset(stripe_nr);
}
-int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
- u64 logical, u64 *length,
- struct btrfs_io_context **bioc_ret,
- struct btrfs_io_stripe *smap, int *mirror_num_ret,
- int need_raid_map)
+int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
+ u64 logical, u64 *length,
+ struct btrfs_io_context **bioc_ret,
+ struct btrfs_io_stripe *smap, int *mirror_num_ret,
+ int need_raid_map)
{
struct extent_map *em;
struct map_lookup *map;
@@ -6248,7 +6253,6 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 max_len;
ASSERT(bioc_ret);
- ASSERT(op != BTRFS_MAP_DISCARD);
num_copies = btrfs_num_copies(fs_info, logical, fs_info->sectorsize);
if (mirror_num > num_copies)
@@ -6280,21 +6284,21 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
stripe_index = stripe_nr % map->num_stripes;
stripe_nr /= map->num_stripes;
- if (!need_full_stripe(op))
+ if (op == BTRFS_MAP_READ)
mirror_num = 1;
} else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
- if (need_full_stripe(op))
+ if (op != BTRFS_MAP_READ) {
num_stripes = map->num_stripes;
- else if (mirror_num)
+ } else if (mirror_num) {
stripe_index = mirror_num - 1;
- else {
+ } else {
stripe_index = find_live_mirror(fs_info, map, 0,
dev_replace_is_ongoing);
mirror_num = stripe_index + 1;
}
} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
- if (need_full_stripe(op)) {
+ if (op != BTRFS_MAP_READ) {
num_stripes = map->num_stripes;
} else if (mirror_num) {
stripe_index = mirror_num - 1;
@@ -6308,7 +6312,7 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
stripe_index = (stripe_nr % factor) * map->sub_stripes;
stripe_nr /= factor;
- if (need_full_stripe(op))
+ if (op != BTRFS_MAP_READ)
num_stripes = map->sub_stripes;
else if (mirror_num)
stripe_index += mirror_num - 1;
@@ -6321,7 +6325,7 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
}
} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
- if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) {
+ if (need_raid_map && (op != BTRFS_MAP_READ || mirror_num > 1)) {
/*
* Push stripe_nr back to the start of the full stripe
* For those cases needing a full stripe, @stripe_nr
@@ -6340,7 +6344,8 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
/* Return the length to the full stripe end */
*length = min(logical + *length,
raid56_full_stripe_start + em->start +
- (data_stripes << BTRFS_STRIPE_LEN_SHIFT)) - logical;
+ btrfs_stripe_nr_to_offset(data_stripes)) -
+ logical;
stripe_index = 0;
stripe_offset = 0;
} else {
@@ -6356,7 +6361,7 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
/* We distribute the parity blocks across stripes */
stripe_index = (stripe_nr + stripe_index) % map->num_stripes;
- if (!need_full_stripe(op) && mirror_num <= 1)
+ if (op == BTRFS_MAP_READ && mirror_num <= 1)
mirror_num = 1;
}
} else {
@@ -6396,7 +6401,7 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
*/
if (smap && num_alloc_stripes == 1 &&
!((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1) &&
- (!need_full_stripe(op) || !dev_replace_is_ongoing ||
+ (op == BTRFS_MAP_READ || !dev_replace_is_ongoing ||
!dev_replace->tgtdev)) {
set_io_stripe(smap, map, stripe_index, stripe_offset, stripe_nr);
*mirror_num_ret = mirror_num;
@@ -6420,7 +6425,7 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
* It's still mostly the same as other profiles, just with extra rotation.
*/
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
- (need_full_stripe(op) || mirror_num > 1)) {
+ (op != BTRFS_MAP_READ || mirror_num > 1)) {
/*
* For RAID56 @stripe_nr is already the number of full stripes
* before us, which is also the rotation value (needs to modulo
@@ -6430,7 +6435,7 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
* modulo, to reduce one modulo call.
*/
bioc->full_stripe_logical = em->start +
- ((stripe_nr * data_stripes) << BTRFS_STRIPE_LEN_SHIFT);
+ btrfs_stripe_nr_to_offset(stripe_nr * data_stripes);
for (i = 0; i < num_stripes; i++)
set_io_stripe(&bioc->stripes[i], map,
(i + stripe_nr) % num_stripes,
@@ -6447,11 +6452,11 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
}
}
- if (need_full_stripe(op))
+ if (op != BTRFS_MAP_READ)
max_errors = btrfs_chunk_max_errors(map);
if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
- need_full_stripe(op)) {
+ op != BTRFS_MAP_READ) {
handle_ops_on_dev_replace(op, bioc, dev_replace, logical,
&num_stripes, &max_errors);
}
@@ -6471,23 +6476,6 @@ out:
return ret;
}
-int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
- u64 logical, u64 *length,
- struct btrfs_io_context **bioc_ret, int mirror_num)
-{
- return __btrfs_map_block(fs_info, op, logical, length, bioc_ret,
- NULL, &mirror_num, 0);
-}
-
-/* For Scrub/replace */
-int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
- u64 logical, u64 *length,
- struct btrfs_io_context **bioc_ret)
-{
- return __btrfs_map_block(fs_info, op, logical, length, bioc_ret,
- NULL, NULL, 1);
-}
-
static bool dev_args_match_fs_devices(const struct btrfs_dev_lookup_args *args,
const struct btrfs_fs_devices *fs_devices)
{
@@ -8027,7 +8015,7 @@ static void map_raid56_repair_block(struct btrfs_io_context *bioc,
for (i = 0; i < data_stripes; i++) {
u64 stripe_start = bioc->full_stripe_logical +
- (i << BTRFS_STRIPE_LEN_SHIFT);
+ btrfs_stripe_nr_to_offset(i);
if (logical >= stripe_start &&
logical < stripe_start + BTRFS_STRIPE_LEN)
@@ -8064,8 +8052,8 @@ int btrfs_map_repair_block(struct btrfs_fs_info *fs_info,
ASSERT(mirror_num > 0);
- ret = __btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical, &map_length,
- &bioc, smap, &mirror_ret, true);
+ ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical, &map_length,
+ &bioc, smap, &mirror_ret, true);
if (ret < 0)
return ret;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 8227ba4d64b8..b8c51f16ba86 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -280,8 +280,19 @@ enum btrfs_read_policy {
struct btrfs_fs_devices {
u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
+
+ /*
+ * UUID written into the btree blocks:
+ *
+ * - If metadata_uuid != fsid then super block must have
+ * BTRFS_FEATURE_INCOMPAT_METADATA_UUID flag set.
+ *
+ * - Following shall be true at all times:
+ * - metadata_uuid == btrfs_header::fsid
+ * - metadata_uuid == btrfs_dev_item::fsid
+ */
u8 metadata_uuid[BTRFS_FSID_SIZE];
- bool fsid_change;
+
struct list_head fs_list;
/*
@@ -319,34 +330,32 @@ struct btrfs_fs_devices {
*/
struct btrfs_device *latest_dev;
- /* all of the devices in the FS, protected by a mutex
- * so we can safely walk it to write out the supers without
- * worrying about add/remove by the multi-device code.
- * Scrubbing super can kick off supers writing by holding
- * this mutex lock.
+ /*
+ * All of the devices in the filesystem, protected by a mutex so we can
+ * safely walk it to write out the super blocks without worrying about
+ * adding/removing by the multi-device code. Scrubbing super block can
+ * kick off supers writing by holding this mutex lock.
*/
struct mutex device_list_mutex;
/* List of all devices, protected by device_list_mutex */
struct list_head devices;
- /*
- * Devices which can satisfy space allocation. Protected by
- * chunk_mutex
- */
+ /* Devices which can satisfy space allocation. Protected by * chunk_mutex. */
struct list_head alloc_list;
struct list_head seed_list;
- bool seeding;
+ /* Count fs-devices opened. */
int opened;
- /* set when we find or add a device that doesn't have the
- * nonrot flag set
- */
+ /* Set when we find or add a device that doesn't have the nonrot flag set. */
bool rotating;
- /* Devices support TRIM/discard commands */
+ /* Devices support TRIM/discard commands. */
bool discardable;
+ bool fsid_change;
+ /* The filesystem is a seed filesystem. */
+ bool seeding;
struct btrfs_fs_info *fs_info;
/* sysfs kobjects */
@@ -357,7 +366,7 @@ struct btrfs_fs_devices {
enum btrfs_chunk_allocation_policy chunk_alloc_policy;
- /* Policy used to read the mirrored stripes */
+ /* Policy used to read the mirrored stripes. */
enum btrfs_read_policy read_policy;
};
@@ -547,15 +556,12 @@ struct btrfs_dev_lookup_args {
enum btrfs_map_op {
BTRFS_MAP_READ,
BTRFS_MAP_WRITE,
- BTRFS_MAP_DISCARD,
BTRFS_MAP_GET_READ_MIRRORS,
};
static inline enum btrfs_map_op btrfs_op(struct bio *bio)
{
switch (bio_op(bio)) {
- case REQ_OP_DISCARD:
- return BTRFS_MAP_DISCARD;
case REQ_OP_WRITE:
case REQ_OP_ZONE_APPEND:
return BTRFS_MAP_WRITE;
@@ -574,19 +580,24 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
sizeof(struct btrfs_stripe) * (num_stripes - 1);
}
+/*
+ * Do the type safe converstion from stripe_nr to offset inside the chunk.
+ *
+ * @stripe_nr is u32, with left shift it can overflow u32 for chunks larger
+ * than 4G. This does the proper type cast to avoid overflow.
+ */
+static inline u64 btrfs_stripe_nr_to_offset(u32 stripe_nr)
+{
+ return (u64)stripe_nr << BTRFS_STRIPE_LEN_SHIFT;
+}
+
void btrfs_get_bioc(struct btrfs_io_context *bioc);
void btrfs_put_bioc(struct btrfs_io_context *bioc);
int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 *length,
- struct btrfs_io_context **bioc_ret, int mirror_num);
-int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
- u64 logical, u64 *length,
- struct btrfs_io_context **bioc_ret);
-int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
- u64 logical, u64 *length,
- struct btrfs_io_context **bioc_ret,
- struct btrfs_io_stripe *smap, int *mirror_num_ret,
- int need_raid_map);
+ struct btrfs_io_context **bioc_ret,
+ struct btrfs_io_stripe *smap, int *mirror_num_ret,
+ int need_raid_map);
int btrfs_map_repair_block(struct btrfs_fs_info *fs_info,
struct btrfs_io_stripe *smap, u64 logical,
u32 length, int mirror_num);
@@ -616,7 +627,6 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
const u64 *devid, const u8 *uuid,
const char *path);
void btrfs_put_dev_args_from_path(struct btrfs_dev_lookup_args *args);
-void btrfs_free_device(struct btrfs_device *device);
int btrfs_rm_device(struct btrfs_fs_info *fs_info,
struct btrfs_dev_lookup_args *args,
struct block_device **bdev, void **holder);
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 8acb05e176c5..6c231a116a29 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -63,7 +63,7 @@ struct list_head *zlib_alloc_workspace(unsigned int level)
workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
zlib_inflate_workspacesize());
- workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL);
+ workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL | __GFP_NOWARN);
workspace->level = level;
workspace->buf = NULL;
/*
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 39828af4a4e8..85b8b332add9 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -15,6 +15,7 @@
#include "transaction.h"
#include "dev-replace.h"
#include "space-info.h"
+#include "super.h"
#include "fs.h"
#include "accessors.h"
#include "bio.h"
@@ -1057,7 +1058,7 @@ u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start,
/* Check if zones in the region are all empty */
if (btrfs_dev_is_sequential(device, pos) &&
- find_next_zero_bit(zinfo->empty_zones, end, begin) != end) {
+ !bitmap_test_range_all_set(zinfo->empty_zones, begin, nzones)) {
pos += zinfo->zone_size;
continue;
}
@@ -1156,23 +1157,23 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
struct btrfs_zoned_device_info *zinfo = device->zone_info;
const u8 shift = zinfo->zone_size_shift;
unsigned long begin = start >> shift;
- unsigned long end = (start + size) >> shift;
+ unsigned long nbits = size >> shift;
u64 pos;
int ret;
ASSERT(IS_ALIGNED(start, zinfo->zone_size));
ASSERT(IS_ALIGNED(size, zinfo->zone_size));
- if (end > zinfo->nr_zones)
+ if (begin + nbits > zinfo->nr_zones)
return -ERANGE;
/* All the zones are conventional */
- if (find_next_bit(zinfo->seq_zones, end, begin) == end)
+ if (bitmap_test_range_all_zero(zinfo->seq_zones, begin, nbits))
return 0;
/* All the zones are sequential and empty */
- if (find_next_zero_bit(zinfo->seq_zones, end, begin) == end &&
- find_next_zero_bit(zinfo->empty_zones, end, begin) == end)
+ if (bitmap_test_range_all_set(zinfo->seq_zones, begin, nbits) &&
+ bitmap_test_range_all_set(zinfo->empty_zones, begin, nbits))
return 0;
for (pos = start; pos < start + size; pos += zinfo->zone_size) {
@@ -1602,37 +1603,17 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache)
void btrfs_redirty_list_add(struct btrfs_transaction *trans,
struct extent_buffer *eb)
{
- struct btrfs_fs_info *fs_info = eb->fs_info;
-
- if (!btrfs_is_zoned(fs_info) ||
- btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN) ||
- !list_empty(&eb->release_list))
+ if (!btrfs_is_zoned(eb->fs_info) ||
+ btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN))
return;
+ ASSERT(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
+
memzero_extent_buffer(eb, 0, eb->len);
set_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags);
set_extent_buffer_dirty(eb);
- set_extent_bits_nowait(&trans->dirty_pages, eb->start,
- eb->start + eb->len - 1, EXTENT_DIRTY);
-
- spin_lock(&trans->releasing_ebs_lock);
- list_add_tail(&eb->release_list, &trans->releasing_ebs);
- spin_unlock(&trans->releasing_ebs_lock);
- atomic_inc(&eb->refs);
-}
-
-void btrfs_free_redirty_list(struct btrfs_transaction *trans)
-{
- spin_lock(&trans->releasing_ebs_lock);
- while (!list_empty(&trans->releasing_ebs)) {
- struct extent_buffer *eb;
-
- eb = list_first_entry(&trans->releasing_ebs,
- struct extent_buffer, release_list);
- list_del_init(&eb->release_list);
- free_extent_buffer(eb);
- }
- spin_unlock(&trans->releasing_ebs_lock);
+ set_extent_bit(&trans->dirty_pages, eb->start, eb->start + eb->len - 1,
+ EXTENT_DIRTY | EXTENT_NOWAIT, NULL);
}
bool btrfs_use_zone_append(struct btrfs_bio *bbio)
@@ -1677,63 +1658,89 @@ bool btrfs_use_zone_append(struct btrfs_bio *bbio)
void btrfs_record_physical_zoned(struct btrfs_bio *bbio)
{
const u64 physical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
- struct btrfs_ordered_extent *ordered;
+ struct btrfs_ordered_sum *sum = bbio->sums;
- ordered = btrfs_lookup_ordered_extent(bbio->inode, bbio->file_offset);
- if (WARN_ON(!ordered))
- return;
-
- ordered->physical = physical;
- btrfs_put_ordered_extent(ordered);
+ if (physical < bbio->orig_physical)
+ sum->logical -= bbio->orig_physical - physical;
+ else
+ sum->logical += physical - bbio->orig_physical;
}
-void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered)
+static void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered,
+ u64 logical)
{
- struct btrfs_inode *inode = BTRFS_I(ordered->inode);
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct extent_map_tree *em_tree;
+ struct extent_map_tree *em_tree = &BTRFS_I(ordered->inode)->extent_tree;
struct extent_map *em;
- struct btrfs_ordered_sum *sum;
- u64 orig_logical = ordered->disk_bytenr;
- struct map_lookup *map;
- u64 physical = ordered->physical;
- u64 chunk_start_phys;
- u64 logical;
-
- em = btrfs_get_chunk_map(fs_info, orig_logical, 1);
- if (IS_ERR(em))
- return;
- map = em->map_lookup;
- chunk_start_phys = map->stripes[0].physical;
-
- if (WARN_ON_ONCE(map->num_stripes > 1) ||
- WARN_ON_ONCE((map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0) ||
- WARN_ON_ONCE(physical < chunk_start_phys) ||
- WARN_ON_ONCE(physical > chunk_start_phys + em->orig_block_len)) {
- free_extent_map(em);
- return;
- }
- logical = em->start + (physical - map->stripes[0].physical);
- free_extent_map(em);
-
- if (orig_logical == logical)
- return;
ordered->disk_bytenr = logical;
- em_tree = &inode->extent_tree;
write_lock(&em_tree->lock);
em = search_extent_mapping(em_tree, ordered->file_offset,
ordered->num_bytes);
em->block_start = logical;
free_extent_map(em);
write_unlock(&em_tree->lock);
+}
- list_for_each_entry(sum, &ordered->list, list) {
- if (logical < orig_logical)
- sum->bytenr -= orig_logical - logical;
- else
- sum->bytenr += logical - orig_logical;
+static bool btrfs_zoned_split_ordered(struct btrfs_ordered_extent *ordered,
+ u64 logical, u64 len)
+{
+ struct btrfs_ordered_extent *new;
+
+ if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags) &&
+ split_extent_map(BTRFS_I(ordered->inode), ordered->file_offset,
+ ordered->num_bytes, len, logical))
+ return false;
+
+ new = btrfs_split_ordered_extent(ordered, len);
+ if (IS_ERR(new))
+ return false;
+ new->disk_bytenr = logical;
+ btrfs_finish_one_ordered(new);
+ return true;
+}
+
+void btrfs_finish_ordered_zoned(struct btrfs_ordered_extent *ordered)
+{
+ struct btrfs_inode *inode = BTRFS_I(ordered->inode);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct btrfs_ordered_sum *sum =
+ list_first_entry(&ordered->list, typeof(*sum), list);
+ u64 logical = sum->logical;
+ u64 len = sum->len;
+
+ while (len < ordered->disk_num_bytes) {
+ sum = list_next_entry(sum, list);
+ if (sum->logical == logical + len) {
+ len += sum->len;
+ continue;
+ }
+ if (!btrfs_zoned_split_ordered(ordered, logical, len)) {
+ set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
+ btrfs_err(fs_info, "failed to split ordered extent");
+ goto out;
+ }
+ logical = sum->logical;
+ len = sum->len;
+ }
+
+ if (ordered->disk_bytenr != logical)
+ btrfs_rewrite_logical_zoned(ordered, logical);
+
+out:
+ /*
+ * If we end up here for nodatasum I/O, the btrfs_ordered_sum structures
+ * were allocated by btrfs_alloc_dummy_sum only to record the logical
+ * addresses and don't contain actual checksums. We thus must free them
+ * here so that we don't attempt to log the csums later.
+ */
+ if ((inode->flags & BTRFS_INODE_NODATASUM) ||
+ test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)) {
+ while ((sum = list_first_entry_or_null(&ordered->list,
+ typeof(*sum), list))) {
+ list_del(&sum->list);
+ kfree(sum);
+ }
}
}
@@ -1792,8 +1799,8 @@ static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical,
int nmirrors;
int i, ret;
- ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
- &mapped_length, &bioc);
+ ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
+ &mapped_length, &bioc, NULL, NULL, 1);
if (ret || !bioc || mapped_length < PAGE_SIZE) {
ret = -EIO;
goto out_put_bioc;
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index c0570d35fea2..27322b926038 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -30,6 +30,8 @@ struct btrfs_zoned_device_info {
struct blk_zone sb_zones[2 * BTRFS_SUPER_MIRROR_MAX];
};
+void btrfs_finish_ordered_zoned(struct btrfs_ordered_extent *ordered);
+
#ifdef CONFIG_BLK_DEV_ZONED
int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
struct blk_zone *zone);
@@ -54,10 +56,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new);
void btrfs_calc_zone_unusable(struct btrfs_block_group *cache);
void btrfs_redirty_list_add(struct btrfs_transaction *trans,
struct extent_buffer *eb);
-void btrfs_free_redirty_list(struct btrfs_transaction *trans);
bool btrfs_use_zone_append(struct btrfs_bio *bbio);
void btrfs_record_physical_zoned(struct btrfs_bio *bbio);
-void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered);
bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb,
struct btrfs_block_group **cache_ret);
@@ -179,7 +179,6 @@ static inline void btrfs_calc_zone_unusable(struct btrfs_block_group *cache) { }
static inline void btrfs_redirty_list_add(struct btrfs_transaction *trans,
struct extent_buffer *eb) { }
-static inline void btrfs_free_redirty_list(struct btrfs_transaction *trans) { }
static inline bool btrfs_use_zone_append(struct btrfs_bio *bbio)
{
@@ -190,9 +189,6 @@ static inline void btrfs_record_physical_zoned(struct btrfs_bio *bbio)
{
}
-static inline void btrfs_rewrite_logical_zoned(
- struct btrfs_ordered_extent *ordered) { }
-
static inline bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb,
struct btrfs_block_group **cache_ret)
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
index f798da267590..e7ac4ec809a4 100644
--- a/fs/btrfs/zstd.c
+++ b/fs/btrfs/zstd.c
@@ -356,7 +356,7 @@ struct list_head *zstd_alloc_workspace(unsigned int level)
workspace->level = level;
workspace->req_level = level;
workspace->last_used = jiffies;
- workspace->mem = kvmalloc(workspace->size, GFP_KERNEL);
+ workspace->mem = kvmalloc(workspace->size, GFP_KERNEL | __GFP_NOWARN);
workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!workspace->mem || !workspace->buf)
goto fail;
diff --git a/fs/buffer.c b/fs/buffer.c
index 63da30ce946a..93c7446d9221 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -111,7 +111,6 @@ void buffer_check_dirty_writeback(struct folio *folio,
bh = bh->b_this_page;
} while (bh != head);
}
-EXPORT_SYMBOL(buffer_check_dirty_writeback);
/*
* Block until a buffer comes unlocked. This doesn't stop it
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 82219a8f6084..d9d22d0ec38a 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -451,9 +451,10 @@ struct file *cachefiles_create_tmpfile(struct cachefiles_object *object)
ret = cachefiles_inject_write_error();
if (ret == 0) {
- file = vfs_tmpfile_open(&nop_mnt_idmap, &parentpath, S_IFREG,
- O_RDWR | O_LARGEFILE | O_DIRECT,
- cache->cache_cred);
+ file = kernel_tmpfile_open(&nop_mnt_idmap, &parentpath,
+ S_IFREG | 0600,
+ O_RDWR | O_LARGEFILE | O_DIRECT,
+ cache->cache_cred);
ret = PTR_ERR_OR_ZERO(file);
}
if (ret) {
@@ -560,8 +561,8 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
*/
path.mnt = cache->mnt;
path.dentry = dentry;
- file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
- d_backing_inode(dentry), cache->cache_cred);
+ file = kernel_file_open(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
+ d_backing_inode(dentry), cache->cache_cred);
if (IS_ERR(file)) {
trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
PTR_ERR(file),
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 789be30d6ee2..2321e5ddb664 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1627,6 +1627,7 @@ void ceph_flush_snaps(struct ceph_inode_info *ci,
struct inode *inode = &ci->netfs.inode;
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
struct ceph_mds_session *session = NULL;
+ bool need_put = false;
int mds;
dout("ceph_flush_snaps %p\n", inode);
@@ -1671,8 +1672,13 @@ out:
ceph_put_mds_session(session);
/* we flushed them all; remove this inode from the queue */
spin_lock(&mdsc->snap_flush_lock);
+ if (!list_empty(&ci->i_snap_flush_item))
+ need_put = true;
list_del_init(&ci->i_snap_flush_item);
spin_unlock(&mdsc->snap_flush_lock);
+
+ if (need_put)
+ iput(inode);
}
/*
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 29cf00220b09..4c0f22acf53d 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3942,7 +3942,7 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg)
struct dentry *dentry;
struct ceph_cap *cap;
char *path;
- int pathlen = 0, err = 0;
+ int pathlen = 0, err;
u64 pathbase;
u64 snap_follows;
@@ -3965,6 +3965,7 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg)
cap = __get_cap_for_mds(ci, mds);
if (!cap) {
spin_unlock(&ci->i_ceph_lock);
+ err = 0;
goto out_err;
}
dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 87007203f130..2e73ba62bd7a 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -693,8 +693,10 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
capsnap->size);
spin_lock(&mdsc->snap_flush_lock);
- if (list_empty(&ci->i_snap_flush_item))
+ if (list_empty(&ci->i_snap_flush_item)) {
+ ihold(inode);
list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
+ }
spin_unlock(&mdsc->snap_flush_lock);
return 1; /* caller may want to ceph_flush_snaps */
}
@@ -1111,6 +1113,19 @@ skip_inode:
continue;
adjust_snap_realm_parent(mdsc, child, realm->ino);
}
+ } else {
+ /*
+ * In the non-split case both 'num_split_inos' and
+ * 'num_split_realms' should be 0, making this a no-op.
+ * However the MDS happens to populate 'split_realms' list
+ * in one of the UPDATE op cases by mistake.
+ *
+ * Skip both lists just in case to ensure that 'p' is
+ * positioned at the start of realm info, as expected by
+ * ceph_update_snap_trace().
+ */
+ p += sizeof(u64) * num_split_inos;
+ p += sizeof(u64) * num_split_realms;
}
/*
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 13deb45f1ec6..950b6919fb87 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -150,7 +150,7 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor,
cd->major = major;
cd->baseminor = baseminor;
cd->minorct = minorct;
- strlcpy(cd->name, name, sizeof(cd->name));
+ strscpy(cd->name, name, sizeof(cd->name));
if (!prev) {
cd->next = curr;
diff --git a/fs/coredump.c b/fs/coredump.c
index ece7badf701b..9d235fa14ab9 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -371,7 +371,9 @@ static int zap_process(struct task_struct *start, int exit_code)
if (t != current && !(t->flags & PF_POSTCOREDUMP)) {
sigaddset(&t->pending.signal, SIGKILL);
signal_wake_up(t, 1);
- nr++;
+ /* The vhost_worker does not particpate in coredumps */
+ if ((t->flags & (PF_USER_WORKER | PF_IO_WORKER)) != PF_USER_WORKER)
+ nr++;
}
}
@@ -646,7 +648,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
} else {
struct mnt_idmap *idmap;
struct inode *inode;
- int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW |
+ int open_flags = O_CREAT | O_WRONLY | O_NOFOLLOW |
O_LARGEFILE | O_EXCL;
if (cprm.limit < binfmt->min_coredump)
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 7ab5a7b7eef8..2d63da48635a 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -171,7 +171,7 @@ fscrypt_policy_flags(const union fscrypt_policy *policy)
*/
struct fscrypt_symlink_data {
__le16 len;
- char encrypted_path[1];
+ char encrypted_path[];
} __packed;
/**
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
index 9e786ae66a13..6238dbcadcad 100644
--- a/fs/crypto/hooks.c
+++ b/fs/crypto/hooks.c
@@ -255,10 +255,10 @@ int fscrypt_prepare_symlink(struct inode *dir, const char *target,
* for now since filesystems will assume it is there and subtract it.
*/
if (!__fscrypt_fname_encrypted_size(policy, len,
- max_len - sizeof(struct fscrypt_symlink_data),
+ max_len - sizeof(struct fscrypt_symlink_data) - 1,
&disk_link->len))
return -ENAMETOOLONG;
- disk_link->len += sizeof(struct fscrypt_symlink_data);
+ disk_link->len += sizeof(struct fscrypt_symlink_data) + 1;
disk_link->name = NULL;
return 0;
@@ -289,7 +289,7 @@ int __fscrypt_encrypt_symlink(struct inode *inode, const char *target,
if (!sd)
return -ENOMEM;
}
- ciphertext_len = disk_link->len - sizeof(*sd);
+ ciphertext_len = disk_link->len - sizeof(*sd) - 1;
sd->len = cpu_to_le16(ciphertext_len);
err = fscrypt_fname_encrypt(inode, &iname, sd->encrypted_path,
@@ -367,7 +367,7 @@ const char *fscrypt_get_symlink(struct inode *inode, const void *caddr,
* the ciphertext length, even though this is redundant with i_size.
*/
- if (max_size < sizeof(*sd))
+ if (max_size < sizeof(*sd) + 1)
return ERR_PTR(-EUCLEAN);
sd = caddr;
cstr.name = (unsigned char *)sd->encrypted_path;
@@ -376,7 +376,7 @@ const char *fscrypt_get_symlink(struct inode *inode, const void *caddr,
if (cstr.len == 0)
return ERR_PTR(-EUCLEAN);
- if (cstr.len + sizeof(*sd) - 1 > max_size)
+ if (cstr.len + sizeof(*sd) > max_size)
return ERR_PTR(-EUCLEAN);
err = fscrypt_fname_alloc_buffer(cstr.len, &pstr);
diff --git a/fs/d_path.c b/fs/d_path.c
index 56a6ee4c6331..5f4da5c8d5db 100644
--- a/fs/d_path.c
+++ b/fs/d_path.c
@@ -7,6 +7,7 @@
#include <linux/slab.h>
#include <linux/prefetch.h>
#include "mount.h"
+#include "internal.h"
struct prepend_buffer {
char *buf;
diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig
index 704fb59577e0..f259d92c9720 100644
--- a/fs/erofs/Kconfig
+++ b/fs/erofs/Kconfig
@@ -121,6 +121,7 @@ config EROFS_FS_PCPU_KTHREAD
config EROFS_FS_PCPU_KTHREAD_HIPRI
bool "EROFS high priority per-CPU kthread workers"
depends on EROFS_FS_ZIP && EROFS_FS_PCPU_KTHREAD
+ default y
help
This permits EROFS to configure per-CPU kthread workers to run
at higher priority.
diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile
index 99bbc597a3e9..a3a98fc3e481 100644
--- a/fs/erofs/Makefile
+++ b/fs/erofs/Makefile
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_EROFS_FS) += erofs.o
-erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o sysfs.o
+erofs-objs := super.o inode.o data.o namei.o dir.o utils.o sysfs.o
erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
-erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o
+erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o pcpubuf.o
erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index 26fa170090b8..b1b846504027 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -89,8 +89,7 @@ static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi,
int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
unsigned int padbufsize);
-int z_erofs_decompress(struct z_erofs_decompress_req *rq,
- struct page **pagepool);
+extern const struct z_erofs_decompressor erofs_decompressors[];
/* prototypes for specific algorithms */
int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 7021e2cf6146..2a29943fa5cc 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -363,7 +363,7 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
return 0;
}
-static struct z_erofs_decompressor decompressors[] = {
+const struct z_erofs_decompressor erofs_decompressors[] = {
[Z_EROFS_COMPRESSION_SHIFTED] = {
.decompress = z_erofs_transform_plain,
.name = "shifted"
@@ -383,9 +383,3 @@ static struct z_erofs_decompressor decompressors[] = {
},
#endif
};
-
-int z_erofs_decompress(struct z_erofs_decompress_req *rq,
- struct page **pagepool)
-{
- return decompressors[rq->alg].decompress(rq, pagepool);
-}
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index af0431a40647..36e32fa542f0 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -208,46 +208,12 @@ enum {
EROFS_ZIP_CACHE_READAROUND
};
-#define EROFS_LOCKED_MAGIC (INT_MIN | 0xE0F510CCL)
-
/* basic unit of the workstation of a super_block */
struct erofs_workgroup {
- /* the workgroup index in the workstation */
pgoff_t index;
-
- /* overall workgroup reference count */
- atomic_t refcount;
+ struct lockref lockref;
};
-static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp,
- int val)
-{
- preempt_disable();
- if (val != atomic_cmpxchg(&grp->refcount, val, EROFS_LOCKED_MAGIC)) {
- preempt_enable();
- return false;
- }
- return true;
-}
-
-static inline void erofs_workgroup_unfreeze(struct erofs_workgroup *grp,
- int orig_val)
-{
- /*
- * other observers should notice all modifications
- * in the freezing period.
- */
- smp_mb();
- atomic_set(&grp->refcount, orig_val);
- preempt_enable();
-}
-
-static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
-{
- return atomic_cond_read_relaxed(&grp->refcount,
- VAL != EROFS_LOCKED_MAGIC);
-}
-
enum erofs_kmap_type {
EROFS_NO_KMAP, /* don't map the buffer */
EROFS_KMAP, /* use kmap_local_page() to map the buffer */
@@ -472,12 +438,6 @@ static inline void *erofs_vm_map_ram(struct page **pages, unsigned int count)
return NULL;
}
-void *erofs_get_pcpubuf(unsigned int requiredpages);
-void erofs_put_pcpubuf(void *ptr);
-int erofs_pcpubuf_growsize(unsigned int nrpages);
-void __init erofs_pcpubuf_init(void);
-void erofs_pcpubuf_exit(void);
-
int erofs_register_sysfs(struct super_block *sb);
void erofs_unregister_sysfs(struct super_block *sb);
int __init erofs_init_sysfs(void);
@@ -492,7 +452,7 @@ static inline void erofs_pagepool_add(struct page **pagepool, struct page *page)
void erofs_release_pages(struct page **pagepool);
#ifdef CONFIG_EROFS_FS_ZIP
-int erofs_workgroup_put(struct erofs_workgroup *grp);
+void erofs_workgroup_put(struct erofs_workgroup *grp);
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
pgoff_t index);
struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
@@ -506,12 +466,17 @@ int __init z_erofs_init_zip_subsystem(void);
void z_erofs_exit_zip_subsystem(void);
int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
struct erofs_workgroup *egrp);
-int erofs_try_to_free_cached_page(struct page *page);
int z_erofs_load_lz4_config(struct super_block *sb,
struct erofs_super_block *dsb,
struct z_erofs_lz4_cfgs *lz4, int len);
int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
int flags);
+void *erofs_get_pcpubuf(unsigned int requiredpages);
+void erofs_put_pcpubuf(void *ptr);
+int erofs_pcpubuf_growsize(unsigned int nrpages);
+void __init erofs_pcpubuf_init(void);
+void erofs_pcpubuf_exit(void);
+int erofs_init_managed_cache(struct super_block *sb);
#else
static inline void erofs_shrinker_register(struct super_block *sb) {}
static inline void erofs_shrinker_unregister(struct super_block *sb) {}
@@ -529,6 +494,9 @@ static inline int z_erofs_load_lz4_config(struct super_block *sb,
}
return 0;
}
+static inline void erofs_pcpubuf_init(void) {}
+static inline void erofs_pcpubuf_exit(void) {}
+static inline int erofs_init_managed_cache(struct super_block *sb) { return 0; }
#endif /* !CONFIG_EROFS_FS_ZIP */
#ifdef CONFIG_EROFS_FS_ZIP_LZMA
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 3f080f0afc02..9d6a3c6158bd 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -600,68 +600,6 @@ static int erofs_fc_parse_param(struct fs_context *fc,
return 0;
}
-#ifdef CONFIG_EROFS_FS_ZIP
-static const struct address_space_operations managed_cache_aops;
-
-static bool erofs_managed_cache_release_folio(struct folio *folio, gfp_t gfp)
-{
- bool ret = true;
- struct address_space *const mapping = folio->mapping;
-
- DBG_BUGON(!folio_test_locked(folio));
- DBG_BUGON(mapping->a_ops != &managed_cache_aops);
-
- if (folio_test_private(folio))
- ret = erofs_try_to_free_cached_page(&folio->page);
-
- return ret;
-}
-
-/*
- * It will be called only on inode eviction. In case that there are still some
- * decompression requests in progress, wait with rescheduling for a bit here.
- * We could introduce an extra locking instead but it seems unnecessary.
- */
-static void erofs_managed_cache_invalidate_folio(struct folio *folio,
- size_t offset, size_t length)
-{
- const size_t stop = length + offset;
-
- DBG_BUGON(!folio_test_locked(folio));
-
- /* Check for potential overflow in debug mode */
- DBG_BUGON(stop > folio_size(folio) || stop < length);
-
- if (offset == 0 && stop == folio_size(folio))
- while (!erofs_managed_cache_release_folio(folio, GFP_NOFS))
- cond_resched();
-}
-
-static const struct address_space_operations managed_cache_aops = {
- .release_folio = erofs_managed_cache_release_folio,
- .invalidate_folio = erofs_managed_cache_invalidate_folio,
-};
-
-static int erofs_init_managed_cache(struct super_block *sb)
-{
- struct erofs_sb_info *const sbi = EROFS_SB(sb);
- struct inode *const inode = new_inode(sb);
-
- if (!inode)
- return -ENOMEM;
-
- set_nlink(inode, 1);
- inode->i_size = OFFSET_MAX;
-
- inode->i_mapping->a_ops = &managed_cache_aops;
- mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
- sbi->managed_cache = inode;
- return 0;
-}
-#else
-static int erofs_init_managed_cache(struct super_block *sb) { return 0; }
-#endif
-
static struct inode *erofs_nfs_get_inode(struct super_block *sb,
u64 ino, u32 generation)
{
@@ -1017,10 +955,8 @@ static int __init erofs_module_init(void)
sizeof(struct erofs_inode), 0,
SLAB_RECLAIM_ACCOUNT,
erofs_inode_init_once);
- if (!erofs_inode_cachep) {
- err = -ENOMEM;
- goto icache_err;
- }
+ if (!erofs_inode_cachep)
+ return -ENOMEM;
err = erofs_init_shrinker();
if (err)
@@ -1055,7 +991,6 @@ lzma_err:
erofs_exit_shrinker();
shrinker_err:
kmem_cache_destroy(erofs_inode_cachep);
-icache_err:
return err;
}
diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c
index 46627cb69abe..cc6fb9e98899 100644
--- a/fs/erofs/utils.c
+++ b/fs/erofs/utils.c
@@ -4,7 +4,6 @@
* https://www.huawei.com/
*/
#include "internal.h"
-#include <linux/pagevec.h>
struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp)
{
@@ -33,22 +32,21 @@ void erofs_release_pages(struct page **pagepool)
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;
-static int erofs_workgroup_get(struct erofs_workgroup *grp)
+static bool erofs_workgroup_get(struct erofs_workgroup *grp)
{
- int o;
+ if (lockref_get_not_zero(&grp->lockref))
+ return true;
-repeat:
- o = erofs_wait_on_workgroup_freezed(grp);
- if (o <= 0)
- return -1;
-
- if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
- goto repeat;
+ spin_lock(&grp->lockref.lock);
+ if (__lockref_is_dead(&grp->lockref)) {
+ spin_unlock(&grp->lockref.lock);
+ return false;
+ }
- /* decrease refcount paired by erofs_workgroup_put */
- if (o == 1)
+ if (!grp->lockref.count++)
atomic_long_dec(&erofs_global_shrink_cnt);
- return 0;
+ spin_unlock(&grp->lockref.lock);
+ return true;
}
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
@@ -61,7 +59,7 @@ repeat:
rcu_read_lock();
grp = xa_load(&sbi->managed_pslots, index);
if (grp) {
- if (erofs_workgroup_get(grp)) {
+ if (!erofs_workgroup_get(grp)) {
/* prefer to relax rcu read side */
rcu_read_unlock();
goto repeat;
@@ -80,11 +78,10 @@ struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
struct erofs_workgroup *pre;
/*
- * Bump up a reference count before making this visible
- * to others for the XArray in order to avoid potential
- * UAF without serialized by xa_lock.
+ * Bump up before making this visible to others for the XArray in order
+ * to avoid potential UAF without serialized by xa_lock.
*/
- atomic_inc(&grp->refcount);
+ lockref_get(&grp->lockref);
repeat:
xa_lock(&sbi->managed_pslots);
@@ -93,13 +90,13 @@ repeat:
if (pre) {
if (xa_is_err(pre)) {
pre = ERR_PTR(xa_err(pre));
- } else if (erofs_workgroup_get(pre)) {
+ } else if (!erofs_workgroup_get(pre)) {
/* try to legitimize the current in-tree one */
xa_unlock(&sbi->managed_pslots);
cond_resched();
goto repeat;
}
- atomic_dec(&grp->refcount);
+ lockref_put_return(&grp->lockref);
grp = pre;
}
xa_unlock(&sbi->managed_pslots);
@@ -112,38 +109,34 @@ static void __erofs_workgroup_free(struct erofs_workgroup *grp)
erofs_workgroup_free_rcu(grp);
}
-int erofs_workgroup_put(struct erofs_workgroup *grp)
+void erofs_workgroup_put(struct erofs_workgroup *grp)
{
- int count = atomic_dec_return(&grp->refcount);
+ if (lockref_put_or_lock(&grp->lockref))
+ return;
- if (count == 1)
+ DBG_BUGON(__lockref_is_dead(&grp->lockref));
+ if (grp->lockref.count == 1)
atomic_long_inc(&erofs_global_shrink_cnt);
- else if (!count)
- __erofs_workgroup_free(grp);
- return count;
+ --grp->lockref.count;
+ spin_unlock(&grp->lockref.lock);
}
static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
struct erofs_workgroup *grp)
{
- /*
- * If managed cache is on, refcount of workgroups
- * themselves could be < 0 (freezed). In other words,
- * there is no guarantee that all refcounts > 0.
- */
- if (!erofs_workgroup_try_to_freeze(grp, 1))
- return false;
+ int free = false;
+
+ spin_lock(&grp->lockref.lock);
+ if (grp->lockref.count)
+ goto out;
/*
- * Note that all cached pages should be unattached
- * before deleted from the XArray. Otherwise some
- * cached pages could be still attached to the orphan
- * old workgroup when the new one is available in the tree.
+ * Note that all cached pages should be detached before deleted from
+ * the XArray. Otherwise some cached pages could be still attached to
+ * the orphan old workgroup when the new one is available in the tree.
*/
- if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
- erofs_workgroup_unfreeze(grp, 1);
- return false;
- }
+ if (erofs_try_to_free_all_cached_pages(sbi, grp))
+ goto out;
/*
* It's impossible to fail after the workgroup is freezed,
@@ -152,10 +145,13 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
*/
DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp);
- /* last refcount should be connected with its managed pslot. */
- erofs_workgroup_unfreeze(grp, 0);
- __erofs_workgroup_free(grp);
- return true;
+ lockref_mark_dead(&grp->lockref);
+ free = true;
+out:
+ spin_unlock(&grp->lockref.lock);
+ if (free)
+ __erofs_workgroup_free(grp);
+ return free;
}
static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
index cd80499351e0..40178b6e0688 100644
--- a/fs/erofs/xattr.c
+++ b/fs/erofs/xattr.c
@@ -7,32 +7,27 @@
#include <linux/security.h>
#include "xattr.h"
-static inline erofs_blk_t erofs_xattr_blkaddr(struct super_block *sb,
- unsigned int xattr_id)
-{
- return EROFS_SB(sb)->xattr_blkaddr +
- erofs_blknr(sb, xattr_id * sizeof(__u32));
-}
-
-static inline unsigned int erofs_xattr_blkoff(struct super_block *sb,
- unsigned int xattr_id)
-{
- return erofs_blkoff(sb, xattr_id * sizeof(__u32));
-}
-
-struct xattr_iter {
+struct erofs_xattr_iter {
struct super_block *sb;
struct erofs_buf buf;
+ erofs_off_t pos;
void *kaddr;
- erofs_blk_t blkaddr;
- unsigned int ofs;
+ char *buffer;
+ int buffer_size, buffer_ofs;
+
+ /* getxattr */
+ int index, infix_len;
+ struct qstr name;
+
+ /* listxattr */
+ struct dentry *dentry;
};
static int erofs_init_inode_xattrs(struct inode *inode)
{
struct erofs_inode *const vi = EROFS_I(inode);
- struct xattr_iter it;
+ struct erofs_xattr_iter it;
unsigned int i;
struct erofs_xattr_ibody_header *ih;
struct super_block *sb = inode->i_sb;
@@ -81,17 +76,17 @@ static int erofs_init_inode_xattrs(struct inode *inode)
}
it.buf = __EROFS_BUF_INITIALIZER;
- it.blkaddr = erofs_blknr(sb, erofs_iloc(inode) + vi->inode_isize);
- it.ofs = erofs_blkoff(sb, erofs_iloc(inode) + vi->inode_isize);
+ erofs_init_metabuf(&it.buf, sb);
+ it.pos = erofs_iloc(inode) + vi->inode_isize;
/* read in shared xattr array (non-atomic, see kmalloc below) */
- it.kaddr = erofs_read_metabuf(&it.buf, sb, it.blkaddr, EROFS_KMAP);
+ it.kaddr = erofs_bread(&it.buf, erofs_blknr(sb, it.pos), EROFS_KMAP);
if (IS_ERR(it.kaddr)) {
ret = PTR_ERR(it.kaddr);
goto out_unlock;
}
- ih = (struct erofs_xattr_ibody_header *)(it.kaddr + it.ofs);
+ ih = it.kaddr + erofs_blkoff(sb, it.pos);
vi->xattr_shared_count = ih->h_shared_count;
vi->xattr_shared_xattrs = kmalloc_array(vi->xattr_shared_count,
sizeof(uint), GFP_KERNEL);
@@ -102,26 +97,20 @@ static int erofs_init_inode_xattrs(struct inode *inode)
}
/* let's skip ibody header */
- it.ofs += sizeof(struct erofs_xattr_ibody_header);
+ it.pos += sizeof(struct erofs_xattr_ibody_header);
for (i = 0; i < vi->xattr_shared_count; ++i) {
- if (it.ofs >= sb->s_blocksize) {
- /* cannot be unaligned */
- DBG_BUGON(it.ofs != sb->s_blocksize);
-
- it.kaddr = erofs_read_metabuf(&it.buf, sb, ++it.blkaddr,
- EROFS_KMAP);
- if (IS_ERR(it.kaddr)) {
- kfree(vi->xattr_shared_xattrs);
- vi->xattr_shared_xattrs = NULL;
- ret = PTR_ERR(it.kaddr);
- goto out_unlock;
- }
- it.ofs = 0;
+ it.kaddr = erofs_bread(&it.buf, erofs_blknr(sb, it.pos),
+ EROFS_KMAP);
+ if (IS_ERR(it.kaddr)) {
+ kfree(vi->xattr_shared_xattrs);
+ vi->xattr_shared_xattrs = NULL;
+ ret = PTR_ERR(it.kaddr);
+ goto out_unlock;
}
- vi->xattr_shared_xattrs[i] =
- le32_to_cpu(*(__le32 *)(it.kaddr + it.ofs));
- it.ofs += sizeof(__le32);
+ vi->xattr_shared_xattrs[i] = le32_to_cpu(*(__le32 *)
+ (it.kaddr + erofs_blkoff(sb, it.pos)));
+ it.pos += sizeof(__le32);
}
erofs_put_metabuf(&it.buf);
@@ -134,287 +123,6 @@ out_unlock:
return ret;
}
-/*
- * the general idea for these return values is
- * if 0 is returned, go on processing the current xattr;
- * 1 (> 0) is returned, skip this round to process the next xattr;
- * -err (< 0) is returned, an error (maybe ENOXATTR) occurred
- * and need to be handled
- */
-struct xattr_iter_handlers {
- int (*entry)(struct xattr_iter *_it, struct erofs_xattr_entry *entry);
- int (*name)(struct xattr_iter *_it, unsigned int processed, char *buf,
- unsigned int len);
- int (*alloc_buffer)(struct xattr_iter *_it, unsigned int value_sz);
- void (*value)(struct xattr_iter *_it, unsigned int processed, char *buf,
- unsigned int len);
-};
-
-static inline int xattr_iter_fixup(struct xattr_iter *it)
-{
- if (it->ofs < it->sb->s_blocksize)
- return 0;
-
- it->blkaddr += erofs_blknr(it->sb, it->ofs);
- it->kaddr = erofs_read_metabuf(&it->buf, it->sb, it->blkaddr,
- EROFS_KMAP);
- if (IS_ERR(it->kaddr))
- return PTR_ERR(it->kaddr);
- it->ofs = erofs_blkoff(it->sb, it->ofs);
- return 0;
-}
-
-static int inline_xattr_iter_begin(struct xattr_iter *it,
- struct inode *inode)
-{
- struct erofs_inode *const vi = EROFS_I(inode);
- unsigned int xattr_header_sz, inline_xattr_ofs;
-
- xattr_header_sz = sizeof(struct erofs_xattr_ibody_header) +
- sizeof(u32) * vi->xattr_shared_count;
- if (xattr_header_sz >= vi->xattr_isize) {
- DBG_BUGON(xattr_header_sz > vi->xattr_isize);
- return -ENOATTR;
- }
-
- inline_xattr_ofs = vi->inode_isize + xattr_header_sz;
-
- it->blkaddr = erofs_blknr(it->sb, erofs_iloc(inode) + inline_xattr_ofs);
- it->ofs = erofs_blkoff(it->sb, erofs_iloc(inode) + inline_xattr_ofs);
- it->kaddr = erofs_read_metabuf(&it->buf, inode->i_sb, it->blkaddr,
- EROFS_KMAP);
- if (IS_ERR(it->kaddr))
- return PTR_ERR(it->kaddr);
- return vi->xattr_isize - xattr_header_sz;
-}
-
-/*
- * Regardless of success or failure, `xattr_foreach' will end up with
- * `ofs' pointing to the next xattr item rather than an arbitrary position.
- */
-static int xattr_foreach(struct xattr_iter *it,
- const struct xattr_iter_handlers *op,
- unsigned int *tlimit)
-{
- struct erofs_xattr_entry entry;
- unsigned int value_sz, processed, slice;
- int err;
-
- /* 0. fixup blkaddr, ofs, ipage */
- err = xattr_iter_fixup(it);
- if (err)
- return err;
-
- /*
- * 1. read xattr entry to the memory,
- * since we do EROFS_XATTR_ALIGN
- * therefore entry should be in the page
- */
- entry = *(struct erofs_xattr_entry *)(it->kaddr + it->ofs);
- if (tlimit) {
- unsigned int entry_sz = erofs_xattr_entry_size(&entry);
-
- /* xattr on-disk corruption: xattr entry beyond xattr_isize */
- if (*tlimit < entry_sz) {
- DBG_BUGON(1);
- return -EFSCORRUPTED;
- }
- *tlimit -= entry_sz;
- }
-
- it->ofs += sizeof(struct erofs_xattr_entry);
- value_sz = le16_to_cpu(entry.e_value_size);
-
- /* handle entry */
- err = op->entry(it, &entry);
- if (err) {
- it->ofs += entry.e_name_len + value_sz;
- goto out;
- }
-
- /* 2. handle xattr name (ofs will finally be at the end of name) */
- processed = 0;
-
- while (processed < entry.e_name_len) {
- if (it->ofs >= it->sb->s_blocksize) {
- DBG_BUGON(it->ofs > it->sb->s_blocksize);
-
- err = xattr_iter_fixup(it);
- if (err)
- goto out;
- it->ofs = 0;
- }
-
- slice = min_t(unsigned int, it->sb->s_blocksize - it->ofs,
- entry.e_name_len - processed);
-
- /* handle name */
- err = op->name(it, processed, it->kaddr + it->ofs, slice);
- if (err) {
- it->ofs += entry.e_name_len - processed + value_sz;
- goto out;
- }
-
- it->ofs += slice;
- processed += slice;
- }
-
- /* 3. handle xattr value */
- processed = 0;
-
- if (op->alloc_buffer) {
- err = op->alloc_buffer(it, value_sz);
- if (err) {
- it->ofs += value_sz;
- goto out;
- }
- }
-
- while (processed < value_sz) {
- if (it->ofs >= it->sb->s_blocksize) {
- DBG_BUGON(it->ofs > it->sb->s_blocksize);
-
- err = xattr_iter_fixup(it);
- if (err)
- goto out;
- it->ofs = 0;
- }
-
- slice = min_t(unsigned int, it->sb->s_blocksize - it->ofs,
- value_sz - processed);
- op->value(it, processed, it->kaddr + it->ofs, slice);
- it->ofs += slice;
- processed += slice;
- }
-
-out:
- /* xattrs should be 4-byte aligned (on-disk constraint) */
- it->ofs = EROFS_XATTR_ALIGN(it->ofs);
- return err < 0 ? err : 0;
-}
-
-struct getxattr_iter {
- struct xattr_iter it;
-
- char *buffer;
- int buffer_size, index, infix_len;
- struct qstr name;
-};
-
-static int erofs_xattr_long_entrymatch(struct getxattr_iter *it,
- struct erofs_xattr_entry *entry)
-{
- struct erofs_sb_info *sbi = EROFS_SB(it->it.sb);
- struct erofs_xattr_prefix_item *pf = sbi->xattr_prefixes +
- (entry->e_name_index & EROFS_XATTR_LONG_PREFIX_MASK);
-
- if (pf >= sbi->xattr_prefixes + sbi->xattr_prefix_count)
- return -ENOATTR;
-
- if (it->index != pf->prefix->base_index ||
- it->name.len != entry->e_name_len + pf->infix_len)
- return -ENOATTR;
-
- if (memcmp(it->name.name, pf->prefix->infix, pf->infix_len))
- return -ENOATTR;
-
- it->infix_len = pf->infix_len;
- return 0;
-}
-
-static int xattr_entrymatch(struct xattr_iter *_it,
- struct erofs_xattr_entry *entry)
-{
- struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
-
- /* should also match the infix for long name prefixes */
- if (entry->e_name_index & EROFS_XATTR_LONG_PREFIX)
- return erofs_xattr_long_entrymatch(it, entry);
-
- if (it->index != entry->e_name_index ||
- it->name.len != entry->e_name_len)
- return -ENOATTR;
- it->infix_len = 0;
- return 0;
-}
-
-static int xattr_namematch(struct xattr_iter *_it,
- unsigned int processed, char *buf, unsigned int len)
-{
- struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
-
- if (memcmp(buf, it->name.name + it->infix_len + processed, len))
- return -ENOATTR;
- return 0;
-}
-
-static int xattr_checkbuffer(struct xattr_iter *_it,
- unsigned int value_sz)
-{
- struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
- int err = it->buffer_size < value_sz ? -ERANGE : 0;
-
- it->buffer_size = value_sz;
- return !it->buffer ? 1 : err;
-}
-
-static void xattr_copyvalue(struct xattr_iter *_it,
- unsigned int processed,
- char *buf, unsigned int len)
-{
- struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
-
- memcpy(it->buffer + processed, buf, len);
-}
-
-static const struct xattr_iter_handlers find_xattr_handlers = {
- .entry = xattr_entrymatch,
- .name = xattr_namematch,
- .alloc_buffer = xattr_checkbuffer,
- .value = xattr_copyvalue
-};
-
-static int inline_getxattr(struct inode *inode, struct getxattr_iter *it)
-{
- int ret;
- unsigned int remaining;
-
- ret = inline_xattr_iter_begin(&it->it, inode);
- if (ret < 0)
- return ret;
-
- remaining = ret;
- while (remaining) {
- ret = xattr_foreach(&it->it, &find_xattr_handlers, &remaining);
- if (ret != -ENOATTR)
- break;
- }
- return ret ? ret : it->buffer_size;
-}
-
-static int shared_getxattr(struct inode *inode, struct getxattr_iter *it)
-{
- struct erofs_inode *const vi = EROFS_I(inode);
- struct super_block *const sb = it->it.sb;
- unsigned int i, xsid;
- int ret = -ENOATTR;
-
- for (i = 0; i < vi->xattr_shared_count; ++i) {
- xsid = vi->xattr_shared_xattrs[i];
- it->it.blkaddr = erofs_xattr_blkaddr(sb, xsid);
- it->it.ofs = erofs_xattr_blkoff(sb, xsid);
- it->it.kaddr = erofs_read_metabuf(&it->it.buf, sb,
- it->it.blkaddr, EROFS_KMAP);
- if (IS_ERR(it->it.kaddr))
- return PTR_ERR(it->it.kaddr);
-
- ret = xattr_foreach(&it->it, &find_xattr_handlers, NULL);
- if (ret != -ENOATTR)
- break;
- }
- return ret ? ret : it->buffer_size;
-}
-
static bool erofs_xattr_user_list(struct dentry *dentry)
{
return test_opt(&EROFS_SB(dentry->d_sb)->opt, XATTR_USER);
@@ -425,39 +133,6 @@ static bool erofs_xattr_trusted_list(struct dentry *dentry)
return capable(CAP_SYS_ADMIN);
}
-int erofs_getxattr(struct inode *inode, int index,
- const char *name,
- void *buffer, size_t buffer_size)
-{
- int ret;
- struct getxattr_iter it;
-
- if (!name)
- return -EINVAL;
-
- ret = erofs_init_inode_xattrs(inode);
- if (ret)
- return ret;
-
- it.index = index;
- it.name.len = strlen(name);
- if (it.name.len > EROFS_NAME_LEN)
- return -ERANGE;
-
- it.it.buf = __EROFS_BUF_INITIALIZER;
- it.name.name = name;
-
- it.buffer = buffer;
- it.buffer_size = buffer_size;
-
- it.it.sb = inode->i_sb;
- ret = inline_getxattr(inode, &it);
- if (ret == -ENOATTR)
- ret = shared_getxattr(inode, &it);
- erofs_put_metabuf(&it.it.buf);
- return ret;
-}
-
static int erofs_xattr_generic_get(const struct xattr_handler *handler,
struct dentry *unused, struct inode *inode,
const char *name, void *buffer, size_t size)
@@ -500,30 +175,49 @@ const struct xattr_handler *erofs_xattr_handlers[] = {
NULL,
};
-struct listxattr_iter {
- struct xattr_iter it;
-
- struct dentry *dentry;
- char *buffer;
- int buffer_size, buffer_ofs;
-};
+static int erofs_xattr_copy_to_buffer(struct erofs_xattr_iter *it,
+ unsigned int len)
+{
+ unsigned int slice, processed;
+ struct super_block *sb = it->sb;
+ void *src;
+
+ for (processed = 0; processed < len; processed += slice) {
+ it->kaddr = erofs_bread(&it->buf, erofs_blknr(sb, it->pos),
+ EROFS_KMAP);
+ if (IS_ERR(it->kaddr))
+ return PTR_ERR(it->kaddr);
+
+ src = it->kaddr + erofs_blkoff(sb, it->pos);
+ slice = min_t(unsigned int, sb->s_blocksize -
+ erofs_blkoff(sb, it->pos), len - processed);
+ memcpy(it->buffer + it->buffer_ofs, src, slice);
+ it->buffer_ofs += slice;
+ it->pos += slice;
+ }
+ return 0;
+}
-static int xattr_entrylist(struct xattr_iter *_it,
- struct erofs_xattr_entry *entry)
+static int erofs_listxattr_foreach(struct erofs_xattr_iter *it)
{
- struct listxattr_iter *it =
- container_of(_it, struct listxattr_iter, it);
- unsigned int base_index = entry->e_name_index;
- unsigned int prefix_len, infix_len = 0;
+ struct erofs_xattr_entry entry;
+ unsigned int base_index, name_total, prefix_len, infix_len = 0;
const char *prefix, *infix = NULL;
+ int err;
+
+ /* 1. handle xattr entry */
+ entry = *(struct erofs_xattr_entry *)
+ (it->kaddr + erofs_blkoff(it->sb, it->pos));
+ it->pos += sizeof(struct erofs_xattr_entry);
- if (entry->e_name_index & EROFS_XATTR_LONG_PREFIX) {
- struct erofs_sb_info *sbi = EROFS_SB(_it->sb);
+ base_index = entry.e_name_index;
+ if (entry.e_name_index & EROFS_XATTR_LONG_PREFIX) {
+ struct erofs_sb_info *sbi = EROFS_SB(it->sb);
struct erofs_xattr_prefix_item *pf = sbi->xattr_prefixes +
- (entry->e_name_index & EROFS_XATTR_LONG_PREFIX_MASK);
+ (entry.e_name_index & EROFS_XATTR_LONG_PREFIX_MASK);
if (pf >= sbi->xattr_prefixes + sbi->xattr_prefix_count)
- return 1;
+ return 0;
infix = pf->prefix->infix;
infix_len = pf->infix_len;
base_index = pf->prefix->base_index;
@@ -531,120 +225,228 @@ static int xattr_entrylist(struct xattr_iter *_it,
prefix = erofs_xattr_prefix(base_index, it->dentry);
if (!prefix)
- return 1;
+ return 0;
prefix_len = strlen(prefix);
+ name_total = prefix_len + infix_len + entry.e_name_len + 1;
if (!it->buffer) {
- it->buffer_ofs += prefix_len + infix_len +
- entry->e_name_len + 1;
- return 1;
+ it->buffer_ofs += name_total;
+ return 0;
}
- if (it->buffer_ofs + prefix_len + infix_len +
- + entry->e_name_len + 1 > it->buffer_size)
+ if (it->buffer_ofs + name_total > it->buffer_size)
return -ERANGE;
memcpy(it->buffer + it->buffer_ofs, prefix, prefix_len);
memcpy(it->buffer + it->buffer_ofs + prefix_len, infix, infix_len);
it->buffer_ofs += prefix_len + infix_len;
- return 0;
-}
-static int xattr_namelist(struct xattr_iter *_it,
- unsigned int processed, char *buf, unsigned int len)
-{
- struct listxattr_iter *it =
- container_of(_it, struct listxattr_iter, it);
+ /* 2. handle xattr name */
+ err = erofs_xattr_copy_to_buffer(it, entry.e_name_len);
+ if (err)
+ return err;
- memcpy(it->buffer + it->buffer_ofs, buf, len);
- it->buffer_ofs += len;
+ it->buffer[it->buffer_ofs++] = '\0';
return 0;
}
-static int xattr_skipvalue(struct xattr_iter *_it,
- unsigned int value_sz)
+static int erofs_getxattr_foreach(struct erofs_xattr_iter *it)
{
- struct listxattr_iter *it =
- container_of(_it, struct listxattr_iter, it);
+ struct super_block *sb = it->sb;
+ struct erofs_xattr_entry entry;
+ unsigned int slice, processed, value_sz;
- it->buffer[it->buffer_ofs++] = '\0';
- return 1;
-}
+ /* 1. handle xattr entry */
+ entry = *(struct erofs_xattr_entry *)
+ (it->kaddr + erofs_blkoff(sb, it->pos));
+ it->pos += sizeof(struct erofs_xattr_entry);
+ value_sz = le16_to_cpu(entry.e_value_size);
-static const struct xattr_iter_handlers list_xattr_handlers = {
- .entry = xattr_entrylist,
- .name = xattr_namelist,
- .alloc_buffer = xattr_skipvalue,
- .value = NULL
-};
+ /* should also match the infix for long name prefixes */
+ if (entry.e_name_index & EROFS_XATTR_LONG_PREFIX) {
+ struct erofs_sb_info *sbi = EROFS_SB(sb);
+ struct erofs_xattr_prefix_item *pf = sbi->xattr_prefixes +
+ (entry.e_name_index & EROFS_XATTR_LONG_PREFIX_MASK);
+
+ if (pf >= sbi->xattr_prefixes + sbi->xattr_prefix_count)
+ return -ENOATTR;
+
+ if (it->index != pf->prefix->base_index ||
+ it->name.len != entry.e_name_len + pf->infix_len)
+ return -ENOATTR;
+
+ if (memcmp(it->name.name, pf->prefix->infix, pf->infix_len))
+ return -ENOATTR;
+
+ it->infix_len = pf->infix_len;
+ } else {
+ if (it->index != entry.e_name_index ||
+ it->name.len != entry.e_name_len)
+ return -ENOATTR;
-static int inline_listxattr(struct listxattr_iter *it)
+ it->infix_len = 0;
+ }
+
+ /* 2. handle xattr name */
+ for (processed = 0; processed < entry.e_name_len; processed += slice) {
+ it->kaddr = erofs_bread(&it->buf, erofs_blknr(sb, it->pos),
+ EROFS_KMAP);
+ if (IS_ERR(it->kaddr))
+ return PTR_ERR(it->kaddr);
+
+ slice = min_t(unsigned int,
+ sb->s_blocksize - erofs_blkoff(sb, it->pos),
+ entry.e_name_len - processed);
+ if (memcmp(it->name.name + it->infix_len + processed,
+ it->kaddr + erofs_blkoff(sb, it->pos), slice))
+ return -ENOATTR;
+ it->pos += slice;
+ }
+
+ /* 3. handle xattr value */
+ if (!it->buffer) {
+ it->buffer_ofs = value_sz;
+ return 0;
+ }
+
+ if (it->buffer_size < value_sz)
+ return -ERANGE;
+
+ return erofs_xattr_copy_to_buffer(it, value_sz);
+}
+
+static int erofs_xattr_iter_inline(struct erofs_xattr_iter *it,
+ struct inode *inode, bool getxattr)
{
+ struct erofs_inode *const vi = EROFS_I(inode);
+ unsigned int xattr_header_sz, remaining, entry_sz;
+ erofs_off_t next_pos;
int ret;
- unsigned int remaining;
- ret = inline_xattr_iter_begin(&it->it, d_inode(it->dentry));
- if (ret < 0)
- return ret;
+ xattr_header_sz = sizeof(struct erofs_xattr_ibody_header) +
+ sizeof(u32) * vi->xattr_shared_count;
+ if (xattr_header_sz >= vi->xattr_isize) {
+ DBG_BUGON(xattr_header_sz > vi->xattr_isize);
+ return -ENOATTR;
+ }
+
+ remaining = vi->xattr_isize - xattr_header_sz;
+ it->pos = erofs_iloc(inode) + vi->inode_isize + xattr_header_sz;
- remaining = ret;
while (remaining) {
- ret = xattr_foreach(&it->it, &list_xattr_handlers, &remaining);
- if (ret)
+ it->kaddr = erofs_bread(&it->buf, erofs_blknr(it->sb, it->pos),
+ EROFS_KMAP);
+ if (IS_ERR(it->kaddr))
+ return PTR_ERR(it->kaddr);
+
+ entry_sz = erofs_xattr_entry_size(it->kaddr +
+ erofs_blkoff(it->sb, it->pos));
+ /* xattr on-disk corruption: xattr entry beyond xattr_isize */
+ if (remaining < entry_sz) {
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
+ }
+ remaining -= entry_sz;
+ next_pos = it->pos + entry_sz;
+
+ if (getxattr)
+ ret = erofs_getxattr_foreach(it);
+ else
+ ret = erofs_listxattr_foreach(it);
+ if ((getxattr && ret != -ENOATTR) || (!getxattr && ret))
break;
+
+ it->pos = next_pos;
}
- return ret ? ret : it->buffer_ofs;
+ return ret;
}
-static int shared_listxattr(struct listxattr_iter *it)
+static int erofs_xattr_iter_shared(struct erofs_xattr_iter *it,
+ struct inode *inode, bool getxattr)
{
- struct inode *const inode = d_inode(it->dentry);
struct erofs_inode *const vi = EROFS_I(inode);
- struct super_block *const sb = it->it.sb;
- unsigned int i, xsid;
- int ret = 0;
+ struct super_block *const sb = it->sb;
+ struct erofs_sb_info *sbi = EROFS_SB(sb);
+ unsigned int i;
+ int ret = -ENOATTR;
for (i = 0; i < vi->xattr_shared_count; ++i) {
- xsid = vi->xattr_shared_xattrs[i];
- it->it.blkaddr = erofs_xattr_blkaddr(sb, xsid);
- it->it.ofs = erofs_xattr_blkoff(sb, xsid);
- it->it.kaddr = erofs_read_metabuf(&it->it.buf, sb,
- it->it.blkaddr, EROFS_KMAP);
- if (IS_ERR(it->it.kaddr))
- return PTR_ERR(it->it.kaddr);
-
- ret = xattr_foreach(&it->it, &list_xattr_handlers, NULL);
- if (ret)
+ it->pos = erofs_pos(sb, sbi->xattr_blkaddr) +
+ vi->xattr_shared_xattrs[i] * sizeof(__le32);
+ it->kaddr = erofs_bread(&it->buf, erofs_blknr(sb, it->pos),
+ EROFS_KMAP);
+ if (IS_ERR(it->kaddr))
+ return PTR_ERR(it->kaddr);
+
+ if (getxattr)
+ ret = erofs_getxattr_foreach(it);
+ else
+ ret = erofs_listxattr_foreach(it);
+ if ((getxattr && ret != -ENOATTR) || (!getxattr && ret))
break;
}
- return ret ? ret : it->buffer_ofs;
+ return ret;
+}
+
+int erofs_getxattr(struct inode *inode, int index, const char *name,
+ void *buffer, size_t buffer_size)
+{
+ int ret;
+ struct erofs_xattr_iter it;
+
+ if (!name)
+ return -EINVAL;
+
+ ret = erofs_init_inode_xattrs(inode);
+ if (ret)
+ return ret;
+
+ it.index = index;
+ it.name = (struct qstr)QSTR_INIT(name, strlen(name));
+ if (it.name.len > EROFS_NAME_LEN)
+ return -ERANGE;
+
+ it.sb = inode->i_sb;
+ it.buf = __EROFS_BUF_INITIALIZER;
+ erofs_init_metabuf(&it.buf, it.sb);
+ it.buffer = buffer;
+ it.buffer_size = buffer_size;
+ it.buffer_ofs = 0;
+
+ ret = erofs_xattr_iter_inline(&it, inode, true);
+ if (ret == -ENOATTR)
+ ret = erofs_xattr_iter_shared(&it, inode, true);
+ erofs_put_metabuf(&it.buf);
+ return ret ? ret : it.buffer_ofs;
}
-ssize_t erofs_listxattr(struct dentry *dentry,
- char *buffer, size_t buffer_size)
+ssize_t erofs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
{
int ret;
- struct listxattr_iter it;
+ struct erofs_xattr_iter it;
+ struct inode *inode = d_inode(dentry);
- ret = erofs_init_inode_xattrs(d_inode(dentry));
+ ret = erofs_init_inode_xattrs(inode);
if (ret == -ENOATTR)
return 0;
if (ret)
return ret;
- it.it.buf = __EROFS_BUF_INITIALIZER;
+ it.sb = dentry->d_sb;
+ it.buf = __EROFS_BUF_INITIALIZER;
+ erofs_init_metabuf(&it.buf, it.sb);
it.dentry = dentry;
it.buffer = buffer;
it.buffer_size = buffer_size;
it.buffer_ofs = 0;
- it.it.sb = dentry->d_sb;
-
- ret = inline_listxattr(&it);
- if (ret >= 0 || ret == -ENOATTR)
- ret = shared_listxattr(&it);
- erofs_put_metabuf(&it.it.buf);
- return ret;
+ ret = erofs_xattr_iter_inline(&it, inode, false);
+ if (!ret || ret == -ENOATTR)
+ ret = erofs_xattr_iter_shared(&it, inode, false);
+ if (ret == -ENOATTR)
+ ret = 0;
+ erofs_put_metabuf(&it.buf);
+ return ret ? ret : it.buffer_ofs;
}
void erofs_xattr_prefixes_cleanup(struct super_block *sb)
@@ -675,7 +477,7 @@ int erofs_xattr_prefixes_init(struct super_block *sb)
if (!pfs)
return -ENOMEM;
- if (erofs_sb_has_fragments(sbi))
+ if (sbi->packed_inode)
buf.inode = sbi->packed_inode;
else
erofs_init_metabuf(&buf, sb);
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 45f21db2303a..5f1890e309c6 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -5,7 +5,6 @@
* Copyright (C) 2022 Alibaba Cloud
*/
#include "compress.h"
-#include <linux/prefetch.h>
#include <linux/psi.h>
#include <linux/cpuhotplug.h>
#include <trace/events/erofs.h>
@@ -92,13 +91,8 @@ struct z_erofs_pcluster {
struct z_erofs_bvec compressed_bvecs[];
};
-/* let's avoid the valid 32-bit kernel addresses */
-
-/* the chained workgroup has't submitted io (still open) */
-#define Z_EROFS_PCLUSTER_TAIL ((void *)0x5F0ECAFE)
-/* the chained workgroup has already submitted io */
-#define Z_EROFS_PCLUSTER_TAIL_CLOSED ((void *)0x5F0EDEAD)
-
+/* the end of a chain of pclusters */
+#define Z_EROFS_PCLUSTER_TAIL ((void *) 0x700 + POISON_POINTER_DELTA)
#define Z_EROFS_PCLUSTER_NIL (NULL)
struct z_erofs_decompressqueue {
@@ -241,14 +235,20 @@ static void z_erofs_bvec_iter_begin(struct z_erofs_bvec_iter *iter,
static int z_erofs_bvec_enqueue(struct z_erofs_bvec_iter *iter,
struct z_erofs_bvec *bvec,
- struct page **candidate_bvpage)
+ struct page **candidate_bvpage,
+ struct page **pagepool)
{
- if (iter->cur == iter->nr) {
- if (!*candidate_bvpage)
- return -EAGAIN;
-
+ if (iter->cur >= iter->nr) {
+ struct page *nextpage = *candidate_bvpage;
+
+ if (!nextpage) {
+ nextpage = erofs_allocpage(pagepool, GFP_NOFS);
+ if (!nextpage)
+ return -ENOMEM;
+ set_page_private(nextpage, Z_EROFS_SHORTLIVED_PAGE);
+ }
DBG_BUGON(iter->bvset->nextpage);
- iter->bvset->nextpage = *candidate_bvpage;
+ iter->bvset->nextpage = nextpage;
z_erofs_bvset_flip(iter);
iter->bvset->nextpage = NULL;
@@ -369,8 +369,6 @@ static struct kthread_worker *erofs_init_percpu_worker(int cpu)
return worker;
if (IS_ENABLED(CONFIG_EROFS_FS_PCPU_KTHREAD_HIPRI))
sched_set_fifo_low(worker->task);
- else
- sched_set_normal(worker->task, 0);
return worker;
}
@@ -502,20 +500,6 @@ out_error_pcluster_pool:
enum z_erofs_pclustermode {
Z_EROFS_PCLUSTER_INFLIGHT,
/*
- * The current pclusters was the tail of an exist chain, in addition
- * that the previous processed chained pclusters are all decided to
- * be hooked up to it.
- * A new chain will be created for the remaining pclusters which are
- * not processed yet, so different from Z_EROFS_PCLUSTER_FOLLOWED,
- * the next pcluster cannot reuse the whole page safely for inplace I/O
- * in the following scenario:
- * ________________________________________________________________
- * | tail (partial) page | head (partial) page |
- * | (belongs to the next pcl) | (belongs to the current pcl) |
- * |_______PCLUSTER_FOLLOWED______|________PCLUSTER_HOOKED__________|
- */
- Z_EROFS_PCLUSTER_HOOKED,
- /*
* a weak form of Z_EROFS_PCLUSTER_FOLLOWED, the difference is that it
* could be dispatched into bypass queue later due to uptodated managed
* pages. All related online pages cannot be reused for inplace I/O (or
@@ -532,8 +516,8 @@ enum z_erofs_pclustermode {
* ________________________________________________________________
* | tail (partial) page | head (partial) page |
* | (of the current cl) | (of the previous collection) |
- * | PCLUSTER_FOLLOWED or | |
- * |_____PCLUSTER_HOOKED__|___________PCLUSTER_FOLLOWED____________|
+ * | | |
+ * |__PCLUSTER_FOLLOWED___|___________PCLUSTER_FOLLOWED____________|
*
* [ (*) the above page can be used as inplace I/O. ]
*/
@@ -545,12 +529,12 @@ struct z_erofs_decompress_frontend {
struct erofs_map_blocks map;
struct z_erofs_bvec_iter biter;
+ struct page *pagepool;
struct page *candidate_bvpage;
- struct z_erofs_pcluster *pcl, *tailpcl;
+ struct z_erofs_pcluster *pcl;
z_erofs_next_pcluster_t owned_head;
enum z_erofs_pclustermode mode;
- bool readahead;
/* used for applying cache strategy on the fly */
bool backmost;
erofs_off_t headoffset;
@@ -580,8 +564,7 @@ static bool z_erofs_should_alloc_cache(struct z_erofs_decompress_frontend *fe)
return false;
}
-static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe,
- struct page **pagepool)
+static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
{
struct address_space *mc = MNGD_MAPPING(EROFS_I_SB(fe->inode));
struct z_erofs_pcluster *pcl = fe->pcl;
@@ -622,7 +605,7 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe,
* succeeds or fallback to in-place I/O instead
* to avoid any direct reclaim.
*/
- newpage = erofs_allocpage(pagepool, gfp);
+ newpage = erofs_allocpage(&fe->pagepool, gfp);
if (!newpage)
continue;
set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE);
@@ -635,7 +618,7 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe,
if (page)
put_page(page);
else if (newpage)
- erofs_pagepool_add(pagepool, newpage);
+ erofs_pagepool_add(&fe->pagepool, newpage);
}
/*
@@ -656,7 +639,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
/*
- * refcount of workgroup is now freezed as 1,
+ * refcount of workgroup is now freezed as 0,
* therefore no need to worry about available decompression users.
*/
for (i = 0; i < pcl->pclusterpages; ++i) {
@@ -680,29 +663,73 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
return 0;
}
-int erofs_try_to_free_cached_page(struct page *page)
+static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp)
{
- struct z_erofs_pcluster *const pcl = (void *)page_private(page);
- int ret, i;
+ struct z_erofs_pcluster *pcl = folio_get_private(folio);
+ bool ret;
+ int i;
- if (!erofs_workgroup_try_to_freeze(&pcl->obj, 1))
- return 0;
+ if (!folio_test_private(folio))
+ return true;
+
+ ret = false;
+ spin_lock(&pcl->obj.lockref.lock);
+ if (pcl->obj.lockref.count > 0)
+ goto out;
- ret = 0;
DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
for (i = 0; i < pcl->pclusterpages; ++i) {
- if (pcl->compressed_bvecs[i].page == page) {
+ if (pcl->compressed_bvecs[i].page == &folio->page) {
WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
- ret = 1;
+ ret = true;
break;
}
}
- erofs_workgroup_unfreeze(&pcl->obj, 1);
if (ret)
- detach_page_private(page);
+ folio_detach_private(folio);
+out:
+ spin_unlock(&pcl->obj.lockref.lock);
return ret;
}
+/*
+ * It will be called only on inode eviction. In case that there are still some
+ * decompression requests in progress, wait with rescheduling for a bit here.
+ * An extra lock could be introduced instead but it seems unnecessary.
+ */
+static void z_erofs_cache_invalidate_folio(struct folio *folio,
+ size_t offset, size_t length)
+{
+ const size_t stop = length + offset;
+
+ /* Check for potential overflow in debug mode */
+ DBG_BUGON(stop > folio_size(folio) || stop < length);
+
+ if (offset == 0 && stop == folio_size(folio))
+ while (!z_erofs_cache_release_folio(folio, GFP_NOFS))
+ cond_resched();
+}
+
+static const struct address_space_operations z_erofs_cache_aops = {
+ .release_folio = z_erofs_cache_release_folio,
+ .invalidate_folio = z_erofs_cache_invalidate_folio,
+};
+
+int erofs_init_managed_cache(struct super_block *sb)
+{
+ struct inode *const inode = new_inode(sb);
+
+ if (!inode)
+ return -ENOMEM;
+
+ set_nlink(inode, 1);
+ inode->i_size = OFFSET_MAX;
+ inode->i_mapping->a_ops = &z_erofs_cache_aops;
+ mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
+ EROFS_SB(sb)->managed_cache = inode;
+ return 0;
+}
+
static bool z_erofs_try_inplace_io(struct z_erofs_decompress_frontend *fe,
struct z_erofs_bvec *bvec)
{
@@ -733,7 +760,8 @@ static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe,
!fe->candidate_bvpage)
fe->candidate_bvpage = bvec->page;
}
- ret = z_erofs_bvec_enqueue(&fe->biter, bvec, &fe->candidate_bvpage);
+ ret = z_erofs_bvec_enqueue(&fe->biter, bvec, &fe->candidate_bvpage,
+ &fe->pagepool);
fe->pcl->vcnt += (ret >= 0);
return ret;
}
@@ -752,19 +780,7 @@ static void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f)
return;
}
- /*
- * type 2, link to the end of an existing open chain, be careful
- * that its submission is controlled by the original attached chain.
- */
- if (*owned_head != &pcl->next && pcl != f->tailpcl &&
- cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
- *owned_head) == Z_EROFS_PCLUSTER_TAIL) {
- *owned_head = Z_EROFS_PCLUSTER_TAIL;
- f->mode = Z_EROFS_PCLUSTER_HOOKED;
- f->tailpcl = NULL;
- return;
- }
- /* type 3, it belongs to a chain, but it isn't the end of the chain */
+ /* type 2, it belongs to an ongoing chain */
f->mode = Z_EROFS_PCLUSTER_INFLIGHT;
}
@@ -788,7 +804,7 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe)
if (IS_ERR(pcl))
return PTR_ERR(pcl);
- atomic_set(&pcl->obj.refcount, 1);
+ spin_lock_init(&pcl->obj.lockref.lock);
pcl->algorithmformat = map->m_algorithmformat;
pcl->length = 0;
pcl->partial = true;
@@ -825,9 +841,6 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe)
goto err_out;
}
}
- /* used to check tail merging loop due to corrupted images */
- if (fe->owned_head == Z_EROFS_PCLUSTER_TAIL)
- fe->tailpcl = pcl;
fe->owned_head = &pcl->next;
fe->pcl = pcl;
return 0;
@@ -848,7 +861,6 @@ static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe)
/* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous pcluster */
DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL);
- DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
if (!(map->m_flags & EROFS_MAP_META)) {
grp = erofs_find_workgroup(fe->inode->i_sb,
@@ -867,10 +879,6 @@ static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe)
if (ret == -EEXIST) {
mutex_lock(&fe->pcl->lock);
- /* used to check tail merging loop due to corrupted images */
- if (fe->owned_head == Z_EROFS_PCLUSTER_TAIL)
- fe->tailpcl = fe->pcl;
-
z_erofs_try_to_claim_pcluster(fe);
} else if (ret) {
return ret;
@@ -910,10 +918,8 @@ static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe)
z_erofs_bvec_iter_end(&fe->biter);
mutex_unlock(&pcl->lock);
- if (fe->candidate_bvpage) {
- DBG_BUGON(z_erofs_is_shortlived_page(fe->candidate_bvpage));
+ if (fe->candidate_bvpage)
fe->candidate_bvpage = NULL;
- }
/*
* if all pending pages are added, don't hold its reference
@@ -960,7 +966,7 @@ static int z_erofs_read_fragment(struct inode *inode, erofs_off_t pos,
}
static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
- struct page *page, struct page **pagepool)
+ struct page *page)
{
struct inode *const inode = fe->inode;
struct erofs_map_blocks *const map = &fe->map;
@@ -1018,7 +1024,7 @@ repeat:
fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
} else {
/* bind cache first when cached decompression is preferred */
- z_erofs_bind_cache(fe, pagepool);
+ z_erofs_bind_cache(fe);
}
hitted:
/*
@@ -1027,8 +1033,7 @@ hitted:
* those chains are handled asynchronously thus the page cannot be used
* for inplace I/O or bvpage (should be processed in a strict order.)
*/
- tight &= (fe->mode >= Z_EROFS_PCLUSTER_HOOKED &&
- fe->mode != Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE);
+ tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE);
cur = end - min_t(unsigned int, offset + end - map->m_la, end);
if (!(map->m_flags & EROFS_MAP_MAPPED)) {
@@ -1058,24 +1063,13 @@ hitted:
if (cur)
tight &= (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED);
-retry:
err = z_erofs_attach_page(fe, &((struct z_erofs_bvec) {
.page = page,
.offset = offset - map->m_la,
.end = end,
}), exclusive);
- /* should allocate an additional short-lived page for bvset */
- if (err == -EAGAIN && !fe->candidate_bvpage) {
- fe->candidate_bvpage = alloc_page(GFP_NOFS | __GFP_NOFAIL);
- set_page_private(fe->candidate_bvpage,
- Z_EROFS_SHORTLIVED_PAGE);
- goto retry;
- }
-
- if (err) {
- DBG_BUGON(err == -EAGAIN && fe->candidate_bvpage);
+ if (err)
goto out;
- }
z_erofs_onlinepage_split(page);
/* bump up the number of spiltted parts of a page */
@@ -1106,7 +1100,7 @@ out:
return err;
}
-static bool z_erofs_get_sync_decompress_policy(struct erofs_sb_info *sbi,
+static bool z_erofs_is_sync_decompress(struct erofs_sb_info *sbi,
unsigned int readahead_pages)
{
/* auto: enable for read_folio, disable for readahead */
@@ -1285,6 +1279,8 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
struct erofs_sb_info *const sbi = EROFS_SB(be->sb);
struct z_erofs_pcluster *pcl = be->pcl;
unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
+ const struct z_erofs_decompressor *decompressor =
+ &erofs_decompressors[pcl->algorithmformat];
unsigned int i, inputsize;
int err2;
struct page *page;
@@ -1328,7 +1324,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
else
inputsize = pclusterpages * PAGE_SIZE;
- err = z_erofs_decompress(&(struct z_erofs_decompress_req) {
+ err = decompressor->decompress(&(struct z_erofs_decompress_req) {
.sb = be->sb,
.in = be->compressed_pages,
.out = be->decompressed_pages,
@@ -1406,10 +1402,7 @@ static void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
};
z_erofs_next_pcluster_t owned = io->head;
- while (owned != Z_EROFS_PCLUSTER_TAIL_CLOSED) {
- /* impossible that 'owned' equals Z_EROFS_WORK_TPTR_TAIL */
- DBG_BUGON(owned == Z_EROFS_PCLUSTER_TAIL);
- /* impossible that 'owned' equals Z_EROFS_PCLUSTER_NIL */
+ while (owned != Z_EROFS_PCLUSTER_TAIL) {
DBG_BUGON(owned == Z_EROFS_PCLUSTER_NIL);
be.pcl = container_of(owned, struct z_erofs_pcluster, next);
@@ -1426,7 +1419,7 @@ static void z_erofs_decompressqueue_work(struct work_struct *work)
container_of(work, struct z_erofs_decompressqueue, u.work);
struct page *pagepool = NULL;
- DBG_BUGON(bgq->head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
+ DBG_BUGON(bgq->head == Z_EROFS_PCLUSTER_TAIL);
z_erofs_decompress_queue(bgq, &pagepool);
erofs_release_pages(&pagepool);
kvfree(bgq);
@@ -1454,7 +1447,7 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
if (atomic_add_return(bios, &io->pending_bios))
return;
/* Use (kthread_)work and sync decompression for atomic contexts only */
- if (in_atomic() || irqs_disabled()) {
+ if (!in_task() || irqs_disabled() || rcu_read_lock_any_held()) {
#ifdef CONFIG_EROFS_FS_PCPU_KTHREAD
struct kthread_worker *worker;
@@ -1614,7 +1607,7 @@ fg_out:
q->sync = true;
}
q->sb = sb;
- q->head = Z_EROFS_PCLUSTER_TAIL_CLOSED;
+ q->head = Z_EROFS_PCLUSTER_TAIL;
return q;
}
@@ -1632,11 +1625,7 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl,
z_erofs_next_pcluster_t *const submit_qtail = qtail[JQ_SUBMIT];
z_erofs_next_pcluster_t *const bypass_qtail = qtail[JQ_BYPASS];
- DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
- if (owned_head == Z_EROFS_PCLUSTER_TAIL)
- owned_head = Z_EROFS_PCLUSTER_TAIL_CLOSED;
-
- WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_TAIL_CLOSED);
+ WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_TAIL);
WRITE_ONCE(*submit_qtail, owned_head);
WRITE_ONCE(*bypass_qtail, &pcl->next);
@@ -1670,9 +1659,8 @@ static void z_erofs_decompressqueue_endio(struct bio *bio)
}
static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
- struct page **pagepool,
struct z_erofs_decompressqueue *fgq,
- bool *force_fg)
+ bool *force_fg, bool readahead)
{
struct super_block *sb = f->inode->i_sb;
struct address_space *mc = MNGD_MAPPING(EROFS_SB(sb));
@@ -1707,15 +1695,10 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
unsigned int i = 0;
bool bypass = true;
- /* no possible 'owned_head' equals the following */
- DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_NIL);
-
pcl = container_of(owned_head, struct z_erofs_pcluster, next);
+ owned_head = READ_ONCE(pcl->next);
- /* close the main owned chain at first */
- owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
- Z_EROFS_PCLUSTER_TAIL_CLOSED);
if (z_erofs_is_inline_pcluster(pcl)) {
move_to_bypass_jobqueue(pcl, qtail, owned_head);
continue;
@@ -1733,8 +1716,8 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
do {
struct page *page;
- page = pickup_page_for_submission(pcl, i++, pagepool,
- mc);
+ page = pickup_page_for_submission(pcl, i++,
+ &f->pagepool, mc);
if (!page)
continue;
@@ -1763,7 +1746,7 @@ submit_bio_retry:
bio->bi_iter.bi_sector = (sector_t)cur <<
(sb->s_blocksize_bits - 9);
bio->bi_private = q[JQ_SUBMIT];
- if (f->readahead)
+ if (readahead)
bio->bi_opf |= REQ_RAHEAD;
++nr_bios;
}
@@ -1799,16 +1782,16 @@ submit_bio_retry:
}
static void z_erofs_runqueue(struct z_erofs_decompress_frontend *f,
- struct page **pagepool, bool force_fg)
+ bool force_fg, bool ra)
{
struct z_erofs_decompressqueue io[NR_JOBQUEUES];
if (f->owned_head == Z_EROFS_PCLUSTER_TAIL)
return;
- z_erofs_submit_queue(f, pagepool, io, &force_fg);
+ z_erofs_submit_queue(f, io, &force_fg, ra);
/* handle bypass queue (no i/o pclusters) immediately */
- z_erofs_decompress_queue(&io[JQ_BYPASS], pagepool);
+ z_erofs_decompress_queue(&io[JQ_BYPASS], &f->pagepool);
if (!force_fg)
return;
@@ -1817,7 +1800,7 @@ static void z_erofs_runqueue(struct z_erofs_decompress_frontend *f,
wait_for_completion_io(&io[JQ_SUBMIT].u.done);
/* handle synchronous decompress queue in the caller context */
- z_erofs_decompress_queue(&io[JQ_SUBMIT], pagepool);
+ z_erofs_decompress_queue(&io[JQ_SUBMIT], &f->pagepool);
}
/*
@@ -1825,29 +1808,28 @@ static void z_erofs_runqueue(struct z_erofs_decompress_frontend *f,
* approximate readmore strategies as a start.
*/
static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
- struct readahead_control *rac,
- erofs_off_t end,
- struct page **pagepool,
- bool backmost)
+ struct readahead_control *rac, bool backmost)
{
struct inode *inode = f->inode;
struct erofs_map_blocks *map = &f->map;
- erofs_off_t cur;
+ erofs_off_t cur, end, headoffset = f->headoffset;
int err;
if (backmost) {
+ if (rac)
+ end = headoffset + readahead_length(rac) - 1;
+ else
+ end = headoffset + PAGE_SIZE - 1;
map->m_la = end;
err = z_erofs_map_blocks_iter(inode, map,
EROFS_GET_BLOCKS_READMORE);
if (err)
return;
- /* expend ra for the trailing edge if readahead */
+ /* expand ra for the trailing edge if readahead */
if (rac) {
- loff_t newstart = readahead_pos(rac);
-
cur = round_up(map->m_la + map->m_llen, PAGE_SIZE);
- readahead_expand(rac, newstart, cur - newstart);
+ readahead_expand(rac, headoffset, cur - headoffset);
return;
}
end = round_up(end, PAGE_SIZE);
@@ -1868,7 +1850,7 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
if (PageUptodate(page)) {
unlock_page(page);
} else {
- err = z_erofs_do_read_page(f, page, pagepool);
+ err = z_erofs_do_read_page(f, page);
if (err)
erofs_err(inode->i_sb,
"readmore error at page %lu @ nid %llu",
@@ -1889,28 +1871,24 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio)
struct inode *const inode = page->mapping->host;
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
- struct page *pagepool = NULL;
int err;
trace_erofs_readpage(page, false);
f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
- z_erofs_pcluster_readmore(&f, NULL, f.headoffset + PAGE_SIZE - 1,
- &pagepool, true);
- err = z_erofs_do_read_page(&f, page, &pagepool);
- z_erofs_pcluster_readmore(&f, NULL, 0, &pagepool, false);
-
+ z_erofs_pcluster_readmore(&f, NULL, true);
+ err = z_erofs_do_read_page(&f, page);
+ z_erofs_pcluster_readmore(&f, NULL, false);
(void)z_erofs_collector_end(&f);
/* if some compressed cluster ready, need submit them anyway */
- z_erofs_runqueue(&f, &pagepool,
- z_erofs_get_sync_decompress_policy(sbi, 0));
+ z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, 0), false);
if (err)
erofs_err(inode->i_sb, "failed to read, err [%d]", err);
erofs_put_metabuf(&f.map.buf);
- erofs_release_pages(&pagepool);
+ erofs_release_pages(&f.pagepool);
return err;
}
@@ -1919,14 +1897,12 @@ static void z_erofs_readahead(struct readahead_control *rac)
struct inode *const inode = rac->mapping->host;
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
- struct page *pagepool = NULL, *head = NULL, *page;
+ struct page *head = NULL, *page;
unsigned int nr_pages;
- f.readahead = true;
f.headoffset = readahead_pos(rac);
- z_erofs_pcluster_readmore(&f, rac, f.headoffset +
- readahead_length(rac) - 1, &pagepool, true);
+ z_erofs_pcluster_readmore(&f, rac, true);
nr_pages = readahead_count(rac);
trace_erofs_readpages(inode, readahead_index(rac), nr_pages, false);
@@ -1942,20 +1918,19 @@ static void z_erofs_readahead(struct readahead_control *rac)
/* traversal in reverse order */
head = (void *)page_private(page);
- err = z_erofs_do_read_page(&f, page, &pagepool);
+ err = z_erofs_do_read_page(&f, page);
if (err)
erofs_err(inode->i_sb,
"readahead error at page %lu @ nid %llu",
page->index, EROFS_I(inode)->nid);
put_page(page);
}
- z_erofs_pcluster_readmore(&f, rac, 0, &pagepool, false);
+ z_erofs_pcluster_readmore(&f, rac, false);
(void)z_erofs_collector_end(&f);
- z_erofs_runqueue(&f, &pagepool,
- z_erofs_get_sync_decompress_policy(sbi, nr_pages));
+ z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, nr_pages), true);
erofs_put_metabuf(&f.map.buf);
- erofs_release_pages(&pagepool);
+ erofs_release_pages(&f.pagepool);
}
const struct address_space_operations z_erofs_aops = {
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index d37c5c89c728..1909ddafd9c7 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -22,8 +22,8 @@ struct z_erofs_maprecorder {
bool partialref;
};
-static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
- unsigned long lcn)
+static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m,
+ unsigned long lcn)
{
struct inode *const inode = m->inode;
struct erofs_inode *const vi = EROFS_I(inode);
@@ -129,7 +129,7 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
u8 *in, type;
bool big_pcluster;
- if (1 << amortizedshift == 4)
+ if (1 << amortizedshift == 4 && lclusterbits <= 14)
vcnt = 2;
else if (1 << amortizedshift == 2 && lclusterbits == 12)
vcnt = 16;
@@ -226,12 +226,11 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
return 0;
}
-static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
- unsigned long lcn, bool lookahead)
+static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m,
+ unsigned long lcn, bool lookahead)
{
struct inode *const inode = m->inode;
struct erofs_inode *const vi = EROFS_I(inode);
- const unsigned int lclusterbits = vi->z_logical_clusterbits;
const erofs_off_t ebase = sizeof(struct z_erofs_map_header) +
ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8);
unsigned int totalidx = erofs_iblks(inode);
@@ -239,9 +238,6 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
unsigned int amortizedshift;
erofs_off_t pos;
- if (lclusterbits != 12)
- return -EOPNOTSUPP;
-
if (lcn >= totalidx)
return -EINVAL;
@@ -281,23 +277,23 @@ out:
return unpack_compacted_index(m, amortizedshift, pos, lookahead);
}
-static int z_erofs_load_cluster_from_disk(struct z_erofs_maprecorder *m,
- unsigned int lcn, bool lookahead)
+static int z_erofs_load_lcluster_from_disk(struct z_erofs_maprecorder *m,
+ unsigned int lcn, bool lookahead)
{
- const unsigned int datamode = EROFS_I(m->inode)->datalayout;
-
- if (datamode == EROFS_INODE_COMPRESSED_FULL)
- return legacy_load_cluster_from_disk(m, lcn);
-
- if (datamode == EROFS_INODE_COMPRESSED_COMPACT)
- return compacted_load_cluster_from_disk(m, lcn, lookahead);
-
- return -EINVAL;
+ switch (EROFS_I(m->inode)->datalayout) {
+ case EROFS_INODE_COMPRESSED_FULL:
+ return z_erofs_load_full_lcluster(m, lcn);
+ case EROFS_INODE_COMPRESSED_COMPACT:
+ return z_erofs_load_compact_lcluster(m, lcn, lookahead);
+ default:
+ return -EINVAL;
+ }
}
static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
unsigned int lookback_distance)
{
+ struct super_block *sb = m->inode->i_sb;
struct erofs_inode *const vi = EROFS_I(m->inode);
const unsigned int lclusterbits = vi->z_logical_clusterbits;
@@ -305,21 +301,15 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
unsigned long lcn = m->lcn - lookback_distance;
int err;
- /* load extent head logical cluster if needed */
- err = z_erofs_load_cluster_from_disk(m, lcn, false);
+ err = z_erofs_load_lcluster_from_disk(m, lcn, false);
if (err)
return err;
switch (m->type) {
case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
- if (!m->delta[0]) {
- erofs_err(m->inode->i_sb,
- "invalid lookback distance 0 @ nid %llu",
- vi->nid);
- DBG_BUGON(1);
- return -EFSCORRUPTED;
- }
lookback_distance = m->delta[0];
+ if (!lookback_distance)
+ goto err_bogus;
continue;
case Z_EROFS_LCLUSTER_TYPE_PLAIN:
case Z_EROFS_LCLUSTER_TYPE_HEAD1:
@@ -328,16 +318,15 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
m->map->m_la = (lcn << lclusterbits) | m->clusterofs;
return 0;
default:
- erofs_err(m->inode->i_sb,
- "unknown type %u @ lcn %lu of nid %llu",
+ erofs_err(sb, "unknown type %u @ lcn %lu of nid %llu",
m->type, lcn, vi->nid);
DBG_BUGON(1);
return -EOPNOTSUPP;
}
}
-
- erofs_err(m->inode->i_sb, "bogus lookback distance @ nid %llu",
- vi->nid);
+err_bogus:
+ erofs_err(sb, "bogus lookback distance %u @ lcn %lu of nid %llu",
+ lookback_distance, m->lcn, vi->nid);
DBG_BUGON(1);
return -EFSCORRUPTED;
}
@@ -369,7 +358,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
if (m->compressedblks)
goto out;
- err = z_erofs_load_cluster_from_disk(m, lcn, false);
+ err = z_erofs_load_lcluster_from_disk(m, lcn, false);
if (err)
return err;
@@ -401,9 +390,8 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
break;
fallthrough;
default:
- erofs_err(m->inode->i_sb,
- "cannot found CBLKCNT @ lcn %lu of nid %llu",
- lcn, vi->nid);
+ erofs_err(sb, "cannot found CBLKCNT @ lcn %lu of nid %llu", lcn,
+ vi->nid);
DBG_BUGON(1);
return -EFSCORRUPTED;
}
@@ -411,9 +399,7 @@ out:
map->m_plen = erofs_pos(sb, m->compressedblks);
return 0;
err_bonus_cblkcnt:
- erofs_err(m->inode->i_sb,
- "bogus CBLKCNT @ lcn %lu of nid %llu",
- lcn, vi->nid);
+ erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid);
DBG_BUGON(1);
return -EFSCORRUPTED;
}
@@ -434,7 +420,7 @@ static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m)
return 0;
}
- err = z_erofs_load_cluster_from_disk(m, lcn, true);
+ err = z_erofs_load_lcluster_from_disk(m, lcn, true);
if (err)
return err;
@@ -481,7 +467,7 @@ static int z_erofs_do_map_blocks(struct inode *inode,
initial_lcn = ofs >> lclusterbits;
endoff = ofs & ((1 << lclusterbits) - 1);
- err = z_erofs_load_cluster_from_disk(&m, initial_lcn, false);
+ err = z_erofs_load_lcluster_from_disk(&m, initial_lcn, false);
if (err)
goto unmap_out;
@@ -539,8 +525,7 @@ static int z_erofs_do_map_blocks(struct inode *inode,
if (flags & EROFS_GET_BLOCKS_FINDTAIL) {
vi->z_tailextent_headlcn = m.lcn;
/* for non-compact indexes, fragmentoff is 64 bits */
- if (fragment &&
- vi->datalayout == EROFS_INODE_COMPRESSED_FULL)
+ if (fragment && vi->datalayout == EROFS_INODE_COMPRESSED_FULL)
vi->z_fragmentoff |= (u64)m.pblk << 32;
}
if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) {
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 95850a13ce8d..8aa36cd37351 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -33,17 +33,17 @@ struct eventfd_ctx {
/*
* Every time that a write(2) is performed on an eventfd, the
* value of the __u64 being written is added to "count" and a
- * wakeup is performed on "wqh". A read(2) will return the "count"
- * value to userspace, and will reset "count" to zero. The kernel
- * side eventfd_signal() also, adds to the "count" counter and
- * issue a wakeup.
+ * wakeup is performed on "wqh". If EFD_SEMAPHORE flag was not
+ * specified, a read(2) will return the "count" value to userspace,
+ * and will reset "count" to zero. The kernel side eventfd_signal()
+ * also, adds to the "count" counter and issue a wakeup.
*/
__u64 count;
unsigned int flags;
int id;
};
-__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask)
+__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, __poll_t mask)
{
unsigned long flags;
@@ -301,6 +301,8 @@ static void eventfd_show_fdinfo(struct seq_file *m, struct file *f)
(unsigned long long)ctx->count);
spin_unlock_irq(&ctx->wqh.lock);
seq_printf(m, "eventfd-id: %d\n", ctx->id);
+ seq_printf(m, "eventfd-semaphore: %d\n",
+ !!(ctx->flags & EFD_SEMAPHORE));
}
#endif
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 980483455cc0..4b1b3362f697 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -536,7 +536,7 @@ static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi,
#else
static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi,
- unsigned pollflags)
+ __poll_t pollflags)
{
wake_up_poll(&ep->poll_wait, EPOLLIN | pollflags);
}
@@ -1805,7 +1805,11 @@ static int ep_autoremove_wake_function(struct wait_queue_entry *wq_entry,
{
int ret = default_wake_function(wq_entry, mode, sync, key);
- list_del_init(&wq_entry->entry);
+ /*
+ * Pairs with list_empty_careful in ep_poll, and ensures future loop
+ * iterations see the cause of this wakeup.
+ */
+ list_del_init_careful(&wq_entry->entry);
return ret;
}
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index c1edde817be8..1f72f977c6db 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -324,17 +324,15 @@ static ext4_fsblk_t ext4_valid_block_bitmap_padding(struct super_block *sb,
struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
ext4_group_t group)
{
- struct ext4_group_info **grp_info;
- long indexv, indexh;
-
- if (unlikely(group >= EXT4_SB(sb)->s_groups_count)) {
- ext4_error(sb, "invalid group %u", group);
- return NULL;
- }
- indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
- indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
- grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv);
- return grp_info[indexh];
+ struct ext4_group_info **grp_info;
+ long indexv, indexh;
+
+ if (unlikely(group >= EXT4_SB(sb)->s_groups_count))
+ return NULL;
+ indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
+ indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
+ grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv);
+ return grp_info[indexh];
}
/*
@@ -886,7 +884,10 @@ static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
if (!ext4_bg_has_super(sb, group))
return 0;
- return EXT4_SB(sb)->s_gdb_count;
+ if (ext4_has_feature_meta_bg(sb))
+ return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
+ else
+ return EXT4_SB(sb)->s_gdb_count;
}
/**
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 2d60bbe8d171..02fa8a64dc3f 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -918,11 +918,13 @@ do { \
* where the second inode has larger inode number
* than the first
* I_DATA_SEM_QUOTA - Used for quota inodes only
+ * I_DATA_SEM_EA - Used for ea_inodes only
*/
enum {
I_DATA_SEM_NORMAL = 0,
I_DATA_SEM_OTHER,
I_DATA_SEM_QUOTA,
+ I_DATA_SEM_EA
};
@@ -2901,7 +2903,8 @@ typedef enum {
EXT4_IGET_NORMAL = 0,
EXT4_IGET_SPECIAL = 0x0001, /* OK to iget a system inode */
EXT4_IGET_HANDLE = 0x0002, /* Inode # is from a handle */
- EXT4_IGET_BAD = 0x0004 /* Allow to iget a bad inode */
+ EXT4_IGET_BAD = 0x0004, /* Allow to iget a bad inode */
+ EXT4_IGET_EA_INODE = 0x0008 /* Inode should contain an EA value */
} ext4_iget_flags;
extern struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index f65fdb27ce14..2a143209aa0c 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -108,6 +108,13 @@ static int ext4_fsync_journal(struct inode *inode, bool datasync,
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
tid_t commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
+ /*
+ * Fastcommit does not really support fsync on directories or other
+ * special files. Force a full commit.
+ */
+ if (!S_ISREG(inode->i_mode))
+ return ext4_force_commit(inode->i_sb);
+
if (journal->j_flags & JBD2_BARRIER &&
!jbd2_trans_will_send_data_barrier(journal, commit_tid))
*needs_barrier = true;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ce5f21b6c2b3..02de439bf1f0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4641,6 +4641,24 @@ static inline void ext4_inode_set_iversion_queried(struct inode *inode, u64 val)
inode_set_iversion_queried(inode, val);
}
+static const char *check_igot_inode(struct inode *inode, ext4_iget_flags flags)
+
+{
+ if (flags & EXT4_IGET_EA_INODE) {
+ if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
+ return "missing EA_INODE flag";
+ if (ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
+ EXT4_I(inode)->i_file_acl)
+ return "ea_inode with extended attributes";
+ } else {
+ if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
+ return "unexpected EA_INODE flag";
+ }
+ if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD))
+ return "unexpected bad inode w/o EXT4_IGET_BAD";
+ return NULL;
+}
+
struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
ext4_iget_flags flags, const char *function,
unsigned int line)
@@ -4650,6 +4668,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
struct ext4_inode_info *ei;
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
struct inode *inode;
+ const char *err_str;
journal_t *journal = EXT4_SB(sb)->s_journal;
long ret;
loff_t size;
@@ -4677,8 +4696,14 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
inode = iget_locked(sb, ino);
if (!inode)
return ERR_PTR(-ENOMEM);
- if (!(inode->i_state & I_NEW))
+ if (!(inode->i_state & I_NEW)) {
+ if ((err_str = check_igot_inode(inode, flags)) != NULL) {
+ ext4_error_inode(inode, function, line, 0, err_str);
+ iput(inode);
+ return ERR_PTR(-EFSCORRUPTED);
+ }
return inode;
+ }
ei = EXT4_I(inode);
iloc.bh = NULL;
@@ -4944,10 +4969,9 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb))
ext4_error_inode(inode, function, line, 0,
"casefold flag without casefold feature");
- if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD)) {
- ext4_error_inode(inode, function, line, 0,
- "bad inode without EXT4_IGET_BAD flag");
- ret = -EUCLEAN;
+ if ((err_str = check_igot_inode(inode, flags)) != NULL) {
+ ext4_error_inode(inode, function, line, 0, err_str);
+ ret = -EFSCORRUPTED;
goto bad_inode;
}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 7b2e36d103cb..20f67a260df5 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2062,7 +2062,7 @@ static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
if (bex->fe_len < gex->fe_len)
return;
- if (finish_group)
+ if (finish_group || ac->ac_found > sbi->s_mb_min_to_scan)
ext4_mb_use_best_found(ac, e4b);
}
@@ -2074,6 +2074,20 @@ static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
* in the context. Later, the best found extent will be used, if
* mballoc can't find good enough extent.
*
+ * The algorithm used is roughly as follows:
+ *
+ * * If free extent found is exactly as big as goal, then
+ * stop the scan and use it immediately
+ *
+ * * If free extent found is smaller than goal, then keep retrying
+ * upto a max of sbi->s_mb_max_to_scan times (default 200). After
+ * that stop scanning and use whatever we have.
+ *
+ * * If free extent found is bigger than goal, then keep retrying
+ * upto a max of sbi->s_mb_min_to_scan times (default 10) before
+ * stopping the scan and using the extent.
+ *
+ *
* FIXME: real allocation policy is to be designed yet!
*/
static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 45b579805c95..0caf6c730ce3 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3834,19 +3834,10 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir,
return retval;
}
- /*
- * We need to protect against old.inode directory getting converted
- * from inline directory format into a normal one.
- */
- if (S_ISDIR(old.inode->i_mode))
- inode_lock_nested(old.inode, I_MUTEX_NONDIR2);
-
old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de,
&old.inlined);
- if (IS_ERR(old.bh)) {
- retval = PTR_ERR(old.bh);
- goto unlock_moved_dir;
- }
+ if (IS_ERR(old.bh))
+ return PTR_ERR(old.bh);
/*
* Check for inode number is _not_ due to possible IO errors.
@@ -4043,10 +4034,6 @@ release_bh:
brelse(old.bh);
brelse(new.bh);
-unlock_moved_dir:
- if (S_ISDIR(old.inode->i_mode))
- inode_unlock(old.inode);
-
return retval;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 64342adcd679..eaa5858d5285 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -6399,7 +6399,6 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
struct ext4_mount_options old_opts;
ext4_group_t g;
int err = 0;
- int enable_rw = 0;
#ifdef CONFIG_QUOTA
int enable_quota = 0;
int i, j;
@@ -6586,7 +6585,7 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
if (err)
goto restore_opts;
- enable_rw = 1;
+ sb->s_flags &= ~SB_RDONLY;
if (ext4_has_feature_mmp(sb)) {
err = ext4_multi_mount_protect(sb,
le64_to_cpu(es->s_mmp_block));
@@ -6600,18 +6599,6 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
}
/*
- * Reinitialize lazy itable initialization thread based on
- * current settings
- */
- if (sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE))
- ext4_unregister_li_request(sb);
- else {
- ext4_group_t first_not_zeroed;
- first_not_zeroed = ext4_has_uninit_itable(sb);
- ext4_register_li_request(sb, first_not_zeroed);
- }
-
- /*
* Handle creation of system zone data early because it can fail.
* Releasing of existing data is done when we are sure remount will
* succeed.
@@ -6645,8 +6632,17 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
ext4_release_system_zone(sb);
- if (enable_rw)
- sb->s_flags &= ~SB_RDONLY;
+ /*
+ * Reinitialize lazy itable initialization thread based on
+ * current settings
+ */
+ if (sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE))
+ ext4_unregister_li_request(sb);
+ else {
+ ext4_group_t first_not_zeroed;
+ first_not_zeroed = ext4_has_uninit_itable(sb);
+ ext4_register_li_request(sb, first_not_zeroed);
+ }
if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
ext4_stop_mmpd(sbi);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index dfc2e223bd10..321e3a888c20 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -121,7 +121,11 @@ ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
#ifdef CONFIG_LOCKDEP
void ext4_xattr_inode_set_class(struct inode *ea_inode)
{
+ struct ext4_inode_info *ei = EXT4_I(ea_inode);
+
lockdep_set_subclass(&ea_inode->i_rwsem, 1);
+ (void) ei; /* shut up clang warning if !CONFIG_LOCKDEP */
+ lockdep_set_subclass(&ei->i_data_sem, I_DATA_SEM_EA);
}
#endif
@@ -433,7 +437,7 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
return -EFSCORRUPTED;
}
- inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_NORMAL);
+ inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_EA_INODE);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
ext4_error(parent->i_sb,
@@ -441,23 +445,6 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
err);
return err;
}
-
- if (is_bad_inode(inode)) {
- ext4_error(parent->i_sb,
- "error while reading EA inode %lu is_bad_inode",
- ea_ino);
- err = -EIO;
- goto error;
- }
-
- if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
- ext4_error(parent->i_sb,
- "EA inode %lu does not have EXT4_EA_INODE_FL flag",
- ea_ino);
- err = -EINVAL;
- goto error;
- }
-
ext4_xattr_inode_set_class(inode);
/*
@@ -478,9 +465,6 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
*ea_inode = inode;
return 0;
-error:
- iput(inode);
- return err;
}
/* Remove entry from mbcache when EA inode is getting evicted */
@@ -1556,11 +1540,11 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
while (ce) {
ea_inode = ext4_iget(inode->i_sb, ce->e_value,
- EXT4_IGET_NORMAL);
- if (!IS_ERR(ea_inode) &&
- !is_bad_inode(ea_inode) &&
- (EXT4_I(ea_inode)->i_flags & EXT4_EA_INODE_FL) &&
- i_size_read(ea_inode) == value_len &&
+ EXT4_IGET_EA_INODE);
+ if (IS_ERR(ea_inode))
+ goto next_entry;
+ ext4_xattr_inode_set_class(ea_inode);
+ if (i_size_read(ea_inode) == value_len &&
!ext4_xattr_inode_read(ea_inode, ea_data, value_len) &&
!ext4_xattr_inode_verify_hashes(ea_inode, NULL, ea_data,
value_len) &&
@@ -1570,9 +1554,8 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
kvfree(ea_data);
return ea_inode;
}
-
- if (!IS_ERR(ea_inode))
- iput(ea_inode);
+ iput(ea_inode);
+ next_entry:
ce = mb_cache_entry_find_next(ea_inode_cache, ce);
}
kvfree(ea_data);
@@ -2073,8 +2056,9 @@ inserted:
else {
u32 ref;
+#ifdef EXT4_XATTR_DEBUG
WARN_ON_ONCE(dquot_initialize_needed(inode));
-
+#endif
/* The old block is released after updating
the inode. */
error = dquot_alloc_block(inode,
@@ -2137,8 +2121,9 @@ inserted:
/* We need to allocate a new block */
ext4_fsblk_t goal, block;
+#ifdef EXT4_XATTR_DEBUG
WARN_ON_ONCE(dquot_initialize_needed(inode));
-
+#endif
goal = ext4_group_first_block_no(sb,
EXT4_I(inode)->i_block_group);
block = ext4_new_meta_blocks(handle, inode, goal, 0,
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 77a71276ecb1..ad597b417fea 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -995,20 +995,12 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
goto out;
}
- /*
- * Copied from ext4_rename: we need to protect against old.inode
- * directory getting converted from inline directory format into
- * a normal one.
- */
- if (S_ISDIR(old_inode->i_mode))
- inode_lock_nested(old_inode, I_MUTEX_NONDIR2);
-
err = -ENOENT;
old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_entry) {
if (IS_ERR(old_page))
err = PTR_ERR(old_page);
- goto out_unlock_old;
+ goto out;
}
if (S_ISDIR(old_inode->i_mode)) {
@@ -1116,9 +1108,6 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
f2fs_unlock_op(sbi);
- if (S_ISDIR(old_inode->i_mode))
- inode_unlock(old_inode);
-
if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
f2fs_sync_fs(sbi->sb, 1);
@@ -1133,9 +1122,6 @@ out_dir:
f2fs_put_page(old_dir_page, 0);
out_old:
f2fs_put_page(old_page, 0);
-out_unlock_old:
- if (S_ISDIR(old_inode->i_mode))
- inode_unlock(old_inode);
out:
iput(whiteout);
return err;
diff --git a/fs/file_table.c b/fs/file_table.c
index 372653b92617..e06c68e2d757 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -44,18 +44,40 @@ static struct kmem_cache *filp_cachep __read_mostly;
static struct percpu_counter nr_files __cacheline_aligned_in_smp;
+/* Container for backing file with optional real path */
+struct backing_file {
+ struct file file;
+ struct path real_path;
+};
+
+static inline struct backing_file *backing_file(struct file *f)
+{
+ return container_of(f, struct backing_file, file);
+}
+
+struct path *backing_file_real_path(struct file *f)
+{
+ return &backing_file(f)->real_path;
+}
+EXPORT_SYMBOL_GPL(backing_file_real_path);
+
static void file_free_rcu(struct rcu_head *head)
{
struct file *f = container_of(head, struct file, f_rcuhead);
put_cred(f->f_cred);
- kmem_cache_free(filp_cachep, f);
+ if (unlikely(f->f_mode & FMODE_BACKING))
+ kfree(backing_file(f));
+ else
+ kmem_cache_free(filp_cachep, f);
}
static inline void file_free(struct file *f)
{
security_file_free(f);
- if (!(f->f_mode & FMODE_NOACCOUNT))
+ if (unlikely(f->f_mode & FMODE_BACKING))
+ path_put(backing_file_real_path(f));
+ if (likely(!(f->f_mode & FMODE_NOACCOUNT)))
percpu_counter_dec(&nr_files);
call_rcu(&f->f_rcuhead, file_free_rcu);
}
@@ -131,20 +153,15 @@ static int __init init_fs_stat_sysctls(void)
fs_initcall(init_fs_stat_sysctls);
#endif
-static struct file *__alloc_file(int flags, const struct cred *cred)
+static int init_file(struct file *f, int flags, const struct cred *cred)
{
- struct file *f;
int error;
- f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
- if (unlikely(!f))
- return ERR_PTR(-ENOMEM);
-
f->f_cred = get_cred(cred);
error = security_file_alloc(f);
if (unlikely(error)) {
file_free_rcu(&f->f_rcuhead);
- return ERR_PTR(error);
+ return error;
}
atomic_long_set(&f->f_count, 1);
@@ -155,7 +172,7 @@ static struct file *__alloc_file(int flags, const struct cred *cred)
f->f_mode = OPEN_FMODE(flags);
/* f->f_version: 0 */
- return f;
+ return 0;
}
/* Find an unused file structure and return a pointer to it.
@@ -172,6 +189,7 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
{
static long old_max;
struct file *f;
+ int error;
/*
* Privileged users can go above max_files
@@ -185,9 +203,15 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
goto over;
}
- f = __alloc_file(flags, cred);
- if (!IS_ERR(f))
- percpu_counter_inc(&nr_files);
+ f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
+ if (unlikely(!f))
+ return ERR_PTR(-ENOMEM);
+
+ error = init_file(f, flags, cred);
+ if (unlikely(error))
+ return ERR_PTR(error);
+
+ percpu_counter_inc(&nr_files);
return f;
@@ -203,18 +227,51 @@ over:
/*
* Variant of alloc_empty_file() that doesn't check and modify nr_files.
*
- * Should not be used unless there's a very good reason to do so.
+ * This is only for kernel internal use, and the allocate file must not be
+ * installed into file tables or such.
*/
struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
{
- struct file *f = __alloc_file(flags, cred);
+ struct file *f;
+ int error;
- if (!IS_ERR(f))
- f->f_mode |= FMODE_NOACCOUNT;
+ f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
+ if (unlikely(!f))
+ return ERR_PTR(-ENOMEM);
+
+ error = init_file(f, flags, cred);
+ if (unlikely(error))
+ return ERR_PTR(error);
+
+ f->f_mode |= FMODE_NOACCOUNT;
return f;
}
+/*
+ * Variant of alloc_empty_file() that allocates a backing_file container
+ * and doesn't check and modify nr_files.
+ *
+ * This is only for kernel internal use, and the allocate file must not be
+ * installed into file tables or such.
+ */
+struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
+{
+ struct backing_file *ff;
+ int error;
+
+ ff = kzalloc(sizeof(struct backing_file), GFP_KERNEL);
+ if (unlikely(!ff))
+ return ERR_PTR(-ENOMEM);
+
+ error = init_file(&ff->file, flags, cred);
+ if (unlikely(error))
+ return ERR_PTR(error);
+
+ ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT;
+ return &ff->file;
+}
+
/**
* alloc_file - allocate and initialize a 'struct file'
*
diff --git a/fs/fs_context.c b/fs/fs_context.c
index 24ce12f0db32..851214d1d013 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -561,7 +561,8 @@ static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param)
return -ENOMEM;
}
- ctx->legacy_data[size++] = ',';
+ if (size)
+ ctx->legacy_data[size++] = ',';
len = strlen(param->key);
memcpy(ctx->legacy_data + size, param->key, len);
size += len;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 0f5ad5165361..1d679a3178ff 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -784,9 +784,13 @@ static inline bool should_fault_in_pages(struct iov_iter *i,
if (!user_backed_iter(i))
return false;
+ /*
+ * Try to fault in multiple pages initially. When that doesn't result
+ * in any progress, fall back to a single page.
+ */
size = PAGE_SIZE;
offs = offset_in_page(iocb->ki_pos);
- if (*prev_count != count || !*window_size) {
+ if (*prev_count != count) {
size_t nr_dirtied;
nr_dirtied = max(current->nr_dirtied_pause -
@@ -870,6 +874,7 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
struct gfs2_inode *ip = GFS2_I(inode);
size_t prev_count = 0, window_size = 0;
size_t written = 0;
+ bool enough_retries;
ssize_t ret;
/*
@@ -913,11 +918,17 @@ retry:
if (ret > 0)
written = ret;
+ enough_retries = prev_count == iov_iter_count(from) &&
+ window_size <= PAGE_SIZE;
if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) {
gfs2_glock_dq(gh);
window_size -= fault_in_iov_iter_readable(from, window_size);
- if (window_size)
- goto retry;
+ if (window_size) {
+ if (!enough_retries)
+ goto retry;
+ /* fall back to buffered I/O */
+ ret = 0;
+ }
}
out_unlock:
if (gfs2_holder_queued(gh))
diff --git a/fs/inode.c b/fs/inode.c
index 4d6a1544e95b..d37fad91c8da 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1104,9 +1104,51 @@ void discard_new_inode(struct inode *inode)
EXPORT_SYMBOL(discard_new_inode);
/**
+ * lock_two_inodes - lock two inodes (may be regular files but also dirs)
+ *
+ * Lock any non-NULL argument. The caller must make sure that if he is passing
+ * in two directories, one is not ancestor of the other. Zero, one or two
+ * objects may be locked by this function.
+ *
+ * @inode1: first inode to lock
+ * @inode2: second inode to lock
+ * @subclass1: inode lock subclass for the first lock obtained
+ * @subclass2: inode lock subclass for the second lock obtained
+ */
+void lock_two_inodes(struct inode *inode1, struct inode *inode2,
+ unsigned subclass1, unsigned subclass2)
+{
+ if (!inode1 || !inode2) {
+ /*
+ * Make sure @subclass1 will be used for the acquired lock.
+ * This is not strictly necessary (no current caller cares) but
+ * let's keep things consistent.
+ */
+ if (!inode1)
+ swap(inode1, inode2);
+ goto lock;
+ }
+
+ /*
+ * If one object is directory and the other is not, we must make sure
+ * to lock directory first as the other object may be its child.
+ */
+ if (S_ISDIR(inode2->i_mode) == S_ISDIR(inode1->i_mode)) {
+ if (inode1 > inode2)
+ swap(inode1, inode2);
+ } else if (!S_ISDIR(inode1->i_mode))
+ swap(inode1, inode2);
+lock:
+ if (inode1)
+ inode_lock_nested(inode1, subclass1);
+ if (inode2 && inode2 != inode1)
+ inode_lock_nested(inode2, subclass2);
+}
+
+/**
* lock_two_nondirectories - take two i_mutexes on non-directory objects
*
- * Lock any non-NULL argument that is not a directory.
+ * Lock any non-NULL argument. Passed objects must not be directories.
* Zero, one or two objects may be locked by this function.
*
* @inode1: first inode to lock
@@ -1114,13 +1156,9 @@ EXPORT_SYMBOL(discard_new_inode);
*/
void lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
{
- if (inode1 > inode2)
- swap(inode1, inode2);
-
- if (inode1 && !S_ISDIR(inode1->i_mode))
- inode_lock(inode1);
- if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
- inode_lock_nested(inode2, I_MUTEX_NONDIR2);
+ WARN_ON_ONCE(S_ISDIR(inode1->i_mode));
+ WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
+ lock_two_inodes(inode1, inode2, I_MUTEX_NORMAL, I_MUTEX_NONDIR2);
}
EXPORT_SYMBOL(lock_two_nondirectories);
@@ -1131,10 +1169,14 @@ EXPORT_SYMBOL(lock_two_nondirectories);
*/
void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2)
{
- if (inode1 && !S_ISDIR(inode1->i_mode))
+ if (inode1) {
+ WARN_ON_ONCE(S_ISDIR(inode1->i_mode));
inode_unlock(inode1);
- if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
+ }
+ if (inode2 && inode2 != inode1) {
+ WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
inode_unlock(inode2);
+ }
}
EXPORT_SYMBOL(unlock_two_nondirectories);
diff --git a/fs/internal.h b/fs/internal.h
index bd3b2810a36b..f7a3dc111026 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -97,8 +97,9 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
/*
* file_table.c
*/
-extern struct file *alloc_empty_file(int, const struct cred *);
-extern struct file *alloc_empty_file_noaccount(int, const struct cred *);
+struct file *alloc_empty_file(int flags, const struct cred *cred);
+struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred);
+struct file *alloc_empty_backing_file(int flags, const struct cred *cred);
static inline void put_file_access(struct file *file)
{
@@ -121,6 +122,47 @@ extern bool mount_capable(struct fs_context *);
int sb_init_dio_done_wq(struct super_block *sb);
/*
+ * Prepare superblock for changing its read-only state (i.e., either remount
+ * read-write superblock read-only or vice versa). After this function returns
+ * mnt_is_readonly() will return true for any mount of the superblock if its
+ * caller is able to observe any changes done by the remount. This holds until
+ * sb_end_ro_state_change() is called.
+ */
+static inline void sb_start_ro_state_change(struct super_block *sb)
+{
+ WRITE_ONCE(sb->s_readonly_remount, 1);
+ /*
+ * For RO->RW transition, the barrier pairs with the barrier in
+ * mnt_is_readonly() making sure if mnt_is_readonly() sees SB_RDONLY
+ * cleared, it will see s_readonly_remount set.
+ * For RW->RO transition, the barrier pairs with the barrier in
+ * __mnt_want_write() before the mnt_is_readonly() check. The barrier
+ * makes sure if __mnt_want_write() sees MNT_WRITE_HOLD already
+ * cleared, it will see s_readonly_remount set.
+ */
+ smp_wmb();
+}
+
+/*
+ * Ends section changing read-only state of the superblock. After this function
+ * returns if mnt_is_readonly() returns false, the caller will be able to
+ * observe all the changes remount did to the superblock.
+ */
+static inline void sb_end_ro_state_change(struct super_block *sb)
+{
+ /*
+ * This barrier provides release semantics that pairs with
+ * the smp_rmb() acquire semantics in mnt_is_readonly().
+ * This barrier pair ensure that when mnt_is_readonly() sees
+ * 0 for sb->s_readonly_remount, it will also see all the
+ * preceding flag changes that were made during the RO state
+ * change.
+ */
+ smp_wmb();
+ WRITE_ONCE(sb->s_readonly_remount, 0);
+}
+
+/*
* open.c
*/
struct open_flags {
@@ -152,6 +194,8 @@ extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc);
int dentry_needs_remove_privs(struct mnt_idmap *, struct dentry *dentry);
bool in_group_or_capable(struct mnt_idmap *idmap,
const struct inode *inode, vfsgid_t vfsgid);
+void lock_two_inodes(struct inode *inode1, struct inode *inode2,
+ unsigned subclass1, unsigned subclass2);
/*
* fs-writeback.c
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index 837cd55fd4c5..6ae9d6fefb86 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -211,7 +211,10 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
ic->scan_dents = NULL;
cond_resched();
}
- jffs2_build_xattr_subsystem(c);
+ ret = jffs2_build_xattr_subsystem(c);
+ if (ret)
+ goto exit;
+
c->flags &= ~JFFS2_SB_FLAG_BUILDING;
dbg_fsbuild("FS build complete\n");
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index aa4048a27f31..3b6bdc9a49e1 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -772,10 +772,10 @@ void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c)
}
#define XREF_TMPHASH_SIZE (128)
-void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
+int jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
{
struct jffs2_xattr_ref *ref, *_ref;
- struct jffs2_xattr_ref *xref_tmphash[XREF_TMPHASH_SIZE];
+ struct jffs2_xattr_ref **xref_tmphash;
struct jffs2_xattr_datum *xd, *_xd;
struct jffs2_inode_cache *ic;
struct jffs2_raw_node_ref *raw;
@@ -784,9 +784,12 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
BUG_ON(!(c->flags & JFFS2_SB_FLAG_BUILDING));
+ xref_tmphash = kcalloc(XREF_TMPHASH_SIZE,
+ sizeof(struct jffs2_xattr_ref *), GFP_KERNEL);
+ if (!xref_tmphash)
+ return -ENOMEM;
+
/* Phase.1 : Merge same xref */
- for (i=0; i < XREF_TMPHASH_SIZE; i++)
- xref_tmphash[i] = NULL;
for (ref=c->xref_temp; ref; ref=_ref) {
struct jffs2_xattr_ref *tmp;
@@ -884,6 +887,8 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
"%u of xref (%u dead, %u orphan) found.\n",
xdatum_count, xdatum_unchecked_count, xdatum_orphan_count,
xref_count, xref_dead_count, xref_orphan_count);
+ kfree(xref_tmphash);
+ return 0;
}
struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c,
diff --git a/fs/jffs2/xattr.h b/fs/jffs2/xattr.h
index 720007b2fd65..1b5030a3349d 100644
--- a/fs/jffs2/xattr.h
+++ b/fs/jffs2/xattr.h
@@ -71,7 +71,7 @@ static inline int is_xattr_ref_dead(struct jffs2_xattr_ref *ref)
#ifdef CONFIG_JFFS2_FS_XATTR
extern void jffs2_init_xattr_subsystem(struct jffs2_sb_info *c);
-extern void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c);
+extern int jffs2_build_xattr_subsystem(struct jffs2_sb_info *c);
extern void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c);
extern struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c,
@@ -103,7 +103,7 @@ extern ssize_t jffs2_listxattr(struct dentry *, char *, size_t);
#else
#define jffs2_init_xattr_subsystem(c)
-#define jffs2_build_xattr_subsystem(c)
+#define jffs2_build_xattr_subsystem(c) (0)
#define jffs2_clear_xattr_subsystem(c)
#define jffs2_xattr_do_crccheck_inode(c, ic)
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index b29d68b5eec5..494b9f4043cf 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -876,7 +876,7 @@ static int jfs_symlink(struct mnt_idmap *idmap, struct inode *dip,
tid_t tid;
ino_t ino = 0;
struct component_name dname;
- int ssize; /* source pathname size */
+ u32 ssize; /* source pathname size */
struct btstack btstack;
struct inode *ip = d_inode(dentry);
s64 xlen = 0;
@@ -957,7 +957,7 @@ static int jfs_symlink(struct mnt_idmap *idmap, struct inode *dip,
if (ssize > sizeof (JFS_IP(ip)->i_inline))
JFS_IP(ip)->mode2 &= ~INLINEEA;
- jfs_info("jfs_symlink: fast symlink added ssize:%d name:%s ",
+ jfs_info("jfs_symlink: fast symlink added ssize:%u name:%s ",
ssize, name);
}
/*
@@ -987,7 +987,7 @@ static int jfs_symlink(struct mnt_idmap *idmap, struct inode *dip,
ip->i_size = ssize - 1;
while (ssize) {
/* This is kind of silly since PATH_MAX == 4K */
- int copy_size = min(ssize, PSIZE);
+ u32 copy_size = min_t(u32, ssize, PSIZE);
mp = get_metapage(ip, xaddr, PSIZE, 1);
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index bb94949bc223..22d3ff3818f5 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -77,9 +77,9 @@ static const unsigned long nlm_grace_period_min = 0;
static const unsigned long nlm_grace_period_max = 240;
static const unsigned long nlm_timeout_min = 3;
static const unsigned long nlm_timeout_max = 20;
-static const int nlm_port_min = 0, nlm_port_max = 65535;
#ifdef CONFIG_SYSCTL
+static const int nlm_port_min = 0, nlm_port_max = 65535;
static struct ctl_table_header * nlm_sysctl_table;
#endif
@@ -355,7 +355,6 @@ static int lockd_get(void)
int error;
if (nlmsvc_serv) {
- svc_get(nlmsvc_serv);
nlmsvc_users++;
return 0;
}
diff --git a/fs/namei.c b/fs/namei.c
index e4fe0879ae55..91171da719c5 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3028,8 +3028,8 @@ static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2)
return p;
}
- inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
- inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
+ lock_two_inodes(p1->d_inode, p2->d_inode,
+ I_MUTEX_PARENT, I_MUTEX_PARENT2);
return NULL;
}
@@ -3703,7 +3703,7 @@ static int vfs_tmpfile(struct mnt_idmap *idmap,
}
/**
- * vfs_tmpfile_open - open a tmpfile for kernel internal use
+ * kernel_tmpfile_open - open a tmpfile for kernel internal use
* @idmap: idmap of the mount the inode was found from
* @parentpath: path of the base directory
* @mode: mode of the new tmpfile
@@ -3714,24 +3714,26 @@ static int vfs_tmpfile(struct mnt_idmap *idmap,
* hence this is only for kernel internal use, and must not be installed into
* file tables or such.
*/
-struct file *vfs_tmpfile_open(struct mnt_idmap *idmap,
- const struct path *parentpath,
- umode_t mode, int open_flag, const struct cred *cred)
+struct file *kernel_tmpfile_open(struct mnt_idmap *idmap,
+ const struct path *parentpath,
+ umode_t mode, int open_flag,
+ const struct cred *cred)
{
struct file *file;
int error;
file = alloc_empty_file_noaccount(open_flag, cred);
- if (!IS_ERR(file)) {
- error = vfs_tmpfile(idmap, parentpath, file, mode);
- if (error) {
- fput(file);
- file = ERR_PTR(error);
- }
+ if (IS_ERR(file))
+ return file;
+
+ error = vfs_tmpfile(idmap, parentpath, file, mode);
+ if (error) {
+ fput(file);
+ file = ERR_PTR(error);
}
return file;
}
-EXPORT_SYMBOL(vfs_tmpfile_open);
+EXPORT_SYMBOL(kernel_tmpfile_open);
static int do_tmpfile(struct nameidata *nd, unsigned flags,
const struct open_flags *op,
@@ -4731,7 +4733,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
* sb->s_vfs_rename_mutex. We might be more accurate, but that's another
* story.
* c) we have to lock _four_ objects - parents and victim (if it exists),
- * and source (if it is not a directory).
+ * and source.
* And that - after we got ->i_mutex on parents (until then we don't know
* whether the target exists). Solution: try to be smart with locking
* order for inodes. We rely on the fact that tree topology may change
@@ -4815,10 +4817,16 @@ int vfs_rename(struct renamedata *rd)
take_dentry_name_snapshot(&old_name, old_dentry);
dget(new_dentry);
- if (!is_dir || (flags & RENAME_EXCHANGE))
- lock_two_nondirectories(source, target);
- else if (target)
- inode_lock(target);
+ /*
+ * Lock all moved children. Moved directories may need to change parent
+ * pointer so they need the lock to prevent against concurrent
+ * directory changes moving parent pointer. For regular files we've
+ * historically always done this. The lockdep locking subclasses are
+ * somewhat arbitrary but RENAME_EXCHANGE in particular can swap
+ * regular files and directories so it's difficult to tell which
+ * subclasses to use.
+ */
+ lock_two_inodes(source, target, I_MUTEX_NORMAL, I_MUTEX_NONDIR2);
error = -EPERM;
if (IS_SWAPFILE(source) || (target && IS_SWAPFILE(target)))
@@ -4866,9 +4874,9 @@ int vfs_rename(struct renamedata *rd)
d_exchange(old_dentry, new_dentry);
}
out:
- if (!is_dir || (flags & RENAME_EXCHANGE))
- unlock_two_nondirectories(source, target);
- else if (target)
+ if (source)
+ inode_unlock(source);
+ if (target)
inode_unlock(target);
dput(new_dentry);
if (!error) {
diff --git a/fs/namespace.c b/fs/namespace.c
index 54847db5b819..e157efc54023 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -309,9 +309,16 @@ static unsigned int mnt_get_writers(struct mount *mnt)
static int mnt_is_readonly(struct vfsmount *mnt)
{
- if (mnt->mnt_sb->s_readonly_remount)
+ if (READ_ONCE(mnt->mnt_sb->s_readonly_remount))
return 1;
- /* Order wrt setting s_flags/s_readonly_remount in do_remount() */
+ /*
+ * The barrier pairs with the barrier in sb_start_ro_state_change()
+ * making sure if we don't see s_readonly_remount set yet, we also will
+ * not see any superblock / mount flag changes done by remount.
+ * It also pairs with the barrier in sb_end_ro_state_change()
+ * assuring that if we see s_readonly_remount already cleared, we will
+ * see the values of superblock / mount flags updated by remount.
+ */
smp_rmb();
return __mnt_is_readonly(mnt);
}
@@ -364,9 +371,11 @@ int __mnt_want_write(struct vfsmount *m)
}
}
/*
- * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
- * be set to match its requirements. So we must not load that until
- * MNT_WRITE_HOLD is cleared.
+ * The barrier pairs with the barrier sb_start_ro_state_change() making
+ * sure that if we see MNT_WRITE_HOLD cleared, we will also see
+ * s_readonly_remount set (or even SB_RDONLY / MNT_READONLY flags) in
+ * mnt_is_readonly() and bail in case we are racing with remount
+ * read-only.
*/
smp_rmb();
if (mnt_is_readonly(m)) {
@@ -588,10 +597,8 @@ int sb_prepare_remount_readonly(struct super_block *sb)
if (!err && atomic_long_read(&sb->s_remove_count))
err = -EBUSY;
- if (!err) {
- sb->s_readonly_remount = 1;
- smp_wmb();
- }
+ if (!err)
+ sb_start_ro_state_change(sb);
list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
@@ -658,9 +665,25 @@ static bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
return false;
}
-/*
- * find the first mount at @dentry on vfsmount @mnt.
- * call under rcu_read_lock()
+/**
+ * __lookup_mnt - find first child mount
+ * @mnt: parent mount
+ * @dentry: mountpoint
+ *
+ * If @mnt has a child mount @c mounted @dentry find and return it.
+ *
+ * Note that the child mount @c need not be unique. There are cases
+ * where shadow mounts are created. For example, during mount
+ * propagation when a source mount @mnt whose root got overmounted by a
+ * mount @o after path lookup but before @namespace_sem could be
+ * acquired gets copied and propagated. So @mnt gets copied including
+ * @o. When @mnt is propagated to a destination mount @d that already
+ * has another mount @n mounted at the same mountpoint then the source
+ * mount @mnt will be tucked beneath @n, i.e., @n will be mounted on
+ * @mnt and @mnt mounted on @d. Now both @n and @o are mounted at @mnt
+ * on @dentry.
+ *
+ * Return: The first child of @mnt mounted @dentry or NULL.
*/
struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
{
@@ -910,6 +933,33 @@ void mnt_set_mountpoint(struct mount *mnt,
hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
}
+/**
+ * mnt_set_mountpoint_beneath - mount a mount beneath another one
+ *
+ * @new_parent: the source mount
+ * @top_mnt: the mount beneath which @new_parent is mounted
+ * @new_mp: the new mountpoint of @top_mnt on @new_parent
+ *
+ * Remove @top_mnt from its current mountpoint @top_mnt->mnt_mp and
+ * parent @top_mnt->mnt_parent and mount it on top of @new_parent at
+ * @new_mp. And mount @new_parent on the old parent and old
+ * mountpoint of @top_mnt.
+ *
+ * Context: This function expects namespace_lock() and lock_mount_hash()
+ * to have been acquired in that order.
+ */
+static void mnt_set_mountpoint_beneath(struct mount *new_parent,
+ struct mount *top_mnt,
+ struct mountpoint *new_mp)
+{
+ struct mount *old_top_parent = top_mnt->mnt_parent;
+ struct mountpoint *old_top_mp = top_mnt->mnt_mp;
+
+ mnt_set_mountpoint(old_top_parent, old_top_mp, new_parent);
+ mnt_change_mountpoint(new_parent, new_mp, top_mnt);
+}
+
+
static void __attach_mnt(struct mount *mnt, struct mount *parent)
{
hlist_add_head_rcu(&mnt->mnt_hash,
@@ -917,15 +967,42 @@ static void __attach_mnt(struct mount *mnt, struct mount *parent)
list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
}
-/*
- * vfsmount lock must be held for write
+/**
+ * attach_mnt - mount a mount, attach to @mount_hashtable and parent's
+ * list of child mounts
+ * @parent: the parent
+ * @mnt: the new mount
+ * @mp: the new mountpoint
+ * @beneath: whether to mount @mnt beneath or on top of @parent
+ *
+ * If @beneath is false, mount @mnt at @mp on @parent. Then attach @mnt
+ * to @parent's child mount list and to @mount_hashtable.
+ *
+ * If @beneath is true, remove @mnt from its current parent and
+ * mountpoint and mount it on @mp on @parent, and mount @parent on the
+ * old parent and old mountpoint of @mnt. Finally, attach @parent to
+ * @mnt_hashtable and @parent->mnt_parent->mnt_mounts.
+ *
+ * Note, when __attach_mnt() is called @mnt->mnt_parent already points
+ * to the correct parent.
+ *
+ * Context: This function expects namespace_lock() and lock_mount_hash()
+ * to have been acquired in that order.
*/
-static void attach_mnt(struct mount *mnt,
- struct mount *parent,
- struct mountpoint *mp)
+static void attach_mnt(struct mount *mnt, struct mount *parent,
+ struct mountpoint *mp, bool beneath)
{
- mnt_set_mountpoint(parent, mp, mnt);
- __attach_mnt(mnt, parent);
+ if (beneath)
+ mnt_set_mountpoint_beneath(mnt, parent, mp);
+ else
+ mnt_set_mountpoint(parent, mp, mnt);
+ /*
+ * Note, @mnt->mnt_parent has to be used. If @mnt was mounted
+ * beneath @parent then @mnt will need to be attached to
+ * @parent's old parent, not @parent. IOW, @mnt->mnt_parent
+ * isn't the same mount as @parent.
+ */
+ __attach_mnt(mnt, mnt->mnt_parent);
}
void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
@@ -937,7 +1014,7 @@ void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct m
hlist_del_init(&mnt->mnt_mp_list);
hlist_del_init_rcu(&mnt->mnt_hash);
- attach_mnt(mnt, parent, mp);
+ attach_mnt(mnt, parent, mp, false);
put_mountpoint(old_mp);
mnt_add_count(old_parent, -1);
@@ -1767,6 +1844,19 @@ bool may_mount(void)
return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
}
+/**
+ * path_mounted - check whether path is mounted
+ * @path: path to check
+ *
+ * Determine whether @path refers to the root of a mount.
+ *
+ * Return: true if @path is the root of a mount, false if not.
+ */
+static inline bool path_mounted(const struct path *path)
+{
+ return path->mnt->mnt_root == path->dentry;
+}
+
static void warn_mandlock(void)
{
pr_warn_once("=======================================================\n"
@@ -1782,7 +1872,7 @@ static int can_umount(const struct path *path, int flags)
if (!may_mount())
return -EPERM;
- if (path->dentry != path->mnt->mnt_root)
+ if (!path_mounted(path))
return -EINVAL;
if (!check_mnt(mnt))
return -EINVAL;
@@ -1925,7 +2015,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
goto out;
lock_mount_hash();
list_add_tail(&q->mnt_list, &res->mnt_list);
- attach_mnt(q, parent, p->mnt_mp);
+ attach_mnt(q, parent, p->mnt_mp, false);
unlock_mount_hash();
}
}
@@ -2134,12 +2224,17 @@ int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
return 0;
}
-/*
- * @source_mnt : mount tree to be attached
- * @nd : place the mount tree @source_mnt is attached
- * @parent_nd : if non-null, detach the source_mnt from its parent and
- * store the parent mount and mountpoint dentry.
- * (done when source_mnt is moved)
+enum mnt_tree_flags_t {
+ MNT_TREE_MOVE = BIT(0),
+ MNT_TREE_BENEATH = BIT(1),
+};
+
+/**
+ * attach_recursive_mnt - attach a source mount tree
+ * @source_mnt: mount tree to be attached
+ * @top_mnt: mount that @source_mnt will be mounted on or mounted beneath
+ * @dest_mp: the mountpoint @source_mnt will be mounted at
+ * @flags: modify how @source_mnt is supposed to be attached
*
* NOTE: in the table below explains the semantics when a source mount
* of a given type is attached to a destination mount of a given type.
@@ -2196,22 +2291,28 @@ int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
* applied to each mount in the tree.
* Must be called without spinlocks held, since this function can sleep
* in allocations.
+ *
+ * Context: The function expects namespace_lock() to be held.
+ * Return: If @source_mnt was successfully attached 0 is returned.
+ * Otherwise a negative error code is returned.
*/
static int attach_recursive_mnt(struct mount *source_mnt,
- struct mount *dest_mnt,
- struct mountpoint *dest_mp,
- bool moving)
+ struct mount *top_mnt,
+ struct mountpoint *dest_mp,
+ enum mnt_tree_flags_t flags)
{
struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
HLIST_HEAD(tree_list);
- struct mnt_namespace *ns = dest_mnt->mnt_ns;
+ struct mnt_namespace *ns = top_mnt->mnt_ns;
struct mountpoint *smp;
- struct mount *child, *p;
+ struct mount *child, *dest_mnt, *p;
struct hlist_node *n;
- int err;
+ int err = 0;
+ bool moving = flags & MNT_TREE_MOVE, beneath = flags & MNT_TREE_BENEATH;
- /* Preallocate a mountpoint in case the new mounts need
- * to be tucked under other mounts.
+ /*
+ * Preallocate a mountpoint in case the new mounts need to be
+ * mounted beneath mounts on the same mountpoint.
*/
smp = get_mountpoint(source_mnt->mnt.mnt_root);
if (IS_ERR(smp))
@@ -2224,29 +2325,41 @@ static int attach_recursive_mnt(struct mount *source_mnt,
goto out;
}
+ if (beneath)
+ dest_mnt = top_mnt->mnt_parent;
+ else
+ dest_mnt = top_mnt;
+
if (IS_MNT_SHARED(dest_mnt)) {
err = invent_group_ids(source_mnt, true);
if (err)
goto out;
err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
- lock_mount_hash();
- if (err)
- goto out_cleanup_ids;
+ }
+ lock_mount_hash();
+ if (err)
+ goto out_cleanup_ids;
+
+ if (IS_MNT_SHARED(dest_mnt)) {
for (p = source_mnt; p; p = next_mnt(p, source_mnt))
set_mnt_shared(p);
- } else {
- lock_mount_hash();
}
+
if (moving) {
+ if (beneath)
+ dest_mp = smp;
unhash_mnt(source_mnt);
- attach_mnt(source_mnt, dest_mnt, dest_mp);
+ attach_mnt(source_mnt, top_mnt, dest_mp, beneath);
touch_mnt_namespace(source_mnt->mnt_ns);
} else {
if (source_mnt->mnt_ns) {
/* move from anon - the caller will destroy */
list_del_init(&source_mnt->mnt_ns->list);
}
- mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
+ if (beneath)
+ mnt_set_mountpoint_beneath(source_mnt, top_mnt, smp);
+ else
+ mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
commit_tree(source_mnt);
}
@@ -2286,33 +2399,101 @@ static int attach_recursive_mnt(struct mount *source_mnt,
return err;
}
-static struct mountpoint *lock_mount(struct path *path)
+/**
+ * do_lock_mount - lock mount and mountpoint
+ * @path: target path
+ * @beneath: whether the intention is to mount beneath @path
+ *
+ * Follow the mount stack on @path until the top mount @mnt is found. If
+ * the initial @path->{mnt,dentry} is a mountpoint lookup the first
+ * mount stacked on top of it. Then simply follow @{mnt,mnt->mnt_root}
+ * until nothing is stacked on top of it anymore.
+ *
+ * Acquire the inode_lock() on the top mount's ->mnt_root to protect
+ * against concurrent removal of the new mountpoint from another mount
+ * namespace.
+ *
+ * If @beneath is requested, acquire inode_lock() on @mnt's mountpoint
+ * @mp on @mnt->mnt_parent must be acquired. This protects against a
+ * concurrent unlink of @mp->mnt_dentry from another mount namespace
+ * where @mnt doesn't have a child mount mounted @mp. A concurrent
+ * removal of @mnt->mnt_root doesn't matter as nothing will be mounted
+ * on top of it for @beneath.
+ *
+ * In addition, @beneath needs to make sure that @mnt hasn't been
+ * unmounted or moved from its current mountpoint in between dropping
+ * @mount_lock and acquiring @namespace_sem. For the !@beneath case @mnt
+ * being unmounted would be detected later by e.g., calling
+ * check_mnt(mnt) in the function it's called from. For the @beneath
+ * case however, it's useful to detect it directly in do_lock_mount().
+ * If @mnt hasn't been unmounted then @mnt->mnt_mountpoint still points
+ * to @mnt->mnt_mp->m_dentry. But if @mnt has been unmounted it will
+ * point to @mnt->mnt_root and @mnt->mnt_mp will be NULL.
+ *
+ * Return: Either the target mountpoint on the top mount or the top
+ * mount's mountpoint.
+ */
+static struct mountpoint *do_lock_mount(struct path *path, bool beneath)
{
- struct vfsmount *mnt;
- struct dentry *dentry = path->dentry;
-retry:
- inode_lock(dentry->d_inode);
- if (unlikely(cant_mount(dentry))) {
- inode_unlock(dentry->d_inode);
- return ERR_PTR(-ENOENT);
- }
- namespace_lock();
- mnt = lookup_mnt(path);
- if (likely(!mnt)) {
- struct mountpoint *mp = get_mountpoint(dentry);
- if (IS_ERR(mp)) {
+ struct vfsmount *mnt = path->mnt;
+ struct dentry *dentry;
+ struct mountpoint *mp = ERR_PTR(-ENOENT);
+
+ for (;;) {
+ struct mount *m;
+
+ if (beneath) {
+ m = real_mount(mnt);
+ read_seqlock_excl(&mount_lock);
+ dentry = dget(m->mnt_mountpoint);
+ read_sequnlock_excl(&mount_lock);
+ } else {
+ dentry = path->dentry;
+ }
+
+ inode_lock(dentry->d_inode);
+ if (unlikely(cant_mount(dentry))) {
+ inode_unlock(dentry->d_inode);
+ goto out;
+ }
+
+ namespace_lock();
+
+ if (beneath && (!is_mounted(mnt) || m->mnt_mountpoint != dentry)) {
namespace_unlock();
inode_unlock(dentry->d_inode);
- return mp;
+ goto out;
}
- return mp;
+
+ mnt = lookup_mnt(path);
+ if (likely(!mnt))
+ break;
+
+ namespace_unlock();
+ inode_unlock(dentry->d_inode);
+ if (beneath)
+ dput(dentry);
+ path_put(path);
+ path->mnt = mnt;
+ path->dentry = dget(mnt->mnt_root);
}
- namespace_unlock();
- inode_unlock(path->dentry->d_inode);
- path_put(path);
- path->mnt = mnt;
- dentry = path->dentry = dget(mnt->mnt_root);
- goto retry;
+
+ mp = get_mountpoint(dentry);
+ if (IS_ERR(mp)) {
+ namespace_unlock();
+ inode_unlock(dentry->d_inode);
+ }
+
+out:
+ if (beneath)
+ dput(dentry);
+
+ return mp;
+}
+
+static inline struct mountpoint *lock_mount(struct path *path)
+{
+ return do_lock_mount(path, false);
}
static void unlock_mount(struct mountpoint *where)
@@ -2336,7 +2517,7 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
d_is_dir(mnt->mnt.mnt_root))
return -ENOTDIR;
- return attach_recursive_mnt(mnt, p, mp, false);
+ return attach_recursive_mnt(mnt, p, mp, 0);
}
/*
@@ -2367,7 +2548,7 @@ static int do_change_type(struct path *path, int ms_flags)
int type;
int err = 0;
- if (path->dentry != path->mnt->mnt_root)
+ if (!path_mounted(path))
return -EINVAL;
type = flags_to_propagation_type(ms_flags);
@@ -2643,7 +2824,7 @@ static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
if (!check_mnt(mnt))
return -EINVAL;
- if (path->dentry != mnt->mnt.mnt_root)
+ if (!path_mounted(path))
return -EINVAL;
if (!can_change_locked_flags(mnt, mnt_flags))
@@ -2682,7 +2863,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
if (!check_mnt(mnt))
return -EINVAL;
- if (path->dentry != path->mnt->mnt_root)
+ if (!path_mounted(path))
return -EINVAL;
if (!can_change_locked_flags(mnt, mnt_flags))
@@ -2772,9 +2953,9 @@ static int do_set_group(struct path *from_path, struct path *to_path)
err = -EINVAL;
/* To and From paths should be mount roots */
- if (from_path->dentry != from_path->mnt->mnt_root)
+ if (!path_mounted(from_path))
goto out;
- if (to_path->dentry != to_path->mnt->mnt_root)
+ if (!path_mounted(to_path))
goto out;
/* Setting sharing groups is only allowed across same superblock */
@@ -2818,7 +2999,110 @@ out:
return err;
}
-static int do_move_mount(struct path *old_path, struct path *new_path)
+/**
+ * path_overmounted - check if path is overmounted
+ * @path: path to check
+ *
+ * Check if path is overmounted, i.e., if there's a mount on top of
+ * @path->mnt with @path->dentry as mountpoint.
+ *
+ * Context: This function expects namespace_lock() to be held.
+ * Return: If path is overmounted true is returned, false if not.
+ */
+static inline bool path_overmounted(const struct path *path)
+{
+ rcu_read_lock();
+ if (unlikely(__lookup_mnt(path->mnt, path->dentry))) {
+ rcu_read_unlock();
+ return true;
+ }
+ rcu_read_unlock();
+ return false;
+}
+
+/**
+ * can_move_mount_beneath - check that we can mount beneath the top mount
+ * @from: mount to mount beneath
+ * @to: mount under which to mount
+ *
+ * - Make sure that @to->dentry is actually the root of a mount under
+ * which we can mount another mount.
+ * - Make sure that nothing can be mounted beneath the caller's current
+ * root or the rootfs of the namespace.
+ * - Make sure that the caller can unmount the topmost mount ensuring
+ * that the caller could reveal the underlying mountpoint.
+ * - Ensure that nothing has been mounted on top of @from before we
+ * grabbed @namespace_sem to avoid creating pointless shadow mounts.
+ * - Prevent mounting beneath a mount if the propagation relationship
+ * between the source mount, parent mount, and top mount would lead to
+ * nonsensical mount trees.
+ *
+ * Context: This function expects namespace_lock() to be held.
+ * Return: On success 0, and on error a negative error code is returned.
+ */
+static int can_move_mount_beneath(const struct path *from,
+ const struct path *to,
+ const struct mountpoint *mp)
+{
+ struct mount *mnt_from = real_mount(from->mnt),
+ *mnt_to = real_mount(to->mnt),
+ *parent_mnt_to = mnt_to->mnt_parent;
+
+ if (!mnt_has_parent(mnt_to))
+ return -EINVAL;
+
+ if (!path_mounted(to))
+ return -EINVAL;
+
+ if (IS_MNT_LOCKED(mnt_to))
+ return -EINVAL;
+
+ /* Avoid creating shadow mounts during mount propagation. */
+ if (path_overmounted(from))
+ return -EINVAL;
+
+ /*
+ * Mounting beneath the rootfs only makes sense when the
+ * semantics of pivot_root(".", ".") are used.
+ */
+ if (&mnt_to->mnt == current->fs->root.mnt)
+ return -EINVAL;
+ if (parent_mnt_to == current->nsproxy->mnt_ns->root)
+ return -EINVAL;
+
+ for (struct mount *p = mnt_from; mnt_has_parent(p); p = p->mnt_parent)
+ if (p == mnt_to)
+ return -EINVAL;
+
+ /*
+ * If the parent mount propagates to the child mount this would
+ * mean mounting @mnt_from on @mnt_to->mnt_parent and then
+ * propagating a copy @c of @mnt_from on top of @mnt_to. This
+ * defeats the whole purpose of mounting beneath another mount.
+ */
+ if (propagation_would_overmount(parent_mnt_to, mnt_to, mp))
+ return -EINVAL;
+
+ /*
+ * If @mnt_to->mnt_parent propagates to @mnt_from this would
+ * mean propagating a copy @c of @mnt_from on top of @mnt_from.
+ * Afterwards @mnt_from would be mounted on top of
+ * @mnt_to->mnt_parent and @mnt_to would be unmounted from
+ * @mnt->mnt_parent and remounted on @mnt_from. But since @c is
+ * already mounted on @mnt_from, @mnt_to would ultimately be
+ * remounted on top of @c. Afterwards, @mnt_from would be
+ * covered by a copy @c of @mnt_from and @c would be covered by
+ * @mnt_from itself. This defeats the whole purpose of mounting
+ * @mnt_from beneath @mnt_to.
+ */
+ if (propagation_would_overmount(parent_mnt_to, mnt_from, mp))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int do_move_mount(struct path *old_path, struct path *new_path,
+ bool beneath)
{
struct mnt_namespace *ns;
struct mount *p;
@@ -2827,8 +3111,9 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
struct mountpoint *mp, *old_mp;
int err;
bool attached;
+ enum mnt_tree_flags_t flags = 0;
- mp = lock_mount(new_path);
+ mp = do_lock_mount(new_path, beneath);
if (IS_ERR(mp))
return PTR_ERR(mp);
@@ -2836,6 +3121,8 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
p = real_mount(new_path->mnt);
parent = old->mnt_parent;
attached = mnt_has_parent(old);
+ if (attached)
+ flags |= MNT_TREE_MOVE;
old_mp = old->mnt_mp;
ns = old->mnt_ns;
@@ -2855,7 +3142,7 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
if (old->mnt.mnt_flags & MNT_LOCKED)
goto out;
- if (old_path->dentry != old_path->mnt->mnt_root)
+ if (!path_mounted(old_path))
goto out;
if (d_is_dir(new_path->dentry) !=
@@ -2866,6 +3153,17 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
*/
if (attached && IS_MNT_SHARED(parent))
goto out;
+
+ if (beneath) {
+ err = can_move_mount_beneath(old_path, new_path, mp);
+ if (err)
+ goto out;
+
+ err = -EINVAL;
+ p = p->mnt_parent;
+ flags |= MNT_TREE_BENEATH;
+ }
+
/*
* Don't move a mount tree containing unbindable mounts to a destination
* mount which is shared.
@@ -2879,8 +3177,7 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
if (p == old)
goto out;
- err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp,
- attached);
+ err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp, flags);
if (err)
goto out;
@@ -2912,7 +3209,7 @@ static int do_move_mount_old(struct path *path, const char *old_name)
if (err)
return err;
- err = do_move_mount(&old_path, path);
+ err = do_move_mount(&old_path, path, false);
path_put(&old_path);
return err;
}
@@ -2937,8 +3234,7 @@ static int do_add_mount(struct mount *newmnt, struct mountpoint *mp,
}
/* Refuse the same filesystem on the same mount point */
- if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
- path->mnt->mnt_root == path->dentry)
+ if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb && path_mounted(path))
return -EBUSY;
if (d_is_symlink(newmnt->mnt.mnt_root))
@@ -3079,13 +3375,10 @@ int finish_automount(struct vfsmount *m, const struct path *path)
err = -ENOENT;
goto discard_locked;
}
- rcu_read_lock();
- if (unlikely(__lookup_mnt(path->mnt, dentry))) {
- rcu_read_unlock();
+ if (path_overmounted(path)) {
err = 0;
goto discard_locked;
}
- rcu_read_unlock();
mp = get_mountpoint(dentry);
if (IS_ERR(mp)) {
err = PTR_ERR(mp);
@@ -3777,6 +4070,10 @@ SYSCALL_DEFINE5(move_mount,
if (flags & ~MOVE_MOUNT__MASK)
return -EINVAL;
+ if ((flags & (MOVE_MOUNT_BENEATH | MOVE_MOUNT_SET_GROUP)) ==
+ (MOVE_MOUNT_BENEATH | MOVE_MOUNT_SET_GROUP))
+ return -EINVAL;
+
/* If someone gives a pathname, they aren't permitted to move
* from an fd that requires unmount as we can't get at the flag
* to clear it afterwards.
@@ -3806,7 +4103,8 @@ SYSCALL_DEFINE5(move_mount,
if (flags & MOVE_MOUNT_SET_GROUP)
ret = do_set_group(&from_path, &to_path);
else
- ret = do_move_mount(&from_path, &to_path);
+ ret = do_move_mount(&from_path, &to_path,
+ (flags & MOVE_MOUNT_BENEATH));
out_to:
path_put(&to_path);
@@ -3917,11 +4215,11 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
if (new_mnt == root_mnt || old_mnt == root_mnt)
goto out4; /* loop, on the same file system */
error = -EINVAL;
- if (root.mnt->mnt_root != root.dentry)
+ if (!path_mounted(&root))
goto out4; /* not a mountpoint */
if (!mnt_has_parent(root_mnt))
goto out4; /* not attached */
- if (new.mnt->mnt_root != new.dentry)
+ if (!path_mounted(&new))
goto out4; /* not a mountpoint */
if (!mnt_has_parent(new_mnt))
goto out4; /* not attached */
@@ -3939,9 +4237,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
}
/* mount old root on put_old */
- attach_mnt(root_mnt, old_mnt, old_mp);
+ attach_mnt(root_mnt, old_mnt, old_mp, false);
/* mount new_root on / */
- attach_mnt(new_mnt, root_parent, root_mp);
+ attach_mnt(new_mnt, root_parent, root_mp, false);
mnt_add_count(root_parent, -1);
touch_mnt_namespace(current->nsproxy->mnt_ns);
/* A moved mount should not expire automatically */
@@ -4124,7 +4422,7 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
struct mount *mnt = real_mount(path->mnt);
int err = 0;
- if (path->dentry != mnt->mnt.mnt_root)
+ if (!path_mounted(path))
return -EINVAL;
if (kattr->mnt_userns) {
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e63c1d46f189..8f3112e71a6a 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -317,7 +317,7 @@ static int nfs_readdir_folio_array_append(struct folio *folio,
name = nfs_readdir_copy_name(entry->name, entry->len);
- array = kmap_atomic(folio_page(folio, 0));
+ array = kmap_local_folio(folio, 0);
if (!name)
goto out;
ret = nfs_readdir_array_can_expand(array);
@@ -340,7 +340,7 @@ static int nfs_readdir_folio_array_append(struct folio *folio,
nfs_readdir_array_set_eof(array);
out:
*cookie = array->last_cookie;
- kunmap_atomic(array);
+ kunmap_local(array);
return ret;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 18f25ff4bff7..d3665390c4cb 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5437,10 +5437,18 @@ static bool nfs4_read_plus_not_supported(struct rpc_task *task,
return false;
}
-static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
+static inline void nfs4_read_plus_scratch_free(struct nfs_pgio_header *hdr)
{
- if (hdr->res.scratch)
+ if (hdr->res.scratch) {
kfree(hdr->res.scratch);
+ hdr->res.scratch = NULL;
+ }
+}
+
+static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
+{
+ nfs4_read_plus_scratch_free(hdr);
+
if (!nfs4_sequence_done(task, &hdr->res.seq_res))
return -EAGAIN;
if (nfs4_read_stateid_changed(task, &hdr->args))
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
index f21259ead64b..4c9b87850ab1 100644
--- a/fs/nfsd/cache.h
+++ b/fs/nfsd/cache.h
@@ -80,6 +80,8 @@ enum {
int nfsd_drc_slab_create(void);
void nfsd_drc_slab_free(void);
+int nfsd_net_reply_cache_init(struct nfsd_net *nn);
+void nfsd_net_reply_cache_destroy(struct nfsd_net *nn);
int nfsd_reply_cache_init(struct nfsd_net *);
void nfsd_reply_cache_shutdown(struct nfsd_net *);
int nfsd_cache_lookup(struct svc_rqst *);
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index ae85257b4238..11a0eaa2f914 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -97,7 +97,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
goto out;
err = -EINVAL;
- if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+ if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
goto out;
err = -ENOENT;
@@ -107,7 +107,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
dprintk("found domain %s\n", buf);
err = -EINVAL;
- if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+ if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
goto out;
fsidtype = simple_strtoul(buf, &ep, 10);
if (*ep)
@@ -593,7 +593,6 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
{
/* client path expiry [flags anonuid anongid fsid] */
char *buf;
- int len;
int err;
struct auth_domain *dom = NULL;
struct svc_export exp = {}, *expp;
@@ -609,8 +608,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
/* client */
err = -EINVAL;
- len = qword_get(&mesg, buf, PAGE_SIZE);
- if (len <= 0)
+ if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
goto out;
err = -ENOENT;
@@ -620,7 +618,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
/* path */
err = -EINVAL;
- if ((len = qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+ if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
goto out1;
err = kern_path(buf, 0, &exp.ex_path);
@@ -665,7 +663,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
goto out3;
exp.ex_fsid = an_int;
- while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) {
+ while (qword_get(&mesg, buf, PAGE_SIZE) > 0) {
if (strcmp(buf, "fsloc") == 0)
err = fsloc_parse(&mesg, buf, &exp.ex_fslocs);
else if (strcmp(buf, "uuid") == 0)
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index e6bb8eeb5bc2..fc8d5b7db9f8 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -151,8 +151,6 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
{
struct nfsd3_readargs *argp = rqstp->rq_argp;
struct nfsd3_readres *resp = rqstp->rq_resp;
- unsigned int len;
- int v;
dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
SVCFH_fmt(&argp->fh),
@@ -166,17 +164,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
if (argp->offset + argp->count > (u64)OFFSET_MAX)
argp->count = (u64)OFFSET_MAX - argp->offset;
- v = 0;
- len = argp->count;
resp->pages = rqstp->rq_next_page;
- while (len > 0) {
- struct page *page = *(rqstp->rq_next_page++);
-
- rqstp->rq_vec[v].iov_base = page_address(page);
- rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
- len -= rqstp->rq_vec[v].iov_len;
- v++;
- }
/* Obtain buffer pointer for payload.
* 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
@@ -187,7 +175,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
fh_copy(&resp->fh, &argp->fh);
resp->status = nfsd_read(rqstp, &resp->fh, argp->offset,
- rqstp->rq_vec, v, &resp->count, &resp->eof);
+ &resp->count, &resp->eof);
return rpc_success;
}
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 3308dd671ef0..f32128955ec8 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -828,7 +828,8 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
return false;
if (xdr_stream_encode_u32(xdr, resp->len) < 0)
return false;
- xdr_write_pages(xdr, resp->pages, 0, resp->len);
+ svcxdr_encode_opaque_pages(rqstp, xdr, resp->pages, 0,
+ resp->len);
if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0)
return false;
break;
@@ -859,8 +860,9 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
return false;
if (xdr_stream_encode_u32(xdr, resp->count) < 0)
return false;
- xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base,
- resp->count);
+ svcxdr_encode_opaque_pages(rqstp, xdr, resp->pages,
+ rqstp->rq_res.page_base,
+ resp->count);
if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0)
return false;
break;
@@ -961,7 +963,8 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
return false;
if (!svcxdr_encode_cookieverf3(xdr, resp->verf))
return false;
- xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len);
+ svcxdr_encode_opaque_pages(rqstp, xdr, dirlist->pages, 0,
+ dirlist->len);
/* no more entries */
if (xdr_stream_encode_item_absent(xdr) < 0)
return false;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 76db2fe29624..26b1343c8035 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2541,6 +2541,20 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
return p;
}
+static __be32 nfsd4_encode_nfstime4(struct xdr_stream *xdr,
+ struct timespec64 *tv)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, XDR_UNIT * 3);
+ if (!p)
+ return nfserr_resource;
+
+ p = xdr_encode_hyper(p, (s64)tv->tv_sec);
+ *p = cpu_to_be32(tv->tv_nsec);
+ return nfs_ok;
+}
+
/*
* ctime (in NFSv4, time_metadata) is not writeable, and the client
* doesn't really care what resolution could theoretically be stored by
@@ -2566,12 +2580,16 @@ static __be32 *encode_time_delta(__be32 *p, struct inode *inode)
return p;
}
-static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c)
+static __be32
+nfsd4_encode_change_info4(struct xdr_stream *xdr, struct nfsd4_change_info *c)
{
- *p++ = cpu_to_be32(c->atomic);
- p = xdr_encode_hyper(p, c->before_change);
- p = xdr_encode_hyper(p, c->after_change);
- return p;
+ if (xdr_stream_encode_bool(xdr, c->atomic) < 0)
+ return nfserr_resource;
+ if (xdr_stream_encode_u64(xdr, c->before_change) < 0)
+ return nfserr_resource;
+ if (xdr_stream_encode_u64(xdr, c->after_change) < 0)
+ return nfserr_resource;
+ return nfs_ok;
}
/* Encode as an array of strings the string given with components
@@ -3348,11 +3366,9 @@ out_acl:
p = xdr_encode_hyper(p, dummy64);
}
if (bmval1 & FATTR4_WORD1_TIME_ACCESS) {
- p = xdr_reserve_space(xdr, 12);
- if (!p)
- goto out_resource;
- p = xdr_encode_hyper(p, (s64)stat.atime.tv_sec);
- *p++ = cpu_to_be32(stat.atime.tv_nsec);
+ status = nfsd4_encode_nfstime4(xdr, &stat.atime);
+ if (status)
+ goto out;
}
if (bmval1 & FATTR4_WORD1_TIME_DELTA) {
p = xdr_reserve_space(xdr, 12);
@@ -3361,25 +3377,19 @@ out_acl:
p = encode_time_delta(p, d_inode(dentry));
}
if (bmval1 & FATTR4_WORD1_TIME_METADATA) {
- p = xdr_reserve_space(xdr, 12);
- if (!p)
- goto out_resource;
- p = xdr_encode_hyper(p, (s64)stat.ctime.tv_sec);
- *p++ = cpu_to_be32(stat.ctime.tv_nsec);
+ status = nfsd4_encode_nfstime4(xdr, &stat.ctime);
+ if (status)
+ goto out;
}
if (bmval1 & FATTR4_WORD1_TIME_MODIFY) {
- p = xdr_reserve_space(xdr, 12);
- if (!p)
- goto out_resource;
- p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec);
- *p++ = cpu_to_be32(stat.mtime.tv_nsec);
+ status = nfsd4_encode_nfstime4(xdr, &stat.mtime);
+ if (status)
+ goto out;
}
if (bmval1 & FATTR4_WORD1_TIME_CREATE) {
- p = xdr_reserve_space(xdr, 12);
- if (!p)
- goto out_resource;
- p = xdr_encode_hyper(p, (s64)stat.btime.tv_sec);
- *p++ = cpu_to_be32(stat.btime.tv_nsec);
+ status = nfsd4_encode_nfstime4(xdr, &stat.btime);
+ if (status)
+ goto out;
}
if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
u64 ino = stat.ino;
@@ -3689,6 +3699,30 @@ fail:
}
static __be32
+nfsd4_encode_verifier4(struct xdr_stream *xdr, const nfs4_verifier *verf)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
+ if (!p)
+ return nfserr_resource;
+ memcpy(p, verf->data, sizeof(verf->data));
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_encode_clientid4(struct xdr_stream *xdr, const clientid_t *clientid)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, sizeof(__be64));
+ if (!p)
+ return nfserr_resource;
+ memcpy(p, clientid, sizeof(*clientid));
+ return nfs_ok;
+}
+
+static __be32
nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid)
{
__be32 *p;
@@ -3752,15 +3786,8 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr,
union nfsd4_op_u *u)
{
struct nfsd4_commit *commit = &u->commit;
- struct xdr_stream *xdr = resp->xdr;
- __be32 *p;
- p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
- if (!p)
- return nfserr_resource;
- p = xdr_encode_opaque_fixed(p, commit->co_verf.data,
- NFS4_VERIFIER_SIZE);
- return 0;
+ return nfsd4_encode_verifier4(resp->xdr, &commit->co_verf);
}
static __be32
@@ -3769,12 +3796,10 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr,
{
struct nfsd4_create *create = &u->create;
struct xdr_stream *xdr = resp->xdr;
- __be32 *p;
- p = xdr_reserve_space(xdr, 20);
- if (!p)
- return nfserr_resource;
- encode_cinfo(p, &create->cr_cinfo);
+ nfserr = nfsd4_encode_change_info4(xdr, &create->cr_cinfo);
+ if (nfserr)
+ return nfserr;
return nfsd4_encode_bitmap(xdr, create->cr_bmval[0],
create->cr_bmval[1], create->cr_bmval[2]);
}
@@ -3892,13 +3917,8 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr,
{
struct nfsd4_link *link = &u->link;
struct xdr_stream *xdr = resp->xdr;
- __be32 *p;
- p = xdr_reserve_space(xdr, 20);
- if (!p)
- return nfserr_resource;
- p = encode_cinfo(p, &link->li_cinfo);
- return 0;
+ return nfsd4_encode_change_info4(xdr, &link->li_cinfo);
}
@@ -3913,11 +3933,11 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr,
nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid);
if (nfserr)
return nfserr;
- p = xdr_reserve_space(xdr, 24);
- if (!p)
+ nfserr = nfsd4_encode_change_info4(xdr, &open->op_cinfo);
+ if (nfserr)
+ return nfserr;
+ if (xdr_stream_encode_u32(xdr, open->op_rflags) < 0)
return nfserr_resource;
- p = encode_cinfo(p, &open->op_cinfo);
- *p++ = cpu_to_be32(open->op_rflags);
nfserr = nfsd4_encode_bitmap(xdr, open->op_bmval[0], open->op_bmval[1],
open->op_bmval[2]);
@@ -3956,7 +3976,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr,
p = xdr_reserve_space(xdr, 32);
if (!p)
return nfserr_resource;
- *p++ = cpu_to_be32(0);
+ *p++ = cpu_to_be32(open->op_recall);
/*
* TODO: space_limit's in delegations
@@ -4018,6 +4038,11 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr,
return nfsd4_encode_stateid(xdr, &od->od_stateid);
}
+/*
+ * The operation of this function assumes that this is the only
+ * READ operation in the COMPOUND. If there are multiple READs,
+ * we use nfsd4_encode_readv().
+ */
static __be32 nfsd4_encode_splice_read(
struct nfsd4_compoundres *resp,
struct nfsd4_read *read,
@@ -4028,8 +4053,12 @@ static __be32 nfsd4_encode_splice_read(
int status, space_left;
__be32 nfserr;
- /* Make sure there will be room for padding if needed */
- if (xdr->end - xdr->p < 1)
+ /*
+ * Make sure there is room at the end of buf->head for
+ * svcxdr_encode_opaque_pages() to create a tail buffer
+ * to XDR-pad the payload.
+ */
+ if (xdr->iov != xdr->buf->head || xdr->end - xdr->p < 1)
return nfserr_resource;
nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp,
@@ -4038,6 +4067,8 @@ static __be32 nfsd4_encode_splice_read(
read->rd_length = maxcount;
if (nfserr)
goto out_err;
+ svcxdr_encode_opaque_pages(read->rd_rqstp, xdr, buf->pages,
+ buf->page_base, maxcount);
status = svc_encode_result_payload(read->rd_rqstp,
buf->head[0].iov_len, maxcount);
if (status) {
@@ -4045,31 +4076,19 @@ static __be32 nfsd4_encode_splice_read(
goto out_err;
}
- buf->page_len = maxcount;
- buf->len += maxcount;
- xdr->page_ptr += (buf->page_base + maxcount + PAGE_SIZE - 1)
- / PAGE_SIZE;
-
- /* Use rest of head for padding and remaining ops: */
- buf->tail[0].iov_base = xdr->p;
- buf->tail[0].iov_len = 0;
- xdr->iov = buf->tail;
- if (maxcount&3) {
- int pad = 4 - (maxcount&3);
-
- *(xdr->p++) = 0;
-
- buf->tail[0].iov_base += maxcount&3;
- buf->tail[0].iov_len = pad;
- buf->len += pad;
- }
-
+ /*
+ * Prepare to encode subsequent operations.
+ *
+ * xdr_truncate_encode() is not safe to use after a successful
+ * splice read has been done, so the following stream
+ * manipulations are open-coded.
+ */
space_left = min_t(int, (void *)xdr->end - (void *)xdr->p,
buf->buflen - buf->len);
buf->buflen = buf->len + space_left;
xdr->end = (__be32 *)((void *)xdr->end + space_left);
- return 0;
+ return nfs_ok;
out_err:
/*
@@ -4090,13 +4109,13 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
__be32 zero = xdr_zero;
__be32 nfserr;
- read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount);
- if (read->rd_vlen < 0)
+ if (xdr_reserve_space_vec(xdr, maxcount) < 0)
return nfserr_resource;
- nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
- resp->rqstp->rq_vec, read->rd_vlen, &maxcount,
- &read->rd_eof);
+ nfserr = nfsd_iter_read(resp->rqstp, read->rd_fhp, file,
+ read->rd_offset, &maxcount,
+ xdr->buf->page_len & ~PAGE_MASK,
+ &read->rd_eof);
read->rd_length = maxcount;
if (nfserr)
return nfserr;
@@ -4213,15 +4232,9 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr,
int starting_len = xdr->buf->len;
__be32 *p;
- p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
- if (!p)
- return nfserr_resource;
-
- /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
- *p++ = cpu_to_be32(0);
- *p++ = cpu_to_be32(0);
- xdr->buf->head[0].iov_len = (char *)xdr->p -
- (char *)xdr->buf->head[0].iov_base;
+ nfserr = nfsd4_encode_verifier4(xdr, &readdir->rd_verf);
+ if (nfserr != nfs_ok)
+ return nfserr;
/*
* Number of bytes left for directory entries allowing for the
@@ -4299,13 +4312,8 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr,
{
struct nfsd4_remove *remove = &u->remove;
struct xdr_stream *xdr = resp->xdr;
- __be32 *p;
- p = xdr_reserve_space(xdr, 20);
- if (!p)
- return nfserr_resource;
- p = encode_cinfo(p, &remove->rm_cinfo);
- return 0;
+ return nfsd4_encode_change_info4(xdr, &remove->rm_cinfo);
}
static __be32
@@ -4314,14 +4322,11 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr,
{
struct nfsd4_rename *rename = &u->rename;
struct xdr_stream *xdr = resp->xdr;
- __be32 *p;
- p = xdr_reserve_space(xdr, 40);
- if (!p)
- return nfserr_resource;
- p = encode_cinfo(p, &rename->rn_sinfo);
- p = encode_cinfo(p, &rename->rn_tinfo);
- return 0;
+ nfserr = nfsd4_encode_change_info4(xdr, &rename->rn_sinfo);
+ if (nfserr)
+ return nfserr;
+ return nfsd4_encode_change_info4(xdr, &rename->rn_tinfo);
}
static __be32
@@ -4448,23 +4453,25 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr,
{
struct nfsd4_setclientid *scd = &u->setclientid;
struct xdr_stream *xdr = resp->xdr;
- __be32 *p;
if (!nfserr) {
- p = xdr_reserve_space(xdr, 8 + NFS4_VERIFIER_SIZE);
- if (!p)
- return nfserr_resource;
- p = xdr_encode_opaque_fixed(p, &scd->se_clientid, 8);
- p = xdr_encode_opaque_fixed(p, &scd->se_confirm,
- NFS4_VERIFIER_SIZE);
- }
- else if (nfserr == nfserr_clid_inuse) {
- p = xdr_reserve_space(xdr, 8);
- if (!p)
- return nfserr_resource;
- *p++ = cpu_to_be32(0);
- *p++ = cpu_to_be32(0);
+ nfserr = nfsd4_encode_clientid4(xdr, &scd->se_clientid);
+ if (nfserr != nfs_ok)
+ goto out;
+ nfserr = nfsd4_encode_verifier4(xdr, &scd->se_confirm);
+ } else if (nfserr == nfserr_clid_inuse) {
+ /* empty network id */
+ if (xdr_stream_encode_u32(xdr, 0) < 0) {
+ nfserr = nfserr_resource;
+ goto out;
+ }
+ /* empty universal address */
+ if (xdr_stream_encode_u32(xdr, 0) < 0) {
+ nfserr = nfserr_resource;
+ goto out;
+ }
}
+out:
return nfserr;
}
@@ -4473,17 +4480,12 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr,
union nfsd4_op_u *u)
{
struct nfsd4_write *write = &u->write;
- struct xdr_stream *xdr = resp->xdr;
- __be32 *p;
- p = xdr_reserve_space(xdr, 16);
- if (!p)
+ if (xdr_stream_encode_u32(resp->xdr, write->wr_bytes_written) < 0)
return nfserr_resource;
- *p++ = cpu_to_be32(write->wr_bytes_written);
- *p++ = cpu_to_be32(write->wr_how_written);
- p = xdr_encode_opaque_fixed(p, write->wr_verifier.data,
- NFS4_VERIFIER_SIZE);
- return 0;
+ if (xdr_stream_encode_u32(resp->xdr, write->wr_how_written) < 0)
+ return nfserr_resource;
+ return nfsd4_encode_verifier4(resp->xdr, &write->wr_verifier);
}
static __be32
@@ -4505,20 +4507,15 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
server_scope = nn->nfsd_name;
server_scope_sz = strlen(nn->nfsd_name);
- p = xdr_reserve_space(xdr,
- 8 /* eir_clientid */ +
- 4 /* eir_sequenceid */ +
- 4 /* eir_flags */ +
- 4 /* spr_how */);
- if (!p)
+ if (nfsd4_encode_clientid4(xdr, &exid->clientid) != nfs_ok)
+ return nfserr_resource;
+ if (xdr_stream_encode_u32(xdr, exid->seqid) < 0)
+ return nfserr_resource;
+ if (xdr_stream_encode_u32(xdr, exid->flags) < 0)
return nfserr_resource;
- p = xdr_encode_opaque_fixed(p, &exid->clientid, 8);
- *p++ = cpu_to_be32(exid->seqid);
- *p++ = cpu_to_be32(exid->flags);
-
- *p++ = cpu_to_be32(exid->spa_how);
-
+ if (xdr_stream_encode_u32(xdr, exid->spa_how) < 0)
+ return nfserr_resource;
switch (exid->spa_how) {
case SP4_NONE:
break;
@@ -5099,15 +5096,8 @@ nfsd4_encode_setxattr(struct nfsd4_compoundres *resp, __be32 nfserr,
{
struct nfsd4_setxattr *setxattr = &u->setxattr;
struct xdr_stream *xdr = resp->xdr;
- __be32 *p;
- p = xdr_reserve_space(xdr, 20);
- if (!p)
- return nfserr_resource;
-
- encode_cinfo(p, &setxattr->setxa_cinfo);
-
- return 0;
+ return nfsd4_encode_change_info4(xdr, &setxattr->setxa_cinfo);
}
/*
@@ -5253,14 +5243,8 @@ nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr,
{
struct nfsd4_removexattr *removexattr = &u->removexattr;
struct xdr_stream *xdr = resp->xdr;
- __be32 *p;
- p = xdr_reserve_space(xdr, 20);
- if (!p)
- return nfserr_resource;
-
- p = encode_cinfo(p, &removexattr->rmxa_cinfo);
- return 0;
+ return nfsd4_encode_change_info4(xdr, &removexattr->rmxa_cinfo);
}
typedef __be32(*nfsd4_enc)(struct nfsd4_compoundres *, __be32, union nfsd4_op_u *u);
@@ -5460,6 +5444,12 @@ status:
release:
if (opdesc && opdesc->op_release)
opdesc->op_release(&op->u);
+
+ /*
+ * Account for pages consumed while encoding this operation.
+ * The xdr_stream primitives don't manage rq_next_page.
+ */
+ rqstp->rq_next_page = xdr->page_ptr + 1;
}
/*
@@ -5528,9 +5518,6 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
p = resp->statusp;
*p++ = resp->cstate.status;
-
- rqstp->rq_next_page = xdr->page_ptr + 1;
-
*p++ = htonl(resp->taglen);
memcpy(p, resp->tag, resp->taglen);
p += XDR_QUADLEN(resp->taglen);
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 041faa13b852..a8eda1c85829 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -148,12 +148,23 @@ void nfsd_drc_slab_free(void)
kmem_cache_destroy(drc_slab);
}
-static int nfsd_reply_cache_stats_init(struct nfsd_net *nn)
+/**
+ * nfsd_net_reply_cache_init - per net namespace reply cache set-up
+ * @nn: nfsd_net being initialized
+ *
+ * Returns zero on succes; otherwise a negative errno is returned.
+ */
+int nfsd_net_reply_cache_init(struct nfsd_net *nn)
{
return nfsd_percpu_counters_init(nn->counter, NFSD_NET_COUNTERS_NUM);
}
-static void nfsd_reply_cache_stats_destroy(struct nfsd_net *nn)
+/**
+ * nfsd_net_reply_cache_destroy - per net namespace reply cache tear-down
+ * @nn: nfsd_net being freed
+ *
+ */
+void nfsd_net_reply_cache_destroy(struct nfsd_net *nn)
{
nfsd_percpu_counters_destroy(nn->counter, NFSD_NET_COUNTERS_NUM);
}
@@ -169,17 +180,13 @@ int nfsd_reply_cache_init(struct nfsd_net *nn)
hashsize = nfsd_hashsize(nn->max_drc_entries);
nn->maskbits = ilog2(hashsize);
- status = nfsd_reply_cache_stats_init(nn);
- if (status)
- goto out_nomem;
-
nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan;
nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count;
nn->nfsd_reply_cache_shrinker.seeks = 1;
status = register_shrinker(&nn->nfsd_reply_cache_shrinker,
"nfsd-reply:%s", nn->nfsd_name);
if (status)
- goto out_stats_destroy;
+ return status;
nn->drc_hashtbl = kvzalloc(array_size(hashsize,
sizeof(*nn->drc_hashtbl)), GFP_KERNEL);
@@ -195,9 +202,6 @@ int nfsd_reply_cache_init(struct nfsd_net *nn)
return 0;
out_shrinker:
unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
-out_stats_destroy:
- nfsd_reply_cache_stats_destroy(nn);
-out_nomem:
printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
return -ENOMEM;
}
@@ -217,7 +221,6 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
rp, nn);
}
}
- nfsd_reply_cache_stats_destroy(nn);
kvfree(nn->drc_hashtbl);
nn->drc_hashtbl = NULL;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 7b8f17ee5224..1b8b1aab9a15 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -25,6 +25,7 @@
#include "netns.h"
#include "pnfs.h"
#include "filecache.h"
+#include "trace.h"
/*
* We have a single directory with several nodes in it.
@@ -109,12 +110,12 @@ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *bu
if (IS_ERR(data))
return PTR_ERR(data);
- rv = write_op[ino](file, data, size);
- if (rv >= 0) {
- simple_transaction_set(file, rv);
- rv = size;
- }
- return rv;
+ rv = write_op[ino](file, data, size);
+ if (rv < 0)
+ return rv;
+
+ simple_transaction_set(file, rv);
+ return size;
}
static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos)
@@ -153,18 +154,6 @@ static int exports_net_open(struct net *net, struct file *file)
return 0;
}
-static int exports_proc_open(struct inode *inode, struct file *file)
-{
- return exports_net_open(current->nsproxy->net_ns, file);
-}
-
-static const struct proc_ops exports_proc_ops = {
- .proc_open = exports_proc_open,
- .proc_read = seq_read,
- .proc_lseek = seq_lseek,
- .proc_release = seq_release,
-};
-
static int exports_nfsd_open(struct inode *inode, struct file *file)
{
return exports_net_open(inode->i_sb->s_fs_info, file);
@@ -242,6 +231,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size)
if (rpc_pton(net, fo_path, size, sap, salen) == 0)
return -EINVAL;
+ trace_nfsd_ctl_unlock_ip(net, buf);
return nlmsvc_unlock_all_by_ip(sap);
}
@@ -275,7 +265,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
fo_path = buf;
if (qword_get(&buf, fo_path, size) < 0)
return -EINVAL;
-
+ trace_nfsd_ctl_unlock_fs(netns(file), fo_path);
error = kern_path(fo_path, 0, &path);
if (error)
return error;
@@ -336,7 +326,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
len = qword_get(&mesg, dname, size);
if (len <= 0)
return -EINVAL;
-
+
path = dname+len+1;
len = qword_get(&mesg, path, size);
if (len <= 0)
@@ -350,15 +340,17 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
return -EINVAL;
maxsize = min(maxsize, NFS3_FHSIZE);
- if (qword_get(&mesg, mesg, size)>0)
+ if (qword_get(&mesg, mesg, size) > 0)
return -EINVAL;
+ trace_nfsd_ctl_filehandle(netns(file), dname, path, maxsize);
+
/* we have all the words, they are in buf.. */
dom = unix_domain_find(dname);
if (!dom)
return -ENOMEM;
- len = exp_rootfh(netns(file), dom, path, &fh, maxsize);
+ len = exp_rootfh(netns(file), dom, path, &fh, maxsize);
auth_domain_put(dom);
if (len)
return len;
@@ -411,6 +403,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
return rv;
if (newthreads < 0)
return -EINVAL;
+ trace_nfsd_ctl_threads(net, newthreads);
rv = nfsd_svc(newthreads, net, file->f_cred);
if (rv < 0)
return rv;
@@ -430,8 +423,8 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
* OR
*
* Input:
- * buf: C string containing whitespace-
- * separated unsigned integer values
+ * buf: C string containing whitespace-
+ * separated unsigned integer values
* representing the number of NFSD
* threads to start in each pool
* size: non-zero length of C string in @buf
@@ -483,6 +476,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
rv = -EINVAL;
if (nthreads[i] < 0)
goto out_free;
+ trace_nfsd_ctl_pool_threads(net, i, nthreads[i]);
}
rv = nfsd_set_nrthreads(i, nthreads, net);
if (rv)
@@ -538,7 +532,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
char *sep;
struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id);
- if (size>0) {
+ if (size > 0) {
if (nn->nfsd_serv)
/* Cannot change versions without updating
* nn->nfsd_serv->sv_xdrsize, and reallocing
@@ -548,6 +542,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
if (buf[size-1] != '\n')
return -EINVAL;
buf[size-1] = 0;
+ trace_nfsd_ctl_version(netns(file), buf);
vers = mesg;
len = qword_get(&mesg, vers, size);
@@ -649,11 +644,11 @@ out:
* OR
*
* Input:
- * buf: C string containing whitespace-
- * separated positive or negative
- * integer values representing NFS
- * protocol versions to enable ("+n")
- * or disable ("-n")
+ * buf: C string containing whitespace-
+ * separated positive or negative
+ * integer values representing NFS
+ * protocol versions to enable ("+n")
+ * or disable ("-n")
* size: non-zero length of C string in @buf
* Output:
* On success: status of zero or more protocol versions has
@@ -701,17 +696,13 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred
err = get_int(&mesg, &fd);
if (err != 0 || fd < 0)
return -EINVAL;
-
- if (svc_alien_sock(net, fd)) {
- printk(KERN_ERR "%s: socket net is different to NFSd's one\n", __func__);
- return -EINVAL;
- }
+ trace_nfsd_ctl_ports_addfd(net, fd);
err = nfsd_create_serv(net);
if (err != 0)
return err;
- err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred);
+ err = svc_addsock(nn->nfsd_serv, net, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred);
if (err >= 0 &&
!nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1))
@@ -722,7 +713,7 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred
}
/*
- * A transport listener is added by writing it's transport name and
+ * A transport listener is added by writing its transport name and
* a port number.
*/
static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cred *cred)
@@ -737,6 +728,7 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr
if (port < 1 || port > USHRT_MAX)
return -EINVAL;
+ trace_nfsd_ctl_ports_addxprt(net, transport, port);
err = nfsd_create_serv(net);
if (err != 0)
@@ -849,9 +841,9 @@ int nfsd_max_blksize;
* OR
*
* Input:
- * buf: C string containing an unsigned
- * integer value representing the new
- * NFS blksize
+ * buf: C string containing an unsigned
+ * integer value representing the new
+ * NFS blksize
* size: non-zero length of C string in @buf
* Output:
* On success: passed-in buffer filled with '\n'-terminated C string
@@ -870,6 +862,8 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
int rv = get_int(&mesg, &bsize);
if (rv)
return rv;
+ trace_nfsd_ctl_maxblksize(netns(file), bsize);
+
/* force bsize into allowed range and
* required alignment.
*/
@@ -898,9 +892,9 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
* OR
*
* Input:
- * buf: C string containing an unsigned
- * integer value representing the new
- * number of max connections
+ * buf: C string containing an unsigned
+ * integer value representing the new
+ * number of max connections
* size: non-zero length of C string in @buf
* Output:
* On success: passed-in buffer filled with '\n'-terminated C string
@@ -920,6 +914,7 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size)
if (rv)
return rv;
+ trace_nfsd_ctl_maxconn(netns(file), maxconn);
nn->max_connections = maxconn;
}
@@ -930,6 +925,7 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size)
static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
time64_t *time, struct nfsd_net *nn)
{
+ struct dentry *dentry = file_dentry(file);
char *mesg = buf;
int rv, i;
@@ -939,6 +935,9 @@ static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
rv = get_int(&mesg, &i);
if (rv)
return rv;
+ trace_nfsd_ctl_time(netns(file), dentry->d_name.name,
+ dentry->d_name.len, i);
+
/*
* Some sanity checking. We don't have a reason for
* these particular numbers, but problems with the
@@ -1031,6 +1030,7 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size,
len = qword_get(&mesg, recdir, size);
if (len <= 0)
return -EINVAL;
+ trace_nfsd_ctl_recoverydir(netns(file), recdir);
status = nfs4_reset_recoverydir(recdir);
if (status)
@@ -1082,7 +1082,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
* OR
*
* Input:
- * buf: any value
+ * buf: any value
* size: non-zero length of C string in @buf
* Output:
* passed-in buffer filled with "Y" or "N" with a newline
@@ -1104,7 +1104,7 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
case '1':
if (!nn->nfsd_serv)
return -EBUSY;
- nfsd4_end_grace(nn);
+ trace_nfsd_end_grace(netns(file));
break;
default:
return -EINVAL;
@@ -1209,8 +1209,8 @@ static int __nfsd_symlink(struct inode *dir, struct dentry *dentry,
* @content is assumed to be a NUL-terminated string that lives
* longer than the symlink itself.
*/
-static void nfsd_symlink(struct dentry *parent, const char *name,
- const char *content)
+static void _nfsd_symlink(struct dentry *parent, const char *name,
+ const char *content)
{
struct inode *dir = parent->d_inode;
struct dentry *dentry;
@@ -1227,8 +1227,8 @@ out:
inode_unlock(dir);
}
#else
-static inline void nfsd_symlink(struct dentry *parent, const char *name,
- const char *content)
+static inline void _nfsd_symlink(struct dentry *parent, const char *name,
+ const char *content)
{
}
@@ -1406,8 +1406,8 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
ret = simple_fill_super(sb, 0x6e667364, nfsd_files);
if (ret)
return ret;
- nfsd_symlink(sb->s_root, "supported_krb5_enctypes",
- "/proc/net/rpc/gss_krb5_enctypes");
+ _nfsd_symlink(sb->s_root, "supported_krb5_enctypes",
+ "/proc/net/rpc/gss_krb5_enctypes");
dentry = nfsd_mkdir(sb->s_root, NULL, "clients");
if (IS_ERR(dentry))
return PTR_ERR(dentry);
@@ -1458,6 +1458,19 @@ static struct file_system_type nfsd_fs_type = {
MODULE_ALIAS_FS("nfsd");
#ifdef CONFIG_PROC_FS
+
+static int exports_proc_open(struct inode *inode, struct file *file)
+{
+ return exports_net_open(current->nsproxy->net_ns, file);
+}
+
+static const struct proc_ops exports_proc_ops = {
+ .proc_open = exports_proc_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_release = seq_release,
+};
+
static int create_proc_exports_entry(void)
{
struct proc_dir_entry *entry;
@@ -1481,7 +1494,17 @@ static int create_proc_exports_entry(void)
unsigned int nfsd_net_id;
-static __net_init int nfsd_init_net(struct net *net)
+/**
+ * nfsd_net_init - Prepare the nfsd_net portion of a new net namespace
+ * @net: a freshly-created network namespace
+ *
+ * This information stays around as long as the network namespace is
+ * alive whether or not there is an NFSD instance running in the
+ * namespace.
+ *
+ * Returns zero on success, or a negative errno otherwise.
+ */
+static __net_init int nfsd_net_init(struct net *net)
{
int retval;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -1492,6 +1515,9 @@ static __net_init int nfsd_init_net(struct net *net)
retval = nfsd_idmap_init(net);
if (retval)
goto out_idmap_error;
+ retval = nfsd_net_reply_cache_init(nn);
+ if (retval)
+ goto out_repcache_error;
nn->nfsd_versions = NULL;
nn->nfsd4_minorversions = NULL;
nfsd4_init_leases_net(nn);
@@ -1500,22 +1526,32 @@ static __net_init int nfsd_init_net(struct net *net)
return 0;
+out_repcache_error:
+ nfsd_idmap_shutdown(net);
out_idmap_error:
nfsd_export_shutdown(net);
out_export_error:
return retval;
}
-static __net_exit void nfsd_exit_net(struct net *net)
+/**
+ * nfsd_net_exit - Release the nfsd_net portion of a net namespace
+ * @net: a network namespace that is about to be destroyed
+ *
+ */
+static __net_exit void nfsd_net_exit(struct net *net)
{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ nfsd_net_reply_cache_destroy(nn);
nfsd_idmap_shutdown(net);
nfsd_export_shutdown(net);
- nfsd_netns_free_versions(net_generic(net, nfsd_net_id));
+ nfsd_netns_free_versions(nn);
}
static struct pernet_operations nfsd_net_ops = {
- .init = nfsd_init_net,
- .exit = nfsd_exit_net,
+ .init = nfsd_net_init,
+ .exit = nfsd_net_exit,
.id = &nfsd_net_id,
.size = sizeof(struct nfsd_net),
};
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index ccd8485fee04..e8e13ae72e3c 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -623,16 +623,9 @@ void fh_fill_pre_attrs(struct svc_fh *fhp)
inode = d_inode(fhp->fh_dentry);
err = fh_getattr(fhp, &stat);
- if (err) {
- /* Grab the times from inode anyway */
- stat.mtime = inode->i_mtime;
- stat.ctime = inode->i_ctime;
- stat.size = inode->i_size;
- if (v4 && IS_I_VERSION(inode)) {
- stat.change_cookie = inode_query_iversion(inode);
- stat.result_mask |= STATX_CHANGE_COOKIE;
- }
- }
+ if (err)
+ return;
+
if (v4)
fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
@@ -660,15 +653,10 @@ void fh_fill_post_attrs(struct svc_fh *fhp)
printk("nfsd: inode locked twice during operation.\n");
err = fh_getattr(fhp, &fhp->fh_post_attr);
- if (err) {
- fhp->fh_post_saved = false;
- fhp->fh_post_attr.ctime = inode->i_ctime;
- if (v4 && IS_I_VERSION(inode)) {
- fhp->fh_post_attr.change_cookie = inode_query_iversion(inode);
- fhp->fh_post_attr.result_mask |= STATX_CHANGE_COOKIE;
- }
- } else
- fhp->fh_post_saved = true;
+ if (err)
+ return;
+
+ fhp->fh_post_saved = true;
if (v4)
fhp->fh_post_change =
nfsd4_change_attribute(&fhp->fh_post_attr, inode);
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index c37195572fd0..a7315928a760 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -176,9 +176,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
{
struct nfsd_readargs *argp = rqstp->rq_argp;
struct nfsd_readres *resp = rqstp->rq_resp;
- unsigned int len;
u32 eof;
- int v;
dprintk("nfsd: READ %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh),
@@ -187,17 +185,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2);
argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen);
- v = 0;
- len = argp->count;
resp->pages = rqstp->rq_next_page;
- while (len > 0) {
- struct page *page = *(rqstp->rq_next_page++);
-
- rqstp->rq_vec[v].iov_base = page_address(page);
- rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
- len -= rqstp->rq_vec[v].iov_len;
- v++;
- }
/* Obtain buffer pointer for payload. 19 is 1 word for
* status, 17 words for fattr, and 1 word for the byte count.
@@ -207,7 +195,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
resp->count = argp->count;
fh_copy(&resp->fh, &argp->fh);
resp->status = nfsd_read(rqstp, &resp->fh, argp->offset,
- rqstp->rq_vec, v, &resp->count, &eof);
+ &resp->count, &eof);
if (resp->status == nfs_ok)
resp->status = fh_getattr(&resp->fh, &resp->stat);
else if (resp->status == nfserr_jukebox)
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 9c7b1ef5be40..2154fa63c5f2 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -402,6 +402,11 @@ void nfsd_reset_write_verifier(struct nfsd_net *nn)
write_sequnlock(&nn->writeverf_lock);
}
+/*
+ * Crank up a set of per-namespace resources for a new NFSD instance,
+ * including lockd, a duplicate reply cache, an open file cache
+ * instance, and a cache of NFSv4 state objects.
+ */
static int nfsd_startup_net(struct net *net, const struct cred *cred)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index caf6355b18fa..5777f40c7353 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -468,7 +468,8 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
case nfs_ok:
if (xdr_stream_encode_u32(xdr, resp->len) < 0)
return false;
- xdr_write_pages(xdr, &resp->page, 0, resp->len);
+ svcxdr_encode_opaque_pages(rqstp, xdr, &resp->page, 0,
+ resp->len);
if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0)
return false;
break;
@@ -491,8 +492,9 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
return false;
if (xdr_stream_encode_u32(xdr, resp->count) < 0)
return false;
- xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base,
- resp->count);
+ svcxdr_encode_opaque_pages(rqstp, xdr, resp->pages,
+ rqstp->rq_res.page_base,
+ resp->count);
if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0)
return false;
break;
@@ -511,7 +513,8 @@ nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
return false;
switch (resp->status) {
case nfs_ok:
- xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len);
+ svcxdr_encode_opaque_pages(rqstp, xdr, dirlist->pages, 0,
+ dirlist->len);
/* no more entries */
if (xdr_stream_encode_item_absent(xdr) < 0)
return false;
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 4183819ea082..2af74983f146 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -1365,19 +1365,19 @@ TRACE_EVENT(nfsd_cb_setup,
__field(u32, cl_id)
__field(unsigned long, authflavor)
__sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
- __array(unsigned char, netid, 8)
+ __string(netid, netid)
),
TP_fast_assign(
__entry->cl_boot = clp->cl_clientid.cl_boot;
__entry->cl_id = clp->cl_clientid.cl_id;
- strlcpy(__entry->netid, netid, sizeof(__entry->netid));
+ __assign_str(netid, netid);
__entry->authflavor = authflavor;
__assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
clp->cl_cb_conn.cb_addrlen)
),
TP_printk("addr=%pISpc client %08x:%08x proto=%s flavor=%s",
__get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
- __entry->netid, show_nfsd_authflavor(__entry->authflavor))
+ __get_str(netid), show_nfsd_authflavor(__entry->authflavor))
);
TRACE_EVENT(nfsd_cb_setup_err,
@@ -1581,6 +1581,265 @@ TRACE_EVENT(nfsd_cb_recall_any_done,
)
);
+TRACE_EVENT(nfsd_ctl_unlock_ip,
+ TP_PROTO(
+ const struct net *net,
+ const char *address
+ ),
+ TP_ARGS(net, address),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __string(address, address)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __assign_str(address, address);
+ ),
+ TP_printk("address=%s",
+ __get_str(address)
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_unlock_fs,
+ TP_PROTO(
+ const struct net *net,
+ const char *path
+ ),
+ TP_ARGS(net, path),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __string(path, path)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __assign_str(path, path);
+ ),
+ TP_printk("path=%s",
+ __get_str(path)
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_filehandle,
+ TP_PROTO(
+ const struct net *net,
+ const char *domain,
+ const char *path,
+ int maxsize
+ ),
+ TP_ARGS(net, domain, path, maxsize),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(int, maxsize)
+ __string(domain, domain)
+ __string(path, path)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __entry->maxsize = maxsize;
+ __assign_str(domain, domain);
+ __assign_str(path, path);
+ ),
+ TP_printk("domain=%s path=%s maxsize=%d",
+ __get_str(domain), __get_str(path), __entry->maxsize
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_threads,
+ TP_PROTO(
+ const struct net *net,
+ int newthreads
+ ),
+ TP_ARGS(net, newthreads),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(int, newthreads)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __entry->newthreads = newthreads;
+ ),
+ TP_printk("newthreads=%d",
+ __entry->newthreads
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_pool_threads,
+ TP_PROTO(
+ const struct net *net,
+ int pool,
+ int nrthreads
+ ),
+ TP_ARGS(net, pool, nrthreads),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(int, pool)
+ __field(int, nrthreads)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __entry->pool = pool;
+ __entry->nrthreads = nrthreads;
+ ),
+ TP_printk("pool=%d nrthreads=%d",
+ __entry->pool, __entry->nrthreads
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_version,
+ TP_PROTO(
+ const struct net *net,
+ const char *mesg
+ ),
+ TP_ARGS(net, mesg),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __string(mesg, mesg)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __assign_str(mesg, mesg);
+ ),
+ TP_printk("%s",
+ __get_str(mesg)
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_ports_addfd,
+ TP_PROTO(
+ const struct net *net,
+ int fd
+ ),
+ TP_ARGS(net, fd),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(int, fd)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __entry->fd = fd;
+ ),
+ TP_printk("fd=%d",
+ __entry->fd
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_ports_addxprt,
+ TP_PROTO(
+ const struct net *net,
+ const char *transport,
+ int port
+ ),
+ TP_ARGS(net, transport, port),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(int, port)
+ __string(transport, transport)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __entry->port = port;
+ __assign_str(transport, transport);
+ ),
+ TP_printk("transport=%s port=%d",
+ __get_str(transport), __entry->port
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_maxblksize,
+ TP_PROTO(
+ const struct net *net,
+ int bsize
+ ),
+ TP_ARGS(net, bsize),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(int, bsize)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __entry->bsize = bsize;
+ ),
+ TP_printk("bsize=%d",
+ __entry->bsize
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_maxconn,
+ TP_PROTO(
+ const struct net *net,
+ int maxconn
+ ),
+ TP_ARGS(net, maxconn),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(int, maxconn)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __entry->maxconn = maxconn;
+ ),
+ TP_printk("maxconn=%d",
+ __entry->maxconn
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_time,
+ TP_PROTO(
+ const struct net *net,
+ const char *name,
+ size_t namelen,
+ int time
+ ),
+ TP_ARGS(net, name, namelen, time),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(int, time)
+ __string_len(name, name, namelen)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __entry->time = time;
+ __assign_str_len(name, name, namelen);
+ ),
+ TP_printk("file=%s time=%d\n",
+ __get_str(name), __entry->time
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_recoverydir,
+ TP_PROTO(
+ const struct net *net,
+ const char *recdir
+ ),
+ TP_ARGS(net, recdir),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __string(recdir, recdir)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __assign_str(recdir, recdir);
+ ),
+ TP_printk("recdir=%s",
+ __get_str(recdir)
+ )
+);
+
+TRACE_EVENT(nfsd_end_grace,
+ TP_PROTO(
+ const struct net *net
+ ),
+ TP_ARGS(net),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ ),
+ TP_printk("nn=%d", __entry->netns_ino
+ )
+);
+
#endif /* _NFSD_TRACE_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index bb9d47172162..59b7d60ae33e 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -388,7 +388,9 @@ nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
iap->ia_mode &= ~S_ISGID;
} else {
/* set ATTR_KILL_* bits and let VFS handle it */
- iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID);
+ iap->ia_valid |= ATTR_KILL_SUID;
+ iap->ia_valid |=
+ setattr_should_drop_sgid(&nop_mnt_idmap, inode);
}
}
}
@@ -536,7 +538,15 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
inode_lock(inode);
for (retries = 1;;) {
- host_err = __nfsd_setattr(dentry, iap);
+ struct iattr attrs;
+
+ /*
+ * notify_change() can alter its iattr argument, making
+ * @iap unsuitable for submission multiple times. Make a
+ * copy for every loop iteration.
+ */
+ attrs = *iap;
+ host_err = __nfsd_setattr(dentry, &attrs);
if (host_err != -EAGAIN || !retries--)
break;
if (!nfsd_wait_for_delegreturn(rqstp, inode))
@@ -993,6 +1003,18 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
}
+/**
+ * nfsd_splice_read - Perform a VFS read using a splice pipe
+ * @rqstp: RPC transaction context
+ * @fhp: file handle of file to be read
+ * @file: opened struct file of file to be read
+ * @offset: starting byte offset
+ * @count: IN: requested number of bytes; OUT: number of bytes read
+ * @eof: OUT: set non-zero if operation reached the end of the file
+ *
+ * Returns nfs_ok on success, otherwise an nfserr stat value is
+ * returned.
+ */
__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset, unsigned long *count,
u32 *eof)
@@ -1006,22 +1028,50 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
ssize_t host_err;
trace_nfsd_read_splice(rqstp, fhp, offset, *count);
- rqstp->rq_next_page = rqstp->rq_respages + 1;
host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
}
-__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
- struct file *file, loff_t offset,
- struct kvec *vec, int vlen, unsigned long *count,
- u32 *eof)
+/**
+ * nfsd_iter_read - Perform a VFS read using an iterator
+ * @rqstp: RPC transaction context
+ * @fhp: file handle of file to be read
+ * @file: opened struct file of file to be read
+ * @offset: starting byte offset
+ * @count: IN: requested number of bytes; OUT: number of bytes read
+ * @base: offset in first page of read buffer
+ * @eof: OUT: set non-zero if operation reached the end of the file
+ *
+ * Some filesystems or situations cannot use nfsd_splice_read. This
+ * function is the slightly less-performant fallback for those cases.
+ *
+ * Returns nfs_ok on success, otherwise an nfserr stat value is
+ * returned.
+ */
+__be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct file *file, loff_t offset, unsigned long *count,
+ unsigned int base, u32 *eof)
{
+ unsigned long v, total;
struct iov_iter iter;
loff_t ppos = offset;
+ struct page *page;
ssize_t host_err;
+ v = 0;
+ total = *count;
+ while (total) {
+ page = *(rqstp->rq_next_page++);
+ rqstp->rq_vec[v].iov_base = page_address(page) + base;
+ rqstp->rq_vec[v].iov_len = min_t(size_t, total, PAGE_SIZE - base);
+ total -= rqstp->rq_vec[v].iov_len;
+ ++v;
+ base = 0;
+ }
+ WARN_ON_ONCE(v > ARRAY_SIZE(rqstp->rq_vec));
+
trace_nfsd_read_vector(rqstp, fhp, offset, *count);
- iov_iter_kvec(&iter, ITER_DEST, vec, vlen, *count);
+ iov_iter_kvec(&iter, ITER_DEST, rqstp->rq_vec, v, *count);
host_err = vfs_iter_read(file, &iter, &ppos, 0);
return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
}
@@ -1151,14 +1201,24 @@ out_nfserr:
return nfserr;
}
-/*
- * Read data from a file. count must contain the requested read count
- * on entry. On return, *count contains the number of bytes actually read.
+/**
+ * nfsd_read - Read data from a file
+ * @rqstp: RPC transaction context
+ * @fhp: file handle of file to be read
+ * @offset: starting byte offset
+ * @count: IN: requested number of bytes; OUT: number of bytes read
+ * @eof: OUT: set non-zero if operation reached the end of the file
+ *
+ * The caller must verify that there is enough space in @rqstp.rq_res
+ * to perform this operation.
+ *
* N.B. After this call fhp needs an fh_put
+ *
+ * Returns nfs_ok on success, otherwise an nfserr stat value is
+ * returned.
*/
__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
- loff_t offset, struct kvec *vec, int vlen, unsigned long *count,
- u32 *eof)
+ loff_t offset, unsigned long *count, u32 *eof)
{
struct nfsd_file *nf;
struct file *file;
@@ -1173,12 +1233,10 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
err = nfsd_splice_read(rqstp, fhp, file, offset, count, eof);
else
- err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count, eof);
+ err = nfsd_iter_read(rqstp, fhp, file, offset, count, 0, eof);
nfsd_file_put(nf);
-
trace_nfsd_read_done(rqstp, fhp, offset, *count);
-
return err;
}
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 43fb57a301d3..a6890ea7b765 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -110,13 +110,12 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
unsigned long *count,
u32 *eof);
-__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
+__be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
- struct kvec *vec, int vlen,
- unsigned long *count,
+ unsigned long *count, unsigned int base,
u32 *eof);
-__be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
- loff_t, struct kvec *, int, unsigned long *,
+__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ loff_t offset, unsigned long *count,
u32 *eof);
__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
struct kvec *, int, unsigned long *,
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index e956f886a1a1..5710833ac1cc 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -285,6 +285,14 @@ void nilfs_btnode_abort_change_key(struct address_space *btnc,
if (nbh == NULL) { /* blocksize == pagesize */
xa_erase_irq(&btnc->i_pages, newkey);
unlock_page(ctxt->bh->b_page);
- } else
- brelse(nbh);
+ } else {
+ /*
+ * When canceling a buffer that a prepare operation has
+ * allocated to copy a node block to another location, use
+ * nilfs_btnode_delete() to initialize and release the buffer
+ * so that the buffer flags will not be in an inconsistent
+ * state when it is reallocated.
+ */
+ nilfs_btnode_delete(nbh);
+ }
}
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 1310d2d5feb3..a8ce522ac747 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -917,6 +917,7 @@ void nilfs_evict_inode(struct inode *inode)
struct nilfs_transaction_info ti;
struct super_block *sb = inode->i_sb;
struct nilfs_inode_info *ii = NILFS_I(inode);
+ struct the_nilfs *nilfs;
int ret;
if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
@@ -929,6 +930,23 @@ void nilfs_evict_inode(struct inode *inode)
truncate_inode_pages_final(&inode->i_data);
+ nilfs = sb->s_fs_info;
+ if (unlikely(sb_rdonly(sb) || !nilfs->ns_writer)) {
+ /*
+ * If this inode is about to be disposed after the file system
+ * has been degraded to read-only due to file system corruption
+ * or after the writer has been detached, do not make any
+ * changes that cause writes, just clear it.
+ * Do this check after read-locking ns_segctor_sem by
+ * nilfs_transaction_begin() in order to avoid a race with
+ * the writer detach operation.
+ */
+ clear_inode(inode);
+ nilfs_clear_inode(inode);
+ nilfs_transaction_abort(sb);
+ return;
+ }
+
/* TODO: some of the following operations may fail. */
nilfs_truncate_bmap(ii, 0);
nilfs_mark_inode_dirty(inode);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 5cf30827f244..b4e54d079b7d 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -370,7 +370,15 @@ void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent)
struct folio *folio = fbatch.folios[i];
folio_lock(folio);
- nilfs_clear_dirty_page(&folio->page, silent);
+
+ /*
+ * This folio may have been removed from the address
+ * space by truncation or invalidation when the lock
+ * was acquired. Skip processing in that case.
+ */
+ if (likely(folio->mapping == mapping))
+ nilfs_clear_dirty_page(&folio->page, silent);
+
folio_unlock(folio);
}
folio_batch_release(&fbatch);
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 1362ccb64ec7..6e59dc19a732 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -101,6 +101,12 @@ int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *segbuf)
if (unlikely(!bh))
return -ENOMEM;
+ lock_buffer(bh);
+ if (!buffer_uptodate(bh)) {
+ memset(bh->b_data, 0, bh->b_size);
+ set_buffer_uptodate(bh);
+ }
+ unlock_buffer(bh);
nilfs_segbuf_add_segsum_buffer(segbuf, bh);
return 0;
}
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index ac949fd7603f..c2553024bd25 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -981,10 +981,13 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
unsigned int isz, srsz;
bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
+
+ lock_buffer(bh_sr);
raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
isz = nilfs->ns_inode_size;
srsz = NILFS_SR_BYTES(isz);
+ raw_sr->sr_sum = 0; /* Ensure initialization within this update */
raw_sr->sr_bytes = cpu_to_le16(srsz);
raw_sr->sr_nongc_ctime
= cpu_to_le64(nilfs_doing_gc() ?
@@ -998,6 +1001,8 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
NILFS_SR_SUFILE_OFFSET(isz), 1);
memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz);
+ set_buffer_uptodate(bh_sr);
+ unlock_buffer(bh_sr);
}
static void nilfs_redirty_inodes(struct list_head *head)
@@ -1780,6 +1785,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
list_for_each_entry(segbuf, logs, sb_list) {
list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
b_assoc_buffers) {
+ clear_buffer_uptodate(bh);
if (bh->b_page != bd_page) {
if (bd_page)
end_page_writeback(bd_page);
@@ -1791,6 +1797,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
b_assoc_buffers) {
clear_buffer_async_write(bh);
if (bh == segbuf->sb_super_root) {
+ clear_buffer_uptodate(bh);
if (bh->b_page != bd_page) {
end_page_writeback(bd_page);
bd_page = bh->b_page;
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index dc359b56fdfa..2c6078a6b8ec 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -779,6 +779,15 @@ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs)
goto out_header;
sui->ncleansegs -= nsegs - newnsegs;
+
+ /*
+ * If the sufile is successfully truncated, immediately adjust
+ * the segment allocation space while locking the semaphore
+ * "mi_sem" so that nilfs_sufile_alloc() never allocates
+ * segments in the truncated space.
+ */
+ sui->allocmax = newnsegs - 1;
+ sui->allocmin = 0;
}
kaddr = kmap_atomic(header_bh->b_page);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 15a5a1099427..0ef8c71bde8e 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -372,10 +372,31 @@ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
goto out;
}
nsbp = (void *)nsbh->b_data + offset;
- memset(nsbp, 0, nilfs->ns_blocksize);
+ lock_buffer(nsbh);
if (sb2i >= 0) {
+ /*
+ * The position of the second superblock only changes by 4KiB,
+ * which is larger than the maximum superblock data size
+ * (= 1KiB), so there is no need to use memmove() to allow
+ * overlap between source and destination.
+ */
memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize);
+
+ /*
+ * Zero fill after copy to avoid overwriting in case of move
+ * within the same block.
+ */
+ memset(nsbh->b_data, 0, offset);
+ memset((void *)nsbp + nilfs->ns_sbsize, 0,
+ nsbh->b_size - offset - nilfs->ns_sbsize);
+ } else {
+ memset(nsbh->b_data, 0, nsbh->b_size);
+ }
+ set_buffer_uptodate(nsbh);
+ unlock_buffer(nsbh);
+
+ if (sb2i >= 0) {
brelse(nilfs->ns_sbh[sb2i]);
nilfs->ns_sbh[sb2i] = nsbh;
nilfs->ns_sbp[sb2i] = nsbp;
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 2894152a6b25..0f0667957c81 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -405,6 +405,18 @@ unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs)
100));
}
+/**
+ * nilfs_max_segment_count - calculate the maximum number of segments
+ * @nilfs: nilfs object
+ */
+static u64 nilfs_max_segment_count(struct the_nilfs *nilfs)
+{
+ u64 max_count = U64_MAX;
+
+ do_div(max_count, nilfs->ns_blocks_per_segment);
+ return min_t(u64, max_count, ULONG_MAX);
+}
+
void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs)
{
nilfs->ns_nsegments = nsegs;
@@ -414,6 +426,8 @@ void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs)
static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
struct nilfs_super_block *sbp)
{
+ u64 nsegments, nblocks;
+
if (le32_to_cpu(sbp->s_rev_level) < NILFS_MIN_SUPP_REV) {
nilfs_err(nilfs->ns_sb,
"unsupported revision (superblock rev.=%d.%d, current rev.=%d.%d). Please check the version of mkfs.nilfs(2).",
@@ -457,7 +471,34 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
return -EINVAL;
}
- nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments));
+ nsegments = le64_to_cpu(sbp->s_nsegments);
+ if (nsegments > nilfs_max_segment_count(nilfs)) {
+ nilfs_err(nilfs->ns_sb,
+ "segment count %llu exceeds upper limit (%llu segments)",
+ (unsigned long long)nsegments,
+ (unsigned long long)nilfs_max_segment_count(nilfs));
+ return -EINVAL;
+ }
+
+ nblocks = sb_bdev_nr_blocks(nilfs->ns_sb);
+ if (nblocks) {
+ u64 min_block_count = nsegments * nilfs->ns_blocks_per_segment;
+ /*
+ * To avoid failing to mount early device images without a
+ * second superblock, exclude that block count from the
+ * "min_block_count" calculation.
+ */
+
+ if (nblocks < min_block_count) {
+ nilfs_err(nilfs->ns_sb,
+ "total number of segment blocks %llu exceeds device size (%llu blocks)",
+ (unsigned long long)min_block_count,
+ (unsigned long long)nblocks);
+ return -EINVAL;
+ }
+ }
+
+ nilfs_set_nsegments(nilfs, nsegments);
nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed);
return 0;
}
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index a3865bc4a0c6..f79408f9127a 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -2491,7 +2491,7 @@ conv_err_out:
* byte offset @ofs inside the attribute with the constant byte @val.
*
* This function is effectively like memset() applied to an ntfs attribute.
- * Note thie function actually only operates on the page cache pages belonging
+ * Note this function actually only operates on the page cache pages belonging
* to the ntfs attribute and it marks them dirty after doing the memset().
* Thus it relies on the vm dirty page write code paths to cause the modified
* pages to be written to the mft record/disk.
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index f9cb180b6f6b..761aaa0195d6 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -161,7 +161,7 @@ static int ntfs_decompress(struct page *dest_pages[], int completed_pages[],
*/
u8 *cb_end = cb_start + cb_size; /* End of cb. */
u8 *cb = cb_start; /* Current position in cb. */
- u8 *cb_sb_start = cb; /* Beginning of the current sb in the cb. */
+ u8 *cb_sb_start; /* Beginning of the current sb in the cb. */
u8 *cb_sb_end; /* End of current sb / beginning of next sb. */
/* Variables for uncompressed data / destination. */
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 48030899dc6e..0155f106ec34 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1955,36 +1955,38 @@ undo_alloc:
"attribute.%s", es);
NVolSetErrors(vol);
}
- a = ctx->attr;
+
if (ntfs_rl_truncate_nolock(vol, &mft_ni->runlist, old_last_vcn)) {
ntfs_error(vol->sb, "Failed to truncate mft data attribute "
"runlist.%s", es);
NVolSetErrors(vol);
}
- if (mp_rebuilt && !IS_ERR(ctx->mrec)) {
- if (ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu(
+ if (ctx) {
+ a = ctx->attr;
+ if (mp_rebuilt && !IS_ERR(ctx->mrec)) {
+ if (ntfs_mapping_pairs_build(vol, (u8 *)a + le16_to_cpu(
a->data.non_resident.mapping_pairs_offset),
old_alen - le16_to_cpu(
- a->data.non_resident.mapping_pairs_offset),
+ a->data.non_resident.mapping_pairs_offset),
rl2, ll, -1, NULL)) {
- ntfs_error(vol->sb, "Failed to restore mapping pairs "
+ ntfs_error(vol->sb, "Failed to restore mapping pairs "
"array.%s", es);
- NVolSetErrors(vol);
- }
- if (ntfs_attr_record_resize(ctx->mrec, a, old_alen)) {
- ntfs_error(vol->sb, "Failed to restore attribute "
+ NVolSetErrors(vol);
+ }
+ if (ntfs_attr_record_resize(ctx->mrec, a, old_alen)) {
+ ntfs_error(vol->sb, "Failed to restore attribute "
"record.%s", es);
+ NVolSetErrors(vol);
+ }
+ flush_dcache_mft_record_page(ctx->ntfs_ino);
+ mark_mft_record_dirty(ctx->ntfs_ino);
+ } else if (IS_ERR(ctx->mrec)) {
+ ntfs_error(vol->sb, "Failed to restore attribute search "
+ "context.%s", es);
NVolSetErrors(vol);
}
- flush_dcache_mft_record_page(ctx->ntfs_ino);
- mark_mft_record_dirty(ctx->ntfs_ino);
- } else if (IS_ERR(ctx->mrec)) {
- ntfs_error(vol->sb, "Failed to restore attribute search "
- "context.%s", es);
- NVolSetErrors(vol);
- }
- if (ctx)
ntfs_attr_put_search_ctx(ctx);
+ }
if (!IS_ERR(mrec))
unmap_mft_record(mft_ni);
up_write(&mft_ni->runlist.lock);
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 2643a08182e1..56a7d5bd33e4 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1620,7 +1620,7 @@ read_partial_attrdef_page:
memcpy((u8*)vol->attrdef + (index++ << PAGE_SHIFT),
page_address(page), size);
ntfs_unmap_page(page);
- };
+ }
if (size == PAGE_SIZE) {
size = i_size & ~PAGE_MASK;
if (size)
@@ -1689,7 +1689,7 @@ read_partial_upcase_page:
memcpy((char*)vol->upcase + (index++ << PAGE_SHIFT),
page_address(page), size);
ntfs_unmap_page(page);
- };
+ }
if (size == PAGE_SIZE) {
size = i_size & ~PAGE_MASK;
if (size)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 42549fc81468..91a194596552 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2100,14 +2100,20 @@ static long ocfs2_fallocate(struct file *file, int mode, loff_t offset,
struct ocfs2_space_resv sr;
int change_size = 1;
int cmd = OCFS2_IOC_RESVSP64;
+ int ret = 0;
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return -EOPNOTSUPP;
if (!ocfs2_writes_unwritten_extents(osb))
return -EOPNOTSUPP;
- if (mode & FALLOC_FL_KEEP_SIZE)
+ if (mode & FALLOC_FL_KEEP_SIZE) {
change_size = 0;
+ } else {
+ ret = inode_newsize_ok(inode, offset + len);
+ if (ret)
+ return ret;
+ }
if (mode & FALLOC_FL_PUNCH_HOLE)
cmd = OCFS2_IOC_UNRESVSP64;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 0b0e6a132101..988d1c076861 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -952,8 +952,10 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb)
for (type = 0; type < OCFS2_MAXQUOTAS; type++) {
if (!sb_has_quota_loaded(sb, type))
continue;
- oinfo = sb_dqinfo(sb, type)->dqi_priv;
- cancel_delayed_work_sync(&oinfo->dqi_sync_work);
+ if (!sb_has_quota_suspended(sb, type)) {
+ oinfo = sb_dqinfo(sb, type)->dqi_priv;
+ cancel_delayed_work_sync(&oinfo->dqi_sync_work);
+ }
inode = igrab(sb->s_dquot.files[type]);
/* Turn off quotas. This will remove all dquot structures from
* memory and so they will be automatically synced to global
diff --git a/fs/open.c b/fs/open.c
index 4478adcc4f3a..fb07b2840eb4 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -700,10 +700,7 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
return do_fchmodat(AT_FDCWD, filename, mode);
}
-/**
- * setattr_vfsuid - check and set ia_fsuid attribute
- * @kuid: new inode owner
- *
+/*
* Check whether @kuid is valid and if so generate and set vfsuid_t in
* ia_vfsuid.
*
@@ -718,10 +715,7 @@ static inline bool setattr_vfsuid(struct iattr *attr, kuid_t kuid)
return true;
}
-/**
- * setattr_vfsgid - check and set ia_fsgid attribute
- * @kgid: new inode owner
- *
+/*
* Check whether @kgid is valid and if so generate and set vfsgid_t in
* ia_vfsgid.
*
@@ -989,7 +983,6 @@ cleanup_file:
* @file: file pointer
* @dentry: pointer to dentry
* @open: open callback
- * @opened: state of open
*
* This can be used to finish opening a file passed to i_op->atomic_open().
*
@@ -1043,7 +1036,6 @@ EXPORT_SYMBOL(file_path);
* vfs_open - open the file at the given path
* @path: path to open
* @file: newly allocated file with f_flag initialized
- * @cred: credentials to use
*/
int vfs_open(const struct path *path, struct file *file)
{
@@ -1116,23 +1108,77 @@ struct file *dentry_create(const struct path *path, int flags, umode_t mode,
}
EXPORT_SYMBOL(dentry_create);
-struct file *open_with_fake_path(const struct path *path, int flags,
+/**
+ * kernel_file_open - open a file for kernel internal use
+ * @path: path of the file to open
+ * @flags: open flags
+ * @inode: the inode
+ * @cred: credentials for open
+ *
+ * Open a file for use by in-kernel consumers. The file is not accounted
+ * against nr_files and must not be installed into the file descriptor
+ * table.
+ *
+ * Return: Opened file on success, an error pointer on failure.
+ */
+struct file *kernel_file_open(const struct path *path, int flags,
struct inode *inode, const struct cred *cred)
{
- struct file *f = alloc_empty_file_noaccount(flags, cred);
- if (!IS_ERR(f)) {
- int error;
+ struct file *f;
+ int error;
- f->f_path = *path;
- error = do_dentry_open(f, inode, NULL);
- if (error) {
- fput(f);
- f = ERR_PTR(error);
- }
+ f = alloc_empty_file_noaccount(flags, cred);
+ if (IS_ERR(f))
+ return f;
+
+ f->f_path = *path;
+ error = do_dentry_open(f, inode, NULL);
+ if (error) {
+ fput(f);
+ f = ERR_PTR(error);
}
return f;
}
-EXPORT_SYMBOL(open_with_fake_path);
+EXPORT_SYMBOL_GPL(kernel_file_open);
+
+/**
+ * backing_file_open - open a backing file for kernel internal use
+ * @path: path of the file to open
+ * @flags: open flags
+ * @path: path of the backing file
+ * @cred: credentials for open
+ *
+ * Open a backing file for a stackable filesystem (e.g., overlayfs).
+ * @path may be on the stackable filesystem and backing inode on the
+ * underlying filesystem. In this case, we want to be able to return
+ * the @real_path of the backing inode. This is done by embedding the
+ * returned file into a container structure that also stores the path of
+ * the backing inode on the underlying filesystem, which can be
+ * retrieved using backing_file_real_path().
+ */
+struct file *backing_file_open(const struct path *path, int flags,
+ const struct path *real_path,
+ const struct cred *cred)
+{
+ struct file *f;
+ int error;
+
+ f = alloc_empty_backing_file(flags, cred);
+ if (IS_ERR(f))
+ return f;
+
+ f->f_path = *path;
+ path_get(real_path);
+ *backing_file_real_path(f) = *real_path;
+ error = do_dentry_open(f, d_inode(real_path->dentry), NULL);
+ if (error) {
+ fput(f);
+ f = ERR_PTR(error);
+ }
+
+ return f;
+}
+EXPORT_SYMBOL_GPL(backing_file_open);
#define WILL_CREATE(flags) (flags & (O_CREAT | __O_TMPFILE))
#define O_PATH_FLAGS (O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC)
@@ -1156,7 +1202,7 @@ inline struct open_how build_open_how(int flags, umode_t mode)
inline int build_open_flags(const struct open_how *how, struct open_flags *op)
{
u64 flags = how->flags;
- u64 strip = FMODE_NONOTIFY | O_CLOEXEC;
+ u64 strip = __FMODE_NONOTIFY | O_CLOEXEC;
int lookup_flags = 0;
int acc_mode = ACC_MODE(flags);
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 86197882ff35..1f93a3ae113e 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -34,8 +34,8 @@ static char ovl_whatisit(struct inode *inode, struct inode *realinode)
return 'm';
}
-/* No atime modification nor notify on underlying */
-#define OVL_OPEN_FLAGS (O_NOATIME | FMODE_NONOTIFY)
+/* No atime modification on underlying */
+#define OVL_OPEN_FLAGS (O_NOATIME)
static struct file *ovl_open_realfile(const struct file *file,
const struct path *realpath)
@@ -61,8 +61,8 @@ static struct file *ovl_open_realfile(const struct file *file,
if (!inode_owner_or_capable(real_idmap, realinode))
flags &= ~O_NOATIME;
- realfile = open_with_fake_path(&file->f_path, flags, realinode,
- current_cred());
+ realfile = backing_file_open(&file->f_path, flags, realpath,
+ current_cred());
}
revert_creds(old_cred);
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 4d0b278f5630..23686e8a06c4 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -329,8 +329,9 @@ static inline struct file *ovl_do_tmpfile(struct ovl_fs *ofs,
struct dentry *dentry, umode_t mode)
{
struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = dentry };
- struct file *file = vfs_tmpfile_open(ovl_upper_mnt_idmap(ofs), &path, mode,
- O_LARGEFILE | O_WRONLY, current_cred());
+ struct file *file = kernel_tmpfile_open(ovl_upper_mnt_idmap(ofs), &path,
+ mode, O_LARGEFILE | O_WRONLY,
+ current_cred());
int err = PTR_ERR_OR_ZERO(file);
pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err);
diff --git a/fs/pnode.c b/fs/pnode.c
index 3cede8b18c8b..e4d0340393d5 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -216,7 +216,7 @@ static struct mount *next_group(struct mount *m, struct mount *origin)
static struct mount *last_dest, *first_source, *last_source, *dest_master;
static struct hlist_head *list;
-static inline bool peers(struct mount *m1, struct mount *m2)
+static inline bool peers(const struct mount *m1, const struct mount *m2)
{
return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
}
@@ -354,6 +354,46 @@ static inline int do_refcount_check(struct mount *mnt, int count)
return mnt_get_count(mnt) > count;
}
+/**
+ * propagation_would_overmount - check whether propagation from @from
+ * would overmount @to
+ * @from: shared mount
+ * @to: mount to check
+ * @mp: future mountpoint of @to on @from
+ *
+ * If @from propagates mounts to @to, @from and @to must either be peers
+ * or one of the masters in the hierarchy of masters of @to must be a
+ * peer of @from.
+ *
+ * If the root of the @to mount is equal to the future mountpoint @mp of
+ * the @to mount on @from then @to will be overmounted by whatever is
+ * propagated to it.
+ *
+ * Context: This function expects namespace_lock() to be held and that
+ * @mp is stable.
+ * Return: If @from overmounts @to, true is returned, false if not.
+ */
+bool propagation_would_overmount(const struct mount *from,
+ const struct mount *to,
+ const struct mountpoint *mp)
+{
+ if (!IS_MNT_SHARED(from))
+ return false;
+
+ if (IS_MNT_NEW(to))
+ return false;
+
+ if (to->mnt.mnt_root != mp->m_dentry)
+ return false;
+
+ for (const struct mount *m = to; m; m = m->mnt_master) {
+ if (peers(from, m))
+ return true;
+ }
+
+ return false;
+}
+
/*
* check if the mount 'mnt' can be unmounted successfully.
* @mnt: the mount to be checked for unmount
diff --git a/fs/pnode.h b/fs/pnode.h
index 988f1aa9b02a..0b02a6393891 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -53,4 +53,7 @@ struct mount *copy_tree(struct mount *, struct dentry *, int);
bool is_path_reachable(struct mount *, struct dentry *,
const struct path *root);
int count_mounts(struct mnt_namespace *ns, struct mount *mnt);
+bool propagation_would_overmount(const struct mount *from,
+ const struct mount *to,
+ const struct mountpoint *mp);
#endif /* _LINUX_PNODE_H */
diff --git a/fs/readdir.c b/fs/readdir.c
index 9c53edb60c03..b264ce60114d 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -131,7 +131,7 @@ struct old_linux_dirent {
unsigned long d_ino;
unsigned long d_offset;
unsigned short d_namlen;
- char d_name[1];
+ char d_name[];
};
struct readdir_callback {
@@ -208,7 +208,7 @@ struct linux_dirent {
unsigned long d_ino;
unsigned long d_off;
unsigned short d_reclen;
- char d_name[1];
+ char d_name[];
};
struct getdents_callback {
@@ -388,7 +388,7 @@ struct compat_old_linux_dirent {
compat_ulong_t d_ino;
compat_ulong_t d_offset;
unsigned short d_namlen;
- char d_name[1];
+ char d_name[];
};
struct compat_readdir_callback {
@@ -460,7 +460,7 @@ struct compat_linux_dirent {
compat_ulong_t d_ino;
compat_ulong_t d_off;
unsigned short d_reclen;
- char d_name[1];
+ char d_name[];
};
struct compat_getdents_callback {
diff --git a/fs/remap_range.c b/fs/remap_range.c
index 1331a890f2f2..87ae4f0dc3aa 100644
--- a/fs/remap_range.c
+++ b/fs/remap_range.c
@@ -15,6 +15,7 @@
#include <linux/mount.h>
#include <linux/fs.h>
#include <linux/dax.h>
+#include <linux/overflow.h>
#include "internal.h"
#include <linux/uaccess.h>
@@ -101,10 +102,12 @@ static int generic_remap_checks(struct file *file_in, loff_t pos_in,
static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
bool write)
{
+ loff_t tmp;
+
if (unlikely(pos < 0 || len < 0))
return -EINVAL;
- if (unlikely((loff_t) (pos + len) < 0))
+ if (unlikely(check_add_overflow(pos, len, &tmp)))
return -EINVAL;
return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
diff --git a/fs/smb/Kconfig b/fs/smb/Kconfig
new file mode 100644
index 000000000000..ef425789fa6a
--- /dev/null
+++ b/fs/smb/Kconfig
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# smbfs configuration
+
+source "fs/smb/client/Kconfig"
+source "fs/smb/server/Kconfig"
+
+config SMBFS
+ tristate
+ default y if CIFS=y || SMB_SERVER=y
+ default m if CIFS=m || SMB_SERVER=m
diff --git a/fs/smb/Makefile b/fs/smb/Makefile
new file mode 100644
index 000000000000..9a1bf59a1a65
--- /dev/null
+++ b/fs/smb/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_SMBFS) += common/
+obj-$(CONFIG_CIFS) += client/
+obj-$(CONFIG_SMB_SERVER) += server/
diff --git a/fs/cifs/Kconfig b/fs/smb/client/Kconfig
index 4c0d53bf931a..4c0d53bf931a 100644
--- a/fs/cifs/Kconfig
+++ b/fs/smb/client/Kconfig
diff --git a/fs/cifs/Makefile b/fs/smb/client/Makefile
index 304a7f6cc13a..304a7f6cc13a 100644
--- a/fs/cifs/Makefile
+++ b/fs/smb/client/Makefile
diff --git a/fs/cifs/asn1.c b/fs/smb/client/asn1.c
index b5724ef9f182..b5724ef9f182 100644
--- a/fs/cifs/asn1.c
+++ b/fs/smb/client/asn1.c
diff --git a/fs/cifs/cached_dir.c b/fs/smb/client/cached_dir.c
index bfc964b36c72..bfc964b36c72 100644
--- a/fs/cifs/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
diff --git a/fs/cifs/cached_dir.h b/fs/smb/client/cached_dir.h
index 2f4e764c9ca9..2f4e764c9ca9 100644
--- a/fs/cifs/cached_dir.h
+++ b/fs/smb/client/cached_dir.h
diff --git a/fs/cifs/cifs_debug.c b/fs/smb/client/cifs_debug.c
index d4ed200a9471..b279f745466e 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/smb/client/cifs_debug.c
@@ -12,6 +12,7 @@
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/uaccess.h>
+#include <uapi/linux/ethtool.h>
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
@@ -108,7 +109,7 @@ static void cifs_debug_tcon(struct seq_file *m, struct cifs_tcon *tcon)
if ((tcon->seal) ||
(tcon->ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA) ||
(tcon->share_flags & SHI1005_FLAGS_ENCRYPT_DATA))
- seq_printf(m, " Encrypted");
+ seq_puts(m, " encrypted");
if (tcon->nocase)
seq_printf(m, " nocase");
if (tcon->unix_ext)
@@ -130,12 +131,14 @@ cifs_dump_channel(struct seq_file *m, int i, struct cifs_chan *chan)
struct TCP_Server_Info *server = chan->server;
seq_printf(m, "\n\n\t\tChannel: %d ConnectionId: 0x%llx"
- "\n\t\tNumber of credits: %d Dialect 0x%x"
+ "\n\t\tNumber of credits: %d,%d,%d Dialect 0x%x"
"\n\t\tTCP status: %d Instance: %d"
"\n\t\tLocal Users To Server: %d SecMode: 0x%x Req On Wire: %d"
"\n\t\tIn Send: %d In MaxReq Wait: %d",
i+1, server->conn_id,
server->credits,
+ server->echo_credits,
+ server->oplock_credits,
server->dialect,
server->tcpStatus,
server->reconnect_instance,
@@ -146,18 +149,62 @@ cifs_dump_channel(struct seq_file *m, int i, struct cifs_chan *chan)
atomic_read(&server->num_waiters));
}
+static inline const char *smb_speed_to_str(size_t bps)
+{
+ size_t mbps = bps / 1000 / 1000;
+
+ switch (mbps) {
+ case SPEED_10:
+ return "10Mbps";
+ case SPEED_100:
+ return "100Mbps";
+ case SPEED_1000:
+ return "1Gbps";
+ case SPEED_2500:
+ return "2.5Gbps";
+ case SPEED_5000:
+ return "5Gbps";
+ case SPEED_10000:
+ return "10Gbps";
+ case SPEED_14000:
+ return "14Gbps";
+ case SPEED_20000:
+ return "20Gbps";
+ case SPEED_25000:
+ return "25Gbps";
+ case SPEED_40000:
+ return "40Gbps";
+ case SPEED_50000:
+ return "50Gbps";
+ case SPEED_56000:
+ return "56Gbps";
+ case SPEED_100000:
+ return "100Gbps";
+ case SPEED_200000:
+ return "200Gbps";
+ case SPEED_400000:
+ return "400Gbps";
+ case SPEED_800000:
+ return "800Gbps";
+ default:
+ return "Unknown";
+ }
+}
+
static void
cifs_dump_iface(struct seq_file *m, struct cifs_server_iface *iface)
{
struct sockaddr_in *ipv4 = (struct sockaddr_in *)&iface->sockaddr;
struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)&iface->sockaddr;
- seq_printf(m, "\tSpeed: %zu bps\n", iface->speed);
+ seq_printf(m, "\tSpeed: %s\n", smb_speed_to_str(iface->speed));
seq_puts(m, "\t\tCapabilities: ");
if (iface->rdma_capable)
seq_puts(m, "rdma ");
if (iface->rss_capable)
seq_puts(m, "rss ");
+ if (!iface->rdma_capable && !iface->rss_capable)
+ seq_puts(m, "None");
seq_putc(m, '\n');
if (iface->sockaddr.ss_family == AF_INET)
seq_printf(m, "\t\tIPv4: %pI4\n", &ipv4->sin_addr);
@@ -350,8 +397,11 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
atomic_read(&server->smbd_conn->mr_used_count));
skip_rdma:
#endif
- seq_printf(m, "\nNumber of credits: %d Dialect 0x%x",
- server->credits, server->dialect);
+ seq_printf(m, "\nNumber of credits: %d,%d,%d Dialect 0x%x",
+ server->credits,
+ server->echo_credits,
+ server->oplock_credits,
+ server->dialect);
if (server->compress_algorithm == SMB3_COMPRESS_LZNT1)
seq_printf(m, " COMPRESS_LZNT1");
else if (server->compress_algorithm == SMB3_COMPRESS_LZ77)
@@ -415,8 +465,12 @@ skip_rdma:
/* dump session id helpful for use with network trace */
seq_printf(m, " SessionId: 0x%llx", ses->Suid);
- if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA)
+ if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA) {
seq_puts(m, " encrypted");
+ /* can help in debugging to show encryption type */
+ if (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)
+ seq_puts(m, "(gcm256)");
+ }
if (ses->sign)
seq_puts(m, " signed");
diff --git a/fs/cifs/cifs_debug.h b/fs/smb/client/cifs_debug.h
index ce5cfd236fdb..ce5cfd236fdb 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/smb/client/cifs_debug.h
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/smb/client/cifs_dfs_ref.c
index 0329a907bdfe..0329a907bdfe 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/smb/client/cifs_dfs_ref.c
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/smb/client/cifs_fs_sb.h
index 651759192280..651759192280 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/smb/client/cifs_fs_sb.h
diff --git a/fs/cifs/cifs_ioctl.h b/fs/smb/client/cifs_ioctl.h
index 332588e77c31..332588e77c31 100644
--- a/fs/cifs/cifs_ioctl.h
+++ b/fs/smb/client/cifs_ioctl.h
diff --git a/fs/cifs/cifs_spnego.c b/fs/smb/client/cifs_spnego.c
index 6f3285f1dfee..6f3285f1dfee 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/smb/client/cifs_spnego.c
diff --git a/fs/cifs/cifs_spnego.h b/fs/smb/client/cifs_spnego.h
index e4d751b0c812..e4d751b0c812 100644
--- a/fs/cifs/cifs_spnego.h
+++ b/fs/smb/client/cifs_spnego.h
diff --git a/fs/cifs/cifs_spnego_negtokeninit.asn1 b/fs/smb/client/cifs_spnego_negtokeninit.asn1
index 181c083887d5..181c083887d5 100644
--- a/fs/cifs/cifs_spnego_negtokeninit.asn1
+++ b/fs/smb/client/cifs_spnego_negtokeninit.asn1
diff --git a/fs/cifs/cifs_swn.c b/fs/smb/client/cifs_swn.c
index 7233c6a7e6d7..7233c6a7e6d7 100644
--- a/fs/cifs/cifs_swn.c
+++ b/fs/smb/client/cifs_swn.c
diff --git a/fs/cifs/cifs_swn.h b/fs/smb/client/cifs_swn.h
index 8a9d2a5c9077..8a9d2a5c9077 100644
--- a/fs/cifs/cifs_swn.h
+++ b/fs/smb/client/cifs_swn.h
diff --git a/fs/cifs/cifs_unicode.c b/fs/smb/client/cifs_unicode.c
index e7582dd79179..e7582dd79179 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/smb/client/cifs_unicode.c
diff --git a/fs/cifs/cifs_unicode.h b/fs/smb/client/cifs_unicode.h
index 80b3d845419f..80b3d845419f 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/smb/client/cifs_unicode.h
diff --git a/fs/cifs/cifs_uniupr.h b/fs/smb/client/cifs_uniupr.h
index 7b272fcdf0d3..7b272fcdf0d3 100644
--- a/fs/cifs/cifs_uniupr.h
+++ b/fs/smb/client/cifs_uniupr.h
diff --git a/fs/cifs/cifsacl.c b/fs/smb/client/cifsacl.c
index f5b6df82e857..f5b6df82e857 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/smb/client/cifsacl.c
diff --git a/fs/cifs/cifsacl.h b/fs/smb/client/cifsacl.h
index ccbfc754bd3c..ccbfc754bd3c 100644
--- a/fs/cifs/cifsacl.h
+++ b/fs/smb/client/cifsacl.h
diff --git a/fs/cifs/cifsencrypt.c b/fs/smb/client/cifsencrypt.c
index 357bd27a7fd1..ef4c2e3c9fa6 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/smb/client/cifsencrypt.c
@@ -21,7 +21,7 @@
#include <linux/random.h>
#include <linux/highmem.h>
#include <linux/fips.h>
-#include "../smbfs_common/arc4.h"
+#include "../common/arc4.h"
#include <crypto/aead.h>
/*
diff --git a/fs/cifs/cifsfs.c b/fs/smb/client/cifsfs.c
index 4f4492eb975f..4f4492eb975f 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
diff --git a/fs/cifs/cifsfs.h b/fs/smb/client/cifsfs.h
index d7274eefc666..d7274eefc666 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/smb/client/cifsfs.h
diff --git a/fs/cifs/cifsglob.h b/fs/smb/client/cifsglob.h
index 414685c5d530..b212a4e16b39 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -24,7 +24,7 @@
#include "cifsacl.h"
#include <crypto/internal/hash.h>
#include <uapi/linux/cifs/cifs_mount.h>
-#include "../smbfs_common/smb2pdu.h"
+#include "../common/smb2pdu.h"
#include "smb2pdu.h"
#include <linux/filelock.h>
@@ -424,8 +424,8 @@ struct smb_version_operations {
/* check for STATUS_NETWORK_SESSION_EXPIRED */
bool (*is_session_expired)(char *);
/* send oplock break response */
- int (*oplock_response)(struct cifs_tcon *, struct cifs_fid *,
- struct cifsInodeInfo *);
+ int (*oplock_response)(struct cifs_tcon *tcon, __u64 persistent_fid, __u64 volatile_fid,
+ __u16 net_fid, struct cifsInodeInfo *cifs_inode);
/* query remote filesystem */
int (*queryfs)(const unsigned int, struct cifs_tcon *,
struct cifs_sb_info *, struct kstatfs *);
@@ -970,43 +970,6 @@ release_iface(struct kref *ref)
kfree(iface);
}
-/*
- * compare two interfaces a and b
- * return 0 if everything matches.
- * return 1 if a has higher link speed, or rdma capable, or rss capable
- * return -1 otherwise.
- */
-static inline int
-iface_cmp(struct cifs_server_iface *a, struct cifs_server_iface *b)
-{
- int cmp_ret = 0;
-
- WARN_ON(!a || !b);
- if (a->speed == b->speed) {
- if (a->rdma_capable == b->rdma_capable) {
- if (a->rss_capable == b->rss_capable) {
- cmp_ret = memcmp(&a->sockaddr, &b->sockaddr,
- sizeof(a->sockaddr));
- if (!cmp_ret)
- return 0;
- else if (cmp_ret > 0)
- return 1;
- else
- return -1;
- } else if (a->rss_capable > b->rss_capable)
- return 1;
- else
- return -1;
- } else if (a->rdma_capable > b->rdma_capable)
- return 1;
- else
- return -1;
- } else if (a->speed > b->speed)
- return 1;
- else
- return -1;
-}
-
struct cifs_chan {
unsigned int in_reconnect : 1; /* if session setup in progress for this channel */
struct TCP_Server_Info *server;
diff --git a/fs/cifs/cifspdu.h b/fs/smb/client/cifspdu.h
index 445e3eaebcc1..e17222fec9d2 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/smb/client/cifspdu.h
@@ -11,7 +11,7 @@
#include <net/sock.h>
#include <asm/unaligned.h>
-#include "../smbfs_common/smbfsctl.h"
+#include "../common/smbfsctl.h"
#define CIFS_PROT 0
#define POSIX_PROT (CIFS_PROT+1)
diff --git a/fs/cifs/cifsproto.h b/fs/smb/client/cifsproto.h
index c1c704990b98..d127aded2f28 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/smb/client/cifsproto.h
@@ -87,6 +87,7 @@ extern int cifs_handle_standard(struct TCP_Server_Info *server,
struct mid_q_entry *mid);
extern int smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx);
extern int smb3_parse_opt(const char *options, const char *key, char **val);
+extern int cifs_ipaddr_cmp(struct sockaddr *srcaddr, struct sockaddr *rhs);
extern bool cifs_match_ipaddr(struct sockaddr *srcaddr, struct sockaddr *rhs);
extern int cifs_discard_remaining_data(struct TCP_Server_Info *server);
extern int cifs_call_async(struct TCP_Server_Info *server,
diff --git a/fs/cifs/cifsroot.c b/fs/smb/client/cifsroot.c
index 56ec1b233f52..56ec1b233f52 100644
--- a/fs/cifs/cifsroot.c
+++ b/fs/smb/client/cifsroot.c
diff --git a/fs/cifs/cifssmb.c b/fs/smb/client/cifssmb.c
index 9d963caec35c..9d963caec35c 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
diff --git a/fs/cifs/connect.c b/fs/smb/client/connect.c
index 8e9a672320ab..9d16626e7a66 100644
--- a/fs/cifs/connect.c
+++ b/fs/smb/client/connect.c
@@ -1288,6 +1288,56 @@ next_pdu:
module_put_and_kthread_exit(0);
}
+int
+cifs_ipaddr_cmp(struct sockaddr *srcaddr, struct sockaddr *rhs)
+{
+ struct sockaddr_in *saddr4 = (struct sockaddr_in *)srcaddr;
+ struct sockaddr_in *vaddr4 = (struct sockaddr_in *)rhs;
+ struct sockaddr_in6 *saddr6 = (struct sockaddr_in6 *)srcaddr;
+ struct sockaddr_in6 *vaddr6 = (struct sockaddr_in6 *)rhs;
+
+ switch (srcaddr->sa_family) {
+ case AF_UNSPEC:
+ switch (rhs->sa_family) {
+ case AF_UNSPEC:
+ return 0;
+ case AF_INET:
+ case AF_INET6:
+ return 1;
+ default:
+ return -1;
+ }
+ case AF_INET: {
+ switch (rhs->sa_family) {
+ case AF_UNSPEC:
+ return -1;
+ case AF_INET:
+ return memcmp(saddr4, vaddr4,
+ sizeof(struct sockaddr_in));
+ case AF_INET6:
+ return 1;
+ default:
+ return -1;
+ }
+ }
+ case AF_INET6: {
+ switch (rhs->sa_family) {
+ case AF_UNSPEC:
+ case AF_INET:
+ return -1;
+ case AF_INET6:
+ return memcmp(saddr6,
+ vaddr6,
+ sizeof(struct sockaddr_in6));
+ default:
+ return -1;
+ }
+ }
+ default:
+ return -1; /* don't expect to be here */
+ }
+}
+
/*
* Returns true if srcaddr isn't specified and rhs isn't specified, or
* if srcaddr is specified and matches the IP address of the rhs argument
@@ -4086,16 +4136,17 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru
/* only send once per connect */
spin_lock(&tcon->tc_lock);
+ if (tcon->status == TID_GOOD) {
+ spin_unlock(&tcon->tc_lock);
+ return 0;
+ }
+
if (tcon->status != TID_NEW &&
tcon->status != TID_NEED_TCON) {
spin_unlock(&tcon->tc_lock);
return -EHOSTDOWN;
}
- if (tcon->status == TID_GOOD) {
- spin_unlock(&tcon->tc_lock);
- return 0;
- }
tcon->status = TID_IN_TCON;
spin_unlock(&tcon->tc_lock);
diff --git a/fs/cifs/dfs.c b/fs/smb/client/dfs.c
index a93dbca1411b..2390b2fedd6a 100644
--- a/fs/cifs/dfs.c
+++ b/fs/smb/client/dfs.c
@@ -303,7 +303,7 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs)
if (!nodfs) {
rc = dfs_get_referral(mnt_ctx, ctx->UNC + 1, NULL, NULL);
if (rc) {
- if (rc != -ENOENT && rc != -EOPNOTSUPP)
+ if (rc != -ENOENT && rc != -EOPNOTSUPP && rc != -EIO)
goto out;
nodfs = true;
}
@@ -575,16 +575,17 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru
/* only send once per connect */
spin_lock(&tcon->tc_lock);
+ if (tcon->status == TID_GOOD) {
+ spin_unlock(&tcon->tc_lock);
+ return 0;
+ }
+
if (tcon->status != TID_NEW &&
tcon->status != TID_NEED_TCON) {
spin_unlock(&tcon->tc_lock);
return -EHOSTDOWN;
}
- if (tcon->status == TID_GOOD) {
- spin_unlock(&tcon->tc_lock);
- return 0;
- }
tcon->status = TID_IN_TCON;
spin_unlock(&tcon->tc_lock);
diff --git a/fs/cifs/dfs.h b/fs/smb/client/dfs.h
index 1c90df5ecfbd..1c90df5ecfbd 100644
--- a/fs/cifs/dfs.h
+++ b/fs/smb/client/dfs.h
diff --git a/fs/cifs/dfs_cache.c b/fs/smb/client/dfs_cache.c
index 1513b2709889..1513b2709889 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/smb/client/dfs_cache.c
diff --git a/fs/cifs/dfs_cache.h b/fs/smb/client/dfs_cache.h
index c6d89cd6d4fd..c6d89cd6d4fd 100644
--- a/fs/cifs/dfs_cache.h
+++ b/fs/smb/client/dfs_cache.h
diff --git a/fs/cifs/dir.c b/fs/smb/client/dir.c
index 30b1e1bfd204..30b1e1bfd204 100644
--- a/fs/cifs/dir.c
+++ b/fs/smb/client/dir.c
diff --git a/fs/cifs/dns_resolve.c b/fs/smb/client/dns_resolve.c
index 8bf8978bc5d6..8bf8978bc5d6 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/smb/client/dns_resolve.c
diff --git a/fs/cifs/dns_resolve.h b/fs/smb/client/dns_resolve.h
index 6eb0c15a2440..6eb0c15a2440 100644
--- a/fs/cifs/dns_resolve.h
+++ b/fs/smb/client/dns_resolve.h
diff --git a/fs/cifs/export.c b/fs/smb/client/export.c
index 37c28415df1e..37c28415df1e 100644
--- a/fs/cifs/export.c
+++ b/fs/smb/client/export.c
diff --git a/fs/cifs/file.c b/fs/smb/client/file.c
index 375a8037a3f3..f30f6ddc4b81 100644
--- a/fs/cifs/file.c
+++ b/fs/smb/client/file.c
@@ -3353,9 +3353,10 @@ static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_siz
while (n && ix < nbv) {
len = min3(n, bvecs[ix].bv_len - skip, max_size);
span += len;
+ max_size -= len;
nsegs++;
ix++;
- if (span >= max_size || nsegs >= max_segs)
+ if (max_size == 0 || nsegs >= max_segs)
break;
skip = 0;
n -= len;
@@ -4881,9 +4882,9 @@ void cifs_oplock_break(struct work_struct *work)
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
struct TCP_Server_Info *server = tcon->ses->server;
int rc = 0;
- bool purge_cache = false;
- struct cifs_deferred_close *dclose;
- bool is_deferred = false;
+ bool purge_cache = false, oplock_break_cancelled;
+ __u64 persistent_fid, volatile_fid;
+ __u16 net_fid;
wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
TASK_UNINTERRUPTIBLE);
@@ -4924,28 +4925,32 @@ oplock_break_ack:
* file handles but cached, then schedule deferred close immediately.
* So, new open will not use cached handle.
*/
- spin_lock(&CIFS_I(inode)->deferred_lock);
- is_deferred = cifs_is_deferred_close(cfile, &dclose);
- spin_unlock(&CIFS_I(inode)->deferred_lock);
- if (!CIFS_CACHE_HANDLE(cinode) && is_deferred &&
- cfile->deferred_close_scheduled && delayed_work_pending(&cfile->deferred)) {
+ if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
cifs_close_deferred_file(cinode);
- }
+ persistent_fid = cfile->fid.persistent_fid;
+ volatile_fid = cfile->fid.volatile_fid;
+ net_fid = cfile->fid.netfid;
+ oplock_break_cancelled = cfile->oplock_break_cancelled;
+
+ _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
/*
* releasing stale oplock after recent reconnect of smb session using
* a now incorrect file handle is not a data integrity issue but do
* not bother sending an oplock release if session to server still is
* disconnected since oplock already released by the server
*/
- if (!cfile->oplock_break_cancelled) {
- rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
- cinode);
- cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
+ if (!oplock_break_cancelled) {
+ /* check for server null since can race with kill_sb calling tree disconnect */
+ if (tcon->ses && tcon->ses->server) {
+ rc = tcon->ses->server->ops->oplock_response(tcon, persistent_fid,
+ volatile_fid, net_fid, cinode);
+ cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
+ } else
+ pr_warn_once("lease break not sent for unmounted share\n");
}
- _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
cifs_done_oplock_break(cinode);
}
diff --git a/fs/cifs/fs_context.c b/fs/smb/client/fs_context.c
index ace11a1a7c8a..1bda75609b64 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -904,6 +904,14 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
ctx->sfu_remap = false; /* disable SFU mapping */
}
break;
+ case Opt_mapchars:
+ if (result.negated)
+ ctx->sfu_remap = false;
+ else {
+ ctx->sfu_remap = true;
+ ctx->remap = false; /* disable SFM (mapposix) mapping */
+ }
+ break;
case Opt_user_xattr:
if (result.negated)
ctx->no_xattr = 1;
diff --git a/fs/cifs/fs_context.h b/fs/smb/client/fs_context.h
index f4eaf8558902..f4eaf8558902 100644
--- a/fs/cifs/fs_context.h
+++ b/fs/smb/client/fs_context.h
diff --git a/fs/cifs/fscache.c b/fs/smb/client/fscache.c
index 8f6909d633da..8f6909d633da 100644
--- a/fs/cifs/fscache.c
+++ b/fs/smb/client/fscache.c
diff --git a/fs/cifs/fscache.h b/fs/smb/client/fscache.h
index 173999610997..173999610997 100644
--- a/fs/cifs/fscache.h
+++ b/fs/smb/client/fscache.h
diff --git a/fs/cifs/inode.c b/fs/smb/client/inode.c
index 1087ac6104a9..1087ac6104a9 100644
--- a/fs/cifs/inode.c
+++ b/fs/smb/client/inode.c
diff --git a/fs/cifs/ioctl.c b/fs/smb/client/ioctl.c
index cb3be58cd55e..fff092bbc7a3 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/smb/client/ioctl.c
@@ -321,7 +321,11 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
struct tcon_link *tlink;
struct cifs_sb_info *cifs_sb;
__u64 ExtAttrBits = 0;
+#ifdef CONFIG_CIFS_POSIX
+#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
__u64 caps;
+#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
+#endif /* CONFIG_CIFS_POSIX */
xid = get_xid();
@@ -331,9 +335,9 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
if (pSMBFile == NULL)
break;
tcon = tlink_tcon(pSMBFile->tlink);
- caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
#ifdef CONFIG_CIFS_POSIX
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
+ caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
if (CIFS_UNIX_EXTATTR_CAP & caps) {
__u64 ExtAttrMask = 0;
rc = CIFSGetExtAttr(xid, tcon,
diff --git a/fs/cifs/link.c b/fs/smb/client/link.c
index c66be4904e1f..c66be4904e1f 100644
--- a/fs/cifs/link.c
+++ b/fs/smb/client/link.c
diff --git a/fs/cifs/misc.c b/fs/smb/client/misc.c
index cd914be905b2..cd914be905b2 100644
--- a/fs/cifs/misc.c
+++ b/fs/smb/client/misc.c
diff --git a/fs/cifs/netlink.c b/fs/smb/client/netlink.c
index 147d9409252c..147d9409252c 100644
--- a/fs/cifs/netlink.c
+++ b/fs/smb/client/netlink.c
diff --git a/fs/cifs/netlink.h b/fs/smb/client/netlink.h
index e2fa8ed24c54..e2fa8ed24c54 100644
--- a/fs/cifs/netlink.h
+++ b/fs/smb/client/netlink.h
diff --git a/fs/cifs/netmisc.c b/fs/smb/client/netmisc.c
index 1b52e6ac431c..1b52e6ac431c 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/smb/client/netmisc.c
diff --git a/fs/cifs/nterr.c b/fs/smb/client/nterr.c
index 358a766375b4..358a766375b4 100644
--- a/fs/cifs/nterr.c
+++ b/fs/smb/client/nterr.c
diff --git a/fs/cifs/nterr.h b/fs/smb/client/nterr.h
index edd4741cab0a..edd4741cab0a 100644
--- a/fs/cifs/nterr.h
+++ b/fs/smb/client/nterr.h
diff --git a/fs/cifs/ntlmssp.h b/fs/smb/client/ntlmssp.h
index 2c5dde2ece58..2c5dde2ece58 100644
--- a/fs/cifs/ntlmssp.h
+++ b/fs/smb/client/ntlmssp.h
diff --git a/fs/cifs/readdir.c b/fs/smb/client/readdir.c
index ef638086d734..ef638086d734 100644
--- a/fs/cifs/readdir.c
+++ b/fs/smb/client/readdir.c
diff --git a/fs/cifs/rfc1002pdu.h b/fs/smb/client/rfc1002pdu.h
index ae1d025da294..ae1d025da294 100644
--- a/fs/cifs/rfc1002pdu.h
+++ b/fs/smb/client/rfc1002pdu.h
diff --git a/fs/cifs/sess.c b/fs/smb/client/sess.c
index 335c078c42fb..335c078c42fb 100644
--- a/fs/cifs/sess.c
+++ b/fs/smb/client/sess.c
diff --git a/fs/cifs/smb1ops.c b/fs/smb/client/smb1ops.c
index abda6148be10..7d1b3fc014d9 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/smb/client/smb1ops.c
@@ -897,12 +897,11 @@ cifs_close_dir(const unsigned int xid, struct cifs_tcon *tcon,
}
static int
-cifs_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
- struct cifsInodeInfo *cinode)
+cifs_oplock_response(struct cifs_tcon *tcon, __u64 persistent_fid,
+ __u64 volatile_fid, __u16 net_fid, struct cifsInodeInfo *cinode)
{
- return CIFSSMBLock(0, tcon, fid->netfid, current->tgid, 0, 0, 0, 0,
- LOCKING_ANDX_OPLOCK_RELEASE, false,
- CIFS_CACHE_READ(cinode) ? 1 : 0);
+ return CIFSSMBLock(0, tcon, net_fid, current->tgid, 0, 0, 0, 0,
+ LOCKING_ANDX_OPLOCK_RELEASE, false, CIFS_CACHE_READ(cinode) ? 1 : 0);
}
static int
diff --git a/fs/cifs/smb2file.c b/fs/smb/client/smb2file.c
index e0ee96d69d49..e0ee96d69d49 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/smb/client/smb2file.c
diff --git a/fs/cifs/smb2glob.h b/fs/smb/client/smb2glob.h
index 82e916ad167c..82e916ad167c 100644
--- a/fs/cifs/smb2glob.h
+++ b/fs/smb/client/smb2glob.h
diff --git a/fs/cifs/smb2inode.c b/fs/smb/client/smb2inode.c
index 163a03298430..163a03298430 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/smb/client/smb2inode.c
diff --git a/fs/cifs/smb2maperror.c b/fs/smb/client/smb2maperror.c
index 194799ddd382..194799ddd382 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/smb/client/smb2maperror.c
diff --git a/fs/cifs/smb2misc.c b/fs/smb/client/smb2misc.c
index 3935a60db5c3..3935a60db5c3 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/smb/client/smb2misc.c
diff --git a/fs/cifs/smb2ops.c b/fs/smb/client/smb2ops.c
index a295e4c2d54e..a8bb9d00d33a 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -34,6 +34,8 @@ static int
change_conf(struct TCP_Server_Info *server)
{
server->credits += server->echo_credits + server->oplock_credits;
+ if (server->credits > server->max_credits)
+ server->credits = server->max_credits;
server->oplock_credits = server->echo_credits = 0;
switch (server->credits) {
case 0:
@@ -91,6 +93,7 @@ smb2_add_credits(struct TCP_Server_Info *server,
server->conn_id, server->hostname, *val,
add, server->in_flight);
}
+ WARN_ON_ONCE(server->in_flight == 0);
server->in_flight--;
if (server->in_flight == 0 &&
((optype & CIFS_OP_MASK) != CIFS_NEG_OP) &&
@@ -510,6 +513,43 @@ smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
return rsize;
}
+/*
+ * compare two interfaces a and b
+ * return 0 if everything matches.
+ * return 1 if a is rdma capable, or rss capable, or has higher link speed
+ * return -1 otherwise.
+ */
+static int
+iface_cmp(struct cifs_server_iface *a, struct cifs_server_iface *b)
+{
+ int cmp_ret = 0;
+
+ WARN_ON(!a || !b);
+ if (a->rdma_capable == b->rdma_capable) {
+ if (a->rss_capable == b->rss_capable) {
+ if (a->speed == b->speed) {
+ cmp_ret = cifs_ipaddr_cmp((struct sockaddr *) &a->sockaddr,
+ (struct sockaddr *) &b->sockaddr);
+ if (!cmp_ret)
+ return 0;
+ else if (cmp_ret > 0)
+ return 1;
+ else
+ return -1;
+ } else if (a->speed > b->speed)
+ return 1;
+ else
+ return -1;
+ } else if (a->rss_capable > b->rss_capable)
+ return 1;
+ else
+ return -1;
+ } else if (a->rdma_capable > b->rdma_capable)
+ return 1;
+ else
+ return -1;
+}
+
static int
parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
size_t buf_len, struct cifs_ses *ses, bool in_mount)
@@ -618,7 +658,6 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
* Add a new one instead
*/
spin_lock(&ses->iface_lock);
- iface = niface = NULL;
list_for_each_entry_safe(iface, niface, &ses->iface_list,
iface_head) {
ret = iface_cmp(iface, &tmp_iface);
@@ -2383,15 +2422,14 @@ smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server)
}
static int
-smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
- struct cifsInodeInfo *cinode)
+smb2_oplock_response(struct cifs_tcon *tcon, __u64 persistent_fid,
+ __u64 volatile_fid, __u16 net_fid, struct cifsInodeInfo *cinode)
{
if (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING)
return SMB2_lease_break(0, tcon, cinode->lease_key,
smb2_get_lease_state(cinode));
- return SMB2_oplock_break(0, tcon, fid->persistent_fid,
- fid->volatile_fid,
+ return SMB2_oplock_break(0, tcon, persistent_fid, volatile_fid,
CIFS_CACHE_READ(cinode) ? 1 : 0);
}
diff --git a/fs/cifs/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 9ed61b6f9b21..17fe212ab895 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -1305,7 +1305,12 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data)
}
/* enough to enable echos and oplocks and one max size write */
- req->hdr.CreditRequest = cpu_to_le16(130);
+ if (server->credits >= server->max_credits)
+ req->hdr.CreditRequest = cpu_to_le16(0);
+ else
+ req->hdr.CreditRequest = cpu_to_le16(
+ min_t(int, server->max_credits -
+ server->credits, 130));
/* only one of SMB2 signing flags may be set in SMB2 request */
if (server->sign)
@@ -1899,7 +1904,12 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
rqst.rq_nvec = 2;
/* Need 64 for max size write so ask for more in case not there yet */
- req->hdr.CreditRequest = cpu_to_le16(64);
+ if (server->credits >= server->max_credits)
+ req->hdr.CreditRequest = cpu_to_le16(0);
+ else
+ req->hdr.CreditRequest = cpu_to_le16(
+ min_t(int, server->max_credits -
+ server->credits, 64));
rc = cifs_send_recv(xid, ses, server,
&rqst, &resp_buftype, flags, &rsp_iov);
@@ -3725,7 +3735,7 @@ SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon,
if (*out_data == NULL) {
rc = -ENOMEM;
goto cnotify_exit;
- } else
+ } else if (plen)
*plen = le32_to_cpu(smb_rsp->OutputBufferLength);
}
@@ -4227,6 +4237,7 @@ smb2_async_readv(struct cifs_readdata *rdata)
struct TCP_Server_Info *server;
struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink);
unsigned int total_len;
+ int credit_request;
cifs_dbg(FYI, "%s: offset=%llu bytes=%u\n",
__func__, rdata->offset, rdata->bytes);
@@ -4258,7 +4269,13 @@ smb2_async_readv(struct cifs_readdata *rdata)
if (rdata->credits.value > 0) {
shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes,
SMB2_MAX_BUFFER_SIZE));
- shdr->CreditRequest = cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 8);
+ credit_request = le16_to_cpu(shdr->CreditCharge) + 8;
+ if (server->credits >= server->max_credits)
+ shdr->CreditRequest = cpu_to_le16(0);
+ else
+ shdr->CreditRequest = cpu_to_le16(
+ min_t(int, server->max_credits -
+ server->credits, credit_request));
rc = adjust_credits(server, &rdata->credits, rdata->bytes);
if (rc)
@@ -4468,6 +4485,7 @@ smb2_async_writev(struct cifs_writedata *wdata,
unsigned int total_len;
struct cifs_io_parms _io_parms;
struct cifs_io_parms *io_parms = NULL;
+ int credit_request;
if (!wdata->server)
server = wdata->server = cifs_pick_channel(tcon->ses);
@@ -4572,7 +4590,13 @@ smb2_async_writev(struct cifs_writedata *wdata,
if (wdata->credits.value > 0) {
shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes,
SMB2_MAX_BUFFER_SIZE));
- shdr->CreditRequest = cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 8);
+ credit_request = le16_to_cpu(shdr->CreditCharge) + 8;
+ if (server->credits >= server->max_credits)
+ shdr->CreditRequest = cpu_to_le16(0);
+ else
+ shdr->CreditRequest = cpu_to_le16(
+ min_t(int, server->max_credits -
+ server->credits, credit_request));
rc = adjust_credits(server, &wdata->credits, io_parms->length);
if (rc)
diff --git a/fs/cifs/smb2pdu.h b/fs/smb/client/smb2pdu.h
index 220994d0a0f7..220994d0a0f7 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/smb/client/smb2pdu.h
diff --git a/fs/cifs/smb2proto.h b/fs/smb/client/smb2proto.h
index d5d7ffb7711c..d5d7ffb7711c 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/smb/client/smb2proto.h
diff --git a/fs/cifs/smb2status.h b/fs/smb/client/smb2status.h
index a9e958166fc5..a9e958166fc5 100644
--- a/fs/cifs/smb2status.h
+++ b/fs/smb/client/smb2status.h
diff --git a/fs/cifs/smb2transport.c b/fs/smb/client/smb2transport.c
index 790acf65a092..790acf65a092 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/smb/client/smb2transport.c
diff --git a/fs/cifs/smbdirect.c b/fs/smb/client/smbdirect.c
index 0362ebd4fa0f..0362ebd4fa0f 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/smb/client/smbdirect.c
diff --git a/fs/cifs/smbdirect.h b/fs/smb/client/smbdirect.h
index 83f239f376f0..83f239f376f0 100644
--- a/fs/cifs/smbdirect.h
+++ b/fs/smb/client/smbdirect.h
diff --git a/fs/cifs/smbencrypt.c b/fs/smb/client/smbencrypt.c
index 4a0487753869..f0ce26414f17 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/smb/client/smbencrypt.c
@@ -24,7 +24,7 @@
#include "cifsglob.h"
#include "cifs_debug.h"
#include "cifsproto.h"
-#include "../smbfs_common/md4.h"
+#include "../common/md4.h"
#ifndef false
#define false 0
diff --git a/fs/cifs/smberr.h b/fs/smb/client/smberr.h
index aeffdad829e2..aeffdad829e2 100644
--- a/fs/cifs/smberr.h
+++ b/fs/smb/client/smberr.h
diff --git a/fs/cifs/trace.c b/fs/smb/client/trace.c
index 465483787193..465483787193 100644
--- a/fs/cifs/trace.c
+++ b/fs/smb/client/trace.c
diff --git a/fs/cifs/trace.h b/fs/smb/client/trace.h
index d3053bd8ae73..d3053bd8ae73 100644
--- a/fs/cifs/trace.h
+++ b/fs/smb/client/trace.h
diff --git a/fs/cifs/transport.c b/fs/smb/client/transport.c
index 24bdd5f4d3bc..0474d0bba0a2 100644
--- a/fs/cifs/transport.c
+++ b/fs/smb/client/transport.c
@@ -55,7 +55,7 @@ alloc_mid(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
temp->pid = current->pid;
temp->command = cpu_to_le16(smb_buffer->Command);
cifs_dbg(FYI, "For smb_command %d\n", smb_buffer->Command);
- /* do_gettimeofday(&temp->when_sent);*/ /* easier to use jiffies */
+ /* easier to use jiffies */
/* when mid allocated can be before when sent */
temp->when_alloc = jiffies;
temp->server = server;
diff --git a/fs/cifs/unc.c b/fs/smb/client/unc.c
index f6fc5e343ea4..f6fc5e343ea4 100644
--- a/fs/cifs/unc.c
+++ b/fs/smb/client/unc.c
diff --git a/fs/cifs/winucase.c b/fs/smb/client/winucase.c
index 2f075b5b50df..2f075b5b50df 100644
--- a/fs/cifs/winucase.c
+++ b/fs/smb/client/winucase.c
diff --git a/fs/cifs/xattr.c b/fs/smb/client/xattr.c
index 4ad5531686d8..4ad5531686d8 100644
--- a/fs/cifs/xattr.c
+++ b/fs/smb/client/xattr.c
diff --git a/fs/smbfs_common/Makefile b/fs/smb/common/Makefile
index cafc61a3bfc3..c66dbbc1469c 100644
--- a/fs/smbfs_common/Makefile
+++ b/fs/smb/common/Makefile
@@ -3,5 +3,5 @@
# Makefile for Linux filesystem routines that are shared by client and server.
#
-obj-$(CONFIG_SMBFS_COMMON) += cifs_arc4.o
-obj-$(CONFIG_SMBFS_COMMON) += cifs_md4.o
+obj-$(CONFIG_SMBFS) += cifs_arc4.o
+obj-$(CONFIG_SMBFS) += cifs_md4.o
diff --git a/fs/smbfs_common/arc4.h b/fs/smb/common/arc4.h
index 12e71ec033a1..12e71ec033a1 100644
--- a/fs/smbfs_common/arc4.h
+++ b/fs/smb/common/arc4.h
diff --git a/fs/smbfs_common/cifs_arc4.c b/fs/smb/common/cifs_arc4.c
index 043e4cb839fa..043e4cb839fa 100644
--- a/fs/smbfs_common/cifs_arc4.c
+++ b/fs/smb/common/cifs_arc4.c
diff --git a/fs/smbfs_common/cifs_md4.c b/fs/smb/common/cifs_md4.c
index 50f78cfc6ce9..50f78cfc6ce9 100644
--- a/fs/smbfs_common/cifs_md4.c
+++ b/fs/smb/common/cifs_md4.c
diff --git a/fs/smbfs_common/md4.h b/fs/smb/common/md4.h
index 5337becc699a..5337becc699a 100644
--- a/fs/smbfs_common/md4.h
+++ b/fs/smb/common/md4.h
diff --git a/fs/smbfs_common/smb2pdu.h b/fs/smb/common/smb2pdu.h
index bae590eec871..bae590eec871 100644
--- a/fs/smbfs_common/smb2pdu.h
+++ b/fs/smb/common/smb2pdu.h
diff --git a/fs/smbfs_common/smbfsctl.h b/fs/smb/common/smbfsctl.h
index edd7fc2a7921..edd7fc2a7921 100644
--- a/fs/smbfs_common/smbfsctl.h
+++ b/fs/smb/common/smbfsctl.h
diff --git a/fs/ksmbd/Kconfig b/fs/smb/server/Kconfig
index 7055cb5d2880..7055cb5d2880 100644
--- a/fs/ksmbd/Kconfig
+++ b/fs/smb/server/Kconfig
diff --git a/fs/ksmbd/Makefile b/fs/smb/server/Makefile
index 7d6337a7dee4..7d6337a7dee4 100644
--- a/fs/ksmbd/Makefile
+++ b/fs/smb/server/Makefile
diff --git a/fs/ksmbd/asn1.c b/fs/smb/server/asn1.c
index cc6384f79675..cc6384f79675 100644
--- a/fs/ksmbd/asn1.c
+++ b/fs/smb/server/asn1.c
diff --git a/fs/ksmbd/asn1.h b/fs/smb/server/asn1.h
index ce105f4ce305..ce105f4ce305 100644
--- a/fs/ksmbd/asn1.h
+++ b/fs/smb/server/asn1.h
diff --git a/fs/ksmbd/auth.c b/fs/smb/server/auth.c
index df8fb076f6f1..5e5e120edcc2 100644
--- a/fs/ksmbd/auth.c
+++ b/fs/smb/server/auth.c
@@ -29,7 +29,7 @@
#include "mgmt/user_config.h"
#include "crypto_ctx.h"
#include "transport_ipc.h"
-#include "../smbfs_common/arc4.h"
+#include "../common/arc4.h"
/*
* Fixed format data defining GSS header and fixed string
diff --git a/fs/ksmbd/auth.h b/fs/smb/server/auth.h
index 362b6159a6cf..362b6159a6cf 100644
--- a/fs/ksmbd/auth.h
+++ b/fs/smb/server/auth.h
diff --git a/fs/ksmbd/connection.c b/fs/smb/server/connection.c
index 4ed379f9b1aa..2a717d158f02 100644
--- a/fs/ksmbd/connection.c
+++ b/fs/smb/server/connection.c
@@ -294,6 +294,9 @@ bool ksmbd_conn_alive(struct ksmbd_conn *conn)
return true;
}
+#define SMB1_MIN_SUPPORTED_HEADER_SIZE (sizeof(struct smb_hdr))
+#define SMB2_MIN_SUPPORTED_HEADER_SIZE (sizeof(struct smb2_hdr) + 4)
+
/**
* ksmbd_conn_handler_loop() - session thread to listen on new smb requests
* @p: connection instance
@@ -350,15 +353,17 @@ int ksmbd_conn_handler_loop(void *p)
if (pdu_size > MAX_STREAM_PROT_LEN)
break;
+ if (pdu_size < SMB1_MIN_SUPPORTED_HEADER_SIZE)
+ break;
+
/* 4 for rfc1002 length field */
- size = pdu_size + 4;
+ /* 1 for implied bcc[0] */
+ size = pdu_size + 4 + 1;
conn->request_buf = kvmalloc(size, GFP_KERNEL);
if (!conn->request_buf)
break;
memcpy(conn->request_buf, hdr_buf, sizeof(hdr_buf));
- if (!ksmbd_smb_request(conn))
- break;
/*
* We already read 4 bytes to find out PDU size, now
@@ -376,6 +381,15 @@ int ksmbd_conn_handler_loop(void *p)
continue;
}
+ if (!ksmbd_smb_request(conn))
+ break;
+
+ if (((struct smb2_hdr *)smb2_get_msg(conn->request_buf))->ProtocolId ==
+ SMB2_PROTO_NUMBER) {
+ if (pdu_size < SMB2_MIN_SUPPORTED_HEADER_SIZE)
+ break;
+ }
+
if (!default_conn_ops.process_fn) {
pr_err("No connection request callback\n");
break;
diff --git a/fs/ksmbd/connection.h b/fs/smb/server/connection.h
index ad8dfaa48ffb..ad8dfaa48ffb 100644
--- a/fs/ksmbd/connection.h
+++ b/fs/smb/server/connection.h
diff --git a/fs/ksmbd/crypto_ctx.c b/fs/smb/server/crypto_ctx.c
index 81488d04199d..81488d04199d 100644
--- a/fs/ksmbd/crypto_ctx.c
+++ b/fs/smb/server/crypto_ctx.c
diff --git a/fs/ksmbd/crypto_ctx.h b/fs/smb/server/crypto_ctx.h
index 4a367c62f653..4a367c62f653 100644
--- a/fs/ksmbd/crypto_ctx.h
+++ b/fs/smb/server/crypto_ctx.h
diff --git a/fs/ksmbd/glob.h b/fs/smb/server/glob.h
index 5b8f3e0ebdb3..5b8f3e0ebdb3 100644
--- a/fs/ksmbd/glob.h
+++ b/fs/smb/server/glob.h
diff --git a/fs/ksmbd/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h
index fb8b2d566efb..fb8b2d566efb 100644
--- a/fs/ksmbd/ksmbd_netlink.h
+++ b/fs/smb/server/ksmbd_netlink.h
diff --git a/fs/ksmbd/ksmbd_spnego_negtokeninit.asn1 b/fs/smb/server/ksmbd_spnego_negtokeninit.asn1
index 0065f191b54b..0065f191b54b 100644
--- a/fs/ksmbd/ksmbd_spnego_negtokeninit.asn1
+++ b/fs/smb/server/ksmbd_spnego_negtokeninit.asn1
diff --git a/fs/ksmbd/ksmbd_spnego_negtokentarg.asn1 b/fs/smb/server/ksmbd_spnego_negtokentarg.asn1
index 1151933e7b9c..1151933e7b9c 100644
--- a/fs/ksmbd/ksmbd_spnego_negtokentarg.asn1
+++ b/fs/smb/server/ksmbd_spnego_negtokentarg.asn1
diff --git a/fs/ksmbd/ksmbd_work.c b/fs/smb/server/ksmbd_work.c
index 14b9caebf7a4..14b9caebf7a4 100644
--- a/fs/ksmbd/ksmbd_work.c
+++ b/fs/smb/server/ksmbd_work.c
diff --git a/fs/ksmbd/ksmbd_work.h b/fs/smb/server/ksmbd_work.h
index f8ae6144c0ae..f8ae6144c0ae 100644
--- a/fs/ksmbd/ksmbd_work.h
+++ b/fs/smb/server/ksmbd_work.h
diff --git a/fs/ksmbd/mgmt/ksmbd_ida.c b/fs/smb/server/mgmt/ksmbd_ida.c
index 54194d959a5e..54194d959a5e 100644
--- a/fs/ksmbd/mgmt/ksmbd_ida.c
+++ b/fs/smb/server/mgmt/ksmbd_ida.c
diff --git a/fs/ksmbd/mgmt/ksmbd_ida.h b/fs/smb/server/mgmt/ksmbd_ida.h
index 2bc07b16cfde..2bc07b16cfde 100644
--- a/fs/ksmbd/mgmt/ksmbd_ida.h
+++ b/fs/smb/server/mgmt/ksmbd_ida.h
diff --git a/fs/ksmbd/mgmt/share_config.c b/fs/smb/server/mgmt/share_config.c
index 328a412259dc..328a412259dc 100644
--- a/fs/ksmbd/mgmt/share_config.c
+++ b/fs/smb/server/mgmt/share_config.c
diff --git a/fs/ksmbd/mgmt/share_config.h b/fs/smb/server/mgmt/share_config.h
index 3fd338293942..3fd338293942 100644
--- a/fs/ksmbd/mgmt/share_config.h
+++ b/fs/smb/server/mgmt/share_config.h
diff --git a/fs/ksmbd/mgmt/tree_connect.c b/fs/smb/server/mgmt/tree_connect.c
index f07a05f37651..f07a05f37651 100644
--- a/fs/ksmbd/mgmt/tree_connect.c
+++ b/fs/smb/server/mgmt/tree_connect.c
diff --git a/fs/ksmbd/mgmt/tree_connect.h b/fs/smb/server/mgmt/tree_connect.h
index 700df36cf3e3..700df36cf3e3 100644
--- a/fs/ksmbd/mgmt/tree_connect.h
+++ b/fs/smb/server/mgmt/tree_connect.h
diff --git a/fs/ksmbd/mgmt/user_config.c b/fs/smb/server/mgmt/user_config.c
index 279d00feff21..279d00feff21 100644
--- a/fs/ksmbd/mgmt/user_config.c
+++ b/fs/smb/server/mgmt/user_config.c
diff --git a/fs/ksmbd/mgmt/user_config.h b/fs/smb/server/mgmt/user_config.h
index 6a44109617f1..6a44109617f1 100644
--- a/fs/ksmbd/mgmt/user_config.h
+++ b/fs/smb/server/mgmt/user_config.h
diff --git a/fs/ksmbd/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c
index 8a5dcab05614..8a5dcab05614 100644
--- a/fs/ksmbd/mgmt/user_session.c
+++ b/fs/smb/server/mgmt/user_session.c
diff --git a/fs/ksmbd/mgmt/user_session.h b/fs/smb/server/mgmt/user_session.h
index f99d475b28db..f99d475b28db 100644
--- a/fs/ksmbd/mgmt/user_session.h
+++ b/fs/smb/server/mgmt/user_session.h
diff --git a/fs/ksmbd/misc.c b/fs/smb/server/misc.c
index 9e8afaa686e3..9e8afaa686e3 100644
--- a/fs/ksmbd/misc.c
+++ b/fs/smb/server/misc.c
diff --git a/fs/ksmbd/misc.h b/fs/smb/server/misc.h
index 1facfcd21200..1facfcd21200 100644
--- a/fs/ksmbd/misc.h
+++ b/fs/smb/server/misc.h
diff --git a/fs/ksmbd/ndr.c b/fs/smb/server/ndr.c
index 3507d8f89074..3507d8f89074 100644
--- a/fs/ksmbd/ndr.c
+++ b/fs/smb/server/ndr.c
diff --git a/fs/ksmbd/ndr.h b/fs/smb/server/ndr.h
index f3c108c8cf4d..f3c108c8cf4d 100644
--- a/fs/ksmbd/ndr.h
+++ b/fs/smb/server/ndr.h
diff --git a/fs/ksmbd/nterr.h b/fs/smb/server/nterr.h
index 2f358f88a018..2f358f88a018 100644
--- a/fs/ksmbd/nterr.h
+++ b/fs/smb/server/nterr.h
diff --git a/fs/ksmbd/ntlmssp.h b/fs/smb/server/ntlmssp.h
index f13153c18b4e..f13153c18b4e 100644
--- a/fs/ksmbd/ntlmssp.h
+++ b/fs/smb/server/ntlmssp.h
diff --git a/fs/ksmbd/oplock.c b/fs/smb/server/oplock.c
index 2e54ded4d92c..844b303baf29 100644
--- a/fs/ksmbd/oplock.c
+++ b/fs/smb/server/oplock.c
@@ -157,13 +157,42 @@ static struct oplock_info *opinfo_get_list(struct ksmbd_inode *ci)
rcu_read_lock();
opinfo = list_first_or_null_rcu(&ci->m_op_list, struct oplock_info,
op_entry);
- if (opinfo && !atomic_inc_not_zero(&opinfo->refcount))
- opinfo = NULL;
+ if (opinfo) {
+ if (!atomic_inc_not_zero(&opinfo->refcount))
+ opinfo = NULL;
+ else {
+ atomic_inc(&opinfo->conn->r_count);
+ if (ksmbd_conn_releasing(opinfo->conn)) {
+ atomic_dec(&opinfo->conn->r_count);
+ atomic_dec(&opinfo->refcount);
+ opinfo = NULL;
+ }
+ }
+ }
+
rcu_read_unlock();
return opinfo;
}
+static void opinfo_conn_put(struct oplock_info *opinfo)
+{
+ struct ksmbd_conn *conn;
+
+ if (!opinfo)
+ return;
+
+ conn = opinfo->conn;
+ /*
+ * Checking waitqueue to dropping pending requests on
+ * disconnection. waitqueue_active is safe because it
+ * uses atomic operation for condition.
+ */
+ if (!atomic_dec_return(&conn->r_count) && waitqueue_active(&conn->r_count_q))
+ wake_up(&conn->r_count_q);
+ opinfo_put(opinfo);
+}
+
void opinfo_put(struct oplock_info *opinfo)
{
if (!atomic_dec_and_test(&opinfo->refcount))
@@ -666,13 +695,6 @@ static void __smb2_oplock_break_noti(struct work_struct *wk)
out:
ksmbd_free_work_struct(work);
- /*
- * Checking waitqueue to dropping pending requests on
- * disconnection. waitqueue_active is safe because it
- * uses atomic operation for condition.
- */
- if (!atomic_dec_return(&conn->r_count) && waitqueue_active(&conn->r_count_q))
- wake_up(&conn->r_count_q);
}
/**
@@ -706,7 +728,6 @@ static int smb2_oplock_break_noti(struct oplock_info *opinfo)
work->conn = conn;
work->sess = opinfo->sess;
- atomic_inc(&conn->r_count);
if (opinfo->op_state == OPLOCK_ACK_WAIT) {
INIT_WORK(&work->work, __smb2_oplock_break_noti);
ksmbd_queue_work(work);
@@ -776,13 +797,6 @@ static void __smb2_lease_break_noti(struct work_struct *wk)
out:
ksmbd_free_work_struct(work);
- /*
- * Checking waitqueue to dropping pending requests on
- * disconnection. waitqueue_active is safe because it
- * uses atomic operation for condition.
- */
- if (!atomic_dec_return(&conn->r_count) && waitqueue_active(&conn->r_count_q))
- wake_up(&conn->r_count_q);
}
/**
@@ -822,7 +836,6 @@ static int smb2_lease_break_noti(struct oplock_info *opinfo)
work->conn = conn;
work->sess = opinfo->sess;
- atomic_inc(&conn->r_count);
if (opinfo->op_state == OPLOCK_ACK_WAIT) {
list_for_each_safe(tmp, t, &opinfo->interim_list) {
struct ksmbd_work *in_work;
@@ -1144,8 +1157,10 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid,
}
prev_opinfo = opinfo_get_list(ci);
if (!prev_opinfo ||
- (prev_opinfo->level == SMB2_OPLOCK_LEVEL_NONE && lctx))
+ (prev_opinfo->level == SMB2_OPLOCK_LEVEL_NONE && lctx)) {
+ opinfo_conn_put(prev_opinfo);
goto set_lev;
+ }
prev_op_has_lease = prev_opinfo->is_lease;
if (prev_op_has_lease)
prev_op_state = prev_opinfo->o_lease->state;
@@ -1153,19 +1168,19 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid,
if (share_ret < 0 &&
prev_opinfo->level == SMB2_OPLOCK_LEVEL_EXCLUSIVE) {
err = share_ret;
- opinfo_put(prev_opinfo);
+ opinfo_conn_put(prev_opinfo);
goto err_out;
}
if (prev_opinfo->level != SMB2_OPLOCK_LEVEL_BATCH &&
prev_opinfo->level != SMB2_OPLOCK_LEVEL_EXCLUSIVE) {
- opinfo_put(prev_opinfo);
+ opinfo_conn_put(prev_opinfo);
goto op_break_not_needed;
}
list_add(&work->interim_entry, &prev_opinfo->interim_list);
err = oplock_break(prev_opinfo, SMB2_OPLOCK_LEVEL_II);
- opinfo_put(prev_opinfo);
+ opinfo_conn_put(prev_opinfo);
if (err == -ENOENT)
goto set_lev;
/* Check all oplock was freed by close */
@@ -1228,14 +1243,14 @@ static void smb_break_all_write_oplock(struct ksmbd_work *work,
return;
if (brk_opinfo->level != SMB2_OPLOCK_LEVEL_BATCH &&
brk_opinfo->level != SMB2_OPLOCK_LEVEL_EXCLUSIVE) {
- opinfo_put(brk_opinfo);
+ opinfo_conn_put(brk_opinfo);
return;
}
brk_opinfo->open_trunc = is_trunc;
list_add(&work->interim_entry, &brk_opinfo->interim_list);
oplock_break(brk_opinfo, SMB2_OPLOCK_LEVEL_II);
- opinfo_put(brk_opinfo);
+ opinfo_conn_put(brk_opinfo);
}
/**
@@ -1263,6 +1278,13 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp,
list_for_each_entry_rcu(brk_op, &ci->m_op_list, op_entry) {
if (!atomic_inc_not_zero(&brk_op->refcount))
continue;
+
+ atomic_inc(&brk_op->conn->r_count);
+ if (ksmbd_conn_releasing(brk_op->conn)) {
+ atomic_dec(&brk_op->conn->r_count);
+ continue;
+ }
+
rcu_read_unlock();
if (brk_op->is_lease && (brk_op->o_lease->state &
(~(SMB2_LEASE_READ_CACHING_LE |
@@ -1292,7 +1314,7 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp,
brk_op->open_trunc = is_trunc;
oplock_break(brk_op, SMB2_OPLOCK_LEVEL_NONE);
next:
- opinfo_put(brk_op);
+ opinfo_conn_put(brk_op);
rcu_read_lock();
}
rcu_read_unlock();
@@ -1393,67 +1415,50 @@ void create_lease_buf(u8 *rbuf, struct lease *lease)
*/
struct lease_ctx_info *parse_lease_state(void *open_req)
{
- char *data_offset;
struct create_context *cc;
- unsigned int next = 0;
- char *name;
- bool found = false;
struct smb2_create_req *req = (struct smb2_create_req *)open_req;
- struct lease_ctx_info *lreq = kzalloc(sizeof(struct lease_ctx_info),
- GFP_KERNEL);
+ struct lease_ctx_info *lreq;
+
+ cc = smb2_find_context_vals(req, SMB2_CREATE_REQUEST_LEASE, 4);
+ if (IS_ERR_OR_NULL(cc))
+ return NULL;
+
+ lreq = kzalloc(sizeof(struct lease_ctx_info), GFP_KERNEL);
if (!lreq)
return NULL;
- data_offset = (char *)req + le32_to_cpu(req->CreateContextsOffset);
- cc = (struct create_context *)data_offset;
- do {
- cc = (struct create_context *)((char *)cc + next);
- name = le16_to_cpu(cc->NameOffset) + (char *)cc;
- if (le16_to_cpu(cc->NameLength) != 4 ||
- strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4)) {
- next = le32_to_cpu(cc->Next);
- continue;
- }
- found = true;
- break;
- } while (next != 0);
+ if (sizeof(struct lease_context_v2) == le32_to_cpu(cc->DataLength)) {
+ struct create_lease_v2 *lc = (struct create_lease_v2 *)cc;
- if (found) {
- if (sizeof(struct lease_context_v2) == le32_to_cpu(cc->DataLength)) {
- struct create_lease_v2 *lc = (struct create_lease_v2 *)cc;
-
- memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE);
- lreq->req_state = lc->lcontext.LeaseState;
- lreq->flags = lc->lcontext.LeaseFlags;
- lreq->duration = lc->lcontext.LeaseDuration;
- memcpy(lreq->parent_lease_key, lc->lcontext.ParentLeaseKey,
- SMB2_LEASE_KEY_SIZE);
- lreq->version = 2;
- } else {
- struct create_lease *lc = (struct create_lease *)cc;
+ memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE);
+ lreq->req_state = lc->lcontext.LeaseState;
+ lreq->flags = lc->lcontext.LeaseFlags;
+ lreq->duration = lc->lcontext.LeaseDuration;
+ memcpy(lreq->parent_lease_key, lc->lcontext.ParentLeaseKey,
+ SMB2_LEASE_KEY_SIZE);
+ lreq->version = 2;
+ } else {
+ struct create_lease *lc = (struct create_lease *)cc;
- memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE);
- lreq->req_state = lc->lcontext.LeaseState;
- lreq->flags = lc->lcontext.LeaseFlags;
- lreq->duration = lc->lcontext.LeaseDuration;
- lreq->version = 1;
- }
- return lreq;
+ memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE);
+ lreq->req_state = lc->lcontext.LeaseState;
+ lreq->flags = lc->lcontext.LeaseFlags;
+ lreq->duration = lc->lcontext.LeaseDuration;
+ lreq->version = 1;
}
-
- kfree(lreq);
- return NULL;
+ return lreq;
}
/**
* smb2_find_context_vals() - find a particular context info in open request
* @open_req: buffer containing smb2 file open(create) request
* @tag: context name to search for
+ * @tag_len: the length of tag
*
* Return: pointer to requested context, NULL if @str context not found
* or error pointer if name length is invalid.
*/
-struct create_context *smb2_find_context_vals(void *open_req, const char *tag)
+struct create_context *smb2_find_context_vals(void *open_req, const char *tag, int tag_len)
{
struct create_context *cc;
unsigned int next = 0;
@@ -1492,7 +1497,7 @@ struct create_context *smb2_find_context_vals(void *open_req, const char *tag)
return ERR_PTR(-EINVAL);
name = (char *)cc + name_off;
- if (memcmp(name, tag, name_len) == 0)
+ if (name_len == tag_len && !memcmp(name, tag, name_len))
return cc;
remain_len -= next;
diff --git a/fs/ksmbd/oplock.h b/fs/smb/server/oplock.h
index 09753448f779..4b0fe6da7694 100644
--- a/fs/ksmbd/oplock.h
+++ b/fs/smb/server/oplock.h
@@ -118,7 +118,7 @@ void create_durable_v2_rsp_buf(char *cc, struct ksmbd_file *fp);
void create_mxac_rsp_buf(char *cc, int maximal_access);
void create_disk_id_rsp_buf(char *cc, __u64 file_id, __u64 vol_id);
void create_posix_rsp_buf(char *cc, struct ksmbd_file *fp);
-struct create_context *smb2_find_context_vals(void *open_req, const char *str);
+struct create_context *smb2_find_context_vals(void *open_req, const char *tag, int tag_len);
struct oplock_info *lookup_lease_in_table(struct ksmbd_conn *conn,
char *lease_key);
int find_same_lease_key(struct ksmbd_session *sess, struct ksmbd_inode *ci,
diff --git a/fs/ksmbd/server.c b/fs/smb/server/server.c
index f9b2e0f19b03..ced7a9e916f0 100644
--- a/fs/ksmbd/server.c
+++ b/fs/smb/server/server.c
@@ -185,24 +185,31 @@ static void __handle_ksmbd_work(struct ksmbd_work *work,
goto send;
}
- if (conn->ops->check_user_session) {
- rc = conn->ops->check_user_session(work);
- if (rc < 0) {
- command = conn->ops->get_cmd_val(work);
- conn->ops->set_rsp_status(work,
- STATUS_USER_SESSION_DELETED);
- goto send;
- } else if (rc > 0) {
- rc = conn->ops->get_ksmbd_tcon(work);
+ do {
+ if (conn->ops->check_user_session) {
+ rc = conn->ops->check_user_session(work);
if (rc < 0) {
- conn->ops->set_rsp_status(work,
- STATUS_NETWORK_NAME_DELETED);
+ if (rc == -EINVAL)
+ conn->ops->set_rsp_status(work,
+ STATUS_INVALID_PARAMETER);
+ else
+ conn->ops->set_rsp_status(work,
+ STATUS_USER_SESSION_DELETED);
goto send;
+ } else if (rc > 0) {
+ rc = conn->ops->get_ksmbd_tcon(work);
+ if (rc < 0) {
+ if (rc == -EINVAL)
+ conn->ops->set_rsp_status(work,
+ STATUS_INVALID_PARAMETER);
+ else
+ conn->ops->set_rsp_status(work,
+ STATUS_NETWORK_NAME_DELETED);
+ goto send;
+ }
}
}
- }
- do {
rc = __process_request(work, conn, &command);
if (rc == SERVER_HANDLER_ABORT)
break;
diff --git a/fs/ksmbd/server.h b/fs/smb/server/server.h
index db7278181760..db7278181760 100644
--- a/fs/ksmbd/server.h
+++ b/fs/smb/server/server.h
diff --git a/fs/ksmbd/smb2misc.c b/fs/smb/server/smb2misc.c
index fbdde426dd01..33b7e6c4ceff 100644
--- a/fs/ksmbd/smb2misc.c
+++ b/fs/smb/server/smb2misc.c
@@ -351,9 +351,16 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work)
int command;
__u32 clc_len; /* calculated length */
__u32 len = get_rfc1002_len(work->request_buf);
+ __u32 req_struct_size, next_cmd = le32_to_cpu(hdr->NextCommand);
- if (le32_to_cpu(hdr->NextCommand) > 0)
- len = le32_to_cpu(hdr->NextCommand);
+ if ((u64)work->next_smb2_rcv_hdr_off + next_cmd > len) {
+ pr_err("next command(%u) offset exceeds smb msg size\n",
+ next_cmd);
+ return 1;
+ }
+
+ if (next_cmd > 0)
+ len = next_cmd;
else if (work->next_smb2_rcv_hdr_off)
len -= work->next_smb2_rcv_hdr_off;
@@ -373,17 +380,9 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work)
}
if (smb2_req_struct_sizes[command] != pdu->StructureSize2) {
- if (command != SMB2_OPLOCK_BREAK_HE &&
- (hdr->Status == 0 || pdu->StructureSize2 != SMB2_ERROR_STRUCTURE_SIZE2_LE)) {
- /* error packets have 9 byte structure size */
- ksmbd_debug(SMB,
- "Illegal request size %u for command %d\n",
- le16_to_cpu(pdu->StructureSize2), command);
- return 1;
- } else if (command == SMB2_OPLOCK_BREAK_HE &&
- hdr->Status == 0 &&
- le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_20 &&
- le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_21) {
+ if (command == SMB2_OPLOCK_BREAK_HE &&
+ le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_20 &&
+ le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_21) {
/* special case for SMB2.1 lease break message */
ksmbd_debug(SMB,
"Illegal request size %d for oplock break\n",
@@ -392,6 +391,14 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work)
}
}
+ req_struct_size = le16_to_cpu(pdu->StructureSize2) +
+ __SMB2_HEADER_STRUCTURE_SIZE;
+ if (command == SMB2_LOCK_HE)
+ req_struct_size -= sizeof(struct smb2_lock_element);
+
+ if (req_struct_size > len + 1)
+ return 1;
+
if (smb2_calc_size(hdr, &clc_len))
return 1;
@@ -416,8 +423,11 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work)
/*
* Allow a message that padded to 8byte boundary.
+ * Linux 4.19.217 with smb 3.0.2 are sometimes
+ * sending messages where the cls_len is exactly
+ * 8 bytes less than len.
*/
- if (clc_len < len && (len - clc_len) < 8)
+ if (clc_len < len && (len - clc_len) <= 8)
goto validate_credit;
pr_err_ratelimited(
diff --git a/fs/ksmbd/smb2ops.c b/fs/smb/server/smb2ops.c
index aed7704a0672..aed7704a0672 100644
--- a/fs/ksmbd/smb2ops.c
+++ b/fs/smb/server/smb2ops.c
diff --git a/fs/ksmbd/smb2pdu.c b/fs/smb/server/smb2pdu.c
index cb93fd231f4e..da1787c68ba0 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -91,7 +91,6 @@ int smb2_get_ksmbd_tcon(struct ksmbd_work *work)
unsigned int cmd = le16_to_cpu(req_hdr->Command);
int tree_id;
- work->tcon = NULL;
if (cmd == SMB2_TREE_CONNECT_HE ||
cmd == SMB2_CANCEL_HE ||
cmd == SMB2_LOGOFF_HE) {
@@ -105,10 +104,28 @@ int smb2_get_ksmbd_tcon(struct ksmbd_work *work)
}
tree_id = le32_to_cpu(req_hdr->Id.SyncId.TreeId);
+
+ /*
+ * If request is not the first in Compound request,
+ * Just validate tree id in header with work->tcon->id.
+ */
+ if (work->next_smb2_rcv_hdr_off) {
+ if (!work->tcon) {
+ pr_err("The first operation in the compound does not have tcon\n");
+ return -EINVAL;
+ }
+ if (work->tcon->id != tree_id) {
+ pr_err("tree id(%u) is different with id(%u) in first operation\n",
+ tree_id, work->tcon->id);
+ return -EINVAL;
+ }
+ return 1;
+ }
+
work->tcon = ksmbd_tree_conn_lookup(work->sess, tree_id);
if (!work->tcon) {
pr_err("Invalid tid %d\n", tree_id);
- return -EINVAL;
+ return -ENOENT;
}
return 1;
@@ -326,13 +343,9 @@ int smb2_set_rsp_credits(struct ksmbd_work *work)
if (hdr->Command == SMB2_NEGOTIATE)
aux_max = 1;
else
- aux_max = conn->vals->max_credits - credit_charge;
+ aux_max = conn->vals->max_credits - conn->total_credits;
credits_granted = min_t(unsigned short, credits_requested, aux_max);
- if (conn->vals->max_credits - conn->total_credits < credits_granted)
- credits_granted = conn->vals->max_credits -
- conn->total_credits;
-
conn->total_credits += credits_granted;
work->credits_granted += credits_granted;
@@ -551,7 +564,6 @@ int smb2_check_user_session(struct ksmbd_work *work)
unsigned int cmd = conn->ops->get_cmd_val(work);
unsigned long long sess_id;
- work->sess = NULL;
/*
* SMB2_ECHO, SMB2_NEGOTIATE, SMB2_SESSION_SETUP command do not
* require a session id, so no need to validate user session's for
@@ -562,15 +574,33 @@ int smb2_check_user_session(struct ksmbd_work *work)
return 0;
if (!ksmbd_conn_good(conn))
- return -EINVAL;
+ return -EIO;
sess_id = le64_to_cpu(req_hdr->SessionId);
+
+ /*
+ * If request is not the first in Compound request,
+ * Just validate session id in header with work->sess->id.
+ */
+ if (work->next_smb2_rcv_hdr_off) {
+ if (!work->sess) {
+ pr_err("The first operation in the compound does not have sess\n");
+ return -EINVAL;
+ }
+ if (work->sess->id != sess_id) {
+ pr_err("session id(%llu) is different with the first operation(%lld)\n",
+ sess_id, work->sess->id);
+ return -EINVAL;
+ }
+ return 1;
+ }
+
/* Check for validity of user session */
work->sess = ksmbd_session_lookup_all(conn, sess_id);
if (work->sess)
return 1;
ksmbd_debug(SMB, "Invalid user session, Uid %llu\n", sess_id);
- return -EINVAL;
+ return -ENOENT;
}
static void destroy_previous_session(struct ksmbd_conn *conn,
@@ -849,13 +879,14 @@ static void assemble_neg_contexts(struct ksmbd_conn *conn,
static __le32 decode_preauth_ctxt(struct ksmbd_conn *conn,
struct smb2_preauth_neg_context *pneg_ctxt,
- int len_of_ctxts)
+ int ctxt_len)
{
/*
* sizeof(smb2_preauth_neg_context) assumes SMB311_SALT_SIZE Salt,
* which may not be present. Only check for used HashAlgorithms[1].
*/
- if (len_of_ctxts < MIN_PREAUTH_CTXT_DATA_LEN)
+ if (ctxt_len <
+ sizeof(struct smb2_neg_context) + MIN_PREAUTH_CTXT_DATA_LEN)
return STATUS_INVALID_PARAMETER;
if (pneg_ctxt->HashAlgorithms != SMB2_PREAUTH_INTEGRITY_SHA512)
@@ -867,15 +898,23 @@ static __le32 decode_preauth_ctxt(struct ksmbd_conn *conn,
static void decode_encrypt_ctxt(struct ksmbd_conn *conn,
struct smb2_encryption_neg_context *pneg_ctxt,
- int len_of_ctxts)
+ int ctxt_len)
{
- int cph_cnt = le16_to_cpu(pneg_ctxt->CipherCount);
- int i, cphs_size = cph_cnt * sizeof(__le16);
+ int cph_cnt;
+ int i, cphs_size;
+
+ if (sizeof(struct smb2_encryption_neg_context) > ctxt_len) {
+ pr_err("Invalid SMB2_ENCRYPTION_CAPABILITIES context size\n");
+ return;
+ }
conn->cipher_type = 0;
+ cph_cnt = le16_to_cpu(pneg_ctxt->CipherCount);
+ cphs_size = cph_cnt * sizeof(__le16);
+
if (sizeof(struct smb2_encryption_neg_context) + cphs_size >
- len_of_ctxts) {
+ ctxt_len) {
pr_err("Invalid cipher count(%d)\n", cph_cnt);
return;
}
@@ -923,15 +962,22 @@ static void decode_compress_ctxt(struct ksmbd_conn *conn,
static void decode_sign_cap_ctxt(struct ksmbd_conn *conn,
struct smb2_signing_capabilities *pneg_ctxt,
- int len_of_ctxts)
+ int ctxt_len)
{
- int sign_algo_cnt = le16_to_cpu(pneg_ctxt->SigningAlgorithmCount);
- int i, sign_alos_size = sign_algo_cnt * sizeof(__le16);
+ int sign_algo_cnt;
+ int i, sign_alos_size;
+
+ if (sizeof(struct smb2_signing_capabilities) > ctxt_len) {
+ pr_err("Invalid SMB2_SIGNING_CAPABILITIES context length\n");
+ return;
+ }
conn->signing_negotiated = false;
+ sign_algo_cnt = le16_to_cpu(pneg_ctxt->SigningAlgorithmCount);
+ sign_alos_size = sign_algo_cnt * sizeof(__le16);
if (sizeof(struct smb2_signing_capabilities) + sign_alos_size >
- len_of_ctxts) {
+ ctxt_len) {
pr_err("Invalid signing algorithm count(%d)\n", sign_algo_cnt);
return;
}
@@ -951,13 +997,13 @@ static void decode_sign_cap_ctxt(struct ksmbd_conn *conn,
static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn,
struct smb2_negotiate_req *req,
- int len_of_smb)
+ unsigned int len_of_smb)
{
/* +4 is to account for the RFC1001 len field */
struct smb2_neg_context *pctx = (struct smb2_neg_context *)req;
int i = 0, len_of_ctxts;
- int offset = le32_to_cpu(req->NegotiateContextOffset);
- int neg_ctxt_cnt = le16_to_cpu(req->NegotiateContextCount);
+ unsigned int offset = le32_to_cpu(req->NegotiateContextOffset);
+ unsigned int neg_ctxt_cnt = le16_to_cpu(req->NegotiateContextCount);
__le32 status = STATUS_INVALID_PARAMETER;
ksmbd_debug(SMB, "decoding %d negotiate contexts\n", neg_ctxt_cnt);
@@ -969,18 +1015,16 @@ static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn,
len_of_ctxts = len_of_smb - offset;
while (i++ < neg_ctxt_cnt) {
- int clen;
-
- /* check that offset is not beyond end of SMB */
- if (len_of_ctxts == 0)
- break;
+ int clen, ctxt_len;
- if (len_of_ctxts < sizeof(struct smb2_neg_context))
+ if (len_of_ctxts < (int)sizeof(struct smb2_neg_context))
break;
pctx = (struct smb2_neg_context *)((char *)pctx + offset);
clen = le16_to_cpu(pctx->DataLength);
- if (clen + sizeof(struct smb2_neg_context) > len_of_ctxts)
+ ctxt_len = clen + sizeof(struct smb2_neg_context);
+
+ if (ctxt_len > len_of_ctxts)
break;
if (pctx->ContextType == SMB2_PREAUTH_INTEGRITY_CAPABILITIES) {
@@ -991,7 +1035,7 @@ static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn,
status = decode_preauth_ctxt(conn,
(struct smb2_preauth_neg_context *)pctx,
- len_of_ctxts);
+ ctxt_len);
if (status != STATUS_SUCCESS)
break;
} else if (pctx->ContextType == SMB2_ENCRYPTION_CAPABILITIES) {
@@ -1002,7 +1046,7 @@ static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn,
decode_encrypt_ctxt(conn,
(struct smb2_encryption_neg_context *)pctx,
- len_of_ctxts);
+ ctxt_len);
} else if (pctx->ContextType == SMB2_COMPRESSION_CAPABILITIES) {
ksmbd_debug(SMB,
"deassemble SMB2_COMPRESSION_CAPABILITIES context\n");
@@ -1021,15 +1065,15 @@ static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn,
} else if (pctx->ContextType == SMB2_SIGNING_CAPABILITIES) {
ksmbd_debug(SMB,
"deassemble SMB2_SIGNING_CAPABILITIES context\n");
+
decode_sign_cap_ctxt(conn,
(struct smb2_signing_capabilities *)pctx,
- len_of_ctxts);
+ ctxt_len);
}
/* offsets must be 8 byte aligned */
- clen = (clen + 7) & ~0x7;
- offset = clen + sizeof(struct smb2_neg_context);
- len_of_ctxts -= clen + sizeof(struct smb2_neg_context);
+ offset = (ctxt_len + 7) & ~0x7;
+ len_of_ctxts -= offset;
}
return status;
}
@@ -1057,16 +1101,16 @@ int smb2_handle_negotiate(struct ksmbd_work *work)
return rc;
}
- if (req->DialectCount == 0) {
- pr_err("malformed packet\n");
+ smb2_buf_len = get_rfc1002_len(work->request_buf);
+ smb2_neg_size = offsetof(struct smb2_negotiate_req, Dialects);
+ if (smb2_neg_size > smb2_buf_len) {
rsp->hdr.Status = STATUS_INVALID_PARAMETER;
rc = -EINVAL;
goto err_out;
}
- smb2_buf_len = get_rfc1002_len(work->request_buf);
- smb2_neg_size = offsetof(struct smb2_negotiate_req, Dialects);
- if (smb2_neg_size > smb2_buf_len) {
+ if (req->DialectCount == 0) {
+ pr_err("malformed packet\n");
rsp->hdr.Status = STATUS_INVALID_PARAMETER;
rc = -EINVAL;
goto err_out;
@@ -1356,7 +1400,7 @@ static struct ksmbd_user *session_user(struct ksmbd_conn *conn,
struct authenticate_message *authblob;
struct ksmbd_user *user;
char *name;
- unsigned int auth_msg_len, name_off, name_len, secbuf_len;
+ unsigned int name_off, name_len, secbuf_len;
secbuf_len = le16_to_cpu(req->SecurityBufferLength);
if (secbuf_len < sizeof(struct authenticate_message)) {
@@ -1366,9 +1410,8 @@ static struct ksmbd_user *session_user(struct ksmbd_conn *conn,
authblob = user_authblob(conn, req);
name_off = le32_to_cpu(authblob->UserName.BufferOffset);
name_len = le16_to_cpu(authblob->UserName.Length);
- auth_msg_len = le16_to_cpu(req->SecurityBufferOffset) + secbuf_len;
- if (auth_msg_len < (u64)name_off + name_len)
+ if (secbuf_len < (u64)name_off + name_len)
return NULL;
name = smb_strndup_from_utf16((const char *)authblob + name_off,
@@ -2240,7 +2283,7 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len,
/* delete the EA only when it exits */
if (rc > 0) {
rc = ksmbd_vfs_remove_xattr(idmap,
- path->dentry,
+ path,
attr_name);
if (rc < 0) {
@@ -2254,8 +2297,7 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len,
/* if the EA doesn't exist, just do nothing. */
rc = 0;
} else {
- rc = ksmbd_vfs_setxattr(idmap,
- path->dentry, attr_name, value,
+ rc = ksmbd_vfs_setxattr(idmap, path, attr_name, value,
le16_to_cpu(eabuf->EaValueLength), 0);
if (rc < 0) {
ksmbd_debug(SMB,
@@ -2312,8 +2354,7 @@ static noinline int smb2_set_stream_name_xattr(const struct path *path,
return -EBADF;
}
- rc = ksmbd_vfs_setxattr(idmap, path->dentry,
- xattr_stream_name, NULL, 0, 0);
+ rc = ksmbd_vfs_setxattr(idmap, path, xattr_stream_name, NULL, 0, 0);
if (rc < 0)
pr_err("Failed to store XATTR stream name :%d\n", rc);
return 0;
@@ -2341,7 +2382,7 @@ static int smb2_remove_smb_xattrs(const struct path *path)
if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
!strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX,
STREAM_PREFIX_LEN)) {
- err = ksmbd_vfs_remove_xattr(idmap, path->dentry,
+ err = ksmbd_vfs_remove_xattr(idmap, path,
name);
if (err)
ksmbd_debug(SMB, "remove xattr failed : %s\n",
@@ -2388,8 +2429,7 @@ static void smb2_new_xattrs(struct ksmbd_tree_connect *tcon, const struct path *
da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME |
XATTR_DOSINFO_ITIME;
- rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt),
- path->dentry, &da);
+ rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt), path, &da);
if (rc)
ksmbd_debug(SMB, "failed to store file attribute into xattr\n");
}
@@ -2464,7 +2504,7 @@ static int smb2_create_sd_buffer(struct ksmbd_work *work,
return -ENOENT;
/* Parse SD BUFFER create contexts */
- context = smb2_find_context_vals(req, SMB2_CREATE_SD_BUFFER);
+ context = smb2_find_context_vals(req, SMB2_CREATE_SD_BUFFER, 4);
if (!context)
return -ENOENT;
else if (IS_ERR(context))
@@ -2666,7 +2706,7 @@ int smb2_open(struct ksmbd_work *work)
if (req->CreateContextsOffset) {
/* Parse non-durable handle create contexts */
- context = smb2_find_context_vals(req, SMB2_CREATE_EA_BUFFER);
+ context = smb2_find_context_vals(req, SMB2_CREATE_EA_BUFFER, 4);
if (IS_ERR(context)) {
rc = PTR_ERR(context);
goto err_out1;
@@ -2686,7 +2726,7 @@ int smb2_open(struct ksmbd_work *work)
}
context = smb2_find_context_vals(req,
- SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST);
+ SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST, 4);
if (IS_ERR(context)) {
rc = PTR_ERR(context);
goto err_out1;
@@ -2697,7 +2737,7 @@ int smb2_open(struct ksmbd_work *work)
}
context = smb2_find_context_vals(req,
- SMB2_CREATE_TIMEWARP_REQUEST);
+ SMB2_CREATE_TIMEWARP_REQUEST, 4);
if (IS_ERR(context)) {
rc = PTR_ERR(context);
goto err_out1;
@@ -2709,7 +2749,7 @@ int smb2_open(struct ksmbd_work *work)
if (tcon->posix_extensions) {
context = smb2_find_context_vals(req,
- SMB2_CREATE_TAG_POSIX);
+ SMB2_CREATE_TAG_POSIX, 16);
if (IS_ERR(context)) {
rc = PTR_ERR(context);
goto err_out1;
@@ -2963,7 +3003,7 @@ int smb2_open(struct ksmbd_work *work)
struct inode *inode = d_inode(path.dentry);
posix_acl_rc = ksmbd_vfs_inherit_posix_acl(idmap,
- path.dentry,
+ &path,
d_inode(path.dentry->d_parent));
if (posix_acl_rc)
ksmbd_debug(SMB, "inherit posix acl failed : %d\n", posix_acl_rc);
@@ -2979,7 +3019,7 @@ int smb2_open(struct ksmbd_work *work)
if (rc) {
if (posix_acl_rc)
ksmbd_vfs_set_init_posix_acl(idmap,
- path.dentry);
+ &path);
if (test_share_config_flag(work->tcon->share_conf,
KSMBD_SHARE_FLAG_ACL_XATTR)) {
@@ -3019,7 +3059,7 @@ int smb2_open(struct ksmbd_work *work)
rc = ksmbd_vfs_set_sd_xattr(conn,
idmap,
- path.dentry,
+ &path,
pntsd,
pntsd_size);
kfree(pntsd);
@@ -3107,7 +3147,7 @@ int smb2_open(struct ksmbd_work *work)
struct create_alloc_size_req *az_req;
az_req = (struct create_alloc_size_req *)smb2_find_context_vals(req,
- SMB2_CREATE_ALLOCATION_SIZE);
+ SMB2_CREATE_ALLOCATION_SIZE, 4);
if (IS_ERR(az_req)) {
rc = PTR_ERR(az_req);
goto err_out;
@@ -3134,7 +3174,7 @@ int smb2_open(struct ksmbd_work *work)
err);
}
- context = smb2_find_context_vals(req, SMB2_CREATE_QUERY_ON_DISK_ID);
+ context = smb2_find_context_vals(req, SMB2_CREATE_QUERY_ON_DISK_ID, 4);
if (IS_ERR(context)) {
rc = PTR_ERR(context);
goto err_out;
@@ -4359,21 +4399,6 @@ static int get_file_basic_info(struct smb2_query_info_rsp *rsp,
return 0;
}
-static unsigned long long get_allocation_size(struct inode *inode,
- struct kstat *stat)
-{
- unsigned long long alloc_size = 0;
-
- if (!S_ISDIR(stat->mode)) {
- if ((inode->i_blocks << 9) <= stat->size)
- alloc_size = stat->size;
- else
- alloc_size = inode->i_blocks << 9;
- }
-
- return alloc_size;
-}
-
static void get_file_standard_info(struct smb2_query_info_rsp *rsp,
struct ksmbd_file *fp, void *rsp_org)
{
@@ -4388,7 +4413,7 @@ static void get_file_standard_info(struct smb2_query_info_rsp *rsp,
sinfo = (struct smb2_file_standard_info *)rsp->Buffer;
delete_pending = ksmbd_inode_pending_delete(fp);
- sinfo->AllocationSize = cpu_to_le64(get_allocation_size(inode, &stat));
+ sinfo->AllocationSize = cpu_to_le64(inode->i_blocks << 9);
sinfo->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
sinfo->NumberOfLinks = cpu_to_le32(get_nlink(&stat) - delete_pending);
sinfo->DeletePending = delete_pending;
@@ -4453,7 +4478,7 @@ static int get_file_all_info(struct ksmbd_work *work,
file_info->Attributes = fp->f_ci->m_fattr;
file_info->Pad1 = 0;
file_info->AllocationSize =
- cpu_to_le64(get_allocation_size(inode, &stat));
+ cpu_to_le64(inode->i_blocks << 9);
file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
file_info->NumberOfLinks =
cpu_to_le32(get_nlink(&stat) - delete_pending);
@@ -4642,7 +4667,7 @@ static int get_file_network_open_info(struct smb2_query_info_rsp *rsp,
file_info->ChangeTime = cpu_to_le64(time);
file_info->Attributes = fp->f_ci->m_fattr;
file_info->AllocationSize =
- cpu_to_le64(get_allocation_size(inode, &stat));
+ cpu_to_le64(inode->i_blocks << 9);
file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size);
file_info->Reserved = cpu_to_le32(0);
rsp->OutputBufferLength =
@@ -5470,7 +5495,7 @@ static int smb2_rename(struct ksmbd_work *work,
goto out;
rc = ksmbd_vfs_setxattr(file_mnt_idmap(fp->filp),
- fp->filp->f_path.dentry,
+ &fp->filp->f_path,
xattr_stream_name,
NULL, 0, 0);
if (rc < 0) {
@@ -5507,7 +5532,7 @@ static int smb2_create_link(struct ksmbd_work *work,
{
char *link_name = NULL, *target_name = NULL, *pathname = NULL;
struct path path;
- bool file_present = true;
+ bool file_present = false;
int rc;
if (buf_len < (u64)sizeof(struct smb2_file_link_info) +
@@ -5540,8 +5565,8 @@ static int smb2_create_link(struct ksmbd_work *work,
if (rc) {
if (rc != -ENOENT)
goto out;
- file_present = false;
- }
+ } else
+ file_present = true;
if (file_info->ReplaceIfExists) {
if (file_present) {
@@ -5635,8 +5660,7 @@ static int set_file_basic_info(struct ksmbd_file *fp,
da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME |
XATTR_DOSINFO_ITIME;
- rc = ksmbd_vfs_set_dos_attrib_xattr(idmap,
- filp->f_path.dentry, &da);
+ rc = ksmbd_vfs_set_dos_attrib_xattr(idmap, &filp->f_path, &da);
if (rc)
ksmbd_debug(SMB,
"failed to restore file attribute in EA\n");
@@ -7491,7 +7515,7 @@ static inline int fsctl_set_sparse(struct ksmbd_work *work, u64 id,
da.attr = le32_to_cpu(fp->f_ci->m_fattr);
ret = ksmbd_vfs_set_dos_attrib_xattr(idmap,
- fp->filp->f_path.dentry, &da);
+ &fp->filp->f_path, &da);
if (ret)
fp->f_ci->m_fattr = old_fattr;
}
diff --git a/fs/ksmbd/smb2pdu.h b/fs/smb/server/smb2pdu.h
index 2767c08a534a..2767c08a534a 100644
--- a/fs/ksmbd/smb2pdu.h
+++ b/fs/smb/server/smb2pdu.h
diff --git a/fs/ksmbd/smb_common.c b/fs/smb/server/smb_common.c
index af0c2a9b8529..569e5eecdf3d 100644
--- a/fs/ksmbd/smb_common.c
+++ b/fs/smb/server/smb_common.c
@@ -158,7 +158,19 @@ int ksmbd_verify_smb_message(struct ksmbd_work *work)
*/
bool ksmbd_smb_request(struct ksmbd_conn *conn)
{
- return conn->request_buf[0] == 0;
+ __le32 *proto = (__le32 *)smb2_get_msg(conn->request_buf);
+
+ if (*proto == SMB2_COMPRESSION_TRANSFORM_ID) {
+ pr_err_ratelimited("smb2 compression not support yet");
+ return false;
+ }
+
+ if (*proto != SMB1_PROTO_NUMBER &&
+ *proto != SMB2_PROTO_NUMBER &&
+ *proto != SMB2_TRANSFORM_PROTO_NUM)
+ return false;
+
+ return true;
}
static bool supported_protocol(int idx)
diff --git a/fs/ksmbd/smb_common.h b/fs/smb/server/smb_common.h
index 9130d2e3cd78..6b0d5f1fe85c 100644
--- a/fs/ksmbd/smb_common.h
+++ b/fs/smb/server/smb_common.h
@@ -10,7 +10,7 @@
#include "glob.h"
#include "nterr.h"
-#include "../smbfs_common/smb2pdu.h"
+#include "../common/smb2pdu.h"
#include "smb2pdu.h"
/* ksmbd's Specific ERRNO */
diff --git a/fs/ksmbd/smbacl.c b/fs/smb/server/smbacl.c
index 6d6cfb6957a9..ad919a4239d0 100644
--- a/fs/ksmbd/smbacl.c
+++ b/fs/smb/server/smbacl.c
@@ -1162,8 +1162,7 @@ pass:
pntsd_size += sizeof(struct smb_acl) + nt_size;
}
- ksmbd_vfs_set_sd_xattr(conn, idmap,
- path->dentry, pntsd, pntsd_size);
+ ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, pntsd_size);
kfree(pntsd);
}
@@ -1290,7 +1289,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path,
if (IS_ENABLED(CONFIG_FS_POSIX_ACL)) {
posix_acls = get_inode_acl(d_inode(path->dentry), ACL_TYPE_ACCESS);
- if (posix_acls && !found) {
+ if (!IS_ERR_OR_NULL(posix_acls) && !found) {
unsigned int id = -1;
pa_entry = posix_acls->a_entries;
@@ -1314,7 +1313,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path,
}
}
}
- if (posix_acls)
+ if (!IS_ERR_OR_NULL(posix_acls))
posix_acl_release(posix_acls);
}
@@ -1383,7 +1382,7 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
newattrs.ia_valid |= ATTR_MODE;
newattrs.ia_mode = (inode->i_mode & ~0777) | (fattr.cf_mode & 0777);
- ksmbd_vfs_remove_acl_xattrs(idmap, path->dentry);
+ ksmbd_vfs_remove_acl_xattrs(idmap, path);
/* Update posix acls */
if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && fattr.cf_dacls) {
rc = set_posix_acl(idmap, path->dentry,
@@ -1414,9 +1413,8 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
if (test_share_config_flag(tcon->share_conf, KSMBD_SHARE_FLAG_ACL_XATTR)) {
/* Update WinACL in xattr */
- ksmbd_vfs_remove_sd_xattrs(idmap, path->dentry);
- ksmbd_vfs_set_sd_xattr(conn, idmap,
- path->dentry, pntsd, ntsd_len);
+ ksmbd_vfs_remove_sd_xattrs(idmap, path);
+ ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, ntsd_len);
}
out:
diff --git a/fs/ksmbd/smbacl.h b/fs/smb/server/smbacl.h
index 49a8c292bd2e..49a8c292bd2e 100644
--- a/fs/ksmbd/smbacl.h
+++ b/fs/smb/server/smbacl.h
diff --git a/fs/ksmbd/smbfsctl.h b/fs/smb/server/smbfsctl.h
index b98418aae20c..ecdf8f6e0df4 100644
--- a/fs/ksmbd/smbfsctl.h
+++ b/fs/smb/server/smbfsctl.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
/*
- * fs/cifs/smbfsctl.h: SMB, CIFS, SMB2 FSCTL definitions
+ * fs/smb/server/smbfsctl.h: SMB, CIFS, SMB2 FSCTL definitions
*
* Copyright (c) International Business Machines Corp., 2002,2009
* Author(s): Steve French (sfrench@us.ibm.com)
diff --git a/fs/ksmbd/smbstatus.h b/fs/smb/server/smbstatus.h
index 108a8b6ed24a..8963deb42404 100644
--- a/fs/ksmbd/smbstatus.h
+++ b/fs/smb/server/smbstatus.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1+ */
/*
- * fs/cifs/smb2status.h
+ * fs/server/smb2status.h
*
* SMB2 Status code (network error) definitions
* Definitions are from MS-ERREF
diff --git a/fs/ksmbd/transport_ipc.c b/fs/smb/server/transport_ipc.c
index 40c721f9227e..40c721f9227e 100644
--- a/fs/ksmbd/transport_ipc.c
+++ b/fs/smb/server/transport_ipc.c
diff --git a/fs/ksmbd/transport_ipc.h b/fs/smb/server/transport_ipc.h
index 5e5b90a0c187..5e5b90a0c187 100644
--- a/fs/ksmbd/transport_ipc.h
+++ b/fs/smb/server/transport_ipc.h
diff --git a/fs/ksmbd/transport_rdma.c b/fs/smb/server/transport_rdma.c
index c06efc020bd9..c06efc020bd9 100644
--- a/fs/ksmbd/transport_rdma.c
+++ b/fs/smb/server/transport_rdma.c
diff --git a/fs/ksmbd/transport_rdma.h b/fs/smb/server/transport_rdma.h
index 77aee4e5c9dc..77aee4e5c9dc 100644
--- a/fs/ksmbd/transport_rdma.h
+++ b/fs/smb/server/transport_rdma.h
diff --git a/fs/ksmbd/transport_tcp.c b/fs/smb/server/transport_tcp.c
index eff7a1d793f0..eff7a1d793f0 100644
--- a/fs/ksmbd/transport_tcp.c
+++ b/fs/smb/server/transport_tcp.c
diff --git a/fs/ksmbd/transport_tcp.h b/fs/smb/server/transport_tcp.h
index e338bebe322f..e338bebe322f 100644
--- a/fs/ksmbd/transport_tcp.h
+++ b/fs/smb/server/transport_tcp.h
diff --git a/fs/ksmbd/unicode.c b/fs/smb/server/unicode.c
index 9ae676906ed3..9ae676906ed3 100644
--- a/fs/ksmbd/unicode.c
+++ b/fs/smb/server/unicode.c
diff --git a/fs/ksmbd/unicode.h b/fs/smb/server/unicode.h
index 076f6034a789..076f6034a789 100644
--- a/fs/ksmbd/unicode.h
+++ b/fs/smb/server/unicode.h
diff --git a/fs/ksmbd/uniupr.h b/fs/smb/server/uniupr.h
index 26583b776897..26583b776897 100644
--- a/fs/ksmbd/uniupr.h
+++ b/fs/smb/server/uniupr.h
diff --git a/fs/ksmbd/vfs.c b/fs/smb/server/vfs.c
index 778c152708e4..81489fdedd8e 100644
--- a/fs/ksmbd/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -86,12 +86,14 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
err = vfs_path_parent_lookup(filename, flags,
&parent_path, &last, &type,
root_share_path);
- putname(filename);
- if (err)
+ if (err) {
+ putname(filename);
return err;
+ }
if (unlikely(type != LAST_NORM)) {
path_put(&parent_path);
+ putname(filename);
return -ENOENT;
}
@@ -108,12 +110,14 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
path->dentry = d;
path->mnt = share_conf->vfs_path.mnt;
path_put(&parent_path);
+ putname(filename);
return 0;
err_out:
inode_unlock(parent_path.dentry->d_inode);
path_put(&parent_path);
+ putname(filename);
return -ENOENT;
}
@@ -166,6 +170,10 @@ int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode)
return err;
}
+ err = mnt_want_write(path.mnt);
+ if (err)
+ goto out_err;
+
mode |= S_IFREG;
err = vfs_create(mnt_idmap(path.mnt), d_inode(path.dentry),
dentry, mode, true);
@@ -175,6 +183,9 @@ int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode)
} else {
pr_err("File(%s): creation failed (err:%d)\n", name, err);
}
+ mnt_drop_write(path.mnt);
+
+out_err:
done_path_create(&path, dentry);
return err;
}
@@ -205,30 +216,35 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode)
return err;
}
+ err = mnt_want_write(path.mnt);
+ if (err)
+ goto out_err2;
+
idmap = mnt_idmap(path.mnt);
mode |= S_IFDIR;
err = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode);
- if (err) {
- goto out;
- } else if (d_unhashed(dentry)) {
+ if (!err && d_unhashed(dentry)) {
struct dentry *d;
d = lookup_one(idmap, dentry->d_name.name, dentry->d_parent,
dentry->d_name.len);
if (IS_ERR(d)) {
err = PTR_ERR(d);
- goto out;
+ goto out_err1;
}
if (unlikely(d_is_negative(d))) {
dput(d);
err = -ENOENT;
- goto out;
+ goto out_err1;
}
ksmbd_vfs_inherit_owner(work, d_inode(path.dentry), d_inode(d));
dput(d);
}
-out:
+
+out_err1:
+ mnt_drop_write(path.mnt);
+out_err2:
done_path_create(&path, dentry);
if (err)
pr_err("mkdir(%s): creation failed (err:%d)\n", name, err);
@@ -439,7 +455,7 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos,
memcpy(&stream_buf[*pos], buf, count);
err = ksmbd_vfs_setxattr(idmap,
- fp->filp->f_path.dentry,
+ &fp->filp->f_path,
fp->stream.name,
(void *)stream_buf,
size,
@@ -585,6 +601,10 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, const struct path *path)
goto out_err;
}
+ err = mnt_want_write(path->mnt);
+ if (err)
+ goto out_err;
+
idmap = mnt_idmap(path->mnt);
if (S_ISDIR(d_inode(path->dentry)->i_mode)) {
err = vfs_rmdir(idmap, d_inode(parent), path->dentry);
@@ -595,6 +615,7 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, const struct path *path)
if (err)
ksmbd_debug(VFS, "unlink failed, err %d\n", err);
}
+ mnt_drop_write(path->mnt);
out_err:
ksmbd_revert_fsids(work);
@@ -640,11 +661,16 @@ int ksmbd_vfs_link(struct ksmbd_work *work, const char *oldname,
goto out3;
}
+ err = mnt_want_write(newpath.mnt);
+ if (err)
+ goto out3;
+
err = vfs_link(oldpath.dentry, mnt_idmap(newpath.mnt),
d_inode(newpath.dentry),
dentry, NULL);
if (err)
ksmbd_debug(VFS, "vfs_link failed err %d\n", err);
+ mnt_drop_write(newpath.mnt);
out3:
done_path_create(&newpath, dentry);
@@ -690,6 +716,10 @@ retry:
goto out2;
}
+ err = mnt_want_write(old_path->mnt);
+ if (err)
+ goto out2;
+
trap = lock_rename_child(old_child, new_path.dentry);
old_parent = dget(old_child->d_parent);
@@ -743,6 +773,7 @@ retry:
rd.new_dir = new_path.dentry->d_inode,
rd.new_dentry = new_dentry,
rd.flags = flags,
+ rd.delegated_inode = NULL,
err = vfs_rename(&rd);
if (err)
ksmbd_debug(VFS, "vfs_rename failed err %d\n", err);
@@ -752,6 +783,7 @@ out4:
out3:
dput(old_parent);
unlock_rename(old_parent, new_path.dentry);
+ mnt_drop_write(old_path->mnt);
out2:
path_put(&new_path);
@@ -892,19 +924,24 @@ ssize_t ksmbd_vfs_getxattr(struct mnt_idmap *idmap,
* Return: 0 on success, otherwise error
*/
int ksmbd_vfs_setxattr(struct mnt_idmap *idmap,
- struct dentry *dentry, const char *attr_name,
+ const struct path *path, const char *attr_name,
void *attr_value, size_t attr_size, int flags)
{
int err;
+ err = mnt_want_write(path->mnt);
+ if (err)
+ return err;
+
err = vfs_setxattr(idmap,
- dentry,
+ path->dentry,
attr_name,
attr_value,
attr_size,
flags);
if (err)
ksmbd_debug(VFS, "setxattr failed, err %d\n", err);
+ mnt_drop_write(path->mnt);
return err;
}
@@ -1008,9 +1045,18 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length,
}
int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap,
- struct dentry *dentry, char *attr_name)
+ const struct path *path, char *attr_name)
{
- return vfs_removexattr(idmap, dentry, attr_name);
+ int err;
+
+ err = mnt_want_write(path->mnt);
+ if (err)
+ return err;
+
+ err = vfs_removexattr(idmap, path->dentry, attr_name);
+ mnt_drop_write(path->mnt);
+
+ return err;
}
int ksmbd_vfs_unlink(struct file *filp)
@@ -1019,6 +1065,10 @@ int ksmbd_vfs_unlink(struct file *filp)
struct dentry *dir, *dentry = filp->f_path.dentry;
struct mnt_idmap *idmap = file_mnt_idmap(filp);
+ err = mnt_want_write(filp->f_path.mnt);
+ if (err)
+ return err;
+
dir = dget_parent(dentry);
err = ksmbd_vfs_lock_parent(dir, dentry);
if (err)
@@ -1036,6 +1086,7 @@ int ksmbd_vfs_unlink(struct file *filp)
ksmbd_debug(VFS, "failed to delete, err %d\n", err);
out:
dput(dir);
+ mnt_drop_write(filp->f_path.mnt);
return err;
}
@@ -1239,13 +1290,13 @@ struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work,
}
int ksmbd_vfs_remove_acl_xattrs(struct mnt_idmap *idmap,
- struct dentry *dentry)
+ const struct path *path)
{
char *name, *xattr_list = NULL;
ssize_t xattr_list_len;
int err = 0;
- xattr_list_len = ksmbd_vfs_listxattr(dentry, &xattr_list);
+ xattr_list_len = ksmbd_vfs_listxattr(path->dentry, &xattr_list);
if (xattr_list_len < 0) {
goto out;
} else if (!xattr_list_len) {
@@ -1253,6 +1304,10 @@ int ksmbd_vfs_remove_acl_xattrs(struct mnt_idmap *idmap,
goto out;
}
+ err = mnt_want_write(path->mnt);
+ if (err)
+ goto out;
+
for (name = xattr_list; name - xattr_list < xattr_list_len;
name += strlen(name) + 1) {
ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name));
@@ -1261,25 +1316,26 @@ int ksmbd_vfs_remove_acl_xattrs(struct mnt_idmap *idmap,
sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1) ||
!strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1)) {
- err = vfs_remove_acl(idmap, dentry, name);
+ err = vfs_remove_acl(idmap, path->dentry, name);
if (err)
ksmbd_debug(SMB,
"remove acl xattr failed : %s\n", name);
}
}
+ mnt_drop_write(path->mnt);
+
out:
kvfree(xattr_list);
return err;
}
-int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap,
- struct dentry *dentry)
+int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap, const struct path *path)
{
char *name, *xattr_list = NULL;
ssize_t xattr_list_len;
int err = 0;
- xattr_list_len = ksmbd_vfs_listxattr(dentry, &xattr_list);
+ xattr_list_len = ksmbd_vfs_listxattr(path->dentry, &xattr_list);
if (xattr_list_len < 0) {
goto out;
} else if (!xattr_list_len) {
@@ -1292,7 +1348,7 @@ int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap,
ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name));
if (!strncmp(name, XATTR_NAME_SD, XATTR_NAME_SD_LEN)) {
- err = ksmbd_vfs_remove_xattr(idmap, dentry, name);
+ err = ksmbd_vfs_remove_xattr(idmap, path, name);
if (err)
ksmbd_debug(SMB, "remove xattr failed : %s\n", name);
}
@@ -1316,7 +1372,7 @@ static struct xattr_smb_acl *ksmbd_vfs_make_xattr_posix_acl(struct mnt_idmap *id
return NULL;
posix_acls = get_inode_acl(inode, acl_type);
- if (!posix_acls)
+ if (IS_ERR_OR_NULL(posix_acls))
return NULL;
smb_acl = kzalloc(sizeof(struct xattr_smb_acl) +
@@ -1369,13 +1425,14 @@ out:
int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
struct mnt_idmap *idmap,
- struct dentry *dentry,
+ const struct path *path,
struct smb_ntsd *pntsd, int len)
{
int rc;
struct ndr sd_ndr = {0}, acl_ndr = {0};
struct xattr_ntacl acl = {0};
struct xattr_smb_acl *smb_acl, *def_smb_acl = NULL;
+ struct dentry *dentry = path->dentry;
struct inode *inode = d_inode(dentry);
acl.version = 4;
@@ -1427,7 +1484,7 @@ int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
goto out;
}
- rc = ksmbd_vfs_setxattr(idmap, dentry,
+ rc = ksmbd_vfs_setxattr(idmap, path,
XATTR_NAME_SD, sd_ndr.data,
sd_ndr.offset, 0);
if (rc < 0)
@@ -1517,7 +1574,7 @@ free_n_data:
}
int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap,
- struct dentry *dentry,
+ const struct path *path,
struct xattr_dos_attrib *da)
{
struct ndr n;
@@ -1527,7 +1584,7 @@ int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap,
if (err)
return err;
- err = ksmbd_vfs_setxattr(idmap, dentry, XATTR_NAME_DOS_ATTRIBUTE,
+ err = ksmbd_vfs_setxattr(idmap, path, XATTR_NAME_DOS_ATTRIBUTE,
(void *)n.data, n.offset, 0);
if (err)
ksmbd_debug(SMB, "failed to store dos attribute in xattr\n");
@@ -1764,10 +1821,11 @@ void ksmbd_vfs_posix_lock_unblock(struct file_lock *flock)
}
int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap,
- struct dentry *dentry)
+ struct path *path)
{
struct posix_acl_state acl_state;
struct posix_acl *acls;
+ struct dentry *dentry = path->dentry;
struct inode *inode = d_inode(dentry);
int rc;
@@ -1797,6 +1855,11 @@ int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap,
return -ENOMEM;
}
posix_state_to_acl(&acl_state, acls->a_entries);
+
+ rc = mnt_want_write(path->mnt);
+ if (rc)
+ goto out_err;
+
rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls);
if (rc < 0)
ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n",
@@ -1808,16 +1871,20 @@ int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap,
ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n",
rc);
}
+ mnt_drop_write(path->mnt);
+
+out_err:
free_acl_state(&acl_state);
posix_acl_release(acls);
return rc;
}
int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap,
- struct dentry *dentry, struct inode *parent_inode)
+ struct path *path, struct inode *parent_inode)
{
struct posix_acl *acls;
struct posix_acl_entry *pace;
+ struct dentry *dentry = path->dentry;
struct inode *inode = d_inode(dentry);
int rc, i;
@@ -1825,7 +1892,7 @@ int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap,
return -EOPNOTSUPP;
acls = get_inode_acl(parent_inode, ACL_TYPE_DEFAULT);
- if (!acls)
+ if (IS_ERR_OR_NULL(acls))
return -ENOENT;
pace = acls->a_entries;
@@ -1836,6 +1903,10 @@ int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap,
}
}
+ rc = mnt_want_write(path->mnt);
+ if (rc)
+ goto out_err;
+
rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls);
if (rc < 0)
ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n",
@@ -1847,6 +1918,9 @@ int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap,
ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n",
rc);
}
+ mnt_drop_write(path->mnt);
+
+out_err:
posix_acl_release(acls);
return rc;
}
diff --git a/fs/ksmbd/vfs.h b/fs/smb/server/vfs.h
index a4ae89f3230d..8c0931d4d531 100644
--- a/fs/ksmbd/vfs.h
+++ b/fs/smb/server/vfs.h
@@ -108,12 +108,12 @@ ssize_t ksmbd_vfs_casexattr_len(struct mnt_idmap *idmap,
struct dentry *dentry, char *attr_name,
int attr_name_len);
int ksmbd_vfs_setxattr(struct mnt_idmap *idmap,
- struct dentry *dentry, const char *attr_name,
+ const struct path *path, const char *attr_name,
void *attr_value, size_t attr_size, int flags);
int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name,
size_t *xattr_stream_name_size, int s_type);
int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap,
- struct dentry *dentry, char *attr_name);
+ const struct path *path, char *attr_name);
int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
unsigned int flags, struct path *path,
bool caseless);
@@ -139,26 +139,25 @@ void ksmbd_vfs_posix_lock_wait(struct file_lock *flock);
int ksmbd_vfs_posix_lock_wait_timeout(struct file_lock *flock, long timeout);
void ksmbd_vfs_posix_lock_unblock(struct file_lock *flock);
int ksmbd_vfs_remove_acl_xattrs(struct mnt_idmap *idmap,
- struct dentry *dentry);
-int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap,
- struct dentry *dentry);
+ const struct path *path);
+int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap, const struct path *path);
int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
struct mnt_idmap *idmap,
- struct dentry *dentry,
+ const struct path *path,
struct smb_ntsd *pntsd, int len);
int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn,
struct mnt_idmap *idmap,
struct dentry *dentry,
struct smb_ntsd **pntsd);
int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap,
- struct dentry *dentry,
+ const struct path *path,
struct xattr_dos_attrib *da);
int ksmbd_vfs_get_dos_attrib_xattr(struct mnt_idmap *idmap,
struct dentry *dentry,
struct xattr_dos_attrib *da);
int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap,
- struct dentry *dentry);
+ struct path *path);
int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap,
- struct dentry *dentry,
+ struct path *path,
struct inode *parent_inode);
#endif /* __KSMBD_VFS_H__ */
diff --git a/fs/ksmbd/vfs_cache.c b/fs/smb/server/vfs_cache.c
index 2d0138e72d78..f41f8d6108ce 100644
--- a/fs/ksmbd/vfs_cache.c
+++ b/fs/smb/server/vfs_cache.c
@@ -252,7 +252,7 @@ static void __ksmbd_inode_close(struct ksmbd_file *fp)
if (ksmbd_stream_fd(fp) && (ci->m_flags & S_DEL_ON_CLS_STREAM)) {
ci->m_flags &= ~S_DEL_ON_CLS_STREAM;
err = ksmbd_vfs_remove_xattr(file_mnt_idmap(filp),
- filp->f_path.dentry,
+ &filp->f_path,
fp->stream.name);
if (err)
pr_err("remove xattr failed : %s\n",
diff --git a/fs/ksmbd/vfs_cache.h b/fs/smb/server/vfs_cache.h
index fcb13413fa8d..fcb13413fa8d 100644
--- a/fs/ksmbd/vfs_cache.h
+++ b/fs/smb/server/vfs_cache.h
diff --git a/fs/ksmbd/xattr.h b/fs/smb/server/xattr.h
index 16499ca5c82d..16499ca5c82d 100644
--- a/fs/ksmbd/xattr.h
+++ b/fs/smb/server/xattr.h
diff --git a/fs/statfs.c b/fs/statfs.c
index 0ba34c135593..96d1c3edf289 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -130,6 +130,7 @@ static int do_statfs_native(struct kstatfs *st, struct statfs __user *p)
if (sizeof(buf) == sizeof(*st))
memcpy(&buf, st, sizeof(*st));
else {
+ memset(&buf, 0, sizeof(buf));
if (sizeof buf.f_blocks == 4) {
if ((st->f_blocks | st->f_bfree | st->f_bavail |
st->f_bsize | st->f_frsize) &
@@ -158,7 +159,6 @@ static int do_statfs_native(struct kstatfs *st, struct statfs __user *p)
buf.f_namelen = st->f_namelen;
buf.f_frsize = st->f_frsize;
buf.f_flags = st->f_flags;
- memset(buf.f_spare, 0, sizeof(buf.f_spare));
}
if (copy_to_user(p, &buf, sizeof(buf)))
return -EFAULT;
@@ -171,6 +171,7 @@ static int do_statfs64(struct kstatfs *st, struct statfs64 __user *p)
if (sizeof(buf) == sizeof(*st))
memcpy(&buf, st, sizeof(*st));
else {
+ memset(&buf, 0, sizeof(buf));
buf.f_type = st->f_type;
buf.f_bsize = st->f_bsize;
buf.f_blocks = st->f_blocks;
@@ -182,7 +183,6 @@ static int do_statfs64(struct kstatfs *st, struct statfs64 __user *p)
buf.f_namelen = st->f_namelen;
buf.f_frsize = st->f_frsize;
buf.f_flags = st->f_flags;
- memset(buf.f_spare, 0, sizeof(buf.f_spare));
}
if (copy_to_user(p, &buf, sizeof(buf)))
return -EFAULT;
diff --git a/fs/super.c b/fs/super.c
index 86f40f898198..05ff6abddd3c 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -54,7 +54,7 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = {
* One thing we have to be careful of with a per-sb shrinker is that we don't
* drop the last active reference to the superblock from within the shrinker.
* If that happens we could trigger unregistering the shrinker from within the
- * shrinker path and that leads to deadlock on the shrinker_mutex. Hence we
+ * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
* take a passive reference to the superblock to avoid this from occurring.
*/
static unsigned long super_cache_scan(struct shrinker *shrink,
@@ -595,7 +595,7 @@ retry:
fc->s_fs_info = NULL;
s->s_type = fc->fs_type;
s->s_iflags |= fc->s_iflags;
- strlcpy(s->s_id, s->s_type->name, sizeof(s->s_id));
+ strscpy(s->s_id, s->s_type->name, sizeof(s->s_id));
list_add_tail(&s->s_list, &super_blocks);
hlist_add_head(&s->s_instances, &s->s_type->fs_supers);
spin_unlock(&sb_lock);
@@ -674,7 +674,7 @@ retry:
return ERR_PTR(err);
}
s->s_type = type;
- strlcpy(s->s_id, type->name, sizeof(s->s_id));
+ strscpy(s->s_id, type->name, sizeof(s->s_id));
list_add_tail(&s->s_list, &super_blocks);
hlist_add_head(&s->s_instances, &type->fs_supers);
spin_unlock(&sb_lock);
@@ -903,6 +903,7 @@ int reconfigure_super(struct fs_context *fc)
struct super_block *sb = fc->root->d_sb;
int retval;
bool remount_ro = false;
+ bool remount_rw = false;
bool force = fc->sb_flags & SB_FORCE;
if (fc->sb_flags_mask & ~MS_RMT_MASK)
@@ -920,7 +921,7 @@ int reconfigure_super(struct fs_context *fc)
bdev_read_only(sb->s_bdev))
return -EACCES;
#endif
-
+ remount_rw = !(fc->sb_flags & SB_RDONLY) && sb_rdonly(sb);
remount_ro = (fc->sb_flags & SB_RDONLY) && !sb_rdonly(sb);
}
@@ -943,13 +944,18 @@ int reconfigure_super(struct fs_context *fc)
*/
if (remount_ro) {
if (force) {
- sb->s_readonly_remount = 1;
- smp_wmb();
+ sb_start_ro_state_change(sb);
} else {
retval = sb_prepare_remount_readonly(sb);
if (retval)
return retval;
}
+ } else if (remount_rw) {
+ /*
+ * Protect filesystem's reconfigure code from writes from
+ * userspace until reconfigure finishes.
+ */
+ sb_start_ro_state_change(sb);
}
if (fc->ops->reconfigure) {
@@ -965,9 +971,7 @@ int reconfigure_super(struct fs_context *fc)
WRITE_ONCE(sb->s_flags, ((sb->s_flags & ~fc->sb_flags_mask) |
(fc->sb_flags & fc->sb_flags_mask)));
- /* Needs to be ordered wrt mnt_is_readonly() */
- smp_wmb();
- sb->s_readonly_remount = 0;
+ sb_end_ro_state_change(sb);
/*
* Some filesystems modify their metadata via some other path than the
@@ -982,7 +986,7 @@ int reconfigure_super(struct fs_context *fc)
return 0;
cancel_readonly:
- sb->s_readonly_remount = 0;
+ sb_end_ro_state_change(sb);
return retval;
}
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index cdb3d632c63d..0140010aa0c3 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -52,7 +52,7 @@ static int sysv_handle_dirsync(struct inode *dir)
}
/*
- * Calls to dir_get_page()/put_and_unmap_page() must be nested according to the
+ * Calls to dir_get_page()/unmap_and_put_page() must be nested according to the
* rules documented in mm/highmem.rst.
*
* NOTE: sysv_find_entry() and sysv_dotdot() act as calls to dir_get_page()
@@ -103,11 +103,11 @@ static int sysv_readdir(struct file *file, struct dir_context *ctx)
if (!dir_emit(ctx, name, strnlen(name,SYSV_NAMELEN),
fs16_to_cpu(SYSV_SB(sb), de->inode),
DT_UNKNOWN)) {
- put_and_unmap_page(page, kaddr);
+ unmap_and_put_page(page, kaddr);
return 0;
}
}
- put_and_unmap_page(page, kaddr);
+ unmap_and_put_page(page, kaddr);
}
return 0;
}
@@ -131,7 +131,7 @@ static inline int namecompare(int len, int maxlen,
* itself (as a parameter - res_dir). It does NOT read the inode of the
* entry - you'll have to do that yourself if you want to.
*
- * On Success put_and_unmap_page() should be called on *res_page.
+ * On Success unmap_and_put_page() should be called on *res_page.
*
* sysv_find_entry() acts as a call to dir_get_page() and must be treated
* accordingly for nesting purposes.
@@ -166,7 +166,7 @@ struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_
name, de->name))
goto found;
}
- put_and_unmap_page(page, kaddr);
+ unmap_and_put_page(page, kaddr);
}
if (++n >= npages)
@@ -209,7 +209,7 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode)
goto out_page;
de++;
}
- put_and_unmap_page(page, kaddr);
+ unmap_and_put_page(page, kaddr);
}
BUG();
return -EINVAL;
@@ -228,7 +228,7 @@ got_it:
mark_inode_dirty(dir);
err = sysv_handle_dirsync(dir);
out_page:
- put_and_unmap_page(page, kaddr);
+ unmap_and_put_page(page, kaddr);
return err;
out_unlock:
unlock_page(page);
@@ -321,12 +321,12 @@ int sysv_empty_dir(struct inode * inode)
if (de->name[1] != '.' || de->name[2])
goto not_empty;
}
- put_and_unmap_page(page, kaddr);
+ unmap_and_put_page(page, kaddr);
}
return 1;
not_empty:
- put_and_unmap_page(page, kaddr);
+ unmap_and_put_page(page, kaddr);
return 0;
}
@@ -352,7 +352,7 @@ int sysv_set_link(struct sysv_dir_entry *de, struct page *page,
}
/*
- * Calls to dir_get_page()/put_and_unmap_page() must be nested according to the
+ * Calls to dir_get_page()/unmap_and_put_page() must be nested according to the
* rules documented in mm/highmem.rst.
*
* sysv_dotdot() acts as a call to dir_get_page() and must be treated
@@ -376,7 +376,7 @@ ino_t sysv_inode_by_name(struct dentry *dentry)
if (de) {
res = fs16_to_cpu(SYSV_SB(dentry->d_sb), de->inode);
- put_and_unmap_page(page, de);
+ unmap_and_put_page(page, de);
}
return res;
}
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index b22764fe669c..58d7f43a1371 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -145,6 +145,10 @@ static int alloc_branch(struct inode *inode,
*/
parent = block_to_cpu(SYSV_SB(inode->i_sb), branch[n-1].key);
bh = sb_getblk(inode->i_sb, parent);
+ if (!bh) {
+ sysv_free_block(inode->i_sb, branch[n].key);
+ break;
+ }
lock_buffer(bh);
memset(bh->b_data, 0, blocksize);
branch[n].bh = bh;
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index 2b2dba4c4f56..fcf163fea3ad 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -164,7 +164,7 @@ static int sysv_unlink(struct inode * dir, struct dentry * dentry)
inode->i_ctime = dir->i_ctime;
inode_dec_link_count(inode);
}
- put_and_unmap_page(page, de);
+ unmap_and_put_page(page, de);
return err;
}
@@ -227,7 +227,7 @@ static int sysv_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (!new_de)
goto out_dir;
err = sysv_set_link(new_de, new_page, old_inode);
- put_and_unmap_page(new_page, new_de);
+ unmap_and_put_page(new_page, new_de);
if (err)
goto out_dir;
new_inode->i_ctime = current_time(new_inode);
@@ -256,9 +256,9 @@ static int sysv_rename(struct mnt_idmap *idmap, struct inode *old_dir,
out_dir:
if (dir_de)
- put_and_unmap_page(dir_page, dir_de);
+ unmap_and_put_page(dir_page, dir_de);
out_old:
- put_and_unmap_page(old_page, old_de);
+ unmap_and_put_page(old_page, old_de);
out:
return err;
}
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index fd20423d3ed2..fd29a66e7241 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -793,11 +793,6 @@ static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (!empty_dir(new_inode))
goto out_oiter;
}
- /*
- * We need to protect against old_inode getting converted from
- * ICB to normal directory.
- */
- inode_lock_nested(old_inode, I_MUTEX_NONDIR2);
retval = udf_fiiter_find_entry(old_inode, &dotdot_name,
&diriter);
if (retval == -ENOENT) {
@@ -806,10 +801,8 @@ static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir,
old_inode->i_ino);
retval = -EFSCORRUPTED;
}
- if (retval) {
- inode_unlock(old_inode);
+ if (retval)
goto out_oiter;
- }
has_diriter = true;
tloc = lelb_to_cpu(diriter.fi.icb.extLocation);
if (udf_get_lb_pblock(old_inode->i_sb, &tloc, 0) !=
@@ -889,7 +882,6 @@ static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir,
udf_dir_entry_len(&diriter.fi));
udf_fiiter_write_fi(&diriter, NULL);
udf_fiiter_release(&diriter);
- inode_unlock(old_inode);
inode_dec_link_count(old_dir);
if (new_inode)
@@ -901,10 +893,8 @@ static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir,
}
return 0;
out_oiter:
- if (has_diriter) {
+ if (has_diriter)
udf_fiiter_release(&diriter);
- inode_unlock(old_inode);
- }
udf_fiiter_release(&oiter);
return retval;
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 0fd96d6e39ce..4e800bb7d2ab 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1332,6 +1332,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
bool basic_ioctls;
unsigned long start, end, vma_end;
struct vma_iterator vmi;
+ pgoff_t pgoff;
user_uffdio_register = (struct uffdio_register __user *) arg;
@@ -1459,6 +1460,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
vma_iter_set(&vmi, start);
prev = vma_prev(&vmi);
+ if (vma->vm_start < start)
+ prev = vma;
ret = 0;
for_each_vma_range(vmi, vma, end) {
@@ -1482,8 +1485,9 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
vma_end = min(end, vma->vm_end);
new_flags = (vma->vm_flags & ~__VM_UFFD_FLAGS) | vm_flags;
+ pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
prev = vma_merge(&vmi, mm, prev, start, vma_end, new_flags,
- vma->anon_vma, vma->vm_file, vma->vm_pgoff,
+ vma->anon_vma, vma->vm_file, pgoff,
vma_policy(vma),
((struct vm_userfaultfd_ctx){ ctx }),
anon_vma_name(vma));
@@ -1563,6 +1567,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
unsigned long start, end, vma_end;
const void __user *buf = (void __user *)arg;
struct vma_iterator vmi;
+ pgoff_t pgoff;
ret = -EFAULT;
if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister)))
@@ -1625,6 +1630,9 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
vma_iter_set(&vmi, start);
prev = vma_prev(&vmi);
+ if (vma->vm_start < start)
+ prev = vma;
+
ret = 0;
for_each_vma_range(vmi, vma, end) {
cond_resched();
@@ -1662,8 +1670,9 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
uffd_wp_range(vma, start, vma_end - start, false);
new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS;
+ pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
prev = vma_merge(&vmi, mm, prev, start, vma_end, new_flags,
- vma->anon_vma, vma->vm_file, vma->vm_pgoff,
+ vma->anon_vma, vma->vm_file, pgoff,
vma_policy(vma),
NULL_VM_UFFD_CTX, anon_vma_name(vma));
if (prev) {
diff --git a/fs/verity/Kconfig b/fs/verity/Kconfig
index a7ffd718f171..e1036e535352 100644
--- a/fs/verity/Kconfig
+++ b/fs/verity/Kconfig
@@ -39,14 +39,14 @@ config FS_VERITY_BUILTIN_SIGNATURES
depends on FS_VERITY
select SYSTEM_DATA_VERIFICATION
help
- Support verifying signatures of verity files against the X.509
- certificates that have been loaded into the ".fs-verity"
- kernel keyring.
+ This option adds support for in-kernel verification of
+ fs-verity builtin signatures.
- This is meant as a relatively simple mechanism that can be
- used to provide an authenticity guarantee for verity files, as
- an alternative to IMA appraisal. Userspace programs still
- need to check that the verity bit is set in order to get an
- authenticity guarantee.
+ Please take great care before using this feature. It is not
+ the only way to do signatures with fs-verity, and the
+ alternatives (such as userspace signature verification, and
+ IMA appraisal) can be much better. For details about the
+ limitations of this feature, see
+ Documentation/filesystems/fsverity.rst.
If unsure, say N.
diff --git a/fs/verity/enable.c b/fs/verity/enable.c
index fc4c50e5219d..c284f46d1b53 100644
--- a/fs/verity/enable.c
+++ b/fs/verity/enable.c
@@ -7,6 +7,7 @@
#include "fsverity_private.h"
+#include <crypto/hash.h>
#include <linux/mount.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>
@@ -20,7 +21,7 @@ struct block_buffer {
/* Hash a block, writing the result to the next level's pending block buffer. */
static int hash_one_block(struct inode *inode,
const struct merkle_tree_params *params,
- struct ahash_request *req, struct block_buffer *cur)
+ struct block_buffer *cur)
{
struct block_buffer *next = cur + 1;
int err;
@@ -36,8 +37,7 @@ static int hash_one_block(struct inode *inode,
/* Zero-pad the block if it's shorter than the block size. */
memset(&cur->data[cur->filled], 0, params->block_size - cur->filled);
- err = fsverity_hash_block(params, inode, req, virt_to_page(cur->data),
- offset_in_page(cur->data),
+ err = fsverity_hash_block(params, inode, cur->data,
&next->data[next->filled]);
if (err)
return err;
@@ -76,7 +76,6 @@ static int build_merkle_tree(struct file *filp,
struct inode *inode = file_inode(filp);
const u64 data_size = inode->i_size;
const int num_levels = params->num_levels;
- struct ahash_request *req;
struct block_buffer _buffers[1 + FS_VERITY_MAX_LEVELS + 1] = {};
struct block_buffer *buffers = &_buffers[1];
unsigned long level_offset[FS_VERITY_MAX_LEVELS];
@@ -90,9 +89,6 @@ static int build_merkle_tree(struct file *filp,
return 0;
}
- /* This allocation never fails, since it's mempool-backed. */
- req = fsverity_alloc_hash_request(params->hash_alg, GFP_KERNEL);
-
/*
* Allocate the block buffers. Buffer "-1" is for data blocks.
* Buffers 0 <= level < num_levels are for the actual tree levels.
@@ -130,7 +126,7 @@ static int build_merkle_tree(struct file *filp,
fsverity_err(inode, "Short read of file data");
goto out;
}
- err = hash_one_block(inode, params, req, &buffers[-1]);
+ err = hash_one_block(inode, params, &buffers[-1]);
if (err)
goto out;
for (level = 0; level < num_levels; level++) {
@@ -141,8 +137,7 @@ static int build_merkle_tree(struct file *filp,
}
/* Next block at @level is full */
- err = hash_one_block(inode, params, req,
- &buffers[level]);
+ err = hash_one_block(inode, params, &buffers[level]);
if (err)
goto out;
err = write_merkle_tree_block(inode,
@@ -162,8 +157,7 @@ static int build_merkle_tree(struct file *filp,
/* Finish all nonempty pending tree blocks. */
for (level = 0; level < num_levels; level++) {
if (buffers[level].filled != 0) {
- err = hash_one_block(inode, params, req,
- &buffers[level]);
+ err = hash_one_block(inode, params, &buffers[level]);
if (err)
goto out;
err = write_merkle_tree_block(inode,
@@ -183,7 +177,6 @@ static int build_merkle_tree(struct file *filp,
out:
for (level = -1; level < num_levels; level++)
kfree(buffers[level].data);
- fsverity_free_hash_request(params->hash_alg, req);
return err;
}
@@ -215,7 +208,7 @@ static int enable_verity(struct file *filp,
}
desc->salt_size = arg->salt_size;
- /* Get the signature if the user provided one */
+ /* Get the builtin signature if the user provided one */
if (arg->sig_size &&
copy_from_user(desc->signature, u64_to_user_ptr(arg->sig_ptr),
arg->sig_size)) {
diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h
index d34dcc033d72..49bf3a1eb2a0 100644
--- a/fs/verity/fsverity_private.h
+++ b/fs/verity/fsverity_private.h
@@ -11,9 +11,6 @@
#define pr_fmt(fmt) "fs-verity: " fmt
#include <linux/fsverity.h>
-#include <linux/mempool.h>
-
-struct ahash_request;
/*
* Implementation limit: maximum depth of the Merkle tree. For now 8 is plenty;
@@ -23,11 +20,10 @@ struct ahash_request;
/* A hash algorithm supported by fs-verity */
struct fsverity_hash_alg {
- struct crypto_ahash *tfm; /* hash tfm, allocated on demand */
+ struct crypto_shash *tfm; /* hash tfm, allocated on demand */
const char *name; /* crypto API name, e.g. sha256 */
unsigned int digest_size; /* digest size in bytes, e.g. 32 for SHA-256 */
unsigned int block_size; /* block size in bytes, e.g. 64 for SHA-256 */
- mempool_t req_pool; /* mempool with a preallocated hash request */
/*
* The HASH_ALGO_* constant for this algorithm. This is different from
* FS_VERITY_HASH_ALG_*, which uses a different numbering scheme.
@@ -37,7 +33,7 @@ struct fsverity_hash_alg {
/* Merkle tree parameters: hash algorithm, initial hash state, and topology */
struct merkle_tree_params {
- struct fsverity_hash_alg *hash_alg; /* the hash algorithm */
+ const struct fsverity_hash_alg *hash_alg; /* the hash algorithm */
const u8 *hashstate; /* initial hash state or NULL */
unsigned int digest_size; /* same as hash_alg->digest_size */
unsigned int block_size; /* size of data and tree blocks */
@@ -83,18 +79,13 @@ struct fsverity_info {
extern struct fsverity_hash_alg fsverity_hash_algs[];
-struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode,
- unsigned int num);
-struct ahash_request *fsverity_alloc_hash_request(struct fsverity_hash_alg *alg,
- gfp_t gfp_flags);
-void fsverity_free_hash_request(struct fsverity_hash_alg *alg,
- struct ahash_request *req);
-const u8 *fsverity_prepare_hash_state(struct fsverity_hash_alg *alg,
+const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode,
+ unsigned int num);
+const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg,
const u8 *salt, size_t salt_size);
int fsverity_hash_block(const struct merkle_tree_params *params,
- const struct inode *inode, struct ahash_request *req,
- struct page *page, unsigned int offset, u8 *out);
-int fsverity_hash_buffer(struct fsverity_hash_alg *alg,
+ const struct inode *inode, const void *data, u8 *out);
+int fsverity_hash_buffer(const struct fsverity_hash_alg *alg,
const void *data, size_t size, u8 *out);
void __init fsverity_check_hash_algs(void);
diff --git a/fs/verity/hash_algs.c b/fs/verity/hash_algs.c
index ea00dbedf756..c598d2035476 100644
--- a/fs/verity/hash_algs.c
+++ b/fs/verity/hash_algs.c
@@ -8,7 +8,6 @@
#include "fsverity_private.h"
#include <crypto/hash.h>
-#include <linux/scatterlist.h>
/* The hash algorithms supported by fs-verity */
struct fsverity_hash_alg fsverity_hash_algs[] = {
@@ -40,11 +39,11 @@ static DEFINE_MUTEX(fsverity_hash_alg_init_mutex);
*
* Return: pointer to the hash alg on success, else an ERR_PTR()
*/
-struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode,
- unsigned int num)
+const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode,
+ unsigned int num)
{
struct fsverity_hash_alg *alg;
- struct crypto_ahash *tfm;
+ struct crypto_shash *tfm;
int err;
if (num >= ARRAY_SIZE(fsverity_hash_algs) ||
@@ -63,11 +62,7 @@ struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode,
if (alg->tfm != NULL)
goto out_unlock;
- /*
- * Using the shash API would make things a bit simpler, but the ahash
- * API is preferable as it allows the use of crypto accelerators.
- */
- tfm = crypto_alloc_ahash(alg->name, 0, 0);
+ tfm = crypto_alloc_shash(alg->name, 0, 0);
if (IS_ERR(tfm)) {
if (PTR_ERR(tfm) == -ENOENT) {
fsverity_warn(inode,
@@ -84,26 +79,20 @@ struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode,
}
err = -EINVAL;
- if (WARN_ON_ONCE(alg->digest_size != crypto_ahash_digestsize(tfm)))
+ if (WARN_ON_ONCE(alg->digest_size != crypto_shash_digestsize(tfm)))
goto err_free_tfm;
- if (WARN_ON_ONCE(alg->block_size != crypto_ahash_blocksize(tfm)))
- goto err_free_tfm;
-
- err = mempool_init_kmalloc_pool(&alg->req_pool, 1,
- sizeof(struct ahash_request) +
- crypto_ahash_reqsize(tfm));
- if (err)
+ if (WARN_ON_ONCE(alg->block_size != crypto_shash_blocksize(tfm)))
goto err_free_tfm;
pr_info("%s using implementation \"%s\"\n",
- alg->name, crypto_ahash_driver_name(tfm));
+ alg->name, crypto_shash_driver_name(tfm));
/* pairs with smp_load_acquire() above */
smp_store_release(&alg->tfm, tfm);
goto out_unlock;
err_free_tfm:
- crypto_free_ahash(tfm);
+ crypto_free_shash(tfm);
alg = ERR_PTR(err);
out_unlock:
mutex_unlock(&fsverity_hash_alg_init_mutex);
@@ -111,42 +100,6 @@ out_unlock:
}
/**
- * fsverity_alloc_hash_request() - allocate a hash request object
- * @alg: the hash algorithm for which to allocate the request
- * @gfp_flags: memory allocation flags
- *
- * This is mempool-backed, so this never fails if __GFP_DIRECT_RECLAIM is set in
- * @gfp_flags. However, in that case this might need to wait for all
- * previously-allocated requests to be freed. So to avoid deadlocks, callers
- * must never need multiple requests at a time to make forward progress.
- *
- * Return: the request object on success; NULL on failure (but see above)
- */
-struct ahash_request *fsverity_alloc_hash_request(struct fsverity_hash_alg *alg,
- gfp_t gfp_flags)
-{
- struct ahash_request *req = mempool_alloc(&alg->req_pool, gfp_flags);
-
- if (req)
- ahash_request_set_tfm(req, alg->tfm);
- return req;
-}
-
-/**
- * fsverity_free_hash_request() - free a hash request object
- * @alg: the hash algorithm
- * @req: the hash request object to free
- */
-void fsverity_free_hash_request(struct fsverity_hash_alg *alg,
- struct ahash_request *req)
-{
- if (req) {
- ahash_request_zero(req);
- mempool_free(req, &alg->req_pool);
- }
-}
-
-/**
* fsverity_prepare_hash_state() - precompute the initial hash state
* @alg: hash algorithm
* @salt: a salt which is to be prepended to all data to be hashed
@@ -155,27 +108,24 @@ void fsverity_free_hash_request(struct fsverity_hash_alg *alg,
* Return: NULL if the salt is empty, otherwise the kmalloc()'ed precomputed
* initial hash state on success or an ERR_PTR() on failure.
*/
-const u8 *fsverity_prepare_hash_state(struct fsverity_hash_alg *alg,
+const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg,
const u8 *salt, size_t salt_size)
{
u8 *hashstate = NULL;
- struct ahash_request *req = NULL;
+ SHASH_DESC_ON_STACK(desc, alg->tfm);
u8 *padded_salt = NULL;
size_t padded_salt_size;
- struct scatterlist sg;
- DECLARE_CRYPTO_WAIT(wait);
int err;
+ desc->tfm = alg->tfm;
+
if (salt_size == 0)
return NULL;
- hashstate = kmalloc(crypto_ahash_statesize(alg->tfm), GFP_KERNEL);
+ hashstate = kmalloc(crypto_shash_statesize(alg->tfm), GFP_KERNEL);
if (!hashstate)
return ERR_PTR(-ENOMEM);
- /* This allocation never fails, since it's mempool-backed. */
- req = fsverity_alloc_hash_request(alg, GFP_KERNEL);
-
/*
* Zero-pad the salt to the next multiple of the input size of the hash
* algorithm's compression function, e.g. 64 bytes for SHA-256 or 128
@@ -190,26 +140,18 @@ const u8 *fsverity_prepare_hash_state(struct fsverity_hash_alg *alg,
goto err_free;
}
memcpy(padded_salt, salt, salt_size);
-
- sg_init_one(&sg, padded_salt, padded_salt_size);
- ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
- CRYPTO_TFM_REQ_MAY_BACKLOG,
- crypto_req_done, &wait);
- ahash_request_set_crypt(req, &sg, NULL, padded_salt_size);
-
- err = crypto_wait_req(crypto_ahash_init(req), &wait);
+ err = crypto_shash_init(desc);
if (err)
goto err_free;
- err = crypto_wait_req(crypto_ahash_update(req), &wait);
+ err = crypto_shash_update(desc, padded_salt, padded_salt_size);
if (err)
goto err_free;
- err = crypto_ahash_export(req, hashstate);
+ err = crypto_shash_export(desc, hashstate);
if (err)
goto err_free;
out:
- fsverity_free_hash_request(alg, req);
kfree(padded_salt);
return hashstate;
@@ -223,9 +165,7 @@ err_free:
* fsverity_hash_block() - hash a single data or hash block
* @params: the Merkle tree's parameters
* @inode: inode for which the hashing is being done
- * @req: preallocated hash request
- * @page: the page containing the block to hash
- * @offset: the offset of the block within @page
+ * @data: virtual address of a buffer containing the block to hash
* @out: output digest, size 'params->digest_size' bytes
*
* Hash a single data or hash block. The hash is salted if a salt is specified
@@ -234,33 +174,24 @@ err_free:
* Return: 0 on success, -errno on failure
*/
int fsverity_hash_block(const struct merkle_tree_params *params,
- const struct inode *inode, struct ahash_request *req,
- struct page *page, unsigned int offset, u8 *out)
+ const struct inode *inode, const void *data, u8 *out)
{
- struct scatterlist sg;
- DECLARE_CRYPTO_WAIT(wait);
+ SHASH_DESC_ON_STACK(desc, params->hash_alg->tfm);
int err;
- sg_init_table(&sg, 1);
- sg_set_page(&sg, page, params->block_size, offset);
- ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
- CRYPTO_TFM_REQ_MAY_BACKLOG,
- crypto_req_done, &wait);
- ahash_request_set_crypt(req, &sg, out, params->block_size);
+ desc->tfm = params->hash_alg->tfm;
if (params->hashstate) {
- err = crypto_ahash_import(req, params->hashstate);
+ err = crypto_shash_import(desc, params->hashstate);
if (err) {
fsverity_err(inode,
"Error %d importing hash state", err);
return err;
}
- err = crypto_ahash_finup(req);
+ err = crypto_shash_finup(desc, data, params->block_size, out);
} else {
- err = crypto_ahash_digest(req);
+ err = crypto_shash_digest(desc, data, params->block_size, out);
}
-
- err = crypto_wait_req(err, &wait);
if (err)
fsverity_err(inode, "Error %d computing block hash", err);
return err;
@@ -273,32 +204,12 @@ int fsverity_hash_block(const struct merkle_tree_params *params,
* @size: size of data to hash, in bytes
* @out: output digest, size 'alg->digest_size' bytes
*
- * Hash some data which is located in physically contiguous memory (i.e. memory
- * allocated by kmalloc(), not by vmalloc()). No salt is used.
- *
* Return: 0 on success, -errno on failure
*/
-int fsverity_hash_buffer(struct fsverity_hash_alg *alg,
+int fsverity_hash_buffer(const struct fsverity_hash_alg *alg,
const void *data, size_t size, u8 *out)
{
- struct ahash_request *req;
- struct scatterlist sg;
- DECLARE_CRYPTO_WAIT(wait);
- int err;
-
- /* This allocation never fails, since it's mempool-backed. */
- req = fsverity_alloc_hash_request(alg, GFP_KERNEL);
-
- sg_init_one(&sg, data, size);
- ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
- CRYPTO_TFM_REQ_MAY_BACKLOG,
- crypto_req_done, &wait);
- ahash_request_set_crypt(req, &sg, out, size);
-
- err = crypto_wait_req(crypto_ahash_digest(req), &wait);
-
- fsverity_free_hash_request(alg, req);
- return err;
+ return crypto_shash_tfm_digest(alg->tfm, data, size, out);
}
void __init fsverity_check_hash_algs(void)
diff --git a/fs/verity/measure.c b/fs/verity/measure.c
index 5c79ea1b2468..eec5956141da 100644
--- a/fs/verity/measure.c
+++ b/fs/verity/measure.c
@@ -61,27 +61,42 @@ EXPORT_SYMBOL_GPL(fsverity_ioctl_measure);
/**
* fsverity_get_digest() - get a verity file's digest
* @inode: inode to get digest of
- * @digest: (out) pointer to the digest
- * @alg: (out) pointer to the hash algorithm enumeration
+ * @raw_digest: (out) the raw file digest
+ * @alg: (out) the digest's algorithm, as a FS_VERITY_HASH_ALG_* value
+ * @halg: (out) the digest's algorithm, as a HASH_ALGO_* value
*
- * Return the file hash algorithm and digest of an fsverity protected file.
- * Assumption: before calling this, the file must have been opened.
+ * Retrieves the fsverity digest of the given file. The file must have been
+ * opened at least once since the inode was last loaded into the inode cache;
+ * otherwise this function will not recognize when fsverity is enabled.
*
- * Return: 0 on success, -errno on failure
+ * The file's fsverity digest consists of @raw_digest in combination with either
+ * @alg or @halg. (The caller can choose which one of @alg or @halg to use.)
+ *
+ * IMPORTANT: Callers *must* make use of one of the two algorithm IDs, since
+ * @raw_digest is meaningless without knowing which algorithm it uses! fsverity
+ * provides no security guarantee for users who ignore the algorithm ID, even if
+ * they use the digest size (since algorithms can share the same digest size).
+ *
+ * Return: The size of the raw digest in bytes, or 0 if the file doesn't have
+ * fsverity enabled.
*/
int fsverity_get_digest(struct inode *inode,
- u8 digest[FS_VERITY_MAX_DIGEST_SIZE],
- enum hash_algo *alg)
+ u8 raw_digest[FS_VERITY_MAX_DIGEST_SIZE],
+ u8 *alg, enum hash_algo *halg)
{
const struct fsverity_info *vi;
const struct fsverity_hash_alg *hash_alg;
vi = fsverity_get_info(inode);
if (!vi)
- return -ENODATA; /* not a verity file */
+ return 0; /* not a verity file */
hash_alg = vi->tree_params.hash_alg;
- memcpy(digest, vi->file_digest, hash_alg->digest_size);
- *alg = hash_alg->algo_id;
- return 0;
+ memcpy(raw_digest, vi->file_digest, hash_alg->digest_size);
+ if (alg)
+ *alg = hash_alg - fsverity_hash_algs;
+ if (halg)
+ *halg = hash_alg->algo_id;
+ return hash_alg->digest_size;
}
+EXPORT_SYMBOL_GPL(fsverity_get_digest);
diff --git a/fs/verity/open.c b/fs/verity/open.c
index 52048b7630dc..1db5106a9c38 100644
--- a/fs/verity/open.c
+++ b/fs/verity/open.c
@@ -32,7 +32,7 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
unsigned int log_blocksize,
const u8 *salt, size_t salt_size)
{
- struct fsverity_hash_alg *hash_alg;
+ const struct fsverity_hash_alg *hash_alg;
int err;
u64 blocks;
u64 blocks_in_level[FS_VERITY_MAX_LEVELS];
@@ -156,9 +156,9 @@ out_err:
/*
* Compute the file digest by hashing the fsverity_descriptor excluding the
- * signature and with the sig_size field set to 0.
+ * builtin signature and with the sig_size field set to 0.
*/
-static int compute_file_digest(struct fsverity_hash_alg *hash_alg,
+static int compute_file_digest(const struct fsverity_hash_alg *hash_alg,
struct fsverity_descriptor *desc,
u8 *file_digest)
{
@@ -174,7 +174,7 @@ static int compute_file_digest(struct fsverity_hash_alg *hash_alg,
/*
* Create a new fsverity_info from the given fsverity_descriptor (with optional
- * appended signature), and check the signature if present. The
+ * appended builtin signature), and check the signature if present. The
* fsverity_descriptor must have already undergone basic validation.
*/
struct fsverity_info *fsverity_create_info(const struct inode *inode,
@@ -319,8 +319,8 @@ static bool validate_fsverity_descriptor(struct inode *inode,
}
/*
- * Read the inode's fsverity_descriptor (with optional appended signature) from
- * the filesystem, and do basic validation of it.
+ * Read the inode's fsverity_descriptor (with optional appended builtin
+ * signature) from the filesystem, and do basic validation of it.
*/
int fsverity_get_descriptor(struct inode *inode,
struct fsverity_descriptor **desc_ret)
diff --git a/fs/verity/read_metadata.c b/fs/verity/read_metadata.c
index 2aefc5565152..f58432772d9e 100644
--- a/fs/verity/read_metadata.c
+++ b/fs/verity/read_metadata.c
@@ -105,7 +105,7 @@ static int fsverity_read_descriptor(struct inode *inode,
if (res)
return res;
- /* don't include the signature */
+ /* don't include the builtin signature */
desc_size = offsetof(struct fsverity_descriptor, signature);
desc->sig_size = 0;
@@ -131,7 +131,7 @@ static int fsverity_read_signature(struct inode *inode,
}
/*
- * Include only the signature. Note that fsverity_get_descriptor()
+ * Include only the builtin signature. fsverity_get_descriptor()
* already verified that sig_size is in-bounds.
*/
res = fsverity_read_buffer(buf, offset, length, desc->signature,
diff --git a/fs/verity/signature.c b/fs/verity/signature.c
index b8c51ad40d3a..72034bc71c9d 100644
--- a/fs/verity/signature.c
+++ b/fs/verity/signature.c
@@ -5,6 +5,14 @@
* Copyright 2019 Google LLC
*/
+/*
+ * This file implements verification of fs-verity builtin signatures. Please
+ * take great care before using this feature. It is not the only way to do
+ * signatures with fs-verity, and the alternatives (such as userspace signature
+ * verification, and IMA appraisal) can be much better. For details about the
+ * limitations of this feature, see Documentation/filesystems/fsverity.rst.
+ */
+
#include "fsverity_private.h"
#include <linux/cred.h>
diff --git a/fs/verity/verify.c b/fs/verity/verify.c
index e2508222750b..433cef51f5f6 100644
--- a/fs/verity/verify.c
+++ b/fs/verity/verify.c
@@ -12,38 +12,6 @@
static struct workqueue_struct *fsverity_read_workqueue;
-static inline int cmp_hashes(const struct fsverity_info *vi,
- const u8 *want_hash, const u8 *real_hash,
- u64 data_pos, int level)
-{
- const unsigned int hsize = vi->tree_params.digest_size;
-
- if (memcmp(want_hash, real_hash, hsize) == 0)
- return 0;
-
- fsverity_err(vi->inode,
- "FILE CORRUPTED! pos=%llu, level=%d, want_hash=%s:%*phN, real_hash=%s:%*phN",
- data_pos, level,
- vi->tree_params.hash_alg->name, hsize, want_hash,
- vi->tree_params.hash_alg->name, hsize, real_hash);
- return -EBADMSG;
-}
-
-static bool data_is_zeroed(struct inode *inode, struct page *page,
- unsigned int len, unsigned int offset)
-{
- void *virt = kmap_local_page(page);
-
- if (memchr_inv(virt + offset, 0, len)) {
- kunmap_local(virt);
- fsverity_err(inode,
- "FILE CORRUPTED! Data past EOF is not zeroed");
- return false;
- }
- kunmap_local(virt);
- return true;
-}
-
/*
* Returns true if the hash block with index @hblock_idx in the tree, located in
* @hpage, has already been verified.
@@ -122,9 +90,7 @@ static bool is_hash_block_verified(struct fsverity_info *vi, struct page *hpage,
*/
static bool
verify_data_block(struct inode *inode, struct fsverity_info *vi,
- struct ahash_request *req, struct page *data_page,
- u64 data_pos, unsigned int dblock_offset_in_page,
- unsigned long max_ra_pages)
+ const void *data, u64 data_pos, unsigned long max_ra_pages)
{
const struct merkle_tree_params *params = &vi->tree_params;
const unsigned int hsize = params->digest_size;
@@ -136,11 +102,11 @@ verify_data_block(struct inode *inode, struct fsverity_info *vi,
struct {
/* Page containing the hash block */
struct page *page;
+ /* Mapped address of the hash block (will be within @page) */
+ const void *addr;
/* Index of the hash block in the tree overall */
unsigned long index;
- /* Byte offset of the hash block within @page */
- unsigned int offset_in_page;
- /* Byte offset of the wanted hash within @page */
+ /* Byte offset of the wanted hash relative to @addr */
unsigned int hoffset;
} hblocks[FS_VERITY_MAX_LEVELS];
/*
@@ -148,7 +114,9 @@ verify_data_block(struct inode *inode, struct fsverity_info *vi,
* index of that block's hash within the current level.
*/
u64 hidx = data_pos >> params->log_blocksize;
- int err;
+
+ /* Up to 1 + FS_VERITY_MAX_LEVELS pages may be mapped at once */
+ BUILD_BUG_ON(1 + FS_VERITY_MAX_LEVELS > KM_MAX_IDX);
if (unlikely(data_pos >= inode->i_size)) {
/*
@@ -159,8 +127,12 @@ verify_data_block(struct inode *inode, struct fsverity_info *vi,
* any part past EOF should be all zeroes. Therefore, we need
* to verify that any data blocks fully past EOF are all zeroes.
*/
- return data_is_zeroed(inode, data_page, params->block_size,
- dblock_offset_in_page);
+ if (memchr_inv(data, 0, params->block_size)) {
+ fsverity_err(inode,
+ "FILE CORRUPTED! Data past EOF is not zeroed");
+ return false;
+ }
+ return true;
}
/*
@@ -175,6 +147,7 @@ verify_data_block(struct inode *inode, struct fsverity_info *vi,
unsigned int hblock_offset_in_page;
unsigned int hoffset;
struct page *hpage;
+ const void *haddr;
/*
* The index of the block in the current level; also the index
@@ -192,30 +165,30 @@ verify_data_block(struct inode *inode, struct fsverity_info *vi,
hblock_offset_in_page =
(hblock_idx << params->log_blocksize) & ~PAGE_MASK;
- /* Byte offset of the hash within the page */
- hoffset = hblock_offset_in_page +
- ((hidx << params->log_digestsize) &
- (params->block_size - 1));
+ /* Byte offset of the hash within the block */
+ hoffset = (hidx << params->log_digestsize) &
+ (params->block_size - 1);
hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode,
hpage_idx, level == 0 ? min(max_ra_pages,
params->tree_pages - hpage_idx) : 0);
if (IS_ERR(hpage)) {
- err = PTR_ERR(hpage);
fsverity_err(inode,
- "Error %d reading Merkle tree page %lu",
- err, hpage_idx);
- goto out;
+ "Error %ld reading Merkle tree page %lu",
+ PTR_ERR(hpage), hpage_idx);
+ goto error;
}
+ haddr = kmap_local_page(hpage) + hblock_offset_in_page;
if (is_hash_block_verified(vi, hpage, hblock_idx)) {
- memcpy_from_page(_want_hash, hpage, hoffset, hsize);
+ memcpy(_want_hash, haddr + hoffset, hsize);
want_hash = _want_hash;
+ kunmap_local(haddr);
put_page(hpage);
goto descend;
}
hblocks[level].page = hpage;
+ hblocks[level].addr = haddr;
hblocks[level].index = hblock_idx;
- hblocks[level].offset_in_page = hblock_offset_in_page;
hblocks[level].hoffset = hoffset;
hidx = next_hidx;
}
@@ -225,18 +198,14 @@ descend:
/* Descend the tree verifying hash blocks. */
for (; level > 0; level--) {
struct page *hpage = hblocks[level - 1].page;
+ const void *haddr = hblocks[level - 1].addr;
unsigned long hblock_idx = hblocks[level - 1].index;
- unsigned int hblock_offset_in_page =
- hblocks[level - 1].offset_in_page;
unsigned int hoffset = hblocks[level - 1].hoffset;
- err = fsverity_hash_block(params, inode, req, hpage,
- hblock_offset_in_page, real_hash);
- if (err)
- goto out;
- err = cmp_hashes(vi, want_hash, real_hash, data_pos, level - 1);
- if (err)
- goto out;
+ if (fsverity_hash_block(params, inode, haddr, real_hash) != 0)
+ goto error;
+ if (memcmp(want_hash, real_hash, hsize) != 0)
+ goto corrupted;
/*
* Mark the hash block as verified. This must be atomic and
* idempotent, as the same hash block might be verified by
@@ -246,29 +215,39 @@ descend:
set_bit(hblock_idx, vi->hash_block_verified);
else
SetPageChecked(hpage);
- memcpy_from_page(_want_hash, hpage, hoffset, hsize);
+ memcpy(_want_hash, haddr + hoffset, hsize);
want_hash = _want_hash;
+ kunmap_local(haddr);
put_page(hpage);
}
/* Finally, verify the data block. */
- err = fsverity_hash_block(params, inode, req, data_page,
- dblock_offset_in_page, real_hash);
- if (err)
- goto out;
- err = cmp_hashes(vi, want_hash, real_hash, data_pos, -1);
-out:
- for (; level > 0; level--)
- put_page(hblocks[level - 1].page);
+ if (fsverity_hash_block(params, inode, data, real_hash) != 0)
+ goto error;
+ if (memcmp(want_hash, real_hash, hsize) != 0)
+ goto corrupted;
+ return true;
- return err == 0;
+corrupted:
+ fsverity_err(inode,
+ "FILE CORRUPTED! pos=%llu, level=%d, want_hash=%s:%*phN, real_hash=%s:%*phN",
+ data_pos, level - 1,
+ params->hash_alg->name, hsize, want_hash,
+ params->hash_alg->name, hsize, real_hash);
+error:
+ for (; level > 0; level--) {
+ kunmap_local(hblocks[level - 1].addr);
+ put_page(hblocks[level - 1].page);
+ }
+ return false;
}
static bool
-verify_data_blocks(struct inode *inode, struct fsverity_info *vi,
- struct ahash_request *req, struct folio *data_folio,
- size_t len, size_t offset, unsigned long max_ra_pages)
+verify_data_blocks(struct folio *data_folio, size_t len, size_t offset,
+ unsigned long max_ra_pages)
{
+ struct inode *inode = data_folio->mapping->host;
+ struct fsverity_info *vi = inode->i_verity_info;
const unsigned int block_size = vi->tree_params.block_size;
u64 pos = (u64)data_folio->index << PAGE_SHIFT;
@@ -278,11 +257,14 @@ verify_data_blocks(struct inode *inode, struct fsverity_info *vi,
folio_test_uptodate(data_folio)))
return false;
do {
- struct page *data_page =
- folio_page(data_folio, offset >> PAGE_SHIFT);
-
- if (!verify_data_block(inode, vi, req, data_page, pos + offset,
- offset & ~PAGE_MASK, max_ra_pages))
+ void *data;
+ bool valid;
+
+ data = kmap_local_folio(data_folio, offset);
+ valid = verify_data_block(inode, vi, data, pos + offset,
+ max_ra_pages);
+ kunmap_local(data);
+ if (!valid)
return false;
offset += block_size;
len -= block_size;
@@ -304,19 +286,7 @@ verify_data_blocks(struct inode *inode, struct fsverity_info *vi,
*/
bool fsverity_verify_blocks(struct folio *folio, size_t len, size_t offset)
{
- struct inode *inode = folio->mapping->host;
- struct fsverity_info *vi = inode->i_verity_info;
- struct ahash_request *req;
- bool valid;
-
- /* This allocation never fails, since it's mempool-backed. */
- req = fsverity_alloc_hash_request(vi->tree_params.hash_alg, GFP_NOFS);
-
- valid = verify_data_blocks(inode, vi, req, folio, len, offset, 0);
-
- fsverity_free_hash_request(vi->tree_params.hash_alg, req);
-
- return valid;
+ return verify_data_blocks(folio, len, offset, 0);
}
EXPORT_SYMBOL_GPL(fsverity_verify_blocks);
@@ -337,15 +307,9 @@ EXPORT_SYMBOL_GPL(fsverity_verify_blocks);
*/
void fsverity_verify_bio(struct bio *bio)
{
- struct inode *inode = bio_first_page_all(bio)->mapping->host;
- struct fsverity_info *vi = inode->i_verity_info;
- struct ahash_request *req;
struct folio_iter fi;
unsigned long max_ra_pages = 0;
- /* This allocation never fails, since it's mempool-backed. */
- req = fsverity_alloc_hash_request(vi->tree_params.hash_alg, GFP_NOFS);
-
if (bio->bi_opf & REQ_RAHEAD) {
/*
* If this bio is for data readahead, then we also do readahead
@@ -360,14 +324,12 @@ void fsverity_verify_bio(struct bio *bio)
}
bio_for_each_folio_all(fi, bio) {
- if (!verify_data_blocks(inode, vi, req, fi.folio, fi.length,
- fi.offset, max_ra_pages)) {
+ if (!verify_data_blocks(fi.folio, fi.length, fi.offset,
+ max_ra_pages)) {
bio->bi_status = BLK_STS_IOERR;
break;
}
}
-
- fsverity_free_hash_request(vi->tree_params.hash_alg, req);
}
EXPORT_SYMBOL_GPL(fsverity_verify_bio);
#endif /* CONFIG_BLOCK */
diff --git a/fs/xattr.c b/fs/xattr.c
index fcf67d80d7f9..e7bbb7f57557 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -985,9 +985,16 @@ int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name)
return 0;
}
-/*
+/**
+ * generic_listxattr - run through a dentry's xattr list() operations
+ * @dentry: dentry to list the xattrs
+ * @buffer: result buffer
+ * @buffer_size: size of @buffer
+ *
* Combine the results of the list() operation from every xattr_handler in the
- * list.
+ * xattr_handler stack.
+ *
+ * Note that this will not include the entries for POSIX ACLs.
*/
ssize_t
generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
@@ -996,10 +1003,6 @@ generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
ssize_t remaining_size = buffer_size;
int err = 0;
- err = posix_acl_listxattr(d_inode(dentry), &buffer, &remaining_size);
- if (err)
- return err;
-
for_each_xattr_handler(handlers, handler) {
if (!handler->name || (handler->list && !handler->list(dentry)))
continue;
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 9b373a0c7aaf..ee84835ebc66 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -984,7 +984,10 @@ xfs_ag_shrink_space(
if (err2 != -ENOSPC)
goto resv_err;
- __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, true);
+ err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL,
+ true);
+ if (err2)
+ goto resv_err;
/*
* Roll the transaction before trying to re-init the per-ag
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index fdfa08cbf4db..c20fe99405d8 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -628,6 +628,25 @@ xfs_alloc_fixup_trees(
return 0;
}
+/*
+ * We do not verify the AGFL contents against AGF-based index counters here,
+ * even though we may have access to the perag that contains shadow copies. We
+ * don't know if the AGF based counters have been checked, and if they have they
+ * still may be inconsistent because they haven't yet been reset on the first
+ * allocation after the AGF has been read in.
+ *
+ * This means we can only check that all agfl entries contain valid or null
+ * values because we can't reliably determine the active range to exclude
+ * NULLAGBNO as a valid value.
+ *
+ * However, we can't even do that for v4 format filesystems because there are
+ * old versions of mkfs out there that does not initialise the AGFL to known,
+ * verifiable values. HEnce we can't tell the difference between a AGFL block
+ * allocated by mkfs and a corrupted AGFL block here on v4 filesystems.
+ *
+ * As a result, we can only fully validate AGFL block numbers when we pull them
+ * from the freelist in xfs_alloc_get_freelist().
+ */
static xfs_failaddr_t
xfs_agfl_verify(
struct xfs_buf *bp)
@@ -637,12 +656,6 @@ xfs_agfl_verify(
__be32 *agfl_bno = xfs_buf_to_agfl_bno(bp);
int i;
- /*
- * There is no verification of non-crc AGFLs because mkfs does not
- * initialise the AGFL to zero or NULL. Hence the only valid part of the
- * AGFL is what the AGF says is active. We can't get to the AGF, so we
- * can't verify just those entries are valid.
- */
if (!xfs_has_crc(mp))
return NULL;
@@ -2321,12 +2334,16 @@ xfs_free_agfl_block(
}
/*
- * Check the agfl fields of the agf for inconsistency or corruption. The purpose
- * is to detect an agfl header padding mismatch between current and early v5
- * kernels. This problem manifests as a 1-slot size difference between the
- * on-disk flcount and the active [first, last] range of a wrapped agfl. This
- * may also catch variants of agfl count corruption unrelated to padding. Either
- * way, we'll reset the agfl and warn the user.
+ * Check the agfl fields of the agf for inconsistency or corruption.
+ *
+ * The original purpose was to detect an agfl header padding mismatch between
+ * current and early v5 kernels. This problem manifests as a 1-slot size
+ * difference between the on-disk flcount and the active [first, last] range of
+ * a wrapped agfl.
+ *
+ * However, we need to use these same checks to catch agfl count corruptions
+ * unrelated to padding. This could occur on any v4 or v5 filesystem, so either
+ * way, we need to reset the agfl and warn the user.
*
* Return true if a reset is required before the agfl can be used, false
* otherwise.
@@ -2342,10 +2359,6 @@ xfs_agfl_needs_reset(
int agfl_size = xfs_agfl_size(mp);
int active;
- /* no agfl header on v4 supers */
- if (!xfs_has_crc(mp))
- return false;
-
/*
* The agf read verifier catches severe corruption of these fields.
* Repeat some sanity checks to cover a packed -> unpacked mismatch if
@@ -2418,7 +2431,7 @@ xfs_agfl_reset(
* the real allocation can proceed. Deferring the free disconnects freeing up
* the AGFL slot from freeing the block.
*/
-STATIC void
+static int
xfs_defer_agfl_block(
struct xfs_trans *tp,
xfs_agnumber_t agno,
@@ -2437,17 +2450,21 @@ xfs_defer_agfl_block(
xefi->xefi_blockcount = 1;
xefi->xefi_owner = oinfo->oi_owner;
+ if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, xefi->xefi_startblock)))
+ return -EFSCORRUPTED;
+
trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
xfs_extent_free_get_group(mp, xefi);
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list);
+ return 0;
}
/*
* Add the extent to the list of extents to be free at transaction end.
* The list is maintained sorted (by block number).
*/
-void
+int
__xfs_free_extent_later(
struct xfs_trans *tp,
xfs_fsblock_t bno,
@@ -2474,6 +2491,9 @@ __xfs_free_extent_later(
#endif
ASSERT(xfs_extfree_item_cache != NULL);
+ if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
+ return -EFSCORRUPTED;
+
xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
GFP_KERNEL | __GFP_NOFAIL);
xefi->xefi_startblock = bno;
@@ -2497,6 +2517,7 @@ __xfs_free_extent_later(
xfs_extent_free_get_group(mp, xefi);
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
+ return 0;
}
#ifdef DEBUG
@@ -2657,7 +2678,9 @@ xfs_alloc_fix_freelist(
goto out_agbp_relse;
/* defer agfl frees */
- xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
+ error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
+ if (error)
+ goto out_agbp_relse;
}
targs.tp = tp;
@@ -2767,6 +2790,9 @@ xfs_alloc_get_freelist(
*/
agfl_bno = xfs_buf_to_agfl_bno(agflbp);
bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
+ if (XFS_IS_CORRUPT(tp->t_mountp, !xfs_verify_agbno(pag, bno)))
+ return -EFSCORRUPTED;
+
be32_add_cpu(&agf->agf_flfirst, 1);
xfs_trans_brelse(tp, agflbp);
if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
@@ -2889,6 +2915,19 @@ xfs_alloc_put_freelist(
return 0;
}
+/*
+ * Verify the AGF is consistent.
+ *
+ * We do not verify the AGFL indexes in the AGF are fully consistent here
+ * because of issues with variable on-disk structure sizes. Instead, we check
+ * the agfl indexes for consistency when we initialise the perag from the AGF
+ * information after a read completes.
+ *
+ * If the index is inconsistent, then we mark the perag as needing an AGFL
+ * reset. The first AGFL update performed then resets the AGFL indexes and
+ * refills the AGFL with known good free blocks, allowing the filesystem to
+ * continue operating normally at the cost of a few leaked free space blocks.
+ */
static xfs_failaddr_t
xfs_agf_verify(
struct xfs_buf *bp)
@@ -2962,7 +3001,6 @@ xfs_agf_verify(
return __this_address;
return NULL;
-
}
static void
@@ -3187,7 +3225,8 @@ xfs_alloc_vextent_check_args(
*/
static int
xfs_alloc_vextent_prepare_ag(
- struct xfs_alloc_arg *args)
+ struct xfs_alloc_arg *args,
+ uint32_t flags)
{
bool need_pag = !args->pag;
int error;
@@ -3196,7 +3235,7 @@ xfs_alloc_vextent_prepare_ag(
args->pag = xfs_perag_get(args->mp, args->agno);
args->agbp = NULL;
- error = xfs_alloc_fix_freelist(args, 0);
+ error = xfs_alloc_fix_freelist(args, flags);
if (error) {
trace_xfs_alloc_vextent_nofix(args);
if (need_pag)
@@ -3336,7 +3375,7 @@ xfs_alloc_vextent_this_ag(
return error;
}
- error = xfs_alloc_vextent_prepare_ag(args);
+ error = xfs_alloc_vextent_prepare_ag(args, 0);
if (!error && args->agbp)
error = xfs_alloc_ag_vextent_size(args);
@@ -3380,7 +3419,7 @@ restart:
for_each_perag_wrap_range(mp, start_agno, restart_agno,
mp->m_sb.sb_agcount, agno, args->pag) {
args->agno = agno;
- error = xfs_alloc_vextent_prepare_ag(args);
+ error = xfs_alloc_vextent_prepare_ag(args, flags);
if (error)
break;
if (!args->agbp) {
@@ -3546,7 +3585,7 @@ xfs_alloc_vextent_exact_bno(
return error;
}
- error = xfs_alloc_vextent_prepare_ag(args);
+ error = xfs_alloc_vextent_prepare_ag(args, 0);
if (!error && args->agbp)
error = xfs_alloc_ag_vextent_exact(args);
@@ -3587,7 +3626,7 @@ xfs_alloc_vextent_near_bno(
if (needs_perag)
args->pag = xfs_perag_grab(mp, args->agno);
- error = xfs_alloc_vextent_prepare_ag(args);
+ error = xfs_alloc_vextent_prepare_ag(args, 0);
if (!error && args->agbp)
error = xfs_alloc_ag_vextent_near(args);
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 5dbb25546d0b..85ac470be0da 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -230,7 +230,7 @@ xfs_buf_to_agfl_bno(
return bp->b_addr;
}
-void __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
+int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
xfs_filblks_t len, const struct xfs_owner_info *oinfo,
bool skip_discard);
@@ -254,14 +254,14 @@ void xfs_extent_free_get_group(struct xfs_mount *mp,
#define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */
#define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */
-static inline void
+static inline int
xfs_free_extent_later(
struct xfs_trans *tp,
xfs_fsblock_t bno,
xfs_filblks_t len,
const struct xfs_owner_info *oinfo)
{
- __xfs_free_extent_later(tp, bno, len, oinfo, false);
+ return __xfs_free_extent_later(tp, bno, len, oinfo, false);
}
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index cd8870a16fd1..fef35696adb7 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -572,8 +572,12 @@ xfs_bmap_btree_to_extents(
cblock = XFS_BUF_TO_BLOCK(cbp);
if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
return error;
+
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
- xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo);
+ error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo);
+ if (error)
+ return error;
+
ip->i_nblocks--;
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
xfs_trans_binval(tp, cbp);
@@ -5230,10 +5234,12 @@ xfs_bmap_del_extent_real(
if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
xfs_refcount_decrease_extent(tp, del);
} else {
- __xfs_free_extent_later(tp, del->br_startblock,
+ error = __xfs_free_extent_later(tp, del->br_startblock,
del->br_blockcount, NULL,
(bflags & XFS_BMAPI_NODISCARD) ||
del->br_state == XFS_EXT_UNWRITTEN);
+ if (error)
+ goto done;
}
}
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 1b40e5f8b1ec..36564ae3084f 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -268,11 +268,14 @@ xfs_bmbt_free_block(
struct xfs_trans *tp = cur->bc_tp;
xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
struct xfs_owner_info oinfo;
+ int error;
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork);
- xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo);
- ip->i_nblocks--;
+ error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo);
+ if (error)
+ return error;
+ ip->i_nblocks--;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
return 0;
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index a16d5de16933..34600f94c2f4 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -1834,7 +1834,7 @@ retry:
* might be sparse and only free the regions that are allocated as part of the
* chunk.
*/
-STATIC void
+static int
xfs_difree_inode_chunk(
struct xfs_trans *tp,
xfs_agnumber_t agno,
@@ -1851,10 +1851,10 @@ xfs_difree_inode_chunk(
if (!xfs_inobt_issparse(rec->ir_holemask)) {
/* not sparse, calculate extent info directly */
- xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, sagbno),
- M_IGEO(mp)->ialloc_blks,
- &XFS_RMAP_OINFO_INODES);
- return;
+ return xfs_free_extent_later(tp,
+ XFS_AGB_TO_FSB(mp, agno, sagbno),
+ M_IGEO(mp)->ialloc_blks,
+ &XFS_RMAP_OINFO_INODES);
}
/* holemask is only 16-bits (fits in an unsigned long) */
@@ -1871,6 +1871,8 @@ xfs_difree_inode_chunk(
XFS_INOBT_HOLEMASK_BITS);
nextbit = startidx + 1;
while (startidx < XFS_INOBT_HOLEMASK_BITS) {
+ int error;
+
nextbit = find_next_zero_bit(holemask, XFS_INOBT_HOLEMASK_BITS,
nextbit);
/*
@@ -1896,8 +1898,11 @@ xfs_difree_inode_chunk(
ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
- xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, agbno),
- contigblk, &XFS_RMAP_OINFO_INODES);
+ error = xfs_free_extent_later(tp,
+ XFS_AGB_TO_FSB(mp, agno, agbno),
+ contigblk, &XFS_RMAP_OINFO_INODES);
+ if (error)
+ return error;
/* reset range to current bit and carry on... */
startidx = endidx = nextbit;
@@ -1905,6 +1910,7 @@ xfs_difree_inode_chunk(
next:
nextbit++;
}
+ return 0;
}
STATIC int
@@ -2003,7 +2009,9 @@ xfs_difree_inobt(
goto error0;
}
- xfs_difree_inode_chunk(tp, pag->pag_agno, &rec);
+ error = xfs_difree_inode_chunk(tp, pag->pag_agno, &rec);
+ if (error)
+ goto error0;
} else {
xic->deleted = false;
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index f13e0809dc63..269573c82808 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -324,7 +324,6 @@ struct xfs_inode_log_format_32 {
#define XFS_ILOG_DOWNER 0x200 /* change the data fork owner on replay */
#define XFS_ILOG_AOWNER 0x400 /* change the attr fork owner on replay */
-
/*
* The timestamps are dirty, but not necessarily anything else in the inode
* core. Unlike the other fields above this one must never make it to disk
@@ -333,6 +332,14 @@ struct xfs_inode_log_format_32 {
*/
#define XFS_ILOG_TIMESTAMP 0x4000
+/*
+ * The version field has been changed, but not necessarily anything else of
+ * interest. This must never make it to disk - it is used purely to ensure that
+ * the inode item ->precommit operation can update the fsync flag triggers
+ * in the inode item correctly.
+ */
+#define XFS_ILOG_IVERSION 0x8000
+
#define XFS_ILOG_NONCORE (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
XFS_ILOG_DBROOT | XFS_ILOG_DEV | \
XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index c1c65774dcc2..b6e21433925c 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1151,8 +1151,10 @@ xfs_refcount_adjust_extents(
fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
cur->bc_ag.pag->pag_agno,
tmp.rc_startblock);
- xfs_free_extent_later(cur->bc_tp, fsbno,
+ error = xfs_free_extent_later(cur->bc_tp, fsbno,
tmp.rc_blockcount, NULL);
+ if (error)
+ goto out_error;
}
(*agbno) += tmp.rc_blockcount;
@@ -1210,8 +1212,10 @@ xfs_refcount_adjust_extents(
fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
cur->bc_ag.pag->pag_agno,
ext.rc_startblock);
- xfs_free_extent_later(cur->bc_tp, fsbno,
+ error = xfs_free_extent_later(cur->bc_tp, fsbno,
ext.rc_blockcount, NULL);
+ if (error)
+ goto out_error;
}
skip:
@@ -1976,7 +1980,10 @@ xfs_refcount_recover_cow_leftovers(
rr->rr_rrec.rc_blockcount);
/* Free the block. */
- xfs_free_extent_later(tp, fsb, rr->rr_rrec.rc_blockcount, NULL);
+ error = xfs_free_extent_later(tp, fsb,
+ rr->rr_rrec.rc_blockcount, NULL);
+ if (error)
+ goto out_trans;
error = xfs_trans_commit(tp);
if (error)
diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index 8b5547073379..cb4796b6e693 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -40,9 +40,8 @@ xfs_trans_ijoin(
iip->ili_lock_flags = lock_flags;
ASSERT(!xfs_iflags_test(ip, XFS_ISTALE));
- /*
- * Get a log_item_desc to point at the new item.
- */
+ /* Reset the per-tx dirty context and add the item to the tx. */
+ iip->ili_dirty_flags = 0;
xfs_trans_add_item(tp, &iip->ili_item);
}
@@ -76,17 +75,10 @@ xfs_trans_ichgtime(
/*
* This is called to mark the fields indicated in fieldmask as needing to be
* logged when the transaction is committed. The inode must already be
- * associated with the given transaction.
- *
- * The values for fieldmask are defined in xfs_inode_item.h. We always log all
- * of the core inode if any of it has changed, and we always log all of the
- * inline data/extents/b-tree root if any of them has changed.
- *
- * Grab and pin the cluster buffer associated with this inode to avoid RMW
- * cycles at inode writeback time. Avoid the need to add error handling to every
- * xfs_trans_log_inode() call by shutting down on read error. This will cause
- * transactions to fail and everything to error out, just like if we return a
- * read error in a dirty transaction and cancel it.
+ * associated with the given transaction. All we do here is record where the
+ * inode was dirtied and mark the transaction and inode log item dirty;
+ * everything else is done in the ->precommit log item operation after the
+ * changes in the transaction have been completed.
*/
void
xfs_trans_log_inode(
@@ -96,7 +88,6 @@ xfs_trans_log_inode(
{
struct xfs_inode_log_item *iip = ip->i_itemp;
struct inode *inode = VFS_I(ip);
- uint iversion_flags = 0;
ASSERT(iip);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
@@ -105,18 +96,6 @@ xfs_trans_log_inode(
tp->t_flags |= XFS_TRANS_DIRTY;
/*
- * Don't bother with i_lock for the I_DIRTY_TIME check here, as races
- * don't matter - we either will need an extra transaction in 24 hours
- * to log the timestamps, or will clear already cleared fields in the
- * worst case.
- */
- if (inode->i_state & I_DIRTY_TIME) {
- spin_lock(&inode->i_lock);
- inode->i_state &= ~I_DIRTY_TIME;
- spin_unlock(&inode->i_lock);
- }
-
- /*
* First time we log the inode in a transaction, bump the inode change
* counter if it is configured for this to occur. While we have the
* inode locked exclusively for metadata modification, we can usually
@@ -128,86 +107,10 @@ xfs_trans_log_inode(
if (!test_and_set_bit(XFS_LI_DIRTY, &iip->ili_item.li_flags)) {
if (IS_I_VERSION(inode) &&
inode_maybe_inc_iversion(inode, flags & XFS_ILOG_CORE))
- iversion_flags = XFS_ILOG_CORE;
- }
-
- /*
- * If we're updating the inode core or the timestamps and it's possible
- * to upgrade this inode to bigtime format, do so now.
- */
- if ((flags & (XFS_ILOG_CORE | XFS_ILOG_TIMESTAMP)) &&
- xfs_has_bigtime(ip->i_mount) &&
- !xfs_inode_has_bigtime(ip)) {
- ip->i_diflags2 |= XFS_DIFLAG2_BIGTIME;
- flags |= XFS_ILOG_CORE;
- }
-
- /*
- * Inode verifiers do not check that the extent size hint is an integer
- * multiple of the rt extent size on a directory with both rtinherit
- * and extszinherit flags set. If we're logging a directory that is
- * misconfigured in this way, clear the hint.
- */
- if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
- (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
- (ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) {
- ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
- XFS_DIFLAG_EXTSZINHERIT);
- ip->i_extsize = 0;
- flags |= XFS_ILOG_CORE;
+ flags |= XFS_ILOG_IVERSION;
}
- /*
- * Record the specific change for fdatasync optimisation. This allows
- * fdatasync to skip log forces for inodes that are only timestamp
- * dirty.
- */
- spin_lock(&iip->ili_lock);
- iip->ili_fsync_fields |= flags;
-
- if (!iip->ili_item.li_buf) {
- struct xfs_buf *bp;
- int error;
-
- /*
- * We hold the ILOCK here, so this inode is not going to be
- * flushed while we are here. Further, because there is no
- * buffer attached to the item, we know that there is no IO in
- * progress, so nothing will clear the ili_fields while we read
- * in the buffer. Hence we can safely drop the spin lock and
- * read the buffer knowing that the state will not change from
- * here.
- */
- spin_unlock(&iip->ili_lock);
- error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &bp);
- if (error) {
- xfs_force_shutdown(ip->i_mount, SHUTDOWN_META_IO_ERROR);
- return;
- }
-
- /*
- * We need an explicit buffer reference for the log item but
- * don't want the buffer to remain attached to the transaction.
- * Hold the buffer but release the transaction reference once
- * we've attached the inode log item to the buffer log item
- * list.
- */
- xfs_buf_hold(bp);
- spin_lock(&iip->ili_lock);
- iip->ili_item.li_buf = bp;
- bp->b_flags |= _XBF_INODES;
- list_add_tail(&iip->ili_item.li_bio_list, &bp->b_li_list);
- xfs_trans_brelse(tp, bp);
- }
-
- /*
- * Always OR in the bits from the ili_last_fields field. This is to
- * coordinate with the xfs_iflush() and xfs_buf_inode_iodone() routines
- * in the eventual clearing of the ili_fields bits. See the big comment
- * in xfs_iflush() for an explanation of this coordination mechanism.
- */
- iip->ili_fields |= (flags | iip->ili_last_fields | iversion_flags);
- spin_unlock(&iip->ili_lock);
+ iip->ili_dirty_flags |= flags;
}
int
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index 69bc89d0fc68..5bf4326e9783 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -769,14 +769,14 @@ xchk_are_bmaps_contiguous(
* mapping or false if there are no more mappings. Caller must ensure that
* @info.icur is zeroed before the first call.
*/
-static int
+static bool
xchk_bmap_iext_iter(
struct xchk_bmap_info *info,
struct xfs_bmbt_irec *irec)
{
struct xfs_bmbt_irec got;
struct xfs_ifork *ifp;
- xfs_filblks_t prev_len;
+ unsigned int nr = 0;
ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork);
@@ -790,12 +790,12 @@ xchk_bmap_iext_iter(
irec->br_startoff);
return false;
}
+ nr++;
/*
* Iterate subsequent iextent records and merge them with the one
* that we just read, if possible.
*/
- prev_len = irec->br_blockcount;
while (xfs_iext_peek_next_extent(ifp, &info->icur, &got)) {
if (!xchk_are_bmaps_contiguous(irec, &got))
break;
@@ -805,20 +805,21 @@ xchk_bmap_iext_iter(
got.br_startoff);
return false;
}
-
- /*
- * Notify the user of mergeable records in the data or attr
- * forks. CoW forks only exist in memory so we ignore them.
- */
- if (info->whichfork != XFS_COW_FORK &&
- prev_len + got.br_blockcount > BMBT_BLOCKCOUNT_MASK)
- xchk_ino_set_preen(info->sc, info->sc->ip->i_ino);
+ nr++;
irec->br_blockcount += got.br_blockcount;
- prev_len = got.br_blockcount;
xfs_iext_next(ifp, &info->icur);
}
+ /*
+ * If the merged mapping could be expressed with fewer bmbt records
+ * than we actually found, notify the user that this fork could be
+ * optimized. CoW forks only exist in memory so we ignore them.
+ */
+ if (nr > 1 && info->whichfork != XFS_COW_FORK &&
+ howmany_64(irec->br_blockcount, XFS_MAX_BMBT_EXTLEN) < nr)
+ xchk_ino_set_preen(info->sc, info->sc->ip->i_ino);
+
return true;
}
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index b38e93830dde..e113f2f5c254 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -105,10 +105,10 @@ struct xfs_scrub {
};
/* XCHK state flags grow up from zero, XREP state flags grown down from 2^31 */
-#define XCHK_TRY_HARDER (1 << 0) /* can't get resources, try again */
-#define XCHK_FSGATES_DRAIN (1 << 2) /* defer ops draining enabled */
-#define XCHK_NEED_DRAIN (1 << 3) /* scrub needs to drain defer ops */
-#define XREP_ALREADY_FIXED (1 << 31) /* checking our repair work */
+#define XCHK_TRY_HARDER (1U << 0) /* can't get resources, try again */
+#define XCHK_FSGATES_DRAIN (1U << 2) /* defer ops draining enabled */
+#define XCHK_NEED_DRAIN (1U << 3) /* scrub needs to drain defer ops */
+#define XREP_ALREADY_FIXED (1U << 31) /* checking our repair work */
/*
* The XCHK_FSGATES* flags reflect functionality in the main filesystem that
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index df7322ed73fa..023d4e0385dd 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -452,10 +452,18 @@ xfs_buf_item_format(
* This is called to pin the buffer associated with the buf log item in memory
* so it cannot be written out.
*
- * We also always take a reference to the buffer log item here so that the bli
- * is held while the item is pinned in memory. This means that we can
- * unconditionally drop the reference count a transaction holds when the
- * transaction is completed.
+ * We take a reference to the buffer log item here so that the BLI life cycle
+ * extends at least until the buffer is unpinned via xfs_buf_item_unpin() and
+ * inserted into the AIL.
+ *
+ * We also need to take a reference to the buffer itself as the BLI unpin
+ * processing requires accessing the buffer after the BLI has dropped the final
+ * BLI reference. See xfs_buf_item_unpin() for an explanation.
+ * If unpins race to drop the final BLI reference and only the
+ * BLI owns a reference to the buffer, then the loser of the race can have the
+ * buffer fgreed from under it (e.g. on shutdown). Taking a buffer reference per
+ * pin count ensures the life cycle of the buffer extends for as
+ * long as we hold the buffer pin reference in xfs_buf_item_unpin().
*/
STATIC void
xfs_buf_item_pin(
@@ -470,13 +478,30 @@ xfs_buf_item_pin(
trace_xfs_buf_item_pin(bip);
+ xfs_buf_hold(bip->bli_buf);
atomic_inc(&bip->bli_refcount);
atomic_inc(&bip->bli_buf->b_pin_count);
}
/*
- * This is called to unpin the buffer associated with the buf log item which
- * was previously pinned with a call to xfs_buf_item_pin().
+ * This is called to unpin the buffer associated with the buf log item which was
+ * previously pinned with a call to xfs_buf_item_pin(). We enter this function
+ * with a buffer pin count, a buffer reference and a BLI reference.
+ *
+ * We must drop the BLI reference before we unpin the buffer because the AIL
+ * doesn't acquire a BLI reference whenever it accesses it. Therefore if the
+ * refcount drops to zero, the bli could still be AIL resident and the buffer
+ * submitted for I/O at any point before we return. This can result in IO
+ * completion freeing the buffer while we are still trying to access it here.
+ * This race condition can also occur in shutdown situations where we abort and
+ * unpin buffers from contexts other that journal IO completion.
+ *
+ * Hence we have to hold a buffer reference per pin count to ensure that the
+ * buffer cannot be freed until we have finished processing the unpin operation.
+ * The reference is taken in xfs_buf_item_pin(), and we must hold it until we
+ * are done processing the buffer state. In the case of an abort (remove =
+ * true) then we re-use the current pin reference as the IO reference we hand
+ * off to IO failure handling.
*/
STATIC void
xfs_buf_item_unpin(
@@ -493,24 +518,18 @@ xfs_buf_item_unpin(
trace_xfs_buf_item_unpin(bip);
- /*
- * Drop the bli ref associated with the pin and grab the hold required
- * for the I/O simulation failure in the abort case. We have to do this
- * before the pin count drops because the AIL doesn't acquire a bli
- * reference. Therefore if the refcount drops to zero, the bli could
- * still be AIL resident and the buffer submitted for I/O (and freed on
- * completion) at any point before we return. This can be removed once
- * the AIL properly holds a reference on the bli.
- */
freed = atomic_dec_and_test(&bip->bli_refcount);
- if (freed && !stale && remove)
- xfs_buf_hold(bp);
if (atomic_dec_and_test(&bp->b_pin_count))
wake_up_all(&bp->b_waiters);
- /* nothing to do but drop the pin count if the bli is active */
- if (!freed)
+ /*
+ * Nothing to do but drop the buffer pin reference if the BLI is
+ * still active.
+ */
+ if (!freed) {
+ xfs_buf_rele(bp);
return;
+ }
if (stale) {
ASSERT(bip->bli_flags & XFS_BLI_STALE);
@@ -523,6 +542,15 @@ xfs_buf_item_unpin(
trace_xfs_buf_item_unpin_stale(bip);
/*
+ * The buffer has been locked and referenced since it was marked
+ * stale so we own both lock and reference exclusively here. We
+ * do not need the pin reference any more, so drop it now so
+ * that we only have one reference to drop once item completion
+ * processing is complete.
+ */
+ xfs_buf_rele(bp);
+
+ /*
* If we get called here because of an IO error, we may or may
* not have the item on the AIL. xfs_trans_ail_delete() will
* take care of that situation. xfs_trans_ail_delete() drops
@@ -538,16 +566,30 @@ xfs_buf_item_unpin(
ASSERT(bp->b_log_item == NULL);
}
xfs_buf_relse(bp);
- } else if (remove) {
+ return;
+ }
+
+ if (remove) {
/*
- * The buffer must be locked and held by the caller to simulate
- * an async I/O failure. We acquired the hold for this case
- * before the buffer was unpinned.
+ * We need to simulate an async IO failures here to ensure that
+ * the correct error completion is run on this buffer. This
+ * requires a reference to the buffer and for the buffer to be
+ * locked. We can safely pass ownership of the pin reference to
+ * the IO to ensure that nothing can free the buffer while we
+ * wait for the lock and then run the IO failure completion.
*/
xfs_buf_lock(bp);
bp->b_flags |= XBF_ASYNC;
xfs_buf_ioend_fail(bp);
+ return;
}
+
+ /*
+ * BLI has no more active references - it will be moved to the AIL to
+ * manage the remaining BLI/buffer life cycle. There is nothing left for
+ * us to do here so drop the pin reference to the buffer.
+ */
+ xfs_buf_rele(bp);
}
STATIC uint
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 22c13933c8f8..2fc98d313708 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -78,7 +78,6 @@ restart:
*longest = 0;
err = xfs_bmap_longest_free_extent(pag, NULL, longest);
if (err) {
- xfs_perag_rele(pag);
if (err != -EAGAIN)
break;
/* Couldn't lock the AGF, skip this AG. */
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 0f60e301eb1f..453890942d9f 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -454,6 +454,27 @@ xfs_inodegc_queue_all(
return ret;
}
+/* Wait for all queued work and collect errors */
+static int
+xfs_inodegc_wait_all(
+ struct xfs_mount *mp)
+{
+ int cpu;
+ int error = 0;
+
+ flush_workqueue(mp->m_inodegc_wq);
+ for_each_online_cpu(cpu) {
+ struct xfs_inodegc *gc;
+
+ gc = per_cpu_ptr(mp->m_inodegc, cpu);
+ if (gc->error && !error)
+ error = gc->error;
+ gc->error = 0;
+ }
+
+ return error;
+}
+
/*
* Check the validity of the inode we just found it the cache
*/
@@ -1491,15 +1512,14 @@ xfs_blockgc_free_space(
if (error)
return error;
- xfs_inodegc_flush(mp);
- return 0;
+ return xfs_inodegc_flush(mp);
}
/*
* Reclaim all the free space that we can by scheduling the background blockgc
* and inodegc workers immediately and waiting for them all to clear.
*/
-void
+int
xfs_blockgc_flush_all(
struct xfs_mount *mp)
{
@@ -1520,7 +1540,7 @@ xfs_blockgc_flush_all(
for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
flush_delayed_work(&pag->pag_blockgc_work);
- xfs_inodegc_flush(mp);
+ return xfs_inodegc_flush(mp);
}
/*
@@ -1842,13 +1862,17 @@ xfs_inodegc_set_reclaimable(
* This is the last chance to make changes to an otherwise unreferenced file
* before incore reclamation happens.
*/
-static void
+static int
xfs_inodegc_inactivate(
struct xfs_inode *ip)
{
+ int error;
+
trace_xfs_inode_inactivating(ip);
- xfs_inactive(ip);
+ error = xfs_inactive(ip);
xfs_inodegc_set_reclaimable(ip);
+ return error;
+
}
void
@@ -1880,8 +1904,12 @@ xfs_inodegc_worker(
WRITE_ONCE(gc->shrinker_hits, 0);
llist_for_each_entry_safe(ip, n, node, i_gclist) {
+ int error;
+
xfs_iflags_set(ip, XFS_INACTIVATING);
- xfs_inodegc_inactivate(ip);
+ error = xfs_inodegc_inactivate(ip);
+ if (error && !gc->error)
+ gc->error = error;
}
memalloc_nofs_restore(nofs_flag);
@@ -1905,13 +1933,13 @@ xfs_inodegc_push(
* Force all currently queued inode inactivation work to run immediately and
* wait for the work to finish.
*/
-void
+int
xfs_inodegc_flush(
struct xfs_mount *mp)
{
xfs_inodegc_push(mp);
trace_xfs_inodegc_flush(mp, __return_address);
- flush_workqueue(mp->m_inodegc_wq);
+ return xfs_inodegc_wait_all(mp);
}
/*
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 87910191a9dd..1dcdcb23796e 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -62,7 +62,7 @@ int xfs_blockgc_free_dquots(struct xfs_mount *mp, struct xfs_dquot *udqp,
unsigned int iwalk_flags);
int xfs_blockgc_free_quota(struct xfs_inode *ip, unsigned int iwalk_flags);
int xfs_blockgc_free_space(struct xfs_mount *mp, struct xfs_icwalk *icm);
-void xfs_blockgc_flush_all(struct xfs_mount *mp);
+int xfs_blockgc_flush_all(struct xfs_mount *mp);
void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
@@ -80,7 +80,7 @@ void xfs_blockgc_start(struct xfs_mount *mp);
void xfs_inodegc_worker(struct work_struct *work);
void xfs_inodegc_push(struct xfs_mount *mp);
-void xfs_inodegc_flush(struct xfs_mount *mp);
+int xfs_inodegc_flush(struct xfs_mount *mp);
void xfs_inodegc_stop(struct xfs_mount *mp);
void xfs_inodegc_start(struct xfs_mount *mp);
void xfs_inodegc_cpu_dead(struct xfs_mount *mp, unsigned int cpu);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5808abab786c..9e62cc500140 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1620,16 +1620,7 @@ xfs_inactive_ifree(
*/
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
- /*
- * Just ignore errors at this point. There is nothing we can do except
- * to try to keep going. Make sure it's not a silent error.
- */
- error = xfs_trans_commit(tp);
- if (error)
- xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
- __func__, error);
-
- return 0;
+ return xfs_trans_commit(tp);
}
/*
@@ -1693,12 +1684,12 @@ xfs_inode_needs_inactive(
* now be truncated. Also, we clear all of the read-ahead state
* kept for the inode here since the file is now closed.
*/
-void
+int
xfs_inactive(
xfs_inode_t *ip)
{
struct xfs_mount *mp;
- int error;
+ int error = 0;
int truncate = 0;
/*
@@ -1736,7 +1727,7 @@ xfs_inactive(
* reference to the inode at this point anyways.
*/
if (xfs_can_free_eofblocks(ip, true))
- xfs_free_eofblocks(ip);
+ error = xfs_free_eofblocks(ip);
goto out;
}
@@ -1773,7 +1764,7 @@ xfs_inactive(
/*
* Free the inode.
*/
- xfs_inactive_ifree(ip);
+ error = xfs_inactive_ifree(ip);
out:
/*
@@ -1781,6 +1772,7 @@ out:
* the attached dquots.
*/
xfs_qm_dqdetach(ip);
+ return error;
}
/*
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 69d21e42c10a..7547caf2f2ab 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -470,7 +470,7 @@ enum layout_break_reason {
(xfs_has_grpid((pip)->i_mount) || (VFS_I(pip)->i_mode & S_ISGID))
int xfs_release(struct xfs_inode *ip);
-void xfs_inactive(struct xfs_inode *ip);
+int xfs_inactive(struct xfs_inode *ip);
int xfs_lookup(struct xfs_inode *dp, const struct xfs_name *name,
struct xfs_inode **ipp, struct xfs_name *ci_name);
int xfs_create(struct mnt_idmap *idmap,
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index ca2941ab6cbc..91c847a84e10 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -29,6 +29,153 @@ static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip)
return container_of(lip, struct xfs_inode_log_item, ili_item);
}
+static uint64_t
+xfs_inode_item_sort(
+ struct xfs_log_item *lip)
+{
+ return INODE_ITEM(lip)->ili_inode->i_ino;
+}
+
+/*
+ * Prior to finally logging the inode, we have to ensure that all the
+ * per-modification inode state changes are applied. This includes VFS inode
+ * state updates, format conversions, verifier state synchronisation and
+ * ensuring the inode buffer remains in memory whilst the inode is dirty.
+ *
+ * We have to be careful when we grab the inode cluster buffer due to lock
+ * ordering constraints. The unlinked inode modifications (xfs_iunlink_item)
+ * require AGI -> inode cluster buffer lock order. The inode cluster buffer is
+ * not locked until ->precommit, so it happens after everything else has been
+ * modified.
+ *
+ * Further, we have AGI -> AGF lock ordering, and with O_TMPFILE handling we
+ * have AGI -> AGF -> iunlink item -> inode cluster buffer lock order. Hence we
+ * cannot safely lock the inode cluster buffer in xfs_trans_log_inode() because
+ * it can be called on a inode (e.g. via bumplink/droplink) before we take the
+ * AGF lock modifying directory blocks.
+ *
+ * Rather than force a complete rework of all the transactions to call
+ * xfs_trans_log_inode() once and once only at the end of every transaction, we
+ * move the pinning of the inode cluster buffer to a ->precommit operation. This
+ * matches how the xfs_iunlink_item locks the inode cluster buffer, and it
+ * ensures that the inode cluster buffer locking is always done last in a
+ * transaction. i.e. we ensure the lock order is always AGI -> AGF -> inode
+ * cluster buffer.
+ *
+ * If we return the inode number as the precommit sort key then we'll also
+ * guarantee that the order all inode cluster buffer locking is the same all the
+ * inodes and unlink items in the transaction.
+ */
+static int
+xfs_inode_item_precommit(
+ struct xfs_trans *tp,
+ struct xfs_log_item *lip)
+{
+ struct xfs_inode_log_item *iip = INODE_ITEM(lip);
+ struct xfs_inode *ip = iip->ili_inode;
+ struct inode *inode = VFS_I(ip);
+ unsigned int flags = iip->ili_dirty_flags;
+
+ /*
+ * Don't bother with i_lock for the I_DIRTY_TIME check here, as races
+ * don't matter - we either will need an extra transaction in 24 hours
+ * to log the timestamps, or will clear already cleared fields in the
+ * worst case.
+ */
+ if (inode->i_state & I_DIRTY_TIME) {
+ spin_lock(&inode->i_lock);
+ inode->i_state &= ~I_DIRTY_TIME;
+ spin_unlock(&inode->i_lock);
+ }
+
+ /*
+ * If we're updating the inode core or the timestamps and it's possible
+ * to upgrade this inode to bigtime format, do so now.
+ */
+ if ((flags & (XFS_ILOG_CORE | XFS_ILOG_TIMESTAMP)) &&
+ xfs_has_bigtime(ip->i_mount) &&
+ !xfs_inode_has_bigtime(ip)) {
+ ip->i_diflags2 |= XFS_DIFLAG2_BIGTIME;
+ flags |= XFS_ILOG_CORE;
+ }
+
+ /*
+ * Inode verifiers do not check that the extent size hint is an integer
+ * multiple of the rt extent size on a directory with both rtinherit
+ * and extszinherit flags set. If we're logging a directory that is
+ * misconfigured in this way, clear the hint.
+ */
+ if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
+ (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
+ (ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) {
+ ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
+ XFS_DIFLAG_EXTSZINHERIT);
+ ip->i_extsize = 0;
+ flags |= XFS_ILOG_CORE;
+ }
+
+ /*
+ * Record the specific change for fdatasync optimisation. This allows
+ * fdatasync to skip log forces for inodes that are only timestamp
+ * dirty. Once we've processed the XFS_ILOG_IVERSION flag, convert it
+ * to XFS_ILOG_CORE so that the actual on-disk dirty tracking
+ * (ili_fields) correctly tracks that the version has changed.
+ */
+ spin_lock(&iip->ili_lock);
+ iip->ili_fsync_fields |= (flags & ~XFS_ILOG_IVERSION);
+ if (flags & XFS_ILOG_IVERSION)
+ flags = ((flags & ~XFS_ILOG_IVERSION) | XFS_ILOG_CORE);
+
+ if (!iip->ili_item.li_buf) {
+ struct xfs_buf *bp;
+ int error;
+
+ /*
+ * We hold the ILOCK here, so this inode is not going to be
+ * flushed while we are here. Further, because there is no
+ * buffer attached to the item, we know that there is no IO in
+ * progress, so nothing will clear the ili_fields while we read
+ * in the buffer. Hence we can safely drop the spin lock and
+ * read the buffer knowing that the state will not change from
+ * here.
+ */
+ spin_unlock(&iip->ili_lock);
+ error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &bp);
+ if (error)
+ return error;
+
+ /*
+ * We need an explicit buffer reference for the log item but
+ * don't want the buffer to remain attached to the transaction.
+ * Hold the buffer but release the transaction reference once
+ * we've attached the inode log item to the buffer log item
+ * list.
+ */
+ xfs_buf_hold(bp);
+ spin_lock(&iip->ili_lock);
+ iip->ili_item.li_buf = bp;
+ bp->b_flags |= _XBF_INODES;
+ list_add_tail(&iip->ili_item.li_bio_list, &bp->b_li_list);
+ xfs_trans_brelse(tp, bp);
+ }
+
+ /*
+ * Always OR in the bits from the ili_last_fields field. This is to
+ * coordinate with the xfs_iflush() and xfs_buf_inode_iodone() routines
+ * in the eventual clearing of the ili_fields bits. See the big comment
+ * in xfs_iflush() for an explanation of this coordination mechanism.
+ */
+ iip->ili_fields |= (flags | iip->ili_last_fields);
+ spin_unlock(&iip->ili_lock);
+
+ /*
+ * We are done with the log item transaction dirty state, so clear it so
+ * that it doesn't pollute future transactions.
+ */
+ iip->ili_dirty_flags = 0;
+ return 0;
+}
+
/*
* The logged size of an inode fork is always the current size of the inode
* fork. This means that when an inode fork is relogged, the size of the logged
@@ -662,6 +809,8 @@ xfs_inode_item_committing(
}
static const struct xfs_item_ops xfs_inode_item_ops = {
+ .iop_sort = xfs_inode_item_sort,
+ .iop_precommit = xfs_inode_item_precommit,
.iop_size = xfs_inode_item_size,
.iop_format = xfs_inode_item_format,
.iop_pin = xfs_inode_item_pin,
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index bbd836a44ff0..377e06007804 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -17,6 +17,7 @@ struct xfs_inode_log_item {
struct xfs_log_item ili_item; /* common portion */
struct xfs_inode *ili_inode; /* inode ptr */
unsigned short ili_lock_flags; /* inode lock flags */
+ unsigned int ili_dirty_flags; /* dirty in current tx */
/*
* The ili_lock protects the interactions between the dirty state and
* the flush state of the inode log item. This allows us to do atomic
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 322eb2ee6c55..82c81d20459d 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2711,7 +2711,9 @@ xlog_recover_iunlink_bucket(
* just to flush the inodegc queue and wait for it to
* complete.
*/
- xfs_inodegc_flush(mp);
+ error = xfs_inodegc_flush(mp);
+ if (error)
+ break;
}
prev_agino = agino;
@@ -2719,10 +2721,15 @@ xlog_recover_iunlink_bucket(
}
if (prev_ip) {
+ int error2;
+
ip->i_prev_unlinked = prev_agino;
xfs_irele(prev_ip);
+
+ error2 = xfs_inodegc_flush(mp);
+ if (error2 && !error)
+ return error2;
}
- xfs_inodegc_flush(mp);
return error;
}
@@ -2789,7 +2796,6 @@ xlog_recover_iunlink_ag(
* bucket and remaining inodes on it unreferenced and
* unfreeable.
*/
- xfs_inodegc_flush(pag->pag_mount);
xlog_recover_clear_agi_bucket(pag, bucket);
}
}
@@ -2806,13 +2812,6 @@ xlog_recover_process_iunlinks(
for_each_perag(log->l_mp, agno, pag)
xlog_recover_iunlink_ag(pag);
-
- /*
- * Flush the pending unlinked inodes to ensure that the inactivations
- * are fully completed on disk and the incore inodes can be reclaimed
- * before we signal that recovery is complete.
- */
- xfs_inodegc_flush(log->l_mp);
}
STATIC void
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 429a5e12c103..e2866e7fa60c 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -62,6 +62,7 @@ struct xfs_error_cfg {
struct xfs_inodegc {
struct llist_head list;
struct delayed_work work;
+ int error;
/* approximate count of inodes in the list */
unsigned int items;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index f5dc46ce9803..abcc559f3c64 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -616,8 +616,10 @@ xfs_reflink_cancel_cow_blocks(
xfs_refcount_free_cow_extent(*tpp, del.br_startblock,
del.br_blockcount);
- xfs_free_extent_later(*tpp, del.br_startblock,
+ error = xfs_free_extent_later(*tpp, del.br_startblock,
del.br_blockcount, NULL);
+ if (error)
+ break;
/* Roll the transaction */
error = xfs_defer_finish(tpp);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 67ad1c937637..d910b141d52e 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1112,6 +1112,7 @@ xfs_inodegc_init_percpu(
#endif
init_llist_head(&gc->list);
gc->items = 0;
+ gc->error = 0;
INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);
}
return 0;
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 8afc0c080861..8c0bfc9a33b1 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -290,7 +290,9 @@ retry:
* Do not perform a synchronous scan because callers can hold
* other locks.
*/
- xfs_blockgc_flush_all(mp);
+ error = xfs_blockgc_flush_all(mp);
+ if (error)
+ return error;
want_retry = false;
goto retry;
}
@@ -970,6 +972,11 @@ __xfs_trans_commit(
error = xfs_defer_finish_noroll(&tp);
if (error)
goto out_unreserve;
+
+ /* Run precommits from final tx in defer chain. */
+ error = xfs_trans_run_precommits(tp);
+ if (error)
+ goto out_unreserve;
}
/*
diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c
index 65d4c4fe6364..1451e7b48669 100644
--- a/fs/zonefs/file.c
+++ b/fs/zonefs/file.c
@@ -181,7 +181,6 @@ const struct address_space_operations zonefs_file_aops = {
.migrate_folio = filemap_migrate_folio,
.is_partially_uptodate = iomap_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
- .direct_IO = noop_direct_IO,
.swap_activate = zonefs_swap_activate,
};
@@ -342,6 +341,77 @@ static loff_t zonefs_file_llseek(struct file *file, loff_t offset, int whence)
return generic_file_llseek_size(file, offset, whence, isize, isize);
}
+struct zonefs_zone_append_bio {
+ /* The target inode of the BIO */
+ struct inode *inode;
+
+ /* For sync writes, the target append write offset */
+ u64 append_offset;
+
+ /*
+ * This member must come last, bio_alloc_bioset will allocate enough
+ * bytes for entire zonefs_bio but relies on bio being last.
+ */
+ struct bio bio;
+};
+
+static inline struct zonefs_zone_append_bio *
+zonefs_zone_append_bio(struct bio *bio)
+{
+ return container_of(bio, struct zonefs_zone_append_bio, bio);
+}
+
+static void zonefs_file_zone_append_dio_bio_end_io(struct bio *bio)
+{
+ struct zonefs_zone_append_bio *za_bio = zonefs_zone_append_bio(bio);
+ struct zonefs_zone *z = zonefs_inode_zone(za_bio->inode);
+ sector_t za_sector;
+
+ if (bio->bi_status != BLK_STS_OK)
+ goto bio_end;
+
+ /*
+ * If the file zone was written underneath the file system, the zone
+ * append operation can still succedd (if the zone is not full) but
+ * the write append location will not be where we expect it to be.
+ * Check that we wrote where we intended to, that is, at z->z_wpoffset.
+ */
+ za_sector = z->z_sector + (za_bio->append_offset >> SECTOR_SHIFT);
+ if (bio->bi_iter.bi_sector != za_sector) {
+ zonefs_warn(za_bio->inode->i_sb,
+ "Invalid write sector %llu for zone at %llu\n",
+ bio->bi_iter.bi_sector, z->z_sector);
+ bio->bi_status = BLK_STS_IOERR;
+ }
+
+bio_end:
+ iomap_dio_bio_end_io(bio);
+}
+
+static void zonefs_file_zone_append_dio_submit_io(const struct iomap_iter *iter,
+ struct bio *bio,
+ loff_t file_offset)
+{
+ struct zonefs_zone_append_bio *za_bio = zonefs_zone_append_bio(bio);
+ struct inode *inode = iter->inode;
+ struct zonefs_zone *z = zonefs_inode_zone(inode);
+
+ /*
+ * Issue a zone append BIO to process sync dio writes. The append
+ * file offset is saved to check the zone append write location
+ * on completion of the BIO.
+ */
+ za_bio->inode = inode;
+ za_bio->append_offset = file_offset;
+
+ bio->bi_opf &= ~REQ_OP_WRITE;
+ bio->bi_opf |= REQ_OP_ZONE_APPEND;
+ bio->bi_iter.bi_sector = z->z_sector;
+ bio->bi_end_io = zonefs_file_zone_append_dio_bio_end_io;
+
+ submit_bio(bio);
+}
+
static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
int error, unsigned int flags)
{
@@ -372,93 +442,17 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
return 0;
}
-static const struct iomap_dio_ops zonefs_write_dio_ops = {
- .end_io = zonefs_file_write_dio_end_io,
-};
+static struct bio_set zonefs_zone_append_bio_set;
-static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
-{
- struct inode *inode = file_inode(iocb->ki_filp);
- struct zonefs_zone *z = zonefs_inode_zone(inode);
- struct block_device *bdev = inode->i_sb->s_bdev;
- unsigned int max = bdev_max_zone_append_sectors(bdev);
- pgoff_t start, end;
- struct bio *bio;
- ssize_t size = 0;
- int nr_pages;
- ssize_t ret;
-
- max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize);
- iov_iter_truncate(from, max);
-
- /*
- * If the inode block size (zone write granularity) is smaller than the
- * page size, we may be appending data belonging to the last page of the
- * inode straddling inode->i_size, with that page already cached due to
- * a buffered read or readahead. So make sure to invalidate that page.
- * This will always be a no-op for the case where the block size is
- * equal to the page size.
- */
- start = iocb->ki_pos >> PAGE_SHIFT;
- end = (iocb->ki_pos + iov_iter_count(from) - 1) >> PAGE_SHIFT;
- if (invalidate_inode_pages2_range(inode->i_mapping, start, end))
- return -EBUSY;
-
- nr_pages = iov_iter_npages(from, BIO_MAX_VECS);
- if (!nr_pages)
- return 0;
-
- bio = bio_alloc(bdev, nr_pages,
- REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE, GFP_NOFS);
- bio->bi_iter.bi_sector = z->z_sector;
- bio->bi_ioprio = iocb->ki_ioprio;
- if (iocb_is_dsync(iocb))
- bio->bi_opf |= REQ_FUA;
-
- ret = bio_iov_iter_get_pages(bio, from);
- if (unlikely(ret))
- goto out_release;
-
- size = bio->bi_iter.bi_size;
- task_io_account_write(size);
-
- if (iocb->ki_flags & IOCB_HIPRI)
- bio_set_polled(bio, iocb);
-
- ret = submit_bio_wait(bio);
-
- /*
- * If the file zone was written underneath the file system, the zone
- * write pointer may not be where we expect it to be, but the zone
- * append write can still succeed. So check manually that we wrote where
- * we intended to, that is, at zi->i_wpoffset.
- */
- if (!ret) {
- sector_t wpsector =
- z->z_sector + (z->z_wpoffset >> SECTOR_SHIFT);
-
- if (bio->bi_iter.bi_sector != wpsector) {
- zonefs_warn(inode->i_sb,
- "Corrupted write pointer %llu for zone at %llu\n",
- bio->bi_iter.bi_sector, z->z_sector);
- ret = -EIO;
- }
- }
-
- zonefs_file_write_dio_end_io(iocb, size, ret, 0);
- trace_zonefs_file_dio_append(inode, size, ret);
-
-out_release:
- bio_release_pages(bio, false);
- bio_put(bio);
-
- if (ret >= 0) {
- iocb->ki_pos += size;
- return size;
- }
+static const struct iomap_dio_ops zonefs_zone_append_dio_ops = {
+ .submit_io = zonefs_file_zone_append_dio_submit_io,
+ .end_io = zonefs_file_write_dio_end_io,
+ .bio_set = &zonefs_zone_append_bio_set,
+};
- return ret;
-}
+static const struct iomap_dio_ops zonefs_write_dio_ops = {
+ .end_io = zonefs_file_write_dio_end_io,
+};
/*
* Do not exceed the LFS limits nor the file zone size. If pos is under the
@@ -539,6 +533,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
struct zonefs_inode_info *zi = ZONEFS_I(inode);
struct zonefs_zone *z = zonefs_inode_zone(inode);
struct super_block *sb = inode->i_sb;
+ const struct iomap_dio_ops *dio_ops;
bool sync = is_sync_kiocb(iocb);
bool append = false;
ssize_t ret, count;
@@ -582,20 +577,26 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
}
if (append) {
- ret = zonefs_file_dio_append(iocb, from);
+ unsigned int max = bdev_max_zone_append_sectors(sb->s_bdev);
+
+ max = ALIGN_DOWN(max << SECTOR_SHIFT, sb->s_blocksize);
+ iov_iter_truncate(from, max);
+
+ dio_ops = &zonefs_zone_append_dio_ops;
} else {
- /*
- * iomap_dio_rw() may return ENOTBLK if there was an issue with
- * page invalidation. Overwrite that error code with EBUSY to
- * be consistent with zonefs_file_dio_append() return value for
- * similar issues.
- */
- ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops,
- &zonefs_write_dio_ops, 0, NULL, 0);
- if (ret == -ENOTBLK)
- ret = -EBUSY;
+ dio_ops = &zonefs_write_dio_ops;
}
+ /*
+ * iomap_dio_rw() may return ENOTBLK if there was an issue with
+ * page invalidation. Overwrite that error code with EBUSY so that
+ * the user can make sense of the error.
+ */
+ ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops,
+ dio_ops, 0, NULL, 0);
+ if (ret == -ENOTBLK)
+ ret = -EBUSY;
+
if (zonefs_zone_is_seq(z) &&
(ret > 0 || ret == -EIOCBQUEUED)) {
if (ret > 0)
@@ -851,6 +852,7 @@ static int zonefs_file_open(struct inode *inode, struct file *file)
{
int ret;
+ file->f_mode |= FMODE_CAN_ODIRECT;
ret = generic_file_open(inode, file);
if (ret)
return ret;
@@ -938,3 +940,15 @@ const struct file_operations zonefs_file_operations = {
.splice_write = iter_file_splice_write,
.iopoll = iocb_bio_iopoll,
};
+
+int zonefs_file_bioset_init(void)
+{
+ return bioset_init(&zonefs_zone_append_bio_set, BIO_POOL_SIZE,
+ offsetof(struct zonefs_zone_append_bio, bio),
+ BIOSET_NEED_BVECS);
+}
+
+void zonefs_file_bioset_exit(void)
+{
+ bioset_exit(&zonefs_zone_append_bio_set);
+}
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 9350221abfc5..bbe44a26a8e5 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -1412,10 +1412,14 @@ static int __init zonefs_init(void)
BUILD_BUG_ON(sizeof(struct zonefs_super) != ZONEFS_SUPER_SIZE);
- ret = zonefs_init_inodecache();
+ ret = zonefs_file_bioset_init();
if (ret)
return ret;
+ ret = zonefs_init_inodecache();
+ if (ret)
+ goto destroy_bioset;
+
ret = zonefs_sysfs_init();
if (ret)
goto destroy_inodecache;
@@ -1430,6 +1434,8 @@ sysfs_exit:
zonefs_sysfs_exit();
destroy_inodecache:
zonefs_destroy_inodecache();
+destroy_bioset:
+ zonefs_file_bioset_exit();
return ret;
}
@@ -1439,6 +1445,7 @@ static void __exit zonefs_exit(void)
unregister_filesystem(&zonefs_type);
zonefs_sysfs_exit();
zonefs_destroy_inodecache();
+ zonefs_file_bioset_exit();
}
MODULE_AUTHOR("Damien Le Moal");
diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h
index 8175652241b5..f663b8ebc2cb 100644
--- a/fs/zonefs/zonefs.h
+++ b/fs/zonefs/zonefs.h
@@ -279,6 +279,8 @@ extern const struct file_operations zonefs_dir_operations;
extern const struct address_space_operations zonefs_file_aops;
extern const struct file_operations zonefs_file_operations;
int zonefs_file_truncate(struct inode *inode, loff_t isize);
+int zonefs_file_bioset_init(void);
+void zonefs_file_bioset_exit(void);
/* In sysfs.c */
int zonefs_sysfs_register(struct super_block *sb);
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index e6098a08c914..9ffdc0425bc2 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -761,6 +761,7 @@ ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
acpi_event_status
*event_status))
ACPI_HW_DEPENDENT_RETURN_UINT32(u32 acpi_dispatch_gpe(acpi_handle gpe_device, u32 gpe_number))
+ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_hw_disable_all_gpes(void))
ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_disable_all_gpes(void))
ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_runtime_gpes(void))
ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_wakeup_gpes(void))
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index d1f57e4868ed..da9e5629ea43 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -839,6 +839,9 @@
#ifdef CONFIG_UNWINDER_ORC
#define ORC_UNWIND_TABLE \
+ .orc_header : AT(ADDR(.orc_header) - LOAD_OFFSET) { \
+ BOUNDED_SECTION_BY(.orc_header, _orc_header) \
+ } \
. = ALIGN(4); \
.orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) { \
BOUNDED_SECTION_BY(.orc_unwind_ip, _orc_unwind_ip) \
@@ -891,9 +894,16 @@
/*
* Discard .note.GNU-stack, which is emitted as PROGBITS by the compiler.
* Otherwise, the type of .notes section would become PROGBITS instead of NOTES.
+ *
+ * Also, discard .note.gnu.property, otherwise it forces the notes section to
+ * be 8-byte aligned which causes alignment mismatches with the kernel's custom
+ * 4-byte aligned notes.
*/
#define NOTES \
- /DISCARD/ : { *(.note.GNU-stack) } \
+ /DISCARD/ : { \
+ *(.note.GNU-stack) \
+ *(.note.gnu.property) \
+ } \
.notes : AT(ADDR(.notes) - LOAD_OFFSET) { \
BOUNDED_SECTION_BY(.note.*, _notes) \
} NOTES_HEADERS \
diff --git a/include/drm/drm_managed.h b/include/drm/drm_managed.h
index 359883942612..ad08f834af40 100644
--- a/include/drm/drm_managed.h
+++ b/include/drm/drm_managed.h
@@ -105,6 +105,22 @@ char *drmm_kstrdup(struct drm_device *dev, const char *s, gfp_t gfp);
void drmm_kfree(struct drm_device *dev, void *data);
-int drmm_mutex_init(struct drm_device *dev, struct mutex *lock);
+void __drmm_mutex_release(struct drm_device *dev, void *res);
+
+/**
+ * drmm_mutex_init - &drm_device-managed mutex_init()
+ * @dev: DRM device
+ * @lock: lock to be initialized
+ *
+ * Returns:
+ * 0 on success, or a negative errno code otherwise.
+ *
+ * This is a &drm_device-managed version of mutex_init(). The initialized
+ * lock is automatically destroyed on the final drm_dev_put().
+ */
+#define drmm_mutex_init(dev, lock) ({ \
+ mutex_init(lock); \
+ drmm_add_action_or_reset(dev, __drmm_mutex_release, lock); \
+}) \
#endif
diff --git a/include/dt-bindings/power/qcom-rpmpd.h b/include/dt-bindings/power/qcom-rpmpd.h
index 1bf8e87ecd7e..867b18e041ea 100644
--- a/include/dt-bindings/power/qcom-rpmpd.h
+++ b/include/dt-bindings/power/qcom-rpmpd.h
@@ -90,6 +90,15 @@
#define SM8150_MMCX 9
#define SM8150_MMCX_AO 10
+/* SA8155P is a special case, kept for backwards compatibility */
+#define SA8155P_CX SM8150_CX
+#define SA8155P_CX_AO SM8150_CX_AO
+#define SA8155P_EBI SM8150_EBI
+#define SA8155P_GFX SM8150_GFX
+#define SA8155P_MSS SM8150_MSS
+#define SA8155P_MX SM8150_MX
+#define SA8155P_MX_AO SM8150_MX_AO
+
/* SM8250 Power Domain Indexes */
#define SM8250_CX 0
#define SM8250_CX_AO 1
diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h
index c87aeecaa9b2..583fe3b49a49 100644
--- a/include/linux/arm_ffa.h
+++ b/include/linux/arm_ffa.h
@@ -96,6 +96,7 @@
/* FFA Bus/Device/Driver related */
struct ffa_device {
+ u32 id;
int vm_id;
bool mode_32bit;
uuid_t uuid;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 824e31dd752a..ed44a997f629 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1371,8 +1371,6 @@ enum blk_unique_id {
BLK_UID_NAA = 3,
};
-#define NFL4_UFLG_MASK 0x0000003F
-
struct block_device_operations {
void (*submit_bio)(struct bio *bio);
int (*poll_bio)(struct bio *bio, struct io_comp_batch *iob,
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 947a60b801db..d7779a18b24f 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -12,11 +12,10 @@
* Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code
* to disable branch tracing on a per file basis.
*/
-#if defined(CONFIG_TRACE_BRANCH_PROFILING) \
- && !defined(DISABLE_BRANCH_PROFILING) && !defined(__CHECKER__)
void ftrace_likely_update(struct ftrace_likely_data *f, int val,
int expect, int is_constant);
-
+#if defined(CONFIG_TRACE_BRANCH_PROFILING) \
+ && !defined(DISABLE_BRANCH_PROFILING) && !defined(__CHECKER__)
#define likely_notrace(x) __builtin_expect(!!(x), 1)
#define unlikely_notrace(x) __builtin_expect(!!(x), 0)
diff --git a/include/linux/cper.h b/include/linux/cper.h
index eacb7dd7b3af..c1a7dc325121 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -572,4 +572,10 @@ void cper_print_proc_ia(const char *pfx,
int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg);
int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg);
+struct acpi_hest_generic_status;
+void cper_estatus_print(const char *pfx,
+ const struct acpi_hest_generic_status *estatus);
+int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus);
+int cper_estatus_check(const struct acpi_hest_generic_status *estatus);
+
#endif
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 0f1001dca0e0..3ceb9dfa0993 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -200,6 +200,7 @@ enum cpuhp_state {
/* Online section invoked on the hotplugged CPU from the hotplug thread */
CPUHP_AP_ONLINE_IDLE,
+ CPUHP_AP_HYPERV_ONLINE,
CPUHP_AP_KVM_ONLINE,
CPUHP_AP_SCHED_WAIT_EMPTY,
CPUHP_AP_SMPBOOT_THREADS,
diff --git a/include/linux/device/class.h b/include/linux/device/class.h
index 9deeaeb457bb..abf3d3bfb6fe 100644
--- a/include/linux/device/class.h
+++ b/include/linux/device/class.h
@@ -74,6 +74,7 @@ struct class {
struct class_dev_iter {
struct klist_iter ki;
const struct device_type *type;
+ struct subsys_private *sp;
};
int __must_check class_register(const struct class *class);
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 7aa62c92185f..571d1a6e1b74 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1338,4 +1338,6 @@ bool efi_config_table_is_usable(const efi_guid_t *guid, unsigned long table)
return xen_efi_config_table_is_usable(guid, table);
}
+umode_t efi_attr_is_visible(struct kobject *kobj, struct attribute *attr, int n);
+
#endif /* _LINUX_EFI_H */
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index 36a486505b08..b9d83652c097 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -9,12 +9,12 @@
#ifndef _LINUX_EVENTFD_H
#define _LINUX_EVENTFD_H
-#include <linux/fcntl.h>
#include <linux/wait.h>
#include <linux/err.h>
#include <linux/percpu-defs.h>
#include <linux/percpu.h>
#include <linux/sched.h>
+#include <uapi/linux/eventfd.h>
/*
* CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining
@@ -23,10 +23,6 @@
* from eventfd, in order to leave a free define-space for
* shared O_* flags.
*/
-#define EFD_SEMAPHORE (1 << 0)
-#define EFD_CLOEXEC O_CLOEXEC
-#define EFD_NONBLOCK O_NONBLOCK
-
#define EFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
#define EFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS | EFD_SEMAPHORE)
@@ -40,7 +36,7 @@ struct file *eventfd_fget(int fd);
struct eventfd_ctx *eventfd_ctx_fdget(int fd);
struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n);
-__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask);
+__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, __poll_t mask);
int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
__u64 *cnt);
void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt);
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index 1716c01c4e54..efb6e2cf2034 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -391,7 +391,7 @@ struct fw_iso_packet {
u32 tag:2; /* tx: Tag in packet header */
u32 sy:4; /* tx: Sy in packet header */
u32 header_length:8; /* Length of immediate header */
- u32 header[0]; /* tx: Top of 1394 isoch. data_block */
+ u32 header[]; /* tx: Top of 1394 isoch. data_block */
};
#define FW_ISO_CONTEXT_TRANSMIT 0
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8045c7ef4000..ed5b32dc2625 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -164,6 +164,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
/* File supports non-exclusive O_DIRECT writes from multiple threads */
#define FMODE_DIO_PARALLEL_WRITE ((__force fmode_t)0x1000000)
+/* File is embedded in backing_file object */
+#define FMODE_BACKING ((__force fmode_t)0x2000000)
+
/* File was opened by fanotify and shouldn't generate fanotify events */
#define FMODE_NONOTIFY ((__force fmode_t)0x4000000)
@@ -949,29 +952,35 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
index < ra->start + ra->size);
}
+/*
+ * f_{lock,count,pos_lock} members can be highly contended and share
+ * the same cacheline. f_{lock,mode} are very frequently used together
+ * and so share the same cacheline as well. The read-mostly
+ * f_{path,inode,op} are kept on a separate cacheline.
+ */
struct file {
union {
struct llist_node f_llist;
struct rcu_head f_rcuhead;
unsigned int f_iocb_flags;
};
- struct path f_path;
- struct inode *f_inode; /* cached value */
- const struct file_operations *f_op;
/*
* Protects f_ep, f_flags.
* Must not be taken from IRQ context.
*/
spinlock_t f_lock;
- atomic_long_t f_count;
- unsigned int f_flags;
fmode_t f_mode;
+ atomic_long_t f_count;
struct mutex f_pos_lock;
loff_t f_pos;
+ unsigned int f_flags;
struct fown_struct f_owner;
const struct cred *f_cred;
struct file_ra_state f_ra;
+ struct path f_path;
+ struct inode *f_inode; /* cached value */
+ const struct file_operations *f_op;
u64 f_version;
#ifdef CONFIG_SECURITY
@@ -1069,29 +1078,29 @@ extern int send_sigurg(struct fown_struct *fown);
* sb->s_flags. Note that these mirror the equivalent MS_* flags where
* represented in both.
*/
-#define SB_RDONLY 1 /* Mount read-only */
-#define SB_NOSUID 2 /* Ignore suid and sgid bits */
-#define SB_NODEV 4 /* Disallow access to device special files */
-#define SB_NOEXEC 8 /* Disallow program execution */
-#define SB_SYNCHRONOUS 16 /* Writes are synced at once */
-#define SB_MANDLOCK 64 /* Allow mandatory locks on an FS */
-#define SB_DIRSYNC 128 /* Directory modifications are synchronous */
-#define SB_NOATIME 1024 /* Do not update access times. */
-#define SB_NODIRATIME 2048 /* Do not update directory access times */
-#define SB_SILENT 32768
-#define SB_POSIXACL (1<<16) /* VFS does not apply the umask */
-#define SB_INLINECRYPT (1<<17) /* Use blk-crypto for encrypted files */
-#define SB_KERNMOUNT (1<<22) /* this is a kern_mount call */
-#define SB_I_VERSION (1<<23) /* Update inode I_version field */
-#define SB_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
+#define SB_RDONLY BIT(0) /* Mount read-only */
+#define SB_NOSUID BIT(1) /* Ignore suid and sgid bits */
+#define SB_NODEV BIT(2) /* Disallow access to device special files */
+#define SB_NOEXEC BIT(3) /* Disallow program execution */
+#define SB_SYNCHRONOUS BIT(4) /* Writes are synced at once */
+#define SB_MANDLOCK BIT(6) /* Allow mandatory locks on an FS */
+#define SB_DIRSYNC BIT(7) /* Directory modifications are synchronous */
+#define SB_NOATIME BIT(10) /* Do not update access times. */
+#define SB_NODIRATIME BIT(11) /* Do not update directory access times */
+#define SB_SILENT BIT(15)
+#define SB_POSIXACL BIT(16) /* VFS does not apply the umask */
+#define SB_INLINECRYPT BIT(17) /* Use blk-crypto for encrypted files */
+#define SB_KERNMOUNT BIT(22) /* this is a kern_mount call */
+#define SB_I_VERSION BIT(23) /* Update inode I_version field */
+#define SB_LAZYTIME BIT(25) /* Update the on-disk [acm]times lazily */
/* These sb flags are internal to the kernel */
-#define SB_SUBMOUNT (1<<26)
-#define SB_FORCE (1<<27)
-#define SB_NOSEC (1<<28)
-#define SB_BORN (1<<29)
-#define SB_ACTIVE (1<<30)
-#define SB_NOUSER (1<<31)
+#define SB_SUBMOUNT BIT(26)
+#define SB_FORCE BIT(27)
+#define SB_NOSEC BIT(28)
+#define SB_BORN BIT(29)
+#define SB_ACTIVE BIT(30)
+#define SB_NOUSER BIT(31)
/* These flags relate to encoding and casefolding */
#define SB_ENC_STRICT_MODE_FL (1 << 0)
@@ -1234,7 +1243,7 @@ struct super_block {
*/
atomic_long_t s_fsnotify_connectors;
- /* Being remounted read-only */
+ /* Read-only state of the superblock is being changed */
int s_readonly_remount;
/* per-sb errseq_t for reporting writeback errors via syncfs */
@@ -1664,9 +1673,12 @@ static inline int vfs_whiteout(struct mnt_idmap *idmap,
WHITEOUT_DEV);
}
-struct file *vfs_tmpfile_open(struct mnt_idmap *idmap,
- const struct path *parentpath,
- umode_t mode, int open_flag, const struct cred *cred);
+struct file *kernel_tmpfile_open(struct mnt_idmap *idmap,
+ const struct path *parentpath,
+ umode_t mode, int open_flag,
+ const struct cred *cred);
+struct file *kernel_file_open(const struct path *path, int flags,
+ struct inode *inode, const struct cred *cred);
int vfs_mkobj(struct dentry *, umode_t,
int (*f)(struct dentry *, umode_t, void *),
@@ -2342,11 +2354,31 @@ static inline struct file *file_open_root_mnt(struct vfsmount *mnt,
return file_open_root(&(struct path){.mnt = mnt, .dentry = mnt->mnt_root},
name, flags, mode);
}
-extern struct file * dentry_open(const struct path *, int, const struct cred *);
-extern struct file *dentry_create(const struct path *path, int flags,
- umode_t mode, const struct cred *cred);
-extern struct file * open_with_fake_path(const struct path *, int,
- struct inode*, const struct cred *);
+struct file *dentry_open(const struct path *path, int flags,
+ const struct cred *creds);
+struct file *dentry_create(const struct path *path, int flags, umode_t mode,
+ const struct cred *cred);
+struct file *backing_file_open(const struct path *path, int flags,
+ const struct path *real_path,
+ const struct cred *cred);
+struct path *backing_file_real_path(struct file *f);
+
+/*
+ * file_real_path - get the path corresponding to f_inode
+ *
+ * When opening a backing file for a stackable filesystem (e.g.,
+ * overlayfs) f_path may be on the stackable filesystem and f_inode on
+ * the underlying filesystem. When the path associated with f_inode is
+ * needed, this helper should be used instead of accessing f_path
+ * directly.
+*/
+static inline const struct path *file_real_path(struct file *f)
+{
+ if (unlikely(f->f_mode & FMODE_BACKING))
+ return backing_file_real_path(f);
+ return &f->f_path;
+}
+
static inline struct file *file_clone_open(struct file *file)
{
return dentry_open(&file->f_path, file->f_flags, file->f_cred);
@@ -2662,7 +2694,7 @@ extern void evict_inodes(struct super_block *sb);
void dump_mapping(const struct address_space *);
/*
- * Userspace may rely on the the inode number being non-zero. For example, glibc
+ * Userspace may rely on the inode number being non-zero. For example, glibc
* simply ignores files with zero i_ino in unlink() and other places.
*
* As an additional complication, if userspace was compiled with
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index bb8467cd11ae..ed48e4f1e755 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -91,11 +91,13 @@ static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask)
static inline int fsnotify_file(struct file *file, __u32 mask)
{
- const struct path *path = &file->f_path;
+ const struct path *path;
if (file->f_mode & FMODE_NONOTIFY)
return 0;
+ /* Overlayfs internal files have fake f_path */
+ path = file_real_path(file);
return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH);
}
diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
index e76605d5b36e..1eb7eae580be 100644
--- a/include/linux/fsverity.h
+++ b/include/linux/fsverity.h
@@ -143,8 +143,8 @@ int fsverity_ioctl_enable(struct file *filp, const void __user *arg);
int fsverity_ioctl_measure(struct file *filp, void __user *arg);
int fsverity_get_digest(struct inode *inode,
- u8 digest[FS_VERITY_MAX_DIGEST_SIZE],
- enum hash_algo *alg);
+ u8 raw_digest[FS_VERITY_MAX_DIGEST_SIZE],
+ u8 *alg, enum hash_algo *halg);
/* open.c */
@@ -197,10 +197,14 @@ static inline int fsverity_ioctl_measure(struct file *filp, void __user *arg)
}
static inline int fsverity_get_digest(struct inode *inode,
- u8 digest[FS_VERITY_MAX_DIGEST_SIZE],
- enum hash_algo *alg)
+ u8 raw_digest[FS_VERITY_MAX_DIGEST_SIZE],
+ u8 *alg, enum hash_algo *halg)
{
- return -EOPNOTSUPP;
+ /*
+ * fsverity is not enabled in the kernel configuration, so always report
+ * that the file doesn't have fsverity enabled (digest size 0).
+ */
+ return 0;
}
/* open.c */
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 5c6db5533be6..67b8774eed8f 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -252,6 +252,14 @@ struct gpio_irq_chip {
bool initialized;
/**
+ * @domain_is_allocated_externally:
+ *
+ * True it the irq_domain was allocated outside of gpiolib, in which
+ * case gpiolib won't free the irq_domain itself.
+ */
+ bool domain_is_allocated_externally;
+
+ /**
* @init_hw: optional routine to initialize hardware before
* an IRQ chip will be added. This is quite useful when
* a particular driver wants to clear IRQ related registers
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 4de1dbcd3ef6..68da30625a6c 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -507,7 +507,7 @@ static inline void folio_zero_range(struct folio *folio,
zero_user_segments(&folio->page, start, start + length, 0, 0);
}
-static inline void put_and_unmap_page(struct page *page, void *addr)
+static inline void unmap_and_put_page(struct page *page, void *addr)
{
kunmap_local(addr);
put_page(page);
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index fc985e5c739d..8de6b6e67829 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -208,6 +208,7 @@ struct team {
bool queue_override_enabled;
struct list_head *qom_lists; /* array of queue override mapping lists */
bool port_mtu_change_allowed;
+ bool notifier_ctx;
struct {
unsigned int count;
unsigned int interval; /* in ms */
diff --git a/include/linux/iio/iio-gts-helper.h b/include/linux/iio/iio-gts-helper.h
index dd64e544a3da..9cb6c80dea71 100644
--- a/include/linux/iio/iio-gts-helper.h
+++ b/include/linux/iio/iio-gts-helper.h
@@ -135,7 +135,7 @@ static inline int iio_gts_find_int_time_by_sel(struct iio_gts *gts, int sel)
/**
* iio_gts_find_sel_by_int_time - find selector matching integration time
* @gts: Gain time scale descriptor
- * @gain: HW-gain for which matching selector is searched for
+ * @time: Integration time for which matching selector is searched for
*
* Return: a selector matching given integration time or -EINVAL if
* selector was not found.
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 7fe31b2cd02f..bb9c666bd584 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -46,13 +46,23 @@ int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
struct iov_iter *iter, void *ioucmd);
void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2,
unsigned issue_flags);
-void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
- void (*task_work_cb)(struct io_uring_cmd *, unsigned));
struct sock *io_uring_get_socket(struct file *file);
void __io_uring_cancel(bool cancel_all);
void __io_uring_free(struct task_struct *tsk);
void io_uring_unreg_ringfd(void);
const char *io_uring_get_opcode(u8 opcode);
+void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
+ void (*task_work_cb)(struct io_uring_cmd *, unsigned),
+ unsigned flags);
+/* users should follow semantics of IOU_F_TWQ_LAZY_WAKE */
+void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
+ void (*task_work_cb)(struct io_uring_cmd *, unsigned));
+
+static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
+ void (*task_work_cb)(struct io_uring_cmd *, unsigned))
+{
+ __io_uring_cmd_do_in_task(ioucmd, task_work_cb, 0);
+}
static inline void io_uring_files_cancel(void)
{
@@ -85,6 +95,10 @@ static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
{
}
+static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
+ void (*task_work_cb)(struct io_uring_cmd *, unsigned))
+{
+}
static inline struct sock *io_uring_get_socket(struct file *file)
{
return NULL;
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 1b2a20a42413..f04ce513fadb 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -211,6 +211,16 @@ struct io_ring_ctx {
unsigned int compat: 1;
enum task_work_notify_mode notify_method;
+
+ /*
+ * If IORING_SETUP_NO_MMAP is used, then the below holds
+ * the gup'ed pages for the two rings, and the sqes.
+ */
+ unsigned short n_ring_pages;
+ unsigned short n_sqe_pages;
+ struct page **ring_pages;
+ struct page **sqe_pages;
+
struct io_rings *rings;
struct task_struct *submitter_task;
struct percpu_ref refs;
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 311cd93377c7..dd5797fb6305 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -836,7 +836,7 @@ struct ata_port {
struct mutex scsi_scan_mutex;
struct delayed_work hotplug_task;
- struct work_struct scsi_rescan_task;
+ struct delayed_work scsi_rescan_task;
unsigned int hsm_task_state;
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index b32256e9e944..74bd269a80a2 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -344,6 +344,16 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);
#define lockdep_repin_lock(l,c) lock_repin_lock(&(l)->dep_map, (c))
#define lockdep_unpin_lock(l,c) lock_unpin_lock(&(l)->dep_map, (c))
+/*
+ * Must use lock_map_aquire_try() with override maps to avoid
+ * lockdep thinking they participate in the block chain.
+ */
+#define DEFINE_WAIT_OVERRIDE_MAP(_name, _wait_type) \
+ struct lockdep_map _name = { \
+ .name = #_name "-wait-type-override", \
+ .wait_type_inner = _wait_type, \
+ .lock_type = LD_LOCK_WAIT_OVERRIDE, }
+
#else /* !CONFIG_LOCKDEP */
static inline void lockdep_init_task(struct task_struct *task)
@@ -432,6 +442,9 @@ extern int lockdep_is_held(const void *);
#define lockdep_repin_lock(l, c) do { (void)(l); (void)(c); } while (0)
#define lockdep_unpin_lock(l, c) do { (void)(l); (void)(c); } while (0)
+#define DEFINE_WAIT_OVERRIDE_MAP(_name, _wait_type) \
+ struct lockdep_map __maybe_unused _name = {}
+
#endif /* !LOCKDEP */
enum xhlock_context_t {
@@ -556,6 +569,7 @@ do { \
#define rwsem_release(l, i) lock_release(l, i)
#define lock_map_acquire(l) lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_)
+#define lock_map_acquire_try(l) lock_acquire_exclusive(l, 0, 1, NULL, _THIS_IP_)
#define lock_map_acquire_read(l) lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_)
#define lock_map_acquire_tryread(l) lock_acquire_shared_recursive(l, 0, 1, NULL, _THIS_IP_)
#define lock_map_release(l) lock_release(l, _THIS_IP_)
diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h
index d22430840b53..59f4fb1626ea 100644
--- a/include/linux/lockdep_types.h
+++ b/include/linux/lockdep_types.h
@@ -33,6 +33,7 @@ enum lockdep_wait_type {
enum lockdep_lock_type {
LD_LOCK_NORMAL = 0, /* normal, catch all */
LD_LOCK_PERCPU, /* percpu */
+ LD_LOCK_WAIT_OVERRIDE, /* annotation */
LD_LOCK_MAX,
};
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index a4c4f737f9c1..4b9626cd83e4 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1093,6 +1093,7 @@ void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev);
int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn,
int npsvs, u32 *sig_index);
int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num);
+__be32 mlx5_core_get_terminate_scatter_list_mkey(struct mlx5_core_dev *dev);
void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common);
int mlx5_query_odp_caps(struct mlx5_core_dev *dev,
struct mlx5_odp_caps *odp_caps);
@@ -1237,6 +1238,18 @@ static inline u16 mlx5_core_max_vfs(const struct mlx5_core_dev *dev)
return dev->priv.sriov.max_vfs;
}
+static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev)
+{
+ /* LACP owner conditions:
+ * 1) Function is physical.
+ * 2) LAG is supported by FW.
+ * 3) LAG is managed by driver (currently the only option).
+ */
+ return MLX5_CAP_GEN(dev, vport_group_manager) &&
+ (MLX5_CAP_GEN(dev, num_lag_ports) > 1) &&
+ MLX5_CAP_GEN(dev, lag_master);
+}
+
static inline int mlx5_get_gid_table_len(u16 param)
{
if (param > 4) {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index dc5e2cb302a5..b89778d0d326 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1705,7 +1705,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 rc[0x1];
u8 uar_4k[0x1];
- u8 reserved_at_241[0x9];
+ u8 reserved_at_241[0x7];
+ u8 fl_rc_qp_when_roce_disabled[0x1];
+ u8 regexp_params[0x1];
u8 uar_sz[0x6];
u8 port_selection_cap[0x1];
u8 reserved_at_248[0x1];
diff --git a/include/linux/msi.h b/include/linux/msi.h
index cdb14a1ef268..a50ea79522f8 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -383,6 +383,13 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc);
void arch_teardown_msi_irq(unsigned int irq);
int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
void arch_teardown_msi_irqs(struct pci_dev *dev);
+#endif /* CONFIG_PCI_MSI_ARCH_FALLBACKS */
+
+/*
+ * Xen uses non-default msi_domain_ops and hence needs a way to populate sysfs
+ * entries of MSI IRQs.
+ */
+#if defined(CONFIG_PCI_XEN) || defined(CONFIG_PCI_MSI_ARCH_FALLBACKS)
#ifdef CONFIG_SYSFS
int msi_device_populate_sysfs(struct device *dev);
void msi_device_destroy_sysfs(struct device *dev);
@@ -390,7 +397,7 @@ void msi_device_destroy_sysfs(struct device *dev);
static inline int msi_device_populate_sysfs(struct device *dev) { return 0; }
static inline void msi_device_destroy_sysfs(struct device *dev) { }
#endif /* !CONFIG_SYSFS */
-#endif /* CONFIG_PCI_MSI_ARCH_FALLBACKS */
+#endif /* CONFIG_PCI_XEN || CONFIG_PCI_MSI_ARCH_FALLBACKS */
/*
* The restore hook is still available even for fully irq domain based
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 08fbd4622ccf..c2f0c6002a84 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -620,7 +620,7 @@ struct netdev_queue {
netdevice_tracker dev_tracker;
struct Qdisc __rcu *qdisc;
- struct Qdisc *qdisc_sleeping;
+ struct Qdisc __rcu *qdisc_sleeping;
#ifdef CONFIG_SYSFS
struct kobject kobj;
#endif
@@ -768,8 +768,11 @@ static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
/* We only give a hint, preemption can change CPU under us */
val |= raw_smp_processor_id();
- if (table->ents[index] != val)
- table->ents[index] = val;
+ /* The following WRITE_ONCE() is paired with the READ_ONCE()
+ * here, and another one in get_rps_cpu().
+ */
+ if (READ_ONCE(table->ents[index]) != val)
+ WRITE_ONCE(table->ents[index], val);
}
}
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 2aba75145144..86544707236a 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -106,12 +106,22 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
#define RAW_NOTIFIER_INIT(name) { \
.head = NULL }
+#ifdef CONFIG_TREE_SRCU
#define SRCU_NOTIFIER_INIT(name, pcpu) \
{ \
.mutex = __MUTEX_INITIALIZER(name.mutex), \
.head = NULL, \
+ .srcuu = __SRCU_USAGE_INIT(name.srcuu), \
.srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
}
+#else
+#define SRCU_NOTIFIER_INIT(name, pcpu) \
+ { \
+ .mutex = __MUTEX_INITIALIZER(name.mutex), \
+ .head = NULL, \
+ .srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
+ }
+#endif
#define ATOMIC_NOTIFIER_HEAD(name) \
struct atomic_notifier_head name = \
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 1c68d67b832f..92a2063a0a23 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -617,6 +617,12 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted)
* Please note that, confusingly, "page_mapping" refers to the inode
* address_space which maps the page from disk; whereas "page_mapped"
* refers to user virtual address space into which the page is mapped.
+ *
+ * For slab pages, since slab reuses the bits in struct page to store its
+ * internal states, the page->mapping does not exist as such, nor do these
+ * flags below. So in order to avoid testing non-existent bits, please
+ * make sure that PageSlab(page) actually evaluates to false before calling
+ * the following functions (e.g., PageAnon). See mm/slab.h.
*/
#define PAGE_MAPPING_ANON 0x1
#define PAGE_MAPPING_MOVABLE 0x2
diff --git a/include/linux/pe.h b/include/linux/pe.h
index 5e1e11540870..fdf9c95709ba 100644
--- a/include/linux/pe.h
+++ b/include/linux/pe.h
@@ -11,25 +11,26 @@
#include <linux/types.h>
/*
- * Linux EFI stub v1.0 adds the following functionality:
- * - Loading initrd from the LINUX_EFI_INITRD_MEDIA_GUID device path,
- * - Loading/starting the kernel from firmware that targets a different
- * machine type, via the entrypoint exposed in the .compat PE/COFF section.
+ * Starting from version v3.0, the major version field should be interpreted as
+ * a bit mask of features supported by the kernel's EFI stub:
+ * - 0x1: initrd loading from the LINUX_EFI_INITRD_MEDIA_GUID device path,
+ * - 0x2: initrd loading using the initrd= command line option, where the file
+ * may be specified using device path notation, and is not required to
+ * reside on the same volume as the loaded kernel image.
*
* The recommended way of loading and starting v1.0 or later kernels is to use
* the LoadImage() and StartImage() EFI boot services, and expose the initrd
* via the LINUX_EFI_INITRD_MEDIA_GUID device path.
*
- * Versions older than v1.0 support initrd loading via the image load options
- * (using initrd=, limited to the volume from which the kernel itself was
- * loaded), or via arch specific means (bootparams, DT, etc).
+ * Versions older than v1.0 may support initrd loading via the image load
+ * options (using initrd=, limited to the volume from which the kernel itself
+ * was loaded), or only via arch specific means (bootparams, DT, etc).
*
- * On x86, LoadImage() and StartImage() can be omitted if the EFI handover
- * protocol is implemented, which can be inferred from the version,
- * handover_offset and xloadflags fields in the bootparams structure.
+ * The minor version field must remain 0x0.
+ * (https://lore.kernel.org/all/efd6f2d4-547c-1378-1faa-53c044dbd297@gmail.com/)
*/
-#define LINUX_EFISTUB_MAJOR_VERSION 0x1
-#define LINUX_EFISTUB_MINOR_VERSION 0x1
+#define LINUX_EFISTUB_MAJOR_VERSION 0x3
+#define LINUX_EFISTUB_MINOR_VERSION 0x0
/*
* LINUX_PE_MAGIC appears at offset 0x38 into the MS-DOS header of EFI bootable
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index d5628a7b5eaa..c8dcfdbda1f4 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1845,9 +1845,9 @@ int perf_event_exit_cpu(unsigned int cpu);
#define perf_event_exit_cpu NULL
#endif
-extern void __weak arch_perf_update_userpage(struct perf_event *event,
- struct perf_event_mmap_page *userpg,
- u64 now);
+extern void arch_perf_update_userpage(struct perf_event *event,
+ struct perf_event_mmap_page *userpg,
+ u64 now);
#ifdef CONFIG_MMU
extern __weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index c5a0dc829714..6478838405a0 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1900,10 +1900,8 @@ void phy_package_leave(struct phy_device *phydev);
int devm_phy_package_join(struct device *dev, struct phy_device *phydev,
int addr, size_t priv_size);
-#if IS_ENABLED(CONFIG_PHYLIB)
int __init mdio_bus_init(void);
void mdio_bus_exit(void);
-#endif
int phy_ethtool_get_strings(struct phy_device *phydev, u8 *data);
int phy_ethtool_get_sset_count(struct phy_device *phydev);
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index d2c3f16cf6b1..02e0086b10f6 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -261,18 +261,14 @@ void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
extern const struct pipe_buf_operations nosteal_pipe_buf_ops;
-#ifdef CONFIG_WATCH_QUEUE
unsigned long account_pipe_buffers(struct user_struct *user,
unsigned long old, unsigned long new);
bool too_many_pipe_buffers_soft(unsigned long user_bufs);
bool too_many_pipe_buffers_hard(unsigned long user_bufs);
bool pipe_is_unprivileged_user(void);
-#endif
/* for F_SETPIPE_SZ and F_GETPIPE_SZ */
-#ifdef CONFIG_WATCH_QUEUE
int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots);
-#endif
long pipe_fcntl(struct file *, unsigned int, unsigned long arg);
struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice);
diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h
index a1aa68141d0b..7c8d65414a70 100644
--- a/include/linux/power/bq27xxx_battery.h
+++ b/include/linux/power/bq27xxx_battery.h
@@ -2,6 +2,8 @@
#ifndef __LINUX_BQ27X00_BATTERY_H__
#define __LINUX_BQ27X00_BATTERY_H__
+#include <linux/power_supply.h>
+
enum bq27xxx_chip {
BQ27000 = 1, /* bq27000, bq27200 */
BQ27010, /* bq27010, bq27210 */
@@ -68,7 +70,9 @@ struct bq27xxx_device_info {
struct bq27xxx_access_methods bus;
struct bq27xxx_reg_cache cache;
int charge_design_full;
+ bool removed;
unsigned long last_update;
+ union power_supply_propval last_status;
struct delayed_work work;
struct power_supply *bat;
struct list_head list;
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 0260f5ea98fe..253f2676d93a 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -158,6 +158,8 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task);
#endif /* CONFIG_PROC_PID_ARCH_STATUS */
+void arch_report_meminfo(struct seq_file *m);
+
#else /* CONFIG_PROC_FS */
static inline void proc_root_init(void)
diff --git a/include/linux/regulator/pca9450.h b/include/linux/regulator/pca9450.h
index 3c01c2bf84f5..505c908dbb81 100644
--- a/include/linux/regulator/pca9450.h
+++ b/include/linux/regulator/pca9450.h
@@ -196,11 +196,11 @@ enum {
/* PCA9450_REG_LDO3_VOLT bits */
#define LDO3_EN_MASK 0xC0
-#define LDO3OUT_MASK 0x0F
+#define LDO3OUT_MASK 0x1F
/* PCA9450_REG_LDO4_VOLT bits */
#define LDO4_EN_MASK 0xC0
-#define LDO4OUT_MASK 0x0F
+#define LDO4OUT_MASK 0x1F
/* PCA9450_REG_LDO5_VOLT bits */
#define LDO5L_EN_MASK 0xC0
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 537cbf9a2ade..e0f5ac90a228 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -29,7 +29,6 @@ struct kernel_clone_args {
u32 io_thread:1;
u32 user_worker:1;
u32 no_files:1;
- u32 ignore_signals:1;
unsigned long stack;
unsigned long stack_size;
unsigned long tls;
diff --git a/include/linux/sched/vhost_task.h b/include/linux/sched/vhost_task.h
index 6123c10b99cf..837a23624a66 100644
--- a/include/linux/sched/vhost_task.h
+++ b/include/linux/sched/vhost_task.h
@@ -2,22 +2,13 @@
#ifndef _LINUX_VHOST_TASK_H
#define _LINUX_VHOST_TASK_H
-#include <linux/completion.h>
-struct task_struct;
+struct vhost_task;
-struct vhost_task {
- int (*fn)(void *data);
- void *data;
- struct completion exited;
- unsigned long flags;
- struct task_struct *task;
-};
-
-struct vhost_task *vhost_task_create(int (*fn)(void *), void *arg,
+struct vhost_task *vhost_task_create(bool (*fn)(void *), void *arg,
const char *name);
void vhost_task_start(struct vhost_task *vtsk);
void vhost_task_stop(struct vhost_task *vtsk);
-bool vhost_task_should_stop(struct vhost_task *vtsk);
+void vhost_task_wake(struct vhost_task *vtsk);
#endif
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 7bde8e1c228a..224293b2dd06 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -107,7 +107,10 @@ extern void synchronize_shrinkers(void);
#ifdef CONFIG_SHRINKER_DEBUG
extern int shrinker_debugfs_add(struct shrinker *shrinker);
-extern struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker);
+extern struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker,
+ int *debugfs_id);
+extern void shrinker_debugfs_remove(struct dentry *debugfs_entry,
+ int debugfs_id);
extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker,
const char *fmt, ...);
#else /* CONFIG_SHRINKER_DEBUG */
@@ -115,10 +118,16 @@ static inline int shrinker_debugfs_add(struct shrinker *shrinker)
{
return 0;
}
-static inline struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker)
+static inline struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker,
+ int *debugfs_id)
{
+ *debugfs_id = -1;
return NULL;
}
+static inline void shrinker_debugfs_remove(struct dentry *debugfs_entry,
+ int debugfs_id)
+{
+}
static inline __printf(2, 3)
int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
{
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 738776ab8838..0b40417457cd 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1587,6 +1587,16 @@ static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from)
to->l4_hash = from->l4_hash;
};
+static inline int skb_cmp_decrypted(const struct sk_buff *skb1,
+ const struct sk_buff *skb2)
+{
+#ifdef CONFIG_TLS_DEVICE
+ return skb2->decrypted - skb1->decrypted;
+#else
+ return 0;
+#endif
+}
+
static inline void skb_copy_decrypted(struct sk_buff *to,
const struct sk_buff *from)
{
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 84f787416a54..054d7911bfc9 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -71,7 +71,6 @@ struct sk_psock_link {
};
struct sk_psock_work_state {
- struct sk_buff *skb;
u32 len;
u32 off;
};
@@ -105,7 +104,7 @@ struct sk_psock {
struct proto *sk_proto;
struct mutex work_mutex;
struct sk_psock_work_state work_state;
- struct work_struct work;
+ struct delayed_work work;
struct rcu_work rwork;
};
diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h
index 423220e66026..93417ba1ead4 100644
--- a/include/linux/soc/qcom/llcc-qcom.h
+++ b/include/linux/soc/qcom/llcc-qcom.h
@@ -69,9 +69,6 @@ struct llcc_slice_desc {
/**
* struct llcc_edac_reg_data - llcc edac registers data for each error type
* @name: Name of the error
- * @synd_reg: Syndrome register address
- * @count_status_reg: Status register address to read the error count
- * @ways_status_reg: Status register address to read the error ways
* @reg_cnt: Number of registers
* @count_mask: Mask value to get the error count
* @ways_mask: Mask value to get the error ways
@@ -80,9 +77,6 @@ struct llcc_slice_desc {
*/
struct llcc_edac_reg_data {
char *name;
- u64 synd_reg;
- u64 count_status_reg;
- u64 ways_status_reg;
u32 reg_cnt;
u32 count_mask;
u32 ways_mask;
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 762d7231e574..3b10636c51a9 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -509,6 +509,27 @@ static inline void svcxdr_init_encode(struct svc_rqst *rqstp)
}
/**
+ * svcxdr_encode_opaque_pages - Insert pages into an xdr_stream
+ * @xdr: xdr_stream to be updated
+ * @pages: array of pages to insert
+ * @base: starting offset of first data byte in @pages
+ * @len: number of data bytes in @pages to insert
+ *
+ * After the @pages are added, the tail iovec is instantiated pointing
+ * to end of the head buffer, and the stream is set up to encode
+ * subsequent items into the tail.
+ */
+static inline void svcxdr_encode_opaque_pages(struct svc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct page **pages,
+ unsigned int base,
+ unsigned int len)
+{
+ xdr_write_pages(xdr, pages, base, len);
+ xdr->page_ptr = rqstp->rq_next_page - 1;
+}
+
+/**
* svcxdr_set_auth_slack -
* @rqstp: RPC transaction
* @slack: buffer space to reserve for the transaction's security flavor
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 24aa159d29a7..a5ee0af2a310 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -135,7 +135,6 @@ struct svc_rdma_recv_ctxt {
struct ib_sge rc_recv_sge;
void *rc_recv_buf;
struct xdr_stream rc_stream;
- bool rc_temp;
u32 rc_byte_len;
unsigned int rc_page_count;
u32 rc_inv_rkey;
@@ -155,12 +154,12 @@ struct svc_rdma_send_ctxt {
struct ib_send_wr sc_send_wr;
struct ib_cqe sc_cqe;
- struct completion sc_done;
struct xdr_buf sc_hdrbuf;
struct xdr_stream sc_stream;
void *sc_xprt_buf;
+ int sc_page_count;
int sc_cur_sge_no;
-
+ struct page *sc_pages[RPCSVC_MAXPAGES];
struct ib_sge sc_sges[];
};
@@ -176,7 +175,7 @@ extern struct svc_rdma_recv_ctxt *
extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
struct svc_rdma_recv_ctxt *ctxt);
extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma);
-extern void svc_rdma_release_rqst(struct svc_rqst *rqstp);
+extern void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *ctxt);
extern int svc_rdma_recvfrom(struct svc_rqst *);
/* svc_rdma_rw.c */
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 867479204840..a6b12631db21 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -23,7 +23,7 @@ struct svc_xprt_ops {
int (*xpo_sendto)(struct svc_rqst *);
int (*xpo_result_payload)(struct svc_rqst *, unsigned int,
unsigned int);
- void (*xpo_release_rqst)(struct svc_rqst *);
+ void (*xpo_release_ctxt)(struct svc_xprt *xprt, void *ctxt);
void (*xpo_detach)(struct svc_xprt *);
void (*xpo_free)(struct svc_xprt *);
void (*xpo_kill_temp_xprt)(struct svc_xprt *);
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index d16ae621782c..a7116048a4d4 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -61,10 +61,9 @@ int svc_recv(struct svc_rqst *, long);
void svc_send(struct svc_rqst *rqstp);
void svc_drop(struct svc_rqst *);
void svc_sock_update_bufs(struct svc_serv *serv);
-bool svc_alien_sock(struct net *net, int fd);
-int svc_addsock(struct svc_serv *serv, const int fd,
- char *name_return, const size_t len,
- const struct cred *cred);
+int svc_addsock(struct svc_serv *serv, struct net *net,
+ const int fd, char *name_return, const size_t len,
+ const struct cred *cred);
void svc_init_xprt_sock(void);
void svc_cleanup_xprt_sock(void);
struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot);
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 72014c9216fc..f89ec4b5ea16 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -242,8 +242,7 @@ extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf,
extern void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
struct page **pages, struct rpc_rqst *rqst);
extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
-extern int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec,
- size_t nbytes);
+extern int xdr_reserve_space_vec(struct xdr_stream *xdr, size_t nbytes);
extern void __xdr_commit_encode(struct xdr_stream *xdr);
extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len);
extern void xdr_truncate_decode(struct xdr_stream *xdr, size_t len);
diff --git a/include/linux/surface_aggregator/device.h b/include/linux/surface_aggregator/device.h
index df81043b9e71..42b249b4c24b 100644
--- a/include/linux/surface_aggregator/device.h
+++ b/include/linux/surface_aggregator/device.h
@@ -243,11 +243,7 @@ static inline bool is_ssam_device(struct device *d)
* Return: Returns the pointer to the &struct ssam_device_driver wrapping the
* given device driver @d.
*/
-static inline
-struct ssam_device_driver *to_ssam_device_driver(struct device_driver *d)
-{
- return container_of(d, struct ssam_device_driver, driver);
-}
+#define to_ssam_device_driver(d) container_of_const(d, struct ssam_device_driver, driver)
const struct ssam_device_id *ssam_device_id_match(const struct ssam_device_id *table,
const struct ssam_device_uid uid);
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 77693389c3f9..6a1e8f157255 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -282,6 +282,7 @@ enum tpm_chip_flags {
TPM_CHIP_FLAG_ALWAYS_POWERED = BIT(5),
TPM_CHIP_FLAG_FIRMWARE_POWER_MANAGED = BIT(6),
TPM_CHIP_FLAG_FIRMWARE_UPGRADE = BIT(7),
+ TPM_CHIP_FLAG_SUSPENDED = BIT(8),
};
#define to_tpm_chip(d) container_of(d, struct tpm_chip, dev)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 0e373222a6df..7c4a0b72334e 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -806,6 +806,7 @@ enum {
FILTER_TRACE_FN,
FILTER_COMM,
FILTER_CPU,
+ FILTER_STACKTRACE,
};
extern int trace_event_raw_init(struct trace_event_call *call);
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index a2448e98854f..07531c4f4350 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -443,7 +443,7 @@ static inline struct usb_composite_driver *to_cdriver(
* @bcd_webusb_version: 0x0100 by default, WebUSB specification version
* @b_webusb_vendor_code: 0x0 by default, vendor code for WebUSB
* @landing_page: empty by default, landing page to announce in WebUSB
- * @use_webusb:: false by default, interested gadgets set it
+ * @use_webusb: false by default, interested gadgets set it
* @os_desc_config: the configuration to be used with OS descriptors
* @setup_pending: true when setup request is queued but not completed
* @os_desc_pending: true when os_desc request is queued but not completed
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 094c77eaf455..0c7eff91adf4 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -501,6 +501,11 @@ void *hcd_buffer_alloc(struct usb_bus *bus, size_t size,
void hcd_buffer_free(struct usb_bus *bus, size_t size,
void *addr, dma_addr_t dma);
+void *hcd_buffer_alloc_pages(struct usb_hcd *hcd,
+ size_t size, gfp_t mem_flags, dma_addr_t *dma);
+void hcd_buffer_free_pages(struct usb_hcd *hcd,
+ size_t size, void *addr, dma_addr_t dma);
+
/* generic bus glue, needed for host controllers that don't use PCI */
extern irqreturn_t usb_hcd_irq(int irq, void *__hcd);
diff --git a/include/linux/user_events.h b/include/linux/user_events.h
index 2847f5a18a86..8afa8c3a0973 100644
--- a/include/linux/user_events.h
+++ b/include/linux/user_events.h
@@ -17,9 +17,10 @@
#ifdef CONFIG_USER_EVENTS
struct user_event_mm {
- struct list_head link;
+ struct list_head mms_link;
struct list_head enablers;
struct mm_struct *mm;
+ /* Used for one-shot lists, protected by event_mutex */
struct user_event_mm *next;
refcount_t refcnt;
refcount_t tasks;
diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h
index fc6bba20273b..45cd42f55d49 100644
--- a/include/linux/watch_queue.h
+++ b/include/linux/watch_queue.h
@@ -38,7 +38,7 @@ struct watch_filter {
struct watch_queue {
struct rcu_head rcu;
struct watch_filter __rcu *filter;
- struct pipe_inode_info *pipe; /* The pipe we're using as a buffer */
+ struct pipe_inode_info *pipe; /* Pipe we use as a buffer, NULL if queue closed */
struct hlist_head watches; /* Contributory watches */
struct page **notes; /* Preallocated notifications */
unsigned long *notes_bitmap; /* Allocation bitmap for notes */
@@ -46,7 +46,6 @@ struct watch_queue {
spinlock_t lock;
unsigned int nr_notes; /* Number of notes */
unsigned int nr_pages; /* Number of pages in notes[] */
- bool defunct; /* T when queues closed */
};
/*
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 3992c994787f..683efe29fa69 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -68,7 +68,6 @@ enum {
WORK_OFFQ_FLAG_BASE = WORK_STRUCT_COLOR_SHIFT,
__WORK_OFFQ_CANCELING = WORK_OFFQ_FLAG_BASE,
- WORK_OFFQ_CANCELING = (1 << __WORK_OFFQ_CANCELING),
/*
* When a work item is off queue, its high bits point to the last
@@ -79,12 +78,6 @@ enum {
WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS,
WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT,
WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31,
- WORK_OFFQ_POOL_NONE = (1LU << WORK_OFFQ_POOL_BITS) - 1,
-
- /* convenience constants */
- WORK_STRUCT_FLAG_MASK = (1UL << WORK_STRUCT_FLAG_BITS) - 1,
- WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK,
- WORK_STRUCT_NO_POOL = (unsigned long)WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT,
/* bit mask for work_busy() return values */
WORK_BUSY_PENDING = 1 << 0,
@@ -94,6 +87,14 @@ enum {
WORKER_DESC_LEN = 24,
};
+/* Convenience constants - of type 'unsigned long', not 'enum'! */
+#define WORK_OFFQ_CANCELING (1ul << __WORK_OFFQ_CANCELING)
+#define WORK_OFFQ_POOL_NONE ((1ul << WORK_OFFQ_POOL_BITS) - 1)
+#define WORK_STRUCT_NO_POOL (WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT)
+
+#define WORK_STRUCT_FLAG_MASK ((1ul << WORK_STRUCT_FLAG_BITS) - 1)
+#define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK)
+
struct work_struct {
atomic_long_t data;
struct list_head entry;
diff --git a/include/media/dvb_net.h b/include/media/dvb_net.h
index 9980b1dd750b..4a921ea96091 100644
--- a/include/media/dvb_net.h
+++ b/include/media/dvb_net.h
@@ -39,6 +39,9 @@ struct net_device;
* @exit: flag to indicate when the device is being removed.
* @demux: pointer to &struct dmx_demux.
* @ioctl_mutex: protect access to this struct.
+ * @remove_mutex: mutex that avoids a race condition between a callback
+ * called when the hardware is disconnected and the
+ * file_operations of dvb_net.
*
* Currently, the core supports up to %DVB_NET_DEVICES_MAX (10) network
* devices.
@@ -51,6 +54,7 @@ struct dvb_net {
unsigned int exit:1;
struct dmx_demux *demux;
struct mutex ioctl_mutex;
+ struct mutex remove_mutex;
};
/**
diff --git a/include/media/dvbdev.h b/include/media/dvbdev.h
index 29d25c8a6f13..8958e5e2fc5b 100644
--- a/include/media/dvbdev.h
+++ b/include/media/dvbdev.h
@@ -194,6 +194,21 @@ struct dvb_device {
};
/**
+ * struct dvbdevfops_node - fops nodes registered in dvbdevfops_list
+ *
+ * @fops: Dynamically allocated fops for ->owner registration
+ * @type: type of dvb_device
+ * @template: dvb_device used for registration
+ * @list_head: list_head for dvbdevfops_list
+ */
+struct dvbdevfops_node {
+ struct file_operations *fops;
+ enum dvb_device_type type;
+ const struct dvb_device *template;
+ struct list_head list_head;
+};
+
+/**
* dvb_device_get - Increase dvb_device reference
*
* @dvbdev: pointer to struct dvb_device
diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index cfd19e72d0fc..b325df0d54d6 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -1119,6 +1119,7 @@ struct v4l2_subdev {
* @vfh: pointer to &struct v4l2_fh
* @state: pointer to &struct v4l2_subdev_state
* @owner: module pointer to the owner of this file handle
+ * @client_caps: bitmask of ``V4L2_SUBDEV_CLIENT_CAP_*``
*/
struct v4l2_subdev_fh {
struct v4l2_fh vfh;
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 07df96c47ef4..872dcb91a540 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -350,6 +350,7 @@ enum {
enum {
HCI_SETUP,
HCI_CONFIG,
+ HCI_DEBUGFS_CREATED,
HCI_AUTO_OFF,
HCI_RFKILLED,
HCI_MGMT,
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index a6c8aee2f256..9654567cfae3 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -515,6 +515,7 @@ struct hci_dev {
struct work_struct cmd_sync_work;
struct list_head cmd_sync_work_list;
struct mutex cmd_sync_work_lock;
+ struct mutex unregister_lock;
struct work_struct cmd_sync_cancel_work;
struct work_struct reenable_adv_work;
@@ -1201,7 +1202,8 @@ static inline struct hci_conn *hci_conn_hash_lookup_cis(struct hci_dev *hdev,
if (id != BT_ISO_QOS_CIS_UNSET && id != c->iso_qos.ucast.cis)
continue;
- if (ba_type == c->dst_type && !bacmp(&c->dst, ba)) {
+ /* Match destination address if set */
+ if (!ba || (ba_type == c->dst_type && !bacmp(&c->dst, ba))) {
rcu_read_unlock();
return c;
}
@@ -1327,7 +1329,7 @@ int hci_le_create_cis(struct hci_conn *conn);
struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
u8 role);
-int hci_conn_del(struct hci_conn *conn);
+void hci_conn_del(struct hci_conn *conn);
void hci_conn_hash_flush(struct hci_dev *hdev);
void hci_conn_check_pending(struct hci_dev *hdev);
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 0efef2a952b7..59955ac33157 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -221,6 +221,7 @@ struct bonding {
struct bond_up_slave __rcu *usable_slaves;
struct bond_up_slave __rcu *all_slaves;
bool force_primary;
+ bool notifier_ctx;
s32 slave_cnt; /* never change this value outside the attach/detach wrappers */
int (*recv_probe)(const struct sk_buff *, struct bonding *,
struct slave *);
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 8903053fa5aa..ab0f0a5b0860 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -959,6 +959,14 @@ struct dsa_switch_ops {
void (*port_disable)(struct dsa_switch *ds, int port);
/*
+ * Compatibility between device trees defining multiple CPU ports and
+ * drivers which are not OK to use by default the numerically smallest
+ * CPU port of a switch for its local ports. This can return NULL,
+ * meaning "don't know/don't care".
+ */
+ struct dsa_port *(*preferred_default_local_cpu_port)(struct dsa_switch *ds);
+
+ /*
* Port's MAC EEE settings
*/
int (*set_mac_eee)(struct dsa_switch *ds, int port,
diff --git a/include/net/handshake.h b/include/net/handshake.h
index 3352b1ab43b3..2e26e436e85f 100644
--- a/include/net/handshake.h
+++ b/include/net/handshake.h
@@ -24,6 +24,7 @@ struct tls_handshake_args {
struct socket *ta_sock;
tls_done_func_t ta_done;
void *ta_data;
+ const char *ta_peername;
unsigned int ta_timeout_ms;
key_serial_t ta_keyring;
key_serial_t ta_my_cert;
diff --git a/include/net/ip.h b/include/net/ip.h
index c3fffaa92d6e..acec504c469a 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -76,6 +76,7 @@ struct ipcm_cookie {
__be32 addr;
int oif;
struct ip_options_rcu *opt;
+ __u8 protocol;
__u8 ttl;
__s16 tos;
char priority;
@@ -96,6 +97,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
ipcm->sockc.tsflags = inet->sk.sk_tsflags;
ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
ipcm->addr = inet->inet_saddr;
+ ipcm->protocol = inet->inet_num;
}
#define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index cd386aa7c7cc..9eef19972845 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -347,10 +347,8 @@ struct mana_tx_qp {
struct mana_ethtool_stats {
u64 stop_queue;
u64 wake_queue;
- u64 tx_cqes;
u64 tx_cqe_err;
u64 tx_cqe_unknown_type;
- u64 rx_cqes;
u64 rx_coalesced_err;
u64 rx_cqe_unknown_type;
};
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 3fa5774bddac..f6a8ecc6b1fa 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -180,7 +180,7 @@ struct pneigh_entry {
netdevice_tracker dev_tracker;
u32 flags;
u8 protocol;
- u8 key[];
+ u32 key[];
};
/*
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index ebb28ec5b6fa..f37f9f34430c 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -268,7 +268,7 @@ int flow_offload_route_init(struct flow_offload *flow,
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
void flow_offload_refresh(struct nf_flowtable *flow_table,
- struct flow_offload *flow);
+ struct flow_offload *flow, bool force);
struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
struct flow_offload_tuple *tuple);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 2e24ea1d744c..ee47d7143d99 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -462,7 +462,8 @@ struct nft_set_ops {
const struct nft_set *set,
const struct nft_set_elem *elem,
unsigned int flags);
-
+ void (*commit)(const struct nft_set *set);
+ void (*abort)(const struct nft_set *set);
u64 (*privsize)(const struct nlattr * const nla[],
const struct nft_set_desc *desc);
bool (*estimate)(const struct nft_set_desc *desc,
@@ -471,7 +472,8 @@ struct nft_set_ops {
int (*init)(const struct nft_set *set,
const struct nft_set_desc *desc,
const struct nlattr * const nla[]);
- void (*destroy)(const struct nft_set *set);
+ void (*destroy)(const struct nft_ctx *ctx,
+ const struct nft_set *set);
void (*gc_init)(const struct nft_set *set);
unsigned int elemsize;
@@ -557,6 +559,7 @@ struct nft_set {
u16 policy;
u16 udlen;
unsigned char *udata;
+ struct list_head pending_update;
/* runtime data below here */
const struct nft_set_ops *ops ____cacheline_aligned;
u16 flags:14,
@@ -807,6 +810,8 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_expr *expr_array[]);
void nft_set_elem_destroy(const struct nft_set *set, void *elem,
bool destroy_expr);
+void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set, void *elem);
/**
* struct nft_set_gc_batch_head - nf_tables set garbage collection batch
@@ -899,6 +904,7 @@ struct nft_expr_type {
enum nft_trans_phase {
NFT_TRANS_PREPARE,
+ NFT_TRANS_PREPARE_ERROR,
NFT_TRANS_ABORT,
NFT_TRANS_COMMIT,
NFT_TRANS_RELEASE
@@ -1007,7 +1013,10 @@ static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule)
return (void *)&rule->data[rule->dlen];
}
-void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule);
+void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule);
+void nft_rule_expr_deactivate(const struct nft_ctx *ctx, struct nft_rule *rule,
+ enum nft_trans_phase phase);
+void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule);
static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext,
struct nft_regs *regs,
@@ -1102,6 +1111,8 @@ int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
const struct nft_set_iter *iter,
struct nft_set_elem *elem);
int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set);
+int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
+void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
enum nft_chain_types {
NFT_CHAIN_T_DEFAULT = 0,
@@ -1138,11 +1149,17 @@ int nft_chain_validate_dependency(const struct nft_chain *chain,
int nft_chain_validate_hooks(const struct nft_chain *chain,
unsigned int hook_flags);
+static inline bool nft_chain_binding(const struct nft_chain *chain)
+{
+ return chain->flags & NFT_CHAIN_BINDING;
+}
+
static inline bool nft_chain_is_bound(struct nft_chain *chain)
{
return (chain->flags & NFT_CHAIN_BINDING) && chain->bound;
}
+int nft_chain_add(struct nft_table *table, struct nft_chain *chain);
void nft_chain_del(struct nft_chain *chain);
void nf_tables_chain_destroy(struct nft_ctx *ctx);
@@ -1556,6 +1573,7 @@ static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext)
* struct nft_trans - nf_tables object update in transaction
*
* @list: used internally
+ * @binding_list: list of objects with possible bindings
* @msg_type: message type
* @put_net: ctx->net needs to be put
* @ctx: transaction context
@@ -1563,6 +1581,7 @@ static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext)
*/
struct nft_trans {
struct list_head list;
+ struct list_head binding_list;
int msg_type;
bool put_net;
struct nft_ctx ctx;
@@ -1573,6 +1592,7 @@ struct nft_trans_rule {
struct nft_rule *rule;
struct nft_flow_rule *flow;
u32 rule_id;
+ bool bound;
};
#define nft_trans_rule(trans) \
@@ -1581,6 +1601,8 @@ struct nft_trans_rule {
(((struct nft_trans_rule *)trans->data)->flow)
#define nft_trans_rule_id(trans) \
(((struct nft_trans_rule *)trans->data)->rule_id)
+#define nft_trans_rule_bound(trans) \
+ (((struct nft_trans_rule *)trans->data)->bound)
struct nft_trans_set {
struct nft_set *set;
@@ -1605,15 +1627,19 @@ struct nft_trans_set {
(((struct nft_trans_set *)trans->data)->gc_int)
struct nft_trans_chain {
+ struct nft_chain *chain;
bool update;
char *name;
struct nft_stats __percpu *stats;
u8 policy;
+ bool bound;
u32 chain_id;
struct nft_base_chain *basechain;
struct list_head hook_list;
};
+#define nft_trans_chain(trans) \
+ (((struct nft_trans_chain *)trans->data)->chain)
#define nft_trans_chain_update(trans) \
(((struct nft_trans_chain *)trans->data)->update)
#define nft_trans_chain_name(trans) \
@@ -1622,6 +1648,8 @@ struct nft_trans_chain {
(((struct nft_trans_chain *)trans->data)->stats)
#define nft_trans_chain_policy(trans) \
(((struct nft_trans_chain *)trans->data)->policy)
+#define nft_trans_chain_bound(trans) \
+ (((struct nft_trans_chain *)trans->data)->bound)
#define nft_trans_chain_id(trans) \
(((struct nft_trans_chain *)trans->data)->chain_id)
#define nft_trans_basechain(trans) \
@@ -1698,6 +1726,7 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) { re
struct nftables_pernet {
struct list_head tables;
struct list_head commit_list;
+ struct list_head binding_list;
struct list_head module_list;
struct list_head notify_list;
struct mutex commit_mutex;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 3cceb3e9320b..5f2cfd84570a 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -53,7 +53,7 @@ struct netns_sysctl_ipv6 {
int seg6_flowlabel;
u32 ioam6_id;
u64 ioam6_id_wide;
- bool skip_notify_on_dev_down;
+ u8 skip_notify_on_dev_down;
u8 fib_notify_on_flag_change;
u8 icmpv6_error_anycast_as_unicast;
};
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index 9fa291a04621..2b12725de9c0 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -497,29 +497,6 @@ static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
return NULL;
}
-/* Variant of nexthop_fib6_nh().
- * Caller should either hold rcu_read_lock(), or RTNL.
- */
-static inline struct fib6_nh *nexthop_fib6_nh_bh(struct nexthop *nh)
-{
- struct nh_info *nhi;
-
- if (nh->is_group) {
- struct nh_group *nh_grp;
-
- nh_grp = rcu_dereference_rtnl(nh->nh_grp);
- nh = nexthop_mpath_select(nh_grp, 0);
- if (!nh)
- return NULL;
- }
-
- nhi = rcu_dereference_rtnl(nh->nh_info);
- if (nhi->family == AF_INET6)
- return &nhi->fib6_nh;
-
- return NULL;
-}
-
static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i)
{
struct fib6_nh *fib6_nh;
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index c8ec2f34722b..126f9e294389 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -399,22 +399,4 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid)
page_pool_update_nid(pool, new_nid);
}
-static inline void page_pool_ring_lock(struct page_pool *pool)
- __acquires(&pool->ring.producer_lock)
-{
- if (in_softirq())
- spin_lock(&pool->ring.producer_lock);
- else
- spin_lock_bh(&pool->ring.producer_lock);
-}
-
-static inline void page_pool_ring_unlock(struct page_pool *pool)
- __releases(&pool->ring.producer_lock)
-{
- if (in_softirq())
- spin_unlock(&pool->ring.producer_lock);
- else
- spin_unlock_bh(&pool->ring.producer_lock);
-}
-
#endif /* _NET_PAGE_POOL_H */
diff --git a/include/net/ping.h b/include/net/ping.h
index 9233ad3de0ad..bc7779262e60 100644
--- a/include/net/ping.h
+++ b/include/net/ping.h
@@ -16,11 +16,7 @@
#define PING_HTABLE_SIZE 64
#define PING_HTABLE_MASK (PING_HTABLE_SIZE-1)
-/*
- * gid_t is either uint or ushort. We want to pass it to
- * proc_dointvec_minmax(), so it must not be larger than MAX_INT
- */
-#define GID_T_MAX (((gid_t)~0U) >> 1)
+#define GID_T_MAX (((gid_t)~0U) - 1)
/* Compatibility glue so we can support IPv6 when it's compiled as a module */
struct pingv6_ops {
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index f436688b6efc..5722931d83d4 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -127,6 +127,8 @@ static inline void qdisc_run(struct Qdisc *q)
}
}
+extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
+
/* Calculate maximal size of packet seen by hard_start_xmit
routine of this device.
*/
diff --git a/include/net/rpl.h b/include/net/rpl.h
index 308ef0a05cae..30fe780d1e7c 100644
--- a/include/net/rpl.h
+++ b/include/net/rpl.h
@@ -23,9 +23,6 @@ static inline int rpl_init(void)
static inline void rpl_exit(void) {}
#endif
-/* Worst decompression memory usage ipv6 address (16) + pad 7 */
-#define IPV6_RPL_SRH_WORST_SWAP_SIZE (sizeof(struct in6_addr) + 7)
-
size_t ipv6_rpl_srh_size(unsigned char n, unsigned char cmpri,
unsigned char cmpre);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index fab5ba3e61b7..12eadecf8cd0 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -137,6 +137,13 @@ static inline void qdisc_refcount_inc(struct Qdisc *qdisc)
refcount_inc(&qdisc->refcnt);
}
+static inline bool qdisc_refcount_dec_if_one(struct Qdisc *qdisc)
+{
+ if (qdisc->flags & TCQ_F_BUILTIN)
+ return true;
+ return refcount_dec_if_one(&qdisc->refcnt);
+}
+
/* Intended to be used by unlocked users, when concurrent qdisc release is
* possible.
*/
@@ -545,7 +552,7 @@ static inline struct Qdisc *qdisc_root_bh(const struct Qdisc *qdisc)
static inline struct Qdisc *qdisc_root_sleeping(const struct Qdisc *qdisc)
{
- return qdisc->dev_queue->qdisc_sleeping;
+ return rcu_dereference_rtnl(qdisc->dev_queue->qdisc_sleeping);
}
static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc)
@@ -652,6 +659,7 @@ void dev_deactivate_many(struct list_head *head);
struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
struct Qdisc *qdisc);
void qdisc_reset(struct Qdisc *qdisc);
+void qdisc_destroy(struct Qdisc *qdisc);
void qdisc_put(struct Qdisc *qdisc);
void qdisc_put_unlocked(struct Qdisc *qdisc);
void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, int n, int len);
@@ -754,7 +762,9 @@ static inline bool qdisc_tx_changing(const struct net_device *dev)
for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
- if (rcu_access_pointer(txq->qdisc) != txq->qdisc_sleeping)
+
+ if (rcu_access_pointer(txq->qdisc) !=
+ rcu_access_pointer(txq->qdisc_sleeping))
return true;
}
return false;
diff --git a/include/net/sock.h b/include/net/sock.h
index 656ea89f60ff..6f428a7f3567 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -336,6 +336,7 @@ struct sk_filter;
* @sk_cgrp_data: cgroup data for this cgroup
* @sk_memcg: this socket's memory cgroup association
* @sk_write_pending: a write to stream socket waits to start
+ * @sk_wait_pending: number of threads blocked on this socket
* @sk_state_change: callback to indicate change in the state of the sock
* @sk_data_ready: callback to indicate there is data to be processed
* @sk_write_space: callback to indicate there is bf sending space available
@@ -428,6 +429,7 @@ struct sock {
unsigned int sk_napi_id;
#endif
int sk_rcvbuf;
+ int sk_wait_pending;
struct sk_filter __rcu *sk_filter;
union {
@@ -1150,8 +1152,12 @@ static inline void sock_rps_record_flow(const struct sock *sk)
* OR an additional socket flag
* [1] : sk_state and sk_prot are in the same cache line.
*/
- if (sk->sk_state == TCP_ESTABLISHED)
- sock_rps_record_flow_hash(sk->sk_rxhash);
+ if (sk->sk_state == TCP_ESTABLISHED) {
+ /* This READ_ONCE() is paired with the WRITE_ONCE()
+ * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
+ */
+ sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
+ }
}
#endif
}
@@ -1160,20 +1166,25 @@ static inline void sock_rps_save_rxhash(struct sock *sk,
const struct sk_buff *skb)
{
#ifdef CONFIG_RPS
- if (unlikely(sk->sk_rxhash != skb->hash))
- sk->sk_rxhash = skb->hash;
+ /* The following WRITE_ONCE() is paired with the READ_ONCE()
+ * here, and another one in sock_rps_record_flow().
+ */
+ if (unlikely(READ_ONCE(sk->sk_rxhash) != skb->hash))
+ WRITE_ONCE(sk->sk_rxhash, skb->hash);
#endif
}
static inline void sock_rps_reset_rxhash(struct sock *sk)
{
#ifdef CONFIG_RPS
- sk->sk_rxhash = 0;
+ /* Paired with READ_ONCE() in sock_rps_record_flow() */
+ WRITE_ONCE(sk->sk_rxhash, 0);
#endif
}
#define sk_wait_event(__sk, __timeo, __condition, __wait) \
({ int __rc; \
+ __sk->sk_wait_pending++; \
release_sock(__sk); \
__rc = __condition; \
if (!__rc) { \
@@ -1183,6 +1194,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)
} \
sched_annotate_sleep(); \
lock_sock(__sk); \
+ __sk->sk_wait_pending--; \
__rc = __condition; \
__rc; \
})
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 04a31643cda3..5066e4586cf0 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -632,6 +632,7 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb);
void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb);
void tcp_fin(struct sock *sk);
void tcp_check_space(struct sock *sk);
+void tcp_sack_compress_send_ack(struct sock *sk);
/* tcp_timer.c */
void tcp_init_xmit_timers(struct sock *);
@@ -1470,6 +1471,8 @@ static inline void tcp_adjust_rcv_ssthresh(struct sock *sk)
}
void tcp_cleanup_rbuf(struct sock *sk, int copied);
+void __tcp_cleanup_rbuf(struct sock *sk, int copied);
+
/* We provision sk_rcvbuf around 200% of sk_rcvlowat.
* If 87.5 % (7/8) of the space has been consumed, we want to override
@@ -2326,6 +2329,14 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
#endif /* CONFIG_BPF_SYSCALL */
+#ifdef CONFIG_INET
+void tcp_eat_skb(struct sock *sk, struct sk_buff *skb);
+#else
+static inline void tcp_eat_skb(struct sock *sk, struct sk_buff *skb)
+{
+}
+#endif
+
int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress,
struct sk_msg *msg, u32 bytes, int flags);
#endif /* CONFIG_NET_SOCK_MSG */
diff --git a/include/net/tls.h b/include/net/tls.h
index 6056ce5a2aa5..596595c4b1af 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -126,6 +126,7 @@ struct tls_strparser {
u32 mark : 8;
u32 stopped : 1;
u32 copy_mode : 1;
+ u32 mixed_decrypted : 1;
u32 msg_ready : 1;
struct strp_msg stm;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 33ee3f5936e6..151ca95dd08d 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1054,6 +1054,7 @@ struct xfrm_offload {
struct sec_path {
int len;
int olen;
+ int verified_cnt;
struct xfrm_state *xvec[XFRM_MAX_DEPTH];
struct xfrm_offload ovec[XFRM_MAX_OFFLOAD_DEPTH];
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index d808dc3d239e..811a0f11d0db 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -194,29 +194,6 @@ static inline enum ib_mtu iboe_get_mtu(int mtu)
return 0;
}
-static inline int iboe_get_rate(struct net_device *dev)
-{
- struct ethtool_link_ksettings cmd;
- int err;
-
- rtnl_lock();
- err = __ethtool_get_link_ksettings(dev, &cmd);
- rtnl_unlock();
- if (err)
- return IB_RATE_PORT_CURRENT;
-
- if (cmd.base.speed >= 40000)
- return IB_RATE_40_GBPS;
- else if (cmd.base.speed >= 30000)
- return IB_RATE_30_GBPS;
- else if (cmd.base.speed >= 20000)
- return IB_RATE_20_GBPS;
- else if (cmd.base.speed >= 10000)
- return IB_RATE_10_GBPS;
- else
- return IB_RATE_PORT_CURRENT;
-}
-
static inline int rdma_link_local_addr(struct in6_addr *addr)
{
if (addr->s6_addr32[0] == htonl(0xfe800000) &&
diff --git a/include/sound/hda-mlink.h b/include/sound/hda-mlink.h
index dbc47af08135..4f44f0bd5388 100644
--- a/include/sound/hda-mlink.h
+++ b/include/sound/hda-mlink.h
@@ -44,6 +44,9 @@ int hdac_bus_eml_sdw_power_down_unlocked(struct hdac_bus *bus, int sublink);
int hdac_bus_eml_sdw_set_lsdiid(struct hdac_bus *bus, int sublink, int dev_num);
+int hdac_bus_eml_sdw_map_stream_ch(struct hdac_bus *bus, int sublink, int y,
+ int channel_mask, int stream_id, int dir);
+
void hda_bus_ml_put_all(struct hdac_bus *bus);
void hda_bus_ml_reset_losidv(struct hdac_bus *bus);
int hda_bus_ml_resume(struct hdac_bus *bus);
@@ -51,6 +54,7 @@ int hda_bus_ml_suspend(struct hdac_bus *bus);
struct hdac_ext_link *hdac_bus_eml_ssp_get_hlink(struct hdac_bus *bus);
struct hdac_ext_link *hdac_bus_eml_dmic_get_hlink(struct hdac_bus *bus);
+struct hdac_ext_link *hdac_bus_eml_sdw_get_hlink(struct hdac_bus *bus);
struct mutex *hdac_bus_eml_get_mutex(struct hdac_bus *bus, bool alt, int elid);
@@ -144,6 +148,13 @@ hdac_bus_eml_sdw_power_down_unlocked(struct hdac_bus *bus, int sublink) { return
static inline int
hdac_bus_eml_sdw_set_lsdiid(struct hdac_bus *bus, int sublink, int dev_num) { return 0; }
+static inline int
+hdac_bus_eml_sdw_map_stream_ch(struct hdac_bus *bus, int sublink, int y,
+ int channel_mask, int stream_id, int dir)
+{
+ return 0;
+}
+
static inline void hda_bus_ml_put_all(struct hdac_bus *bus) { }
static inline void hda_bus_ml_reset_losidv(struct hdac_bus *bus) { }
static inline int hda_bus_ml_resume(struct hdac_bus *bus) { return 0; }
@@ -155,6 +166,9 @@ hdac_bus_eml_ssp_get_hlink(struct hdac_bus *bus) { return NULL; }
static inline struct hdac_ext_link *
hdac_bus_eml_dmic_get_hlink(struct hdac_bus *bus) { return NULL; }
+static inline struct hdac_ext_link *
+hdac_bus_eml_sdw_get_hlink(struct hdac_bus *bus) { return NULL; }
+
static inline struct mutex *
hdac_bus_eml_get_mutex(struct hdac_bus *bus, bool alt, int elid) { return NULL; }
diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h
index b38fd25c5729..528279056b3a 100644
--- a/include/sound/soc-acpi.h
+++ b/include/sound/soc-acpi.h
@@ -170,6 +170,7 @@ struct snd_soc_acpi_link_adr {
/* Descriptor for SST ASoC machine driver */
struct snd_soc_acpi_mach {
u8 id[ACPI_ID_LEN];
+ const char *uid;
const struct snd_soc_acpi_codecs *comp_ids;
const u32 link_mask;
const struct snd_soc_acpi_link_adr *links;
diff --git a/include/sound/soc-dpcm.h b/include/sound/soc-dpcm.h
index 4d6ac7699833..ebd24753dd00 100644
--- a/include/sound/soc-dpcm.h
+++ b/include/sound/soc-dpcm.h
@@ -122,6 +122,10 @@ int snd_soc_dpcm_can_be_free_stop(struct snd_soc_pcm_runtime *fe,
int snd_soc_dpcm_can_be_params(struct snd_soc_pcm_runtime *fe,
struct snd_soc_pcm_runtime *be, int stream);
+/* can this BE perform prepare */
+int snd_soc_dpcm_can_be_prepared(struct snd_soc_pcm_runtime *fe,
+ struct snd_soc_pcm_runtime *be, int stream);
+
/* is the current PCM operation for this FE ? */
int snd_soc_dpcm_fe_can_update(struct snd_soc_pcm_runtime *fe, int stream);
diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h
index 229118156a1f..4c15420e8965 100644
--- a/include/target/iscsi/iscsi_target_core.h
+++ b/include/target/iscsi/iscsi_target_core.h
@@ -562,12 +562,13 @@ struct iscsit_conn {
#define LOGIN_FLAGS_READ_ACTIVE 2
#define LOGIN_FLAGS_WRITE_ACTIVE 3
#define LOGIN_FLAGS_CLOSED 4
+#define LOGIN_FLAGS_WORKER_RUNNING 5
unsigned long login_flags;
struct delayed_work login_work;
struct iscsi_login *login;
struct timer_list nopin_timer;
struct timer_list nopin_response_timer;
- struct timer_list transport_timer;
+ struct timer_list login_timer;
struct task_struct *login_kworker;
/* Spinlock used for add/deleting cmd's from conn_cmd_list */
spinlock_t cmd_lock;
@@ -576,6 +577,8 @@ struct iscsit_conn {
spinlock_t nopin_timer_lock;
spinlock_t response_queue_lock;
spinlock_t state_lock;
+ spinlock_t login_timer_lock;
+ spinlock_t login_worker_lock;
/* libcrypto RX and TX contexts for crc32c */
struct ahash_request *conn_rx_hash;
struct ahash_request *conn_tx_hash;
@@ -792,7 +795,6 @@ struct iscsi_np {
enum np_thread_state_table np_thread_state;
bool enabled;
atomic_t np_reset_count;
- enum iscsi_timer_flags_table np_login_timer_flags;
u32 np_exports;
enum np_flags_table np_flags;
spinlock_t np_thread_lock;
@@ -800,7 +802,6 @@ struct iscsi_np {
struct socket *np_socket;
struct sockaddr_storage np_sockaddr;
struct task_struct *np_thread;
- struct timer_list np_login_timer;
void *np_context;
struct iscsit_transport *np_transport;
struct list_head np_list;
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 8ea9cea9bfeb..a8206f5332e9 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -661,6 +661,35 @@ DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_mark_finished,
TP_ARGS(inode, ordered)
);
+TRACE_EVENT(btrfs_finish_ordered_extent,
+
+ TP_PROTO(const struct btrfs_inode *inode, u64 start, u64 len,
+ bool uptodate),
+
+ TP_ARGS(inode, start, len, uptodate),
+
+ TP_STRUCT__entry_btrfs(
+ __field( u64, ino )
+ __field( u64, start )
+ __field( u64, len )
+ __field( bool, uptodate )
+ __field( u64, root_objectid )
+ ),
+
+ TP_fast_assign_btrfs(inode->root->fs_info,
+ __entry->ino = btrfs_ino(inode);
+ __entry->start = start;
+ __entry->len = len;
+ __entry->uptodate = uptodate;
+ __entry->root_objectid = inode->root->root_key.objectid;
+ ),
+
+ TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu len=%llu uptodate=%d",
+ show_root_type(__entry->root_objectid),
+ __entry->ino, __entry->start,
+ __entry->len, !!__entry->uptodate)
+);
+
DECLARE_EVENT_CLASS(btrfs__writepage,
TP_PROTO(const struct page *page, const struct inode *inode,
@@ -1982,25 +2011,27 @@ DEFINE_EVENT(btrfs__prelim_ref, btrfs_prelim_ref_insert,
);
TRACE_EVENT(btrfs_inode_mod_outstanding_extents,
- TP_PROTO(const struct btrfs_root *root, u64 ino, int mod),
+ TP_PROTO(const struct btrfs_root *root, u64 ino, int mod, unsigned outstanding),
- TP_ARGS(root, ino, mod),
+ TP_ARGS(root, ino, mod, outstanding),
TP_STRUCT__entry_btrfs(
__field( u64, root_objectid )
__field( u64, ino )
__field( int, mod )
+ __field( unsigned, outstanding )
),
TP_fast_assign_btrfs(root->fs_info,
__entry->root_objectid = root->root_key.objectid;
__entry->ino = ino;
__entry->mod = mod;
+ __entry->outstanding = outstanding;
),
- TP_printk_btrfs("root=%llu(%s) ino=%llu mod=%d",
+ TP_printk_btrfs("root=%llu(%s) ino=%llu mod=%d outstanding=%u",
show_root_type(__entry->root_objectid),
- __entry->ino, __entry->mod)
+ __entry->ino, __entry->mod, __entry->outstanding)
);
DECLARE_EVENT_CLASS(btrfs__block_group,
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index 8f461e04e5f0..f8069ef2ee0f 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -2112,6 +2112,14 @@ DEFINE_POST_CHUNK_EVENT(read);
DEFINE_POST_CHUNK_EVENT(write);
DEFINE_POST_CHUNK_EVENT(reply);
+DEFINE_EVENT(svcrdma_post_chunk_class, svcrdma_cc_release,
+ TP_PROTO(
+ const struct rpc_rdma_cid *cid,
+ int sqecount
+ ),
+ TP_ARGS(cid, sqecount)
+);
+
TRACE_EVENT(svcrdma_wc_read,
TP_PROTO(
const struct ib_wc *wc,
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 31bc7025cb44..69e42ef30979 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -2104,31 +2104,46 @@ DEFINE_SVC_DEFERRED_EVENT(drop);
DEFINE_SVC_DEFERRED_EVENT(queue);
DEFINE_SVC_DEFERRED_EVENT(recv);
-TRACE_EVENT(svcsock_new_socket,
+DECLARE_EVENT_CLASS(svcsock_lifetime_class,
TP_PROTO(
+ const void *svsk,
const struct socket *socket
),
-
- TP_ARGS(socket),
-
+ TP_ARGS(svsk, socket),
TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(const void *, svsk)
+ __field(const void *, sk)
__field(unsigned long, type)
__field(unsigned long, family)
- __field(bool, listener)
+ __field(unsigned long, state)
),
-
TP_fast_assign(
+ struct sock *sk = socket->sk;
+
+ __entry->netns_ino = sock_net(sk)->ns.inum;
+ __entry->svsk = svsk;
+ __entry->sk = sk;
__entry->type = socket->type;
- __entry->family = socket->sk->sk_family;
- __entry->listener = (socket->sk->sk_state == TCP_LISTEN);
+ __entry->family = sk->sk_family;
+ __entry->state = sk->sk_state;
),
-
- TP_printk("type=%s family=%s%s",
- show_socket_type(__entry->type),
+ TP_printk("svsk=%p type=%s family=%s%s",
+ __entry->svsk, show_socket_type(__entry->type),
rpc_show_address_family(__entry->family),
- __entry->listener ? " (listener)" : ""
+ __entry->state == TCP_LISTEN ? " (listener)" : ""
)
);
+#define DEFINE_SVCSOCK_LIFETIME_EVENT(name) \
+ DEFINE_EVENT(svcsock_lifetime_class, name, \
+ TP_PROTO( \
+ const void *svsk, \
+ const struct socket *socket \
+ ), \
+ TP_ARGS(svsk, socket))
+
+DEFINE_SVCSOCK_LIFETIME_EVENT(svcsock_new);
+DEFINE_SVCSOCK_LIFETIME_EVENT(svcsock_free);
TRACE_EVENT(svcsock_marker,
TP_PROTO(
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 86b2a82da546..54e353c9f919 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -68,7 +68,7 @@ DECLARE_EVENT_CLASS(writeback_folio_template,
strscpy_pad(__entry->name,
bdi_dev_name(mapping ? inode_to_bdi(mapping->host) :
NULL), 32);
- __entry->ino = mapping ? mapping->host->i_ino : 0;
+ __entry->ino = (mapping && mapping->host) ? mapping->host->i_ino : 0;
__entry->index = folio->index;
),
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1bb11a6ee667..c994ff5b157c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1035,6 +1035,7 @@ enum bpf_attach_type {
BPF_TRACE_KPROBE_MULTI,
BPF_LSM_CGROUP,
BPF_STRUCT_OPS,
+ BPF_NETFILTER,
__MAX_BPF_ATTACH_TYPE
};
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 1ebf8d455f07..73e2c10dc2cc 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -783,7 +783,7 @@ enum {
/* add new constants above here */
__ETHTOOL_A_STATS_GRP_CNT,
- ETHTOOL_A_STATS_GRP_MAX = (__ETHTOOL_A_STATS_CNT - 1)
+ ETHTOOL_A_STATS_GRP_MAX = (__ETHTOOL_A_STATS_GRP_CNT - 1)
};
enum {
diff --git a/include/uapi/linux/eventfd.h b/include/uapi/linux/eventfd.h
new file mode 100644
index 000000000000..2eb9ab6c32f3
--- /dev/null
+++ b/include/uapi/linux/eventfd.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_EVENTFD_H
+#define _UAPI_LINUX_EVENTFD_H
+
+#include <linux/fcntl.h>
+
+#define EFD_SEMAPHORE (1 << 0)
+#define EFD_CLOEXEC O_CLOEXEC
+#define EFD_NONBLOCK O_NONBLOCK
+
+#endif /* _UAPI_LINUX_EVENTFD_H */
diff --git a/include/uapi/linux/handshake.h b/include/uapi/linux/handshake.h
index 1de4d0b95325..3d7ea58778c9 100644
--- a/include/uapi/linux/handshake.h
+++ b/include/uapi/linux/handshake.h
@@ -44,6 +44,7 @@ enum {
HANDSHAKE_A_ACCEPT_AUTH_MODE,
HANDSHAKE_A_ACCEPT_PEER_IDENTITY,
HANDSHAKE_A_ACCEPT_CERTIFICATE,
+ HANDSHAKE_A_ACCEPT_PEERNAME,
__HANDSHAKE_A_ACCEPT_MAX,
HANDSHAKE_A_ACCEPT_MAX = (__HANDSHAKE_A_ACCEPT_MAX - 1)
diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index 4b7f2df66b99..e682ab628dfa 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -163,6 +163,7 @@ struct in_addr {
#define IP_MULTICAST_ALL 49
#define IP_UNICAST_IF 50
#define IP_LOCAL_PORT_RANGE 51
+#define IP_PROTOCOL 52
#define MCAST_EXCLUDE 0
#define MCAST_INCLUDE 1
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 0716cb17e436..f222d263bc55 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -173,6 +173,18 @@ enum {
*/
#define IORING_SETUP_DEFER_TASKRUN (1U << 13)
+/*
+ * Application provides the memory for the rings
+ */
+#define IORING_SETUP_NO_MMAP (1U << 14)
+
+/*
+ * Register the ring fd in itself for use with
+ * IORING_REGISTER_USE_REGISTERED_RING; return a registered fd index rather
+ * than an fd.
+ */
+#define IORING_SETUP_REGISTERED_FD_ONLY (1U << 15)
+
enum io_uring_op {
IORING_OP_NOP,
IORING_OP_READV,
@@ -406,7 +418,7 @@ struct io_sqring_offsets {
__u32 dropped;
__u32 array;
__u32 resv1;
- __u64 resv2;
+ __u64 user_addr;
};
/*
@@ -425,7 +437,7 @@ struct io_cqring_offsets {
__u32 cqes;
__u32 flags;
__u32 resv1;
- __u64 resv2;
+ __u64 user_addr;
};
/*
diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
index 4d93967f8aea..8eb0d7b758d2 100644
--- a/include/uapi/linux/mount.h
+++ b/include/uapi/linux/mount.h
@@ -74,7 +74,8 @@
#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */
#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */
#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */
-#define MOVE_MOUNT__MASK 0x00000177
+#define MOVE_MOUNT_BENEATH 0x00000200 /* Mount beneath top mount */
+#define MOVE_MOUNT__MASK 0x00000377
/*
* fsopen() flags.
diff --git a/include/uapi/sound/skl-tplg-interface.h b/include/uapi/sound/skl-tplg-interface.h
index f29899b179a6..4bf9c4f9add8 100644
--- a/include/uapi/sound/skl-tplg-interface.h
+++ b/include/uapi/sound/skl-tplg-interface.h
@@ -66,7 +66,8 @@ enum skl_ch_cfg {
SKL_CH_CFG_DUAL_MONO = 9,
SKL_CH_CFG_I2S_DUAL_STEREO_0 = 10,
SKL_CH_CFG_I2S_DUAL_STEREO_1 = 11,
- SKL_CH_CFG_4_CHANNEL = 12,
+ SKL_CH_CFG_7_1 = 12,
+ SKL_CH_CFG_4_CHANNEL = SKL_CH_CFG_7_1,
SKL_CH_CFG_INVALID
};
diff --git a/include/uapi/sound/sof/tokens.h b/include/uapi/sound/sof/tokens.h
index bbc37877aaff..e9ec7e4eb982 100644
--- a/include/uapi/sound/sof/tokens.h
+++ b/include/uapi/sound/sof/tokens.h
@@ -183,7 +183,7 @@
#define SOF_TKN_CAVS_AUDIO_FORMAT_IN_INTERLEAVING_STYLE 1906
#define SOF_TKN_CAVS_AUDIO_FORMAT_IN_FMT_CFG 1907
#define SOF_TKN_CAVS_AUDIO_FORMAT_IN_SAMPLE_TYPE 1908
-#define SOF_TKN_CAVS_AUDIO_FORMAT_PIN_INDEX 1909
+#define SOF_TKN_CAVS_AUDIO_FORMAT_INPUT_PIN_INDEX 1909
/* intentional token numbering discontinuity, reserved for future use */
#define SOF_TKN_CAVS_AUDIO_FORMAT_OUT_RATE 1930
#define SOF_TKN_CAVS_AUDIO_FORMAT_OUT_BIT_DEPTH 1931
@@ -194,6 +194,7 @@
#define SOF_TKN_CAVS_AUDIO_FORMAT_OUT_INTERLEAVING_STYLE 1936
#define SOF_TKN_CAVS_AUDIO_FORMAT_OUT_FMT_CFG 1937
#define SOF_TKN_CAVS_AUDIO_FORMAT_OUT_SAMPLE_TYPE 1938
+#define SOF_TKN_CAVS_AUDIO_FORMAT_OUTPUT_PIN_INDEX 1939
/* intentional token numbering discontinuity, reserved for future use */
#define SOF_TKN_CAVS_AUDIO_FORMAT_IBS 1970
#define SOF_TKN_CAVS_AUDIO_FORMAT_OBS 1971
diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index f7553293ba98..df1d04f7a542 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -1133,7 +1133,7 @@ static inline size_t ufshcd_sg_entry_size(const struct ufs_hba *hba)
({ (void)(hba); BUILD_BUG_ON(sg_entry_size != sizeof(struct ufshcd_sg_entry)); })
#endif
-static inline size_t sizeof_utp_transfer_cmd_desc(const struct ufs_hba *hba)
+static inline size_t ufshcd_get_ucd_size(const struct ufs_hba *hba)
{
return sizeof(struct utp_transfer_cmd_desc) + SG_ALL * ufshcd_sg_entry_size(hba);
}
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 780546a6cbfb..2a970f6ac68f 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -103,7 +103,7 @@ __setup("rootfstype=", fs_names_setup);
__setup("rootdelay=", root_delay_setup);
/* This can return zero length strings. Caller should check */
-static int __init split_fs_names(char *page, size_t size, char *names)
+static int __init split_fs_names(char *page, size_t size)
{
int count = 1;
char *p = page;
@@ -167,7 +167,7 @@ void __init mount_root_generic(char *name, char *pretty_name, int flags)
scnprintf(b, BDEVNAME_SIZE, "unknown-block(%u,%u)",
MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
if (root_fs_names)
- num_fs = split_fs_names(fs_names, PAGE_SIZE, root_fs_names);
+ num_fs = split_fs_names(fs_names, PAGE_SIZE);
else
num_fs = list_bdev_fs_names(fs_names, PAGE_SIZE);
retry:
@@ -315,7 +315,7 @@ static int __init mount_nodev_root(char *root_device_name)
fs_names = (void *)__get_free_page(GFP_KERNEL);
if (!fs_names)
return -EINVAL;
- num_fs = split_fs_names(fs_names, PAGE_SIZE, root_fs_names);
+ num_fs = split_fs_names(fs_names, PAGE_SIZE);
for (i = 0, fstype = fs_names; i < num_fs;
i++, fstype += strlen(fstype) + 1) {
diff --git a/io_uring/cancel.c b/io_uring/cancel.c
index b4f5dfacc0c3..58c46c852bdd 100644
--- a/io_uring/cancel.c
+++ b/io_uring/cancel.c
@@ -216,13 +216,10 @@ static int __io_sync_cancel(struct io_uring_task *tctx,
/* fixed must be grabbed every time since we drop the uring_lock */
if ((cd->flags & IORING_ASYNC_CANCEL_FD) &&
(cd->flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
- unsigned long file_ptr;
-
if (unlikely(fd >= ctx->nr_user_files))
return -EBADF;
fd = array_index_nospec(fd, ctx->nr_user_files);
- file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr;
- cd->file = (struct file *) (file_ptr & FFS_MASK);
+ cd->file = io_file_from_index(&ctx->file_table, fd);
if (!cd->file)
return -EBADF;
}
diff --git a/io_uring/epoll.c b/io_uring/epoll.c
index 9aa74d2c80bc..89bff2068a19 100644
--- a/io_uring/epoll.c
+++ b/io_uring/epoll.c
@@ -25,10 +25,6 @@ int io_epoll_ctl_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_epoll *epoll = io_kiocb_to_cmd(req, struct io_epoll);
- pr_warn_once("%s: epoll_ctl support in io_uring is deprecated and will "
- "be removed in a future Linux kernel version.\n",
- current->comm);
-
if (sqe->buf_index || sqe->splice_fd_in)
return -EINVAL;
diff --git a/io_uring/filetable.c b/io_uring/filetable.c
index 0f6fa791a47d..e7d749991de4 100644
--- a/io_uring/filetable.c
+++ b/io_uring/filetable.c
@@ -78,10 +78,8 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
file_slot = io_fixed_file_slot(&ctx->file_table, slot_index);
if (file_slot->file_ptr) {
- struct file *old_file;
-
- old_file = (struct file *)(file_slot->file_ptr & FFS_MASK);
- ret = io_queue_rsrc_removal(ctx->file_data, slot_index, old_file);
+ ret = io_queue_rsrc_removal(ctx->file_data, slot_index,
+ io_slot_file(file_slot));
if (ret)
return ret;
@@ -140,7 +138,6 @@ int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset)
{
struct io_fixed_file *file_slot;
- struct file *file;
int ret;
if (unlikely(!ctx->file_data))
@@ -153,8 +150,8 @@ int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset)
if (!file_slot->file_ptr)
return -EBADF;
- file = (struct file *)(file_slot->file_ptr & FFS_MASK);
- ret = io_queue_rsrc_removal(ctx->file_data, offset, file);
+ ret = io_queue_rsrc_removal(ctx->file_data, offset,
+ io_slot_file(file_slot));
if (ret)
return ret;
diff --git a/io_uring/filetable.h b/io_uring/filetable.h
index 351111ff8882..b47adf170c31 100644
--- a/io_uring/filetable.h
+++ b/io_uring/filetable.h
@@ -5,10 +5,6 @@
#include <linux/file.h>
#include <linux/io_uring_types.h>
-#define FFS_NOWAIT 0x1UL
-#define FFS_ISREG 0x2UL
-#define FFS_MASK ~(FFS_NOWAIT|FFS_ISREG)
-
bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files);
void io_free_file_tables(struct io_file_table *table);
@@ -43,21 +39,31 @@ io_fixed_file_slot(struct io_file_table *table, unsigned i)
return &table->files[i];
}
+#define FFS_NOWAIT 0x1UL
+#define FFS_ISREG 0x2UL
+#define FFS_MASK ~(FFS_NOWAIT|FFS_ISREG)
+
+static inline unsigned int io_slot_flags(struct io_fixed_file *slot)
+{
+ return (slot->file_ptr & ~FFS_MASK) << REQ_F_SUPPORT_NOWAIT_BIT;
+}
+
+static inline struct file *io_slot_file(struct io_fixed_file *slot)
+{
+ return (struct file *)(slot->file_ptr & FFS_MASK);
+}
+
static inline struct file *io_file_from_index(struct io_file_table *table,
int index)
{
- struct io_fixed_file *slot = io_fixed_file_slot(table, index);
-
- return (struct file *) (slot->file_ptr & FFS_MASK);
+ return io_slot_file(io_fixed_file_slot(table, index));
}
static inline void io_fixed_file_set(struct io_fixed_file *file_slot,
struct file *file)
{
- unsigned long file_ptr = (unsigned long) file;
-
- file_ptr |= io_file_get_flags(file);
- file_slot->file_ptr = file_ptr;
+ file_slot->file_ptr = (unsigned long)file |
+ (io_file_get_flags(file) >> REQ_F_SUPPORT_NOWAIT_BIT);
}
static inline void io_reset_alloc_hint(struct io_ring_ctx *ctx)
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index b2715988791e..399e9a15c38d 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -220,10 +220,12 @@ static void io_worker_exit(struct io_worker *worker)
list_del_rcu(&worker->all_list);
raw_spin_unlock(&wq->lock);
io_wq_dec_running(worker);
- worker->flags = 0;
- preempt_disable();
- current->flags &= ~PF_IO_WORKER;
- preempt_enable();
+ /*
+ * this worker is a goner, clear ->worker_private to avoid any
+ * inc/dec running calls that could happen as part of exit from
+ * touching 'worker'.
+ */
+ current->worker_private = NULL;
kfree_rcu(worker, rcu);
io_worker_ref_put(wq);
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 3bca7a79efda..1b53a2ab0a27 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -95,6 +95,7 @@
#include "timeout.h"
#include "poll.h"
+#include "rw.h"
#include "alloc_cache.h"
#define IORING_MAX_ENTRIES 32768
@@ -145,8 +146,6 @@ static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
struct task_struct *task,
bool cancel_all);
-static void io_dismantle_req(struct io_kiocb *req);
-static void io_clean_op(struct io_kiocb *req);
static void io_queue_sqe(struct io_kiocb *req);
static void io_move_task_work_from_local(struct io_ring_ctx *ctx);
static void __io_submit_flush_completions(struct io_ring_ctx *ctx);
@@ -367,6 +366,39 @@ static bool req_need_defer(struct io_kiocb *req, u32 seq)
return false;
}
+static void io_clean_op(struct io_kiocb *req)
+{
+ if (req->flags & REQ_F_BUFFER_SELECTED) {
+ spin_lock(&req->ctx->completion_lock);
+ io_put_kbuf_comp(req);
+ spin_unlock(&req->ctx->completion_lock);
+ }
+
+ if (req->flags & REQ_F_NEED_CLEANUP) {
+ const struct io_cold_def *def = &io_cold_defs[req->opcode];
+
+ if (def->cleanup)
+ def->cleanup(req);
+ }
+ if ((req->flags & REQ_F_POLLED) && req->apoll) {
+ kfree(req->apoll->double_poll);
+ kfree(req->apoll);
+ req->apoll = NULL;
+ }
+ if (req->flags & REQ_F_INFLIGHT) {
+ struct io_uring_task *tctx = req->task->io_uring;
+
+ atomic_dec(&tctx->inflight_tracked);
+ }
+ if (req->flags & REQ_F_CREDS)
+ put_cred(req->creds);
+ if (req->flags & REQ_F_ASYNC_DATA) {
+ kfree(req->async_data);
+ req->async_data = NULL;
+ }
+ req->flags &= ~IO_REQ_CLEAN_FLAGS;
+}
+
static inline void io_req_track_inflight(struct io_kiocb *req)
{
if (!(req->flags & REQ_F_INFLIGHT)) {
@@ -423,8 +455,8 @@ static void io_prep_async_work(struct io_kiocb *req)
if (req->flags & REQ_F_FORCE_ASYNC)
req->work.flags |= IO_WQ_WORK_CONCURRENT;
- if (req->file && !io_req_ffs_set(req))
- req->flags |= io_file_get_flags(req->file) << REQ_F_SUPPORT_NOWAIT_BIT;
+ if (req->file && !(req->flags & REQ_F_FIXED_FILE))
+ req->flags |= io_file_get_flags(req->file);
if (req->file && (req->flags & REQ_F_ISREG)) {
bool should_hash = def->hash_reg_file;
@@ -594,42 +626,18 @@ void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
}
static inline void __io_cq_lock(struct io_ring_ctx *ctx)
- __acquires(ctx->completion_lock)
{
if (!ctx->task_complete)
spin_lock(&ctx->completion_lock);
}
-static inline void __io_cq_unlock(struct io_ring_ctx *ctx)
-{
- if (!ctx->task_complete)
- spin_unlock(&ctx->completion_lock);
-}
-
static inline void io_cq_lock(struct io_ring_ctx *ctx)
__acquires(ctx->completion_lock)
{
spin_lock(&ctx->completion_lock);
}
-static inline void io_cq_unlock(struct io_ring_ctx *ctx)
- __releases(ctx->completion_lock)
-{
- spin_unlock(&ctx->completion_lock);
-}
-
-/* keep it inlined for io_submit_flush_completions() */
static inline void __io_cq_unlock_post(struct io_ring_ctx *ctx)
- __releases(ctx->completion_lock)
-{
- io_commit_cqring(ctx);
- __io_cq_unlock(ctx);
- io_commit_cqring_flush(ctx);
- io_cqring_wake(ctx);
-}
-
-static void __io_cq_unlock_post_flush(struct io_ring_ctx *ctx)
- __releases(ctx->completion_lock)
{
io_commit_cqring(ctx);
@@ -641,13 +649,13 @@ static void __io_cq_unlock_post_flush(struct io_ring_ctx *ctx)
*/
io_commit_cqring_flush(ctx);
} else {
- __io_cq_unlock(ctx);
+ spin_unlock(&ctx->completion_lock);
io_commit_cqring_flush(ctx);
io_cqring_wake(ctx);
}
}
-void io_cq_unlock_post(struct io_ring_ctx *ctx)
+static void io_cq_unlock_post(struct io_ring_ctx *ctx)
__releases(ctx->completion_lock)
{
io_commit_cqring(ctx);
@@ -662,10 +670,10 @@ static void io_cqring_overflow_kill(struct io_ring_ctx *ctx)
struct io_overflow_cqe *ocqe;
LIST_HEAD(list);
- io_cq_lock(ctx);
+ spin_lock(&ctx->completion_lock);
list_splice_init(&ctx->cq_overflow_list, &list);
clear_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq);
- io_cq_unlock(ctx);
+ spin_unlock(&ctx->completion_lock);
while (!list_empty(&list)) {
ocqe = list_first_entry(&list, struct io_overflow_cqe, list);
@@ -722,29 +730,29 @@ static void io_cqring_overflow_flush(struct io_ring_ctx *ctx)
}
/* can be called by any task */
-static void io_put_task_remote(struct task_struct *task, int nr)
+static void io_put_task_remote(struct task_struct *task)
{
struct io_uring_task *tctx = task->io_uring;
- percpu_counter_sub(&tctx->inflight, nr);
+ percpu_counter_sub(&tctx->inflight, 1);
if (unlikely(atomic_read(&tctx->in_cancel)))
wake_up(&tctx->wait);
- put_task_struct_many(task, nr);
+ put_task_struct(task);
}
/* used by a task to put its own references */
-static void io_put_task_local(struct task_struct *task, int nr)
+static void io_put_task_local(struct task_struct *task)
{
- task->io_uring->cached_refs += nr;
+ task->io_uring->cached_refs++;
}
/* must to be called somewhat shortly after putting a request */
-static inline void io_put_task(struct task_struct *task, int nr)
+static inline void io_put_task(struct task_struct *task)
{
if (likely(task == current))
- io_put_task_local(task, nr);
+ io_put_task_local(task);
else
- io_put_task_remote(task, nr);
+ io_put_task_remote(task);
}
void io_task_refs_refill(struct io_uring_task *tctx)
@@ -934,20 +942,19 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags
return __io_post_aux_cqe(ctx, user_data, res, cflags, true);
}
-bool io_aux_cqe(struct io_ring_ctx *ctx, bool defer, u64 user_data, s32 res, u32 cflags,
+bool io_aux_cqe(const struct io_kiocb *req, bool defer, s32 res, u32 cflags,
bool allow_overflow)
{
+ struct io_ring_ctx *ctx = req->ctx;
+ u64 user_data = req->cqe.user_data;
struct io_uring_cqe *cqe;
- unsigned int length;
if (!defer)
return __io_post_aux_cqe(ctx, user_data, res, cflags, allow_overflow);
- length = ARRAY_SIZE(ctx->submit_state.cqes);
-
lockdep_assert_held(&ctx->uring_lock);
- if (ctx->submit_state.cqes_count == length) {
+ if (ctx->submit_state.cqes_count == ARRAY_SIZE(ctx->submit_state.cqes)) {
__io_cq_lock(ctx);
__io_flush_post_cqes(ctx);
/* no need to flush - flush is deferred */
@@ -991,14 +998,18 @@ static void __io_req_complete_post(struct io_kiocb *req, unsigned issue_flags)
}
}
io_put_kbuf_comp(req);
- io_dismantle_req(req);
+ if (unlikely(req->flags & IO_REQ_CLEAN_FLAGS))
+ io_clean_op(req);
+ if (!(req->flags & REQ_F_FIXED_FILE))
+ io_put_file(req->file);
+
rsrc_node = req->rsrc_node;
/*
* Selected buffer deallocation in io_clean_op() assumes that
* we don't hold ->completion_lock. Clean them here to avoid
* deadlocks.
*/
- io_put_task_remote(req->task, 1);
+ io_put_task_remote(req->task);
wq_list_add_head(&req->comp_list, &ctx->locked_free_list);
ctx->locked_free_nr++;
}
@@ -1111,36 +1122,13 @@ __cold bool __io_alloc_req_refill(struct io_ring_ctx *ctx)
return true;
}
-static inline void io_dismantle_req(struct io_kiocb *req)
-{
- unsigned int flags = req->flags;
-
- if (unlikely(flags & IO_REQ_CLEAN_FLAGS))
- io_clean_op(req);
- if (!(flags & REQ_F_FIXED_FILE))
- io_put_file(req->file);
-}
-
-static __cold void io_free_req_tw(struct io_kiocb *req, struct io_tw_state *ts)
-{
- struct io_ring_ctx *ctx = req->ctx;
-
- if (req->rsrc_node) {
- io_tw_lock(ctx, ts);
- io_put_rsrc_node(ctx, req->rsrc_node);
- }
- io_dismantle_req(req);
- io_put_task_remote(req->task, 1);
-
- spin_lock(&ctx->completion_lock);
- wq_list_add_head(&req->comp_list, &ctx->locked_free_list);
- ctx->locked_free_nr++;
- spin_unlock(&ctx->completion_lock);
-}
-
__cold void io_free_req(struct io_kiocb *req)
{
- req->io_task_work.func = io_free_req_tw;
+ /* refs were already put, restore them for io_req_task_complete() */
+ req->flags &= ~REQ_F_REFCOUNT;
+ /* we only want to free it, don't post CQEs */
+ req->flags |= REQ_F_CQE_SKIP;
+ req->io_task_work.func = io_req_task_complete;
io_req_task_work_add(req);
}
@@ -1205,7 +1193,9 @@ static unsigned int handle_tw_list(struct llist_node *node,
ts->locked = mutex_trylock(&(*ctx)->uring_lock);
percpu_ref_get(&(*ctx)->refs);
}
- req->io_task_work.func(req, ts);
+ INDIRECT_CALL_2(req->io_task_work.func,
+ io_poll_task_func, io_req_rw_complete,
+ req, ts);
node = next;
count++;
if (unlikely(need_resched())) {
@@ -1303,7 +1293,7 @@ static __cold void io_fallback_tw(struct io_uring_task *tctx)
}
}
-static void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
+static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
{
struct io_ring_ctx *ctx = req->ctx;
unsigned nr_wait, nr_tw, nr_tw_prev;
@@ -1354,19 +1344,11 @@ static void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
wake_up_state(ctx->submitter_task, TASK_INTERRUPTIBLE);
}
-void __io_req_task_work_add(struct io_kiocb *req, unsigned flags)
+static void io_req_normal_work_add(struct io_kiocb *req)
{
struct io_uring_task *tctx = req->task->io_uring;
struct io_ring_ctx *ctx = req->ctx;
- if (!(flags & IOU_F_TWQ_FORCE_NORMAL) &&
- (ctx->flags & IORING_SETUP_DEFER_TASKRUN)) {
- rcu_read_lock();
- io_req_local_work_add(req, flags);
- rcu_read_unlock();
- return;
- }
-
/* task_work already pending, we're done */
if (!llist_add(&req->io_task_work.node, &tctx->task_list))
return;
@@ -1380,6 +1362,17 @@ void __io_req_task_work_add(struct io_kiocb *req, unsigned flags)
io_fallback_tw(tctx);
}
+void __io_req_task_work_add(struct io_kiocb *req, unsigned flags)
+{
+ if (req->ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
+ rcu_read_lock();
+ io_req_local_work_add(req, flags);
+ rcu_read_unlock();
+ } else {
+ io_req_normal_work_add(req);
+ }
+}
+
static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)
{
struct llist_node *node;
@@ -1390,7 +1383,7 @@ static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)
io_task_work.node);
node = node->next;
- __io_req_task_work_add(req, IOU_F_TWQ_FORCE_NORMAL);
+ io_req_normal_work_add(req);
}
}
@@ -1405,13 +1398,19 @@ static int __io_run_local_work(struct io_ring_ctx *ctx, struct io_tw_state *ts)
if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
atomic_andnot(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
again:
- node = io_llist_xchg(&ctx->work_llist, NULL);
+ /*
+ * llists are in reverse order, flip it back the right way before
+ * running the pending items.
+ */
+ node = llist_reverse_order(io_llist_xchg(&ctx->work_llist, NULL));
while (node) {
struct llist_node *next = node->next;
struct io_kiocb *req = container_of(node, struct io_kiocb,
io_task_work.node);
prefetch(container_of(next, struct io_kiocb, io_task_work.node));
- req->io_task_work.func(req, ts);
+ INDIRECT_CALL_2(req->io_task_work.func,
+ io_poll_task_func, io_req_rw_complete,
+ req, ts);
ret++;
node = next;
}
@@ -1498,9 +1497,6 @@ void io_queue_next(struct io_kiocb *req)
void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node)
__must_hold(&ctx->uring_lock)
{
- struct task_struct *task = NULL;
- int task_refs = 0;
-
do {
struct io_kiocb *req = container_of(node, struct io_kiocb,
comp_list);
@@ -1530,19 +1526,10 @@ void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node)
io_req_put_rsrc_locked(req, ctx);
- if (req->task != task) {
- if (task)
- io_put_task(task, task_refs);
- task = req->task;
- task_refs = 0;
- }
- task_refs++;
+ io_put_task(req->task);
node = req->comp_list.next;
io_req_add_to_cache(req, ctx);
} while (node);
-
- if (task)
- io_put_task(task, task_refs);
}
static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
@@ -1570,7 +1557,7 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
}
}
}
- __io_cq_unlock_post_flush(ctx);
+ __io_cq_unlock_post(ctx);
if (!wq_list_empty(&ctx->submit_state.compl_reqs)) {
io_free_batch_list(ctx, state->compl_reqs.first);
@@ -1578,22 +1565,6 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
}
}
-/*
- * Drop reference to request, return next in chain (if there is one) if this
- * was the last reference to this request.
- */
-static inline struct io_kiocb *io_put_req_find_next(struct io_kiocb *req)
-{
- struct io_kiocb *nxt = NULL;
-
- if (req_ref_put_and_test(req)) {
- if (unlikely(req->flags & IO_REQ_LINK_FLAGS))
- nxt = io_req_find_next(req);
- io_free_req(req);
- }
- return nxt;
-}
-
static unsigned io_cqring_events(struct io_ring_ctx *ctx)
{
/* See comment at the top of this file */
@@ -1758,54 +1729,14 @@ static void io_iopoll_req_issued(struct io_kiocb *req, unsigned int issue_flags)
}
}
-static bool io_bdev_nowait(struct block_device *bdev)
-{
- return !bdev || bdev_nowait(bdev);
-}
-
-/*
- * If we tracked the file through the SCM inflight mechanism, we could support
- * any file. For now, just ensure that anything potentially problematic is done
- * inline.
- */
-static bool __io_file_supports_nowait(struct file *file, umode_t mode)
-{
- if (S_ISBLK(mode)) {
- if (IS_ENABLED(CONFIG_BLOCK) &&
- io_bdev_nowait(I_BDEV(file->f_mapping->host)))
- return true;
- return false;
- }
- if (S_ISSOCK(mode))
- return true;
- if (S_ISREG(mode)) {
- if (IS_ENABLED(CONFIG_BLOCK) &&
- io_bdev_nowait(file->f_inode->i_sb->s_bdev) &&
- !io_is_uring_fops(file))
- return true;
- return false;
- }
-
- /* any ->read/write should understand O_NONBLOCK */
- if (file->f_flags & O_NONBLOCK)
- return true;
- return file->f_mode & FMODE_NOWAIT;
-}
-
-/*
- * If we tracked the file through the SCM inflight mechanism, we could support
- * any file. For now, just ensure that anything potentially problematic is done
- * inline.
- */
unsigned int io_file_get_flags(struct file *file)
{
- umode_t mode = file_inode(file)->i_mode;
unsigned int res = 0;
- if (S_ISREG(mode))
- res |= FFS_ISREG;
- if (__io_file_supports_nowait(file, mode))
- res |= FFS_NOWAIT;
+ if (S_ISREG(file_inode(file)->i_mode))
+ res |= REQ_F_ISREG;
+ if ((file->f_flags & O_NONBLOCK) || (file->f_mode & FMODE_NOWAIT))
+ res |= REQ_F_SUPPORT_NOWAIT;
return res;
}
@@ -1891,39 +1822,6 @@ queue:
spin_unlock(&ctx->completion_lock);
}
-static void io_clean_op(struct io_kiocb *req)
-{
- if (req->flags & REQ_F_BUFFER_SELECTED) {
- spin_lock(&req->ctx->completion_lock);
- io_put_kbuf_comp(req);
- spin_unlock(&req->ctx->completion_lock);
- }
-
- if (req->flags & REQ_F_NEED_CLEANUP) {
- const struct io_cold_def *def = &io_cold_defs[req->opcode];
-
- if (def->cleanup)
- def->cleanup(req);
- }
- if ((req->flags & REQ_F_POLLED) && req->apoll) {
- kfree(req->apoll->double_poll);
- kfree(req->apoll);
- req->apoll = NULL;
- }
- if (req->flags & REQ_F_INFLIGHT) {
- struct io_uring_task *tctx = req->task->io_uring;
-
- atomic_dec(&tctx->inflight_tracked);
- }
- if (req->flags & REQ_F_CREDS)
- put_cred(req->creds);
- if (req->flags & REQ_F_ASYNC_DATA) {
- kfree(req->async_data);
- req->async_data = NULL;
- }
- req->flags &= ~IO_REQ_CLEAN_FLAGS;
-}
-
static bool io_assign_file(struct io_kiocb *req, const struct io_issue_def *def,
unsigned int issue_flags)
{
@@ -1986,9 +1884,14 @@ int io_poll_issue(struct io_kiocb *req, struct io_tw_state *ts)
struct io_wq_work *io_wq_free_work(struct io_wq_work *work)
{
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+ struct io_kiocb *nxt = NULL;
- req = io_put_req_find_next(req);
- return req ? &req->work : NULL;
+ if (req_ref_put_and_test(req)) {
+ if (req->flags & IO_REQ_LINK_FLAGS)
+ nxt = io_req_find_next(req);
+ io_free_req(req);
+ }
+ return nxt ? &nxt->work : NULL;
}
void io_wq_submit_work(struct io_wq_work *work)
@@ -2060,19 +1963,17 @@ inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
unsigned int issue_flags)
{
struct io_ring_ctx *ctx = req->ctx;
+ struct io_fixed_file *slot;
struct file *file = NULL;
- unsigned long file_ptr;
io_ring_submit_lock(ctx, issue_flags);
if (unlikely((unsigned int)fd >= ctx->nr_user_files))
goto out;
fd = array_index_nospec(fd, ctx->nr_user_files);
- file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr;
- file = (struct file *) (file_ptr & FFS_MASK);
- file_ptr &= ~FFS_MASK;
- /* mask in overlapping REQ_F and FFS bits */
- req->flags |= (file_ptr << REQ_F_SUPPORT_NOWAIT_BIT);
+ slot = io_fixed_file_slot(&ctx->file_table, fd);
+ file = io_slot_file(slot);
+ req->flags |= io_slot_flags(slot);
io_req_set_rsrc_node(req, ctx, 0);
out:
io_ring_submit_unlock(ctx, issue_flags);
@@ -2709,11 +2610,96 @@ static void io_mem_free(void *ptr)
free_compound_page(page);
}
+static void io_pages_free(struct page ***pages, int npages)
+{
+ struct page **page_array;
+ int i;
+
+ if (!pages)
+ return;
+ page_array = *pages;
+ for (i = 0; i < npages; i++)
+ unpin_user_page(page_array[i]);
+ kvfree(page_array);
+ *pages = NULL;
+}
+
+static void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
+ unsigned long uaddr, size_t size)
+{
+ struct page **page_array;
+ unsigned int nr_pages;
+ int ret;
+
+ *npages = 0;
+
+ if (uaddr & (PAGE_SIZE - 1) || !size)
+ return ERR_PTR(-EINVAL);
+
+ nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ if (nr_pages > USHRT_MAX)
+ return ERR_PTR(-EINVAL);
+ page_array = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
+ if (!page_array)
+ return ERR_PTR(-ENOMEM);
+
+ ret = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
+ page_array);
+ if (ret != nr_pages) {
+err:
+ io_pages_free(&page_array, ret > 0 ? ret : 0);
+ return ret < 0 ? ERR_PTR(ret) : ERR_PTR(-EFAULT);
+ }
+ /*
+ * Should be a single page. If the ring is small enough that we can
+ * use a normal page, that is fine. If we need multiple pages, then
+ * userspace should use a huge page. That's the only way to guarantee
+ * that we get contigious memory, outside of just being lucky or
+ * (currently) having low memory fragmentation.
+ */
+ if (page_array[0] != page_array[ret - 1])
+ goto err;
+ *pages = page_array;
+ *npages = nr_pages;
+ return page_to_virt(page_array[0]);
+}
+
+static void *io_rings_map(struct io_ring_ctx *ctx, unsigned long uaddr,
+ size_t size)
+{
+ return __io_uaddr_map(&ctx->ring_pages, &ctx->n_ring_pages, uaddr,
+ size);
+}
+
+static void *io_sqes_map(struct io_ring_ctx *ctx, unsigned long uaddr,
+ size_t size)
+{
+ return __io_uaddr_map(&ctx->sqe_pages, &ctx->n_sqe_pages, uaddr,
+ size);
+}
+
+static void io_rings_free(struct io_ring_ctx *ctx)
+{
+ if (!(ctx->flags & IORING_SETUP_NO_MMAP)) {
+ io_mem_free(ctx->rings);
+ io_mem_free(ctx->sq_sqes);
+ ctx->rings = NULL;
+ ctx->sq_sqes = NULL;
+ } else {
+ io_pages_free(&ctx->ring_pages, ctx->n_ring_pages);
+ io_pages_free(&ctx->sqe_pages, ctx->n_sqe_pages);
+ }
+}
+
static void *io_mem_alloc(size_t size)
{
gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
+ void *ret;
- return (void *) __get_free_pages(gfp, get_order(size));
+ ret = (void *) __get_free_pages(gfp, get_order(size));
+ if (ret)
+ return ret;
+ return ERR_PTR(-ENOMEM);
}
static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries,
@@ -2869,8 +2855,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
mmdrop(ctx->mm_account);
ctx->mm_account = NULL;
}
- io_mem_free(ctx->rings);
- io_mem_free(ctx->sq_sqes);
+ io_rings_free(ctx);
percpu_ref_exit(&ctx->refs);
free_uid(ctx->user);
@@ -3050,7 +3035,18 @@ static __cold void io_ring_exit_work(struct work_struct *work)
/* there is little hope left, don't run it too often */
interval = HZ * 60;
}
- } while (!wait_for_completion_timeout(&ctx->ref_comp, interval));
+ /*
+ * This is really an uninterruptible wait, as it has to be
+ * complete. But it's also run from a kworker, which doesn't
+ * take signals, so it's fine to make it interruptible. This
+ * avoids scenarios where we knowingly can wait much longer
+ * on completions, for example if someone does a SIGSTOP on
+ * a task that needs to finish task_work to make this loop
+ * complete. That's a synthetic situation that should not
+ * cause a stuck task backtrace, and hence a potential panic
+ * on stuck tasks if that is enabled.
+ */
+ } while (!wait_for_completion_interruptible_timeout(&ctx->ref_comp, interval));
init_completion(&exit.completion);
init_task_work(&exit.task_work, io_tctx_exit_cb);
@@ -3074,7 +3070,12 @@ static __cold void io_ring_exit_work(struct work_struct *work)
continue;
mutex_unlock(&ctx->uring_lock);
- wait_for_completion(&exit.completion);
+ /*
+ * See comment above for
+ * wait_for_completion_interruptible_timeout() on why this
+ * wait is marked as interruptible.
+ */
+ wait_for_completion_interruptible(&exit.completion);
mutex_lock(&ctx->uring_lock);
}
mutex_unlock(&ctx->uring_lock);
@@ -3348,6 +3349,10 @@ static void *io_uring_validate_mmap_request(struct file *file,
struct page *page;
void *ptr;
+ /* Don't allow mmap if the ring was setup without it */
+ if (ctx->flags & IORING_SETUP_NO_MMAP)
+ return ERR_PTR(-EINVAL);
+
switch (offset & IORING_OFF_MMAP_MASK) {
case IORING_OFF_SQ_RING:
case IORING_OFF_CQ_RING:
@@ -3673,6 +3678,7 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
{
struct io_rings *rings;
size_t size, sq_array_offset;
+ void *ptr;
/* make sure these are sane, as we already accounted them */
ctx->sq_entries = p->sq_entries;
@@ -3682,9 +3688,13 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
if (size == SIZE_MAX)
return -EOVERFLOW;
- rings = io_mem_alloc(size);
- if (!rings)
- return -ENOMEM;
+ if (!(ctx->flags & IORING_SETUP_NO_MMAP))
+ rings = io_mem_alloc(size);
+ else
+ rings = io_rings_map(ctx, p->cq_off.user_addr, size);
+
+ if (IS_ERR(rings))
+ return PTR_ERR(rings);
ctx->rings = rings;
ctx->sq_array = (u32 *)((char *)rings + sq_array_offset);
@@ -3698,34 +3708,31 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
else
size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
if (size == SIZE_MAX) {
- io_mem_free(ctx->rings);
- ctx->rings = NULL;
+ io_rings_free(ctx);
return -EOVERFLOW;
}
- ctx->sq_sqes = io_mem_alloc(size);
- if (!ctx->sq_sqes) {
- io_mem_free(ctx->rings);
- ctx->rings = NULL;
- return -ENOMEM;
+ if (!(ctx->flags & IORING_SETUP_NO_MMAP))
+ ptr = io_mem_alloc(size);
+ else
+ ptr = io_sqes_map(ctx, p->sq_off.user_addr, size);
+
+ if (IS_ERR(ptr)) {
+ io_rings_free(ctx);
+ return PTR_ERR(ptr);
}
+ ctx->sq_sqes = ptr;
return 0;
}
-static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file)
+static int io_uring_install_fd(struct file *file)
{
- int ret, fd;
+ int fd;
fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
if (fd < 0)
return fd;
-
- ret = __io_uring_add_tctx_node(ctx);
- if (ret) {
- put_unused_fd(fd);
- return ret;
- }
fd_install(fd, file);
return fd;
}
@@ -3765,6 +3772,7 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
struct io_uring_params __user *params)
{
struct io_ring_ctx *ctx;
+ struct io_uring_task *tctx;
struct file *file;
int ret;
@@ -3776,6 +3784,10 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
entries = IORING_MAX_ENTRIES;
}
+ if ((p->flags & IORING_SETUP_REGISTERED_FD_ONLY)
+ && !(p->flags & IORING_SETUP_NO_MMAP))
+ return -EINVAL;
+
/*
* Use twice as many entries for the CQ ring. It's possible for the
* application to drive a higher depth than the size of the SQ ring,
@@ -3887,7 +3899,6 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
if (ret)
goto err;
- memset(&p->sq_off, 0, sizeof(p->sq_off));
p->sq_off.head = offsetof(struct io_rings, sq.head);
p->sq_off.tail = offsetof(struct io_rings, sq.tail);
p->sq_off.ring_mask = offsetof(struct io_rings, sq_ring_mask);
@@ -3895,8 +3906,10 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
p->sq_off.flags = offsetof(struct io_rings, sq_flags);
p->sq_off.dropped = offsetof(struct io_rings, sq_dropped);
p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings;
+ p->sq_off.resv1 = 0;
+ if (!(ctx->flags & IORING_SETUP_NO_MMAP))
+ p->sq_off.user_addr = 0;
- memset(&p->cq_off, 0, sizeof(p->cq_off));
p->cq_off.head = offsetof(struct io_rings, cq.head);
p->cq_off.tail = offsetof(struct io_rings, cq.tail);
p->cq_off.ring_mask = offsetof(struct io_rings, cq_ring_mask);
@@ -3904,6 +3917,9 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
p->cq_off.overflow = offsetof(struct io_rings, cq_overflow);
p->cq_off.cqes = offsetof(struct io_rings, cqes);
p->cq_off.flags = offsetof(struct io_rings, cq_flags);
+ p->cq_off.resv1 = 0;
+ if (!(ctx->flags & IORING_SETUP_NO_MMAP))
+ p->cq_off.user_addr = 0;
p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
@@ -3928,22 +3944,30 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
goto err;
}
+ ret = __io_uring_add_tctx_node(ctx);
+ if (ret)
+ goto err_fput;
+ tctx = current->io_uring;
+
/*
* Install ring fd as the very last thing, so we don't risk someone
* having closed it before we finish setup
*/
- ret = io_uring_install_fd(ctx, file);
- if (ret < 0) {
- /* fput will clean it up */
- fput(file);
- return ret;
- }
+ if (p->flags & IORING_SETUP_REGISTERED_FD_ONLY)
+ ret = io_ring_add_registered_file(tctx, file, 0, IO_RINGFD_REG_MAX);
+ else
+ ret = io_uring_install_fd(file);
+ if (ret < 0)
+ goto err_fput;
trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
return ret;
err:
io_ring_ctx_wait_and_kill(ctx);
return ret;
+err_fput:
+ fput(file);
+ return ret;
}
/*
@@ -3969,7 +3993,8 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL |
IORING_SETUP_COOP_TASKRUN | IORING_SETUP_TASKRUN_FLAG |
IORING_SETUP_SQE128 | IORING_SETUP_CQE32 |
- IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN))
+ IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN |
+ IORING_SETUP_NO_MMAP | IORING_SETUP_REGISTERED_FD_ONLY))
return -EINVAL;
return io_uring_create(entries, &p, params);
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 259bf798a390..d3606d30cf6f 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -16,9 +16,6 @@
#endif
enum {
- /* don't use deferred task_work */
- IOU_F_TWQ_FORCE_NORMAL = 1,
-
/*
* A hint to not wake right away but delay until there are enough of
* tw's queued to match the number of CQEs the task is waiting for.
@@ -26,7 +23,7 @@ enum {
* Must not be used wirh requests generating more than one CQE.
* It's also ignored unless IORING_SETUP_DEFER_TASKRUN is set.
*/
- IOU_F_TWQ_LAZY_WAKE = 2,
+ IOU_F_TWQ_LAZY_WAKE = 1,
};
enum {
@@ -47,7 +44,7 @@ int io_run_task_work_sig(struct io_ring_ctx *ctx);
void io_req_defer_failed(struct io_kiocb *req, s32 res);
void io_req_complete_post(struct io_kiocb *req, unsigned issue_flags);
bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags);
-bool io_aux_cqe(struct io_ring_ctx *ctx, bool defer, u64 user_data, s32 res, u32 cflags,
+bool io_aux_cqe(const struct io_kiocb *req, bool defer, s32 res, u32 cflags,
bool allow_overflow);
void __io_commit_cqring_flush(struct io_ring_ctx *ctx);
@@ -57,11 +54,6 @@ struct file *io_file_get_normal(struct io_kiocb *req, int fd);
struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
unsigned issue_flags);
-static inline bool io_req_ffs_set(struct io_kiocb *req)
-{
- return req->flags & REQ_F_FIXED_FILE;
-}
-
void __io_req_task_work_add(struct io_kiocb *req, unsigned flags);
bool io_is_uring_fops(struct file *file);
bool io_alloc_async_data(struct io_kiocb *req);
@@ -75,6 +67,9 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd);
int io_uring_alloc_task_context(struct task_struct *task,
struct io_ring_ctx *ctx);
+int io_ring_add_registered_file(struct io_uring_task *tctx, struct file *file,
+ int start, int end);
+
int io_poll_issue(struct io_kiocb *req, struct io_tw_state *ts);
int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr);
int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin);
@@ -115,8 +110,6 @@ static inline void io_req_task_work_add(struct io_kiocb *req)
#define io_for_each_link(pos, head) \
for (pos = (head); pos; pos = pos->link)
-void io_cq_unlock_post(struct io_ring_ctx *ctx);
-
static inline struct io_uring_cqe *io_get_cqe_overflow(struct io_ring_ctx *ctx,
bool overflow)
{
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index 85fd7ce5f05b..cd6dcf634ba3 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -162,14 +162,12 @@ static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_fl
struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
struct io_ring_ctx *ctx = req->ctx;
struct file *file = NULL;
- unsigned long file_ptr;
int idx = msg->src_fd;
io_ring_submit_lock(ctx, issue_flags);
if (likely(idx < ctx->nr_user_files)) {
idx = array_index_nospec(idx, ctx->nr_user_files);
- file_ptr = io_fixed_file_slot(&ctx->file_table, idx)->file_ptr;
- file = (struct file *) (file_ptr & FFS_MASK);
+ file = io_file_from_index(&ctx->file_table, idx);
if (file)
get_file(file);
}
diff --git a/io_uring/net.c b/io_uring/net.c
index 89e839013837..a8e303796f16 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -65,6 +65,7 @@ struct io_sr_msg {
u16 addr_len;
u16 buf_group;
void __user *addr;
+ void __user *msg_control;
/* used only for send zerocopy */
struct io_kiocb *notif;
};
@@ -195,11 +196,15 @@ static int io_sendmsg_copy_hdr(struct io_kiocb *req,
struct io_async_msghdr *iomsg)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ int ret;
iomsg->msg.msg_name = &iomsg->addr;
iomsg->free_iov = iomsg->fast_iov;
- return sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags,
+ ret = sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags,
&iomsg->free_iov);
+ /* save msg_control as sys_sendmsg() overwrites it */
+ sr->msg_control = iomsg->msg.msg_control_user;
+ return ret;
}
int io_send_prep_async(struct io_kiocb *req)
@@ -297,6 +302,7 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
if (req_has_async_data(req)) {
kmsg = req->async_data;
+ kmsg->msg.msg_control_user = sr->msg_control;
} else {
ret = io_sendmsg_copy_hdr(req, &iomsg);
if (ret)
@@ -320,6 +326,8 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
return io_setup_async_msg(req, kmsg, issue_flags);
if (ret > 0 && io_net_retry(sock, flags)) {
+ kmsg->msg.msg_controllen = 0;
+ kmsg->msg.msg_control = NULL;
sr->done_io += ret;
req->flags |= REQ_F_PARTIAL_IO;
return io_setup_async_msg(req, kmsg, issue_flags);
@@ -616,9 +624,15 @@ static inline void io_recv_prep_retry(struct io_kiocb *req)
* again (for multishot).
*/
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
- unsigned int cflags, bool mshot_finished,
+ struct msghdr *msg, bool mshot_finished,
unsigned issue_flags)
{
+ unsigned int cflags;
+
+ cflags = io_put_kbuf(req, issue_flags);
+ if (msg->msg_inq && msg->msg_inq != -1U)
+ cflags |= IORING_CQE_F_SOCK_NONEMPTY;
+
if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
io_req_set_res(req, *ret, cflags);
*ret = IOU_OK;
@@ -626,10 +640,18 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
}
if (!mshot_finished) {
- if (io_aux_cqe(req->ctx, issue_flags & IO_URING_F_COMPLETE_DEFER,
- req->cqe.user_data, *ret, cflags | IORING_CQE_F_MORE, true)) {
+ if (io_aux_cqe(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
+ *ret, cflags | IORING_CQE_F_MORE, true)) {
io_recv_prep_retry(req);
- return false;
+ /* Known not-empty or unknown state, retry */
+ if (cflags & IORING_CQE_F_SOCK_NONEMPTY ||
+ msg->msg_inq == -1U)
+ return false;
+ if (issue_flags & IO_URING_F_MULTISHOT)
+ *ret = IOU_ISSUE_SKIP_COMPLETE;
+ else
+ *ret = -EAGAIN;
+ return true;
}
/* Otherwise stop multishot but use the current result. */
}
@@ -732,7 +754,6 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
struct io_async_msghdr iomsg, *kmsg;
struct socket *sock;
- unsigned int cflags;
unsigned flags;
int ret, min_ret = 0;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
@@ -781,16 +802,20 @@ retry_multishot:
flags = sr->msg_flags;
if (force_nonblock)
flags |= MSG_DONTWAIT;
- if (flags & MSG_WAITALL)
- min_ret = iov_iter_count(&kmsg->msg.msg_iter);
kmsg->msg.msg_get_inq = 1;
- if (req->flags & REQ_F_APOLL_MULTISHOT)
+ kmsg->msg.msg_inq = -1U;
+ if (req->flags & REQ_F_APOLL_MULTISHOT) {
ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
&mshot_finished);
- else
+ } else {
+ /* disable partial retry for recvmsg with cmsg attached */
+ if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
+ min_ret = iov_iter_count(&kmsg->msg.msg_iter);
+
ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
kmsg->uaddr, flags);
+ }
if (ret < min_ret) {
if (ret == -EAGAIN && force_nonblock) {
@@ -820,11 +845,7 @@ retry_multishot:
else
io_kbuf_recycle(req, issue_flags);
- cflags = io_put_kbuf(req, issue_flags);
- if (kmsg->msg.msg_inq)
- cflags |= IORING_CQE_F_SOCK_NONEMPTY;
-
- if (!io_recv_finish(req, &ret, cflags, mshot_finished, issue_flags))
+ if (!io_recv_finish(req, &ret, &kmsg->msg, mshot_finished, issue_flags))
goto retry_multishot;
if (mshot_finished) {
@@ -843,7 +864,6 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags)
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
struct msghdr msg;
struct socket *sock;
- unsigned int cflags;
unsigned flags;
int ret, min_ret = 0;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
@@ -860,6 +880,14 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags)
if (unlikely(!sock))
return -ENOTSOCK;
+ msg.msg_name = NULL;
+ msg.msg_namelen = 0;
+ msg.msg_control = NULL;
+ msg.msg_get_inq = 1;
+ msg.msg_controllen = 0;
+ msg.msg_iocb = NULL;
+ msg.msg_ubuf = NULL;
+
retry_multishot:
if (io_do_buffer_select(req)) {
void __user *buf;
@@ -874,14 +902,8 @@ retry_multishot:
if (unlikely(ret))
goto out_free;
- msg.msg_name = NULL;
- msg.msg_namelen = 0;
- msg.msg_control = NULL;
- msg.msg_get_inq = 1;
+ msg.msg_inq = -1U;
msg.msg_flags = 0;
- msg.msg_controllen = 0;
- msg.msg_iocb = NULL;
- msg.msg_ubuf = NULL;
flags = sr->msg_flags;
if (force_nonblock)
@@ -921,11 +943,7 @@ out_free:
else
io_kbuf_recycle(req, issue_flags);
- cflags = io_put_kbuf(req, issue_flags);
- if (msg.msg_inq)
- cflags |= IORING_CQE_F_SOCK_NONEMPTY;
-
- if (!io_recv_finish(req, &ret, cflags, ret <= 0, issue_flags))
+ if (!io_recv_finish(req, &ret, &msg, ret <= 0, issue_flags))
goto retry_multishot;
return ret;
@@ -1297,7 +1315,6 @@ int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
- struct io_ring_ctx *ctx = req->ctx;
struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
@@ -1347,8 +1364,8 @@ retry:
if (ret < 0)
return ret;
- if (io_aux_cqe(ctx, issue_flags & IO_URING_F_COMPLETE_DEFER,
- req->cqe.user_data, ret, IORING_CQE_F_MORE, true))
+ if (io_aux_cqe(req, issue_flags & IO_URING_F_COMPLETE_DEFER, ret,
+ IORING_CQE_F_MORE, true))
goto retry;
return -ECANCELED;
diff --git a/io_uring/poll.c b/io_uring/poll.c
index c90e47dc1e29..d4597efe14a7 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -300,8 +300,8 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
__poll_t mask = mangle_poll(req->cqe.res &
req->apoll_events);
- if (!io_aux_cqe(req->ctx, ts->locked, req->cqe.user_data,
- mask, IORING_CQE_F_MORE, false)) {
+ if (!io_aux_cqe(req, ts->locked, mask,
+ IORING_CQE_F_MORE, false)) {
io_req_set_res(req, mask, 0);
return IOU_POLL_REMOVE_POLL_USE_RES;
}
@@ -326,7 +326,7 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
return IOU_POLL_NO_ACTION;
}
-static void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
+void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
{
int ret;
@@ -977,8 +977,9 @@ int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
struct io_hash_bucket *bucket;
struct io_kiocb *preq;
int ret2, ret = 0;
- struct io_tw_state ts = {};
+ struct io_tw_state ts = { .locked = true };
+ io_ring_submit_lock(ctx, issue_flags);
preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table, &bucket);
ret2 = io_poll_disarm(preq);
if (bucket)
@@ -990,12 +991,10 @@ int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
goto out;
}
- io_ring_submit_lock(ctx, issue_flags);
preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table_locked, &bucket);
ret2 = io_poll_disarm(preq);
if (bucket)
spin_unlock(&bucket->lock);
- io_ring_submit_unlock(ctx, issue_flags);
if (ret2) {
ret = ret2;
goto out;
@@ -1019,7 +1018,7 @@ found:
if (poll_update->update_user_data)
preq->cqe.user_data = poll_update->new_user_data;
- ret2 = io_poll_add(preq, issue_flags);
+ ret2 = io_poll_add(preq, issue_flags & ~IO_URING_F_UNLOCKED);
/* successfully updated, don't complete poll request */
if (!ret2 || ret2 == -EIOCBQUEUED)
goto out;
@@ -1027,9 +1026,9 @@ found:
req_set_fail(preq);
io_req_set_res(preq, -ECANCELED, 0);
- ts.locked = !(issue_flags & IO_URING_F_UNLOCKED);
io_req_task_complete(preq, &ts);
out:
+ io_ring_submit_unlock(ctx, issue_flags);
if (ret < 0) {
req_set_fail(req);
return ret;
diff --git a/io_uring/poll.h b/io_uring/poll.h
index b2393b403a2c..ff4d5d753387 100644
--- a/io_uring/poll.h
+++ b/io_uring/poll.h
@@ -38,3 +38,5 @@ bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
bool cancel_all);
void io_apoll_cache_free(struct io_cache_entry *entry);
+
+void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts);
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index d46f72a5ef73..a2dce7ef3a78 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -354,7 +354,6 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
__s32 __user *fds = u64_to_user_ptr(up->data);
struct io_rsrc_data *data = ctx->file_data;
struct io_fixed_file *file_slot;
- struct file *file;
int fd, i, err = 0;
unsigned int done;
@@ -382,15 +381,16 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
file_slot = io_fixed_file_slot(&ctx->file_table, i);
if (file_slot->file_ptr) {
- file = (struct file *)(file_slot->file_ptr & FFS_MASK);
- err = io_queue_rsrc_removal(data, i, file);
+ err = io_queue_rsrc_removal(data, i,
+ io_slot_file(file_slot));
if (err)
break;
file_slot->file_ptr = 0;
io_file_bitmap_clear(&ctx->file_table, i);
}
if (fd != -1) {
- file = fget(fd);
+ struct file *file = fget(fd);
+
if (!file) {
err = -EBADF;
break;
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 3f118ed46e4f..1bce2208b65c 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -283,7 +283,7 @@ static inline int io_fixup_rw_res(struct io_kiocb *req, long res)
return res;
}
-static void io_req_rw_complete(struct io_kiocb *req, struct io_tw_state *ts)
+void io_req_rw_complete(struct io_kiocb *req, struct io_tw_state *ts)
{
io_req_io_end(req);
@@ -666,8 +666,8 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode)
if (unlikely(!file || !(file->f_mode & mode)))
return -EBADF;
- if (!io_req_ffs_set(req))
- req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT;
+ if (!(req->flags & REQ_F_FIXED_FILE))
+ req->flags |= io_file_get_flags(file);
kiocb->ki_flags = file->f_iocb_flags;
ret = kiocb_set_rw_flags(kiocb, rw->flags);
diff --git a/io_uring/rw.h b/io_uring/rw.h
index 3b733f4b610a..4b89f9659366 100644
--- a/io_uring/rw.h
+++ b/io_uring/rw.h
@@ -22,3 +22,4 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags);
int io_writev_prep_async(struct io_kiocb *req);
void io_readv_writev_cleanup(struct io_kiocb *req);
void io_rw_fail(struct io_kiocb *req);
+void io_req_rw_complete(struct io_kiocb *req, struct io_tw_state *ts);
diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index 9db4bc1f521a..5e329e3cd470 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -255,9 +255,13 @@ static int io_sq_thread(void *data)
sqt_spin = true;
if (sqt_spin || !time_after(jiffies, timeout)) {
- cond_resched();
if (sqt_spin)
timeout = jiffies + sqd->sq_thread_idle;
+ if (unlikely(need_resched())) {
+ mutex_unlock(&sqd->lock);
+ cond_resched();
+ mutex_lock(&sqd->lock);
+ }
continue;
}
diff --git a/io_uring/tctx.c b/io_uring/tctx.c
index 3a8d1dd97e1b..c043fe93a3f2 100644
--- a/io_uring/tctx.c
+++ b/io_uring/tctx.c
@@ -208,31 +208,40 @@ void io_uring_unreg_ringfd(void)
}
}
-static int io_ring_add_registered_fd(struct io_uring_task *tctx, int fd,
+int io_ring_add_registered_file(struct io_uring_task *tctx, struct file *file,
int start, int end)
{
- struct file *file;
int offset;
-
for (offset = start; offset < end; offset++) {
offset = array_index_nospec(offset, IO_RINGFD_REG_MAX);
if (tctx->registered_rings[offset])
continue;
- file = fget(fd);
- if (!file) {
- return -EBADF;
- } else if (!io_is_uring_fops(file)) {
- fput(file);
- return -EOPNOTSUPP;
- }
tctx->registered_rings[offset] = file;
return offset;
}
-
return -EBUSY;
}
+static int io_ring_add_registered_fd(struct io_uring_task *tctx, int fd,
+ int start, int end)
+{
+ struct file *file;
+ int offset;
+
+ file = fget(fd);
+ if (!file) {
+ return -EBADF;
+ } else if (!io_is_uring_fops(file)) {
+ fput(file);
+ return -EOPNOTSUPP;
+ }
+ offset = io_ring_add_registered_file(tctx, file, start, end);
+ if (offset < 0)
+ fput(file);
+ return offset;
+}
+
/*
* Register a ring fd to avoid fdget/fdput for each io_uring_enter()
* invocation. User passes in an array of struct io_uring_rsrc_update
diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index fc950177e2e1..fb0547b35dcd 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -73,8 +73,8 @@ static void io_timeout_complete(struct io_kiocb *req, struct io_tw_state *ts)
if (!io_timeout_finish(timeout, data)) {
bool filled;
- filled = io_aux_cqe(ctx, ts->locked, req->cqe.user_data, -ETIME,
- IORING_CQE_F_MORE, false);
+ filled = io_aux_cqe(req, ts->locked, -ETIME, IORING_CQE_F_MORE,
+ false);
if (filled) {
/* re-arm timer */
spin_lock_irq(&ctx->timeout_lock);
@@ -594,7 +594,7 @@ int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
goto add;
}
- tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
+ tail = data_race(ctx->cached_cq_tail) - atomic_read(&ctx->cq_timeouts);
timeout->target_seq = tail + off;
/* Update the last seq here in case io_flush_timeouts() hasn't.
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 5e32db48696d..476c7877ce58 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -20,16 +20,24 @@ static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts)
ioucmd->task_work_cb(ioucmd, issue_flags);
}
-void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
- void (*task_work_cb)(struct io_uring_cmd *, unsigned))
+void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
+ void (*task_work_cb)(struct io_uring_cmd *, unsigned),
+ unsigned flags)
{
struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
ioucmd->task_work_cb = task_work_cb;
req->io_task_work.func = io_uring_cmd_work;
- io_req_task_work_add(req);
+ __io_req_task_work_add(req, flags);
+}
+EXPORT_SYMBOL_GPL(__io_uring_cmd_do_in_task);
+
+void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
+ void (*task_work_cb)(struct io_uring_cmd *, unsigned))
+{
+ __io_uring_cmd_do_in_task(ioucmd, task_work_cb, IOU_F_TWQ_LAZY_WAKE);
}
-EXPORT_SYMBOL_GPL(io_uring_cmd_complete_in_task);
+EXPORT_SYMBOL_GPL(io_uring_cmd_do_in_task_lazy);
static inline void io_req_set_cqe32_extra(struct io_kiocb *req,
u64 extra1, u64 extra2)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 6b682b8e4b50..72b32b7cd9cd 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -744,13 +744,12 @@ static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
return offset < btf->hdr.str_len;
}
-static bool __btf_name_char_ok(char c, bool first, bool dot_ok)
+static bool __btf_name_char_ok(char c, bool first)
{
if ((first ? !isalpha(c) :
!isalnum(c)) &&
c != '_' &&
- ((c == '.' && !dot_ok) ||
- c != '.'))
+ c != '.')
return false;
return true;
}
@@ -767,20 +766,20 @@ static const char *btf_str_by_offset(const struct btf *btf, u32 offset)
return NULL;
}
-static bool __btf_name_valid(const struct btf *btf, u32 offset, bool dot_ok)
+static bool __btf_name_valid(const struct btf *btf, u32 offset)
{
/* offset must be valid */
const char *src = btf_str_by_offset(btf, offset);
const char *src_limit;
- if (!__btf_name_char_ok(*src, true, dot_ok))
+ if (!__btf_name_char_ok(*src, true))
return false;
/* set a limit on identifier length */
src_limit = src + KSYM_NAME_LEN;
src++;
while (*src && src < src_limit) {
- if (!__btf_name_char_ok(*src, false, dot_ok))
+ if (!__btf_name_char_ok(*src, false))
return false;
src++;
}
@@ -788,17 +787,14 @@ static bool __btf_name_valid(const struct btf *btf, u32 offset, bool dot_ok)
return !*src;
}
-/* Only C-style identifier is permitted. This can be relaxed if
- * necessary.
- */
static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
{
- return __btf_name_valid(btf, offset, false);
+ return __btf_name_valid(btf, offset);
}
static bool btf_name_valid_section(const struct btf *btf, u32 offset)
{
- return __btf_name_valid(btf, offset, true);
+ return __btf_name_valid(btf, offset);
}
static const char *__btf_name_by_offset(const struct btf *btf, u32 offset)
@@ -4422,7 +4418,7 @@ static s32 btf_var_check_meta(struct btf_verifier_env *env,
}
if (!t->name_off ||
- !__btf_name_valid(env->btf, t->name_off, true)) {
+ !__btf_name_valid(env->btf, t->name_off)) {
btf_verifier_log_type(env, t, "Invalid name");
return -EINVAL;
}
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 00c253b84bf5..9901efee4339 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1215,7 +1215,7 @@ static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value
ret = htab_lock_bucket(htab, b, hash, &flags);
if (ret)
- return ret;
+ goto err_lock_bucket;
l_old = lookup_elem_raw(head, hash, key, key_size);
@@ -1236,6 +1236,7 @@ static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value
err:
htab_unlock_bucket(htab, b, hash, flags);
+err_lock_bucket:
if (ret)
htab_lru_push_free(htab, l_new);
else if (l_old)
@@ -1338,7 +1339,7 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
ret = htab_lock_bucket(htab, b, hash, &flags);
if (ret)
- return ret;
+ goto err_lock_bucket;
l_old = lookup_elem_raw(head, hash, key, key_size);
@@ -1361,6 +1362,7 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
ret = 0;
err:
htab_unlock_bucket(htab, b, hash, flags);
+err_lock_bucket:
if (l_new)
bpf_lru_push_free(&htab->lru, &l_new->lru_node);
return ret;
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 2c5c64c2a53b..cd5eafaba97e 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -69,9 +69,13 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
/* Misc members not needed in bpf_map_meta_equal() check. */
inner_map_meta->ops = inner_map->ops;
if (inner_map->ops == &array_map_ops) {
+ struct bpf_array *inner_array_meta =
+ container_of(inner_map_meta, struct bpf_array, map);
+ struct bpf_array *inner_array = container_of(inner_map, struct bpf_array, map);
+
+ inner_array_meta->index_mask = inner_array->index_mask;
+ inner_array_meta->elem_size = inner_array->elem_size;
inner_map_meta->bypass_spec_v1 = inner_map->bypass_spec_v1;
- container_of(inner_map_meta, struct bpf_array, map)->index_mask =
- container_of(inner_map, struct bpf_array, map)->index_mask;
}
fdput(f);
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index d9c9f45e3529..8a26cd8814c1 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -859,4 +859,4 @@ static int __init bpf_offload_init(void)
return rhashtable_init(&offdevs, &offdevs_params);
}
-late_initcall(bpf_offload_init);
+core_initcall(bpf_offload_init);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 14f39c1e573e..f1c8733f76b8 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2433,6 +2433,10 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
default:
return -EINVAL;
}
+ case BPF_PROG_TYPE_NETFILTER:
+ if (expected_attach_type == BPF_NETFILTER)
+ return 0;
+ return -EINVAL;
case BPF_PROG_TYPE_SYSCALL:
case BPF_PROG_TYPE_EXT:
if (expected_attach_type)
@@ -3436,6 +3440,11 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
return prog->enforce_expected_attach_type &&
prog->expected_attach_type != attach_type ?
-EINVAL : 0;
+ case BPF_PROG_TYPE_KPROBE:
+ if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI &&
+ attach_type != BPF_TRACE_KPROBE_MULTI)
+ return -EINVAL;
+ return 0;
default:
return 0;
}
@@ -4590,7 +4599,12 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
switch (prog->type) {
case BPF_PROG_TYPE_EXT:
+ break;
case BPF_PROG_TYPE_NETFILTER:
+ if (attr->link_create.attach_type != BPF_NETFILTER) {
+ ret = -EINVAL;
+ goto out;
+ }
break;
case BPF_PROG_TYPE_PERF_EVENT:
case BPF_PROG_TYPE_TRACEPOINT:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index fbcf5a4e2fcd..cf5f230360f5 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3868,6 +3868,9 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
return err;
}
save_register_state(state, spi, reg, size);
+ /* Break the relation on a narrowing spill. */
+ if (fls64(reg->umax_value) > BITS_PER_BYTE * size)
+ state->stack[spi].spilled_ptr.id = 0;
} else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
insn->imm != 0 && env->bpf_capable) {
struct bpf_reg_state fake_reg = {};
@@ -17033,7 +17036,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
insn->dst_reg,
shift);
- insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
+ insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
(1ULL << size * 8) - 1);
}
}
@@ -17214,9 +17217,10 @@ static int jit_subprogs(struct bpf_verifier_env *env)
}
/* finally lock prog and jit images for all functions and
- * populate kallsysm
+ * populate kallsysm. Begin at the first subprogram, since
+ * bpf_prog_load will add the kallsyms for the main program.
*/
- for (i = 0; i < env->subprog_cnt; i++) {
+ for (i = 1; i < env->subprog_cnt; i++) {
bpf_prog_lock_ro(func[i]);
bpf_prog_kallsyms_add(func[i]);
}
@@ -17242,6 +17246,8 @@ static int jit_subprogs(struct bpf_verifier_env *env)
prog->jited = 1;
prog->bpf_func = func[0]->bpf_func;
prog->jited_len = func[0]->jited_len;
+ prog->aux->extable = func[0]->aux->extable;
+ prog->aux->num_exentries = func[0]->aux->num_exentries;
prog->aux->func = func;
prog->aux->func_cnt = env->subprog_cnt;
bpf_prog_jit_attempt_done(prog);
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index aeef06c465ef..5407241dbb45 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -108,7 +108,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
cgroup_lock();
- percpu_down_write(&cgroup_threadgroup_rwsem);
+ cgroup_attach_lock(true);
/* all tasks in @from are being moved, all csets are source */
spin_lock_irq(&css_set_lock);
@@ -144,7 +144,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
} while (task && !ret);
out_err:
cgroup_migrate_finish(&mgctx);
- percpu_up_write(&cgroup_threadgroup_rwsem);
+ cgroup_attach_unlock(true);
cgroup_unlock();
return ret;
}
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 625d7483951c..4d42f0cbc11e 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1798,7 +1798,7 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
{
struct cgroup *dcgrp = &dst_root->cgrp;
struct cgroup_subsys *ss;
- int ssid, i, ret;
+ int ssid, ret;
u16 dfl_disable_ss_mask = 0;
lockdep_assert_held(&cgroup_mutex);
@@ -1842,7 +1842,8 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
struct cgroup_root *src_root = ss->root;
struct cgroup *scgrp = &src_root->cgrp;
struct cgroup_subsys_state *css = cgroup_css(scgrp, ss);
- struct css_set *cset;
+ struct css_set *cset, *cset_pos;
+ struct css_task_iter *it;
WARN_ON(!css || cgroup_css(dcgrp, ss));
@@ -1860,9 +1861,22 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
css->cgroup = dcgrp;
spin_lock_irq(&css_set_lock);
- hash_for_each(css_set_table, i, cset, hlist)
+ WARN_ON(!list_empty(&dcgrp->e_csets[ss->id]));
+ list_for_each_entry_safe(cset, cset_pos, &scgrp->e_csets[ss->id],
+ e_cset_node[ss->id]) {
list_move_tail(&cset->e_cset_node[ss->id],
&dcgrp->e_csets[ss->id]);
+ /*
+ * all css_sets of scgrp together in same order to dcgrp,
+ * patch in-flight iterators to preserve correct iteration.
+ * since the iterator is always advanced right away and
+ * finished when it->cset_pos meets it->cset_head, so only
+ * update it->cset_head is enough here.
+ */
+ list_for_each_entry(it, &cset->task_iters, iters_node)
+ if (it->cset_head == &scgrp->e_csets[ss->id])
+ it->cset_head = &dcgrp->e_csets[ss->id];
+ }
spin_unlock_irq(&css_set_lock);
if (ss->css_rstat_flush) {
@@ -6486,19 +6500,18 @@ err:
static void cgroup_css_set_put_fork(struct kernel_clone_args *kargs)
__releases(&cgroup_threadgroup_rwsem) __releases(&cgroup_mutex)
{
+ struct cgroup *cgrp = kargs->cgrp;
+ struct css_set *cset = kargs->cset;
+
cgroup_threadgroup_change_end(current);
- if (kargs->flags & CLONE_INTO_CGROUP) {
- struct cgroup *cgrp = kargs->cgrp;
- struct css_set *cset = kargs->cset;
+ if (cset) {
+ put_css_set(cset);
+ kargs->cset = NULL;
+ }
+ if (kargs->flags & CLONE_INTO_CGROUP) {
cgroup_unlock();
-
- if (cset) {
- put_css_set(cset);
- kargs->cset = NULL;
- }
-
if (cgrp) {
cgroup_put(cgrp);
kargs->cgrp = NULL;
diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c
index 936473203a6b..122dacb3a443 100644
--- a/kernel/cgroup/legacy_freezer.c
+++ b/kernel/cgroup/legacy_freezer.c
@@ -108,16 +108,18 @@ static int freezer_css_online(struct cgroup_subsys_state *css)
struct freezer *freezer = css_freezer(css);
struct freezer *parent = parent_freezer(freezer);
+ cpus_read_lock();
mutex_lock(&freezer_mutex);
freezer->state |= CGROUP_FREEZER_ONLINE;
if (parent && (parent->state & CGROUP_FREEZING)) {
freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN;
- static_branch_inc(&freezer_active);
+ static_branch_inc_cpuslocked(&freezer_active);
}
mutex_unlock(&freezer_mutex);
+ cpus_read_unlock();
return 0;
}
@@ -132,14 +134,16 @@ static void freezer_css_offline(struct cgroup_subsys_state *css)
{
struct freezer *freezer = css_freezer(css);
+ cpus_read_lock();
mutex_lock(&freezer_mutex);
if (freezer->state & CGROUP_FREEZING)
- static_branch_dec(&freezer_active);
+ static_branch_dec_cpuslocked(&freezer_active);
freezer->state = 0;
mutex_unlock(&freezer_mutex);
+ cpus_read_unlock();
}
static void freezer_css_free(struct cgroup_subsys_state *css)
diff --git a/kernel/exit.c b/kernel/exit.c
index 34b90e2e7cf7..edb50b4c9972 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -411,7 +411,10 @@ static void coredump_task_exit(struct task_struct *tsk)
tsk->flags |= PF_POSTCOREDUMP;
core_state = tsk->signal->core_state;
spin_unlock_irq(&tsk->sighand->siglock);
- if (core_state) {
+
+ /* The vhost_worker does not particpate in coredumps */
+ if (core_state &&
+ ((tsk->flags & (PF_IO_WORKER | PF_USER_WORKER)) != PF_USER_WORKER)) {
struct core_thread self;
self.task = current;
diff --git a/kernel/fork.c b/kernel/fork.c
index ed4e01daccaa..41c964104b58 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -627,6 +627,7 @@ void free_task(struct task_struct *tsk)
arch_release_task_struct(tsk);
if (tsk->flags & PF_KTHREAD)
free_kthread_struct(tsk);
+ bpf_task_storage_free(tsk);
free_task_struct(tsk);
}
EXPORT_SYMBOL(free_task);
@@ -979,7 +980,6 @@ void __put_task_struct(struct task_struct *tsk)
cgroup_free(tsk);
task_numa_free(tsk, true);
security_task_free(tsk);
- bpf_task_storage_free(tsk);
exit_creds(tsk);
delayacct_tsk_free(tsk);
put_signal_struct(tsk->signal);
@@ -2336,16 +2336,16 @@ __latent_entropy struct task_struct *copy_process(
p->flags &= ~PF_KTHREAD;
if (args->kthread)
p->flags |= PF_KTHREAD;
- if (args->user_worker)
- p->flags |= PF_USER_WORKER;
- if (args->io_thread) {
+ if (args->user_worker) {
/*
- * Mark us an IO worker, and block any signal that isn't
+ * Mark us a user worker, and block any signal that isn't
* fatal or STOP
*/
- p->flags |= PF_IO_WORKER;
+ p->flags |= PF_USER_WORKER;
siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP));
}
+ if (args->io_thread)
+ p->flags |= PF_IO_WORKER;
if (args->name)
strscpy_pad(p->comm, args->name, sizeof(p->comm));
@@ -2517,9 +2517,6 @@ __latent_entropy struct task_struct *copy_process(
if (retval)
goto bad_fork_cleanup_io;
- if (args->ignore_signals)
- ignore_signals(p);
-
stackleak_task_init(p);
if (pid != &init_struct_pid) {
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 7a97bcb086bf..b4c31a5c1147 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -542,7 +542,7 @@ fail:
return ret;
}
-#ifdef CONFIG_PCI_MSI_ARCH_FALLBACKS
+#if defined(CONFIG_PCI_MSI_ARCH_FALLBACKS) || defined(CONFIG_PCI_XEN)
/**
* msi_device_populate_sysfs - Populate msi_irqs sysfs entries for a device
* @dev: The device (PCI, platform etc) which will get sysfs entries
@@ -574,7 +574,7 @@ void msi_device_destroy_sysfs(struct device *dev)
msi_for_each_desc(desc, dev, MSI_DESC_ALL)
msi_sysfs_remove_desc(dev, desc);
}
-#endif /* CONFIG_PCI_MSI_ARCH_FALLBACK */
+#endif /* CONFIG_PCI_MSI_ARCH_FALLBACK || CONFIG_PCI_XEN */
#else /* CONFIG_SYSFS */
static inline int msi_sysfs_create_group(struct device *dev) { return 0; }
static inline int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc) { return 0; }
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index f989f5f1933b..69ee4a29136f 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -901,10 +901,22 @@ static int kexec_purgatory_setup_sechdrs(struct purgatory_info *pi,
}
offset = ALIGN(offset, align);
+
+ /*
+ * Check if the segment contains the entry point, if so,
+ * calculate the value of image->start based on it.
+ * If the compiler has produced more than one .text section
+ * (Eg: .text.hot), they are generally after the main .text
+ * section, and they shall not be used to calculate
+ * image->start. So do not re-calculate image->start if it
+ * is not set to the initial value, and warn the user so they
+ * have a chance to fix their purgatory's linker script.
+ */
if (sechdrs[i].sh_flags & SHF_EXECINSTR &&
pi->ehdr->e_entry >= sechdrs[i].sh_addr &&
pi->ehdr->e_entry < (sechdrs[i].sh_addr
- + sechdrs[i].sh_size)) {
+ + sechdrs[i].sh_size) &&
+ !WARN_ON(kbuf->image->start != pi->ehdr->e_entry)) {
kbuf->image->start -= sechdrs[i].sh_addr;
kbuf->image->start += kbuf->mem + offset;
}
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index dcd1d5bfc1e0..4dfd2f3e09b2 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2263,6 +2263,9 @@ static inline bool usage_match(struct lock_list *entry, void *mask)
static inline bool usage_skip(struct lock_list *entry, void *mask)
{
+ if (entry->class->lock_type == LD_LOCK_NORMAL)
+ return false;
+
/*
* Skip local_lock() for irq inversion detection.
*
@@ -2289,14 +2292,16 @@ static inline bool usage_skip(struct lock_list *entry, void *mask)
* As a result, we will skip local_lock(), when we search for irq
* inversion bugs.
*/
- if (entry->class->lock_type == LD_LOCK_PERCPU) {
- if (DEBUG_LOCKS_WARN_ON(entry->class->wait_type_inner < LD_WAIT_CONFIG))
- return false;
+ if (entry->class->lock_type == LD_LOCK_PERCPU &&
+ DEBUG_LOCKS_WARN_ON(entry->class->wait_type_inner < LD_WAIT_CONFIG))
+ return false;
- return true;
- }
+ /*
+ * Skip WAIT_OVERRIDE for irq inversion detection -- it's not actually
+ * a lock and only used to override the wait_type.
+ */
- return false;
+ return true;
}
/*
@@ -4768,7 +4773,8 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)
for (; depth < curr->lockdep_depth; depth++) {
struct held_lock *prev = curr->held_locks + depth;
- u8 prev_inner = hlock_class(prev)->wait_type_inner;
+ struct lock_class *class = hlock_class(prev);
+ u8 prev_inner = class->wait_type_inner;
if (prev_inner) {
/*
@@ -4778,6 +4784,14 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)
* Also due to trylocks.
*/
curr_inner = min(curr_inner, prev_inner);
+
+ /*
+ * Allow override for annotations -- this is typically
+ * only valid/needed for code that only exists when
+ * CONFIG_PREEMPT_RT=n.
+ */
+ if (unlikely(class->lock_type == LD_LOCK_WAIT_OVERRIDE))
+ curr_inner = prev_inner;
}
}
diff --git a/kernel/module/decompress.c b/kernel/module/decompress.c
index e97232b125eb..8a5d6d63b06c 100644
--- a/kernel/module/decompress.c
+++ b/kernel/module/decompress.c
@@ -257,7 +257,7 @@ static ssize_t module_zstd_decompress(struct load_info *info,
do {
struct page *page = module_get_next_page(info);
- if (!IS_ERR(page)) {
+ if (IS_ERR(page)) {
retval = PTR_ERR(page);
goto out;
}
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 044aa2c9e3cb..4e2cf784cf8c 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -1521,14 +1521,14 @@ static void __layout_sections(struct module *mod, struct load_info *info, bool i
MOD_RODATA,
MOD_RO_AFTER_INIT,
MOD_DATA,
- MOD_INVALID, /* This is needed to match the masks array */
+ MOD_DATA,
};
static const int init_m_to_mem_type[] = {
MOD_INIT_TEXT,
MOD_INIT_RODATA,
MOD_INVALID,
MOD_INIT_DATA,
- MOD_INVALID, /* This is needed to match the masks array */
+ MOD_INIT_DATA,
};
for (m = 0; m < ARRAY_SIZE(masks); ++m) {
diff --git a/kernel/module/stats.c b/kernel/module/stats.c
index ad7b6ada29f2..6ab2c94d6bc3 100644
--- a/kernel/module/stats.c
+++ b/kernel/module/stats.c
@@ -276,6 +276,7 @@ static ssize_t read_file_mod_stats(struct file *file, char __user *user_buf,
struct mod_fail_load *mod_fail;
unsigned int len, size, count_failed = 0;
char *buf;
+ int ret;
u32 live_mod_count, fkreads, fdecompress, fbecoming, floads;
unsigned long total_size, text_size, ikread_bytes, ibecoming_bytes,
idecompress_bytes, imod_bytes, total_virtual_lost;
@@ -390,8 +391,9 @@ static ssize_t read_file_mod_stats(struct file *file, char __user *user_buf,
out_unlock:
mutex_unlock(&module_mutex);
out:
+ ret = simple_read_from_buffer(user_buf, count, ppos, buf, len);
kfree(buf);
- return simple_read_from_buffer(user_buf, count, ppos, buf, len);
+ return ret;
}
#undef MAX_PREAMBLE
#undef MAX_FAILED_MOD_PRINT
diff --git a/kernel/signal.c b/kernel/signal.c
index 8f6330f0e9ca..2547fa73bde5 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1368,7 +1368,9 @@ int zap_other_threads(struct task_struct *p)
while_each_thread(p, t) {
task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
- count++;
+ /* Don't require de_thread to wait for the vhost_worker */
+ if ((t->flags & (PF_IO_WORKER | PF_USER_WORKER)) != PF_USER_WORKER)
+ count++;
/* Don't bother with already dead threads */
if (t->exit_state)
@@ -2861,11 +2863,11 @@ relock:
}
/*
- * PF_IO_WORKER threads will catch and exit on fatal signals
+ * PF_USER_WORKER threads will catch and exit on fatal signals
* themselves. They have cleanup that must be performed, so
* we cannot call do_exit() on their behalf.
*/
- if (current->flags & PF_IO_WORKER)
+ if (current->flags & PF_USER_WORKER)
goto out;
/*
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 65b8658da829..e9138cd7a0f5 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -218,19 +218,8 @@ static void tick_setup_device(struct tick_device *td,
* this cpu:
*/
if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
- ktime_t next_p;
- u32 rem;
-
tick_do_timer_cpu = cpu;
-
- next_p = ktime_get();
- div_u64_rem(next_p, TICK_NSEC, &rem);
- if (rem) {
- next_p -= rem;
- next_p += TICK_NSEC;
- }
-
- tick_next_period = next_p;
+ tick_next_period = ktime_get();
#ifdef CONFIG_NO_HZ_FULL
/*
* The boot CPU may be nohz_full, in which case set
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 52254679ec48..42c0be3080bd 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -161,8 +161,19 @@ static ktime_t tick_init_jiffy_update(void)
raw_spin_lock(&jiffies_lock);
write_seqcount_begin(&jiffies_seq);
/* Did we start the jiffies update yet ? */
- if (last_jiffies_update == 0)
+ if (last_jiffies_update == 0) {
+ u32 rem;
+
+ /*
+ * Ensure that the tick is aligned to a multiple of
+ * TICK_NSEC.
+ */
+ div_u64_rem(tick_next_period, TICK_NSEC, &rem);
+ if (rem)
+ tick_next_period += TICK_NSEC - rem;
+
last_jiffies_update = tick_next_period;
+ }
period = last_jiffies_update;
write_seqcount_end(&jiffies_seq);
raw_spin_unlock(&jiffies_lock);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 9a050e36dc6c..1f4b07da327a 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -900,13 +900,23 @@ static const struct bpf_func_proto bpf_send_signal_thread_proto = {
BPF_CALL_3(bpf_d_path, struct path *, path, char *, buf, u32, sz)
{
+ struct path copy;
long len;
char *p;
if (!sz)
return 0;
- p = d_path(path, buf, sz);
+ /*
+ * The path pointer is verified as trusted and safe to use,
+ * but let's double check it's valid anyway to workaround
+ * potentially broken verifier.
+ */
+ len = copy_from_kernel_nofault(&copy, path, sizeof(*path));
+ if (len < 0)
+ return len;
+
+ p = d_path(&copy, buf, sz);
if (IS_ERR(p)) {
len = PTR_ERR(p);
} else {
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 9abb3905bc8e..18d36842faf5 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -17,36 +17,30 @@
struct fprobe_rethook_node {
struct rethook_node node;
unsigned long entry_ip;
+ unsigned long entry_parent_ip;
char data[];
};
-static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *ops, struct ftrace_regs *fregs)
+static inline void __fprobe_handler(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
struct fprobe_rethook_node *fpr;
struct rethook_node *rh = NULL;
struct fprobe *fp;
void *entry_data = NULL;
- int bit, ret;
+ int ret = 0;
fp = container_of(ops, struct fprobe, ops);
- if (fprobe_disabled(fp))
- return;
-
- bit = ftrace_test_recursion_trylock(ip, parent_ip);
- if (bit < 0) {
- fp->nmissed++;
- return;
- }
if (fp->exit_handler) {
rh = rethook_try_get(fp->rethook);
if (!rh) {
fp->nmissed++;
- goto out;
+ return;
}
fpr = container_of(rh, struct fprobe_rethook_node, node);
fpr->entry_ip = ip;
+ fpr->entry_parent_ip = parent_ip;
if (fp->entry_data_size)
entry_data = fpr->data;
}
@@ -61,23 +55,60 @@ static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
else
rethook_hook(rh, ftrace_get_regs(fregs), true);
}
-out:
+}
+
+static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *ops, struct ftrace_regs *fregs)
+{
+ struct fprobe *fp;
+ int bit;
+
+ fp = container_of(ops, struct fprobe, ops);
+ if (fprobe_disabled(fp))
+ return;
+
+ /* recursion detection has to go before any traceable function and
+ * all functions before this point should be marked as notrace
+ */
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
+ if (bit < 0) {
+ fp->nmissed++;
+ return;
+ }
+ __fprobe_handler(ip, parent_ip, ops, fregs);
ftrace_test_recursion_unlock(bit);
+
}
NOKPROBE_SYMBOL(fprobe_handler);
static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
- struct fprobe *fp = container_of(ops, struct fprobe, ops);
+ struct fprobe *fp;
+ int bit;
+
+ fp = container_of(ops, struct fprobe, ops);
+ if (fprobe_disabled(fp))
+ return;
+
+ /* recursion detection has to go before any traceable function and
+ * all functions called before this point should be marked as notrace
+ */
+ bit = ftrace_test_recursion_trylock(ip, parent_ip);
+ if (bit < 0) {
+ fp->nmissed++;
+ return;
+ }
if (unlikely(kprobe_running())) {
fp->nmissed++;
return;
}
+
kprobe_busy_begin();
- fprobe_handler(ip, parent_ip, ops, fregs);
+ __fprobe_handler(ip, parent_ip, ops, fregs);
kprobe_busy_end();
+ ftrace_test_recursion_unlock(bit);
}
static void fprobe_exit_handler(struct rethook_node *rh, void *data,
@@ -85,14 +116,26 @@ static void fprobe_exit_handler(struct rethook_node *rh, void *data,
{
struct fprobe *fp = (struct fprobe *)data;
struct fprobe_rethook_node *fpr;
+ int bit;
if (!fp || fprobe_disabled(fp))
return;
fpr = container_of(rh, struct fprobe_rethook_node, node);
+ /*
+ * we need to assure no calls to traceable functions in-between the
+ * end of fprobe_handler and the beginning of fprobe_exit_handler.
+ */
+ bit = ftrace_test_recursion_trylock(fpr->entry_ip, fpr->entry_parent_ip);
+ if (bit < 0) {
+ fp->nmissed++;
+ return;
+ }
+
fp->exit_handler(fp, fpr->entry_ip, regs,
fp->entry_data_size ? (void *)fpr->data : NULL);
+ ftrace_test_recursion_unlock(bit);
}
NOKPROBE_SYMBOL(fprobe_exit_handler);
diff --git a/kernel/trace/rethook.c b/kernel/trace/rethook.c
index 32c3dfdb4d6a..60f6cb2b486b 100644
--- a/kernel/trace/rethook.c
+++ b/kernel/trace/rethook.c
@@ -288,7 +288,7 @@ unsigned long rethook_trampoline_handler(struct pt_regs *regs,
* These loops must be protected from rethook_free_rcu() because those
* are accessing 'rhn->rethook'.
*/
- preempt_disable();
+ preempt_disable_notrace();
/*
* Run the handler on the shadow stack. Do not unlink the list here because
@@ -321,7 +321,7 @@ unsigned long rethook_trampoline_handler(struct pt_regs *regs,
first = first->next;
rethook_recycle(rhn);
}
- preempt_enable();
+ preempt_enable_notrace();
return correct_ret_addr;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c210d02fac97..5d2c5678b66f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -60,6 +60,7 @@
*/
bool ring_buffer_expanded;
+#ifdef CONFIG_FTRACE_STARTUP_TEST
/*
* We need to change this state when a selftest is running.
* A selftest will lurk into the ring-buffer to count the
@@ -75,7 +76,6 @@ static bool __read_mostly tracing_selftest_running;
*/
bool __read_mostly tracing_selftest_disabled;
-#ifdef CONFIG_FTRACE_STARTUP_TEST
void __init disable_tracing_selftest(const char *reason)
{
if (!tracing_selftest_disabled) {
@@ -83,6 +83,9 @@ void __init disable_tracing_selftest(const char *reason)
pr_info("Ftrace startup test is disabled due to %s\n", reason);
}
}
+#else
+#define tracing_selftest_running 0
+#define tracing_selftest_disabled 0
#endif
/* Pipe tracepoints to printk */
@@ -1051,7 +1054,10 @@ int __trace_array_puts(struct trace_array *tr, unsigned long ip,
if (!(tr->trace_flags & TRACE_ITER_PRINTK))
return 0;
- if (unlikely(tracing_selftest_running || tracing_disabled))
+ if (unlikely(tracing_selftest_running && tr == &global_trace))
+ return 0;
+
+ if (unlikely(tracing_disabled))
return 0;
alloc = sizeof(*entry) + size + 2; /* possible \n added */
@@ -2041,6 +2047,24 @@ static int run_tracer_selftest(struct tracer *type)
return 0;
}
+static int do_run_tracer_selftest(struct tracer *type)
+{
+ int ret;
+
+ /*
+ * Tests can take a long time, especially if they are run one after the
+ * other, as does happen during bootup when all the tracers are
+ * registered. This could cause the soft lockup watchdog to trigger.
+ */
+ cond_resched();
+
+ tracing_selftest_running = true;
+ ret = run_tracer_selftest(type);
+ tracing_selftest_running = false;
+
+ return ret;
+}
+
static __init int init_trace_selftests(void)
{
struct trace_selftests *p, *n;
@@ -2092,6 +2116,10 @@ static inline int run_tracer_selftest(struct tracer *type)
{
return 0;
}
+static inline int do_run_tracer_selftest(struct tracer *type)
+{
+ return 0;
+}
#endif /* CONFIG_FTRACE_STARTUP_TEST */
static void add_tracer_options(struct trace_array *tr, struct tracer *t);
@@ -2127,8 +2155,6 @@ int __init register_tracer(struct tracer *type)
mutex_lock(&trace_types_lock);
- tracing_selftest_running = true;
-
for (t = trace_types; t; t = t->next) {
if (strcmp(type->name, t->name) == 0) {
/* already found */
@@ -2157,7 +2183,7 @@ int __init register_tracer(struct tracer *type)
/* store the tracer for __set_tracer_option */
type->flags->trace = type;
- ret = run_tracer_selftest(type);
+ ret = do_run_tracer_selftest(type);
if (ret < 0)
goto out;
@@ -2166,7 +2192,6 @@ int __init register_tracer(struct tracer *type)
add_tracer_options(&global_trace, type);
out:
- tracing_selftest_running = false;
mutex_unlock(&trace_types_lock);
if (ret || !default_bootup_tracer)
@@ -3490,7 +3515,7 @@ __trace_array_vprintk(struct trace_buffer *buffer,
unsigned int trace_ctx;
char *tbuffer;
- if (tracing_disabled || tracing_selftest_running)
+ if (tracing_disabled)
return 0;
/* Don't pollute graph traces with trace_vprintk internals */
@@ -3538,6 +3563,9 @@ __printf(3, 0)
int trace_array_vprintk(struct trace_array *tr,
unsigned long ip, const char *fmt, va_list args)
{
+ if (tracing_selftest_running && tr == &global_trace)
+ return 0;
+
return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
}
@@ -5752,7 +5780,7 @@ static const char readme_msg[] =
"\t table using the key(s) and value(s) named, and the value of a\n"
"\t sum called 'hitcount' is incremented. Keys and values\n"
"\t correspond to fields in the event's format description. Keys\n"
- "\t can be any field, or the special string 'stacktrace'.\n"
+ "\t can be any field, or the special string 'common_stacktrace'.\n"
"\t Compound keys consisting of up to two fields can be specified\n"
"\t by the 'keys' keyword. Values must correspond to numeric\n"
"\t fields. Sort keys consisting of up to two fields can be\n"
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 654ffa40457a..57e539d47989 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -194,6 +194,8 @@ static int trace_define_generic_fields(void)
__generic_field(int, common_cpu, FILTER_CPU);
__generic_field(char *, COMM, FILTER_COMM);
__generic_field(char *, comm, FILTER_COMM);
+ __generic_field(char *, stacktrace, FILTER_STACKTRACE);
+ __generic_field(char *, STACKTRACE, FILTER_STACKTRACE);
return ret;
}
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 486cca3c2b75..b97d3ad832f1 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -1364,7 +1364,7 @@ static const char *hist_field_name(struct hist_field *field,
if (field->field)
field_name = field->field->name;
else
- field_name = "stacktrace";
+ field_name = "common_stacktrace";
} else if (field->flags & HIST_FIELD_FL_HITCOUNT)
field_name = "hitcount";
@@ -2367,7 +2367,7 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
hist_data->enable_timestamps = true;
if (*flags & HIST_FIELD_FL_TIMESTAMP_USECS)
hist_data->attrs->ts_in_usecs = true;
- } else if (strcmp(field_name, "stacktrace") == 0) {
+ } else if (strcmp(field_name, "common_stacktrace") == 0) {
*flags |= HIST_FIELD_FL_STACKTRACE;
} else if (strcmp(field_name, "common_cpu") == 0)
*flags |= HIST_FIELD_FL_CPU;
@@ -2378,11 +2378,15 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
if (!field || !field->size) {
/*
* For backward compatibility, if field_name
- * was "cpu", then we treat this the same as
- * common_cpu. This also works for "CPU".
+ * was "cpu" or "stacktrace", then we treat this
+ * the same as common_cpu and common_stacktrace
+ * respectively. This also works for "CPU", and
+ * "STACKTRACE".
*/
if (field && field->filter_type == FILTER_CPU) {
*flags |= HIST_FIELD_FL_CPU;
+ } else if (field && field->filter_type == FILTER_STACKTRACE) {
+ *flags |= HIST_FIELD_FL_STACKTRACE;
} else {
hist_err(tr, HIST_ERR_FIELD_NOT_FOUND,
errpos(field_name));
@@ -4238,13 +4242,19 @@ static int __create_val_field(struct hist_trigger_data *hist_data,
goto out;
}
- /* Some types cannot be a value */
- if (hist_field->flags & (HIST_FIELD_FL_GRAPH | HIST_FIELD_FL_PERCENT |
- HIST_FIELD_FL_BUCKET | HIST_FIELD_FL_LOG2 |
- HIST_FIELD_FL_SYM | HIST_FIELD_FL_SYM_OFFSET |
- HIST_FIELD_FL_SYSCALL | HIST_FIELD_FL_STACKTRACE)) {
- hist_err(file->tr, HIST_ERR_BAD_FIELD_MODIFIER, errpos(field_str));
- ret = -EINVAL;
+ /* values and variables should not have some modifiers */
+ if (hist_field->flags & HIST_FIELD_FL_VAR) {
+ /* Variable */
+ if (hist_field->flags & (HIST_FIELD_FL_GRAPH | HIST_FIELD_FL_PERCENT |
+ HIST_FIELD_FL_BUCKET | HIST_FIELD_FL_LOG2))
+ goto err;
+ } else {
+ /* Value */
+ if (hist_field->flags & (HIST_FIELD_FL_GRAPH | HIST_FIELD_FL_PERCENT |
+ HIST_FIELD_FL_BUCKET | HIST_FIELD_FL_LOG2 |
+ HIST_FIELD_FL_SYM | HIST_FIELD_FL_SYM_OFFSET |
+ HIST_FIELD_FL_SYSCALL | HIST_FIELD_FL_STACKTRACE))
+ goto err;
}
hist_data->fields[val_idx] = hist_field;
@@ -4256,6 +4266,9 @@ static int __create_val_field(struct hist_trigger_data *hist_data,
ret = -EINVAL;
out:
return ret;
+ err:
+ hist_err(file->tr, HIST_ERR_BAD_FIELD_MODIFIER, errpos(field_str));
+ return -EINVAL;
}
static int create_val_field(struct hist_trigger_data *hist_data,
@@ -5385,7 +5398,7 @@ static void hist_trigger_print_key(struct seq_file *m,
if (key_field->field)
seq_printf(m, "%s.stacktrace", key_field->field->name);
else
- seq_puts(m, "stacktrace:\n");
+ seq_puts(m, "common_stacktrace:\n");
hist_trigger_stacktrace_print(m,
key + key_field->offset,
HIST_STACKTRACE_DEPTH);
@@ -5968,7 +5981,7 @@ static int event_hist_trigger_print(struct seq_file *m,
if (field->field)
seq_printf(m, "%s.stacktrace", field->field->name);
else
- seq_puts(m, "stacktrace");
+ seq_puts(m, "common_stacktrace");
} else
hist_field_print(m, field);
}
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index b1ecd7677642..8df0550415e7 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -50,6 +50,18 @@
#define EVENT_STATUS_OTHER BIT(7)
/*
+ * User register flags are not allowed yet, keep them here until we are
+ * ready to expose them out to the user ABI.
+ */
+enum user_reg_flag {
+ /* Event will not delete upon last reference closing */
+ USER_EVENT_REG_PERSIST = 1U << 0,
+
+ /* This value or above is currently non-ABI */
+ USER_EVENT_REG_MAX = 1U << 1,
+};
+
+/*
* Stores the system name, tables, and locks for a group of events. This
* allows isolation for events by various means.
*/
@@ -85,8 +97,10 @@ struct user_event {
struct hlist_node node;
struct list_head fields;
struct list_head validators;
+ struct work_struct put_work;
refcount_t refcnt;
int min_size;
+ int reg_flags;
char status;
};
@@ -96,12 +110,12 @@ struct user_event {
* these to track enablement sites that are tied to an event.
*/
struct user_event_enabler {
- struct list_head link;
+ struct list_head mm_enablers_link;
struct user_event *event;
unsigned long addr;
/* Track enable bit, flags, etc. Aligned for bitops. */
- unsigned int values;
+ unsigned long values;
};
/* Bits 0-5 are for the bit to update upon enable/disable (0-63 allowed) */
@@ -116,7 +130,9 @@ struct user_event_enabler {
/* Only duplicate the bit value */
#define ENABLE_VAL_DUP_MASK ENABLE_VAL_BIT_MASK
-#define ENABLE_BITOPS(e) ((unsigned long *)&(e)->values)
+#define ENABLE_BITOPS(e) (&(e)->values)
+
+#define ENABLE_BIT(e) ((int)((e)->values & ENABLE_VAL_BIT_MASK))
/* Used for asynchronous faulting in of pages */
struct user_event_enabler_fault {
@@ -153,7 +169,7 @@ struct user_event_file_info {
#define VALIDATOR_REL (1 << 1)
struct user_event_validator {
- struct list_head link;
+ struct list_head user_event_link;
int offset;
int flags;
};
@@ -163,76 +179,151 @@ typedef void (*user_event_func_t) (struct user_event *user, struct iov_iter *i,
static int user_event_parse(struct user_event_group *group, char *name,
char *args, char *flags,
- struct user_event **newuser);
+ struct user_event **newuser, int reg_flags);
static struct user_event_mm *user_event_mm_get(struct user_event_mm *mm);
static struct user_event_mm *user_event_mm_get_all(struct user_event *user);
static void user_event_mm_put(struct user_event_mm *mm);
+static int destroy_user_event(struct user_event *user);
static u32 user_event_key(char *name)
{
return jhash(name, strlen(name), 0);
}
-static void user_event_group_destroy(struct user_event_group *group)
+static struct user_event *user_event_get(struct user_event *user)
{
- kfree(group->system_name);
- kfree(group);
+ refcount_inc(&user->refcnt);
+
+ return user;
}
-static char *user_event_group_system_name(struct user_namespace *user_ns)
+static void delayed_destroy_user_event(struct work_struct *work)
{
- char *system_name;
- int len = sizeof(USER_EVENTS_SYSTEM) + 1;
+ struct user_event *user = container_of(
+ work, struct user_event, put_work);
+
+ mutex_lock(&event_mutex);
- if (user_ns != &init_user_ns) {
+ if (!refcount_dec_and_test(&user->refcnt))
+ goto out;
+
+ if (destroy_user_event(user)) {
/*
- * Unexpected at this point:
- * We only currently support init_user_ns.
- * When we enable more, this will trigger a failure so log.
+ * The only reason this would fail here is if we cannot
+ * update the visibility of the event. In this case the
+ * event stays in the hashtable, waiting for someone to
+ * attempt to delete it later.
*/
- pr_warn("user_events: Namespace other than init_user_ns!\n");
- return NULL;
+ pr_warn("user_events: Unable to delete event\n");
+ refcount_set(&user->refcnt, 1);
}
+out:
+ mutex_unlock(&event_mutex);
+}
- system_name = kmalloc(len, GFP_KERNEL);
+static void user_event_put(struct user_event *user, bool locked)
+{
+ bool delete;
- if (!system_name)
- return NULL;
+ if (unlikely(!user))
+ return;
- snprintf(system_name, len, "%s", USER_EVENTS_SYSTEM);
+ /*
+ * When the event is not enabled for auto-delete there will always
+ * be at least 1 reference to the event. During the event creation
+ * we initially set the refcnt to 2 to achieve this. In those cases
+ * the caller must acquire event_mutex and after decrement check if
+ * the refcnt is 1, meaning this is the last reference. When auto
+ * delete is enabled, there will only be 1 ref, IE: refcnt will be
+ * only set to 1 during creation to allow the below checks to go
+ * through upon the last put. The last put must always be done with
+ * the event mutex held.
+ */
+ if (!locked) {
+ lockdep_assert_not_held(&event_mutex);
+ delete = refcount_dec_and_mutex_lock(&user->refcnt, &event_mutex);
+ } else {
+ lockdep_assert_held(&event_mutex);
+ delete = refcount_dec_and_test(&user->refcnt);
+ }
- return system_name;
+ if (!delete)
+ return;
+
+ /*
+ * We now have the event_mutex in all cases, which ensures that
+ * no new references will be taken until event_mutex is released.
+ * New references come through find_user_event(), which requires
+ * the event_mutex to be held.
+ */
+
+ if (user->reg_flags & USER_EVENT_REG_PERSIST) {
+ /* We should not get here when persist flag is set */
+ pr_alert("BUG: Auto-delete engaged on persistent event\n");
+ goto out;
+ }
+
+ /*
+ * Unfortunately we have to attempt the actual destroy in a work
+ * queue. This is because not all cases handle a trace_event_call
+ * being removed within the class->reg() operation for unregister.
+ */
+ INIT_WORK(&user->put_work, delayed_destroy_user_event);
+
+ /*
+ * Since the event is still in the hashtable, we have to re-inc
+ * the ref count to 1. This count will be decremented and checked
+ * in the work queue to ensure it's still the last ref. This is
+ * needed because a user-process could register the same event in
+ * between the time of event_mutex release and the work queue
+ * running the delayed destroy. If we removed the item now from
+ * the hashtable, this would result in a timing window where a
+ * user process would fail a register because the trace_event_call
+ * register would fail in the tracing layers.
+ */
+ refcount_set(&user->refcnt, 1);
+
+ if (WARN_ON_ONCE(!schedule_work(&user->put_work))) {
+ /*
+ * If we fail we must wait for an admin to attempt delete or
+ * another register/close of the event, whichever is first.
+ */
+ pr_warn("user_events: Unable to queue delayed destroy\n");
+ }
+out:
+ /* Ensure if we didn't have event_mutex before we unlock it */
+ if (!locked)
+ mutex_unlock(&event_mutex);
}
-static inline struct user_event_group
-*user_event_group_from_user_ns(struct user_namespace *user_ns)
+static void user_event_group_destroy(struct user_event_group *group)
{
- if (user_ns == &init_user_ns)
- return init_group;
-
- return NULL;
+ kfree(group->system_name);
+ kfree(group);
}
-static struct user_event_group *current_user_event_group(void)
+static char *user_event_group_system_name(void)
{
- struct user_namespace *user_ns = current_user_ns();
- struct user_event_group *group = NULL;
+ char *system_name;
+ int len = sizeof(USER_EVENTS_SYSTEM) + 1;
- while (user_ns) {
- group = user_event_group_from_user_ns(user_ns);
+ system_name = kmalloc(len, GFP_KERNEL);
- if (group)
- break;
+ if (!system_name)
+ return NULL;
- user_ns = user_ns->parent;
- }
+ snprintf(system_name, len, "%s", USER_EVENTS_SYSTEM);
- return group;
+ return system_name;
+}
+
+static struct user_event_group *current_user_event_group(void)
+{
+ return init_group;
}
-static struct user_event_group
-*user_event_group_create(struct user_namespace *user_ns)
+static struct user_event_group *user_event_group_create(void)
{
struct user_event_group *group;
@@ -241,7 +332,7 @@ static struct user_event_group
if (!group)
return NULL;
- group->system_name = user_event_group_system_name(user_ns);
+ group->system_name = user_event_group_system_name();
if (!group->system_name)
goto error;
@@ -257,12 +348,13 @@ error:
return NULL;
};
-static void user_event_enabler_destroy(struct user_event_enabler *enabler)
+static void user_event_enabler_destroy(struct user_event_enabler *enabler,
+ bool locked)
{
- list_del_rcu(&enabler->link);
+ list_del_rcu(&enabler->mm_enablers_link);
/* No longer tracking the event via the enabler */
- refcount_dec(&enabler->event->refcnt);
+ user_event_put(enabler->event, locked);
kfree(enabler);
}
@@ -324,7 +416,7 @@ static void user_event_enabler_fault_fixup(struct work_struct *work)
/* User asked for enabler to be removed during fault */
if (test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler))) {
- user_event_enabler_destroy(enabler);
+ user_event_enabler_destroy(enabler, true);
goto out;
}
@@ -423,9 +515,9 @@ static int user_event_enabler_write(struct user_event_mm *mm,
/* Update bit atomically, user tracers must be atomic as well */
if (enabler->event && enabler->event->status)
- set_bit(enabler->values & ENABLE_VAL_BIT_MASK, ptr);
+ set_bit(ENABLE_BIT(enabler), ptr);
else
- clear_bit(enabler->values & ENABLE_VAL_BIT_MASK, ptr);
+ clear_bit(ENABLE_BIT(enabler), ptr);
kunmap_local(kaddr);
unpin_user_pages_dirty_lock(&page, 1, true);
@@ -437,11 +529,9 @@ static bool user_event_enabler_exists(struct user_event_mm *mm,
unsigned long uaddr, unsigned char bit)
{
struct user_event_enabler *enabler;
- struct user_event_enabler *next;
- list_for_each_entry_safe(enabler, next, &mm->enablers, link) {
- if (enabler->addr == uaddr &&
- (enabler->values & ENABLE_VAL_BIT_MASK) == bit)
+ list_for_each_entry(enabler, &mm->enablers, mm_enablers_link) {
+ if (enabler->addr == uaddr && ENABLE_BIT(enabler) == bit)
return true;
}
@@ -451,23 +541,36 @@ static bool user_event_enabler_exists(struct user_event_mm *mm,
static void user_event_enabler_update(struct user_event *user)
{
struct user_event_enabler *enabler;
- struct user_event_mm *mm = user_event_mm_get_all(user);
struct user_event_mm *next;
+ struct user_event_mm *mm;
int attempt;
+ lockdep_assert_held(&event_mutex);
+
+ /*
+ * We need to build a one-shot list of all the mms that have an
+ * enabler for the user_event passed in. This list is only valid
+ * while holding the event_mutex. The only reason for this is due
+ * to the global mm list being RCU protected and we use methods
+ * which can wait (mmap_read_lock and pin_user_pages_remote).
+ *
+ * NOTE: user_event_mm_get_all() increments the ref count of each
+ * mm that is added to the list to prevent removal timing windows.
+ * We must always put each mm after they are used, which may wait.
+ */
+ mm = user_event_mm_get_all(user);
+
while (mm) {
next = mm->next;
mmap_read_lock(mm->mm);
- rcu_read_lock();
- list_for_each_entry_rcu(enabler, &mm->enablers, link) {
+ list_for_each_entry(enabler, &mm->enablers, mm_enablers_link) {
if (enabler->event == user) {
attempt = 0;
user_event_enabler_write(mm, enabler, true, &attempt);
}
}
- rcu_read_unlock();
mmap_read_unlock(mm->mm);
user_event_mm_put(mm);
mm = next;
@@ -488,14 +591,14 @@ static bool user_event_enabler_dup(struct user_event_enabler *orig,
if (!enabler)
return false;
- enabler->event = orig->event;
+ enabler->event = user_event_get(orig->event);
enabler->addr = orig->addr;
/* Only dup part of value (ignore future flags, etc) */
enabler->values = orig->values & ENABLE_VAL_DUP_MASK;
- refcount_inc(&enabler->event->refcnt);
- list_add_rcu(&enabler->link, &mm->enablers);
+ /* Enablers not exposed yet, RCU not required */
+ list_add(&enabler->mm_enablers_link, &mm->enablers);
return true;
}
@@ -514,6 +617,14 @@ static struct user_event_mm *user_event_mm_get_all(struct user_event *user)
struct user_event_mm *mm;
/*
+ * We use the mm->next field to build a one-shot list from the global
+ * RCU protected list. To build this list the event_mutex must be held.
+ * This lets us build a list without requiring allocs that could fail
+ * when user based events are most wanted for diagnostics.
+ */
+ lockdep_assert_held(&event_mutex);
+
+ /*
* We do not want to block fork/exec while enablements are being
* updated, so we use RCU to walk the current tasks that have used
* user_events ABI for 1 or more events. Each enabler found in each
@@ -525,23 +636,24 @@ static struct user_event_mm *user_event_mm_get_all(struct user_event *user)
*/
rcu_read_lock();
- list_for_each_entry_rcu(mm, &user_event_mms, link)
- list_for_each_entry_rcu(enabler, &mm->enablers, link)
+ list_for_each_entry_rcu(mm, &user_event_mms, mms_link) {
+ list_for_each_entry_rcu(enabler, &mm->enablers, mm_enablers_link) {
if (enabler->event == user) {
mm->next = found;
found = user_event_mm_get(mm);
break;
}
+ }
+ }
rcu_read_unlock();
return found;
}
-static struct user_event_mm *user_event_mm_create(struct task_struct *t)
+static struct user_event_mm *user_event_mm_alloc(struct task_struct *t)
{
struct user_event_mm *user_mm;
- unsigned long flags;
user_mm = kzalloc(sizeof(*user_mm), GFP_KERNEL_ACCOUNT);
@@ -553,12 +665,6 @@ static struct user_event_mm *user_event_mm_create(struct task_struct *t)
refcount_set(&user_mm->refcnt, 1);
refcount_set(&user_mm->tasks, 1);
- spin_lock_irqsave(&user_event_mms_lock, flags);
- list_add_rcu(&user_mm->link, &user_event_mms);
- spin_unlock_irqrestore(&user_event_mms_lock, flags);
-
- t->user_event_mm = user_mm;
-
/*
* The lifetime of the memory descriptor can slightly outlast
* the task lifetime if a ref to the user_event_mm is taken
@@ -572,6 +678,17 @@ static struct user_event_mm *user_event_mm_create(struct task_struct *t)
return user_mm;
}
+static void user_event_mm_attach(struct user_event_mm *user_mm, struct task_struct *t)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&user_event_mms_lock, flags);
+ list_add_rcu(&user_mm->mms_link, &user_event_mms);
+ spin_unlock_irqrestore(&user_event_mms_lock, flags);
+
+ t->user_event_mm = user_mm;
+}
+
static struct user_event_mm *current_user_event_mm(void)
{
struct user_event_mm *user_mm = current->user_event_mm;
@@ -579,10 +696,12 @@ static struct user_event_mm *current_user_event_mm(void)
if (user_mm)
goto inc;
- user_mm = user_event_mm_create(current);
+ user_mm = user_event_mm_alloc(current);
if (!user_mm)
goto error;
+
+ user_event_mm_attach(user_mm, current);
inc:
refcount_inc(&user_mm->refcnt);
error:
@@ -593,8 +712,8 @@ static void user_event_mm_destroy(struct user_event_mm *mm)
{
struct user_event_enabler *enabler, *next;
- list_for_each_entry_safe(enabler, next, &mm->enablers, link)
- user_event_enabler_destroy(enabler);
+ list_for_each_entry_safe(enabler, next, &mm->enablers, mm_enablers_link)
+ user_event_enabler_destroy(enabler, false);
mmdrop(mm->mm);
kfree(mm);
@@ -630,7 +749,7 @@ void user_event_mm_remove(struct task_struct *t)
/* Remove the mm from the list, so it can no longer be enabled */
spin_lock_irqsave(&user_event_mms_lock, flags);
- list_del_rcu(&mm->link);
+ list_del_rcu(&mm->mms_link);
spin_unlock_irqrestore(&user_event_mms_lock, flags);
/*
@@ -670,7 +789,7 @@ void user_event_mm_remove(struct task_struct *t)
void user_event_mm_dup(struct task_struct *t, struct user_event_mm *old_mm)
{
- struct user_event_mm *mm = user_event_mm_create(t);
+ struct user_event_mm *mm = user_event_mm_alloc(t);
struct user_event_enabler *enabler;
if (!mm)
@@ -678,16 +797,18 @@ void user_event_mm_dup(struct task_struct *t, struct user_event_mm *old_mm)
rcu_read_lock();
- list_for_each_entry_rcu(enabler, &old_mm->enablers, link)
+ list_for_each_entry_rcu(enabler, &old_mm->enablers, mm_enablers_link) {
if (!user_event_enabler_dup(enabler, mm))
goto error;
+ }
rcu_read_unlock();
+ user_event_mm_attach(mm, t);
return;
error:
rcu_read_unlock();
- user_event_mm_remove(t);
+ user_event_mm_destroy(mm);
}
static bool current_user_event_enabler_exists(unsigned long uaddr,
@@ -747,8 +868,8 @@ retry:
* exit or run exec(), which includes forks and clones.
*/
if (!*write_result) {
- refcount_inc(&enabler->event->refcnt);
- list_add_rcu(&enabler->link, &user_mm->enablers);
+ user_event_get(user);
+ list_add_rcu(&enabler->mm_enablers_link, &user_mm->enablers);
}
mutex_unlock(&event_mutex);
@@ -770,7 +891,12 @@ out:
static __always_inline __must_check
bool user_event_last_ref(struct user_event *user)
{
- return refcount_read(&user->refcnt) == 1;
+ int last = 0;
+
+ if (user->reg_flags & USER_EVENT_REG_PERSIST)
+ last = 1;
+
+ return refcount_read(&user->refcnt) == last;
}
static __always_inline __must_check
@@ -809,7 +935,8 @@ static struct list_head *user_event_get_fields(struct trace_event_call *call)
* Upon success user_event has its ref count increased by 1.
*/
static int user_event_parse_cmd(struct user_event_group *group,
- char *raw_command, struct user_event **newuser)
+ char *raw_command, struct user_event **newuser,
+ int reg_flags)
{
char *name = raw_command;
char *args = strpbrk(name, " ");
@@ -823,7 +950,7 @@ static int user_event_parse_cmd(struct user_event_group *group,
if (flags)
*flags++ = '\0';
- return user_event_parse(group, name, args, flags, newuser);
+ return user_event_parse(group, name, args, flags, newuser, reg_flags);
}
static int user_field_array_size(const char *type)
@@ -904,8 +1031,8 @@ static void user_event_destroy_validators(struct user_event *user)
struct user_event_validator *validator, *next;
struct list_head *head = &user->validators;
- list_for_each_entry_safe(validator, next, head, link) {
- list_del(&validator->link);
+ list_for_each_entry_safe(validator, next, head, user_event_link) {
+ list_del(&validator->user_event_link);
kfree(validator);
}
}
@@ -959,7 +1086,7 @@ add_validator:
validator->offset = offset;
/* Want sequential access when validating */
- list_add_tail(&validator->link, &user->validators);
+ list_add_tail(&validator->user_event_link, &user->validators);
add_field:
field->type = type;
@@ -1334,10 +1461,8 @@ static struct user_event *find_user_event(struct user_event_group *group,
*outkey = key;
hash_for_each_possible(group->register_table, user, node, key)
- if (!strcmp(EVENT_NAME(user), name)) {
- refcount_inc(&user->refcnt);
- return user;
- }
+ if (!strcmp(EVENT_NAME(user), name))
+ return user_event_get(user);
return NULL;
}
@@ -1349,7 +1474,7 @@ static int user_event_validate(struct user_event *user, void *data, int len)
void *pos, *end = data + len;
u32 loc, offset, size;
- list_for_each_entry(validator, head, link) {
+ list_for_each_entry(validator, head, user_event_link) {
pos = data + validator->offset;
/* Already done min_size check, no bounds check here */
@@ -1399,7 +1524,7 @@ static void user_event_ftrace(struct user_event *user, struct iov_iter *i,
if (unlikely(!entry))
return;
- if (unlikely(!copy_nofault(entry + 1, i->count, i)))
+ if (unlikely(i->count != 0 && !copy_nofault(entry + 1, i->count, i)))
goto discard;
if (!list_empty(&user->validators) &&
@@ -1440,7 +1565,7 @@ static void user_event_perf(struct user_event *user, struct iov_iter *i,
perf_fetch_caller_regs(regs);
- if (unlikely(!copy_nofault(perf_entry + 1, i->count, i)))
+ if (unlikely(i->count != 0 && !copy_nofault(perf_entry + 1, i->count, i)))
goto discard;
if (!list_empty(&user->validators) &&
@@ -1551,12 +1676,12 @@ static int user_event_reg(struct trace_event_call *call,
return ret;
inc:
- refcount_inc(&user->refcnt);
+ user_event_get(user);
update_enable_bit_for(user);
return 0;
dec:
update_enable_bit_for(user);
- refcount_dec(&user->refcnt);
+ user_event_put(user, true);
return 0;
}
@@ -1587,10 +1712,11 @@ static int user_event_create(const char *raw_command)
mutex_lock(&group->reg_mutex);
- ret = user_event_parse_cmd(group, name, &user);
+ /* Dyn events persist, otherwise they would cleanup immediately */
+ ret = user_event_parse_cmd(group, name, &user, USER_EVENT_REG_PERSIST);
if (!ret)
- refcount_dec(&user->refcnt);
+ user_event_put(user, false);
mutex_unlock(&group->reg_mutex);
@@ -1712,6 +1838,8 @@ static bool user_event_match(const char *system, const char *event,
if (match && argc > 0)
match = user_fields_match(user, argc, argv);
+ else if (match && argc == 0)
+ match = list_empty(&user->fields);
return match;
}
@@ -1748,11 +1876,17 @@ static int user_event_trace_register(struct user_event *user)
*/
static int user_event_parse(struct user_event_group *group, char *name,
char *args, char *flags,
- struct user_event **newuser)
+ struct user_event **newuser, int reg_flags)
{
int ret;
u32 key;
struct user_event *user;
+ int argc = 0;
+ char **argv;
+
+ /* User register flags are not ready yet */
+ if (reg_flags != 0 || flags != NULL)
+ return -EINVAL;
/* Prevent dyn_event from racing */
mutex_lock(&event_mutex);
@@ -1760,13 +1894,35 @@ static int user_event_parse(struct user_event_group *group, char *name,
mutex_unlock(&event_mutex);
if (user) {
- *newuser = user;
- /*
- * Name is allocated by caller, free it since it already exists.
- * Caller only worries about failure cases for freeing.
- */
- kfree(name);
+ if (args) {
+ argv = argv_split(GFP_KERNEL, args, &argc);
+ if (!argv) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ ret = user_fields_match(user, argc, (const char **)argv);
+ argv_free(argv);
+
+ } else
+ ret = list_empty(&user->fields);
+
+ if (ret) {
+ *newuser = user;
+ /*
+ * Name is allocated by caller, free it since it already exists.
+ * Caller only worries about failure cases for freeing.
+ */
+ kfree(name);
+ } else {
+ ret = -EADDRINUSE;
+ goto error;
+ }
+
return 0;
+error:
+ user_event_put(user, false);
+ return ret;
}
user = kzalloc(sizeof(*user), GFP_KERNEL_ACCOUNT);
@@ -1819,8 +1975,15 @@ static int user_event_parse(struct user_event_group *group, char *name,
if (ret)
goto put_user_lock;
- /* Ensure we track self ref and caller ref (2) */
- refcount_set(&user->refcnt, 2);
+ user->reg_flags = reg_flags;
+
+ if (user->reg_flags & USER_EVENT_REG_PERSIST) {
+ /* Ensure we track self ref and caller ref (2) */
+ refcount_set(&user->refcnt, 2);
+ } else {
+ /* Ensure we track only caller ref (1) */
+ refcount_set(&user->refcnt, 1);
+ }
dyn_event_init(&user->devent, &user_event_dops);
dyn_event_add(&user->devent, &user->call);
@@ -1852,7 +2015,7 @@ static int delete_user_event(struct user_event_group *group, char *name)
if (!user)
return -ENOENT;
- refcount_dec(&user->refcnt);
+ user_event_put(user, true);
if (!user_event_last_ref(user))
return -EBUSY;
@@ -2011,9 +2174,7 @@ static int user_events_ref_add(struct user_event_file_info *info,
for (i = 0; i < count; ++i)
new_refs->events[i] = refs->events[i];
- new_refs->events[i] = user;
-
- refcount_inc(&user->refcnt);
+ new_refs->events[i] = user_event_get(user);
rcu_assign_pointer(info->refs, new_refs);
@@ -2044,8 +2205,8 @@ static long user_reg_get(struct user_reg __user *ureg, struct user_reg *kreg)
if (ret)
return ret;
- /* Ensure no flags, since we don't support any yet */
- if (kreg->flags != 0)
+ /* Ensure only valid flags */
+ if (kreg->flags & ~(USER_EVENT_REG_MAX-1))
return -EINVAL;
/* Ensure supported size */
@@ -2117,7 +2278,7 @@ static long user_events_ioctl_reg(struct user_event_file_info *info,
return ret;
}
- ret = user_event_parse_cmd(info->group, name, &user);
+ ret = user_event_parse_cmd(info->group, name, &user, reg.flags);
if (ret) {
kfree(name);
@@ -2127,7 +2288,7 @@ static long user_events_ioctl_reg(struct user_event_file_info *info,
ret = user_events_ref_add(info, user);
/* No longer need parse ref, ref_add either worked or not */
- refcount_dec(&user->refcnt);
+ user_event_put(user, false);
/* Positive number is index and valid */
if (ret < 0)
@@ -2270,17 +2431,18 @@ static long user_events_ioctl_unreg(unsigned long uarg)
*/
mutex_lock(&event_mutex);
- list_for_each_entry_safe(enabler, next, &mm->enablers, link)
+ list_for_each_entry_safe(enabler, next, &mm->enablers, mm_enablers_link) {
if (enabler->addr == reg.disable_addr &&
- (enabler->values & ENABLE_VAL_BIT_MASK) == reg.disable_bit) {
+ ENABLE_BIT(enabler) == reg.disable_bit) {
set_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler));
if (!test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)))
- user_event_enabler_destroy(enabler);
+ user_event_enabler_destroy(enabler, true);
/* Removed at least one */
ret = 0;
}
+ }
mutex_unlock(&event_mutex);
@@ -2333,7 +2495,6 @@ static int user_events_release(struct inode *node, struct file *file)
struct user_event_file_info *info = file->private_data;
struct user_event_group *group;
struct user_event_refs *refs;
- struct user_event *user;
int i;
if (!info)
@@ -2357,12 +2518,9 @@ static int user_events_release(struct inode *node, struct file *file)
* The underlying user_events are ref counted, and cannot be freed.
* After this decrement, the user_events may be freed elsewhere.
*/
- for (i = 0; i < refs->count; ++i) {
- user = refs->events[i];
+ for (i = 0; i < refs->count; ++i)
+ user_event_put(refs->events[i], false);
- if (user)
- refcount_dec(&user->refcnt);
- }
out:
file->private_data = NULL;
@@ -2543,7 +2701,7 @@ static int __init trace_events_user_init(void)
if (!fault_cache)
return -ENOMEM;
- init_group = user_event_group_create(&init_user_ns);
+ init_group = user_event_group_create();
if (!init_group) {
kmem_cache_destroy(fault_cache);
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index efbbec2caff8..e97e3fa5cbed 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -1652,6 +1652,8 @@ static enum hrtimer_restart timerlat_irq(struct hrtimer *timer)
osnoise_stop_tracing();
notify_new_max_latency(diff);
+ wake_up_process(tlat->kthread);
+
return HRTIMER_NORESTART;
}
}
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 15f05faaae44..1e33f367783e 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -847,7 +847,7 @@ static void print_fields(struct trace_iterator *iter, struct trace_event_call *c
int ret;
void *pos;
- list_for_each_entry(field, head, link) {
+ list_for_each_entry_reverse(field, head, link) {
trace_seq_printf(&iter->seq, " %s=", field->name);
if (field->offset + field->size > iter->ent_size) {
trace_seq_puts(&iter->seq, "<OVERFLOW>");
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index ef8ed3b65d05..6a4ecfb1da43 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -308,7 +308,7 @@ trace_probe_primary_from_call(struct trace_event_call *call)
{
struct trace_probe_event *tpe = trace_probe_event_from_call(call);
- return list_first_entry(&tpe->probes, struct trace_probe, list);
+ return list_first_entry_or_null(&tpe->probes, struct trace_probe, list);
}
static inline struct list_head *trace_probe_probe_list(struct trace_probe *tp)
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index a931d9aaea26..529590499b1f 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -848,6 +848,12 @@ trace_selftest_startup_function_graph(struct tracer *trace,
}
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ /*
+ * These tests can take some time to run. Make sure on non PREEMPT
+ * kernels, we do not trigger the softlockup detector.
+ */
+ cond_resched();
+
tracing_reset_online_cpus(&tr->array_buffer);
set_graph_array(tr);
@@ -869,6 +875,8 @@ trace_selftest_startup_function_graph(struct tracer *trace,
if (ret)
goto out;
+ cond_resched();
+
ret = register_ftrace_graph(&fgraph_ops);
if (ret) {
warn_failed_init_tracer(trace, ret);
@@ -891,6 +899,8 @@ trace_selftest_startup_function_graph(struct tracer *trace,
if (ret)
goto out;
+ cond_resched();
+
tracing_start();
if (!ret && !count) {
diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c
index b7cbd66f889e..da35e5b7f047 100644
--- a/kernel/vhost_task.c
+++ b/kernel/vhost_task.c
@@ -12,58 +12,90 @@ enum vhost_task_flags {
VHOST_TASK_FLAGS_STOP,
};
+struct vhost_task {
+ bool (*fn)(void *data);
+ void *data;
+ struct completion exited;
+ unsigned long flags;
+ struct task_struct *task;
+};
+
static int vhost_task_fn(void *data)
{
struct vhost_task *vtsk = data;
- int ret;
+ bool dead = false;
+
+ for (;;) {
+ bool did_work;
+
+ if (!dead && signal_pending(current)) {
+ struct ksignal ksig;
+ /*
+ * Calling get_signal will block in SIGSTOP,
+ * or clear fatal_signal_pending, but remember
+ * what was set.
+ *
+ * This thread won't actually exit until all
+ * of the file descriptors are closed, and
+ * the release function is called.
+ */
+ dead = get_signal(&ksig);
+ if (dead)
+ clear_thread_flag(TIF_SIGPENDING);
+ }
+
+ /* mb paired w/ vhost_task_stop */
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ if (test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags)) {
+ __set_current_state(TASK_RUNNING);
+ break;
+ }
+
+ did_work = vtsk->fn(vtsk->data);
+ if (!did_work)
+ schedule();
+ }
- ret = vtsk->fn(vtsk->data);
complete(&vtsk->exited);
- do_exit(ret);
+ do_exit(0);
}
/**
+ * vhost_task_wake - wakeup the vhost_task
+ * @vtsk: vhost_task to wake
+ *
+ * wake up the vhost_task worker thread
+ */
+void vhost_task_wake(struct vhost_task *vtsk)
+{
+ wake_up_process(vtsk->task);
+}
+EXPORT_SYMBOL_GPL(vhost_task_wake);
+
+/**
* vhost_task_stop - stop a vhost_task
* @vtsk: vhost_task to stop
*
- * Callers must call vhost_task_should_stop and return from their worker
- * function when it returns true;
+ * vhost_task_fn ensures the worker thread exits after
+ * VHOST_TASK_FLAGS_SOP becomes true.
*/
void vhost_task_stop(struct vhost_task *vtsk)
{
- pid_t pid = vtsk->task->pid;
-
set_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags);
- wake_up_process(vtsk->task);
+ vhost_task_wake(vtsk);
/*
* Make sure vhost_task_fn is no longer accessing the vhost_task before
- * freeing it below. If userspace crashed or exited without closing,
- * then the vhost_task->task could already be marked dead so
- * kernel_wait will return early.
+ * freeing it below.
*/
wait_for_completion(&vtsk->exited);
- /*
- * If we are just closing/removing a device and the parent process is
- * not exiting then reap the task.
- */
- kernel_wait4(pid, NULL, __WCLONE, NULL);
kfree(vtsk);
}
EXPORT_SYMBOL_GPL(vhost_task_stop);
/**
- * vhost_task_should_stop - should the vhost task return from the work function
- * @vtsk: vhost_task to stop
- */
-bool vhost_task_should_stop(struct vhost_task *vtsk)
-{
- return test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags);
-}
-EXPORT_SYMBOL_GPL(vhost_task_should_stop);
-
-/**
- * vhost_task_create - create a copy of a process to be used by the kernel
- * @fn: thread stack
+ * vhost_task_create - create a copy of a task to be used by the kernel
+ * @fn: vhost worker function
* @arg: data to be passed to fn
* @name: the thread's name
*
@@ -71,17 +103,17 @@ EXPORT_SYMBOL_GPL(vhost_task_should_stop);
* failure. The returned task is inactive, and the caller must fire it up
* through vhost_task_start().
*/
-struct vhost_task *vhost_task_create(int (*fn)(void *), void *arg,
+struct vhost_task *vhost_task_create(bool (*fn)(void *), void *arg,
const char *name)
{
struct kernel_clone_args args = {
- .flags = CLONE_FS | CLONE_UNTRACED | CLONE_VM,
+ .flags = CLONE_FS | CLONE_UNTRACED | CLONE_VM |
+ CLONE_THREAD | CLONE_SIGHAND,
.exit_signal = 0,
.fn = vhost_task_fn,
.name = name,
.user_worker = 1,
.no_files = 1,
- .ignore_signals = 1,
};
struct vhost_task *vtsk;
struct task_struct *tsk;
diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
index e91cb4c2833f..d0b6b390ee42 100644
--- a/kernel/watch_queue.c
+++ b/kernel/watch_queue.c
@@ -42,7 +42,7 @@ MODULE_AUTHOR("Red Hat, Inc.");
static inline bool lock_wqueue(struct watch_queue *wqueue)
{
spin_lock_bh(&wqueue->lock);
- if (unlikely(wqueue->defunct)) {
+ if (unlikely(!wqueue->pipe)) {
spin_unlock_bh(&wqueue->lock);
return false;
}
@@ -104,9 +104,6 @@ static bool post_one_notification(struct watch_queue *wqueue,
unsigned int head, tail, mask, note, offset, len;
bool done = false;
- if (!pipe)
- return false;
-
spin_lock_irq(&pipe->rd_wait.lock);
mask = pipe->ring_size - 1;
@@ -603,8 +600,11 @@ void watch_queue_clear(struct watch_queue *wqueue)
rcu_read_lock();
spin_lock_bh(&wqueue->lock);
- /* Prevent new notifications from being stored. */
- wqueue->defunct = true;
+ /*
+ * This pipe can be freed by callers like free_pipe_info().
+ * Removing this reference also prevents new notifications.
+ */
+ wqueue->pipe = NULL;
while (!hlist_empty(&wqueue->watches)) {
watch = hlist_entry(wqueue->watches.first, struct watch, queue_node);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 4666a1a92a31..c913e333cce8 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -705,12 +705,17 @@ static void clear_work_data(struct work_struct *work)
set_work_data(work, WORK_STRUCT_NO_POOL, 0);
}
+static inline struct pool_workqueue *work_struct_pwq(unsigned long data)
+{
+ return (struct pool_workqueue *)(data & WORK_STRUCT_WQ_DATA_MASK);
+}
+
static struct pool_workqueue *get_work_pwq(struct work_struct *work)
{
unsigned long data = atomic_long_read(&work->data);
if (data & WORK_STRUCT_PWQ)
- return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
+ return work_struct_pwq(data);
else
return NULL;
}
@@ -738,8 +743,7 @@ static struct worker_pool *get_work_pool(struct work_struct *work)
assert_rcu_or_pool_mutex();
if (data & WORK_STRUCT_PWQ)
- return ((struct pool_workqueue *)
- (data & WORK_STRUCT_WQ_DATA_MASK))->pool;
+ return work_struct_pwq(data)->pool;
pool_id = data >> WORK_OFFQ_POOL_SHIFT;
if (pool_id == WORK_OFFQ_POOL_NONE)
@@ -760,8 +764,7 @@ static int get_work_pool_id(struct work_struct *work)
unsigned long data = atomic_long_read(&work->data);
if (data & WORK_STRUCT_PWQ)
- return ((struct pool_workqueue *)
- (data & WORK_STRUCT_WQ_DATA_MASK))->pool->id;
+ return work_struct_pwq(data)->pool->id;
return data >> WORK_OFFQ_POOL_SHIFT;
}
diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c
index 73c1636b927b..4c348670da31 100644
--- a/lib/cpu_rmap.c
+++ b/lib/cpu_rmap.c
@@ -280,8 +280,8 @@ static void irq_cpu_rmap_release(struct kref *ref)
struct irq_glue *glue =
container_of(ref, struct irq_glue, notify.kref);
- cpu_rmap_put(glue->rmap);
glue->rmap->obj[glue->index] = NULL;
+ cpu_rmap_put(glue->rmap);
kfree(glue);
}
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 003edc5ebd67..984985c39c9b 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -126,7 +126,7 @@ static const char *obj_states[ODEBUG_STATE_MAX] = {
static void fill_pool(void)
{
- gfp_t gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
+ gfp_t gfp = __GFP_HIGH | __GFP_NOWARN;
struct debug_obj *obj;
unsigned long flags;
@@ -591,10 +591,21 @@ static void debug_objects_fill_pool(void)
{
/*
* On RT enabled kernels the pool refill must happen in preemptible
- * context:
+ * context -- for !RT kernels we rely on the fact that spinlock_t and
+ * raw_spinlock_t are basically the same type and this lock-type
+ * inversion works just fine.
*/
- if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible())
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) {
+ /*
+ * Annotate away the spinlock_t inside raw_spinlock_t warning
+ * by temporarily raising the wait-type to WAIT_SLEEP, matching
+ * the preemptible() condition above.
+ */
+ static DEFINE_WAIT_OVERRIDE_MAP(fill_pool_map, LD_WAIT_SLEEP);
+ lock_map_acquire_try(&fill_pool_map);
fill_pool();
+ lock_map_release(&fill_pool_map);
+ }
}
static void
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 110a36479dce..8ebc43d4cc8c 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -5317,15 +5317,9 @@ int mas_empty_area(struct ma_state *mas, unsigned long min,
mt = mte_node_type(mas->node);
pivots = ma_pivots(mas_mn(mas), mt);
- if (offset)
- mas->min = pivots[offset - 1] + 1;
-
- if (offset < mt_pivots[mt])
- mas->max = pivots[offset];
-
- if (mas->index < mas->min)
- mas->index = mas->min;
-
+ min = mas_safe_min(mas, pivots, offset);
+ if (mas->index < min)
+ mas->index = min;
mas->last = mas->index + size - 1;
return 0;
}
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 049ba132f7ef..1a31065b2036 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -27,6 +27,8 @@
#include <linux/string.h>
#include <linux/xarray.h>
+#include "radix-tree.h"
+
/*
* Radix tree node cache.
*/
diff --git a/lib/radix-tree.h b/lib/radix-tree.h
new file mode 100644
index 000000000000..40d5c03e2b09
--- /dev/null
+++ b/lib/radix-tree.h
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* radix-tree helpers that are only shared with xarray */
+
+struct kmem_cache;
+struct rcu_head;
+
+extern struct kmem_cache *radix_tree_node_cachep;
+extern void radix_tree_node_rcu_free(struct rcu_head *head);
diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index 05ed84c2fc4c..1d7d480b8eeb 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -45,6 +45,7 @@ struct test_batched_req {
bool sent;
const struct firmware *fw;
const char *name;
+ const char *fw_buf;
struct completion completion;
struct task_struct *task;
struct device *dev;
@@ -175,8 +176,14 @@ static void __test_release_all_firmware(void)
for (i = 0; i < test_fw_config->num_requests; i++) {
req = &test_fw_config->reqs[i];
- if (req->fw)
+ if (req->fw) {
+ if (req->fw_buf) {
+ kfree_const(req->fw_buf);
+ req->fw_buf = NULL;
+ }
release_firmware(req->fw);
+ req->fw = NULL;
+ }
}
vfree(test_fw_config->reqs);
@@ -353,16 +360,26 @@ static ssize_t config_test_show_str(char *dst,
return len;
}
-static int test_dev_config_update_bool(const char *buf, size_t size,
+static inline int __test_dev_config_update_bool(const char *buf, size_t size,
bool *cfg)
{
int ret;
- mutex_lock(&test_fw_mutex);
if (kstrtobool(buf, cfg) < 0)
ret = -EINVAL;
else
ret = size;
+
+ return ret;
+}
+
+static int test_dev_config_update_bool(const char *buf, size_t size,
+ bool *cfg)
+{
+ int ret;
+
+ mutex_lock(&test_fw_mutex);
+ ret = __test_dev_config_update_bool(buf, size, cfg);
mutex_unlock(&test_fw_mutex);
return ret;
@@ -373,7 +390,8 @@ static ssize_t test_dev_config_show_bool(char *buf, bool val)
return snprintf(buf, PAGE_SIZE, "%d\n", val);
}
-static int test_dev_config_update_size_t(const char *buf,
+static int __test_dev_config_update_size_t(
+ const char *buf,
size_t size,
size_t *cfg)
{
@@ -384,9 +402,7 @@ static int test_dev_config_update_size_t(const char *buf,
if (ret)
return ret;
- mutex_lock(&test_fw_mutex);
*(size_t *)cfg = new;
- mutex_unlock(&test_fw_mutex);
/* Always return full write size even if we didn't consume all */
return size;
@@ -402,7 +418,7 @@ static ssize_t test_dev_config_show_int(char *buf, int val)
return snprintf(buf, PAGE_SIZE, "%d\n", val);
}
-static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
+static int __test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
{
u8 val;
int ret;
@@ -411,14 +427,23 @@ static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
if (ret)
return ret;
- mutex_lock(&test_fw_mutex);
*(u8 *)cfg = val;
- mutex_unlock(&test_fw_mutex);
/* Always return full write size even if we didn't consume all */
return size;
}
+static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
+{
+ int ret;
+
+ mutex_lock(&test_fw_mutex);
+ ret = __test_dev_config_update_u8(buf, size, cfg);
+ mutex_unlock(&test_fw_mutex);
+
+ return ret;
+}
+
static ssize_t test_dev_config_show_u8(char *buf, u8 val)
{
return snprintf(buf, PAGE_SIZE, "%u\n", val);
@@ -471,10 +496,10 @@ static ssize_t config_num_requests_store(struct device *dev,
mutex_unlock(&test_fw_mutex);
goto out;
}
- mutex_unlock(&test_fw_mutex);
- rc = test_dev_config_update_u8(buf, count,
- &test_fw_config->num_requests);
+ rc = __test_dev_config_update_u8(buf, count,
+ &test_fw_config->num_requests);
+ mutex_unlock(&test_fw_mutex);
out:
return rc;
@@ -518,10 +543,10 @@ static ssize_t config_buf_size_store(struct device *dev,
mutex_unlock(&test_fw_mutex);
goto out;
}
- mutex_unlock(&test_fw_mutex);
- rc = test_dev_config_update_size_t(buf, count,
- &test_fw_config->buf_size);
+ rc = __test_dev_config_update_size_t(buf, count,
+ &test_fw_config->buf_size);
+ mutex_unlock(&test_fw_mutex);
out:
return rc;
@@ -548,10 +573,10 @@ static ssize_t config_file_offset_store(struct device *dev,
mutex_unlock(&test_fw_mutex);
goto out;
}
- mutex_unlock(&test_fw_mutex);
- rc = test_dev_config_update_size_t(buf, count,
- &test_fw_config->file_offset);
+ rc = __test_dev_config_update_size_t(buf, count,
+ &test_fw_config->file_offset);
+ mutex_unlock(&test_fw_mutex);
out:
return rc;
@@ -652,6 +677,8 @@ static ssize_t trigger_request_store(struct device *dev,
mutex_lock(&test_fw_mutex);
release_firmware(test_firmware);
+ if (test_fw_config->reqs)
+ __test_release_all_firmware();
test_firmware = NULL;
rc = request_firmware(&test_firmware, name, dev);
if (rc) {
@@ -752,6 +779,8 @@ static ssize_t trigger_async_request_store(struct device *dev,
mutex_lock(&test_fw_mutex);
release_firmware(test_firmware);
test_firmware = NULL;
+ if (test_fw_config->reqs)
+ __test_release_all_firmware();
rc = request_firmware_nowait(THIS_MODULE, 1, name, dev, GFP_KERNEL,
NULL, trigger_async_request_cb);
if (rc) {
@@ -794,6 +823,8 @@ static ssize_t trigger_custom_fallback_store(struct device *dev,
mutex_lock(&test_fw_mutex);
release_firmware(test_firmware);
+ if (test_fw_config->reqs)
+ __test_release_all_firmware();
test_firmware = NULL;
rc = request_firmware_nowait(THIS_MODULE, FW_ACTION_NOUEVENT, name,
dev, GFP_KERNEL, NULL,
@@ -856,6 +887,8 @@ static int test_fw_run_batch_request(void *data)
test_fw_config->buf_size);
if (!req->fw)
kfree(test_buf);
+ else
+ req->fw_buf = test_buf;
} else {
req->rc = test_fw_config->req_firmware(&req->fw,
req->name,
@@ -895,6 +928,11 @@ static ssize_t trigger_batched_requests_store(struct device *dev,
mutex_lock(&test_fw_mutex);
+ if (test_fw_config->reqs) {
+ rc = -EBUSY;
+ goto out_bail;
+ }
+
test_fw_config->reqs =
vzalloc(array3_size(sizeof(struct test_batched_req),
test_fw_config->num_requests, 2));
@@ -911,6 +949,7 @@ static ssize_t trigger_batched_requests_store(struct device *dev,
req->fw = NULL;
req->idx = i;
req->name = test_fw_config->name;
+ req->fw_buf = NULL;
req->dev = dev;
init_completion(&req->completion);
req->task = kthread_run(test_fw_run_batch_request, req,
@@ -993,6 +1032,11 @@ ssize_t trigger_batched_requests_async_store(struct device *dev,
mutex_lock(&test_fw_mutex);
+ if (test_fw_config->reqs) {
+ rc = -EBUSY;
+ goto out_bail;
+ }
+
test_fw_config->reqs =
vzalloc(array3_size(sizeof(struct test_batched_req),
test_fw_config->num_requests, 2));
@@ -1010,6 +1054,7 @@ ssize_t trigger_batched_requests_async_store(struct device *dev,
for (i = 0; i < test_fw_config->num_requests; i++) {
req = &test_fw_config->reqs[i];
req->name = test_fw_config->name;
+ req->fw_buf = NULL;
req->fw = NULL;
req->idx = i;
init_completion(&req->completion);
diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c
index 9dd9745d365f..3718d9886407 100644
--- a/lib/test_vmalloc.c
+++ b/lib/test_vmalloc.c
@@ -369,7 +369,7 @@ vm_map_ram_test(void)
int i;
map_nr_pages = nr_pages > 0 ? nr_pages:1;
- pages = kmalloc(map_nr_pages * sizeof(struct page), GFP_KERNEL);
+ pages = kcalloc(map_nr_pages, sizeof(struct page *), GFP_KERNEL);
if (!pages)
return -1;
diff --git a/lib/xarray.c b/lib/xarray.c
index ea9ce1f0b386..2071a3718f4e 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -12,6 +12,8 @@
#include <linux/slab.h>
#include <linux/xarray.h>
+#include "radix-tree.h"
+
/*
* Coding conventions in this file:
*
@@ -247,10 +249,6 @@ void *xas_load(struct xa_state *xas)
}
EXPORT_SYMBOL_GPL(xas_load);
-/* Move the radix tree node cache here */
-extern struct kmem_cache *radix_tree_node_cachep;
-extern void radix_tree_node_rcu_free(struct rcu_head *head);
-
#define XA_RCU_FREE ((struct xarray *)1)
static void xa_node_free(struct xa_node *node)
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index a925415b4d10..018a5bd2f576 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -98,6 +98,7 @@ config PAGE_OWNER
config PAGE_TABLE_CHECK
bool "Check for invalid mappings in user page tables"
depends on ARCH_SUPPORTS_PAGE_TABLE_CHECK
+ depends on EXCLUSIVE_SYSTEM_RAM
select PAGE_EXTENSION
help
Check that anonymous page is not being mapped twice with read write
diff --git a/mm/damon/core.c b/mm/damon/core.c
index d9ef62047bf5..91cff7f2997e 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -551,6 +551,8 @@ int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs)
return -EINVAL;
if (attrs->min_nr_regions > attrs->max_nr_regions)
return -EINVAL;
+ if (attrs->sample_interval > attrs->aggr_interval)
+ return -EINVAL;
damon_update_monitoring_results(ctx, attrs);
ctx->attrs = *attrs;
diff --git a/mm/filemap.c b/mm/filemap.c
index f87e2ad8cff1..00f01d8ead47 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1760,7 +1760,9 @@ bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm,
*
* Return: The index of the gap if found, otherwise an index outside the
* range specified (in which case 'return - index >= max_scan' will be true).
- * In the rare case of index wrap-around, 0 will be returned.
+ * In the rare case of index wrap-around, 0 will be returned. 0 will also
+ * be returned if index == 0 and there is a gap at the index. We can not
+ * wrap-around if passed index == 0.
*/
pgoff_t page_cache_next_miss(struct address_space *mapping,
pgoff_t index, unsigned long max_scan)
@@ -1770,12 +1772,13 @@ pgoff_t page_cache_next_miss(struct address_space *mapping,
while (max_scan--) {
void *entry = xas_next(&xas);
if (!entry || xa_is_value(entry))
- break;
- if (xas.xa_index == 0)
- break;
+ return xas.xa_index;
+ if (xas.xa_index == 0 && index != 0)
+ return xas.xa_index;
}
- return xas.xa_index;
+ /* No gaps in range and no wrap-around, return index beyond range */
+ return xas.xa_index + 1;
}
EXPORT_SYMBOL(page_cache_next_miss);
@@ -1796,7 +1799,9 @@ EXPORT_SYMBOL(page_cache_next_miss);
*
* Return: The index of the gap if found, otherwise an index outside the
* range specified (in which case 'index - return >= max_scan' will be true).
- * In the rare case of wrap-around, ULONG_MAX will be returned.
+ * In the rare case of wrap-around, ULONG_MAX will be returned. ULONG_MAX
+ * will also be returned if index == ULONG_MAX and there is a gap at the
+ * index. We can not wrap-around if passed index == ULONG_MAX.
*/
pgoff_t page_cache_prev_miss(struct address_space *mapping,
pgoff_t index, unsigned long max_scan)
@@ -1806,12 +1811,13 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
while (max_scan--) {
void *entry = xas_prev(&xas);
if (!entry || xa_is_value(entry))
- break;
- if (xas.xa_index == ULONG_MAX)
- break;
+ return xas.xa_index;
+ if (xas.xa_index == ULONG_MAX && index != ULONG_MAX)
+ return xas.xa_index;
}
- return xas.xa_index;
+ /* No gaps in range and no wrap-around, return index beyond range */
+ return xas.xa_index - 1;
}
EXPORT_SYMBOL(page_cache_prev_miss);
diff --git a/mm/gup_test.c b/mm/gup_test.c
index 8ae7307a1bb6..c0421b786dcd 100644
--- a/mm/gup_test.c
+++ b/mm/gup_test.c
@@ -381,6 +381,7 @@ static int gup_test_release(struct inode *inode, struct file *file)
static const struct file_operations gup_test_fops = {
.open = nonseekable_open,
.unlocked_ioctl = gup_test_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.release = gup_test_release,
};
diff --git a/mm/kfence/kfence.h b/mm/kfence/kfence.h
index 2aafc46a4aaf..392fb273e7bd 100644
--- a/mm/kfence/kfence.h
+++ b/mm/kfence/kfence.h
@@ -29,7 +29,7 @@
* canary of every 8 bytes is the same. 64-bit memory can be filled and checked
* at a time instead of byte by byte to improve performance.
*/
-#define KFENCE_CANARY_PATTERN_U64 ((u64)0xaaaaaaaaaaaaaaaa ^ (u64)(0x0706050403020100))
+#define KFENCE_CANARY_PATTERN_U64 ((u64)0xaaaaaaaaaaaaaaaa ^ (u64)(le64_to_cpu(0x0706050403020100)))
/* Maximum stack depth for reports. */
#define KFENCE_STACK_DEPTH 64
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 6b9d39d65b73..2d0d58fb4e7f 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -2070,7 +2070,6 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
TTU_IGNORE_MLOCK | TTU_BATCH_FLUSH);
xas_lock_irq(&xas);
- xas_set(&xas, index);
VM_BUG_ON_PAGE(page != xas_load(&xas), page);
diff --git a/mm/memfd.c b/mm/memfd.c
index 69b90c31d38c..e763e76f1106 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -371,12 +371,15 @@ SYSCALL_DEFINE2(memfd_create,
inode->i_mode &= ~0111;
file_seals = memfd_file_seals_ptr(file);
- *file_seals &= ~F_SEAL_SEAL;
- *file_seals |= F_SEAL_EXEC;
+ if (file_seals) {
+ *file_seals &= ~F_SEAL_SEAL;
+ *file_seals |= F_SEAL_EXEC;
+ }
} else if (flags & MFD_ALLOW_SEALING) {
/* MFD_EXEC and MFD_ALLOW_SEALING are set */
file_seals = memfd_file_seals_ptr(file);
- *file_seals &= ~F_SEAL_SEAL;
+ if (file_seals)
+ *file_seals &= ~F_SEAL_SEAL;
}
fd_install(fd, file);
diff --git a/mm/mmap.c b/mm/mmap.c
index 13678edaa22c..d600404580b2 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2318,21 +2318,6 @@ int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
return __split_vma(vmi, vma, addr, new_below);
}
-static inline int munmap_sidetree(struct vm_area_struct *vma,
- struct ma_state *mas_detach)
-{
- vma_start_write(vma);
- mas_set_range(mas_detach, vma->vm_start, vma->vm_end - 1);
- if (mas_store_gfp(mas_detach, vma, GFP_KERNEL))
- return -ENOMEM;
-
- vma_mark_detached(vma, true);
- if (vma->vm_flags & VM_LOCKED)
- vma->vm_mm->locked_vm -= vma_pages(vma);
-
- return 0;
-}
-
/*
* do_vmi_align_munmap() - munmap the aligned region from @start to @end.
* @vmi: The vma iterator
@@ -2354,6 +2339,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
struct maple_tree mt_detach;
int count = 0;
int error = -ENOMEM;
+ unsigned long locked_vm = 0;
MA_STATE(mas_detach, &mt_detach, 0, 0);
mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
mt_set_external_lock(&mt_detach, &mm->mmap_lock);
@@ -2399,9 +2385,13 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
if (error)
goto end_split_failed;
}
- error = munmap_sidetree(next, &mas_detach);
- if (error)
- goto munmap_sidetree_failed;
+ vma_start_write(next);
+ mas_set_range(&mas_detach, next->vm_start, next->vm_end - 1);
+ if (mas_store_gfp(&mas_detach, next, GFP_KERNEL))
+ goto munmap_gather_failed;
+ vma_mark_detached(next, true);
+ if (next->vm_flags & VM_LOCKED)
+ locked_vm += vma_pages(next);
count++;
#ifdef CONFIG_DEBUG_VM_MAPLE_TREE
@@ -2447,10 +2437,12 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
}
#endif
/* Point of no return */
+ error = -ENOMEM;
vma_iter_set(vmi, start);
if (vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL))
- return -ENOMEM;
+ goto clear_tree_failed;
+ mm->locked_vm -= locked_vm;
mm->map_count -= count;
/*
* Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or
@@ -2480,9 +2472,14 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
validate_mm(mm);
return downgrade ? 1 : 0;
+clear_tree_failed:
userfaultfd_error:
-munmap_sidetree_failed:
+munmap_gather_failed:
end_split_failed:
+ mas_set(&mas_detach, 0);
+ mas_for_each(&mas_detach, next, end)
+ vma_mark_detached(next, false);
+
__mt_destroy(&mt_detach);
start_split_failed:
map_count_exceeded:
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 92d3d3ca390a..c59e7561698c 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -867,7 +867,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
}
tlb_finish_mmu(&tlb);
- if (!error && vma_iter_end(&vmi) < end)
+ if (!error && tmp < end)
error = -ENOMEM;
out:
diff --git a/mm/page_table_check.c b/mm/page_table_check.c
index 25d8610c0042..f2baf97d5f38 100644
--- a/mm/page_table_check.c
+++ b/mm/page_table_check.c
@@ -71,6 +71,8 @@ static void page_table_check_clear(struct mm_struct *mm, unsigned long addr,
page = pfn_to_page(pfn);
page_ext = page_ext_get(page);
+
+ BUG_ON(PageSlab(page));
anon = PageAnon(page);
for (i = 0; i < pgcnt; i++) {
@@ -107,6 +109,8 @@ static void page_table_check_set(struct mm_struct *mm, unsigned long addr,
page = pfn_to_page(pfn);
page_ext = page_ext_get(page);
+
+ BUG_ON(PageSlab(page));
anon = PageAnon(page);
for (i = 0; i < pgcnt; i++) {
@@ -133,6 +137,8 @@ void __page_table_check_zero(struct page *page, unsigned int order)
struct page_ext *page_ext;
unsigned long i;
+ BUG_ON(PageSlab(page));
+
page_ext = page_ext_get(page);
BUG_ON(!page_ext);
for (i = 0; i < (1ul << order); i++) {
diff --git a/mm/shrinker_debug.c b/mm/shrinker_debug.c
index 3f83b10c5031..3ab53fad8876 100644
--- a/mm/shrinker_debug.c
+++ b/mm/shrinker_debug.c
@@ -5,12 +5,10 @@
#include <linux/seq_file.h>
#include <linux/shrinker.h>
#include <linux/memcontrol.h>
-#include <linux/srcu.h>
/* defined in vmscan.c */
-extern struct mutex shrinker_mutex;
+extern struct rw_semaphore shrinker_rwsem;
extern struct list_head shrinker_list;
-extern struct srcu_struct shrinker_srcu;
static DEFINE_IDA(shrinker_debugfs_ida);
static struct dentry *shrinker_debugfs_root;
@@ -51,13 +49,18 @@ static int shrinker_debugfs_count_show(struct seq_file *m, void *v)
struct mem_cgroup *memcg;
unsigned long total;
bool memcg_aware;
- int ret = 0, nid, srcu_idx;
+ int ret, nid;
count_per_node = kcalloc(nr_node_ids, sizeof(unsigned long), GFP_KERNEL);
if (!count_per_node)
return -ENOMEM;
- srcu_idx = srcu_read_lock(&shrinker_srcu);
+ ret = down_read_killable(&shrinker_rwsem);
+ if (ret) {
+ kfree(count_per_node);
+ return ret;
+ }
+ rcu_read_lock();
memcg_aware = shrinker->flags & SHRINKER_MEMCG_AWARE;
@@ -88,7 +91,8 @@ static int shrinker_debugfs_count_show(struct seq_file *m, void *v)
}
} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
- srcu_read_unlock(&shrinker_srcu, srcu_idx);
+ rcu_read_unlock();
+ up_read(&shrinker_rwsem);
kfree(count_per_node);
return ret;
@@ -111,8 +115,9 @@ static ssize_t shrinker_debugfs_scan_write(struct file *file,
.gfp_mask = GFP_KERNEL,
};
struct mem_cgroup *memcg = NULL;
- int nid, srcu_idx;
+ int nid;
char kbuf[72];
+ ssize_t ret;
read_len = size < (sizeof(kbuf) - 1) ? size : (sizeof(kbuf) - 1);
if (copy_from_user(kbuf, buf, read_len))
@@ -141,7 +146,11 @@ static ssize_t shrinker_debugfs_scan_write(struct file *file,
return -EINVAL;
}
- srcu_idx = srcu_read_lock(&shrinker_srcu);
+ ret = down_read_killable(&shrinker_rwsem);
+ if (ret) {
+ mem_cgroup_put(memcg);
+ return ret;
+ }
sc.nid = nid;
sc.memcg = memcg;
@@ -150,7 +159,7 @@ static ssize_t shrinker_debugfs_scan_write(struct file *file,
shrinker->scan_objects(shrinker, &sc);
- srcu_read_unlock(&shrinker_srcu, srcu_idx);
+ up_read(&shrinker_rwsem);
mem_cgroup_put(memcg);
return size;
@@ -168,7 +177,7 @@ int shrinker_debugfs_add(struct shrinker *shrinker)
char buf[128];
int id;
- lockdep_assert_held(&shrinker_mutex);
+ lockdep_assert_held(&shrinker_rwsem);
/* debugfs isn't initialized yet, add debugfs entries later. */
if (!shrinker_debugfs_root)
@@ -211,7 +220,7 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
if (!new)
return -ENOMEM;
- mutex_lock(&shrinker_mutex);
+ down_write(&shrinker_rwsem);
old = shrinker->name;
shrinker->name = new;
@@ -229,7 +238,7 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
shrinker->debugfs_entry = entry;
}
- mutex_unlock(&shrinker_mutex);
+ up_write(&shrinker_rwsem);
kfree_const(old);
@@ -237,23 +246,28 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
}
EXPORT_SYMBOL(shrinker_debugfs_rename);
-struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker)
+struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker,
+ int *debugfs_id)
{
struct dentry *entry = shrinker->debugfs_entry;
- lockdep_assert_held(&shrinker_mutex);
+ lockdep_assert_held(&shrinker_rwsem);
kfree_const(shrinker->name);
shrinker->name = NULL;
- if (entry) {
- ida_free(&shrinker_debugfs_ida, shrinker->debugfs_id);
- shrinker->debugfs_entry = NULL;
- }
+ *debugfs_id = entry ? shrinker->debugfs_id : -1;
+ shrinker->debugfs_entry = NULL;
return entry;
}
+void shrinker_debugfs_remove(struct dentry *debugfs_entry, int debugfs_id)
+{
+ debugfs_remove_recursive(debugfs_entry);
+ ida_free(&shrinker_debugfs_ida, debugfs_id);
+}
+
static int __init shrinker_debugfs_init(void)
{
struct shrinker *shrinker;
@@ -266,14 +280,14 @@ static int __init shrinker_debugfs_init(void)
shrinker_debugfs_root = dentry;
/* Create debugfs entries for shrinkers registered at boot */
- mutex_lock(&shrinker_mutex);
+ down_write(&shrinker_rwsem);
list_for_each_entry(shrinker, &shrinker_list, list)
if (!shrinker->debugfs_entry) {
ret = shrinker_debugfs_add(shrinker);
if (ret)
break;
}
- mutex_unlock(&shrinker_mutex);
+ up_write(&shrinker_rwsem);
return ret;
}
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 9683573f1225..1d13d71687d7 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3098,11 +3098,20 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
* allocation request, free them via vfree() if any.
*/
if (area->nr_pages != nr_small_pages) {
- /* vm_area_alloc_pages() can also fail due to a fatal signal */
- if (!fatal_signal_pending(current))
+ /*
+ * vm_area_alloc_pages() can fail due to insufficient memory but
+ * also:-
+ *
+ * - a pending fatal signal
+ * - insufficient huge page-order pages
+ *
+ * Since we always retry allocations at order-0 in the huge page
+ * case a warning for either is spurious.
+ */
+ if (!fatal_signal_pending(current) && page_order == 0)
warn_alloc(gfp_mask, NULL,
- "vmalloc error: size %lu, page order %u, failed to allocate pages",
- area->nr_pages * PAGE_SIZE, page_order);
+ "vmalloc error: size %lu, failed to allocate pages",
+ area->nr_pages * PAGE_SIZE);
goto fail;
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d257916f39e5..5bf98d0a22c9 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -35,7 +35,7 @@
#include <linux/cpuset.h>
#include <linux/compaction.h>
#include <linux/notifier.h>
-#include <linux/mutex.h>
+#include <linux/rwsem.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
@@ -57,7 +57,6 @@
#include <linux/khugepaged.h>
#include <linux/rculist_nulls.h>
#include <linux/random.h>
-#include <linux/srcu.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -190,9 +189,7 @@ struct scan_control {
int vm_swappiness = 60;
LIST_HEAD(shrinker_list);
-DEFINE_MUTEX(shrinker_mutex);
-DEFINE_SRCU(shrinker_srcu);
-static atomic_t shrinker_srcu_generation = ATOMIC_INIT(0);
+DECLARE_RWSEM(shrinker_rwsem);
#ifdef CONFIG_MEMCG
static int shrinker_nr_max;
@@ -211,21 +208,8 @@ static inline int shrinker_defer_size(int nr_items)
static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg,
int nid)
{
- return srcu_dereference_check(memcg->nodeinfo[nid]->shrinker_info,
- &shrinker_srcu,
- lockdep_is_held(&shrinker_mutex));
-}
-
-static struct shrinker_info *shrinker_info_srcu(struct mem_cgroup *memcg,
- int nid)
-{
- return srcu_dereference(memcg->nodeinfo[nid]->shrinker_info,
- &shrinker_srcu);
-}
-
-static void free_shrinker_info_rcu(struct rcu_head *head)
-{
- kvfree(container_of(head, struct shrinker_info, rcu));
+ return rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_info,
+ lockdep_is_held(&shrinker_rwsem));
}
static int expand_one_shrinker_info(struct mem_cgroup *memcg,
@@ -266,7 +250,7 @@ static int expand_one_shrinker_info(struct mem_cgroup *memcg,
defer_size - old_defer_size);
rcu_assign_pointer(pn->shrinker_info, new);
- call_srcu(&shrinker_srcu, &old->rcu, free_shrinker_info_rcu);
+ kvfree_rcu(old, rcu);
}
return 0;
@@ -292,7 +276,7 @@ int alloc_shrinker_info(struct mem_cgroup *memcg)
int nid, size, ret = 0;
int map_size, defer_size = 0;
- mutex_lock(&shrinker_mutex);
+ down_write(&shrinker_rwsem);
map_size = shrinker_map_size(shrinker_nr_max);
defer_size = shrinker_defer_size(shrinker_nr_max);
size = map_size + defer_size;
@@ -308,7 +292,7 @@ int alloc_shrinker_info(struct mem_cgroup *memcg)
info->map_nr_max = shrinker_nr_max;
rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info);
}
- mutex_unlock(&shrinker_mutex);
+ up_write(&shrinker_rwsem);
return ret;
}
@@ -324,7 +308,7 @@ static int expand_shrinker_info(int new_id)
if (!root_mem_cgroup)
goto out;
- lockdep_assert_held(&shrinker_mutex);
+ lockdep_assert_held(&shrinker_rwsem);
map_size = shrinker_map_size(new_nr_max);
defer_size = shrinker_defer_size(new_nr_max);
@@ -352,16 +336,15 @@ void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
{
if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) {
struct shrinker_info *info;
- int srcu_idx;
- srcu_idx = srcu_read_lock(&shrinker_srcu);
- info = shrinker_info_srcu(memcg, nid);
+ rcu_read_lock();
+ info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
if (!WARN_ON_ONCE(shrinker_id >= info->map_nr_max)) {
/* Pairs with smp mb in shrink_slab() */
smp_mb__before_atomic();
set_bit(shrinker_id, info->map);
}
- srcu_read_unlock(&shrinker_srcu, srcu_idx);
+ rcu_read_unlock();
}
}
@@ -374,7 +357,8 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker)
if (mem_cgroup_disabled())
return -ENOSYS;
- mutex_lock(&shrinker_mutex);
+ down_write(&shrinker_rwsem);
+ /* This may call shrinker, so it must use down_read_trylock() */
id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
if (id < 0)
goto unlock;
@@ -388,7 +372,7 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker)
shrinker->id = id;
ret = 0;
unlock:
- mutex_unlock(&shrinker_mutex);
+ up_write(&shrinker_rwsem);
return ret;
}
@@ -398,7 +382,7 @@ static void unregister_memcg_shrinker(struct shrinker *shrinker)
BUG_ON(id < 0);
- lockdep_assert_held(&shrinker_mutex);
+ lockdep_assert_held(&shrinker_rwsem);
idr_remove(&shrinker_idr, id);
}
@@ -408,7 +392,7 @@ static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
{
struct shrinker_info *info;
- info = shrinker_info_srcu(memcg, nid);
+ info = shrinker_info_protected(memcg, nid);
return atomic_long_xchg(&info->nr_deferred[shrinker->id], 0);
}
@@ -417,7 +401,7 @@ static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
{
struct shrinker_info *info;
- info = shrinker_info_srcu(memcg, nid);
+ info = shrinker_info_protected(memcg, nid);
return atomic_long_add_return(nr, &info->nr_deferred[shrinker->id]);
}
@@ -433,7 +417,7 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg)
parent = root_mem_cgroup;
/* Prevent from concurrent shrinker_info expand */
- mutex_lock(&shrinker_mutex);
+ down_read(&shrinker_rwsem);
for_each_node(nid) {
child_info = shrinker_info_protected(memcg, nid);
parent_info = shrinker_info_protected(parent, nid);
@@ -442,7 +426,7 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg)
atomic_long_add(nr, &parent_info->nr_deferred[i]);
}
}
- mutex_unlock(&shrinker_mutex);
+ up_read(&shrinker_rwsem);
}
static bool cgroup_reclaim(struct scan_control *sc)
@@ -743,9 +727,9 @@ void free_prealloced_shrinker(struct shrinker *shrinker)
shrinker->name = NULL;
#endif
if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
- mutex_lock(&shrinker_mutex);
+ down_write(&shrinker_rwsem);
unregister_memcg_shrinker(shrinker);
- mutex_unlock(&shrinker_mutex);
+ up_write(&shrinker_rwsem);
return;
}
@@ -755,11 +739,11 @@ void free_prealloced_shrinker(struct shrinker *shrinker)
void register_shrinker_prepared(struct shrinker *shrinker)
{
- mutex_lock(&shrinker_mutex);
- list_add_tail_rcu(&shrinker->list, &shrinker_list);
+ down_write(&shrinker_rwsem);
+ list_add_tail(&shrinker->list, &shrinker_list);
shrinker->flags |= SHRINKER_REGISTERED;
shrinker_debugfs_add(shrinker);
- mutex_unlock(&shrinker_mutex);
+ up_write(&shrinker_rwsem);
}
static int __register_shrinker(struct shrinker *shrinker)
@@ -805,22 +789,20 @@ EXPORT_SYMBOL(register_shrinker);
void unregister_shrinker(struct shrinker *shrinker)
{
struct dentry *debugfs_entry;
+ int debugfs_id;
if (!(shrinker->flags & SHRINKER_REGISTERED))
return;
- mutex_lock(&shrinker_mutex);
- list_del_rcu(&shrinker->list);
+ down_write(&shrinker_rwsem);
+ list_del(&shrinker->list);
shrinker->flags &= ~SHRINKER_REGISTERED;
if (shrinker->flags & SHRINKER_MEMCG_AWARE)
unregister_memcg_shrinker(shrinker);
- debugfs_entry = shrinker_debugfs_remove(shrinker);
- mutex_unlock(&shrinker_mutex);
+ debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id);
+ up_write(&shrinker_rwsem);
- atomic_inc(&shrinker_srcu_generation);
- synchronize_srcu(&shrinker_srcu);
-
- debugfs_remove_recursive(debugfs_entry);
+ shrinker_debugfs_remove(debugfs_entry, debugfs_id);
kfree(shrinker->nr_deferred);
shrinker->nr_deferred = NULL;
@@ -830,13 +812,15 @@ EXPORT_SYMBOL(unregister_shrinker);
/**
* synchronize_shrinkers - Wait for all running shrinkers to complete.
*
- * This is useful to guarantee that all shrinker invocations have seen an
- * update, before freeing memory.
+ * This is equivalent to calling unregister_shrink() and register_shrinker(),
+ * but atomically and with less overhead. This is useful to guarantee that all
+ * shrinker invocations have seen an update, before freeing memory, similar to
+ * rcu.
*/
void synchronize_shrinkers(void)
{
- atomic_inc(&shrinker_srcu_generation);
- synchronize_srcu(&shrinker_srcu);
+ down_write(&shrinker_rwsem);
+ up_write(&shrinker_rwsem);
}
EXPORT_SYMBOL(synchronize_shrinkers);
@@ -945,20 +929,19 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
{
struct shrinker_info *info;
unsigned long ret, freed = 0;
- int srcu_idx, generation;
- int i = 0;
+ int i;
if (!mem_cgroup_online(memcg))
return 0;
-again:
- srcu_idx = srcu_read_lock(&shrinker_srcu);
- info = shrinker_info_srcu(memcg, nid);
+ if (!down_read_trylock(&shrinker_rwsem))
+ return 0;
+
+ info = shrinker_info_protected(memcg, nid);
if (unlikely(!info))
goto unlock;
- generation = atomic_read(&shrinker_srcu_generation);
- for_each_set_bit_from(i, info->map, info->map_nr_max) {
+ for_each_set_bit(i, info->map, info->map_nr_max) {
struct shrink_control sc = {
.gfp_mask = gfp_mask,
.nid = nid,
@@ -1004,14 +987,14 @@ again:
set_shrinker_bit(memcg, nid, i);
}
freed += ret;
- if (atomic_read(&shrinker_srcu_generation) != generation) {
- srcu_read_unlock(&shrinker_srcu, srcu_idx);
- i++;
- goto again;
+
+ if (rwsem_is_contended(&shrinker_rwsem)) {
+ freed = freed ? : 1;
+ break;
}
}
unlock:
- srcu_read_unlock(&shrinker_srcu, srcu_idx);
+ up_read(&shrinker_rwsem);
return freed;
}
#else /* CONFIG_MEMCG */
@@ -1048,7 +1031,6 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
{
unsigned long ret, freed = 0;
struct shrinker *shrinker;
- int srcu_idx, generation;
/*
* The root memcg might be allocated even though memcg is disabled
@@ -1060,11 +1042,10 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg))
return shrink_slab_memcg(gfp_mask, nid, memcg, priority);
- srcu_idx = srcu_read_lock(&shrinker_srcu);
+ if (!down_read_trylock(&shrinker_rwsem))
+ goto out;
- generation = atomic_read(&shrinker_srcu_generation);
- list_for_each_entry_srcu(shrinker, &shrinker_list, list,
- srcu_read_lock_held(&shrinker_srcu)) {
+ list_for_each_entry(shrinker, &shrinker_list, list) {
struct shrink_control sc = {
.gfp_mask = gfp_mask,
.nid = nid,
@@ -1075,14 +1056,19 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
if (ret == SHRINK_EMPTY)
ret = 0;
freed += ret;
-
- if (atomic_read(&shrinker_srcu_generation) != generation) {
+ /*
+ * Bail out if someone want to register a new shrinker to
+ * prevent the registration from being stalled for long periods
+ * by parallel ongoing shrinking.
+ */
+ if (rwsem_is_contended(&shrinker_rwsem)) {
freed = freed ? : 1;
break;
}
}
- srcu_read_unlock(&shrinker_srcu, srcu_idx);
+ up_read(&shrinker_rwsem);
+out:
cond_resched();
return freed;
}
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 44ddaf5d601e..02f7f414aade 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1331,31 +1331,6 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
obj_to_location(obj, &page, &obj_idx);
zspage = get_zspage(page);
-#ifdef CONFIG_ZPOOL
- /*
- * Move the zspage to front of pool's LRU.
- *
- * Note that this is swap-specific, so by definition there are no ongoing
- * accesses to the memory while the page is swapped out that would make
- * it "hot". A new entry is hot, then ages to the tail until it gets either
- * written back or swaps back in.
- *
- * Furthermore, map is also called during writeback. We must not put an
- * isolated page on the LRU mid-reclaim.
- *
- * As a result, only update the LRU when the page is mapped for write
- * when it's first instantiated.
- *
- * This is a deviation from the other backends, which perform this update
- * in the allocation function (zbud_alloc, z3fold_alloc).
- */
- if (mm == ZS_MM_WO) {
- if (!list_empty(&zspage->lru))
- list_del(&zspage->lru);
- list_add(&zspage->lru, &pool->lru);
- }
-#endif
-
/*
* migration cannot move any zpages in this zspage. Here, pool->lock
* is too heavy since callers would take some time until they calls
@@ -1525,9 +1500,8 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
fix_fullness_group(class, zspage);
record_obj(handle, obj);
class_stat_inc(class, ZS_OBJS_INUSE, 1);
- spin_unlock(&pool->lock);
- return handle;
+ goto out;
}
spin_unlock(&pool->lock);
@@ -1550,6 +1524,14 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
/* We completely set up zspage so mark them as movable */
SetZsPageMovable(pool, zspage);
+out:
+#ifdef CONFIG_ZPOOL
+ /* Add/move zspage to beginning of LRU */
+ if (!list_empty(&zspage->lru))
+ list_del(&zspage->lru);
+ list_add(&zspage->lru, &pool->lru);
+#endif
+
spin_unlock(&pool->lock);
return handle;
diff --git a/mm/zswap.c b/mm/zswap.c
index e1e621d0b6a0..30092d9a3b23 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1020,6 +1020,22 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
goto fail;
case ZSWAP_SWAPCACHE_NEW: /* page is locked */
+ /*
+ * Having a local reference to the zswap entry doesn't exclude
+ * swapping from invalidating and recycling the swap slot. Once
+ * the swapcache is secured against concurrent swapping to and
+ * from the slot, recheck that the entry is still current before
+ * writing.
+ */
+ spin_lock(&tree->lock);
+ if (zswap_rb_search(&tree->rbroot, entry->offset) != entry) {
+ spin_unlock(&tree->lock);
+ delete_from_swap_cache(page_folio(page));
+ ret = -ENOMEM;
+ goto fail;
+ }
+ spin_unlock(&tree->lock);
+
/* decompress */
acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
dlen = PAGE_SIZE;
@@ -1158,9 +1174,16 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
goto reject;
}
+ /*
+ * XXX: zswap reclaim does not work with cgroups yet. Without a
+ * cgroup-aware entry LRU, we will push out entries system-wide based on
+ * local cgroup limits.
+ */
objcg = get_obj_cgroup_from_page(page);
- if (objcg && !obj_cgroup_may_zswap(objcg))
- goto shrink;
+ if (objcg && !obj_cgroup_may_zswap(objcg)) {
+ ret = -ENOMEM;
+ goto reject;
+ }
/* reclaim space if needed */
if (zswap_is_full()) {
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 870e4935d6e6..b90781b9ece6 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -109,8 +109,8 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
* NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING
* OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
*/
- if (veth->h_vlan_proto != vlan->vlan_proto ||
- vlan->flags & VLAN_FLAG_REORDER_HDR) {
+ if (vlan->flags & VLAN_FLAG_REORDER_HDR ||
+ veth->h_vlan_proto != vlan->vlan_proto) {
u16 vlan_tci;
vlan_tci = vlan->vlan_id;
vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb->priority);
diff --git a/net/atm/resources.c b/net/atm/resources.c
index 2b2d33eeaf20..995d29e7fb13 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -400,6 +400,7 @@ done:
return error;
}
+#ifdef CONFIG_PROC_FS
void *atm_dev_seq_start(struct seq_file *seq, loff_t *pos)
{
mutex_lock(&atm_dev_mutex);
@@ -415,3 +416,4 @@ void *atm_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
return seq_list_next(v, &atm_devs, pos);
}
+#endif
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 6968e55eb971..28a939d56090 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -101,7 +101,6 @@ static void batadv_dat_purge(struct work_struct *work);
*/
static void batadv_dat_start_timer(struct batadv_priv *bat_priv)
{
- INIT_DELAYED_WORK(&bat_priv->dat.work, batadv_dat_purge);
queue_delayed_work(batadv_event_workqueue, &bat_priv->dat.work,
msecs_to_jiffies(10000));
}
@@ -819,6 +818,7 @@ int batadv_dat_init(struct batadv_priv *bat_priv)
if (!bat_priv->dat.hash)
return -ENOMEM;
+ INIT_DELAYED_WORK(&bat_priv->dat.work, batadv_dat_purge);
batadv_dat_start_timer(bat_priv);
batadv_tvlv_handler_register(bat_priv, batadv_dat_tvlv_ogm_handler_v1,
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 640b951bf40a..1ef952bda97d 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -947,8 +947,8 @@ static void find_cis(struct hci_conn *conn, void *data)
{
struct iso_list_data *d = data;
- /* Ignore broadcast */
- if (!bacmp(&conn->dst, BDADDR_ANY))
+ /* Ignore broadcast or if CIG don't match */
+ if (!bacmp(&conn->dst, BDADDR_ANY) || d->cig != conn->iso_qos.ucast.cig)
return;
d->count++;
@@ -963,12 +963,17 @@ static void cis_cleanup(struct hci_conn *conn)
struct hci_dev *hdev = conn->hdev;
struct iso_list_data d;
+ if (conn->iso_qos.ucast.cig == BT_ISO_QOS_CIG_UNSET)
+ return;
+
memset(&d, 0, sizeof(d));
d.cig = conn->iso_qos.ucast.cig;
/* Check if ISO connection is a CIS and remove CIG if there are
* no other connections using it.
*/
+ hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_BOUND, &d);
+ hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_CONNECT, &d);
hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_CONNECTED, &d);
if (d.count)
return;
@@ -1083,8 +1088,28 @@ static void hci_conn_unlink(struct hci_conn *conn)
if (!conn->parent) {
struct hci_link *link, *t;
- list_for_each_entry_safe(link, t, &conn->link_list, list)
- hci_conn_unlink(link->conn);
+ list_for_each_entry_safe(link, t, &conn->link_list, list) {
+ struct hci_conn *child = link->conn;
+
+ hci_conn_unlink(child);
+
+ /* If hdev is down it means
+ * hci_dev_close_sync/hci_conn_hash_flush is in progress
+ * and links don't need to be cleanup as all connections
+ * would be cleanup.
+ */
+ if (!test_bit(HCI_UP, &hdev->flags))
+ continue;
+
+ /* Due to race, SCO connection might be not established
+ * yet at this point. Delete it now, otherwise it is
+ * possible for it to be stuck and can't be deleted.
+ */
+ if ((child->type == SCO_LINK ||
+ child->type == ESCO_LINK) &&
+ child->handle == HCI_CONN_HANDLE_UNSET)
+ hci_conn_del(child);
+ }
return;
}
@@ -1092,35 +1117,30 @@ static void hci_conn_unlink(struct hci_conn *conn)
if (!conn->link)
return;
- hci_conn_put(conn->parent);
- conn->parent = NULL;
-
list_del_rcu(&conn->link->list);
synchronize_rcu();
+ hci_conn_drop(conn->parent);
+ hci_conn_put(conn->parent);
+ conn->parent = NULL;
+
kfree(conn->link);
conn->link = NULL;
-
- /* Due to race, SCO connection might be not established
- * yet at this point. Delete it now, otherwise it is
- * possible for it to be stuck and can't be deleted.
- */
- if (conn->handle == HCI_CONN_HANDLE_UNSET)
- hci_conn_del(conn);
}
-int hci_conn_del(struct hci_conn *conn)
+void hci_conn_del(struct hci_conn *conn)
{
struct hci_dev *hdev = conn->hdev;
BT_DBG("%s hcon %p handle %d", hdev->name, conn, conn->handle);
+ hci_conn_unlink(conn);
+
cancel_delayed_work_sync(&conn->disc_work);
cancel_delayed_work_sync(&conn->auto_accept_work);
cancel_delayed_work_sync(&conn->idle_work);
if (conn->type == ACL_LINK) {
- hci_conn_unlink(conn);
/* Unacked frames */
hdev->acl_cnt += conn->sent;
} else if (conn->type == LE_LINK) {
@@ -1131,13 +1151,6 @@ int hci_conn_del(struct hci_conn *conn)
else
hdev->acl_cnt += conn->sent;
} else {
- struct hci_conn *acl = conn->parent;
-
- if (acl) {
- hci_conn_unlink(conn);
- hci_conn_drop(acl);
- }
-
/* Unacked ISO frames */
if (conn->type == ISO_LINK) {
if (hdev->iso_pkts)
@@ -1160,8 +1173,6 @@ int hci_conn_del(struct hci_conn *conn)
* rest of hci_conn_del.
*/
hci_conn_cleanup(conn);
-
- return 0;
}
struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type)
@@ -1760,24 +1771,23 @@ static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos)
memset(&data, 0, sizeof(data));
- /* Allocate a CIG if not set */
+ /* Allocate first still reconfigurable CIG if not set */
if (qos->ucast.cig == BT_ISO_QOS_CIG_UNSET) {
- for (data.cig = 0x00; data.cig < 0xff; data.cig++) {
+ for (data.cig = 0x00; data.cig < 0xf0; data.cig++) {
data.count = 0;
- data.cis = 0xff;
- hci_conn_hash_list_state(hdev, cis_list, ISO_LINK,
- BT_BOUND, &data);
+ hci_conn_hash_list_state(hdev, find_cis, ISO_LINK,
+ BT_CONNECT, &data);
if (data.count)
continue;
- hci_conn_hash_list_state(hdev, cis_list, ISO_LINK,
+ hci_conn_hash_list_state(hdev, find_cis, ISO_LINK,
BT_CONNECTED, &data);
if (!data.count)
break;
}
- if (data.cig == 0xff)
+ if (data.cig == 0xf0)
return false;
/* Update CIG */
@@ -2462,22 +2472,21 @@ timer:
/* Drop all connection on the device */
void hci_conn_hash_flush(struct hci_dev *hdev)
{
- struct hci_conn_hash *h = &hdev->conn_hash;
- struct hci_conn *c, *n;
+ struct list_head *head = &hdev->conn_hash.list;
+ struct hci_conn *conn;
BT_DBG("hdev %s", hdev->name);
- list_for_each_entry_safe(c, n, &h->list, list) {
- c->state = BT_CLOSED;
-
- hci_disconn_cfm(c, HCI_ERROR_LOCAL_HOST_TERM);
-
- /* Unlink before deleting otherwise it is possible that
- * hci_conn_del removes the link which may cause the list to
- * contain items already freed.
- */
- hci_conn_unlink(c);
- hci_conn_del(c);
+ /* We should not traverse the list here, because hci_conn_del
+ * can remove extra links, which may cause the list traversal
+ * to hit items that have already been released.
+ */
+ while ((conn = list_first_entry_or_null(head,
+ struct hci_conn,
+ list)) != NULL) {
+ conn->state = BT_CLOSED;
+ hci_disconn_cfm(conn, HCI_ERROR_LOCAL_HOST_TERM);
+ hci_conn_del(conn);
}
}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index a856b1051d35..48917c68358d 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1416,10 +1416,10 @@ int hci_remove_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr)
int hci_remove_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type)
{
- struct smp_ltk *k;
+ struct smp_ltk *k, *tmp;
int removed = 0;
- list_for_each_entry_rcu(k, &hdev->long_term_keys, list) {
+ list_for_each_entry_safe(k, tmp, &hdev->long_term_keys, list) {
if (bacmp(bdaddr, &k->bdaddr) || k->bdaddr_type != bdaddr_type)
continue;
@@ -1435,9 +1435,9 @@ int hci_remove_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type)
void hci_remove_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type)
{
- struct smp_irk *k;
+ struct smp_irk *k, *tmp;
- list_for_each_entry_rcu(k, &hdev->identity_resolving_keys, list) {
+ list_for_each_entry_safe(k, tmp, &hdev->identity_resolving_keys, list) {
if (bacmp(bdaddr, &k->bdaddr) || k->addr_type != addr_type)
continue;
@@ -2686,7 +2686,9 @@ void hci_unregister_dev(struct hci_dev *hdev)
{
BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
+ mutex_lock(&hdev->unregister_lock);
hci_dev_set_flag(hdev, HCI_UNREGISTER);
+ mutex_unlock(&hdev->unregister_lock);
write_lock(&hci_dev_list_lock);
list_del(&hdev->list);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index d00ef6e3fc45..09ba6d8987ee 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3804,48 +3804,56 @@ static u8 hci_cc_le_set_cig_params(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
struct hci_rp_le_set_cig_params *rp = data;
+ struct hci_cp_le_set_cig_params *cp;
struct hci_conn *conn;
- int i = 0;
+ u8 status = rp->status;
+ int i;
bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
+ cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_CIG_PARAMS);
+ if (!cp || rp->num_handles != cp->num_cis || rp->cig_id != cp->cig_id) {
+ bt_dev_err(hdev, "unexpected Set CIG Parameters response data");
+ status = HCI_ERROR_UNSPECIFIED;
+ }
+
hci_dev_lock(hdev);
- if (rp->status) {
+ if (status) {
while ((conn = hci_conn_hash_lookup_cig(hdev, rp->cig_id))) {
conn->state = BT_CLOSED;
- hci_connect_cfm(conn, rp->status);
+ hci_connect_cfm(conn, status);
hci_conn_del(conn);
}
goto unlock;
}
- rcu_read_lock();
+ /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E page 2553
+ *
+ * If the Status return parameter is zero, then the Controller shall
+ * set the Connection_Handle arrayed return parameter to the connection
+ * handle(s) corresponding to the CIS configurations specified in
+ * the CIS_IDs command parameter, in the same order.
+ */
+ for (i = 0; i < rp->num_handles; ++i) {
+ conn = hci_conn_hash_lookup_cis(hdev, NULL, 0, rp->cig_id,
+ cp->cis[i].cis_id);
+ if (!conn || !bacmp(&conn->dst, BDADDR_ANY))
+ continue;
- list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
- if (conn->type != ISO_LINK ||
- conn->iso_qos.ucast.cig != rp->cig_id ||
- conn->state == BT_CONNECTED)
+ if (conn->state != BT_BOUND && conn->state != BT_CONNECT)
continue;
- conn->handle = __le16_to_cpu(rp->handle[i++]);
+ conn->handle = __le16_to_cpu(rp->handle[i]);
bt_dev_dbg(hdev, "%p handle 0x%4.4x parent %p", conn,
conn->handle, conn->parent);
/* Create CIS if LE is already connected */
- if (conn->parent && conn->parent->state == BT_CONNECTED) {
- rcu_read_unlock();
+ if (conn->parent && conn->parent->state == BT_CONNECTED)
hci_le_create_cis(conn);
- rcu_read_lock();
- }
-
- if (i == rp->num_handles)
- break;
}
- rcu_read_unlock();
-
unlock:
hci_dev_unlock(hdev);
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 647a8ce54062..804cde43b4e0 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -629,6 +629,7 @@ void hci_cmd_sync_init(struct hci_dev *hdev)
INIT_WORK(&hdev->cmd_sync_work, hci_cmd_sync_work);
INIT_LIST_HEAD(&hdev->cmd_sync_work_list);
mutex_init(&hdev->cmd_sync_work_lock);
+ mutex_init(&hdev->unregister_lock);
INIT_WORK(&hdev->cmd_sync_cancel_work, hci_cmd_sync_cancel_work);
INIT_WORK(&hdev->reenable_adv_work, reenable_adv);
@@ -692,14 +693,19 @@ int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
void *data, hci_cmd_sync_work_destroy_t destroy)
{
struct hci_cmd_sync_work_entry *entry;
+ int err = 0;
- if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
- return -ENODEV;
+ mutex_lock(&hdev->unregister_lock);
+ if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) {
+ err = -ENODEV;
+ goto unlock;
+ }
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry)
- return -ENOMEM;
-
+ if (!entry) {
+ err = -ENOMEM;
+ goto unlock;
+ }
entry->func = func;
entry->data = data;
entry->destroy = destroy;
@@ -710,7 +716,9 @@ int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
queue_work(hdev->req_workqueue, &hdev->cmd_sync_work);
- return 0;
+unlock:
+ mutex_unlock(&hdev->unregister_lock);
+ return err;
}
EXPORT_SYMBOL(hci_cmd_sync_submit);
@@ -4543,6 +4551,9 @@ static int hci_init_sync(struct hci_dev *hdev)
!hci_dev_test_flag(hdev, HCI_CONFIG))
return 0;
+ if (hci_dev_test_and_set_flag(hdev, HCI_DEBUGFS_CREATED))
+ return 0;
+
hci_debugfs_create_common(hdev);
if (lmp_bredr_capable(hdev))
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 376b523c7b26..c5e8798e297c 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -4306,6 +4306,10 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn,
result = __le16_to_cpu(rsp->result);
status = __le16_to_cpu(rsp->status);
+ if (result == L2CAP_CR_SUCCESS && (dcid < L2CAP_CID_DYN_START ||
+ dcid > L2CAP_CID_DYN_END))
+ return -EPROTO;
+
BT_DBG("dcid 0x%4.4x scid 0x%4.4x result 0x%2.2x status 0x%2.2x",
dcid, scid, result, status);
@@ -4337,6 +4341,11 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn,
switch (result) {
case L2CAP_CR_SUCCESS:
+ if (__l2cap_get_chan_by_dcid(conn, dcid)) {
+ err = -EBADSLT;
+ break;
+ }
+
l2cap_state_change(chan, BT_CONFIG);
chan->ident = 0;
chan->dcid = dcid;
@@ -4663,7 +4672,9 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn,
chan->ops->set_shutdown(chan);
+ l2cap_chan_unlock(chan);
mutex_lock(&conn->chan_lock);
+ l2cap_chan_lock(chan);
l2cap_chan_del(chan, ECONNRESET);
mutex_unlock(&conn->chan_lock);
@@ -4702,7 +4713,9 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn,
return 0;
}
+ l2cap_chan_unlock(chan);
mutex_lock(&conn->chan_lock);
+ l2cap_chan_lock(chan);
l2cap_chan_del(chan, 0);
mutex_unlock(&conn->chan_lock);
diff --git a/net/bridge/br_private_tunnel.h b/net/bridge/br_private_tunnel.h
index 2b053289f016..efb096025151 100644
--- a/net/bridge/br_private_tunnel.h
+++ b/net/bridge/br_private_tunnel.h
@@ -27,6 +27,10 @@ int br_process_vlan_tunnel_info(const struct net_bridge *br,
int br_get_vlan_tunnel_info_size(struct net_bridge_vlan_group *vg);
int br_fill_vlan_tunnel_info(struct sk_buff *skb,
struct net_bridge_vlan_group *vg);
+bool vlan_tunid_inrange(const struct net_bridge_vlan *v_curr,
+ const struct net_bridge_vlan *v_last);
+int br_vlan_tunnel_info(const struct net_bridge_port *p, int cmd,
+ u16 vid, u32 tun_id, bool *changed);
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
/* br_vlan_tunnel.c */
@@ -43,10 +47,6 @@ void br_handle_ingress_vlan_tunnel(struct sk_buff *skb,
struct net_bridge_vlan_group *vg);
int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
struct net_bridge_vlan *vlan);
-bool vlan_tunid_inrange(const struct net_bridge_vlan *v_curr,
- const struct net_bridge_vlan *v_last);
-int br_vlan_tunnel_info(const struct net_bridge_port *p, int cmd,
- u16 vid, u32 tun_id, bool *changed);
#else
static inline int vlan_tunnel_init(struct net_bridge_vlan_group *vg)
{
diff --git a/net/can/isotp.c b/net/can/isotp.c
index a750259cb79c..84f9aba02901 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -1139,7 +1139,7 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
struct isotp_sock *so = isotp_sk(sk);
int ret = 0;
- if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK))
+ if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK | MSG_CMSG_COMPAT))
return -EINVAL;
if (!so->bound)
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index 821d4ff303b3..ecff1c947d68 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -126,7 +126,7 @@ static void j1939_can_recv(struct sk_buff *iskb, void *data)
#define J1939_CAN_ID CAN_EFF_FLAG
#define J1939_CAN_MASK (CAN_EFF_FLAG | CAN_RTR_FLAG)
-static DEFINE_SPINLOCK(j1939_netdev_lock);
+static DEFINE_MUTEX(j1939_netdev_lock);
static struct j1939_priv *j1939_priv_create(struct net_device *ndev)
{
@@ -220,7 +220,7 @@ static void __j1939_rx_release(struct kref *kref)
j1939_can_rx_unregister(priv);
j1939_ecu_unmap_all(priv);
j1939_priv_set(priv->ndev, NULL);
- spin_unlock(&j1939_netdev_lock);
+ mutex_unlock(&j1939_netdev_lock);
}
/* get pointer to priv without increasing ref counter */
@@ -248,9 +248,9 @@ static struct j1939_priv *j1939_priv_get_by_ndev(struct net_device *ndev)
{
struct j1939_priv *priv;
- spin_lock(&j1939_netdev_lock);
+ mutex_lock(&j1939_netdev_lock);
priv = j1939_priv_get_by_ndev_locked(ndev);
- spin_unlock(&j1939_netdev_lock);
+ mutex_unlock(&j1939_netdev_lock);
return priv;
}
@@ -260,14 +260,14 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
struct j1939_priv *priv, *priv_new;
int ret;
- spin_lock(&j1939_netdev_lock);
+ mutex_lock(&j1939_netdev_lock);
priv = j1939_priv_get_by_ndev_locked(ndev);
if (priv) {
kref_get(&priv->rx_kref);
- spin_unlock(&j1939_netdev_lock);
+ mutex_unlock(&j1939_netdev_lock);
return priv;
}
- spin_unlock(&j1939_netdev_lock);
+ mutex_unlock(&j1939_netdev_lock);
priv = j1939_priv_create(ndev);
if (!priv)
@@ -277,29 +277,31 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
spin_lock_init(&priv->j1939_socks_lock);
INIT_LIST_HEAD(&priv->j1939_socks);
- spin_lock(&j1939_netdev_lock);
+ mutex_lock(&j1939_netdev_lock);
priv_new = j1939_priv_get_by_ndev_locked(ndev);
if (priv_new) {
/* Someone was faster than us, use their priv and roll
* back our's.
*/
kref_get(&priv_new->rx_kref);
- spin_unlock(&j1939_netdev_lock);
+ mutex_unlock(&j1939_netdev_lock);
dev_put(ndev);
kfree(priv);
return priv_new;
}
j1939_priv_set(ndev, priv);
- spin_unlock(&j1939_netdev_lock);
ret = j1939_can_rx_register(priv);
if (ret < 0)
goto out_priv_put;
+ mutex_unlock(&j1939_netdev_lock);
return priv;
out_priv_put:
j1939_priv_set(ndev, NULL);
+ mutex_unlock(&j1939_netdev_lock);
+
dev_put(ndev);
kfree(priv);
@@ -308,7 +310,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
void j1939_netdev_stop(struct j1939_priv *priv)
{
- kref_put_lock(&priv->rx_kref, __j1939_rx_release, &j1939_netdev_lock);
+ kref_put_mutex(&priv->rx_kref, __j1939_rx_release, &j1939_netdev_lock);
j1939_priv_put(priv);
}
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 7e90f9e61d9b..35970c25496a 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -798,7 +798,7 @@ static int j1939_sk_recvmsg(struct socket *sock, struct msghdr *msg,
struct j1939_sk_buff_cb *skcb;
int ret = 0;
- if (flags & ~(MSG_DONTWAIT | MSG_ERRQUEUE))
+ if (flags & ~(MSG_DONTWAIT | MSG_ERRQUEUE | MSG_CMSG_COMPAT))
return -EINVAL;
if (flags & MSG_ERRQUEUE)
@@ -1088,6 +1088,11 @@ void j1939_sk_errqueue(struct j1939_session *session,
void j1939_sk_send_loop_abort(struct sock *sk, int err)
{
+ struct j1939_sock *jsk = j1939_sk(sk);
+
+ if (jsk->state & J1939_SOCK_ERRQUEUE)
+ return;
+
sk->sk_err = err;
sk_error_report(sk);
diff --git a/net/core/dev.c b/net/core/dev.c
index b3c13e041935..c29f3e1db3ca 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4471,8 +4471,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
u32 next_cpu;
u32 ident;
- /* First check into global flow table if there is a match */
- ident = sock_flow_table->ents[hash & sock_flow_table->mask];
+ /* First check into global flow table if there is a match.
+ * This READ_ONCE() pairs with WRITE_ONCE() from rps_record_sock_flow().
+ */
+ ident = READ_ONCE(sock_flow_table->ents[hash & sock_flow_table->mask]);
if ((ident ^ hash) & ~rps_cpu_mask)
goto try_rps;
@@ -10541,7 +10543,7 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
return NULL;
netdev_init_one_queue(dev, queue, NULL);
RCU_INIT_POINTER(queue->qdisc, &noop_qdisc);
- queue->qdisc_sleeping = &noop_qdisc;
+ RCU_INIT_POINTER(queue->qdisc_sleeping, &noop_qdisc);
rcu_assign_pointer(dev->ingress_queue, queue);
#endif
return queue;
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index e212e9d7edcb..a3e12a61d456 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -134,6 +134,29 @@ EXPORT_SYMBOL(page_pool_ethtool_stats_get);
#define recycle_stat_add(pool, __stat, val)
#endif
+static bool page_pool_producer_lock(struct page_pool *pool)
+ __acquires(&pool->ring.producer_lock)
+{
+ bool in_softirq = in_softirq();
+
+ if (in_softirq)
+ spin_lock(&pool->ring.producer_lock);
+ else
+ spin_lock_bh(&pool->ring.producer_lock);
+
+ return in_softirq;
+}
+
+static void page_pool_producer_unlock(struct page_pool *pool,
+ bool in_softirq)
+ __releases(&pool->ring.producer_lock)
+{
+ if (in_softirq)
+ spin_unlock(&pool->ring.producer_lock);
+ else
+ spin_unlock_bh(&pool->ring.producer_lock);
+}
+
static int page_pool_init(struct page_pool *pool,
const struct page_pool_params *params)
{
@@ -617,6 +640,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
int count)
{
int i, bulk_len = 0;
+ bool in_softirq;
for (i = 0; i < count; i++) {
struct page *page = virt_to_head_page(data[i]);
@@ -635,7 +659,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
return;
/* Bulk producer into ptr_ring page_pool cache */
- page_pool_ring_lock(pool);
+ in_softirq = page_pool_producer_lock(pool);
for (i = 0; i < bulk_len; i++) {
if (__ptr_ring_produce(&pool->ring, data[i])) {
/* ring full */
@@ -644,7 +668,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
}
}
recycle_stat_add(pool, ring, i);
- page_pool_ring_unlock(pool);
+ page_pool_producer_unlock(pool, in_softirq);
/* Hopefully all pages was return into ptr_ring */
if (likely(i == bulk_len))
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 653901a1bf75..41de3a2f29e1 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2385,6 +2385,37 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[],
if (tb[IFLA_BROADCAST] &&
nla_len(tb[IFLA_BROADCAST]) < dev->addr_len)
return -EINVAL;
+
+ if (tb[IFLA_GSO_MAX_SIZE] &&
+ nla_get_u32(tb[IFLA_GSO_MAX_SIZE]) > dev->tso_max_size) {
+ NL_SET_ERR_MSG(extack, "too big gso_max_size");
+ return -EINVAL;
+ }
+
+ if (tb[IFLA_GSO_MAX_SEGS] &&
+ (nla_get_u32(tb[IFLA_GSO_MAX_SEGS]) > GSO_MAX_SEGS ||
+ nla_get_u32(tb[IFLA_GSO_MAX_SEGS]) > dev->tso_max_segs)) {
+ NL_SET_ERR_MSG(extack, "too big gso_max_segs");
+ return -EINVAL;
+ }
+
+ if (tb[IFLA_GRO_MAX_SIZE] &&
+ nla_get_u32(tb[IFLA_GRO_MAX_SIZE]) > GRO_MAX_SIZE) {
+ NL_SET_ERR_MSG(extack, "too big gro_max_size");
+ return -EINVAL;
+ }
+
+ if (tb[IFLA_GSO_IPV4_MAX_SIZE] &&
+ nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE]) > dev->tso_max_size) {
+ NL_SET_ERR_MSG(extack, "too big gso_ipv4_max_size");
+ return -EINVAL;
+ }
+
+ if (tb[IFLA_GRO_IPV4_MAX_SIZE] &&
+ nla_get_u32(tb[IFLA_GRO_IPV4_MAX_SIZE]) > GRO_MAX_SIZE) {
+ NL_SET_ERR_MSG(extack, "too big gro_ipv4_max_size");
+ return -EINVAL;
+ }
}
if (tb[IFLA_AF_SPEC]) {
@@ -2858,11 +2889,6 @@ static int do_setlink(const struct sk_buff *skb,
if (tb[IFLA_GSO_MAX_SIZE]) {
u32 max_size = nla_get_u32(tb[IFLA_GSO_MAX_SIZE]);
- if (max_size > dev->tso_max_size) {
- err = -EINVAL;
- goto errout;
- }
-
if (dev->gso_max_size ^ max_size) {
netif_set_gso_max_size(dev, max_size);
status |= DO_SETLINK_MODIFIED;
@@ -2872,11 +2898,6 @@ static int do_setlink(const struct sk_buff *skb,
if (tb[IFLA_GSO_MAX_SEGS]) {
u32 max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]);
- if (max_segs > GSO_MAX_SEGS || max_segs > dev->tso_max_segs) {
- err = -EINVAL;
- goto errout;
- }
-
if (dev->gso_max_segs ^ max_segs) {
netif_set_gso_max_segs(dev, max_segs);
status |= DO_SETLINK_MODIFIED;
@@ -2895,11 +2916,6 @@ static int do_setlink(const struct sk_buff *skb,
if (tb[IFLA_GSO_IPV4_MAX_SIZE]) {
u32 max_size = nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE]);
- if (max_size > dev->tso_max_size) {
- err = -EINVAL;
- goto errout;
- }
-
if (dev->gso_ipv4_max_size ^ max_size) {
netif_set_gso_ipv4_max_size(dev, max_size);
status |= DO_SETLINK_MODIFIED;
@@ -3285,6 +3301,7 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
struct net_device *dev;
unsigned int num_tx_queues = 1;
unsigned int num_rx_queues = 1;
+ int err;
if (tb[IFLA_NUM_TX_QUEUES])
num_tx_queues = nla_get_u32(tb[IFLA_NUM_TX_QUEUES]);
@@ -3320,13 +3337,18 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
if (!dev)
return ERR_PTR(-ENOMEM);
+ err = validate_linkmsg(dev, tb, extack);
+ if (err < 0) {
+ free_netdev(dev);
+ return ERR_PTR(err);
+ }
+
dev_net_set(dev, net);
dev->rtnl_link_ops = ops;
dev->rtnl_link_state = RTNL_LINK_INITIALIZING;
if (tb[IFLA_MTU]) {
u32 mtu = nla_get_u32(tb[IFLA_MTU]);
- int err;
err = dev_validate_mtu(dev, mtu, extack);
if (err) {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 515ec5cdc79c..cea28d30abb5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5224,8 +5224,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
} else {
skb = skb_clone(orig_skb, GFP_ATOMIC);
- if (skb_orphan_frags_rx(skb, GFP_ATOMIC))
+ if (skb_orphan_frags_rx(skb, GFP_ATOMIC)) {
+ kfree_skb(skb);
return;
+ }
}
if (!skb)
return;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index f81883759d38..a29508e1ff35 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -481,8 +481,6 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
msg_rx = sk_psock_peek_msg(psock);
}
out:
- if (psock->work_state.skb && copied > 0)
- schedule_work(&psock->work);
return copied;
}
EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
@@ -624,42 +622,33 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
static void sk_psock_skb_state(struct sk_psock *psock,
struct sk_psock_work_state *state,
- struct sk_buff *skb,
int len, int off)
{
spin_lock_bh(&psock->ingress_lock);
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
- state->skb = skb;
state->len = len;
state->off = off;
- } else {
- sock_drop(psock->sk, skb);
}
spin_unlock_bh(&psock->ingress_lock);
}
static void sk_psock_backlog(struct work_struct *work)
{
- struct sk_psock *psock = container_of(work, struct sk_psock, work);
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct sk_psock *psock = container_of(dwork, struct sk_psock, work);
struct sk_psock_work_state *state = &psock->work_state;
struct sk_buff *skb = NULL;
+ u32 len = 0, off = 0;
bool ingress;
- u32 len, off;
int ret;
mutex_lock(&psock->work_mutex);
- if (unlikely(state->skb)) {
- spin_lock_bh(&psock->ingress_lock);
- skb = state->skb;
+ if (unlikely(state->len)) {
len = state->len;
off = state->off;
- state->skb = NULL;
- spin_unlock_bh(&psock->ingress_lock);
}
- if (skb)
- goto start;
- while ((skb = skb_dequeue(&psock->ingress_skb))) {
+ while ((skb = skb_peek(&psock->ingress_skb))) {
len = skb->len;
off = 0;
if (skb_bpf_strparser(skb)) {
@@ -668,7 +657,6 @@ static void sk_psock_backlog(struct work_struct *work)
off = stm->offset;
len = stm->full_len;
}
-start:
ingress = skb_bpf_ingress(skb);
skb_bpf_redirect_clear(skb);
do {
@@ -678,22 +666,28 @@ start:
len, ingress);
if (ret <= 0) {
if (ret == -EAGAIN) {
- sk_psock_skb_state(psock, state, skb,
- len, off);
+ sk_psock_skb_state(psock, state, len, off);
+
+ /* Delay slightly to prioritize any
+ * other work that might be here.
+ */
+ if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+ schedule_delayed_work(&psock->work, 1);
goto end;
}
/* Hard errors break pipe and stop xmit. */
sk_psock_report_error(psock, ret ? -ret : EPIPE);
sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
- sock_drop(psock->sk, skb);
goto end;
}
off += ret;
len -= ret;
} while (len);
- if (!ingress)
+ skb = skb_dequeue(&psock->ingress_skb);
+ if (!ingress) {
kfree_skb(skb);
+ }
}
end:
mutex_unlock(&psock->work_mutex);
@@ -734,7 +728,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
INIT_LIST_HEAD(&psock->link);
spin_lock_init(&psock->link_lock);
- INIT_WORK(&psock->work, sk_psock_backlog);
+ INIT_DELAYED_WORK(&psock->work, sk_psock_backlog);
mutex_init(&psock->work_mutex);
INIT_LIST_HEAD(&psock->ingress_msg);
spin_lock_init(&psock->ingress_lock);
@@ -786,11 +780,6 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock)
skb_bpf_redirect_clear(skb);
sock_drop(psock->sk, skb);
}
- kfree_skb(psock->work_state.skb);
- /* We null the skb here to ensure that calls to sk_psock_backlog
- * do not pick up the free'd skb.
- */
- psock->work_state.skb = NULL;
__sk_psock_purge_ingress_msg(psock);
}
@@ -809,7 +798,6 @@ void sk_psock_stop(struct sk_psock *psock)
spin_lock_bh(&psock->ingress_lock);
sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
sk_psock_cork_free(psock);
- __sk_psock_zap_ingress(psock);
spin_unlock_bh(&psock->ingress_lock);
}
@@ -823,7 +811,8 @@ static void sk_psock_destroy(struct work_struct *work)
sk_psock_done_strp(psock);
- cancel_work_sync(&psock->work);
+ cancel_delayed_work_sync(&psock->work);
+ __sk_psock_zap_ingress(psock);
mutex_destroy(&psock->work_mutex);
psock_progs_drop(&psock->progs);
@@ -938,7 +927,7 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb)
}
skb_queue_tail(&psock_other->ingress_skb, skb);
- schedule_work(&psock_other->work);
+ schedule_delayed_work(&psock_other->work, 0);
spin_unlock_bh(&psock_other->ingress_lock);
return 0;
}
@@ -990,10 +979,8 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
err = -EIO;
sk_other = psock->sk;
if (sock_flag(sk_other, SOCK_DEAD) ||
- !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
- skb_bpf_redirect_clear(skb);
+ !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
goto out_free;
- }
skb_bpf_set_ingress(skb);
@@ -1018,22 +1005,23 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
spin_lock_bh(&psock->ingress_lock);
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
skb_queue_tail(&psock->ingress_skb, skb);
- schedule_work(&psock->work);
+ schedule_delayed_work(&psock->work, 0);
err = 0;
}
spin_unlock_bh(&psock->ingress_lock);
- if (err < 0) {
- skb_bpf_redirect_clear(skb);
+ if (err < 0)
goto out_free;
- }
}
break;
case __SK_REDIRECT:
+ tcp_eat_skb(psock->sk, skb);
err = sk_psock_skb_redirect(psock, skb);
break;
case __SK_DROP:
default:
out_free:
+ skb_bpf_redirect_clear(skb);
+ tcp_eat_skb(psock->sk, skb);
sock_drop(psock->sk, skb);
}
@@ -1049,7 +1037,7 @@ static void sk_psock_write_space(struct sock *sk)
psock = sk_psock(sk);
if (likely(psock)) {
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
- schedule_work(&psock->work);
+ schedule_delayed_work(&psock->work, 0);
write_space = psock->saved_write_space;
}
rcu_read_unlock();
@@ -1078,8 +1066,7 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
skb_dst_drop(skb);
skb_bpf_redirect_clear(skb);
ret = bpf_prog_run_pin_on_cpu(prog, skb);
- if (ret == SK_PASS)
- skb_bpf_set_strparser(skb);
+ skb_bpf_set_strparser(skb);
ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
skb->sk = NULL;
}
@@ -1183,12 +1170,11 @@ static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
int ret = __SK_DROP;
int len = skb->len;
- skb_get(skb);
-
rcu_read_lock();
psock = sk_psock(sk);
if (unlikely(!psock)) {
len = 0;
+ tcp_eat_skb(sk, skb);
sock_drop(sk, skb);
goto out;
}
@@ -1212,12 +1198,22 @@ out:
static void sk_psock_verdict_data_ready(struct sock *sk)
{
struct socket *sock = sk->sk_socket;
+ int copied;
trace_sk_data_ready(sk);
if (unlikely(!sock || !sock->ops || !sock->ops->read_skb))
return;
- sock->ops->read_skb(sk, sk_psock_verdict_recv);
+ copied = sock->ops->read_skb(sk, sk_psock_verdict_recv);
+ if (copied >= 0) {
+ struct sk_psock *psock;
+
+ rcu_read_lock();
+ psock = sk_psock(sk);
+ if (psock)
+ psock->saved_data_ready(sk);
+ rcu_read_unlock();
+ }
}
void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
diff --git a/net/core/sock.c b/net/core/sock.c
index 5440e67bcfe3..6e5662ca00fe 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1362,12 +1362,6 @@ set_sndbuf:
__sock_set_mark(sk, val);
break;
case SO_RCVMARK:
- if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
- !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
- ret = -EPERM;
- break;
- }
-
sock_valbool_flag(sk, SOCK_RCVMARK, valbool);
break;
@@ -2381,7 +2375,6 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
u32 max_segs = 1;
- sk_dst_set(sk, dst);
sk->sk_route_caps = dst->dev->features;
if (sk_is_tcp(sk))
sk->sk_route_caps |= NETIF_F_GSO;
@@ -2400,6 +2393,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
}
}
sk->sk_gso_max_segs = max_segs;
+ sk_dst_set(sk, dst);
}
EXPORT_SYMBOL_GPL(sk_setup_caps);
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 7c189c2e2fbf..00afb66cd095 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1644,9 +1644,10 @@ void sock_map_close(struct sock *sk, long timeout)
rcu_read_unlock();
sk_psock_stop(psock);
release_sock(sk);
- cancel_work_sync(&psock->work);
+ cancel_delayed_work_sync(&psock->work);
sk_psock_put(sk, psock);
}
+
/* Make sure we do not recurse. This is a bug.
* Leak the socket instead of crashing on a stack overflow.
*/
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index a06b5641287a..b0ebf853cb07 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -191,6 +191,9 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
struct dccp_sock *dp = dccp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ pr_warn_once("DCCP is deprecated and scheduled to be removed in 2025, "
+ "please contact the netdev mailing list\n");
+
icsk->icsk_rto = DCCP_TIMEOUT_INIT;
icsk->icsk_syn_retries = sysctl_dccp_request_retries;
sk->sk_state = DCCP_CLOSED;
diff --git a/net/devlink/core.c b/net/devlink/core.c
index 777b091ef74d..c23ebabadc52 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -204,11 +204,6 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
if (ret < 0)
goto err_xa_alloc;
- devlink->netdevice_nb.notifier_call = devlink_port_netdevice_event;
- ret = register_netdevice_notifier(&devlink->netdevice_nb);
- if (ret)
- goto err_register_netdevice_notifier;
-
devlink->dev = dev;
devlink->ops = ops;
xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC);
@@ -233,8 +228,6 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
return devlink;
-err_register_netdevice_notifier:
- xa_erase(&devlinks, devlink->index);
err_xa_alloc:
kfree(devlink);
return NULL;
@@ -266,8 +259,6 @@ void devlink_free(struct devlink *devlink)
xa_destroy(&devlink->params);
xa_destroy(&devlink->ports);
- WARN_ON_ONCE(unregister_netdevice_notifier(&devlink->netdevice_nb));
-
xa_erase(&devlinks, devlink->index);
devlink_put(devlink);
@@ -303,6 +294,10 @@ static struct pernet_operations devlink_pernet_ops __net_initdata = {
.pre_exit = devlink_pernet_pre_exit,
};
+static struct notifier_block devlink_port_netdevice_nb = {
+ .notifier_call = devlink_port_netdevice_event,
+};
+
static int __init devlink_init(void)
{
int err;
@@ -311,6 +306,9 @@ static int __init devlink_init(void)
if (err)
goto out;
err = register_pernet_subsys(&devlink_pernet_ops);
+ if (err)
+ goto out;
+ err = register_netdevice_notifier(&devlink_port_netdevice_nb);
out:
WARN_ON(err);
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index e133f423294a..62921b2eb0d3 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -50,7 +50,6 @@ struct devlink {
u8 reload_failed:1;
refcount_t refcount;
struct rcu_work rwork;
- struct notifier_block netdevice_nb;
char priv[] __aligned(NETDEV_ALIGN);
};
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index dffca2f9bfa7..cd0254968076 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -7073,10 +7073,9 @@ int devlink_port_netdevice_event(struct notifier_block *nb,
struct devlink_port *devlink_port = netdev->devlink_port;
struct devlink *devlink;
- devlink = container_of(nb, struct devlink, netdevice_nb);
-
- if (!devlink_port || devlink_port->devlink != devlink)
+ if (!devlink_port)
return NOTIFY_OK;
+ devlink = devlink_port->devlink;
switch (event) {
case NETDEV_POST_INIT:
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index ab1afe67fd18..1afed89e03c0 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -403,6 +403,24 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
return 0;
}
+static struct dsa_port *
+dsa_switch_preferred_default_local_cpu_port(struct dsa_switch *ds)
+{
+ struct dsa_port *cpu_dp;
+
+ if (!ds->ops->preferred_default_local_cpu_port)
+ return NULL;
+
+ cpu_dp = ds->ops->preferred_default_local_cpu_port(ds);
+ if (!cpu_dp)
+ return NULL;
+
+ if (WARN_ON(!dsa_port_is_cpu(cpu_dp) || cpu_dp->ds != ds))
+ return NULL;
+
+ return cpu_dp;
+}
+
/* Perform initial assignment of CPU ports to user ports and DSA links in the
* fabric, giving preference to CPU ports local to each switch. Default to
* using the first CPU port in the switch tree if the port does not have a CPU
@@ -410,12 +428,16 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
*/
static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst)
{
- struct dsa_port *cpu_dp, *dp;
+ struct dsa_port *preferred_cpu_dp, *cpu_dp, *dp;
list_for_each_entry(cpu_dp, &dst->ports, list) {
if (!dsa_port_is_cpu(cpu_dp))
continue;
+ preferred_cpu_dp = dsa_switch_preferred_default_local_cpu_port(cpu_dp->ds);
+ if (preferred_cpu_dp && preferred_cpu_dp != cpu_dp)
+ continue;
+
/* Prefer a local CPU port */
dsa_switch_for_each_port(dp, cpu_dp->ds) {
/* Prefer the first local CPU port found */
diff --git a/net/handshake/handshake-test.c b/net/handshake/handshake-test.c
index e6adc5dec11a..6d37bab35c8f 100644
--- a/net/handshake/handshake-test.c
+++ b/net/handshake/handshake-test.c
@@ -102,7 +102,7 @@ struct handshake_req_alloc_test_param handshake_req_alloc_params[] = {
{
.desc = "handshake_req_alloc excessive privsize",
.proto = &handshake_req_alloc_proto_6,
- .gfp = GFP_KERNEL,
+ .gfp = GFP_KERNEL | __GFP_NOWARN,
.expect_success = false,
},
{
@@ -209,6 +209,7 @@ static void handshake_req_submit_test4(struct kunit *test)
{
struct handshake_req *req, *result;
struct socket *sock;
+ struct file *filp;
int err;
/* Arrange */
@@ -218,9 +219,10 @@ static void handshake_req_submit_test4(struct kunit *test)
err = __sock_create(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
&sock, 1);
KUNIT_ASSERT_EQ(test, err, 0);
- sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+ filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
KUNIT_ASSERT_NOT_NULL(test, sock->sk);
+ sock->file = filp;
err = handshake_req_submit(sock, req, GFP_KERNEL);
KUNIT_ASSERT_EQ(test, err, 0);
@@ -241,6 +243,7 @@ static void handshake_req_submit_test5(struct kunit *test)
struct handshake_req *req;
struct handshake_net *hn;
struct socket *sock;
+ struct file *filp;
struct net *net;
int saved, err;
@@ -251,9 +254,10 @@ static void handshake_req_submit_test5(struct kunit *test)
err = __sock_create(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
&sock, 1);
KUNIT_ASSERT_EQ(test, err, 0);
- sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+ filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
KUNIT_ASSERT_NOT_NULL(test, sock->sk);
+ sock->file = filp;
net = sock_net(sock->sk);
hn = handshake_pernet(net);
@@ -276,6 +280,7 @@ static void handshake_req_submit_test6(struct kunit *test)
{
struct handshake_req *req1, *req2;
struct socket *sock;
+ struct file *filp;
int err;
/* Arrange */
@@ -287,9 +292,10 @@ static void handshake_req_submit_test6(struct kunit *test)
err = __sock_create(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
&sock, 1);
KUNIT_ASSERT_EQ(test, err, 0);
- sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+ filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
KUNIT_ASSERT_NOT_NULL(test, sock->sk);
+ sock->file = filp;
/* Act */
err = handshake_req_submit(sock, req1, GFP_KERNEL);
@@ -307,6 +313,7 @@ static void handshake_req_cancel_test1(struct kunit *test)
{
struct handshake_req *req;
struct socket *sock;
+ struct file *filp;
bool result;
int err;
@@ -318,8 +325,9 @@ static void handshake_req_cancel_test1(struct kunit *test)
&sock, 1);
KUNIT_ASSERT_EQ(test, err, 0);
- sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+ filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
+ sock->file = filp;
err = handshake_req_submit(sock, req, GFP_KERNEL);
KUNIT_ASSERT_EQ(test, err, 0);
@@ -340,6 +348,7 @@ static void handshake_req_cancel_test2(struct kunit *test)
struct handshake_req *req, *next;
struct handshake_net *hn;
struct socket *sock;
+ struct file *filp;
struct net *net;
bool result;
int err;
@@ -352,8 +361,9 @@ static void handshake_req_cancel_test2(struct kunit *test)
&sock, 1);
KUNIT_ASSERT_EQ(test, err, 0);
- sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+ filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
+ sock->file = filp;
err = handshake_req_submit(sock, req, GFP_KERNEL);
KUNIT_ASSERT_EQ(test, err, 0);
@@ -380,6 +390,7 @@ static void handshake_req_cancel_test3(struct kunit *test)
struct handshake_req *req, *next;
struct handshake_net *hn;
struct socket *sock;
+ struct file *filp;
struct net *net;
bool result;
int err;
@@ -392,8 +403,9 @@ static void handshake_req_cancel_test3(struct kunit *test)
&sock, 1);
KUNIT_ASSERT_EQ(test, err, 0);
- sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+ filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
+ sock->file = filp;
err = handshake_req_submit(sock, req, GFP_KERNEL);
KUNIT_ASSERT_EQ(test, err, 0);
@@ -436,6 +448,7 @@ static void handshake_req_destroy_test1(struct kunit *test)
{
struct handshake_req *req;
struct socket *sock;
+ struct file *filp;
int err;
/* Arrange */
@@ -448,8 +461,9 @@ static void handshake_req_destroy_test1(struct kunit *test)
&sock, 1);
KUNIT_ASSERT_EQ(test, err, 0);
- sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+ filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
+ sock->file = filp;
err = handshake_req_submit(sock, req, GFP_KERNEL);
KUNIT_ASSERT_EQ(test, err, 0);
diff --git a/net/handshake/netlink.c b/net/handshake/netlink.c
index 35c9c445e0b8..1086653e1fad 100644
--- a/net/handshake/netlink.c
+++ b/net/handshake/netlink.c
@@ -48,7 +48,7 @@ int handshake_genl_notify(struct net *net, const struct handshake_proto *proto,
proto->hp_handler_class))
return -ESRCH;
- msg = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ msg = genlmsg_new(GENLMSG_DEFAULT_SIZE, flags);
if (!msg)
return -ENOMEM;
@@ -99,9 +99,6 @@ static int handshake_dup(struct socket *sock)
struct file *file;
int newfd;
- if (!sock->file)
- return -EBADF;
-
file = get_file(sock->file);
newfd = get_unused_fd_flags(O_CLOEXEC);
if (newfd < 0) {
@@ -142,15 +139,16 @@ int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info)
goto out_complete;
}
err = req->hr_proto->hp_accept(req, info, fd);
- if (err)
+ if (err) {
+ fput(sock->file);
goto out_complete;
+ }
trace_handshake_cmd_accept(net, req, req->hr_sk, fd);
return 0;
out_complete:
handshake_complete(req, -EIO, NULL);
- fput(sock->file);
out_status:
trace_handshake_cmd_accept_err(net, req, NULL, err);
return err;
@@ -159,8 +157,8 @@ out_status:
int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
+ struct handshake_req *req = NULL;
struct socket *sock = NULL;
- struct handshake_req *req;
int fd, status, err;
if (GENL_REQ_ATTR_CHECK(info, HANDSHAKE_A_DONE_SOCKFD))
diff --git a/net/handshake/tlshd.c b/net/handshake/tlshd.c
index fcbeb63b4eb1..b735f5cced2f 100644
--- a/net/handshake/tlshd.c
+++ b/net/handshake/tlshd.c
@@ -31,6 +31,7 @@ struct tls_handshake_req {
int th_type;
unsigned int th_timeout_ms;
int th_auth_mode;
+ const char *th_peername;
key_serial_t th_keyring;
key_serial_t th_certificate;
key_serial_t th_privkey;
@@ -48,6 +49,7 @@ tls_handshake_req_init(struct handshake_req *req,
treq->th_timeout_ms = args->ta_timeout_ms;
treq->th_consumer_done = args->ta_done;
treq->th_consumer_data = args->ta_data;
+ treq->th_peername = args->ta_peername;
treq->th_keyring = args->ta_keyring;
treq->th_num_peerids = 0;
treq->th_certificate = TLS_NO_CERT;
@@ -214,6 +216,12 @@ static int tls_handshake_accept(struct handshake_req *req,
ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_MESSAGE_TYPE, treq->th_type);
if (ret < 0)
goto out_cancel;
+ if (treq->th_peername) {
+ ret = nla_put_string(msg, HANDSHAKE_A_ACCEPT_PEERNAME,
+ treq->th_peername);
+ if (ret < 0)
+ goto out_cancel;
+ }
if (treq->th_timeout_ms) {
ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_TIMEOUT, treq->th_timeout_ms);
if (ret < 0)
diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h
index e5d8439b9e45..c16db0b326fa 100644
--- a/net/ieee802154/trace.h
+++ b/net/ieee802154/trace.h
@@ -13,7 +13,7 @@
#define MAXNAME 32
#define WPAN_PHY_ENTRY __array(char, wpan_phy_name, MAXNAME)
-#define WPAN_PHY_ASSIGN strlcpy(__entry->wpan_phy_name, \
+#define WPAN_PHY_ASSIGN strscpy(__entry->wpan_phy_name, \
wpan_phy_name(wpan_phy), \
MAXNAME)
#define WPAN_PHY_PR_FMT "%s"
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c4aab3aacbd8..4a76ebf793b8 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -586,6 +586,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
add_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending += writebias;
+ sk->sk_wait_pending++;
/* Basic assumption: if someone sets sk->sk_err, he _must_
* change state of the socket from TCP_SYN_*.
@@ -601,6 +602,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
}
remove_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending -= writebias;
+ sk->sk_wait_pending--;
return timeo;
}
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 3969fa805679..ee848be59e65 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -340,6 +340,9 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
secpath_reset(skb);
+ if (skb_needs_linearize(skb, skb->dev->features) &&
+ __skb_linearize(skb))
+ return -ENOMEM;
return 0;
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 65ad4251f6fd..1386787eaf1a 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1142,6 +1142,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
if (newsk) {
struct inet_connection_sock *newicsk = inet_csk(newsk);
+ newsk->sk_wait_pending = 0;
inet_sk_set_state(newsk, TCP_SYN_RECV);
newicsk->icsk_bind_hash = NULL;
newicsk->icsk_bind2_hash = NULL;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index b511ff0adc0a..8e97d8d4cc9d 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -317,7 +317,14 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
ipc->tos = val;
ipc->priority = rt_tos2priority(ipc->tos);
break;
-
+ case IP_PROTOCOL:
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
+ return -EINVAL;
+ val = *(int *)CMSG_DATA(cmsg);
+ if (val < 1 || val > 255)
+ return -EINVAL;
+ ipc->protocol = val;
+ break;
default:
return -EINVAL;
}
@@ -1761,6 +1768,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_LOCAL_PORT_RANGE:
val = inet->local_port_range.hi << 16 | inet->local_port_range.lo;
break;
+ case IP_PROTOCOL:
+ val = inet_sk(sk)->inet_num;
+ break;
default:
sockopt_release_sock(sk);
return -ENOPROTOOPT;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index ff712bf2a98d..eadf1c9ef7e4 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -532,6 +532,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
ipcm_init_sk(&ipc, inet);
+ /* Keep backward compat */
+ if (hdrincl)
+ ipc.protocol = IPPROTO_RAW;
if (msg->msg_controllen) {
err = ip_cmsg_send(sk, msg, &ipc, false);
@@ -599,7 +602,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos,
RT_SCOPE_UNIVERSE,
- hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+ hdrincl ? ipc.protocol : sk->sk_protocol,
inet_sk_flowi_flags(sk) |
(hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
daddr, saddr, 0, 0, sk->sk_uid);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 40fe70fc2015..88dfe51e68f3 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -34,8 +34,8 @@ static int ip_ttl_min = 1;
static int ip_ttl_max = 255;
static int tcp_syn_retries_min = 1;
static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
-static int ip_ping_group_range_min[] = { 0, 0 };
-static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
+static unsigned long ip_ping_group_range_min[] = { 0, 0 };
+static unsigned long ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
static u32 u32_max_div_HZ = UINT_MAX / HZ;
static int one_day_secs = 24 * 3600;
static u32 fib_multipath_hash_fields_all_mask __maybe_unused =
@@ -165,7 +165,7 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write,
{
struct user_namespace *user_ns = current_user_ns();
int ret;
- gid_t urange[2];
+ unsigned long urange[2];
kgid_t low, high;
struct ctl_table tmp = {
.data = &urange,
@@ -178,7 +178,7 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write,
inet_get_ping_group_range_table(table, &low, &high);
urange[0] = from_kgid_munged(user_ns, low);
urange[1] = from_kgid_munged(user_ns, high);
- ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+ ret = proc_doulongvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && ret == 0) {
low = make_kgid(user_ns, urange[0]);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4d6392c16b7a..8d20d9221238 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1571,7 +1571,7 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
* calculation of whether or not we must ACK for the sake of
* a window update.
*/
-static void __tcp_cleanup_rbuf(struct sock *sk, int copied)
+void __tcp_cleanup_rbuf(struct sock *sk, int copied)
{
struct tcp_sock *tp = tcp_sk(sk);
bool time_to_ack = false;
@@ -1773,7 +1773,6 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
tcp_flags = TCP_SKB_CB(skb)->tcp_flags;
used = recv_actor(sk, skb);
- consume_skb(skb);
if (used < 0) {
if (!copied)
copied = used;
@@ -1787,14 +1786,6 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
break;
}
}
- WRITE_ONCE(tp->copied_seq, seq);
-
- tcp_rcv_space_adjust(sk);
-
- /* Clean up data we have read: This will do ACK frames. */
- if (copied > 0)
- __tcp_cleanup_rbuf(sk, copied);
-
return copied;
}
EXPORT_SYMBOL(tcp_read_skb);
@@ -3090,6 +3081,12 @@ int tcp_disconnect(struct sock *sk, int flags)
int old_state = sk->sk_state;
u32 seq;
+ /* Deny disconnect if other threads are blocked in sk_wait_event()
+ * or inet_wait_for_connect().
+ */
+ if (sk->sk_wait_pending)
+ return -EBUSY;
+
if (old_state != TCP_CLOSE)
tcp_set_state(sk, TCP_CLOSE);
@@ -4081,7 +4078,8 @@ int do_tcp_getsockopt(struct sock *sk, int level,
switch (optname) {
case TCP_MAXSEG:
val = tp->mss_cache;
- if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
+ if (tp->rx_opt.user_mss &&
+ ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
val = tp->rx_opt.user_mss;
if (tp->repair)
val = tp->rx_opt.mss_clamp;
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 2e9547467edb..5f93918c063c 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -11,6 +11,24 @@
#include <net/inet_common.h>
#include <net/tls.h>
+void tcp_eat_skb(struct sock *sk, struct sk_buff *skb)
+{
+ struct tcp_sock *tcp;
+ int copied;
+
+ if (!skb || !skb->len || !sk_is_tcp(sk))
+ return;
+
+ if (skb_bpf_strparser(skb))
+ return;
+
+ tcp = tcp_sk(sk);
+ copied = tcp->copied_seq + skb->len;
+ WRITE_ONCE(tcp->copied_seq, copied);
+ tcp_rcv_space_adjust(sk);
+ __tcp_cleanup_rbuf(sk, skb->len);
+}
+
static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
struct sk_msg *msg, u32 apply_bytes, int flags)
{
@@ -174,14 +192,34 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
return ret;
}
+static bool is_next_msg_fin(struct sk_psock *psock)
+{
+ struct scatterlist *sge;
+ struct sk_msg *msg_rx;
+ int i;
+
+ msg_rx = sk_psock_peek_msg(psock);
+ i = msg_rx->sg.start;
+ sge = sk_msg_elem(msg_rx, i);
+ if (!sge->length) {
+ struct sk_buff *skb = msg_rx->skb;
+
+ if (skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+ return true;
+ }
+ return false;
+}
+
static int tcp_bpf_recvmsg_parser(struct sock *sk,
struct msghdr *msg,
size_t len,
int flags,
int *addr_len)
{
+ struct tcp_sock *tcp = tcp_sk(sk);
+ u32 seq = tcp->copied_seq;
struct sk_psock *psock;
- int copied;
+ int copied = 0;
if (unlikely(flags & MSG_ERRQUEUE))
return inet_recv_error(sk, msg, len, addr_len);
@@ -194,8 +232,43 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
return tcp_recvmsg(sk, msg, len, flags, addr_len);
lock_sock(sk);
+
+ /* We may have received data on the sk_receive_queue pre-accept and
+ * then we can not use read_skb in this context because we haven't
+ * assigned a sk_socket yet so have no link to the ops. The work-around
+ * is to check the sk_receive_queue and in these cases read skbs off
+ * queue again. The read_skb hook is not running at this point because
+ * of lock_sock so we avoid having multiple runners in read_skb.
+ */
+ if (unlikely(!skb_queue_empty(&sk->sk_receive_queue))) {
+ tcp_data_ready(sk);
+ /* This handles the ENOMEM errors if we both receive data
+ * pre accept and are already under memory pressure. At least
+ * let user know to retry.
+ */
+ if (unlikely(!skb_queue_empty(&sk->sk_receive_queue))) {
+ copied = -EAGAIN;
+ goto out;
+ }
+ }
+
msg_bytes_ready:
copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
+ /* The typical case for EFAULT is the socket was gracefully
+ * shutdown with a FIN pkt. So check here the other case is
+ * some error on copy_page_to_iter which would be unexpected.
+ * On fin return correct return code to zero.
+ */
+ if (copied == -EFAULT) {
+ bool is_fin = is_next_msg_fin(psock);
+
+ if (is_fin) {
+ copied = 0;
+ seq++;
+ goto out;
+ }
+ }
+ seq += copied;
if (!copied) {
long timeo;
int data;
@@ -233,6 +306,10 @@ msg_bytes_ready:
copied = -EAGAIN;
}
out:
+ WRITE_ONCE(tcp->copied_seq, seq);
+ tcp_rcv_space_adjust(sk);
+ if (copied > 0)
+ __tcp_cleanup_rbuf(sk, copied);
release_sock(sk);
sk_psock_put(sk, psock);
return copied;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 61b6710f337a..bf8b22218dd4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4530,7 +4530,7 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
}
}
-static void tcp_sack_compress_send_ack(struct sock *sk)
+void tcp_sack_compress_send_ack(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 39bda2b1066e..06d2573685ca 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -829,6 +829,9 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
inet_twsk(sk)->tw_priority : sk->sk_priority;
transmit_time = tcp_transmit_time(sk);
xfrm_sk_clone_policy(ctl_sk, sk);
+ } else {
+ ctl_sk->sk_mark = 0;
+ ctl_sk->sk_priority = 0;
}
ip_send_unicast_reply(ctl_sk,
skb, &TCP_SKB_CB(skb)->header.h4.opt,
@@ -836,7 +839,6 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
&arg, arg.iov[0].iov_len,
transmit_time);
- ctl_sk->sk_mark = 0;
xfrm_sk_free_policy(ctl_sk);
sock_net_set(ctl_sk, &init_net);
__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
@@ -935,7 +937,6 @@ static void tcp_v4_send_ack(const struct sock *sk,
&arg, arg.iov[0].iov_len,
transmit_time);
- ctl_sk->sk_mark = 0;
sock_net_set(ctl_sk, &init_net);
__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
local_bh_enable();
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 45dda7889387..4851211aa60d 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -60,12 +60,12 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
struct tcphdr *th;
unsigned int thlen;
unsigned int seq;
- __be32 delta;
unsigned int oldlen;
unsigned int mss;
struct sk_buff *gso_skb = skb;
__sum16 newcheck;
bool ooo_okay, copy_destructor;
+ __wsum delta;
th = tcp_hdr(skb);
thlen = th->doff * 4;
@@ -75,7 +75,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
if (!pskb_may_pull(skb, thlen))
goto out;
- oldlen = (u16)~skb->len;
+ oldlen = ~skb->len;
__skb_pull(skb, thlen);
mss = skb_shinfo(skb)->gso_size;
@@ -110,7 +110,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
if (skb_is_gso(segs))
mss *= skb_shinfo(segs)->gso_segs;
- delta = htonl(oldlen + (thlen + mss));
+ delta = (__force __wsum)htonl(oldlen + thlen + mss);
skb = segs;
th = tcp_hdr(skb);
@@ -119,8 +119,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
if (unlikely(skb_shinfo(gso_skb)->tx_flags & SKBTX_SW_TSTAMP))
tcp_gso_tstamp(segs, skb_shinfo(gso_skb)->tskey, seq, mss);
- newcheck = ~csum_fold((__force __wsum)((__force u32)th->check +
- (__force u32)delta));
+ newcheck = ~csum_fold(csum_add(csum_unfold(th->check), delta));
while (skb->next) {
th->fin = th->psh = 0;
@@ -165,11 +164,11 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
WARN_ON_ONCE(refcount_sub_and_test(-delta, &skb->sk->sk_wmem_alloc));
}
- delta = htonl(oldlen + (skb_tail_pointer(skb) -
- skb_transport_header(skb)) +
- skb->data_len);
- th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
- (__force u32)delta));
+ delta = (__force __wsum)htonl(oldlen +
+ (skb_tail_pointer(skb) -
+ skb_transport_header(skb)) +
+ skb->data_len);
+ th->check = ~csum_fold(csum_add(csum_unfold(th->check), delta));
if (skb->ip_summed == CHECKSUM_PARTIAL)
gso_reset_checksum(skb, ~th->check);
else
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b839c2f91292..39eb947fe392 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -290,9 +290,19 @@ static int tcp_write_timeout(struct sock *sk)
void tcp_delack_timer_handler(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
- if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
- !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
+ if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
+ return;
+
+ /* Handling the sack compression case */
+ if (tp->compressed_ack) {
+ tcp_mstamp_refresh(tp);
+ tcp_sack_compress_send_ack(sk);
+ return;
+ }
+
+ if (!(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
return;
if (time_after(icsk->icsk_ack.timeout, jiffies)) {
@@ -312,7 +322,7 @@ void tcp_delack_timer_handler(struct sock *sk)
inet_csk_exit_pingpong_mode(sk);
icsk->icsk_ack.ato = TCP_ATO_MIN;
}
- tcp_mstamp_refresh(tcp_sk(sk));
+ tcp_mstamp_refresh(tp);
tcp_send_ack(sk);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index aa32afd871ee..9482def1f310 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1818,7 +1818,7 @@ EXPORT_SYMBOL(__skb_recv_udp);
int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
struct sk_buff *skb;
- int err, copied;
+ int err;
try_again:
skb = skb_recv_udp(sk, MSG_DONTWAIT, &err);
@@ -1837,10 +1837,7 @@ try_again:
}
WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
- copied = recv_actor(sk, skb);
- kfree_skb(skb);
-
- return copied;
+ return recv_actor(sk, skb);
}
EXPORT_SYMBOL(udp_read_skb);
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index e0c9cc39b81e..143f93a12f25 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -22,6 +22,8 @@ static int udplite_sk_init(struct sock *sk)
{
udp_init_sock(sk);
udp_sk(sk)->pcflag = UDPLITE_BIT;
+ pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, "
+ "please contact the netdev mailing list\n");
return 0;
}
@@ -64,6 +66,8 @@ struct proto udplite_prot = {
.per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc,
.sysctl_mem = sysctl_udp_mem,
+ .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp_sock),
.h.udp_table = &udplite_table,
};
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index ad2afeef4f10..eac206a290d0 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -164,6 +164,7 @@ drop:
kfree_skb(skb);
return 0;
}
+EXPORT_SYMBOL(xfrm4_udp_encap_rcv);
int xfrm4_rcv(struct sk_buff *skb)
{
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 75c02992c520..772340268997 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -374,6 +374,9 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features
secpath_reset(skb);
+ if (skb_needs_linearize(skb, skb->dev->features) &&
+ __skb_linearize(skb))
+ return -ENOMEM;
return 0;
}
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index a8d961d3a477..5fa0e37305d9 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -569,24 +569,6 @@ looped_back:
return -1;
}
- if (skb_cloned(skb)) {
- if (pskb_expand_head(skb, IPV6_RPL_SRH_WORST_SWAP_SIZE, 0,
- GFP_ATOMIC)) {
- __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUTDISCARDS);
- kfree_skb(skb);
- return -1;
- }
- } else {
- err = skb_cow_head(skb, IPV6_RPL_SRH_WORST_SWAP_SIZE);
- if (unlikely(err)) {
- kfree_skb(skb);
- return -1;
- }
- }
-
- hdr = (struct ipv6_rpl_sr_hdr *)skb_transport_header(skb);
-
if (!pskb_may_pull(skb, ipv6_rpl_srh_size(n, hdr->cmpri,
hdr->cmpre))) {
kfree_skb(skb);
@@ -630,6 +612,17 @@ looped_back:
skb_pull(skb, ((hdr->hdrlen + 1) << 3));
skb_postpull_rcsum(skb, oldhdr,
sizeof(struct ipv6hdr) + ((hdr->hdrlen + 1) << 3));
+ if (unlikely(!hdr->segments_left)) {
+ if (pskb_expand_head(skb, sizeof(struct ipv6hdr) + ((chdr->hdrlen + 1) << 3), 0,
+ GFP_ATOMIC)) {
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS);
+ kfree_skb(skb);
+ kfree(buf);
+ return -1;
+ }
+
+ oldhdr = ipv6_hdr(skb);
+ }
skb_push(skb, ((chdr->hdrlen + 1) << 3) + sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
skb_mac_header_rebuild(skb);
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index da46c4284676..49e31e4ae7b7 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -143,6 +143,8 @@ int ipv6_find_tlv(const struct sk_buff *skb, int offset, int type)
optlen = 1;
break;
default:
+ if (len < 2)
+ goto bad;
optlen = nh[offset + 1] + 2;
if (optlen > len)
goto bad;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 2438da5ff6da..bac768d36cc1 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -2491,7 +2491,7 @@ static int ipv6_route_native_seq_show(struct seq_file *seq, void *v)
const struct net_device *dev;
if (rt->nh)
- fib6_nh = nexthop_fib6_nh_bh(rt->nh);
+ fib6_nh = nexthop_fib6_nh(rt->nh);
seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
@@ -2556,14 +2556,14 @@ static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
if (tbl) {
h = (tbl->tb6_id & (FIB6_TABLE_HASHSZ - 1)) + 1;
- node = rcu_dereference_bh(hlist_next_rcu(&tbl->tb6_hlist));
+ node = rcu_dereference(hlist_next_rcu(&tbl->tb6_hlist));
} else {
h = 0;
node = NULL;
}
while (!node && h < FIB6_TABLE_HASHSZ) {
- node = rcu_dereference_bh(
+ node = rcu_dereference(
hlist_first_rcu(&net->ipv6.fib_table_hash[h++]));
}
return hlist_entry_safe(node, struct fib6_table, tb6_hlist);
@@ -2593,7 +2593,7 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
if (!v)
goto iter_table;
- n = rcu_dereference_bh(((struct fib6_info *)v)->fib6_next);
+ n = rcu_dereference(((struct fib6_info *)v)->fib6_next);
if (n)
return n;
@@ -2619,12 +2619,12 @@ iter_table:
}
static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(RCU_BH)
+ __acquires(RCU)
{
struct net *net = seq_file_net(seq);
struct ipv6_route_iter *iter = seq->private;
- rcu_read_lock_bh();
+ rcu_read_lock();
iter->tbl = ipv6_route_seq_next_table(NULL, net);
iter->skip = *pos;
@@ -2645,7 +2645,7 @@ static bool ipv6_route_iter_active(struct ipv6_route_iter *iter)
}
static void ipv6_route_native_seq_stop(struct seq_file *seq, void *v)
- __releases(RCU_BH)
+ __releases(RCU)
{
struct net *net = seq_file_net(seq);
struct ipv6_route_iter *iter = seq->private;
@@ -2653,7 +2653,7 @@ static void ipv6_route_native_seq_stop(struct seq_file *seq, void *v)
if (ipv6_route_iter_active(iter))
fib6_walker_unlink(net, &iter->w);
- rcu_read_unlock_bh();
+ rcu_read_unlock();
}
#if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index a4ecfc9d2593..da80974ad23a 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1015,12 +1015,14 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
ntohl(tun_id),
ntohl(md->u.index), truncate,
false);
+ proto = htons(ETH_P_ERSPAN);
} else if (md->version == 2) {
erspan_build_header_v2(skb,
ntohl(tun_id),
md->u.md2.dir,
get_hwid(&md->u.md2),
truncate, false);
+ proto = htons(ETH_P_ERSPAN2);
} else {
goto tx_err;
}
@@ -1043,24 +1045,25 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
break;
}
- if (t->parms.erspan_ver == 1)
+ if (t->parms.erspan_ver == 1) {
erspan_build_header(skb, ntohl(t->parms.o_key),
t->parms.index,
truncate, false);
- else if (t->parms.erspan_ver == 2)
+ proto = htons(ETH_P_ERSPAN);
+ } else if (t->parms.erspan_ver == 2) {
erspan_build_header_v2(skb, ntohl(t->parms.o_key),
t->parms.dir,
t->parms.hwid,
truncate, false);
- else
+ proto = htons(ETH_P_ERSPAN2);
+ } else {
goto tx_err;
+ }
fl6.daddr = t->parms.raddr;
}
/* Push GRE header. */
- proto = (t->parms.erspan_ver == 1) ? htons(ETH_P_ERSPAN)
- : htons(ETH_P_ERSPAN2);
gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(atomic_fetch_inc(&t->o_seqno)));
/* TooBig packet may have updated dst->dev's mtu */
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index c4835dbdfcff..f804c11e2146 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -114,7 +114,8 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
addr_type = ipv6_addr_type(daddr);
if ((__ipv6_addr_needs_scope_id(addr_type) && !oif) ||
(addr_type & IPV6_ADDR_MAPPED) ||
- (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if))
+ (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if &&
+ l3mdev_master_ifindex_by_index(sock_net(sk), oif) != sk->sk_bound_dev_if))
return -EINVAL;
ipcm6_init_sk(&ipc6, np);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 7d0adb612bdd..44ee7a2e72ac 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -793,7 +793,8 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!proto)
proto = inet->inet_num;
- else if (proto != inet->inet_num)
+ else if (proto != inet->inet_num &&
+ inet->inet_num != IPPROTO_RAW)
return -EINVAL;
if (proto > 255)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e3aec46bd466..392aaa373b66 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -6412,9 +6412,9 @@ static struct ctl_table ipv6_route_table_template[] = {
{
.procname = "skip_notify_on_dev_down",
.data = &init_net.ipv6.sysctl.skip_notify_on_dev_down,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(u8),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 67eaf3ca14ce..8e010d07917a 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -8,6 +8,8 @@
* Changes:
* Fixes:
*/
+#define pr_fmt(fmt) "UDPLite6: " fmt
+
#include <linux/export.h>
#include <linux/proc_fs.h>
#include "udp_impl.h"
@@ -16,6 +18,8 @@ static int udplitev6_sk_init(struct sock *sk)
{
udpv6_init_sock(sk);
udp_sk(sk)->pcflag = UDPLITE_BIT;
+ pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, "
+ "please contact the netdev mailing list\n");
return 0;
}
@@ -60,6 +64,8 @@ struct proto udplitev6_prot = {
.per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc,
.sysctl_mem = sysctl_udp_mem,
+ .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp6_sock),
.h.udp_table = &udplite_table,
};
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 04cbeefd8982..4907ab241d6b 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -86,6 +86,9 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
__be32 *udpdata32;
__u16 encap_type = up->encap_type;
+ if (skb->protocol == htons(ETH_P_IP))
+ return xfrm4_udp_encap_rcv(sk, skb);
+
/* if this is not encapsulated socket, then just return now */
if (!encap_type)
return 1;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index a815f5ab4c49..31ab12fd720a 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1940,7 +1940,8 @@ static u32 gen_reqid(struct net *net)
}
static int
-parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
+parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_policy *pol,
+ struct sadb_x_ipsecrequest *rq)
{
struct net *net = xp_net(xp);
struct xfrm_tmpl *t = xp->xfrm_vec + xp->xfrm_nr;
@@ -1958,9 +1959,12 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
if ((mode = pfkey_mode_to_xfrm(rq->sadb_x_ipsecrequest_mode)) < 0)
return -EINVAL;
t->mode = mode;
- if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_USE)
+ if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_USE) {
+ if ((mode == XFRM_MODE_TUNNEL || mode == XFRM_MODE_BEET) &&
+ pol->sadb_x_policy_dir == IPSEC_DIR_OUTBOUND)
+ return -EINVAL;
t->optional = 1;
- else if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_UNIQUE) {
+ } else if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_UNIQUE) {
t->reqid = rq->sadb_x_ipsecrequest_reqid;
if (t->reqid > IPSEC_MANUAL_REQID_MAX)
t->reqid = 0;
@@ -2002,7 +2006,7 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol)
rq->sadb_x_ipsecrequest_len < sizeof(*rq))
return -EINVAL;
- if ((err = parse_ipsecrequest(xp, rq)) < 0)
+ if ((err = parse_ipsecrequest(xp, pol, rq)) < 0)
return err;
len -= rq->sadb_x_ipsecrequest_len;
rq = (void*)((u8*)rq + rq->sadb_x_ipsecrequest_len);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 7317e4a5d1ff..f2d08dbccfb7 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1578,9 +1578,10 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
sdata_dereference(link->u.ap.unsol_bcast_probe_resp,
sdata);
- /* abort any running channel switch */
+ /* abort any running channel switch or color change */
mutex_lock(&local->mtx);
link_conf->csa_active = false;
+ link_conf->color_change_active = false;
if (link->csa_block_tx) {
ieee80211_wake_vif_queues(local, sdata,
IEEE80211_QUEUE_STOP_REASON_CSA);
@@ -3589,7 +3590,7 @@ void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif, bool block_t
EXPORT_SYMBOL(ieee80211_channel_switch_disconnect);
static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata,
- u32 *changed)
+ u64 *changed)
{
int err;
@@ -3632,7 +3633,7 @@ static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata,
static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_local *local = sdata->local;
- u32 changed = 0;
+ u64 changed = 0;
int err;
sdata_assert_lock(sdata);
@@ -4864,11 +4865,16 @@ static int ieee80211_add_intf_link(struct wiphy *wiphy,
unsigned int link_id)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+ int res;
if (wdev->use_4addr)
return -EOPNOTSUPP;
- return ieee80211_vif_set_links(sdata, wdev->valid_links);
+ mutex_lock(&sdata->local->mtx);
+ res = ieee80211_vif_set_links(sdata, wdev->valid_links);
+ mutex_unlock(&sdata->local->mtx);
+
+ return res;
}
static void ieee80211_del_intf_link(struct wiphy *wiphy,
@@ -4877,7 +4883,9 @@ static void ieee80211_del_intf_link(struct wiphy *wiphy,
{
struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+ mutex_lock(&sdata->local->mtx);
ieee80211_vif_set_links(sdata, wdev->valid_links);
+ mutex_unlock(&sdata->local->mtx);
}
static int sta_add_link_station(struct ieee80211_local *local,
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index dbc34fbe7c8f..77c90ed8f5d7 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -258,7 +258,8 @@ ieee80211_get_max_required_bw(struct ieee80211_sub_if_data *sdata,
static enum nl80211_chan_width
ieee80211_get_chanctx_vif_max_required_bw(struct ieee80211_sub_if_data *sdata,
- struct ieee80211_chanctx_conf *conf)
+ struct ieee80211_chanctx *ctx,
+ struct ieee80211_link_data *rsvd_for)
{
enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT;
struct ieee80211_vif *vif = &sdata->vif;
@@ -267,13 +268,14 @@ ieee80211_get_chanctx_vif_max_required_bw(struct ieee80211_sub_if_data *sdata,
rcu_read_lock();
for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) {
enum nl80211_chan_width width = NL80211_CHAN_WIDTH_20_NOHT;
- struct ieee80211_bss_conf *link_conf =
- rcu_dereference(sdata->vif.link_conf[link_id]);
+ struct ieee80211_link_data *link =
+ rcu_dereference(sdata->link[link_id]);
- if (!link_conf)
+ if (!link)
continue;
- if (rcu_access_pointer(link_conf->chanctx_conf) != conf)
+ if (link != rsvd_for &&
+ rcu_access_pointer(link->conf->chanctx_conf) != &ctx->conf)
continue;
switch (vif->type) {
@@ -287,7 +289,7 @@ ieee80211_get_chanctx_vif_max_required_bw(struct ieee80211_sub_if_data *sdata,
* point, so take the width from the chandef, but
* account also for TDLS peers
*/
- width = max(link_conf->chandef.width,
+ width = max(link->conf->chandef.width,
ieee80211_get_max_required_bw(sdata, link_id));
break;
case NL80211_IFTYPE_P2P_DEVICE:
@@ -296,7 +298,7 @@ ieee80211_get_chanctx_vif_max_required_bw(struct ieee80211_sub_if_data *sdata,
case NL80211_IFTYPE_ADHOC:
case NL80211_IFTYPE_MESH_POINT:
case NL80211_IFTYPE_OCB:
- width = link_conf->chandef.width;
+ width = link->conf->chandef.width;
break;
case NL80211_IFTYPE_WDS:
case NL80211_IFTYPE_UNSPECIFIED:
@@ -316,7 +318,8 @@ ieee80211_get_chanctx_vif_max_required_bw(struct ieee80211_sub_if_data *sdata,
static enum nl80211_chan_width
ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
- struct ieee80211_chanctx_conf *conf)
+ struct ieee80211_chanctx *ctx,
+ struct ieee80211_link_data *rsvd_for)
{
struct ieee80211_sub_if_data *sdata;
enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT;
@@ -328,7 +331,8 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
if (!ieee80211_sdata_running(sdata))
continue;
- width = ieee80211_get_chanctx_vif_max_required_bw(sdata, conf);
+ width = ieee80211_get_chanctx_vif_max_required_bw(sdata, ctx,
+ rsvd_for);
max_bw = max(max_bw, width);
}
@@ -336,8 +340,8 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
/* use the configured bandwidth in case of monitor interface */
sdata = rcu_dereference(local->monitor_sdata);
if (sdata &&
- rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf) == conf)
- max_bw = max(max_bw, conf->def.width);
+ rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf) == &ctx->conf)
+ max_bw = max(max_bw, ctx->conf.def.width);
rcu_read_unlock();
@@ -349,8 +353,10 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
* the max of min required widths of all the interfaces bound to this
* channel context.
*/
-static u32 _ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
- struct ieee80211_chanctx *ctx)
+static u32
+_ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx,
+ struct ieee80211_link_data *rsvd_for)
{
enum nl80211_chan_width max_bw;
struct cfg80211_chan_def min_def;
@@ -370,7 +376,7 @@ static u32 _ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
return 0;
}
- max_bw = ieee80211_get_chanctx_max_required_bw(local, &ctx->conf);
+ max_bw = ieee80211_get_chanctx_max_required_bw(local, ctx, rsvd_for);
/* downgrade chandef up to max_bw */
min_def = ctx->conf.def;
@@ -448,9 +454,10 @@ static void ieee80211_chan_bw_change(struct ieee80211_local *local,
* channel context.
*/
void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
- struct ieee80211_chanctx *ctx)
+ struct ieee80211_chanctx *ctx,
+ struct ieee80211_link_data *rsvd_for)
{
- u32 changed = _ieee80211_recalc_chanctx_min_def(local, ctx);
+ u32 changed = _ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for);
if (!changed)
return;
@@ -464,10 +471,11 @@ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
ieee80211_chan_bw_change(local, ctx, false);
}
-static void ieee80211_change_chanctx(struct ieee80211_local *local,
- struct ieee80211_chanctx *ctx,
- struct ieee80211_chanctx *old_ctx,
- const struct cfg80211_chan_def *chandef)
+static void _ieee80211_change_chanctx(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx,
+ struct ieee80211_chanctx *old_ctx,
+ const struct cfg80211_chan_def *chandef,
+ struct ieee80211_link_data *rsvd_for)
{
u32 changed;
@@ -492,7 +500,7 @@ static void ieee80211_change_chanctx(struct ieee80211_local *local,
ieee80211_chan_bw_change(local, old_ctx, true);
if (cfg80211_chandef_identical(&ctx->conf.def, chandef)) {
- ieee80211_recalc_chanctx_min_def(local, ctx);
+ ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for);
return;
}
@@ -502,7 +510,7 @@ static void ieee80211_change_chanctx(struct ieee80211_local *local,
/* check if min chanctx also changed */
changed = IEEE80211_CHANCTX_CHANGE_WIDTH |
- _ieee80211_recalc_chanctx_min_def(local, ctx);
+ _ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for);
drv_change_chanctx(local, ctx, changed);
if (!local->use_chanctx) {
@@ -514,6 +522,14 @@ static void ieee80211_change_chanctx(struct ieee80211_local *local,
ieee80211_chan_bw_change(local, old_ctx, false);
}
+static void ieee80211_change_chanctx(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx,
+ struct ieee80211_chanctx *old_ctx,
+ const struct cfg80211_chan_def *chandef)
+{
+ _ieee80211_change_chanctx(local, ctx, old_ctx, chandef, NULL);
+}
+
static struct ieee80211_chanctx *
ieee80211_find_chanctx(struct ieee80211_local *local,
const struct cfg80211_chan_def *chandef,
@@ -638,7 +654,7 @@ ieee80211_alloc_chanctx(struct ieee80211_local *local,
ctx->conf.rx_chains_dynamic = 1;
ctx->mode = mode;
ctx->conf.radar_enabled = false;
- ieee80211_recalc_chanctx_min_def(local, ctx);
+ _ieee80211_recalc_chanctx_min_def(local, ctx, NULL);
return ctx;
}
@@ -855,6 +871,9 @@ static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link,
}
if (new_ctx) {
+ /* recalc considering the link we'll use it for now */
+ ieee80211_recalc_chanctx_min_def(local, new_ctx, link);
+
ret = drv_assign_vif_chanctx(local, sdata, link->conf, new_ctx);
if (ret)
goto out;
@@ -873,12 +892,12 @@ out:
ieee80211_recalc_chanctx_chantype(local, curr_ctx);
ieee80211_recalc_smps_chanctx(local, curr_ctx);
ieee80211_recalc_radar_chanctx(local, curr_ctx);
- ieee80211_recalc_chanctx_min_def(local, curr_ctx);
+ ieee80211_recalc_chanctx_min_def(local, curr_ctx, NULL);
}
if (new_ctx && ieee80211_chanctx_num_assigned(local, new_ctx) > 0) {
ieee80211_recalc_txpower(sdata, false);
- ieee80211_recalc_chanctx_min_def(local, new_ctx);
+ ieee80211_recalc_chanctx_min_def(local, new_ctx, NULL);
}
if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
@@ -1270,7 +1289,7 @@ ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link)
ieee80211_link_update_chandef(link, &link->reserved_chandef);
- ieee80211_change_chanctx(local, new_ctx, old_ctx, chandef);
+ _ieee80211_change_chanctx(local, new_ctx, old_ctx, chandef, link);
vif_chsw[0].vif = &sdata->vif;
vif_chsw[0].old_ctx = &old_ctx->conf;
@@ -1300,7 +1319,7 @@ ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link)
if (ieee80211_chanctx_refcount(local, old_ctx) == 0)
ieee80211_free_chanctx(local, old_ctx);
- ieee80211_recalc_chanctx_min_def(local, new_ctx);
+ ieee80211_recalc_chanctx_min_def(local, new_ctx, NULL);
ieee80211_recalc_smps_chanctx(local, new_ctx);
ieee80211_recalc_radar_chanctx(local, new_ctx);
@@ -1665,7 +1684,7 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
ieee80211_recalc_chanctx_chantype(local, ctx);
ieee80211_recalc_smps_chanctx(local, ctx);
ieee80211_recalc_radar_chanctx(local, ctx);
- ieee80211_recalc_chanctx_min_def(local, ctx);
+ ieee80211_recalc_chanctx_min_def(local, ctx, NULL);
list_for_each_entry_safe(link, link_tmp, &ctx->reserved_links,
reserved_chanctx_list) {
diff --git a/net/mac80211/he.c b/net/mac80211/he.c
index 729f261520c7..0322abae0825 100644
--- a/net/mac80211/he.c
+++ b/net/mac80211/he.c
@@ -3,7 +3,7 @@
* HE handling
*
* Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(c) 2019 - 2022 Intel Corporation
+ * Copyright(c) 2019 - 2023 Intel Corporation
*/
#include "ieee80211_i.h"
@@ -114,6 +114,7 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
struct link_sta_info *link_sta)
{
struct ieee80211_sta_he_cap *he_cap = &link_sta->pub->he_cap;
+ const struct ieee80211_sta_he_cap *own_he_cap_ptr;
struct ieee80211_sta_he_cap own_he_cap;
struct ieee80211_he_cap_elem *he_cap_ie_elem = (void *)he_cap_ie;
u8 he_ppe_size;
@@ -123,12 +124,16 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
memset(he_cap, 0, sizeof(*he_cap));
- if (!he_cap_ie ||
- !ieee80211_get_he_iftype_cap(sband,
- ieee80211_vif_type_p2p(&sdata->vif)))
+ if (!he_cap_ie)
return;
- own_he_cap = sband->iftype_data->he_cap;
+ own_he_cap_ptr =
+ ieee80211_get_he_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif));
+ if (!own_he_cap_ptr)
+ return;
+
+ own_he_cap = *own_he_cap_ptr;
/* Make sure size is OK */
mcs_nss_size = ieee80211_he_mcs_nss_size(he_cap_ie_elem);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index a0a7839cb961..4159fb65038b 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -2312,7 +2312,7 @@ ieee802_11_parse_elems(const u8 *start, size_t len, bool action,
return ieee802_11_parse_elems_crc(start, len, action, 0, 0, bss);
}
-void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos);
+void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id);
extern const int ieee802_1d_to_ac[8];
@@ -2537,7 +2537,8 @@ int ieee80211_chanctx_refcount(struct ieee80211_local *local,
void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local,
struct ieee80211_chanctx *chanctx);
void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
- struct ieee80211_chanctx *ctx);
+ struct ieee80211_chanctx *ctx,
+ struct ieee80211_link_data *rsvd_for);
bool ieee80211_is_radar_required(struct ieee80211_local *local);
void ieee80211_dfs_cac_timer(unsigned long data);
diff --git a/net/mac80211/link.c b/net/mac80211/link.c
index e82db88a47f8..40f030b8ece9 100644
--- a/net/mac80211/link.c
+++ b/net/mac80211/link.c
@@ -2,7 +2,7 @@
/*
* MLO link handling
*
- * Copyright (C) 2022 Intel Corporation
+ * Copyright (C) 2022-2023 Intel Corporation
*/
#include <linux/slab.h>
#include <linux/kernel.h>
@@ -409,6 +409,7 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata,
IEEE80211_CHANCTX_SHARED);
WARN_ON_ONCE(ret);
+ ieee80211_mgd_set_link_qos_params(link);
ieee80211_link_info_change_notify(sdata, link,
BSS_CHANGED_ERP_CTS_PROT |
BSS_CHANGED_ERP_PREAMBLE |
@@ -423,7 +424,6 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata,
BSS_CHANGED_TWT |
BSS_CHANGED_HE_OBSS_PD |
BSS_CHANGED_HE_BSS_COLOR);
- ieee80211_mgd_set_link_qos_params(link);
}
old_active = sdata->vif.active_links;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index e13a0354c397..5a4303130ef2 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1217,6 +1217,7 @@ static void ieee80211_add_non_inheritance_elem(struct sk_buff *skb,
const u16 *inner)
{
unsigned int skb_len = skb->len;
+ bool at_extension = false;
bool added = false;
int i, j;
u8 *len, *list_len = NULL;
@@ -1228,7 +1229,6 @@ static void ieee80211_add_non_inheritance_elem(struct sk_buff *skb,
for (i = 0; i < PRESENT_ELEMS_MAX && outer[i]; i++) {
u16 elem = outer[i];
bool have_inner = false;
- bool at_extension = false;
/* should at least be sorted in the sense of normal -> ext */
WARN_ON(at_extension && elem < PRESENT_ELEM_EXT_OFFS);
@@ -1257,8 +1257,14 @@ static void ieee80211_add_non_inheritance_elem(struct sk_buff *skb,
}
*list_len += 1;
skb_put_u8(skb, (u8)elem);
+ added = true;
}
+ /* if we added a list but no extension list, make a zero-len one */
+ if (added && (!at_extension || !list_len))
+ skb_put_u8(skb, 0);
+
+ /* if nothing added remove extension element completely */
if (!added)
skb_trim(skb, skb_len);
else
@@ -1366,10 +1372,11 @@ static void ieee80211_assoc_add_ml_elem(struct ieee80211_sub_if_data *sdata,
ieee80211_add_non_inheritance_elem(skb, outer_present_elems,
link_present_elems);
- ieee80211_fragment_element(skb, subelem_len);
+ ieee80211_fragment_element(skb, subelem_len,
+ IEEE80211_MLE_SUBELEM_FRAGMENT);
}
- ieee80211_fragment_element(skb, ml_elem_len);
+ ieee80211_fragment_element(skb, ml_elem_len, WLAN_EID_FRAGMENT);
}
static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 58222c077898..fc6e130364da 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2110,7 +2110,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
/* either the frame has been decrypted or will be dropped */
status->flag |= RX_FLAG_DECRYPTED;
- if (unlikely(ieee80211_is_beacon(fc) && result == RX_DROP_UNUSABLE &&
+ if (unlikely(ieee80211_is_beacon(fc) && (result & RX_DROP_UNUSABLE) &&
rx->sdata->dev))
cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev,
skb->data, skb->len);
@@ -4965,7 +4965,9 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
}
if (unlikely(rx->sta && rx->sta->sta.mlo) &&
- is_unicast_ether_addr(hdr->addr1)) {
+ is_unicast_ether_addr(hdr->addr1) &&
+ !ieee80211_is_probe_resp(hdr->frame_control) &&
+ !ieee80211_is_beacon(hdr->frame_control)) {
/* translate to MLD addresses */
if (ether_addr_equal(link->conf->addr, hdr->addr1))
ether_addr_copy(hdr->addr1, rx->sdata->vif.addr);
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index de5d69f21306..db0d0132c58c 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -67,7 +67,7 @@
__entry->min_freq_offset = (c)->chan ? (c)->chan->freq_offset : 0; \
__entry->min_chan_width = (c)->width; \
__entry->min_center_freq1 = (c)->center_freq1; \
- __entry->freq1_offset = (c)->freq1_offset; \
+ __entry->min_freq1_offset = (c)->freq1_offset; \
__entry->min_center_freq2 = (c)->center_freq2;
#define MIN_CHANDEF_PR_FMT " min_control:%d.%03d MHz min_width:%d min_center: %d.%03d/%d MHz"
#define MIN_CHANDEF_PR_ARG __entry->min_control_freq, __entry->min_freq_offset, \
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 1a3327407552..13b522dab0a3 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3791,6 +3791,7 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
ieee80211_tx_result r;
struct ieee80211_vif *vif = txq->vif;
int q = vif->hw_queue[txq->ac];
+ unsigned long flags;
bool q_stopped;
WARN_ON_ONCE(softirq_count() == 0);
@@ -3799,9 +3800,9 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
return NULL;
begin:
- spin_lock(&local->queue_stop_reason_lock);
+ spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
q_stopped = local->queue_stop_reasons[q];
- spin_unlock(&local->queue_stop_reason_lock);
+ spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
if (unlikely(q_stopped)) {
/* mark for waking later */
@@ -4444,7 +4445,7 @@ static void ieee80211_mlo_multicast_tx(struct net_device *dev,
struct sk_buff *skb)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
- unsigned long links = sdata->vif.valid_links;
+ unsigned long links = sdata->vif.active_links;
unsigned int link;
u32 ctrl_flags = IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX;
@@ -5527,7 +5528,7 @@ ieee80211_beacon_get_template_ema_list(struct ieee80211_hw *hw,
{
struct ieee80211_ema_beacons *ema_beacons = NULL;
- WARN_ON(__ieee80211_beacon_get(hw, vif, NULL, false, link_id, 0,
+ WARN_ON(__ieee80211_beacon_get(hw, vif, NULL, true, link_id, 0,
&ema_beacons));
return ema_beacons;
@@ -6039,7 +6040,7 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
rcu_read_unlock();
if (WARN_ON_ONCE(link == ARRAY_SIZE(sdata->vif.link_conf)))
- link = ffs(sdata->vif.valid_links) - 1;
+ link = ffs(sdata->vif.active_links) - 1;
}
IEEE80211_SKB_CB(skb)->control.flags |=
@@ -6075,7 +6076,7 @@ void ieee80211_tx_skb_tid(struct ieee80211_sub_if_data *sdata,
band = chanctx_conf->def.chan->band;
} else {
WARN_ON(link_id >= 0 &&
- !(sdata->vif.valid_links & BIT(link_id)));
+ !(sdata->vif.active_links & BIT(link_id)));
/* MLD transmissions must not rely on the band */
band = 0;
}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 1527d6aafc14..3bd07a0a782f 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -3015,7 +3015,7 @@ void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata,
chanctx = container_of(chanctx_conf, struct ieee80211_chanctx,
conf);
- ieee80211_recalc_chanctx_min_def(local, chanctx);
+ ieee80211_recalc_chanctx_min_def(local, chanctx, NULL);
}
unlock:
mutex_unlock(&local->chanctx_mtx);
@@ -5049,7 +5049,7 @@ u8 *ieee80211_ie_build_eht_cap(u8 *pos,
return pos;
}
-void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos)
+void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id)
{
unsigned int elem_len;
@@ -5069,7 +5069,7 @@ void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos)
memmove(len_pos + 255 + 3, len_pos + 255 + 1, elem_len);
/* place the fragment ID */
len_pos += 255 + 1;
- *len_pos = WLAN_EID_FRAGMENT;
+ *len_pos = frag_id;
/* and point to fragment length to update later */
len_pos++;
}
diff --git a/net/mac802154/trace.h b/net/mac802154/trace.h
index 689396d6c76a..1574ecc48075 100644
--- a/net/mac802154/trace.h
+++ b/net/mac802154/trace.h
@@ -14,7 +14,7 @@
#define MAXNAME 32
#define LOCAL_ENTRY __array(char, wpan_phy_name, MAXNAME)
-#define LOCAL_ASSIGN strlcpy(__entry->wpan_phy_name, \
+#define LOCAL_ASSIGN strscpy(__entry->wpan_phy_name, \
wpan_phy_name(local->hw.phy), MAXNAME)
#define LOCAL_PR_FMT "%s"
#define LOCAL_PR_ARG __entry->wpan_phy_name
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 78c924506e83..76612bca275a 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -87,8 +87,15 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
unsigned int subflows_max;
int ret = 0;
- if (mptcp_pm_is_userspace(msk))
- return mptcp_userspace_pm_active(msk);
+ if (mptcp_pm_is_userspace(msk)) {
+ if (mptcp_userspace_pm_active(msk)) {
+ spin_lock_bh(&pm->lock);
+ pm->subflows++;
+ spin_unlock_bh(&pm->lock);
+ return true;
+ }
+ return false;
+ }
subflows_max = mptcp_pm_get_subflows_max(msk);
@@ -181,8 +188,16 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
struct mptcp_pm_data *pm = &msk->pm;
bool update_subflows;
- update_subflows = (subflow->request_join || subflow->mp_join) &&
- mptcp_pm_is_kernel(msk);
+ update_subflows = subflow->request_join || subflow->mp_join;
+ if (mptcp_pm_is_userspace(msk)) {
+ if (update_subflows) {
+ spin_lock_bh(&pm->lock);
+ pm->subflows--;
+ spin_unlock_bh(&pm->lock);
+ }
+ return;
+ }
+
if (!READ_ONCE(pm->work_pending) && !update_subflows)
return;
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index bc343dab5e3f..1224dfca5bf3 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1047,6 +1047,7 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
if (err)
return err;
+ inet_sk_state_store(newsk, TCP_LISTEN);
err = kernel_listen(ssock, backlog);
if (err)
return err;
@@ -1558,6 +1559,24 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
return ret;
}
+void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list)
+{
+ struct mptcp_rm_list alist = { .nr = 0 };
+ struct mptcp_pm_addr_entry *entry;
+
+ list_for_each_entry(entry, rm_list, list) {
+ remove_anno_list_by_saddr(msk, &entry->addr);
+ if (alist.nr < MPTCP_RM_IDS_MAX)
+ alist.ids[alist.nr++] = entry->addr.id;
+ }
+
+ if (alist.nr) {
+ spin_lock_bh(&msk->pm.lock);
+ mptcp_pm_remove_addr(msk, &alist);
+ spin_unlock_bh(&msk->pm.lock);
+ }
+}
+
void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
struct list_head *rm_list)
{
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index 27a275805c06..b06aa58dfcf2 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -69,6 +69,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
MPTCP_PM_MAX_ADDR_ID + 1,
1);
list_add_tail_rcu(&e->list, &msk->pm.userspace_pm_local_addr_list);
+ msk->pm.local_addr_used++;
ret = e->addr.id;
} else if (match) {
ret = entry->addr.id;
@@ -79,6 +80,31 @@ append_err:
return ret;
}
+/* If the subflow is closed from the other peer (not via a
+ * subflow destroy command then), we want to keep the entry
+ * not to assign the same ID to another address and to be
+ * able to send RM_ADDR after the removal of the subflow.
+ */
+static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk,
+ struct mptcp_pm_addr_entry *addr)
+{
+ struct mptcp_pm_addr_entry *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &msk->pm.userspace_pm_local_addr_list, list) {
+ if (mptcp_addresses_equal(&entry->addr, &addr->addr, false)) {
+ /* TODO: a refcount is needed because the entry can
+ * be used multiple times (e.g. fullmesh mode).
+ */
+ list_del_rcu(&entry->list);
+ kfree(entry);
+ msk->pm.local_addr_used--;
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
unsigned int id,
u8 *flags, int *ifindex)
@@ -171,6 +197,7 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info)
spin_lock_bh(&msk->pm.lock);
if (mptcp_pm_alloc_anno_list(msk, &addr_val)) {
+ msk->pm.add_addr_signaled++;
mptcp_pm_announce_addr(msk, &addr_val.addr, false);
mptcp_pm_nl_addr_send_ack(msk);
}
@@ -232,7 +259,7 @@ int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info)
list_move(&match->list, &free_list);
- mptcp_pm_remove_addrs_and_subflows(msk, &free_list);
+ mptcp_pm_remove_addrs(msk, &free_list);
release_sock((struct sock *)msk);
@@ -251,6 +278,7 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info)
struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR];
+ struct mptcp_pm_addr_entry local = { 0 };
struct mptcp_addr_info addr_r;
struct mptcp_addr_info addr_l;
struct mptcp_sock *msk;
@@ -302,12 +330,26 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info)
goto create_err;
}
+ local.addr = addr_l;
+ err = mptcp_userspace_pm_append_new_local_addr(msk, &local);
+ if (err < 0) {
+ GENL_SET_ERR_MSG(info, "did not match address and id");
+ goto create_err;
+ }
+
lock_sock(sk);
err = __mptcp_subflow_connect(sk, &addr_l, &addr_r);
release_sock(sk);
+ spin_lock_bh(&msk->pm.lock);
+ if (err)
+ mptcp_userspace_pm_delete_local_addr(msk, &local);
+ else
+ msk->pm.subflows++;
+ spin_unlock_bh(&msk->pm.lock);
+
create_err:
sock_put((struct sock *)msk);
return err;
@@ -420,7 +462,11 @@ int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info)
ssk = mptcp_nl_find_ssk(msk, &addr_l, &addr_r);
if (ssk) {
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct mptcp_pm_addr_entry entry = { .addr = addr_l };
+ spin_lock_bh(&msk->pm.lock);
+ mptcp_userspace_pm_delete_local_addr(msk, &entry);
+ spin_unlock_bh(&msk->pm.lock);
mptcp_subflow_shutdown(sk, ssk, RCV_SHUTDOWN | SEND_SHUTDOWN);
mptcp_close_ssk(sk, ssk, subflow);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 08dc53f56bc2..a6c7f2d24909 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -44,7 +44,7 @@ enum {
static struct percpu_counter mptcp_sockets_allocated ____cacheline_aligned_in_smp;
static void __mptcp_destroy_sock(struct sock *sk);
-static void __mptcp_check_send_data_fin(struct sock *sk);
+static void mptcp_check_send_data_fin(struct sock *sk);
DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
static struct net_device mptcp_napi_dev;
@@ -90,8 +90,8 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
if (err)
return err;
- msk->first = ssock->sk;
- msk->subflow = ssock;
+ WRITE_ONCE(msk->first, ssock->sk);
+ WRITE_ONCE(msk->subflow, ssock);
subflow = mptcp_subflow_ctx(ssock->sk);
list_add(&subflow->node, &msk->conn_list);
sock_hold(ssock->sk);
@@ -424,8 +424,7 @@ static bool mptcp_pending_data_fin_ack(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- return !__mptcp_check_fallback(msk) &&
- ((1 << sk->sk_state) &
+ return ((1 << sk->sk_state) &
(TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK)) &&
msk->write_seq == READ_ONCE(msk->snd_una);
}
@@ -583,9 +582,6 @@ static bool mptcp_check_data_fin(struct sock *sk)
u64 rcv_data_fin_seq;
bool ret = false;
- if (__mptcp_check_fallback(msk))
- return ret;
-
/* Need to ack a DATA_FIN received from a peer while this side
* of the connection is in ESTABLISHED, FIN_WAIT1, or FIN_WAIT2.
* msk->rcv_data_fin was set when parsing the incoming options
@@ -603,7 +599,7 @@ static bool mptcp_check_data_fin(struct sock *sk)
WRITE_ONCE(msk->ack_seq, msk->ack_seq + 1);
WRITE_ONCE(msk->rcv_data_fin, 0);
- sk->sk_shutdown |= RCV_SHUTDOWN;
+ WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN);
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
switch (sk->sk_state) {
@@ -623,7 +619,8 @@ static bool mptcp_check_data_fin(struct sock *sk)
}
ret = true;
- mptcp_send_ack(msk);
+ if (!__mptcp_check_fallback(msk))
+ mptcp_send_ack(msk);
mptcp_close_wake_up(sk);
}
return ret;
@@ -825,6 +822,13 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
mptcp_data_unlock(sk);
}
+static void mptcp_subflow_joined(struct mptcp_sock *msk, struct sock *ssk)
+{
+ mptcp_subflow_ctx(ssk)->map_seq = READ_ONCE(msk->ack_seq);
+ WRITE_ONCE(msk->allow_infinite_fallback, false);
+ mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
+}
+
static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
{
struct sock *sk = (struct sock *)msk;
@@ -839,15 +843,16 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
mptcp_sock_graft(ssk, sk->sk_socket);
mptcp_sockopt_sync_locked(msk, ssk);
+ mptcp_subflow_joined(msk, ssk);
return true;
}
-static void __mptcp_flush_join_list(struct sock *sk)
+static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list)
{
struct mptcp_subflow_context *tmp, *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
- list_for_each_entry_safe(subflow, tmp, &msk->join_list, node) {
+ list_for_each_entry_safe(subflow, tmp, join_list, node) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bool slow = lock_sock_fast(ssk);
@@ -889,49 +894,6 @@ bool mptcp_schedule_work(struct sock *sk)
return false;
}
-void mptcp_subflow_eof(struct sock *sk)
-{
- if (!test_and_set_bit(MPTCP_WORK_EOF, &mptcp_sk(sk)->flags))
- mptcp_schedule_work(sk);
-}
-
-static void mptcp_check_for_eof(struct mptcp_sock *msk)
-{
- struct mptcp_subflow_context *subflow;
- struct sock *sk = (struct sock *)msk;
- int receivers = 0;
-
- mptcp_for_each_subflow(msk, subflow)
- receivers += !subflow->rx_eof;
- if (receivers)
- return;
-
- if (!(sk->sk_shutdown & RCV_SHUTDOWN)) {
- /* hopefully temporary hack: propagate shutdown status
- * to msk, when all subflows agree on it
- */
- sk->sk_shutdown |= RCV_SHUTDOWN;
-
- smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
- sk->sk_data_ready(sk);
- }
-
- switch (sk->sk_state) {
- case TCP_ESTABLISHED:
- inet_sk_state_store(sk, TCP_CLOSE_WAIT);
- break;
- case TCP_FIN_WAIT1:
- inet_sk_state_store(sk, TCP_CLOSING);
- break;
- case TCP_FIN_WAIT2:
- inet_sk_state_store(sk, TCP_CLOSE);
- break;
- default:
- return;
- }
- mptcp_close_wake_up(sk);
-}
-
static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
@@ -1601,7 +1563,7 @@ out:
if (!mptcp_timer_pending(sk))
mptcp_reset_timer(sk);
if (do_check_data_fin)
- __mptcp_check_send_data_fin(sk);
+ mptcp_check_send_data_fin(sk);
}
static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool first)
@@ -1702,7 +1664,6 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
lock_sock(ssk);
msg->msg_flags |= MSG_DONTWAIT;
- msk->connect_flags = O_NONBLOCK;
msk->fastopening = 1;
ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL);
msk->fastopening = 0;
@@ -1720,7 +1681,13 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR)
*copied_syn = 0;
} else if (ret && ret != -EINPROGRESS) {
- mptcp_disconnect(sk, 0);
+ /* The disconnect() op called by tcp_sendmsg_fastopen()/
+ * __inet_stream_connect() can fail, due to looking check,
+ * see mptcp_disconnect().
+ * Attempt it again outside the problematic scope.
+ */
+ if (!mptcp_disconnect(sk, 0))
+ sk->sk_socket->state = SS_UNCONNECTED;
}
inet_sk(sk)->defer_connect = 0;
@@ -2151,9 +2118,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
break;
}
- if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
- mptcp_check_for_eof(msk);
-
if (sk->sk_shutdown & RCV_SHUTDOWN) {
/* race breaker: the shutdown could be after the
* previous receive queue check
@@ -2283,7 +2247,7 @@ static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
{
if (msk->subflow) {
iput(SOCK_INODE(msk->subflow));
- msk->subflow = NULL;
+ WRITE_ONCE(msk->subflow, NULL);
}
}
@@ -2382,7 +2346,10 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk);
if (!dispose_it) {
- tcp_disconnect(ssk, 0);
+ /* The MPTCP code never wait on the subflow sockets, TCP-level
+ * disconnect should never fail
+ */
+ WARN_ON_ONCE(tcp_disconnect(ssk, 0));
msk->subflow->state = SS_UNCONNECTED;
mptcp_subflow_ctx_reset(subflow);
release_sock(ssk);
@@ -2401,13 +2368,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
kfree_rcu(subflow, rcu);
} else {
/* otherwise tcp will dispose of the ssk and subflow ctx */
- if (ssk->sk_state == TCP_LISTEN) {
- tcp_set_state(ssk, TCP_CLOSE);
- mptcp_subflow_queue_clean(sk, ssk);
- inet_csk_listen_stop(ssk);
- mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
- }
-
__tcp_close(ssk, 0);
/* close acquired an extra ref */
@@ -2420,7 +2380,7 @@ out_release:
sock_put(ssk);
if (ssk == msk->first)
- msk->first = NULL;
+ WRITE_ONCE(msk->first, NULL);
out:
if (ssk == msk->last_snd)
@@ -2527,7 +2487,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
}
inet_sk_state_store(sk, TCP_CLOSE);
- sk->sk_shutdown = SHUTDOWN_MASK;
+ WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags);
@@ -2664,16 +2624,12 @@ static void mptcp_worker(struct work_struct *work)
if (unlikely((1 << state) & (TCPF_CLOSE | TCPF_LISTEN)))
goto unlock;
- mptcp_check_data_fin_ack(sk);
-
mptcp_check_fastclose(msk);
mptcp_pm_nl_work(msk);
- if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
- mptcp_check_for_eof(msk);
-
- __mptcp_check_send_data_fin(sk);
+ mptcp_check_send_data_fin(sk);
+ mptcp_check_data_fin_ack(sk);
mptcp_check_data_fin(sk);
if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
@@ -2721,7 +2677,7 @@ static int __mptcp_init_sock(struct sock *sk)
WRITE_ONCE(msk->rmem_released, 0);
msk->timer_ival = TCP_RTO_MIN;
- msk->first = NULL;
+ WRITE_ONCE(msk->first, NULL);
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
WRITE_ONCE(msk->allow_infinite_fallback, true);
@@ -2805,13 +2761,19 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
break;
fallthrough;
case TCP_SYN_SENT:
- tcp_disconnect(ssk, O_NONBLOCK);
+ WARN_ON_ONCE(tcp_disconnect(ssk, O_NONBLOCK));
break;
default:
if (__mptcp_check_fallback(mptcp_sk(sk))) {
pr_debug("Fallback");
ssk->sk_shutdown |= how;
tcp_shutdown(ssk, how);
+
+ /* simulate the data_fin ack reception to let the state
+ * machine move forward
+ */
+ WRITE_ONCE(mptcp_sk(sk)->snd_una, mptcp_sk(sk)->snd_nxt);
+ mptcp_schedule_work(sk);
} else {
pr_debug("Sending DATA_FIN on subflow %p", ssk);
tcp_send_ack(ssk);
@@ -2851,7 +2813,7 @@ static int mptcp_close_state(struct sock *sk)
return next & TCP_ACTION_FIN;
}
-static void __mptcp_check_send_data_fin(struct sock *sk)
+static void mptcp_check_send_data_fin(struct sock *sk)
{
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -2869,19 +2831,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk)
WRITE_ONCE(msk->snd_nxt, msk->write_seq);
- /* fallback socket will not get data_fin/ack, can move to the next
- * state now
- */
- if (__mptcp_check_fallback(msk)) {
- WRITE_ONCE(msk->snd_una, msk->write_seq);
- if ((1 << sk->sk_state) & (TCPF_CLOSING | TCPF_LAST_ACK)) {
- inet_sk_state_store(sk, TCP_CLOSE);
- mptcp_close_wake_up(sk);
- } else if (sk->sk_state == TCP_FIN_WAIT1) {
- inet_sk_state_store(sk, TCP_FIN_WAIT2);
- }
- }
-
mptcp_for_each_subflow(msk, subflow) {
struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
@@ -2901,7 +2850,7 @@ static void __mptcp_wr_shutdown(struct sock *sk)
WRITE_ONCE(msk->write_seq, msk->write_seq + 1);
WRITE_ONCE(msk->snd_data_fin_enable, 1);
- __mptcp_check_send_data_fin(sk);
+ mptcp_check_send_data_fin(sk);
}
static void __mptcp_destroy_sock(struct sock *sk)
@@ -2946,10 +2895,24 @@ static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
return EPOLLIN | EPOLLRDNORM;
}
-static void mptcp_listen_inuse_dec(struct sock *sk)
+static void mptcp_check_listen_stop(struct sock *sk)
{
- if (inet_sk_state_load(sk) == TCP_LISTEN)
- sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+ struct sock *ssk;
+
+ if (inet_sk_state_load(sk) != TCP_LISTEN)
+ return;
+
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+ ssk = mptcp_sk(sk)->first;
+ if (WARN_ON_ONCE(!ssk || inet_sk_state_load(ssk) != TCP_LISTEN))
+ return;
+
+ lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
+ mptcp_subflow_queue_clean(sk, ssk);
+ inet_csk_listen_stop(ssk);
+ mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
+ tcp_set_state(ssk, TCP_CLOSE);
+ release_sock(ssk);
}
bool __mptcp_close(struct sock *sk, long timeout)
@@ -2959,10 +2922,10 @@ bool __mptcp_close(struct sock *sk, long timeout)
bool do_cancel_work = false;
int subflows_alive = 0;
- sk->sk_shutdown = SHUTDOWN_MASK;
+ WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) {
- mptcp_listen_inuse_dec(sk);
+ mptcp_check_listen_stop(sk);
inet_sk_state_store(sk, TCP_CLOSE);
goto cleanup;
}
@@ -3039,7 +3002,7 @@ static void mptcp_close(struct sock *sk, long timeout)
sock_put(sk);
}
-void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
+static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
const struct ipv6_pinfo *ssk6 = inet6_sk(ssk);
@@ -3066,15 +3029,20 @@ static int mptcp_disconnect(struct sock *sk, int flags)
{
struct mptcp_sock *msk = mptcp_sk(sk);
+ /* Deny disconnect if other threads are blocked in sk_wait_event()
+ * or inet_wait_for_connect().
+ */
+ if (sk->sk_wait_pending)
+ return -EBUSY;
+
/* We are on the fastopen error path. We can't call straight into the
* subflows cleanup code due to lock nesting (we are already under
- * msk->firstsocket lock). Do nothing and leave the cleanup to the
- * caller.
+ * msk->firstsocket lock).
*/
if (msk->fastopening)
- return 0;
+ return -EBUSY;
- mptcp_listen_inuse_dec(sk);
+ mptcp_check_listen_stop(sk);
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_stop_timer(sk);
@@ -3102,7 +3070,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
mptcp_pm_data_reset(msk);
mptcp_ca_reset(sk);
- sk->sk_shutdown = 0;
+ WRITE_ONCE(sk->sk_shutdown, 0);
sk_error_report(sk);
return 0;
}
@@ -3116,9 +3084,10 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
}
#endif
-struct sock *mptcp_sk_clone(const struct sock *sk,
- const struct mptcp_options_received *mp_opt,
- struct request_sock *req)
+struct sock *mptcp_sk_clone_init(const struct sock *sk,
+ const struct mptcp_options_received *mp_opt,
+ struct sock *ssk,
+ struct request_sock *req)
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
@@ -3132,12 +3101,13 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk);
#endif
+ nsk->sk_wait_pending = 0;
__mptcp_init_sock(nsk);
msk = mptcp_sk(nsk);
msk->local_key = subflow_req->local_key;
msk->token = subflow_req->token;
- msk->subflow = NULL;
+ WRITE_ONCE(msk->subflow, NULL);
msk->in_accept_queue = 1;
WRITE_ONCE(msk->fully_established, false);
if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
@@ -3150,10 +3120,30 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
sock_reset_flag(nsk, SOCK_RCU_FREE);
- /* will be fully established after successful MPC subflow creation */
- inet_sk_state_store(nsk, TCP_SYN_RECV);
-
security_inet_csk_clone(nsk, req);
+
+ /* this can't race with mptcp_close(), as the msk is
+ * not yet exposted to user-space
+ */
+ inet_sk_state_store(nsk, TCP_ESTABLISHED);
+
+ /* The msk maintain a ref to each subflow in the connections list */
+ WRITE_ONCE(msk->first, ssk);
+ list_add(&mptcp_subflow_ctx(ssk)->node, &msk->conn_list);
+ sock_hold(ssk);
+
+ /* new mpc subflow takes ownership of the newly
+ * created mptcp socket
+ */
+ mptcp_token_accept(subflow_req, msk);
+
+ /* set msk addresses early to ensure mptcp_pm_get_local_id()
+ * uses the correct data
+ */
+ mptcp_copy_inaddrs(nsk, ssk);
+ mptcp_propagate_sndbuf(nsk, ssk);
+
+ mptcp_rcv_space_init(msk, ssk);
bh_unlock_sock(nsk);
/* note: the newly allocated socket refcount is 2 now */
@@ -3185,7 +3175,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
struct socket *listener;
struct sock *newsk;
- listener = msk->subflow;
+ listener = READ_ONCE(msk->subflow);
if (WARN_ON_ONCE(!listener)) {
*err = -EINVAL;
return NULL;
@@ -3299,9 +3289,14 @@ static void mptcp_release_cb(struct sock *sk)
for (;;) {
unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) |
msk->push_pending;
+ struct list_head join_list;
+
if (!flags)
break;
+ INIT_LIST_HEAD(&join_list);
+ list_splice_init(&msk->join_list, &join_list);
+
/* the following actions acquire the subflow socket lock
*
* 1) can't be invoked in atomic scope
@@ -3312,8 +3307,9 @@ static void mptcp_release_cb(struct sock *sk)
msk->push_pending = 0;
msk->cb_flags &= ~flags;
spin_unlock_bh(&sk->sk_lock.slock);
+
if (flags & BIT(MPTCP_FLUSH_JOIN_LIST))
- __mptcp_flush_join_list(sk);
+ __mptcp_flush_join_list(sk, &join_list);
if (flags & BIT(MPTCP_PUSH_PENDING))
__mptcp_push_pending(sk, 0);
if (flags & BIT(MPTCP_RETRANSMIT))
@@ -3465,14 +3461,16 @@ bool mptcp_finish_join(struct sock *ssk)
return false;
}
- if (!list_empty(&subflow->node))
- goto out;
+ /* active subflow, already present inside the conn_list */
+ if (!list_empty(&subflow->node)) {
+ mptcp_subflow_joined(msk, ssk);
+ return true;
+ }
if (!mptcp_pm_allow_new_subflow(msk))
goto err_prohibited;
- /* active connections are already on conn_list.
- * If we can't acquire msk socket lock here, let the release callback
+ /* If we can't acquire msk socket lock here, let the release callback
* handle it
*/
mptcp_data_lock(parent);
@@ -3495,11 +3493,6 @@ err_prohibited:
return false;
}
- subflow->map_seq = READ_ONCE(msk->ack_seq);
- WRITE_ONCE(msk->allow_infinite_fallback, false);
-
-out:
- mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
return true;
}
@@ -3617,9 +3610,9 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
* acquired the subflow socket lock, too.
*/
if (msk->fastopening)
- err = __inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags, 1);
+ err = __inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK, 1);
else
- err = inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags);
+ err = inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK);
inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
/* on successful connect, the msk state will be moved to established by
@@ -3632,12 +3625,10 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
mptcp_copy_inaddrs(sk, ssock->sk);
- /* unblocking connect, mptcp-level inet_stream_connect will error out
- * without changing the socket state, update it here.
+ /* silence EINPROGRESS and let the caller inet_stream_connect
+ * handle the connection in progress
*/
- if (err == -EINPROGRESS)
- sk->sk_socket->state = ssock->state;
- return err;
+ return 0;
}
static struct proto mptcp_prot = {
@@ -3696,18 +3687,6 @@ unlock:
return err;
}
-static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
- int addr_len, int flags)
-{
- int ret;
-
- lock_sock(sock->sk);
- mptcp_sk(sock->sk)->connect_flags = flags;
- ret = __inet_stream_connect(sock, uaddr, addr_len, flags, 0);
- release_sock(sock->sk);
- return ret;
-}
-
static int mptcp_listen(struct socket *sock, int backlog)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
@@ -3751,10 +3730,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
pr_debug("msk=%p", msk);
- /* buggy applications can call accept on socket states other then LISTEN
+ /* Buggy applications can call accept on socket states other then LISTEN
* but no need to allocate the first subflow just to error out.
*/
- ssock = msk->subflow;
+ ssock = READ_ONCE(msk->subflow);
if (!ssock)
return -EINVAL;
@@ -3800,9 +3779,6 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
- if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN))
- return EPOLLOUT | EPOLLWRNORM;
-
if (sk_stream_is_writeable(sk))
return EPOLLOUT | EPOLLWRNORM;
@@ -3820,6 +3796,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
struct sock *sk = sock->sk;
struct mptcp_sock *msk;
__poll_t mask = 0;
+ u8 shutdown;
int state;
msk = mptcp_sk(sk);
@@ -3828,23 +3805,30 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
state = inet_sk_state_load(sk);
pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags);
if (state == TCP_LISTEN) {
- if (WARN_ON_ONCE(!msk->subflow || !msk->subflow->sk))
+ struct socket *ssock = READ_ONCE(msk->subflow);
+
+ if (WARN_ON_ONCE(!ssock || !ssock->sk))
return 0;
- return inet_csk_listen_poll(msk->subflow->sk);
+ return inet_csk_listen_poll(ssock->sk);
}
+ shutdown = READ_ONCE(sk->sk_shutdown);
+ if (shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
+ mask |= EPOLLHUP;
+ if (shutdown & RCV_SHUTDOWN)
+ mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
+
if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) {
mask |= mptcp_check_readable(msk);
- mask |= mptcp_check_writeable(msk);
+ if (shutdown & SEND_SHUTDOWN)
+ mask |= EPOLLOUT | EPOLLWRNORM;
+ else
+ mask |= mptcp_check_writeable(msk);
} else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
/* cf tcp_poll() note about TFO */
mask |= EPOLLOUT | EPOLLWRNORM;
}
- if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
- mask |= EPOLLHUP;
- if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
/* This barrier is coupled with smp_wmb() in __mptcp_error_report() */
smp_rmb();
@@ -3859,7 +3843,7 @@ static const struct proto_ops mptcp_stream_ops = {
.owner = THIS_MODULE,
.release = inet_release,
.bind = mptcp_bind,
- .connect = mptcp_stream_connect,
+ .connect = inet_stream_connect,
.socketpair = sock_no_socketpair,
.accept = mptcp_stream_accept,
.getname = inet_getname,
@@ -3954,7 +3938,7 @@ static const struct proto_ops mptcp_v6_stream_ops = {
.owner = THIS_MODULE,
.release = inet6_release,
.bind = mptcp_bind,
- .connect = mptcp_stream_connect,
+ .connect = inet_stream_connect,
.socketpair = sock_no_socketpair,
.accept = mptcp_stream_accept,
.getname = inet6_getname,
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 2d7b2c80a164..d3783a7056e1 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -113,7 +113,6 @@
/* MPTCP socket atomic flags */
#define MPTCP_NOSPACE 1
#define MPTCP_WORK_RTX 2
-#define MPTCP_WORK_EOF 3
#define MPTCP_FALLBACK_DONE 4
#define MPTCP_WORK_CLOSE_SUBFLOW 5
@@ -297,7 +296,6 @@ struct mptcp_sock {
nodelay:1,
fastopening:1,
in_accept_queue:1;
- int connect_flags;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
@@ -306,7 +304,11 @@ struct mptcp_sock {
struct list_head rtx_queue;
struct mptcp_data_frag *first_pending;
struct list_head join_list;
- struct socket *subflow; /* outgoing connect/listener/!mp_capable */
+ struct socket *subflow; /* outgoing connect/listener/!mp_capable
+ * The mptcp ops can safely dereference, using suitable
+ * ONCE annotation, the subflow outside the socket
+ * lock as such sock is freed after close().
+ */
struct sock *first;
struct mptcp_pm_data pm;
struct {
@@ -473,14 +475,13 @@ struct mptcp_subflow_context {
send_mp_fail : 1,
send_fastclose : 1,
send_infinite_map : 1,
- rx_eof : 1,
remote_key_valid : 1, /* received the peer key from */
disposable : 1, /* ctx can be free at ulp release time */
stale : 1, /* unable to snd/rcv data, do not use for xmit */
local_id_valid : 1, /* local_id is correctly initialized */
valid_csum_seen : 1, /* at least one csum validated */
is_mptfo : 1, /* subflow is doing TFO */
- __unused : 8;
+ __unused : 9;
enum mptcp_data_avail data_avail;
u32 remote_nonce;
u64 thmac;
@@ -613,7 +614,6 @@ int mptcp_is_checksum_enabled(const struct net *net);
int mptcp_allow_join_id0(const struct net *net);
unsigned int mptcp_stale_loss_cnt(const struct net *net);
int mptcp_get_pm_type(const struct net *net);
-void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
@@ -683,9 +683,10 @@ void __init mptcp_proto_init(void);
int __init mptcp_proto_v6_init(void);
#endif
-struct sock *mptcp_sk_clone(const struct sock *sk,
- const struct mptcp_options_received *mp_opt,
- struct request_sock *req);
+struct sock *mptcp_sk_clone_init(const struct sock *sk,
+ const struct mptcp_options_received *mp_opt,
+ struct sock *ssk,
+ struct request_sock *req);
void mptcp_get_options(const struct sk_buff *skb,
struct mptcp_options_received *mp_opt);
@@ -717,7 +718,6 @@ static inline u64 mptcp_expand_seq(u64 old_seq, u64 cur_seq, bool use_64bit)
void __mptcp_check_push(struct sock *sk, struct sock *ssk);
void __mptcp_data_acked(struct sock *sk);
void __mptcp_error_report(struct sock *sk);
-void mptcp_subflow_eof(struct sock *sk);
bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit);
static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
{
@@ -829,6 +829,7 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk,
bool echo);
int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
+void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list);
void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
struct list_head *rm_list);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index ba065b66551a..d9c8b21c6076 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -815,38 +815,12 @@ create_child:
ctx->setsockopt_seq = listener->setsockopt_seq;
if (ctx->mp_capable) {
- ctx->conn = mptcp_sk_clone(listener->conn, &mp_opt, req);
+ ctx->conn = mptcp_sk_clone_init(listener->conn, &mp_opt, child, req);
if (!ctx->conn)
goto fallback;
owner = mptcp_sk(ctx->conn);
-
- /* this can't race with mptcp_close(), as the msk is
- * not yet exposted to user-space
- */
- inet_sk_state_store(ctx->conn, TCP_ESTABLISHED);
-
- /* record the newly created socket as the first msk
- * subflow, but don't link it yet into conn_list
- */
- WRITE_ONCE(owner->first, child);
-
- /* new mpc subflow takes ownership of the newly
- * created mptcp socket
- */
- owner->setsockopt_seq = ctx->setsockopt_seq;
mptcp_pm_new_connection(owner, child, 1);
- mptcp_token_accept(subflow_req, owner);
-
- /* set msk addresses early to ensure mptcp_pm_get_local_id()
- * uses the correct data
- */
- mptcp_copy_inaddrs(ctx->conn, child);
- mptcp_propagate_sndbuf(ctx->conn, child);
-
- mptcp_rcv_space_init(owner, child);
- list_add(&ctx->node, &owner->conn_list);
- sock_hold(child);
/* with OoO packets we can reach here without ingress
* mpc option
@@ -1775,14 +1749,16 @@ static void subflow_state_change(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct sock *parent = subflow->conn;
+ struct mptcp_sock *msk;
__subflow_state_change(sk);
+ msk = mptcp_sk(parent);
if (subflow_simultaneous_connect(sk)) {
mptcp_propagate_sndbuf(parent, sk);
mptcp_do_fallback(sk);
- mptcp_rcv_space_init(mptcp_sk(parent), sk);
- pr_fallback(mptcp_sk(parent));
+ mptcp_rcv_space_init(msk, sk);
+ pr_fallback(msk);
subflow->conn_finished = 1;
mptcp_set_connected(parent);
}
@@ -1798,11 +1774,12 @@ static void subflow_state_change(struct sock *sk)
subflow_sched_work_if_closed(mptcp_sk(parent), sk);
- if (__mptcp_check_fallback(mptcp_sk(parent)) &&
- !subflow->rx_eof && subflow_is_done(sk)) {
- subflow->rx_eof = 1;
- mptcp_subflow_eof(parent);
- }
+ /* when the fallback subflow closes the rx side, trigger a 'dummy'
+ * ingress data fin, so that the msk state will follow along
+ */
+ if (__mptcp_check_fallback(msk) && subflow_is_done(sk) && msk->first == sk &&
+ mptcp_update_rcv_data_fin(msk, READ_ONCE(msk->ack_seq), true))
+ mptcp_schedule_work(parent);
}
void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 46ebee9400da..9a6b64779e64 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1694,6 +1694,14 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb,
bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
do {
+ if (retried) {
+ __ip_set_get(set);
+ nfnl_unlock(NFNL_SUBSYS_IPSET);
+ cond_resched();
+ nfnl_lock(NFNL_SUBSYS_IPSET);
+ __ip_set_put(set);
+ }
+
ip_set_lock(set);
ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
ip_set_unlock(set);
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index feb1d7fcb09f..a80b960223e1 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -1207,6 +1207,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->transport_header = skb->network_header;
skb_set_inner_ipproto(skb, next_protocol);
+ skb_set_inner_mac_header(skb, skb_inner_network_offset(skb));
if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
bool check = false;
@@ -1349,6 +1350,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->transport_header = skb->network_header;
skb_set_inner_ipproto(skb, next_protocol);
+ skb_set_inner_mac_header(skb, skb_inner_network_offset(skb));
if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
bool check = false;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c4ccfec6cb98..d119f1d4c2fc 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2260,6 +2260,9 @@ static int nf_confirm_cthelper(struct sk_buff *skb, struct nf_conn *ct,
return 0;
helper = rcu_dereference(help->helper);
+ if (!helper)
+ return 0;
+
if (!(helper->flags & NF_CT_HELPER_F_USERSPACE))
return 0;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index d40544cd61a6..69c8c8c7e9b8 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2976,7 +2976,9 @@ nla_put_failure:
return -1;
}
+#if IS_ENABLED(CONFIG_NF_NAT)
static const union nf_inet_addr any_addr;
+#endif
static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp)
{
@@ -3460,10 +3462,12 @@ ctnetlink_change_expect(struct nf_conntrack_expect *x,
return 0;
}
+#if IS_ENABLED(CONFIG_NF_NAT)
static const struct nla_policy exp_nat_nla_policy[CTA_EXPECT_NAT_MAX+1] = {
[CTA_EXPECT_NAT_DIR] = { .type = NLA_U32 },
[CTA_EXPECT_NAT_TUPLE] = { .type = NLA_NESTED },
};
+#endif
static int
ctnetlink_parse_expect_nat(const struct nlattr *attr,
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 04bd0ed4d2ae..b0ef48b21dcb 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -317,12 +317,12 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
EXPORT_SYMBOL_GPL(flow_offload_add);
void flow_offload_refresh(struct nf_flowtable *flow_table,
- struct flow_offload *flow)
+ struct flow_offload *flow, bool force)
{
u32 timeout;
timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
- if (timeout - READ_ONCE(flow->timeout) > HZ)
+ if (force || timeout - READ_ONCE(flow->timeout) > HZ)
WRITE_ONCE(flow->timeout, timeout);
else
return;
@@ -334,6 +334,12 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
}
EXPORT_SYMBOL_GPL(flow_offload_refresh);
+static bool nf_flow_is_outdated(const struct flow_offload *flow)
+{
+ return test_bit(IPS_SEEN_REPLY_BIT, &flow->ct->status) &&
+ !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags);
+}
+
static inline bool nf_flow_has_expired(const struct flow_offload *flow)
{
return nf_flow_timeout_delta(flow->timeout) <= 0;
@@ -423,7 +429,8 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
struct flow_offload *flow, void *data)
{
if (nf_flow_has_expired(flow) ||
- nf_ct_is_dying(flow->ct))
+ nf_ct_is_dying(flow->ct) ||
+ nf_flow_is_outdated(flow))
flow_offload_teardown(flow);
if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 19efba1e51ef..3bbaf9c7ea46 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -384,7 +384,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
if (skb_try_make_writable(skb, thoff + hdrsize))
return NF_DROP;
- flow_offload_refresh(flow_table, flow);
+ flow_offload_refresh(flow_table, flow, false);
nf_flow_encap_pop(skb, tuplehash);
thoff -= offset;
@@ -650,7 +650,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
if (skb_try_make_writable(skb, thoff + hdrsize))
return NF_DROP;
- flow_offload_refresh(flow_table, flow);
+ flow_offload_refresh(flow_table, flow, false);
nf_flow_encap_pop(skb, tuplehash);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 59fb8320ab4d..4c7937fd803f 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -151,6 +151,7 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx,
return NULL;
INIT_LIST_HEAD(&trans->list);
+ INIT_LIST_HEAD(&trans->binding_list);
trans->msg_type = msg_type;
trans->ctx = *ctx;
@@ -163,13 +164,20 @@ static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx,
return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL);
}
-static void nft_trans_destroy(struct nft_trans *trans)
+static void nft_trans_list_del(struct nft_trans *trans)
{
list_del(&trans->list);
+ list_del(&trans->binding_list);
+}
+
+static void nft_trans_destroy(struct nft_trans *trans)
+{
+ nft_trans_list_del(trans);
kfree(trans);
}
-static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
+static void __nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set,
+ bool bind)
{
struct nftables_pernet *nft_net;
struct net *net = ctx->net;
@@ -183,16 +191,80 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
switch (trans->msg_type) {
case NFT_MSG_NEWSET:
if (nft_trans_set(trans) == set)
- nft_trans_set_bound(trans) = true;
+ nft_trans_set_bound(trans) = bind;
break;
case NFT_MSG_NEWSETELEM:
if (nft_trans_elem_set(trans) == set)
- nft_trans_elem_set_bound(trans) = true;
+ nft_trans_elem_set_bound(trans) = bind;
+ break;
+ }
+ }
+}
+
+static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ return __nft_set_trans_bind(ctx, set, true);
+}
+
+static void nft_set_trans_unbind(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ return __nft_set_trans_bind(ctx, set, false);
+}
+
+static void __nft_chain_trans_bind(const struct nft_ctx *ctx,
+ struct nft_chain *chain, bool bind)
+{
+ struct nftables_pernet *nft_net;
+ struct net *net = ctx->net;
+ struct nft_trans *trans;
+
+ if (!nft_chain_binding(chain))
+ return;
+
+ nft_net = nft_pernet(net);
+ list_for_each_entry_reverse(trans, &nft_net->commit_list, list) {
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWCHAIN:
+ if (nft_trans_chain(trans) == chain)
+ nft_trans_chain_bound(trans) = bind;
+ break;
+ case NFT_MSG_NEWRULE:
+ if (trans->ctx.chain == chain)
+ nft_trans_rule_bound(trans) = bind;
break;
}
}
}
+static void nft_chain_trans_bind(const struct nft_ctx *ctx,
+ struct nft_chain *chain)
+{
+ __nft_chain_trans_bind(ctx, chain, true);
+}
+
+int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain)
+{
+ if (!nft_chain_binding(chain))
+ return 0;
+
+ if (nft_chain_binding(ctx->chain))
+ return -EOPNOTSUPP;
+
+ if (chain->bound)
+ return -EBUSY;
+
+ chain->bound = true;
+ chain->use++;
+ nft_chain_trans_bind(ctx, chain);
+
+ return 0;
+}
+
+void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain)
+{
+ __nft_chain_trans_bind(ctx, chain, false);
+}
+
static int nft_netdev_register_hooks(struct net *net,
struct list_head *hook_list)
{
@@ -292,6 +364,19 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr
{
struct nftables_pernet *nft_net = nft_pernet(net);
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWSET:
+ if (!nft_trans_set_update(trans) &&
+ nft_set_is_anonymous(nft_trans_set(trans)))
+ list_add_tail(&trans->binding_list, &nft_net->binding_list);
+ break;
+ case NFT_MSG_NEWCHAIN:
+ if (!nft_trans_chain_update(trans) &&
+ nft_chain_binding(nft_trans_chain(trans)))
+ list_add_tail(&trans->binding_list, &nft_net->binding_list);
+ break;
+ }
+
list_add_tail(&trans->list, &nft_net->commit_list);
}
@@ -338,8 +423,9 @@ static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
ntohl(nla_get_be32(ctx->nla[NFTA_CHAIN_ID]));
}
}
-
+ nft_trans_chain(trans) = ctx->chain;
nft_trans_commit_list_add_tail(ctx->net, trans);
+
return trans;
}
@@ -357,8 +443,7 @@ static int nft_delchain(struct nft_ctx *ctx)
return 0;
}
-static void nft_rule_expr_activate(const struct nft_ctx *ctx,
- struct nft_rule *rule)
+void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule)
{
struct nft_expr *expr;
@@ -371,9 +456,8 @@ static void nft_rule_expr_activate(const struct nft_ctx *ctx,
}
}
-static void nft_rule_expr_deactivate(const struct nft_ctx *ctx,
- struct nft_rule *rule,
- enum nft_trans_phase phase)
+void nft_rule_expr_deactivate(const struct nft_ctx *ctx, struct nft_rule *rule,
+ enum nft_trans_phase phase)
{
struct nft_expr *expr;
@@ -495,6 +579,58 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
return __nft_trans_set_add(ctx, msg_type, set, NULL);
}
+static void nft_setelem_data_deactivate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem);
+
+static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ const struct nft_set_iter *iter,
+ struct nft_set_elem *elem)
+{
+ nft_setelem_data_deactivate(ctx->net, set, elem);
+
+ return 0;
+}
+
+struct nft_set_elem_catchall {
+ struct list_head list;
+ struct rcu_head rcu;
+ void *elem;
+};
+
+static void nft_map_catchall_deactivate(const struct nft_ctx *ctx,
+ struct nft_set *set)
+{
+ u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_set_elem_catchall *catchall;
+ struct nft_set_elem elem;
+ struct nft_set_ext *ext;
+
+ list_for_each_entry(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+ if (!nft_set_elem_active(ext, genmask))
+ continue;
+
+ elem.priv = catchall->elem;
+ nft_setelem_data_deactivate(ctx->net, set, &elem);
+ break;
+ }
+}
+
+static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ struct nft_set_iter iter = {
+ .genmask = nft_genmask_next(ctx->net),
+ .fn = nft_mapelem_deactivate,
+ };
+
+ set->ops->walk(ctx, set, &iter);
+ WARN_ON_ONCE(iter.err);
+
+ nft_map_catchall_deactivate(ctx, set);
+}
+
static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
{
int err;
@@ -503,6 +639,9 @@ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
if (err < 0)
return err;
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(ctx, set);
+
nft_deactivate_next(ctx->net, set);
ctx->table->use--;
@@ -1600,6 +1739,8 @@ static int nft_dump_basechain_hook(struct sk_buff *skb, int family,
if (nft_base_chain_netdev(family, ops->hooknum)) {
nest_devs = nla_nest_start_noflag(skb, NFTA_HOOK_DEVS);
+ if (!nest_devs)
+ goto nla_put_failure;
if (!hook_list)
hook_list = &basechain->hook_list;
@@ -2224,7 +2365,7 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family,
return 0;
}
-static int nft_chain_add(struct nft_table *table, struct nft_chain *chain)
+int nft_chain_add(struct nft_table *table, struct nft_chain *chain)
{
int err;
@@ -2526,6 +2667,8 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
nft_trans_basechain(trans) = basechain;
INIT_LIST_HEAD(&nft_trans_chain_hooks(trans));
list_splice(&hook.list, &nft_trans_chain_hooks(trans));
+ if (nla[NFTA_CHAIN_HOOK])
+ module_put(hook.type->owner);
nft_trans_commit_list_add_tail(ctx->net, trans);
@@ -2668,21 +2811,18 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info,
return nf_tables_addchain(&ctx, family, genmask, policy, flags, extack);
}
-static int nft_delchain_hook(struct nft_ctx *ctx, struct nft_chain *chain,
+static int nft_delchain_hook(struct nft_ctx *ctx,
+ struct nft_base_chain *basechain,
struct netlink_ext_ack *extack)
{
+ const struct nft_chain *chain = &basechain->chain;
const struct nlattr * const *nla = ctx->nla;
struct nft_chain_hook chain_hook = {};
- struct nft_base_chain *basechain;
struct nft_hook *this, *hook;
LIST_HEAD(chain_del_list);
struct nft_trans *trans;
int err;
- if (!nft_is_base_chain(chain))
- return -EOPNOTSUPP;
-
- basechain = nft_base_chain(chain);
err = nft_chain_parse_hook(ctx->net, basechain, nla, &chain_hook,
ctx->family, chain->flags, extack);
if (err < 0)
@@ -2767,7 +2907,12 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info,
if (chain->flags & NFT_CHAIN_HW_OFFLOAD)
return -EOPNOTSUPP;
- return nft_delchain_hook(&ctx, chain, extack);
+ if (nft_is_base_chain(chain)) {
+ struct nft_base_chain *basechain = nft_base_chain(chain);
+
+ if (nft_base_chain_netdev(table->family, basechain->ops.hooknum))
+ return nft_delchain_hook(&ctx, basechain, extack);
+ }
}
if (info->nlh->nlmsg_flags & NLM_F_NONREC &&
@@ -3488,8 +3633,7 @@ err_fill_rule_info:
return err;
}
-static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
- struct nft_rule *rule)
+void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule)
{
struct nft_expr *expr, *next;
@@ -3506,7 +3650,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
kfree(rule);
}
-void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule)
+static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule)
{
nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE);
nf_tables_rule_destroy(ctx, rule);
@@ -3594,12 +3738,6 @@ int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
return 0;
}
-struct nft_set_elem_catchall {
- struct list_head list;
- struct rcu_head rcu;
- void *elem;
-};
-
int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set)
{
u8 genmask = nft_genmask_next(ctx->net);
@@ -3842,7 +3980,8 @@ err_destroy_flow_rule:
if (flow)
nft_flow_rule_destroy(flow);
err_release_rule:
- nf_tables_rule_release(&ctx, rule);
+ nft_rule_expr_deactivate(&ctx, rule, NFT_TRANS_PREPARE_ERROR);
+ nf_tables_rule_destroy(&ctx, rule);
err_release_expr:
for (i = 0; i < n; i++) {
if (expr_info[i].ops) {
@@ -3865,12 +4004,10 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
struct nft_trans *trans;
list_for_each_entry(trans, &nft_net->commit_list, list) {
- struct nft_rule *rule = nft_trans_rule(trans);
-
if (trans->msg_type == NFT_MSG_NEWRULE &&
trans->ctx.chain == chain &&
id == nft_trans_rule_id(trans))
- return rule;
+ return nft_trans_rule(trans);
}
return ERR_PTR(-ENOENT);
}
@@ -4776,6 +4913,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;
+ if (flags & NFT_SET_ANONYMOUS)
+ return -EOPNOTSUPP;
+
err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout);
if (err)
return err;
@@ -4784,6 +4924,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;
+
+ if (flags & NFT_SET_ANONYMOUS)
+ return -EOPNOTSUPP;
+
desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
}
@@ -4830,6 +4974,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
+ if (nft_set_is_anonymous(set))
+ return -EOPNOTSUPP;
+
err = nft_set_expr_alloc(&ctx, set, nla, exprs, &num_exprs, flags);
if (err < 0)
return err;
@@ -4919,6 +5066,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
set->num_exprs = num_exprs;
set->handle = nf_tables_alloc_handle(table);
+ INIT_LIST_HEAD(&set->pending_update);
err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
if (err < 0)
@@ -4932,7 +5080,7 @@ err_set_expr_alloc:
for (i = 0; i < set->num_exprs; i++)
nft_expr_destroy(&ctx, set->exprs[i]);
err_set_destroy:
- ops->destroy(set);
+ ops->destroy(&ctx, set);
err_set_init:
kfree(set->name);
err_set_name:
@@ -4947,7 +5095,7 @@ static void nft_set_catchall_destroy(const struct nft_ctx *ctx,
list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
list_del_rcu(&catchall->list);
- nft_set_elem_destroy(set, catchall->elem, true);
+ nf_tables_set_elem_destroy(ctx, set, catchall->elem);
kfree_rcu(catchall, rcu);
}
}
@@ -4962,7 +5110,7 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
for (i = 0; i < set->num_exprs; i++)
nft_expr_destroy(ctx, set->exprs[i]);
- set->ops->destroy(set);
+ set->ops->destroy(ctx, set);
nft_set_catchall_destroy(ctx, set);
kfree(set->name);
kvfree(set);
@@ -5127,10 +5275,60 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
}
}
+static void nft_setelem_data_activate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem);
+
+static int nft_mapelem_activate(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ const struct nft_set_iter *iter,
+ struct nft_set_elem *elem)
+{
+ nft_setelem_data_activate(ctx->net, set, elem);
+
+ return 0;
+}
+
+static void nft_map_catchall_activate(const struct nft_ctx *ctx,
+ struct nft_set *set)
+{
+ u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_set_elem_catchall *catchall;
+ struct nft_set_elem elem;
+ struct nft_set_ext *ext;
+
+ list_for_each_entry(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+ if (!nft_set_elem_active(ext, genmask))
+ continue;
+
+ elem.priv = catchall->elem;
+ nft_setelem_data_activate(ctx->net, set, &elem);
+ break;
+ }
+}
+
+static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ struct nft_set_iter iter = {
+ .genmask = nft_genmask_next(ctx->net),
+ .fn = nft_mapelem_activate,
+ };
+
+ set->ops->walk(ctx, set, &iter);
+ WARN_ON_ONCE(iter.err);
+
+ nft_map_catchall_activate(ctx, set);
+}
+
void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set)
{
- if (nft_set_is_anonymous(set))
+ if (nft_set_is_anonymous(set)) {
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_activate(ctx, set);
+
nft_clear(ctx->net, set);
+ }
set->use++;
}
@@ -5141,14 +5339,28 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
enum nft_trans_phase phase)
{
switch (phase) {
- case NFT_TRANS_PREPARE:
+ case NFT_TRANS_PREPARE_ERROR:
+ nft_set_trans_unbind(ctx, set);
if (nft_set_is_anonymous(set))
nft_deactivate_next(ctx->net, set);
set->use--;
+ break;
+ case NFT_TRANS_PREPARE:
+ if (nft_set_is_anonymous(set)) {
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(ctx, set);
+
+ nft_deactivate_next(ctx->net, set);
+ }
+ set->use--;
return;
case NFT_TRANS_ABORT:
case NFT_TRANS_RELEASE:
+ if (nft_set_is_anonymous(set) &&
+ set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(ctx, set);
+
set->use--;
fallthrough;
default:
@@ -5901,6 +6113,7 @@ static void nft_set_elem_expr_destroy(const struct nft_ctx *ctx,
__nft_set_elem_expr_destroy(ctx, expr);
}
+/* Drop references and destroy. Called from gc, dynset and abort path. */
void nft_set_elem_destroy(const struct nft_set *set, void *elem,
bool destroy_expr)
{
@@ -5922,11 +6135,11 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
}
EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
-/* Only called from commit path, nft_setelem_data_deactivate() already deals
- * with the refcounting from the preparation phase.
+/* Destroy element. References have been already dropped in the preparation
+ * path via nft_setelem_data_deactivate().
*/
-static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
- const struct nft_set *set, void *elem)
+void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set, void *elem)
{
struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
@@ -6489,19 +6702,19 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (flags)
*nft_set_ext_flags(ext) = flags;
+ if (obj) {
+ *nft_set_ext_obj(ext) = obj;
+ obj->use++;
+ }
if (ulen > 0) {
if (nft_set_ext_check(&tmpl, NFT_SET_EXT_USERDATA, ulen) < 0) {
err = -EINVAL;
- goto err_elem_userdata;
+ goto err_elem_free;
}
udata = nft_set_ext_userdata(ext);
udata->len = ulen - 1;
nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen);
}
- if (obj) {
- *nft_set_ext_obj(ext) = obj;
- obj->use++;
- }
err = nft_set_elem_expr_setup(ctx, &tmpl, ext, expr_array, num_exprs);
if (err < 0)
goto err_elem_free;
@@ -6556,10 +6769,7 @@ err_set_full:
err_element_clash:
kfree(trans);
err_elem_free:
- if (obj)
- obj->use--;
-err_elem_userdata:
- nf_tables_set_elem_destroy(ctx, set, elem.priv);
+ nft_set_elem_destroy(set, elem.priv, true);
err_parse_data:
if (nla[NFTA_SET_ELEM_DATA] != NULL)
nft_data_release(&elem.data.val, desc.type);
@@ -6603,7 +6813,8 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
if (IS_ERR(set))
return PTR_ERR(set);
- if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
+ if (!list_empty(&set->bindings) &&
+ (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS)))
return -EBUSY;
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
@@ -6636,7 +6847,6 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
{
struct nft_chain *chain;
- struct nft_rule *rule;
if (type == NFT_DATA_VERDICT) {
switch (data->verdict.code) {
@@ -6644,15 +6854,6 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
case NFT_GOTO:
chain = data->verdict.chain;
chain->use++;
-
- if (!nft_chain_is_bound(chain))
- break;
-
- chain->table->use++;
- list_for_each_entry(rule, &chain->rules, list)
- chain->use++;
-
- nft_chain_add(chain->table, chain);
break;
}
}
@@ -6887,7 +7088,9 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
if (IS_ERR(set))
return PTR_ERR(set);
- if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
+
+ if (!list_empty(&set->bindings) &&
+ (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS)))
return -EBUSY;
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
@@ -7669,6 +7872,7 @@ void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx,
enum nft_trans_phase phase)
{
switch (phase) {
+ case NFT_TRANS_PREPARE_ERROR:
case NFT_TRANS_PREPARE:
case NFT_TRANS_ABORT:
case NFT_TRANS_RELEASE:
@@ -8941,7 +9145,7 @@ static void nf_tables_trans_destroy_work(struct work_struct *w)
synchronize_rcu();
list_for_each_entry_safe(trans, next, &head, list) {
- list_del(&trans->list);
+ nft_trans_list_del(trans);
nft_commit_release(trans);
}
}
@@ -9007,7 +9211,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
continue;
}
- if (WARN_ON_ONCE(data + expr->ops->size > data_boundary))
+ if (WARN_ON_ONCE(data + size + expr->ops->size > data_boundary))
return -ENOMEM;
memcpy(data + size, expr, expr->ops->size);
@@ -9275,10 +9479,25 @@ static void nf_tables_commit_audit_log(struct list_head *adl, u32 generation)
}
}
+static void nft_set_commit_update(struct list_head *set_update_list)
+{
+ struct nft_set *set, *next;
+
+ list_for_each_entry_safe(set, next, set_update_list, pending_update) {
+ list_del_init(&set->pending_update);
+
+ if (!set->ops->commit)
+ continue;
+
+ set->ops->commit(set);
+ }
+}
+
static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_trans *trans, *next;
+ LIST_HEAD(set_update_list);
struct nft_trans_elem *te;
struct nft_chain *chain;
struct nft_table *table;
@@ -9291,6 +9510,27 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
return 0;
}
+ list_for_each_entry(trans, &nft_net->binding_list, binding_list) {
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWSET:
+ if (!nft_trans_set_update(trans) &&
+ nft_set_is_anonymous(nft_trans_set(trans)) &&
+ !nft_trans_set_bound(trans)) {
+ pr_warn_once("nftables ruleset with unbound set\n");
+ return -EINVAL;
+ }
+ break;
+ case NFT_MSG_NEWCHAIN:
+ if (!nft_trans_chain_update(trans) &&
+ nft_chain_binding(nft_trans_chain(trans)) &&
+ !nft_trans_chain_bound(trans)) {
+ pr_warn_once("nftables ruleset with unbound chain\n");
+ return -EINVAL;
+ }
+ break;
+ }
+ }
+
/* 0. Validate ruleset, otherwise roll back for error reporting. */
if (nf_tables_validate(net) < 0)
return -EAGAIN;
@@ -9453,6 +9693,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_setelem_notify(&trans->ctx, te->set,
&te->elem,
NFT_MSG_NEWSETELEM);
+ if (te->set->ops->commit &&
+ list_empty(&te->set->pending_update)) {
+ list_add_tail(&te->set->pending_update,
+ &set_update_list);
+ }
nft_trans_destroy(trans);
break;
case NFT_MSG_DELSETELEM:
@@ -9467,6 +9712,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
atomic_dec(&te->set->nelems);
te->set->ndeact--;
}
+ if (te->set->ops->commit &&
+ list_empty(&te->set->pending_update)) {
+ list_add_tail(&te->set->pending_update,
+ &set_update_list);
+ }
break;
case NFT_MSG_NEWOBJ:
if (nft_trans_obj_update(trans)) {
@@ -9529,6 +9779,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
}
}
+ nft_set_commit_update(&set_update_list);
+
nft_commit_notify(net, NETLINK_CB(skb).portid);
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
nf_tables_commit_audit_log(&adl, nft_net->base_seq);
@@ -9588,10 +9840,25 @@ static void nf_tables_abort_release(struct nft_trans *trans)
kfree(trans);
}
+static void nft_set_abort_update(struct list_head *set_update_list)
+{
+ struct nft_set *set, *next;
+
+ list_for_each_entry_safe(set, next, set_update_list, pending_update) {
+ list_del_init(&set->pending_update);
+
+ if (!set->ops->abort)
+ continue;
+
+ set->ops->abort(set);
+ }
+}
+
static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
{
struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_trans *trans, *next;
+ LIST_HEAD(set_update_list);
struct nft_trans_elem *te;
if (action == NFNL_ABORT_VALIDATE &&
@@ -9633,7 +9900,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
kfree(nft_trans_chain_name(trans));
nft_trans_destroy(trans);
} else {
- if (nft_chain_is_bound(trans->ctx.chain)) {
+ if (nft_trans_chain_bound(trans)) {
nft_trans_destroy(trans);
break;
}
@@ -9656,6 +9923,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWRULE:
+ if (nft_trans_rule_bound(trans)) {
+ nft_trans_destroy(trans);
+ break;
+ }
trans->ctx.chain->use--;
list_del_rcu(&nft_trans_rule(trans)->list);
nft_rule_expr_deactivate(&trans->ctx,
@@ -9690,6 +9961,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
case NFT_MSG_DESTROYSET:
trans->ctx.table->use++;
nft_clear(trans->ctx.net, nft_trans_set(trans));
+ if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_activate(&trans->ctx, nft_trans_set(trans));
+
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSETELEM:
@@ -9701,6 +9975,12 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_setelem_remove(net, te->set, &te->elem);
if (!nft_setelem_is_catchall(te->set, &te->elem))
atomic_dec(&te->set->nelems);
+
+ if (te->set->ops->abort &&
+ list_empty(&te->set->pending_update)) {
+ list_add_tail(&te->set->pending_update,
+ &set_update_list);
+ }
break;
case NFT_MSG_DELSETELEM:
case NFT_MSG_DESTROYSETELEM:
@@ -9711,6 +9991,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
if (!nft_setelem_is_catchall(te->set, &te->elem))
te->set->ndeact--;
+ if (te->set->ops->abort &&
+ list_empty(&te->set->pending_update)) {
+ list_add_tail(&te->set->pending_update,
+ &set_update_list);
+ }
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWOBJ:
@@ -9753,11 +10038,13 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
}
}
+ nft_set_abort_update(&set_update_list);
+
synchronize_rcu();
list_for_each_entry_safe_reverse(trans, next,
&nft_net->commit_list, list) {
- list_del(&trans->list);
+ nft_trans_list_del(trans);
nf_tables_abort_release(trans);
}
@@ -10206,22 +10493,12 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
static void nft_verdict_uninit(const struct nft_data *data)
{
struct nft_chain *chain;
- struct nft_rule *rule;
switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
chain = data->verdict.chain;
chain->use--;
-
- if (!nft_chain_is_bound(chain))
- break;
-
- chain->table->use--;
- list_for_each_entry(rule, &chain->rules, list)
- chain->use--;
-
- nft_chain_del(chain);
break;
}
}
@@ -10456,6 +10733,9 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
list_for_each_entry_safe(set, ns, &table->sets, list) {
list_del(&set->list);
table->use--;
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(&ctx, set);
+
nft_set_destroy(&ctx, set);
}
list_for_each_entry_safe(obj, ne, &table->objects, list) {
@@ -10540,6 +10820,7 @@ static int __net_init nf_tables_init_net(struct net *net)
INIT_LIST_HEAD(&nft_net->tables);
INIT_LIST_HEAD(&nft_net->commit_list);
+ INIT_LIST_HEAD(&nft_net->binding_list);
INIT_LIST_HEAD(&nft_net->module_list);
INIT_LIST_HEAD(&nft_net->notify_list);
mutex_init(&nft_net->commit_mutex);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index ae7146475d17..c9fbe0f707b5 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -533,7 +533,8 @@ ack:
* processed, this avoids that the same error is
* reported several times when replaying the batch.
*/
- if (nfnl_err_add(&err_list, nlh, err, &extack) < 0) {
+ if (err == -ENOMEM ||
+ nfnl_err_add(&err_list, nlh, err, &extack) < 0) {
/* We failed to enqueue an error, reset the
* list of errors and send OOM to userspace
* pointing to the batch header.
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index ee6840bd5933..8f1bfa6ccc2d 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -439,3 +439,4 @@ module_init(nfnl_osf_init);
module_exit(nfnl_osf_fini);
MODULE_LICENSE("GPL");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF);
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index 84eae7cabc67..2527a01486ef 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -323,7 +323,7 @@ static bool nft_bitwise_reduce(struct nft_regs_track *track,
dreg = priv->dreg;
regcount = DIV_ROUND_UP(priv->len, NFT_REG32_SIZE);
for (i = 0; i < regcount; i++, dreg++)
- track->regs[priv->dreg].bitwise = expr;
+ track->regs[dreg].bitwise = expr;
return false;
}
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index c9d2f7c29f53..3d76ebfe8939 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -76,11 +76,9 @@ static int nft_immediate_init(const struct nft_ctx *ctx,
switch (priv->data.verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
- if (nft_chain_is_bound(chain)) {
- err = -EBUSY;
- goto err1;
- }
- chain->bound = true;
+ err = nf_tables_bind_chain(ctx, chain);
+ if (err < 0)
+ return err;
break;
default:
break;
@@ -98,6 +96,31 @@ static void nft_immediate_activate(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+ const struct nft_data *data = &priv->data;
+ struct nft_ctx chain_ctx;
+ struct nft_chain *chain;
+ struct nft_rule *rule;
+
+ if (priv->dreg == NFT_REG_VERDICT) {
+ switch (data->verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ chain = data->verdict.chain;
+ if (!nft_chain_binding(chain))
+ break;
+
+ chain_ctx = *ctx;
+ chain_ctx.chain = chain;
+
+ list_for_each_entry(rule, &chain->rules, list)
+ nft_rule_expr_activate(&chain_ctx, rule);
+
+ nft_clear(ctx->net, chain);
+ break;
+ default:
+ break;
+ }
+ }
return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg));
}
@@ -107,6 +130,43 @@ static void nft_immediate_deactivate(const struct nft_ctx *ctx,
enum nft_trans_phase phase)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+ const struct nft_data *data = &priv->data;
+ struct nft_ctx chain_ctx;
+ struct nft_chain *chain;
+ struct nft_rule *rule;
+
+ if (priv->dreg == NFT_REG_VERDICT) {
+ switch (data->verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ chain = data->verdict.chain;
+ if (!nft_chain_binding(chain))
+ break;
+
+ chain_ctx = *ctx;
+ chain_ctx.chain = chain;
+
+ list_for_each_entry(rule, &chain->rules, list)
+ nft_rule_expr_deactivate(&chain_ctx, rule, phase);
+
+ switch (phase) {
+ case NFT_TRANS_PREPARE_ERROR:
+ nf_tables_unbind_chain(ctx, chain);
+ fallthrough;
+ case NFT_TRANS_PREPARE:
+ nft_deactivate_next(ctx->net, chain);
+ break;
+ default:
+ nft_chain_del(chain);
+ chain->bound = false;
+ chain->table->use--;
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ }
if (phase == NFT_TRANS_COMMIT)
return;
@@ -131,15 +191,27 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx,
case NFT_GOTO:
chain = data->verdict.chain;
- if (!nft_chain_is_bound(chain))
+ if (!nft_chain_binding(chain))
+ break;
+
+ /* Rule construction failed, but chain is already bound:
+ * let the transaction records release this chain and its rules.
+ */
+ if (chain->bound) {
+ chain->use--;
break;
+ }
+ /* Rule has been deleted, release chain and its rules. */
chain_ctx = *ctx;
chain_ctx.chain = chain;
- list_for_each_entry_safe(rule, n, &chain->rules, list)
- nf_tables_rule_release(&chain_ctx, rule);
-
+ chain->use--;
+ list_for_each_entry_safe(rule, n, &chain->rules, list) {
+ chain->use--;
+ list_del(&rule->list);
+ nf_tables_rule_destroy(&chain_ctx, rule);
+ }
nf_tables_chain_destroy(&chain_ctx);
break;
default:
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 96081ac8d2b4..1e5e7a181e0b 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -271,13 +271,14 @@ static int nft_bitmap_init(const struct nft_set *set,
return 0;
}
-static void nft_bitmap_destroy(const struct nft_set *set)
+static void nft_bitmap_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_bitmap *priv = nft_set_priv(set);
struct nft_bitmap_elem *be, *n;
list_for_each_entry_safe(be, n, &priv->list, head)
- nft_set_elem_destroy(set, be, true);
+ nf_tables_set_elem_destroy(ctx, set, be);
}
static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 76de6c8d9865..0b73cb0e752f 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -400,19 +400,31 @@ static int nft_rhash_init(const struct nft_set *set,
return 0;
}
+struct nft_rhash_ctx {
+ const struct nft_ctx ctx;
+ const struct nft_set *set;
+};
+
static void nft_rhash_elem_destroy(void *ptr, void *arg)
{
- nft_set_elem_destroy(arg, ptr, true);
+ struct nft_rhash_ctx *rhash_ctx = arg;
+
+ nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, ptr);
}
-static void nft_rhash_destroy(const struct nft_set *set)
+static void nft_rhash_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_rhash *priv = nft_set_priv(set);
+ struct nft_rhash_ctx rhash_ctx = {
+ .ctx = *ctx,
+ .set = set,
+ };
cancel_delayed_work_sync(&priv->gc_work);
rcu_barrier();
rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
- (void *)set);
+ (void *)&rhash_ctx);
}
/* Number of buckets is stored in u32, so cap our result to 1U<<31 */
@@ -643,7 +655,8 @@ static int nft_hash_init(const struct nft_set *set,
return 0;
}
-static void nft_hash_destroy(const struct nft_set *set)
+static void nft_hash_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_hash *priv = nft_set_priv(set);
struct nft_hash_elem *he;
@@ -653,7 +666,7 @@ static void nft_hash_destroy(const struct nft_set *set)
for (i = 0; i < priv->buckets; i++) {
hlist_for_each_entry_safe(he, next, &priv->table[i], node) {
hlist_del_rcu(&he->node);
- nft_set_elem_destroy(set, he, true);
+ nf_tables_set_elem_destroy(ctx, set, he);
}
}
}
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 06d46d182634..0452ee586c1c 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1600,17 +1600,10 @@ static void pipapo_free_fields(struct nft_pipapo_match *m)
}
}
-/**
- * pipapo_reclaim_match - RCU callback to free fields from old matching data
- * @rcu: RCU head
- */
-static void pipapo_reclaim_match(struct rcu_head *rcu)
+static void pipapo_free_match(struct nft_pipapo_match *m)
{
- struct nft_pipapo_match *m;
int i;
- m = container_of(rcu, struct nft_pipapo_match, rcu);
-
for_each_possible_cpu(i)
kfree(*per_cpu_ptr(m->scratch, i));
@@ -1625,7 +1618,19 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
}
/**
- * pipapo_commit() - Replace lookup data with current working copy
+ * pipapo_reclaim_match - RCU callback to free fields from old matching data
+ * @rcu: RCU head
+ */
+static void pipapo_reclaim_match(struct rcu_head *rcu)
+{
+ struct nft_pipapo_match *m;
+
+ m = container_of(rcu, struct nft_pipapo_match, rcu);
+ pipapo_free_match(m);
+}
+
+/**
+ * nft_pipapo_commit() - Replace lookup data with current working copy
* @set: nftables API set representation
*
* While at it, check if we should perform garbage collection on the working
@@ -1635,7 +1640,7 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
* We also need to create a new working copy for subsequent insertions and
* deletions.
*/
-static void pipapo_commit(const struct nft_set *set)
+static void nft_pipapo_commit(const struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *new_clone, *old;
@@ -1660,6 +1665,26 @@ static void pipapo_commit(const struct nft_set *set)
priv->clone = new_clone;
}
+static void nft_pipapo_abort(const struct nft_set *set)
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_match *new_clone, *m;
+
+ if (!priv->dirty)
+ return;
+
+ m = rcu_dereference(priv->match);
+
+ new_clone = pipapo_clone(m);
+ if (IS_ERR(new_clone))
+ return;
+
+ priv->dirty = false;
+
+ pipapo_free_match(priv->clone);
+ priv->clone = new_clone;
+}
+
/**
* nft_pipapo_activate() - Mark element reference as active given key, commit
* @net: Network namespace
@@ -1667,8 +1692,7 @@ static void pipapo_commit(const struct nft_set *set)
* @elem: nftables API element representation containing key data
*
* On insertion, elements are added to a copy of the matching data currently
- * in use for lookups, and not directly inserted into current lookup data, so
- * we'll take care of that by calling pipapo_commit() here. Both
+ * in use for lookups, and not directly inserted into current lookup data. Both
* nft_pipapo_insert() and nft_pipapo_activate() are called once for each
* element, hence we can't purpose either one as a real commit operation.
*/
@@ -1684,8 +1708,6 @@ static void nft_pipapo_activate(const struct net *net,
nft_set_elem_change_active(net, set, &e->ext);
nft_set_elem_clear_busy(&e->ext);
-
- pipapo_commit(set);
}
/**
@@ -1931,7 +1953,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
if (i == m->field_count) {
priv->dirty = true;
pipapo_drop(m, rulemap);
- pipapo_commit(set);
return;
}
@@ -1953,12 +1974,16 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_iter *iter)
{
struct nft_pipapo *priv = nft_set_priv(set);
+ struct net *net = read_pnet(&set->net);
struct nft_pipapo_match *m;
struct nft_pipapo_field *f;
int i, r;
rcu_read_lock();
- m = rcu_dereference(priv->match);
+ if (iter->genmask == nft_genmask_cur(net))
+ m = rcu_dereference(priv->match);
+ else
+ m = priv->clone;
if (unlikely(!m))
goto out;
@@ -2127,10 +2152,12 @@ out_scratch:
/**
* nft_set_pipapo_match_destroy() - Destroy elements from key mapping array
+ * @ctx: context
* @set: nftables API set representation
* @m: matching data pointing to key mapping array
*/
-static void nft_set_pipapo_match_destroy(const struct nft_set *set,
+static void nft_set_pipapo_match_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set,
struct nft_pipapo_match *m)
{
struct nft_pipapo_field *f;
@@ -2147,15 +2174,17 @@ static void nft_set_pipapo_match_destroy(const struct nft_set *set,
e = f->mt[r].e;
- nft_set_elem_destroy(set, e, true);
+ nf_tables_set_elem_destroy(ctx, set, e);
}
}
/**
* nft_pipapo_destroy() - Free private data for set and all committed elements
+ * @ctx: context
* @set: nftables API set representation
*/
-static void nft_pipapo_destroy(const struct nft_set *set)
+static void nft_pipapo_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m;
@@ -2165,7 +2194,7 @@ static void nft_pipapo_destroy(const struct nft_set *set)
if (m) {
rcu_barrier();
- nft_set_pipapo_match_destroy(set, m);
+ nft_set_pipapo_match_destroy(ctx, set, m);
#ifdef NFT_PIPAPO_ALIGN
free_percpu(m->scratch_aligned);
@@ -2182,7 +2211,7 @@ static void nft_pipapo_destroy(const struct nft_set *set)
m = priv->clone;
if (priv->dirty)
- nft_set_pipapo_match_destroy(set, m);
+ nft_set_pipapo_match_destroy(ctx, set, m);
#ifdef NFT_PIPAPO_ALIGN
free_percpu(priv->clone->scratch_aligned);
@@ -2230,6 +2259,8 @@ const struct nft_set_type nft_set_pipapo_type = {
.init = nft_pipapo_init,
.destroy = nft_pipapo_destroy,
.gc_init = nft_pipapo_gc_init,
+ .commit = nft_pipapo_commit,
+ .abort = nft_pipapo_abort,
.elemsize = offsetof(struct nft_pipapo_elem, ext),
},
};
@@ -2252,6 +2283,8 @@ const struct nft_set_type nft_set_pipapo_avx2_type = {
.init = nft_pipapo_init,
.destroy = nft_pipapo_destroy,
.gc_init = nft_pipapo_gc_init,
+ .commit = nft_pipapo_commit,
+ .abort = nft_pipapo_abort,
.elemsize = offsetof(struct nft_pipapo_elem, ext),
},
};
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 19ea4d3c3553..5c05c9b990fb 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -221,7 +221,7 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
{
struct nft_set *set = (struct nft_set *)__set;
struct rb_node *prev = rb_prev(&rbe->node);
- struct nft_rbtree_elem *rbe_prev;
+ struct nft_rbtree_elem *rbe_prev = NULL;
struct nft_set_gc_batch *gcb;
gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC);
@@ -229,17 +229,21 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
return -ENOMEM;
/* search for expired end interval coming before this element. */
- do {
+ while (prev) {
rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
if (nft_rbtree_interval_end(rbe_prev))
break;
prev = rb_prev(prev);
- } while (prev != NULL);
+ }
+
+ if (rbe_prev) {
+ rb_erase(&rbe_prev->node, &priv->root);
+ atomic_dec(&set->nelems);
+ }
- rb_erase(&rbe_prev->node, &priv->root);
rb_erase(&rbe->node, &priv->root);
- atomic_sub(2, &set->nelems);
+ atomic_dec(&set->nelems);
nft_set_gc_batch_add(gcb, rbe);
nft_set_gc_batch_complete(gcb);
@@ -268,7 +272,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_set_ext **ext)
{
struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
- struct rb_node *node, *parent, **p, *first = NULL;
+ struct rb_node *node, *next, *parent, **p, *first = NULL;
struct nft_rbtree *priv = nft_set_priv(set);
u8 genmask = nft_genmask_next(net);
int d, err;
@@ -307,7 +311,9 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
* Values stored in the tree are in reversed order, starting from
* highest to lowest value.
*/
- for (node = first; node != NULL; node = rb_next(node)) {
+ for (node = first; node != NULL; node = next) {
+ next = rb_next(node);
+
rbe = rb_entry(node, struct nft_rbtree_elem, node);
if (!nft_set_elem_active(&rbe->ext, genmask))
@@ -658,7 +664,8 @@ static int nft_rbtree_init(const struct nft_set *set,
return 0;
}
-static void nft_rbtree_destroy(const struct nft_set *set)
+static void nft_rbtree_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe;
@@ -669,7 +676,7 @@ static void nft_rbtree_destroy(const struct nft_set *set)
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
- nft_set_elem_destroy(set, rbe, true);
+ nf_tables_set_elem_destroy(ctx, set, rbe);
}
}
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index e1990baf3a3b..dc9485854002 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -71,4 +71,3 @@ MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
MODULE_DESCRIPTION("Passive OS fingerprint matching.");
MODULE_ALIAS("ipt_osf");
MODULE_ALIAS("ip6t_osf");
-MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF);
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 54c083003947..27511c90a26f 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -857,7 +857,8 @@ int netlbl_catmap_setlong(struct netlbl_lsm_catmap **catmap,
offset -= iter->startbit;
idx = offset / NETLBL_CATMAP_MAPSIZE;
- iter->bitmap[idx] |= bitmap << (offset % NETLBL_CATMAP_MAPSIZE);
+ iter->bitmap[idx] |= (NETLBL_CATMAP_MAPTYPE)bitmap
+ << (offset % NETLBL_CATMAP_MAPSIZE);
return 0;
}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index c87804112d0c..3a1e0fd5bf14 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1779,7 +1779,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
break;
}
}
- if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen))
+ if (put_user(ALIGN(BITS_TO_BYTES(nlk->ngroups), sizeof(u32)), optlen))
err = -EFAULT;
netlink_unlock_table();
return err;
diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c
index 3f99b432ea70..e2d2af924cff 100644
--- a/net/netrom/nr_subr.c
+++ b/net/netrom/nr_subr.c
@@ -123,7 +123,7 @@ void nr_write_internal(struct sock *sk, int frametype)
unsigned char *dptr;
int len, timeout;
- len = NR_NETWORK_LEN + NR_TRANSPORT_LEN;
+ len = NR_TRANSPORT_LEN;
switch (frametype & 0x0F) {
case NR_CONNREQ:
@@ -141,7 +141,8 @@ void nr_write_internal(struct sock *sk, int frametype)
return;
}
- if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL)
+ skb = alloc_skb(NR_NETWORK_LEN + len, GFP_ATOMIC);
+ if (!skb)
return;
/*
@@ -149,7 +150,7 @@ void nr_write_internal(struct sock *sk, int frametype)
*/
skb_reserve(skb, NR_NETWORK_LEN);
- dptr = skb_put(skb, skb_tailroom(skb));
+ dptr = skb_put(skb, len);
switch (frametype & 0x0F) {
case NR_CONNREQ:
diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
index e9ca007718b7..0f23e5e8e03e 100644
--- a/net/nsh/nsh.c
+++ b/net/nsh/nsh.c
@@ -77,13 +77,12 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
+ u16 mac_offset = skb->mac_header;
unsigned int nsh_len, mac_len;
__be16 proto;
- int nhoff;
skb_reset_network_header(skb);
- nhoff = skb->network_header - skb->mac_header;
mac_len = skb->mac_len;
if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN)))
@@ -108,15 +107,14 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
segs = skb_mac_gso_segment(skb, features);
if (IS_ERR_OR_NULL(segs)) {
skb_gso_error_unwind(skb, htons(ETH_P_NSH), nsh_len,
- skb->network_header - nhoff,
- mac_len);
+ mac_offset, mac_len);
goto out;
}
for (skb = segs; skb; skb = skb->next) {
skb->protocol = htons(ETH_P_NSH);
__skb_push(skb, nsh_len);
- skb_set_mac_header(skb, -nhoff);
+ skb->mac_header = mac_offset;
skb->network_header = skb->mac_header + mac_len;
skb->mac_len = mac_len;
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index fcee6012293b..58f530f60172 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -236,9 +236,6 @@ void ovs_dp_detach_port(struct vport *p)
/* First drop references to device. */
hlist_del_rcu(&p->dp_hash_node);
- /* Free percpu memory */
- free_percpu(p->upcall_stats);
-
/* Then destroy it. */
ovs_vport_del(p);
}
@@ -1858,12 +1855,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
goto err_destroy_portids;
}
- vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
- if (!vport->upcall_stats) {
- err = -ENOMEM;
- goto err_destroy_vport;
- }
-
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
info->snd_seq, 0, OVS_DP_CMD_NEW);
BUG_ON(err < 0);
@@ -1876,8 +1867,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_notify(&dp_datapath_genl_family, reply, info);
return 0;
-err_destroy_vport:
- ovs_dp_detach_port(vport);
err_destroy_portids:
kfree(rcu_dereference_raw(dp->upcall_portids));
err_unlock_and_destroy_meters:
@@ -2322,12 +2311,6 @@ restart:
goto exit_unlock_free;
}
- vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
- if (!vport->upcall_stats) {
- err = -ENOMEM;
- goto exit_unlock_free_vport;
- }
-
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
OVS_VPORT_CMD_NEW, GFP_KERNEL);
@@ -2345,8 +2328,6 @@ restart:
ovs_notify(&dp_vport_genl_family, reply, info);
return 0;
-exit_unlock_free_vport:
- ovs_dp_detach_port(vport);
exit_unlock_free:
ovs_unlock();
kfree_skb(reply);
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 7e0f5c45b512..972ae01a70f7 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -124,6 +124,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
{
struct vport *vport;
size_t alloc_size;
+ int err;
alloc_size = sizeof(struct vport);
if (priv_size) {
@@ -135,17 +136,29 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
if (!vport)
return ERR_PTR(-ENOMEM);
+ vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
+ if (!vport->upcall_stats) {
+ err = -ENOMEM;
+ goto err_kfree_vport;
+ }
+
vport->dp = parms->dp;
vport->port_no = parms->port_no;
vport->ops = ops;
INIT_HLIST_NODE(&vport->dp_hash_node);
if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) {
- kfree(vport);
- return ERR_PTR(-EINVAL);
+ err = -EINVAL;
+ goto err_free_percpu;
}
return vport;
+
+err_free_percpu:
+ free_percpu(vport->upcall_stats);
+err_kfree_vport:
+ kfree(vport);
+ return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(ovs_vport_alloc);
@@ -165,6 +178,7 @@ void ovs_vport_free(struct vport *vport)
* it is safe to use raw dereference.
*/
kfree(rcu_dereference_raw(vport->upcall_portids));
+ free_percpu(vport->upcall_stats);
kfree(vport);
}
EXPORT_SYMBOL_GPL(ovs_vport_free);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 94c6a1ffa459..a2dbeb264f26 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3201,6 +3201,9 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
lock_sock(sk);
spin_lock(&po->bind_lock);
+ if (!proto)
+ proto = po->num;
+
rcu_read_lock();
if (po->fanout) {
@@ -3299,7 +3302,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data_min));
name[sizeof(uaddr->sa_data_min)] = 0;
- return packet_do_bind(sk, name, 0, pkt_sk(sk)->num);
+ return packet_do_bind(sk, name, 0, 0);
}
static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
@@ -3316,8 +3319,7 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len
if (sll->sll_family != AF_PACKET)
return -EINVAL;
- return packet_do_bind(sk, NULL, sll->sll_ifindex,
- sll->sll_protocol ? : pkt_sk(sk)->num);
+ return packet_do_bind(sk, NULL, sll->sll_ifindex, sll->sll_protocol);
}
static struct proto packet_proto = {
diff --git a/net/packet/diag.c b/net/packet/diag.c
index d0c4eda4cdc6..f6b200cb3c06 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -143,7 +143,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
rp = nlmsg_data(nlh);
rp->pdiag_family = AF_PACKET;
rp->pdiag_type = sk->sk_type;
- rp->pdiag_num = ntohs(po->num);
+ rp->pdiag_num = ntohs(READ_ONCE(po->num));
rp->pdiag_ino = sk_ino;
sock_diag_save_cookie(sk, rp->pdiag_cookie);
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 31f738d65f1c..da0b3b5157d5 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -980,6 +980,7 @@ static int __init af_rxrpc_init(void)
BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > sizeof_field(struct sk_buff, cb));
ret = -ENOMEM;
+ rxrpc_gen_version_string();
rxrpc_call_jar = kmem_cache_create(
"rxrpc_call_jar", sizeof(struct rxrpc_call), 0,
SLAB_HWCACHE_ALIGN, NULL);
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 5d44dc08f66d..e8e14c6f904d 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -1068,6 +1068,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time64_t,
/*
* local_event.c
*/
+void rxrpc_gen_version_string(void);
void rxrpc_send_version_request(struct rxrpc_local *local,
struct rxrpc_host_header *hdr,
struct sk_buff *skb);
diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c
index 5e69ea6b233d..993c69f97488 100644
--- a/net/rxrpc/local_event.c
+++ b/net/rxrpc/local_event.c
@@ -16,7 +16,16 @@
#include <generated/utsrelease.h>
#include "ar-internal.h"
-static const char rxrpc_version_string[65] = "linux-" UTS_RELEASE " AF_RXRPC";
+static char rxrpc_version_string[65]; // "linux-" UTS_RELEASE " AF_RXRPC";
+
+/*
+ * Generate the VERSION packet string.
+ */
+void rxrpc_gen_version_string(void)
+{
+ snprintf(rxrpc_version_string, sizeof(rxrpc_version_string),
+ "linux-%.49s AF_RXRPC", UTS_RELEASE);
+}
/*
* Reply to a version request
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 9cc0bc7c71ed..abc71a06d634 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -610,6 +610,7 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
struct flow_offload_tuple tuple = {};
enum ip_conntrack_info ctinfo;
struct tcphdr *tcph = NULL;
+ bool force_refresh = false;
struct flow_offload *flow;
struct nf_conn *ct;
u8 dir;
@@ -647,6 +648,7 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
* established state, then don't refresh.
*/
return false;
+ force_refresh = true;
}
if (tcph && (unlikely(tcph->fin || tcph->rst))) {
@@ -660,7 +662,12 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
else
ctinfo = IP_CT_ESTABLISHED_REPLY;
- flow_offload_refresh(nf_ft, flow);
+ flow_offload_refresh(nf_ft, flow, force_refresh);
+ if (!test_bit(IPS_ASSURED_BIT, &ct->status)) {
+ /* Process this flow in SW to allow promoting to ASSURED */
+ return false;
+ }
+
nf_conntrack_get(&ct->ct_general);
nf_ct_set(skb, ct, ctinfo);
if (nf_ft->flags & NF_FLOWTABLE_COUNTER)
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index fc945c7e4123..c819b812a899 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -13,7 +13,10 @@
#include <linux/rtnetlink.h>
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
#include <linux/slab.h>
+#include <net/ipv6.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <linux/tc_act/tc_pedit.h>
@@ -327,28 +330,58 @@ static bool offset_valid(struct sk_buff *skb, int offset)
return true;
}
-static void pedit_skb_hdr_offset(struct sk_buff *skb,
+static int pedit_l4_skb_offset(struct sk_buff *skb, int *hoffset, const int header_type)
+{
+ const int noff = skb_network_offset(skb);
+ int ret = -EINVAL;
+ struct iphdr _iph;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP): {
+ const struct iphdr *iph = skb_header_pointer(skb, noff, sizeof(_iph), &_iph);
+
+ if (!iph)
+ goto out;
+ *hoffset = noff + iph->ihl * 4;
+ ret = 0;
+ break;
+ }
+ case htons(ETH_P_IPV6):
+ ret = ipv6_find_hdr(skb, hoffset, header_type, NULL, NULL) == header_type ? 0 : -EINVAL;
+ break;
+ }
+out:
+ return ret;
+}
+
+static int pedit_skb_hdr_offset(struct sk_buff *skb,
enum pedit_header_type htype, int *hoffset)
{
+ int ret = -EINVAL;
/* 'htype' is validated in the netlink parsing */
switch (htype) {
case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH:
- if (skb_mac_header_was_set(skb))
+ if (skb_mac_header_was_set(skb)) {
*hoffset = skb_mac_offset(skb);
+ ret = 0;
+ }
break;
case TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK:
case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4:
case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6:
*hoffset = skb_network_offset(skb);
+ ret = 0;
break;
case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP:
+ ret = pedit_l4_skb_offset(skb, hoffset, IPPROTO_TCP);
+ break;
case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP:
- if (skb_transport_header_was_set(skb))
- *hoffset = skb_transport_offset(skb);
+ ret = pedit_l4_skb_offset(skb, hoffset, IPPROTO_UDP);
break;
default:
break;
}
+ return ret;
}
TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
@@ -384,6 +417,7 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
int hoffset = 0;
u32 *ptr, hdata;
u32 val;
+ int rc;
if (tkey_ex) {
htype = tkey_ex->htype;
@@ -392,7 +426,11 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
tkey_ex++;
}
- pedit_skb_hdr_offset(skb, htype, &hoffset);
+ rc = pedit_skb_hdr_offset(skb, htype, &hoffset);
+ if (rc) {
+ pr_info_ratelimited("tc action pedit unable to extract header offset for header type (0x%x)\n", htype);
+ goto bad;
+ }
if (tkey->offmask) {
u8 *d, _d;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 227cba58ce9f..2e9dce03d1ec 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -357,23 +357,23 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a,
opt.burst = PSCHED_NS2TICKS(p->tcfp_burst);
if (p->rate_present) {
psched_ratecfg_getrate(&opt.rate, &p->rate);
- if ((police->params->rate.rate_bytes_ps >= (1ULL << 32)) &&
+ if ((p->rate.rate_bytes_ps >= (1ULL << 32)) &&
nla_put_u64_64bit(skb, TCA_POLICE_RATE64,
- police->params->rate.rate_bytes_ps,
+ p->rate.rate_bytes_ps,
TCA_POLICE_PAD))
goto nla_put_failure;
}
if (p->peak_present) {
psched_ratecfg_getrate(&opt.peakrate, &p->peak);
- if ((police->params->peak.rate_bytes_ps >= (1ULL << 32)) &&
+ if ((p->peak.rate_bytes_ps >= (1ULL << 32)) &&
nla_put_u64_64bit(skb, TCA_POLICE_PEAKRATE64,
- police->params->peak.rate_bytes_ps,
+ p->peak.rate_bytes_ps,
TCA_POLICE_PAD))
goto nla_put_failure;
}
if (p->pps_present) {
if (nla_put_u64_64bit(skb, TCA_POLICE_PKTRATE64,
- police->params->ppsrate.rate_pkts_ps,
+ p->ppsrate.rate_pkts_ps,
TCA_POLICE_PAD))
goto nla_put_failure;
if (nla_put_u64_64bit(skb, TCA_POLICE_PKTBURST64,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 2621550bfddc..a193cc7b3241 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -43,8 +43,6 @@
#include <net/flow_offload.h>
#include <net/tc_wrapper.h>
-extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
-
/* The list of all installed classifier types */
static LIST_HEAD(tcf_proto_base);
@@ -659,8 +657,8 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
{
struct tcf_block *block = chain->block;
const struct tcf_proto_ops *tmplt_ops;
+ unsigned int refcnt, non_act_refcnt;
bool free_block = false;
- unsigned int refcnt;
void *tmplt_priv;
mutex_lock(&block->lock);
@@ -680,13 +678,15 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
* save these to temporary variables.
*/
refcnt = --chain->refcnt;
+ non_act_refcnt = refcnt - chain->action_refcnt;
tmplt_ops = chain->tmplt_ops;
tmplt_priv = chain->tmplt_priv;
- /* The last dropped non-action reference will trigger notification. */
- if (refcnt - chain->action_refcnt == 0 && !by_act) {
- tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index,
- block, NULL, 0, 0, false);
+ if (non_act_refcnt == chain->explicitly_created && !by_act) {
+ if (non_act_refcnt == 0)
+ tc_chain_notify_delete(tmplt_ops, tmplt_priv,
+ chain->index, block, NULL, 0, 0,
+ false);
/* Last reference to chain, no need to lock. */
chain->flushing = false;
}
@@ -2952,6 +2952,7 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
return PTR_ERR(ops);
if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
+ module_put(ops->owner);
return -EOPNOTSUPP;
}
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 9dbc43388e57..815c3e416bc5 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1153,6 +1153,9 @@ static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key,
if (option_len > sizeof(struct geneve_opt))
data_len = option_len - sizeof(struct geneve_opt);
+ if (key->enc_opts.len > FLOW_DIS_TUN_OPTS_MAX - 4)
+ return -ERANGE;
+
opt = (struct geneve_opt *)&key->enc_opts.data[key->enc_opts.len];
memset(opt, 0xff, option_len);
opt->length = data_len / 4;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 4e2e269f121f..d15d50de7980 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -718,13 +718,19 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
struct nlattr *est, u32 flags, u32 fl_flags,
struct netlink_ext_ack *extack)
{
- int err;
+ int err, ifindex = -1;
err = tcf_exts_validate_ex(net, tp, tb, est, &n->exts, flags,
fl_flags, extack);
if (err < 0)
return err;
+ if (tb[TCA_U32_INDEV]) {
+ ifindex = tcf_change_indev(net, tb[TCA_U32_INDEV], extack);
+ if (ifindex < 0)
+ return -EINVAL;
+ }
+
if (tb[TCA_U32_LINK]) {
u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
struct tc_u_hnode *ht_down = NULL, *ht_old;
@@ -759,13 +765,9 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
tcf_bind_filter(tp, &n->res, base);
}
- if (tb[TCA_U32_INDEV]) {
- int ret;
- ret = tcf_change_indev(net, tb[TCA_U32_INDEV], extack);
- if (ret < 0)
- return -EINVAL;
- n->ifindex = ret;
- }
+ if (ifindex >= 0)
+ n->ifindex = ifindex;
+
return 0;
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index fdb8f429333d..aa6b1fe65151 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -309,7 +309,7 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
if (dev_ingress_queue(dev))
q = qdisc_match_from_root(
- dev_ingress_queue(dev)->qdisc_sleeping,
+ rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping),
handle);
out:
return q;
@@ -328,7 +328,8 @@ struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
nq = dev_ingress_queue_rcu(dev);
if (nq)
- q = qdisc_match_from_root(nq->qdisc_sleeping, handle);
+ q = qdisc_match_from_root(rcu_dereference(nq->qdisc_sleeping),
+ handle);
out:
return q;
}
@@ -634,8 +635,13 @@ EXPORT_SYMBOL(qdisc_watchdog_init);
void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
u64 delta_ns)
{
- if (test_bit(__QDISC_STATE_DEACTIVATED,
- &qdisc_root_sleeping(wd->qdisc)->state))
+ bool deactivated;
+
+ rcu_read_lock();
+ deactivated = test_bit(__QDISC_STATE_DEACTIVATED,
+ &qdisc_root_sleeping(wd->qdisc)->state);
+ rcu_read_unlock();
+ if (deactivated)
return;
if (hrtimer_is_queued(&wd->timer)) {
@@ -1073,17 +1079,29 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
if (parent == NULL) {
unsigned int i, num_q, ingress;
+ struct netdev_queue *dev_queue;
ingress = 0;
num_q = dev->num_tx_queues;
if ((q && q->flags & TCQ_F_INGRESS) ||
(new && new->flags & TCQ_F_INGRESS)) {
- num_q = 1;
ingress = 1;
- if (!dev_ingress_queue(dev)) {
+ dev_queue = dev_ingress_queue(dev);
+ if (!dev_queue) {
NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
return -ENOENT;
}
+
+ q = rtnl_dereference(dev_queue->qdisc_sleeping);
+
+ /* This is the counterpart of that qdisc_refcount_inc_nz() call in
+ * __tcf_qdisc_find() for filter requests.
+ */
+ if (!qdisc_refcount_dec_if_one(q)) {
+ NL_SET_ERR_MSG(extack,
+ "Current ingress or clsact Qdisc has ongoing filter requests");
+ return -EBUSY;
+ }
}
if (dev->flags & IFF_UP)
@@ -1094,18 +1112,26 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
if (new && new->ops->attach && !ingress)
goto skip;
- for (i = 0; i < num_q; i++) {
- struct netdev_queue *dev_queue = dev_ingress_queue(dev);
-
- if (!ingress)
+ if (!ingress) {
+ for (i = 0; i < num_q; i++) {
dev_queue = netdev_get_tx_queue(dev, i);
+ old = dev_graft_qdisc(dev_queue, new);
- old = dev_graft_qdisc(dev_queue, new);
- if (new && i > 0)
- qdisc_refcount_inc(new);
-
- if (!ingress)
+ if (new && i > 0)
+ qdisc_refcount_inc(new);
qdisc_put(old);
+ }
+ } else {
+ old = dev_graft_qdisc(dev_queue, NULL);
+
+ /* {ingress,clsact}_destroy() @old before grafting @new to avoid
+ * unprotected concurrent accesses to net_device::miniq_{in,e}gress
+ * pointer(s) in mini_qdisc_pair_swap().
+ */
+ qdisc_notify(net, skb, n, classid, old, new, extack);
+ qdisc_destroy(old);
+
+ dev_graft_qdisc(dev_queue, new);
}
skip:
@@ -1119,8 +1145,6 @@ skip:
if (new && new->ops->attach)
new->ops->attach(new);
- } else {
- notify_and_destroy(net, skb, n, classid, old, new, extack);
}
if (dev->flags & IFF_UP)
@@ -1252,7 +1276,12 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
sch->parent = parent;
if (handle == TC_H_INGRESS) {
- sch->flags |= TCQ_F_INGRESS;
+ if (!(sch->flags & TCQ_F_INGRESS)) {
+ NL_SET_ERR_MSG(extack,
+ "Specified parent ID is reserved for ingress and clsact Qdiscs");
+ err = -EINVAL;
+ goto err_out3;
+ }
handle = TC_H_MAKE(TC_H_INGRESS, 0);
} else {
if (handle == 0) {
@@ -1473,7 +1502,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
}
q = qdisc_leaf(p, clid);
} else if (dev_ingress_queue(dev)) {
- q = dev_ingress_queue(dev)->qdisc_sleeping;
+ q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
}
} else {
q = rtnl_dereference(dev->qdisc);
@@ -1559,7 +1588,7 @@ replay:
}
q = qdisc_leaf(p, clid);
} else if (dev_ingress_queue_create(dev)) {
- q = dev_ingress_queue(dev)->qdisc_sleeping;
+ q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
}
} else {
q = rtnl_dereference(dev->qdisc);
@@ -1591,11 +1620,20 @@ replay:
NL_SET_ERR_MSG(extack, "Invalid qdisc name");
return -EINVAL;
}
+ if (q->flags & TCQ_F_INGRESS) {
+ NL_SET_ERR_MSG(extack,
+ "Cannot regraft ingress or clsact Qdiscs");
+ return -EINVAL;
+ }
if (q == p ||
(p && check_loop(q, p, 0))) {
NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
return -ELOOP;
}
+ if (clid == TC_H_INGRESS) {
+ NL_SET_ERR_MSG(extack, "Ingress cannot graft directly");
+ return -EINVAL;
+ }
qdisc_refcount_inc(q);
goto graft;
} else {
@@ -1791,8 +1829,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
dev_queue = dev_ingress_queue(dev);
if (dev_queue &&
- tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
- &q_idx, s_q_idx, false,
+ tc_dump_qdisc_root(rtnl_dereference(dev_queue->qdisc_sleeping),
+ skb, cb, &q_idx, s_q_idx, false,
tca[TCA_DUMP_INVISIBLE]) < 0)
goto done;
@@ -2235,8 +2273,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
dev_queue = dev_ingress_queue(dev);
if (dev_queue &&
- tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
- &t, s_t, false) < 0)
+ tc_dump_tclass_root(rtnl_dereference(dev_queue->qdisc_sleeping),
+ skb, tcm, cb, &t, s_t, false) < 0)
goto done;
done:
@@ -2288,7 +2326,9 @@ static struct pernet_operations psched_net_ops = {
.exit = psched_net_exit,
};
+#if IS_ENABLED(CONFIG_RETPOLINE)
DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper);
+#endif
static int __init pktsched_init(void)
{
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index 6980796d435d..591d87d5e5c0 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -201,6 +201,11 @@ out:
return NET_XMIT_CN;
}
+static struct netlink_range_validation fq_pie_q_range = {
+ .min = 1,
+ .max = 1 << 20,
+};
+
static const struct nla_policy fq_pie_policy[TCA_FQ_PIE_MAX + 1] = {
[TCA_FQ_PIE_LIMIT] = {.type = NLA_U32},
[TCA_FQ_PIE_FLOWS] = {.type = NLA_U32},
@@ -208,7 +213,8 @@ static const struct nla_policy fq_pie_policy[TCA_FQ_PIE_MAX + 1] = {
[TCA_FQ_PIE_TUPDATE] = {.type = NLA_U32},
[TCA_FQ_PIE_ALPHA] = {.type = NLA_U32},
[TCA_FQ_PIE_BETA] = {.type = NLA_U32},
- [TCA_FQ_PIE_QUANTUM] = {.type = NLA_U32},
+ [TCA_FQ_PIE_QUANTUM] =
+ NLA_POLICY_FULL_RANGE(NLA_U32, &fq_pie_q_range),
[TCA_FQ_PIE_MEMORY_LIMIT] = {.type = NLA_U32},
[TCA_FQ_PIE_ECN_PROB] = {.type = NLA_U32},
[TCA_FQ_PIE_ECN] = {.type = NLA_U32},
@@ -373,6 +379,7 @@ static void fq_pie_timer(struct timer_list *t)
spinlock_t *root_lock; /* to lock qdisc for probability calculations */
u32 idx;
+ rcu_read_lock();
root_lock = qdisc_lock(qdisc_root_sleeping(sch));
spin_lock(root_lock);
@@ -385,6 +392,7 @@ static void fq_pie_timer(struct timer_list *t)
mod_timer(&q->adapt_timer, jiffies + q->p_params.tupdate);
spin_unlock(root_lock);
+ rcu_read_unlock();
}
static int fq_pie_init(struct Qdisc *sch, struct nlattr *opt,
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 37e41f972f69..5d7e23f4cc0e 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -648,7 +648,7 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = {
static struct netdev_queue noop_netdev_queue = {
RCU_POINTER_INITIALIZER(qdisc, &noop_qdisc),
- .qdisc_sleeping = &noop_qdisc,
+ RCU_POINTER_INITIALIZER(qdisc_sleeping, &noop_qdisc),
};
struct Qdisc noop_qdisc = {
@@ -1046,7 +1046,7 @@ static void qdisc_free_cb(struct rcu_head *head)
qdisc_free(q);
}
-static void qdisc_destroy(struct Qdisc *qdisc)
+static void __qdisc_destroy(struct Qdisc *qdisc)
{
const struct Qdisc_ops *ops = qdisc->ops;
@@ -1070,6 +1070,14 @@ static void qdisc_destroy(struct Qdisc *qdisc)
call_rcu(&qdisc->rcu, qdisc_free_cb);
}
+void qdisc_destroy(struct Qdisc *qdisc)
+{
+ if (qdisc->flags & TCQ_F_BUILTIN)
+ return;
+
+ __qdisc_destroy(qdisc);
+}
+
void qdisc_put(struct Qdisc *qdisc)
{
if (!qdisc)
@@ -1079,7 +1087,7 @@ void qdisc_put(struct Qdisc *qdisc)
!refcount_dec_and_test(&qdisc->refcnt))
return;
- qdisc_destroy(qdisc);
+ __qdisc_destroy(qdisc);
}
EXPORT_SYMBOL(qdisc_put);
@@ -1094,7 +1102,7 @@ void qdisc_put_unlocked(struct Qdisc *qdisc)
!refcount_dec_and_rtnl_lock(&qdisc->refcnt))
return;
- qdisc_destroy(qdisc);
+ __qdisc_destroy(qdisc);
rtnl_unlock();
}
EXPORT_SYMBOL(qdisc_put_unlocked);
@@ -1103,7 +1111,7 @@ EXPORT_SYMBOL(qdisc_put_unlocked);
struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
struct Qdisc *qdisc)
{
- struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
+ struct Qdisc *oqdisc = rtnl_dereference(dev_queue->qdisc_sleeping);
spinlock_t *root_lock;
root_lock = qdisc_lock(oqdisc);
@@ -1112,7 +1120,7 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
/* ... and graft new one */
if (qdisc == NULL)
qdisc = &noop_qdisc;
- dev_queue->qdisc_sleeping = qdisc;
+ rcu_assign_pointer(dev_queue->qdisc_sleeping, qdisc);
rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);
spin_unlock_bh(root_lock);
@@ -1125,12 +1133,12 @@ static void shutdown_scheduler_queue(struct net_device *dev,
struct netdev_queue *dev_queue,
void *_qdisc_default)
{
- struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
+ struct Qdisc *qdisc = rtnl_dereference(dev_queue->qdisc_sleeping);
struct Qdisc *qdisc_default = _qdisc_default;
if (qdisc) {
rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
- dev_queue->qdisc_sleeping = qdisc_default;
+ rcu_assign_pointer(dev_queue->qdisc_sleeping, qdisc_default);
qdisc_put(qdisc);
}
@@ -1154,7 +1162,7 @@ static void attach_one_default_qdisc(struct net_device *dev,
if (!netif_is_multiqueue(dev))
qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
- dev_queue->qdisc_sleeping = qdisc;
+ rcu_assign_pointer(dev_queue->qdisc_sleeping, qdisc);
}
static void attach_default_qdiscs(struct net_device *dev)
@@ -1167,7 +1175,7 @@ static void attach_default_qdiscs(struct net_device *dev)
if (!netif_is_multiqueue(dev) ||
dev->priv_flags & IFF_NO_QUEUE) {
netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
- qdisc = txq->qdisc_sleeping;
+ qdisc = rtnl_dereference(txq->qdisc_sleeping);
rcu_assign_pointer(dev->qdisc, qdisc);
qdisc_refcount_inc(qdisc);
} else {
@@ -1186,7 +1194,7 @@ static void attach_default_qdiscs(struct net_device *dev)
netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
dev->priv_flags |= IFF_NO_QUEUE;
netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
- qdisc = txq->qdisc_sleeping;
+ qdisc = rtnl_dereference(txq->qdisc_sleeping);
rcu_assign_pointer(dev->qdisc, qdisc);
qdisc_refcount_inc(qdisc);
dev->priv_flags ^= IFF_NO_QUEUE;
@@ -1202,7 +1210,7 @@ static void transition_one_qdisc(struct net_device *dev,
struct netdev_queue *dev_queue,
void *_need_watchdog)
{
- struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
+ struct Qdisc *new_qdisc = rtnl_dereference(dev_queue->qdisc_sleeping);
int *need_watchdog_p = _need_watchdog;
if (!(new_qdisc->flags & TCQ_F_BUILTIN))
@@ -1272,7 +1280,7 @@ static void dev_reset_queue(struct net_device *dev,
struct Qdisc *qdisc;
bool nolock;
- qdisc = dev_queue->qdisc_sleeping;
+ qdisc = rtnl_dereference(dev_queue->qdisc_sleeping);
if (!qdisc)
return;
@@ -1303,7 +1311,7 @@ static bool some_qdisc_is_busy(struct net_device *dev)
int val;
dev_queue = netdev_get_tx_queue(dev, i);
- q = dev_queue->qdisc_sleeping;
+ q = rtnl_dereference(dev_queue->qdisc_sleeping);
root_lock = qdisc_lock(q);
spin_lock_bh(root_lock);
@@ -1379,7 +1387,7 @@ EXPORT_SYMBOL(dev_deactivate);
static int qdisc_change_tx_queue_len(struct net_device *dev,
struct netdev_queue *dev_queue)
{
- struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
+ struct Qdisc *qdisc = rtnl_dereference(dev_queue->qdisc_sleeping);
const struct Qdisc_ops *ops = qdisc->ops;
if (ops->change_tx_queue_len)
@@ -1404,7 +1412,7 @@ void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx)
unsigned int i;
for (i = new_real_tx; i < dev->real_num_tx_queues; i++) {
- qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
+ qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc_sleeping);
/* Only update the default qdiscs we created,
* qdiscs with handles are always hashed.
*/
@@ -1412,7 +1420,7 @@ void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx)
qdisc_hash_del(qdisc);
}
for (i = dev->real_num_tx_queues; i < new_real_tx; i++) {
- qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
+ qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc_sleeping);
if (qdisc != &noop_qdisc && !qdisc->handle)
qdisc_hash_add(qdisc, false);
}
@@ -1449,7 +1457,7 @@ static void dev_init_scheduler_queue(struct net_device *dev,
struct Qdisc *qdisc = _qdisc;
rcu_assign_pointer(dev_queue->qdisc, qdisc);
- dev_queue->qdisc_sleeping = qdisc;
+ rcu_assign_pointer(dev_queue->qdisc_sleeping, qdisc);
}
void dev_init_scheduler(struct net_device *dev)
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 84838128b9c5..e43a45499372 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -80,6 +80,9 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
struct net_device *dev = qdisc_dev(sch);
int err;
+ if (sch->parent != TC_H_INGRESS)
+ return -EOPNOTSUPP;
+
net_inc_ingress_queue();
mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
@@ -101,6 +104,9 @@ static void ingress_destroy(struct Qdisc *sch)
{
struct ingress_sched_data *q = qdisc_priv(sch);
+ if (sch->parent != TC_H_INGRESS)
+ return;
+
tcf_block_put_ext(q->block, sch, &q->block_info);
net_dec_ingress_queue();
}
@@ -134,7 +140,7 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
.cl_ops = &ingress_class_ops,
.id = "ingress",
.priv_size = sizeof(struct ingress_sched_data),
- .static_flags = TCQ_F_CPUSTATS,
+ .static_flags = TCQ_F_INGRESS | TCQ_F_CPUSTATS,
.init = ingress_init,
.destroy = ingress_destroy,
.dump = ingress_dump,
@@ -219,6 +225,9 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
struct net_device *dev = qdisc_dev(sch);
int err;
+ if (sch->parent != TC_H_CLSACT)
+ return -EOPNOTSUPP;
+
net_inc_ingress_queue();
net_inc_egress_queue();
@@ -248,6 +257,9 @@ static void clsact_destroy(struct Qdisc *sch)
{
struct clsact_sched_data *q = qdisc_priv(sch);
+ if (sch->parent != TC_H_CLSACT)
+ return;
+
tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
@@ -269,7 +281,7 @@ static struct Qdisc_ops clsact_qdisc_ops __read_mostly = {
.cl_ops = &clsact_class_ops,
.id = "clsact",
.priv_size = sizeof(struct clsact_sched_data),
- .static_flags = TCQ_F_CPUSTATS,
+ .static_flags = TCQ_F_INGRESS | TCQ_F_CPUSTATS,
.init = clsact_init,
.destroy = clsact_destroy,
.dump = ingress_dump,
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index d0bc660d7401..c860119a8f09 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -141,7 +141,7 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
* qdisc totals are added at end.
*/
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
- qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
+ qdisc = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping);
spin_lock_bh(qdisc_lock(qdisc));
gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
@@ -202,7 +202,7 @@ static struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl)
{
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
- return dev_queue->qdisc_sleeping;
+ return rtnl_dereference(dev_queue->qdisc_sleeping);
}
static unsigned long mq_find(struct Qdisc *sch, u32 classid)
@@ -221,7 +221,7 @@ static int mq_dump_class(struct Qdisc *sch, unsigned long cl,
tcm->tcm_parent = TC_H_ROOT;
tcm->tcm_handle |= TC_H_MIN(cl);
- tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
+ tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle;
return 0;
}
@@ -230,7 +230,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
{
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
- sch = dev_queue->qdisc_sleeping;
+ sch = rtnl_dereference(dev_queue->qdisc_sleeping);
if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 ||
qdisc_qstats_copy(d, sch) < 0)
return -1;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index dc5a0ff50b14..ab69ff7577fc 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -557,7 +557,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
* qdisc totals are added at end.
*/
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
- qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
+ qdisc = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping);
spin_lock_bh(qdisc_lock(qdisc));
gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
@@ -604,7 +604,7 @@ static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl)
if (!dev_queue)
return NULL;
- return dev_queue->qdisc_sleeping;
+ return rtnl_dereference(dev_queue->qdisc_sleeping);
}
static unsigned long mqprio_find(struct Qdisc *sch, u32 classid)
@@ -637,7 +637,7 @@ static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
tcm->tcm_parent = (tc < 0) ? 0 :
TC_H_MAKE(TC_H_MAJ(sch->handle),
TC_H_MIN(tc + TC_H_MIN_PRIORITY));
- tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
+ tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle;
} else {
tcm->tcm_parent = TC_H_ROOT;
tcm->tcm_info = 0;
@@ -693,7 +693,7 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
} else {
struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
- sch = dev_queue->qdisc_sleeping;
+ sch = rtnl_dereference(dev_queue->qdisc_sleeping);
if (gnet_stats_copy_basic(d, sch->cpu_bstats,
&sch->bstats, true) < 0 ||
qdisc_qstats_copy(d, sch) < 0)
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 6ef3021e1169..e79be1b3e74d 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -966,6 +966,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
if (ret < 0)
return ret;
+ sch_tree_lock(sch);
/* backup q->clg and q->loss_model */
old_clg = q->clg;
old_loss_model = q->loss_model;
@@ -974,7 +975,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
if (ret) {
q->loss_model = old_loss_model;
- return ret;
+ goto unlock;
}
} else {
q->loss_model = CLG_RANDOM;
@@ -1041,6 +1042,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
/* capping jitter to the range acceptable by tabledist() */
q->jitter = min_t(s64, abs(q->jitter), INT_MAX);
+unlock:
+ sch_tree_unlock(sch);
return ret;
get_table_failure:
@@ -1050,7 +1053,8 @@ get_table_failure:
*/
q->clg = old_clg;
q->loss_model = old_loss_model;
- return ret;
+
+ goto unlock;
}
static int netem_init(struct Qdisc *sch, struct nlattr *opt,
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 2152a56d73f8..2da6250ec346 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -421,8 +421,10 @@ static void pie_timer(struct timer_list *t)
{
struct pie_sched_data *q = from_timer(q, t, adapt_timer);
struct Qdisc *sch = q->sch;
- spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+ spinlock_t *root_lock;
+ rcu_read_lock();
+ root_lock = qdisc_lock(qdisc_root_sleeping(sch));
spin_lock(root_lock);
pie_calculate_probability(&q->params, &q->vars, sch->qstats.backlog);
@@ -430,6 +432,7 @@ static void pie_timer(struct timer_list *t)
if (q->params.tupdate)
mod_timer(&q->adapt_timer, jiffies + q->params.tupdate);
spin_unlock(root_lock);
+ rcu_read_unlock();
}
static int pie_init(struct Qdisc *sch, struct nlattr *opt,
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 98129324e157..16277b6a0238 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -321,12 +321,15 @@ static inline void red_adaptative_timer(struct timer_list *t)
{
struct red_sched_data *q = from_timer(q, t, adapt_timer);
struct Qdisc *sch = q->sch;
- spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+ spinlock_t *root_lock;
+ rcu_read_lock();
+ root_lock = qdisc_lock(qdisc_root_sleeping(sch));
spin_lock(root_lock);
red_adaptative_algo(&q->parms, &q->vars);
mod_timer(&q->adapt_timer, jiffies + HZ/2);
spin_unlock(root_lock);
+ rcu_read_unlock();
}
static int red_init(struct Qdisc *sch, struct nlattr *opt,
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index abd436307d6a..66dcb18638fe 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -606,10 +606,12 @@ static void sfq_perturbation(struct timer_list *t)
{
struct sfq_sched_data *q = from_timer(q, t, perturb_timer);
struct Qdisc *sch = q->sch;
- spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+ spinlock_t *root_lock;
siphash_key_t nkey;
get_random_bytes(&nkey, sizeof(nkey));
+ rcu_read_lock();
+ root_lock = qdisc_lock(qdisc_root_sleeping(sch));
spin_lock(root_lock);
q->perturbation = nkey;
if (!q->filter_list && q->tail)
@@ -618,6 +620,7 @@ static void sfq_perturbation(struct timer_list *t)
if (q->perturb_period)
mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
+ rcu_read_unlock();
}
static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 76db9a10ef50..cf0e61ed9225 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -797,6 +797,9 @@ static struct sk_buff *taprio_dequeue_tc_priority(struct Qdisc *sch,
taprio_next_tc_txq(dev, tc, &q->cur_txq[tc]);
+ if (q->cur_txq[tc] >= dev->num_tx_queues)
+ q->cur_txq[tc] = first_txq;
+
if (skb)
return skb;
} while (q->cur_txq[tc] != first_txq);
@@ -2358,7 +2361,7 @@ static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl)
if (!dev_queue)
return NULL;
- return dev_queue->qdisc_sleeping;
+ return rtnl_dereference(dev_queue->qdisc_sleeping);
}
static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
@@ -2377,7 +2380,7 @@ static int taprio_dump_class(struct Qdisc *sch, unsigned long cl,
tcm->tcm_parent = TC_H_ROOT;
tcm->tcm_handle |= TC_H_MIN(cl);
- tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
+ tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle;
return 0;
}
@@ -2389,7 +2392,7 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
{
struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
- sch = dev_queue->qdisc_sleeping;
+ sch = rtnl_dereference(dev_queue->qdisc_sleeping);
if (gnet_stats_copy_basic(d, NULL, &sch->bstats, true) < 0 ||
qdisc_qstats_copy(d, sch) < 0)
return -1;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 16f9238aa51d..7721239c185f 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -297,7 +297,7 @@ restart:
struct net_device *slave = qdisc_dev(q);
struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
- if (slave_txq->qdisc_sleeping != q)
+ if (rcu_access_pointer(slave_txq->qdisc_sleeping) != q)
continue;
if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
!netif_running(slave)) {
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 7fbeb99d8d32..23d6633966b1 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1250,7 +1250,10 @@ static int sctp_side_effects(enum sctp_event_type event_type,
default:
pr_err("impossible disposition %d in state %d, event_type %d, event_id %d\n",
status, state, event_type, subtype.chunk);
- BUG();
+ error = status;
+ if (error >= 0)
+ error = -EINVAL;
+ WARN_ON_ONCE(1);
break;
}
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 97f1155a2045..08fdf1251f46 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -4482,7 +4482,7 @@ enum sctp_disposition sctp_sf_eat_auth(struct net *net,
SCTP_AUTH_NEW_KEY, GFP_ATOMIC);
if (!ev)
- return -ENOMEM;
+ return SCTP_DISPOSITION_NOMEM;
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
SCTP_ULPEVENT(ev));
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 2f66a2006517..2abe45af98e7 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -324,9 +324,12 @@ bool sctp_transport_pl_recv(struct sctp_transport *t)
t->pl.probe_size += SCTP_PL_BIG_STEP;
} else if (t->pl.state == SCTP_PL_SEARCH) {
if (!t->pl.probe_high) {
- t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP,
- SCTP_MAX_PLPMTU);
- return false;
+ if (t->pl.probe_size < SCTP_MAX_PLPMTU) {
+ t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP,
+ SCTP_MAX_PLPMTU);
+ return false;
+ }
+ t->pl.probe_high = SCTP_MAX_PLPMTU;
}
t->pl.probe_size += SCTP_PL_MIN_STEP;
if (t->pl.probe_size >= t->pl.probe_high) {
@@ -341,7 +344,7 @@ bool sctp_transport_pl_recv(struct sctp_transport *t)
} else if (t->pl.state == SCTP_PL_COMPLETE) {
/* Raise probe_size again after 30 * interval in Search Complete */
t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */
- t->pl.probe_size += SCTP_PL_MIN_STEP;
+ t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_MIN_STEP, SCTP_MAX_PLPMTU);
}
return t->pl.state == SCTP_PL_COMPLETE;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 50c38b624f77..538e9c6ec8c9 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -2000,8 +2000,10 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
return rc;
/* create send buffer and rmb */
- if (smc_buf_create(new_smc, false))
+ if (smc_buf_create(new_smc, false)) {
+ smc_conn_abort(new_smc, ini->first_contact_local);
return SMC_CLC_DECL_MEM;
+ }
return 0;
}
@@ -2217,8 +2219,11 @@ static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
smcr_version = ini->smcr_version;
ini->smcr_version = SMC_V2;
rc = smc_listen_rdma_init(new_smc, ini);
- if (!rc)
+ if (!rc) {
rc = smc_listen_rdma_reg(new_smc, ini->first_contact_local);
+ if (rc)
+ smc_conn_abort(new_smc, ini->first_contact_local);
+ }
if (!rc)
return;
ini->smcr_version = smcr_version;
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 454356771cda..3f465faf2b68 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -127,6 +127,7 @@ static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
int i, j;
/* do link balancing */
+ conn->lnk = NULL; /* reset conn->lnk first */
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
struct smc_link *lnk = &conn->lgr->lnk[i];
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index a0840b8c935b..90f0b60b196a 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -578,7 +578,10 @@ static struct smc_buf_desc *smc_llc_get_next_rmb(struct smc_link_group *lgr,
{
struct smc_buf_desc *buf_next;
- if (!buf_pos || list_is_last(&buf_pos->list, &lgr->rmbs[*buf_lst])) {
+ if (!buf_pos)
+ return _smc_llc_get_next_rmb(lgr, buf_lst);
+
+ if (list_is_last(&buf_pos->list, &lgr->rmbs[*buf_lst])) {
(*buf_lst)++;
return _smc_llc_get_next_rmb(lgr, buf_lst);
}
@@ -614,6 +617,8 @@ static int smc_llc_fill_ext_v2(struct smc_llc_msg_add_link_v2_ext *ext,
goto out;
buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
for (i = 0; i < ext->num_rkeys; i++) {
+ while (buf_pos && !(buf_pos)->used)
+ buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos);
if (!buf_pos)
break;
rmb = buf_pos;
@@ -623,8 +628,6 @@ static int smc_llc_fill_ext_v2(struct smc_llc_msg_add_link_v2_ext *ext,
cpu_to_be64((uintptr_t)rmb->cpu_addr) :
cpu_to_be64((u64)sg_dma_address(rmb->sgt[lnk_idx].sgl));
buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos);
- while (buf_pos && !(buf_pos)->used)
- buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos);
}
len += i * sizeof(ext->rt[0]);
out:
@@ -848,6 +851,8 @@ static int smc_llc_add_link_cont(struct smc_link *link,
addc_llc->num_rkeys = *num_rkeys_todo;
n = *num_rkeys_todo;
for (i = 0; i < min_t(u8, n, SMC_LLC_RKEYS_PER_CONT_MSG); i++) {
+ while (*buf_pos && !(*buf_pos)->used)
+ *buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos);
if (!*buf_pos) {
addc_llc->num_rkeys = addc_llc->num_rkeys -
*num_rkeys_todo;
@@ -864,8 +869,6 @@ static int smc_llc_add_link_cont(struct smc_link *link,
(*num_rkeys_todo)--;
*buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos);
- while (*buf_pos && !(*buf_pos)->used)
- *buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos);
}
addc_llc->hd.common.llc_type = SMC_LLC_ADD_LINK_CONT;
addc_llc->hd.length = sizeof(struct smc_llc_msg_add_link_cont);
diff --git a/net/socket.c b/net/socket.c
index 401778380195..e46b162f1182 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -471,6 +471,7 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
return file;
}
+ file->f_mode |= FMODE_NOWAIT;
sock->file = file;
file->private_data = sock;
stream_open(SOCK_INODE(sock), file);
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 212c5d57465a..9734e1d9f991 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -639,6 +639,16 @@ gss_krb5_cts_crypt(struct crypto_sync_skcipher *cipher, struct xdr_buf *buf,
ret = write_bytes_to_xdr_buf(buf, offset, data, len);
+#if IS_ENABLED(CONFIG_KUNIT)
+ /*
+ * CBC-CTS does not define an output IV but RFC 3962 defines it as the
+ * penultimate block of ciphertext, so copy that into the IV buffer
+ * before returning.
+ */
+ if (encrypt)
+ memcpy(iv, data, crypto_sync_skcipher_ivsize(cipher));
+#endif
+
out:
kfree(data);
return ret;
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index c8321de341ee..6debf4fd42d4 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -927,11 +927,10 @@ static void __rpc_execute(struct rpc_task *task)
*/
do_action = task->tk_action;
/* Tasks with an RPC error status should exit */
- if (do_action != rpc_exit_task &&
+ if (do_action && do_action != rpc_exit_task &&
(status = READ_ONCE(task->tk_rpc_status)) != 0) {
task->tk_status = status;
- if (do_action != NULL)
- do_action = rpc_exit_task;
+ do_action = rpc_exit_task;
}
/* Callbacks override all actions */
if (task->tk_callback) {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 26367cf4c17a..e7c101290425 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -109,15 +109,15 @@ param_get_pool_mode(char *buf, const struct kernel_param *kp)
switch (*ip)
{
case SVC_POOL_AUTO:
- return strlcpy(buf, "auto\n", 20);
+ return sysfs_emit(buf, "auto\n");
case SVC_POOL_GLOBAL:
- return strlcpy(buf, "global\n", 20);
+ return sysfs_emit(buf, "global\n");
case SVC_POOL_PERCPU:
- return strlcpy(buf, "percpu\n", 20);
+ return sysfs_emit(buf, "percpu\n");
case SVC_POOL_PERNODE:
- return strlcpy(buf, "pernode\n", 20);
+ return sysfs_emit(buf, "pernode\n");
default:
- return sprintf(buf, "%d\n", *ip);
+ return sysfs_emit(buf, "%d\n", *ip);
}
}
@@ -597,34 +597,25 @@ svc_destroy(struct kref *ref)
}
EXPORT_SYMBOL_GPL(svc_destroy);
-/*
- * Allocate an RPC server's buffer space.
- * We allocate pages and place them in rq_pages.
- */
-static int
+static bool
svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node)
{
- unsigned int pages, arghi;
+ unsigned long pages, ret;
/* bc_xprt uses fore channel allocated buffers */
if (svc_is_backchannel(rqstp))
- return 1;
+ return true;
pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply.
* We assume one is at most one page
*/
- arghi = 0;
WARN_ON_ONCE(pages > RPCSVC_MAXPAGES);
if (pages > RPCSVC_MAXPAGES)
pages = RPCSVC_MAXPAGES;
- while (pages) {
- struct page *p = alloc_pages_node(node, GFP_KERNEL, 0);
- if (!p)
- break;
- rqstp->rq_pages[arghi++] = p;
- pages--;
- }
- return pages == 0;
+
+ ret = alloc_pages_bulk_array_node(GFP_KERNEL, node, pages,
+ rqstp->rq_pages);
+ return ret == pages;
}
/*
@@ -1052,7 +1043,7 @@ static int __svc_register(struct net *net, const char *progname,
#endif
}
- trace_svc_register(progname, version, protocol, port, family, error);
+ trace_svc_register(progname, version, family, protocol, port, error);
return error;
}
@@ -1173,6 +1164,7 @@ static void __svc_unregister(struct net *net, const u32 program, const u32 versi
*/
static void svc_unregister(const struct svc_serv *serv, struct net *net)
{
+ struct sighand_struct *sighand;
struct svc_program *progp;
unsigned long flags;
unsigned int i;
@@ -1189,9 +1181,12 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net)
}
}
- spin_lock_irqsave(&current->sighand->siglock, flags);
+ rcu_read_lock();
+ sighand = rcu_dereference(current->sighand);
+ spin_lock_irqsave(&sighand->siglock, flags);
recalc_sigpending();
- spin_unlock_irqrestore(&current->sighand->siglock, flags);
+ spin_unlock_irqrestore(&sighand->siglock, flags);
+ rcu_read_unlock();
}
/*
@@ -1416,7 +1411,7 @@ err_bad_rpc:
/* Only RPCv2 supported */
xdr_stream_encode_u32(xdr, RPC_VERSION);
xdr_stream_encode_u32(xdr, RPC_VERSION);
- goto sendit;
+ return 1; /* don't wrap */
err_bad_auth:
dprintk("svc: authentication failed (%d)\n",
@@ -1432,7 +1427,7 @@ err_bad_auth:
err_bad_prog:
dprintk("svc: unknown program %d\n", rqstp->rq_prog);
serv->sv_stats->rpcbadfmt++;
- xdr_stream_encode_u32(xdr, RPC_PROG_UNAVAIL);
+ *rqstp->rq_accept_statp = rpc_prog_unavail;
goto sendit;
err_bad_vers:
@@ -1440,7 +1435,12 @@ err_bad_vers:
rqstp->rq_vers, rqstp->rq_prog, progp->pg_name);
serv->sv_stats->rpcbadfmt++;
- xdr_stream_encode_u32(xdr, RPC_PROG_MISMATCH);
+ *rqstp->rq_accept_statp = rpc_prog_mismatch;
+
+ /*
+ * svc_authenticate() has already added the verifier and
+ * advanced the stream just past rq_accept_statp.
+ */
xdr_stream_encode_u32(xdr, process.mismatch.lovers);
xdr_stream_encode_u32(xdr, process.mismatch.hivers);
goto sendit;
@@ -1449,19 +1449,19 @@ err_bad_proc:
svc_printk(rqstp, "unknown procedure (%d)\n", rqstp->rq_proc);
serv->sv_stats->rpcbadfmt++;
- xdr_stream_encode_u32(xdr, RPC_PROC_UNAVAIL);
+ *rqstp->rq_accept_statp = rpc_proc_unavail;
goto sendit;
err_garbage_args:
svc_printk(rqstp, "failed to decode RPC header\n");
serv->sv_stats->rpcbadfmt++;
- xdr_stream_encode_u32(xdr, RPC_GARBAGE_ARGS);
+ *rqstp->rq_accept_statp = rpc_garbage_args;
goto sendit;
err_system_err:
serv->sv_stats->rpcbadfmt++;
- xdr_stream_encode_u32(xdr, RPC_SYSTEM_ERR);
+ *rqstp->rq_accept_statp = rpc_system_err;
goto sendit;
}
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 84e5d7d31481..62c7919ea610 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -74,13 +74,18 @@ static LIST_HEAD(svc_xprt_class_list);
* that no other thread will be using the transport or will
* try to set XPT_DEAD.
*/
+
+/**
+ * svc_reg_xprt_class - Register a server-side RPC transport class
+ * @xcl: New transport class to be registered
+ *
+ * Returns zero on success; otherwise a negative errno is returned.
+ */
int svc_reg_xprt_class(struct svc_xprt_class *xcl)
{
struct svc_xprt_class *cl;
int res = -EEXIST;
- dprintk("svc: Adding svc transport class '%s'\n", xcl->xcl_name);
-
INIT_LIST_HEAD(&xcl->xcl_list);
spin_lock(&svc_xprt_class_lock);
/* Make sure there isn't already a class with the same name */
@@ -96,9 +101,13 @@ out:
}
EXPORT_SYMBOL_GPL(svc_reg_xprt_class);
+/**
+ * svc_unreg_xprt_class - Unregister a server-side RPC transport class
+ * @xcl: Transport class to be unregistered
+ *
+ */
void svc_unreg_xprt_class(struct svc_xprt_class *xcl)
{
- dprintk("svc: Removing svc transport class '%s'\n", xcl->xcl_name);
spin_lock(&svc_xprt_class_lock);
list_del_init(&xcl->xcl_list);
spin_unlock(&svc_xprt_class_lock);
@@ -532,13 +541,23 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
}
EXPORT_SYMBOL_GPL(svc_reserve);
+static void free_deferred(struct svc_xprt *xprt, struct svc_deferred_req *dr)
+{
+ if (!dr)
+ return;
+
+ xprt->xpt_ops->xpo_release_ctxt(xprt, dr->xprt_ctxt);
+ kfree(dr);
+}
+
static void svc_xprt_release(struct svc_rqst *rqstp)
{
struct svc_xprt *xprt = rqstp->rq_xprt;
- xprt->xpt_ops->xpo_release_rqst(rqstp);
+ xprt->xpt_ops->xpo_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
+ rqstp->rq_xprt_ctxt = NULL;
- kfree(rqstp->rq_deferred);
+ free_deferred(xprt, rqstp->rq_deferred);
rqstp->rq_deferred = NULL;
svc_rqst_release_pages(rqstp);
@@ -675,8 +694,9 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
}
for (filled = 0; filled < pages; filled = ret) {
- ret = alloc_pages_bulk_array(GFP_KERNEL, pages,
- rqstp->rq_pages);
+ ret = alloc_pages_bulk_array_node(GFP_KERNEL,
+ rqstp->rq_pool->sp_id,
+ pages, rqstp->rq_pages);
if (ret > filled)
/* Made progress, don't sleep yet */
continue;
@@ -833,15 +853,11 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
svc_xprt_received(xprt);
} else if (svc_xprt_reserve_slot(rqstp, xprt)) {
/* XPT_DATA|XPT_DEFERRED case: */
- dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
- rqstp, rqstp->rq_pool->sp_id, xprt,
- kref_read(&xprt->xpt_ref));
rqstp->rq_deferred = svc_deferred_dequeue(xprt);
if (rqstp->rq_deferred)
len = svc_deferred_recv(rqstp);
else
len = xprt->xpt_ops->xpo_recvfrom(rqstp);
- rqstp->rq_stime = ktime_get();
rqstp->rq_reserved = serv->sv_max_mesg;
atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
} else
@@ -884,6 +900,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
err = -EAGAIN;
if (len <= 0)
goto out_release;
+
trace_svc_xdr_recvfrom(&rqstp->rq_arg);
clear_bit(XPT_OLD, &xprt->xpt_flags);
@@ -892,6 +909,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
if (serv->sv_stats)
serv->sv_stats->netcnt++;
+ rqstp->rq_stime = ktime_get();
return len;
out_release:
rqstp->rq_res.len = 0;
@@ -1054,7 +1072,7 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
spin_unlock_bh(&serv->sv_lock);
while ((dr = svc_deferred_dequeue(xprt)) != NULL)
- kfree(dr);
+ free_deferred(xprt, dr);
call_xpt_users(xprt);
svc_xprt_put(xprt);
@@ -1176,8 +1194,8 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) {
spin_unlock(&xprt->xpt_lock);
trace_svc_defer_drop(dr);
+ free_deferred(xprt, dr);
svc_xprt_put(xprt);
- kfree(dr);
return;
}
dr->xprt = NULL;
@@ -1222,14 +1240,14 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
dr->addrlen = rqstp->rq_addrlen;
dr->daddr = rqstp->rq_daddr;
dr->argslen = rqstp->rq_arg.len >> 2;
- dr->xprt_ctxt = rqstp->rq_xprt_ctxt;
- rqstp->rq_xprt_ctxt = NULL;
/* back up head to the start of the buffer and copy */
skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip,
dr->argslen << 2);
}
+ dr->xprt_ctxt = rqstp->rq_xprt_ctxt;
+ rqstp->rq_xprt_ctxt = NULL;
trace_svc_defer(rqstp);
svc_xprt_get(rqstp->rq_xprt);
dr->xprt = rqstp->rq_xprt;
@@ -1262,6 +1280,8 @@ static noinline int svc_deferred_recv(struct svc_rqst *rqstp)
rqstp->rq_daddr = dr->daddr;
rqstp->rq_respages = rqstp->rq_pages;
rqstp->rq_xprt_ctxt = dr->xprt_ctxt;
+
+ dr->xprt_ctxt = NULL;
svc_xprt_received(rqstp->rq_xprt);
return dr->argslen << 2;
}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index a51c9b989d58..5f519fc0541b 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -121,27 +121,27 @@ static void svc_reclassify_socket(struct socket *sock)
#endif
/**
- * svc_tcp_release_rqst - Release transport-related resources
- * @rqstp: request structure with resources to be released
+ * svc_tcp_release_ctxt - Release transport-related resources
+ * @xprt: the transport which owned the context
+ * @ctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
*
*/
-static void svc_tcp_release_rqst(struct svc_rqst *rqstp)
+static void svc_tcp_release_ctxt(struct svc_xprt *xprt, void *ctxt)
{
}
/**
- * svc_udp_release_rqst - Release transport-related resources
- * @rqstp: request structure with resources to be released
+ * svc_udp_release_ctxt - Release transport-related resources
+ * @xprt: the transport which owned the context
+ * @ctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
*
*/
-static void svc_udp_release_rqst(struct svc_rqst *rqstp)
+static void svc_udp_release_ctxt(struct svc_xprt *xprt, void *ctxt)
{
- struct sk_buff *skb = rqstp->rq_xprt_ctxt;
+ struct sk_buff *skb = ctxt;
- if (skb) {
- rqstp->rq_xprt_ctxt = NULL;
+ if (skb)
consume_skb(skb);
- }
}
union svc_pktinfo_u {
@@ -696,7 +696,8 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
unsigned int sent;
int err;
- svc_udp_release_rqst(rqstp);
+ svc_udp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
+ rqstp->rq_xprt_ctxt = NULL;
svc_set_cmsg_data(rqstp, cmh);
@@ -768,7 +769,7 @@ static const struct svc_xprt_ops svc_udp_ops = {
.xpo_recvfrom = svc_udp_recvfrom,
.xpo_sendto = svc_udp_sendto,
.xpo_result_payload = svc_sock_result_payload,
- .xpo_release_rqst = svc_udp_release_rqst,
+ .xpo_release_ctxt = svc_udp_release_ctxt,
.xpo_detach = svc_sock_detach,
.xpo_free = svc_sock_free,
.xpo_has_wspace = svc_udp_has_wspace,
@@ -825,12 +826,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
trace_sk_data_ready(sk);
- if (svsk) {
- /* Refer to svc_setup_socket() for details. */
- rmb();
- svsk->sk_odata(sk);
- }
-
/*
* This callback may called twice when a new connection
* is established as a child socket inherits everything
@@ -839,13 +834,18 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
* when one of child sockets become ESTABLISHED.
* 2) data_ready method of the child socket may be called
* when it receives data before the socket is accepted.
- * In case of 2, we should ignore it silently.
+ * In case of 2, we should ignore it silently and DO NOT
+ * dereference svsk.
*/
- if (sk->sk_state == TCP_LISTEN) {
- if (svsk) {
- set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
- svc_xprt_enqueue(&svsk->sk_xprt);
- }
+ if (sk->sk_state != TCP_LISTEN)
+ return;
+
+ if (svsk) {
+ /* Refer to svc_setup_socket() for details. */
+ rmb();
+ svsk->sk_odata(sk);
+ set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
+ svc_xprt_enqueue(&svsk->sk_xprt);
}
}
@@ -886,15 +886,13 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
clear_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
err = kernel_accept(sock, &newsock, O_NONBLOCK);
if (err < 0) {
- if (err == -ENOMEM)
- printk(KERN_WARNING "%s: no more sockets!\n",
- serv->sv_name);
- else if (err != -EAGAIN)
- net_warn_ratelimited("%s: accept failed (err %d)!\n",
- serv->sv_name, -err);
- trace_svcsock_accept_err(xprt, serv->sv_name, err);
+ if (err != -EAGAIN)
+ trace_svcsock_accept_err(xprt, serv->sv_name, err);
return NULL;
}
+ if (IS_ERR(sock_alloc_file(newsock, O_NONBLOCK, NULL)))
+ return NULL;
+
set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
err = kernel_getpeername(newsock, sin);
@@ -935,7 +933,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
return &newsvsk->sk_xprt;
failed:
- sock_release(newsock);
+ sockfd_put(newsock);
return NULL;
}
@@ -1298,7 +1296,8 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
unsigned int sent;
int err;
- svc_tcp_release_rqst(rqstp);
+ svc_tcp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
+ rqstp->rq_xprt_ctxt = NULL;
atomic_inc(&svsk->sk_sendqlen);
mutex_lock(&xprt->xpt_mutex);
@@ -1343,7 +1342,7 @@ static const struct svc_xprt_ops svc_tcp_ops = {
.xpo_recvfrom = svc_tcp_recvfrom,
.xpo_sendto = svc_tcp_sendto,
.xpo_result_payload = svc_sock_result_payload,
- .xpo_release_rqst = svc_tcp_release_rqst,
+ .xpo_release_ctxt = svc_tcp_release_ctxt,
.xpo_detach = svc_tcp_sock_detach,
.xpo_free = svc_sock_free,
.xpo_has_wspace = svc_tcp_has_wspace,
@@ -1430,7 +1429,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
struct socket *sock,
int flags)
{
- struct file *filp = NULL;
struct svc_sock *svsk;
struct sock *inet;
int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
@@ -1439,14 +1437,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
if (!svsk)
return ERR_PTR(-ENOMEM);
- if (!sock->file) {
- filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
- if (IS_ERR(filp)) {
- kfree(svsk);
- return ERR_CAST(filp);
- }
- }
-
inet = sock->sk;
if (pmap_register) {
@@ -1456,8 +1446,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
inet->sk_protocol,
ntohs(inet_sk(inet)->inet_sport));
if (err < 0) {
- if (filp)
- fput(filp);
kfree(svsk);
return ERR_PTR(err);
}
@@ -1470,7 +1458,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
svsk->sk_owspace = inet->sk_write_space;
/*
* This barrier is necessary in order to prevent race condition
- * with svc_data_ready(), svc_listen_data_ready() and others
+ * with svc_data_ready(), svc_tcp_listen_data_ready(), and others
* when calling callbacks above.
*/
wmb();
@@ -1482,29 +1470,14 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
else
svc_tcp_init(svsk, serv);
- trace_svcsock_new_socket(sock);
+ trace_svcsock_new(svsk, sock);
return svsk;
}
-bool svc_alien_sock(struct net *net, int fd)
-{
- int err;
- struct socket *sock = sockfd_lookup(fd, &err);
- bool ret = false;
-
- if (!sock)
- goto out;
- if (sock_net(sock->sk) != net)
- ret = true;
- sockfd_put(sock);
-out:
- return ret;
-}
-EXPORT_SYMBOL_GPL(svc_alien_sock);
-
/**
* svc_addsock - add a listener socket to an RPC service
* @serv: pointer to RPC service to which to add a new listener
+ * @net: caller's network namespace
* @fd: file descriptor of the new listener
* @name_return: pointer to buffer to fill in with name of listener
* @len: size of the buffer
@@ -1514,8 +1487,8 @@ EXPORT_SYMBOL_GPL(svc_alien_sock);
* Name is terminated with '\n'. On error, returns a negative errno
* value.
*/
-int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
- const size_t len, const struct cred *cred)
+int svc_addsock(struct svc_serv *serv, struct net *net, const int fd,
+ char *name_return, const size_t len, const struct cred *cred)
{
int err = 0;
struct socket *so = sockfd_lookup(fd, &err);
@@ -1526,6 +1499,9 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
if (!so)
return err;
+ err = -EINVAL;
+ if (sock_net(so->sk) != net)
+ goto out;
err = -EAFNOSUPPORT;
if ((so->sk->sk_family != PF_INET) && (so->sk->sk_family != PF_INET6))
goto out;
@@ -1675,6 +1651,8 @@ static void svc_sock_free(struct svc_xprt *xprt)
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
struct socket *sock = svsk->sk_sock;
+ trace_svcsock_free(svsk, sock);
+
tls_handshake_cancel(sock->sk);
if (sock->file)
sockfd_put(sock);
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 36835b2f5446..2a22e78af116 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1070,22 +1070,22 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
}
EXPORT_SYMBOL_GPL(xdr_reserve_space);
-
/**
* xdr_reserve_space_vec - Reserves a large amount of buffer space for sending
* @xdr: pointer to xdr_stream
- * @vec: pointer to a kvec array
* @nbytes: number of bytes to reserve
*
- * Reserves enough buffer space to encode 'nbytes' of data and stores the
- * pointers in 'vec'. The size argument passed to xdr_reserve_space() is
- * determined based on the number of bytes remaining in the current page to
- * avoid invalidating iov_base pointers when xdr_commit_encode() is called.
+ * The size argument passed to xdr_reserve_space() is determined based
+ * on the number of bytes remaining in the current page to avoid
+ * invalidating iov_base pointers when xdr_commit_encode() is called.
+ *
+ * Return values:
+ * %0: success
+ * %-EMSGSIZE: not enough space is available in @xdr
*/
-int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, size_t nbytes)
+int xdr_reserve_space_vec(struct xdr_stream *xdr, size_t nbytes)
{
- int thislen;
- int v = 0;
+ size_t thislen;
__be32 *p;
/*
@@ -1097,21 +1097,19 @@ int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, size_t nbyte
xdr->end = xdr->p;
}
+ /* XXX: Let's find a way to make this more efficient */
while (nbytes) {
thislen = xdr->buf->page_len % PAGE_SIZE;
thislen = min_t(size_t, nbytes, PAGE_SIZE - thislen);
p = xdr_reserve_space(xdr, thislen);
if (!p)
- return -EIO;
+ return -EMSGSIZE;
- vec[v].iov_base = p;
- vec[v].iov_len = thislen;
- v++;
nbytes -= thislen;
}
- return v;
+ return 0;
}
EXPORT_SYMBOL_GPL(xdr_reserve_space_vec);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index aa2227a7e552..7420a2c990c7 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -93,13 +93,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
*/
get_page(virt_to_page(rqst->rq_buffer));
sctxt->sc_send_wr.opcode = IB_WR_SEND;
- ret = svc_rdma_send(rdma, sctxt);
- if (ret < 0)
- return ret;
-
- ret = wait_for_completion_killable(&sctxt->sc_done);
- svc_rdma_send_ctxt_put(rdma, sctxt);
- return ret;
+ return svc_rdma_send(rdma, sctxt);
}
/* Server-side transport endpoint wants a whole page for its send
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 1c658fa43063..85c8bcaebb80 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -125,14 +125,15 @@ static void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma,
static struct svc_rdma_recv_ctxt *
svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
{
+ int node = ibdev_to_node(rdma->sc_cm_id->device);
struct svc_rdma_recv_ctxt *ctxt;
dma_addr_t addr;
void *buffer;
- ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
+ ctxt = kmalloc_node(sizeof(*ctxt), GFP_KERNEL, node);
if (!ctxt)
goto fail0;
- buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL);
+ buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node);
if (!buffer)
goto fail1;
addr = ib_dma_map_single(rdma->sc_pd->device, buffer,
@@ -155,7 +156,6 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
ctxt->rc_recv_sge.length = rdma->sc_max_req_size;
ctxt->rc_recv_sge.lkey = rdma->sc_pd->local_dma_lkey;
ctxt->rc_recv_buf = buffer;
- ctxt->rc_temp = false;
return ctxt;
fail2:
@@ -232,34 +232,30 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
pcl_free(&ctxt->rc_write_pcl);
pcl_free(&ctxt->rc_reply_pcl);
- if (!ctxt->rc_temp)
- llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
- else
- svc_rdma_recv_ctxt_destroy(rdma, ctxt);
+ llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
}
/**
- * svc_rdma_release_rqst - Release transport-specific per-rqst resources
- * @rqstp: svc_rqst being released
+ * svc_rdma_release_ctxt - Release transport-specific per-rqst resources
+ * @xprt: the transport which owned the context
+ * @vctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
*
* Ensure that the recv_ctxt is released whether or not a Reply
* was sent. For example, the client could close the connection,
* or svc_process could drop an RPC, before the Reply is sent.
*/
-void svc_rdma_release_rqst(struct svc_rqst *rqstp)
+void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *vctxt)
{
- struct svc_rdma_recv_ctxt *ctxt = rqstp->rq_xprt_ctxt;
- struct svc_xprt *xprt = rqstp->rq_xprt;
+ struct svc_rdma_recv_ctxt *ctxt = vctxt;
struct svcxprt_rdma *rdma =
container_of(xprt, struct svcxprt_rdma, sc_xprt);
- rqstp->rq_xprt_ctxt = NULL;
if (ctxt)
svc_rdma_recv_ctxt_put(rdma, ctxt);
}
static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
- unsigned int wanted, bool temp)
+ unsigned int wanted)
{
const struct ib_recv_wr *bad_wr = NULL;
struct svc_rdma_recv_ctxt *ctxt;
@@ -276,7 +272,6 @@ static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
break;
trace_svcrdma_post_recv(ctxt);
- ctxt->rc_temp = temp;
ctxt->rc_recv_wr.next = recv_chain;
recv_chain = &ctxt->rc_recv_wr;
rdma->sc_pending_recvs++;
@@ -310,7 +305,7 @@ err_free:
*/
bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
{
- return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests, true);
+ return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests);
}
/**
@@ -344,7 +339,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
* client reconnects.
*/
if (rdma->sc_pending_recvs < rdma->sc_max_requests)
- if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch, false))
+ if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch))
goto dropped;
/* All wc fields are now known to be valid */
@@ -776,9 +771,6 @@ static bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt,
*
* The next ctxt is removed from the "receive" lists.
*
- * - If the ctxt completes a Read, then finish assembling the Call
- * message and return the number of bytes in the message.
- *
* - If the ctxt completes a Receive, then construct the Call
* message from the contents of the Receive buffer.
*
@@ -787,7 +779,8 @@ static bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt,
* in the message.
*
* - If there are Read chunks in this message, post Read WRs to
- * pull that payload and return 0.
+ * pull that payload. When the Read WRs complete, build the
+ * full message and return the number of bytes in it.
*/
int svc_rdma_recvfrom(struct svc_rqst *rqstp)
{
@@ -797,6 +790,12 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
struct svc_rdma_recv_ctxt *ctxt;
int ret;
+ /* Prevent svc_xprt_release() from releasing pages in rq_pages
+ * when returning 0 or an error.
+ */
+ rqstp->rq_respages = rqstp->rq_pages;
+ rqstp->rq_next_page = rqstp->rq_respages;
+
rqstp->rq_xprt_ctxt = NULL;
ctxt = NULL;
@@ -820,12 +819,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
DMA_FROM_DEVICE);
svc_rdma_build_arg_xdr(rqstp, ctxt);
- /* Prevent svc_xprt_release from releasing pages in rq_pages
- * if we return 0 or an error.
- */
- rqstp->rq_respages = rqstp->rq_pages;
- rqstp->rq_next_page = rqstp->rq_respages;
-
ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg, ctxt);
if (ret < 0)
goto out_err;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 11cf7c646644..e460e25a1d6d 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -62,8 +62,8 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
if (node) {
ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node);
} else {
- ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
- GFP_KERNEL);
+ ctxt = kmalloc_node(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
+ GFP_KERNEL, ibdev_to_node(rdma->sc_cm_id->device));
if (!ctxt)
goto out_noctx;
@@ -84,8 +84,7 @@ out_noctx:
return NULL;
}
-static void __svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
- struct svc_rdma_rw_ctxt *ctxt,
+static void __svc_rdma_put_rw_ctxt(struct svc_rdma_rw_ctxt *ctxt,
struct llist_head *list)
{
sg_free_table_chained(&ctxt->rw_sg_table, SG_CHUNK_SIZE);
@@ -95,7 +94,7 @@ static void __svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
struct svc_rdma_rw_ctxt *ctxt)
{
- __svc_rdma_put_rw_ctxt(rdma, ctxt, &rdma->sc_rw_ctxts);
+ __svc_rdma_put_rw_ctxt(ctxt, &rdma->sc_rw_ctxts);
}
/**
@@ -191,6 +190,8 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
struct svc_rdma_rw_ctxt *ctxt;
LLIST_HEAD(free);
+ trace_svcrdma_cc_release(&cc->cc_cid, cc->cc_sqecount);
+
first = last = NULL;
while ((ctxt = svc_rdma_next_ctxt(&cc->cc_rwctxts)) != NULL) {
list_del(&ctxt->rw_list);
@@ -198,7 +199,7 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
rdma_rw_ctx_destroy(&ctxt->rw_ctx, rdma->sc_qp,
rdma->sc_port_num, ctxt->rw_sg_table.sgl,
ctxt->rw_nents, dir);
- __svc_rdma_put_rw_ctxt(rdma, ctxt, &free);
+ __svc_rdma_put_rw_ctxt(ctxt, &free);
ctxt->rw_node.next = first;
first = &ctxt->rw_node;
@@ -234,7 +235,8 @@ svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma,
{
struct svc_rdma_write_info *info;
- info = kmalloc(sizeof(*info), GFP_KERNEL);
+ info = kmalloc_node(sizeof(*info), GFP_KERNEL,
+ ibdev_to_node(rdma->sc_cm_id->device));
if (!info)
return info;
@@ -304,7 +306,8 @@ svc_rdma_read_info_alloc(struct svcxprt_rdma *rdma)
{
struct svc_rdma_read_info *info;
- info = kmalloc(sizeof(*info), GFP_KERNEL);
+ info = kmalloc_node(sizeof(*info), GFP_KERNEL,
+ ibdev_to_node(rdma->sc_cm_id->device));
if (!info)
return info;
@@ -351,8 +354,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
return;
}
-/* This function sleeps when the transport's Send Queue is congested.
- *
+/*
* Assumptions:
* - If ib_post_send() succeeds, only one completion is expected,
* even if one or more WRs are flushed. This is true when posting
@@ -367,6 +369,8 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
struct ib_cqe *cqe;
int ret;
+ might_sleep();
+
if (cc->cc_sqecount > rdma->sc_sq_depth)
return -EINVAL;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 22a871e6fe4d..c6644cca52c5 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -123,18 +123,17 @@ static void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma,
static struct svc_rdma_send_ctxt *
svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
{
+ int node = ibdev_to_node(rdma->sc_cm_id->device);
struct svc_rdma_send_ctxt *ctxt;
dma_addr_t addr;
void *buffer;
- size_t size;
int i;
- size = sizeof(*ctxt);
- size += rdma->sc_max_send_sges * sizeof(struct ib_sge);
- ctxt = kmalloc(size, GFP_KERNEL);
+ ctxt = kmalloc_node(struct_size(ctxt, sc_sges, rdma->sc_max_send_sges),
+ GFP_KERNEL, node);
if (!ctxt)
goto fail0;
- buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL);
+ buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node);
if (!buffer)
goto fail1;
addr = ib_dma_map_single(rdma->sc_pd->device, buffer,
@@ -148,7 +147,6 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe;
ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED;
- init_completion(&ctxt->sc_done);
ctxt->sc_cqe.done = svc_rdma_wc_send;
ctxt->sc_xprt_buf = buffer;
xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf,
@@ -214,6 +212,7 @@ out:
ctxt->sc_send_wr.num_sge = 0;
ctxt->sc_cur_sge_no = 0;
+ ctxt->sc_page_count = 0;
return ctxt;
out_empty:
@@ -228,6 +227,8 @@ out_empty:
* svc_rdma_send_ctxt_put - Return send_ctxt to free list
* @rdma: controlling svcxprt_rdma
* @ctxt: object to return to the free list
+ *
+ * Pages left in sc_pages are DMA unmapped and released.
*/
void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt)
@@ -235,6 +236,9 @@ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
struct ib_device *device = rdma->sc_cm_id->device;
unsigned int i;
+ if (ctxt->sc_page_count)
+ release_pages(ctxt->sc_pages, ctxt->sc_page_count);
+
/* The first SGE contains the transport header, which
* remains mapped until @ctxt is destroyed.
*/
@@ -281,12 +285,12 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe);
svc_rdma_wake_send_waiters(rdma, 1);
- complete(&ctxt->sc_done);
if (unlikely(wc->status != IB_WC_SUCCESS))
goto flushed;
trace_svcrdma_wc_send(wc, &ctxt->sc_cid);
+ svc_rdma_send_ctxt_put(rdma, ctxt);
return;
flushed:
@@ -294,6 +298,7 @@ flushed:
trace_svcrdma_wc_send_err(wc, &ctxt->sc_cid);
else
trace_svcrdma_wc_send_flush(wc, &ctxt->sc_cid);
+ svc_rdma_send_ctxt_put(rdma, ctxt);
svc_xprt_deferred_close(&rdma->sc_xprt);
}
@@ -310,7 +315,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
struct ib_send_wr *wr = &ctxt->sc_send_wr;
int ret;
- reinit_completion(&ctxt->sc_done);
+ might_sleep();
/* Sync the transport header buffer */
ib_dma_sync_single_for_device(rdma->sc_pd->device,
@@ -799,6 +804,25 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
svc_rdma_xb_dma_map, &args);
}
+/* The svc_rqst and all resources it owns are released as soon as
+ * svc_rdma_sendto returns. Transfer pages under I/O to the ctxt
+ * so they are released by the Send completion handler.
+ */
+static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
+ struct svc_rdma_send_ctxt *ctxt)
+{
+ int i, pages = rqstp->rq_next_page - rqstp->rq_respages;
+
+ ctxt->sc_page_count += pages;
+ for (i = 0; i < pages; i++) {
+ ctxt->sc_pages[i] = rqstp->rq_respages[i];
+ rqstp->rq_respages[i] = NULL;
+ }
+
+ /* Prevent svc_xprt_release from releasing pages in rq_pages */
+ rqstp->rq_next_page = rqstp->rq_respages;
+}
+
/* Prepare the portion of the RPC Reply that will be transmitted
* via RDMA Send. The RPC-over-RDMA transport header is prepared
* in sc_sges[0], and the RPC xdr_buf is prepared in following sges.
@@ -828,6 +852,8 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
if (ret < 0)
return ret;
+ svc_rdma_save_io_pages(rqstp, sctxt);
+
if (rctxt->rc_inv_rkey) {
sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey;
@@ -835,13 +861,7 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
sctxt->sc_send_wr.opcode = IB_WR_SEND;
}
- ret = svc_rdma_send(rdma, sctxt);
- if (ret < 0)
- return ret;
-
- ret = wait_for_completion_killable(&sctxt->sc_done);
- svc_rdma_send_ctxt_put(rdma, sctxt);
- return ret;
+ return svc_rdma_send(rdma, sctxt);
}
/**
@@ -907,8 +927,7 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
if (svc_rdma_send(rdma, sctxt))
goto put_ctxt;
-
- wait_for_completion_killable(&sctxt->sc_done);
+ return;
put_ctxt:
svc_rdma_send_ctxt_put(rdma, sctxt);
@@ -976,17 +995,16 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp);
if (ret < 0)
goto put_ctxt;
-
- /* Prevent svc_xprt_release() from releasing the page backing
- * rq_res.head[0].iov_base. It's no longer being accessed by
- * the I/O device. */
- rqstp->rq_respages++;
return 0;
reply_chunk:
if (ret != -E2BIG && ret != -EINVAL)
goto put_ctxt;
+ /* Send completion releases payload pages that were part
+ * of previously posted RDMA Writes.
+ */
+ svc_rdma_save_io_pages(rqstp, sctxt);
svc_rdma_send_error_msg(rdma, sctxt, rctxt, ret);
return 0;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 416b298f74dd..2abd895046ee 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -64,7 +64,7 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
- struct net *net);
+ struct net *net, int node);
static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
struct net *net,
struct sockaddr *sa, int salen,
@@ -80,7 +80,7 @@ static const struct svc_xprt_ops svc_rdma_ops = {
.xpo_recvfrom = svc_rdma_recvfrom,
.xpo_sendto = svc_rdma_sendto,
.xpo_result_payload = svc_rdma_result_payload,
- .xpo_release_rqst = svc_rdma_release_rqst,
+ .xpo_release_ctxt = svc_rdma_release_ctxt,
.xpo_detach = svc_rdma_detach,
.xpo_free = svc_rdma_free,
.xpo_has_wspace = svc_rdma_has_wspace,
@@ -123,14 +123,14 @@ static void qp_event_handler(struct ib_event *event, void *context)
}
static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
- struct net *net)
+ struct net *net, int node)
{
- struct svcxprt_rdma *cma_xprt = kzalloc(sizeof *cma_xprt, GFP_KERNEL);
+ struct svcxprt_rdma *cma_xprt;
- if (!cma_xprt) {
- dprintk("svcrdma: failed to create new transport\n");
+ cma_xprt = kzalloc_node(sizeof(*cma_xprt), GFP_KERNEL, node);
+ if (!cma_xprt)
return NULL;
- }
+
svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
@@ -193,9 +193,9 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id,
struct svcxprt_rdma *newxprt;
struct sockaddr *sa;
- /* Create a new transport */
newxprt = svc_rdma_create_xprt(listen_xprt->sc_xprt.xpt_server,
- listen_xprt->sc_xprt.xpt_net);
+ listen_xprt->sc_xprt.xpt_net,
+ ibdev_to_node(new_cma_id->device));
if (!newxprt)
return;
newxprt->sc_cm_id = new_cma_id;
@@ -304,7 +304,7 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
if (sa->sa_family != AF_INET && sa->sa_family != AF_INET6)
return ERR_PTR(-EAFNOSUPPORT);
- cma_xprt = svc_rdma_create_xprt(serv, net);
+ cma_xprt = svc_rdma_create_xprt(serv, net, NUMA_NO_NODE);
if (!cma_xprt)
return ERR_PTR(-ENOMEM);
set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 35cac7733fd3..cdcd2731860b 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -541,6 +541,19 @@ int tipc_bearer_mtu(struct net *net, u32 bearer_id)
return mtu;
}
+int tipc_bearer_min_mtu(struct net *net, u32 bearer_id)
+{
+ int mtu = TIPC_MIN_BEARER_MTU;
+ struct tipc_bearer *b;
+
+ rcu_read_lock();
+ b = bearer_get(net, bearer_id);
+ if (b)
+ mtu += b->encap_hlen;
+ rcu_read_unlock();
+ return mtu;
+}
+
/* tipc_bearer_xmit_skb - sends buffer to destination over bearer
*/
void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id,
@@ -1138,8 +1151,8 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
#ifdef CONFIG_TIPC_MEDIA_UDP
- if (tipc_udp_mtu_bad(nla_get_u32
- (props[TIPC_NLA_PROP_MTU]))) {
+ if (nla_get_u32(props[TIPC_NLA_PROP_MTU]) <
+ b->encap_hlen + TIPC_MIN_BEARER_MTU) {
NL_SET_ERR_MSG(info->extack,
"MTU value is out-of-range");
return -EINVAL;
@@ -1245,7 +1258,7 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info)
struct tipc_nl_msg msg;
struct tipc_media *media;
struct sk_buff *rep;
- struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
+ struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1];
if (!info->attrs[TIPC_NLA_MEDIA])
return -EINVAL;
@@ -1294,7 +1307,7 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
int err;
char *name;
struct tipc_media *m;
- struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
+ struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1];
if (!info->attrs[TIPC_NLA_MEDIA])
return -EINVAL;
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 490ad6e5f7a3..bd0cc5c287ef 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -146,6 +146,7 @@ struct tipc_media {
* @identity: array index of this bearer within TIPC bearer array
* @disc: ptr to link setup request
* @net_plane: network plane ('A' through 'H') currently associated with bearer
+ * @encap_hlen: encap headers length
* @up: bearer up flag (bit 0)
* @refcnt: tipc_bearer reference counter
*
@@ -170,6 +171,7 @@ struct tipc_bearer {
u32 identity;
struct tipc_discoverer *disc;
char net_plane;
+ u16 encap_hlen;
unsigned long up;
refcount_t refcnt;
};
@@ -232,6 +234,7 @@ int tipc_bearer_setup(void);
void tipc_bearer_cleanup(void);
void tipc_bearer_stop(struct net *net);
int tipc_bearer_mtu(struct net *net, u32 bearer_id);
+int tipc_bearer_min_mtu(struct net *net, u32 bearer_id);
bool tipc_bearer_bcast_support(struct net *net, u32 bearer_id);
void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id,
struct sk_buff *skb,
diff --git a/net/tipc/link.c b/net/tipc/link.c
index b3ce24823f50..2eff1c7949cb 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2200,7 +2200,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
struct tipc_msg *hdr = buf_msg(skb);
struct tipc_gap_ack_blks *ga = NULL;
bool reply = msg_probe(hdr), retransmitted = false;
- u32 dlen = msg_data_sz(hdr), glen = 0;
+ u32 dlen = msg_data_sz(hdr), glen = 0, msg_max;
u16 peers_snd_nxt = msg_next_sent(hdr);
u16 peers_tol = msg_link_tolerance(hdr);
u16 peers_prio = msg_linkprio(hdr);
@@ -2239,6 +2239,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
switch (mtyp) {
case RESET_MSG:
case ACTIVATE_MSG:
+ msg_max = msg_max_pkt(hdr);
+ if (msg_max < tipc_bearer_min_mtu(l->net, l->bearer_id))
+ break;
/* Complete own link name with peer's interface name */
if_name = strrchr(l->name, ':') + 1;
if (sizeof(l->name) - (if_name - l->name) <= TIPC_MAX_IF_NAME)
@@ -2283,8 +2286,8 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
l->peer_session = msg_session(hdr);
l->in_session = true;
l->peer_bearer_id = msg_bearer_id(hdr);
- if (l->mtu > msg_max_pkt(hdr))
- l->mtu = msg_max_pkt(hdr);
+ if (l->mtu > msg_max)
+ l->mtu = msg_max;
break;
case STATE_MSG:
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index c2bb818704c8..0a85244fd618 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -738,8 +738,8 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
udp_conf.local_ip.s_addr = local.ipv4.s_addr;
udp_conf.use_udp_checksums = false;
ub->ifindex = dev->ifindex;
- if (tipc_mtu_bad(dev, sizeof(struct iphdr) +
- sizeof(struct udphdr))) {
+ b->encap_hlen = sizeof(struct iphdr) + sizeof(struct udphdr);
+ if (tipc_mtu_bad(dev, b->encap_hlen)) {
err = -EINVAL;
goto err;
}
@@ -760,6 +760,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
else
udp_conf.local_ip6 = local.ipv6;
ub->ifindex = dev->ifindex;
+ b->encap_hlen = sizeof(struct ipv6hdr) + sizeof(struct udphdr);
b->mtu = 1280;
#endif
} else {
diff --git a/net/tls/tls.h b/net/tls/tls.h
index 804c3880d028..0672acab2773 100644
--- a/net/tls/tls.h
+++ b/net/tls/tls.h
@@ -167,6 +167,11 @@ static inline bool tls_strp_msg_ready(struct tls_sw_context_rx *ctx)
return ctx->strp.msg_ready;
}
+static inline bool tls_strp_msg_mixed_decrypted(struct tls_sw_context_rx *ctx)
+{
+ return ctx->strp.mixed_decrypted;
+}
+
#ifdef CONFIG_TLS_DEVICE
int tls_device_init(void);
void tls_device_cleanup(void);
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index a7cc4f9faac2..bf69c9d6d06c 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -1007,20 +1007,14 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx)
struct tls_sw_context_rx *sw_ctx = tls_sw_ctx_rx(tls_ctx);
struct sk_buff *skb = tls_strp_msg(sw_ctx);
struct strp_msg *rxm = strp_msg(skb);
- int is_decrypted = skb->decrypted;
- int is_encrypted = !is_decrypted;
- struct sk_buff *skb_iter;
- int left;
-
- left = rxm->full_len - skb->len;
- /* Check if all the data is decrypted already */
- skb_iter = skb_shinfo(skb)->frag_list;
- while (skb_iter && left > 0) {
- is_decrypted &= skb_iter->decrypted;
- is_encrypted &= !skb_iter->decrypted;
-
- left -= skb_iter->len;
- skb_iter = skb_iter->next;
+ int is_decrypted, is_encrypted;
+
+ if (!tls_strp_msg_mixed_decrypted(sw_ctx)) {
+ is_decrypted = skb->decrypted;
+ is_encrypted = !is_decrypted;
+ } else {
+ is_decrypted = 0;
+ is_encrypted = 0;
}
trace_tls_device_decrypted(sk, tcp_sk(sk)->copied_seq - rxm->full_len,
diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c
index 955ac3e0bf4d..f37f4a0fcd3c 100644
--- a/net/tls/tls_strp.c
+++ b/net/tls/tls_strp.c
@@ -20,7 +20,9 @@ static void tls_strp_abort_strp(struct tls_strparser *strp, int err)
strp->stopped = 1;
/* Report an error on the lower socket */
- strp->sk->sk_err = -err;
+ WRITE_ONCE(strp->sk->sk_err, -err);
+ /* Paired with smp_rmb() in tcp_poll() */
+ smp_wmb();
sk_error_report(strp->sk);
}
@@ -29,34 +31,50 @@ static void tls_strp_anchor_free(struct tls_strparser *strp)
struct skb_shared_info *shinfo = skb_shinfo(strp->anchor);
DEBUG_NET_WARN_ON_ONCE(atomic_read(&shinfo->dataref) != 1);
- shinfo->frag_list = NULL;
+ if (!strp->copy_mode)
+ shinfo->frag_list = NULL;
consume_skb(strp->anchor);
strp->anchor = NULL;
}
-/* Create a new skb with the contents of input copied to its page frags */
-static struct sk_buff *tls_strp_msg_make_copy(struct tls_strparser *strp)
+static struct sk_buff *
+tls_strp_skb_copy(struct tls_strparser *strp, struct sk_buff *in_skb,
+ int offset, int len)
{
- struct strp_msg *rxm;
struct sk_buff *skb;
- int i, err, offset;
+ int i, err;
- skb = alloc_skb_with_frags(0, strp->stm.full_len, TLS_PAGE_ORDER,
+ skb = alloc_skb_with_frags(0, len, TLS_PAGE_ORDER,
&err, strp->sk->sk_allocation);
if (!skb)
return NULL;
- offset = strp->stm.offset;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- WARN_ON_ONCE(skb_copy_bits(strp->anchor, offset,
+ WARN_ON_ONCE(skb_copy_bits(in_skb, offset,
skb_frag_address(frag),
skb_frag_size(frag)));
offset += skb_frag_size(frag);
}
- skb_copy_header(skb, strp->anchor);
+ skb->len = len;
+ skb->data_len = len;
+ skb_copy_header(skb, in_skb);
+ return skb;
+}
+
+/* Create a new skb with the contents of input copied to its page frags */
+static struct sk_buff *tls_strp_msg_make_copy(struct tls_strparser *strp)
+{
+ struct strp_msg *rxm;
+ struct sk_buff *skb;
+
+ skb = tls_strp_skb_copy(strp, strp->anchor, strp->stm.offset,
+ strp->stm.full_len);
+ if (!skb)
+ return NULL;
+
rxm = strp_msg(skb);
rxm->offset = 0;
return skb;
@@ -180,22 +198,22 @@ static void tls_strp_flush_anchor_copy(struct tls_strparser *strp)
for (i = 0; i < shinfo->nr_frags; i++)
__skb_frag_unref(&shinfo->frags[i], false);
shinfo->nr_frags = 0;
+ if (strp->copy_mode) {
+ kfree_skb_list(shinfo->frag_list);
+ shinfo->frag_list = NULL;
+ }
strp->copy_mode = 0;
+ strp->mixed_decrypted = 0;
}
-static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
- unsigned int offset, size_t in_len)
+static int tls_strp_copyin_frag(struct tls_strparser *strp, struct sk_buff *skb,
+ struct sk_buff *in_skb, unsigned int offset,
+ size_t in_len)
{
- struct tls_strparser *strp = (struct tls_strparser *)desc->arg.data;
- struct sk_buff *skb;
- skb_frag_t *frag;
size_t len, chunk;
+ skb_frag_t *frag;
int sz;
- if (strp->msg_ready)
- return 0;
-
- skb = strp->anchor;
frag = &skb_shinfo(skb)->frags[skb->len / PAGE_SIZE];
len = in_len;
@@ -208,19 +226,26 @@ static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
skb_frag_size(frag),
chunk));
- sz = tls_rx_msg_size(strp, strp->anchor);
- if (sz < 0) {
- desc->error = sz;
- return 0;
- }
-
- /* We may have over-read, sz == 0 is guaranteed under-read */
- if (sz > 0)
- chunk = min_t(size_t, chunk, sz - skb->len);
-
skb->len += chunk;
skb->data_len += chunk;
skb_frag_size_add(frag, chunk);
+
+ sz = tls_rx_msg_size(strp, skb);
+ if (sz < 0)
+ return sz;
+
+ /* We may have over-read, sz == 0 is guaranteed under-read */
+ if (unlikely(sz && sz < skb->len)) {
+ int over = skb->len - sz;
+
+ WARN_ON_ONCE(over > chunk);
+ skb->len -= over;
+ skb->data_len -= over;
+ skb_frag_size_add(frag, -over);
+
+ chunk -= over;
+ }
+
frag++;
len -= chunk;
offset += chunk;
@@ -247,15 +272,99 @@ static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
offset += chunk;
}
- if (strp->stm.full_len == skb->len) {
+read_done:
+ return in_len - len;
+}
+
+static int tls_strp_copyin_skb(struct tls_strparser *strp, struct sk_buff *skb,
+ struct sk_buff *in_skb, unsigned int offset,
+ size_t in_len)
+{
+ struct sk_buff *nskb, *first, *last;
+ struct skb_shared_info *shinfo;
+ size_t chunk;
+ int sz;
+
+ if (strp->stm.full_len)
+ chunk = strp->stm.full_len - skb->len;
+ else
+ chunk = TLS_MAX_PAYLOAD_SIZE + PAGE_SIZE;
+ chunk = min(chunk, in_len);
+
+ nskb = tls_strp_skb_copy(strp, in_skb, offset, chunk);
+ if (!nskb)
+ return -ENOMEM;
+
+ shinfo = skb_shinfo(skb);
+ if (!shinfo->frag_list) {
+ shinfo->frag_list = nskb;
+ nskb->prev = nskb;
+ } else {
+ first = shinfo->frag_list;
+ last = first->prev;
+ last->next = nskb;
+ first->prev = nskb;
+ }
+
+ skb->len += chunk;
+ skb->data_len += chunk;
+
+ if (!strp->stm.full_len) {
+ sz = tls_rx_msg_size(strp, skb);
+ if (sz < 0)
+ return sz;
+
+ /* We may have over-read, sz == 0 is guaranteed under-read */
+ if (unlikely(sz && sz < skb->len)) {
+ int over = skb->len - sz;
+
+ WARN_ON_ONCE(over > chunk);
+ skb->len -= over;
+ skb->data_len -= over;
+ __pskb_trim(nskb, nskb->len - over);
+
+ chunk -= over;
+ }
+
+ strp->stm.full_len = sz;
+ }
+
+ return chunk;
+}
+
+static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
+ unsigned int offset, size_t in_len)
+{
+ struct tls_strparser *strp = (struct tls_strparser *)desc->arg.data;
+ struct sk_buff *skb;
+ int ret;
+
+ if (strp->msg_ready)
+ return 0;
+
+ skb = strp->anchor;
+ if (!skb->len)
+ skb_copy_decrypted(skb, in_skb);
+ else
+ strp->mixed_decrypted |= !!skb_cmp_decrypted(skb, in_skb);
+
+ if (IS_ENABLED(CONFIG_TLS_DEVICE) && strp->mixed_decrypted)
+ ret = tls_strp_copyin_skb(strp, skb, in_skb, offset, in_len);
+ else
+ ret = tls_strp_copyin_frag(strp, skb, in_skb, offset, in_len);
+ if (ret < 0) {
+ desc->error = ret;
+ ret = 0;
+ }
+
+ if (strp->stm.full_len && strp->stm.full_len == skb->len) {
desc->count = 0;
strp->msg_ready = 1;
tls_rx_msg_ready(strp);
}
-read_done:
- return in_len - len;
+ return ret;
}
static int tls_strp_read_copyin(struct tls_strparser *strp)
@@ -315,15 +424,19 @@ static int tls_strp_read_copy(struct tls_strparser *strp, bool qshort)
return 0;
}
-static bool tls_strp_check_no_dup(struct tls_strparser *strp)
+static bool tls_strp_check_queue_ok(struct tls_strparser *strp)
{
unsigned int len = strp->stm.offset + strp->stm.full_len;
- struct sk_buff *skb;
+ struct sk_buff *first, *skb;
u32 seq;
- skb = skb_shinfo(strp->anchor)->frag_list;
- seq = TCP_SKB_CB(skb)->seq;
+ first = skb_shinfo(strp->anchor)->frag_list;
+ skb = first;
+ seq = TCP_SKB_CB(first)->seq;
+ /* Make sure there's no duplicate data in the queue,
+ * and the decrypted status matches.
+ */
while (skb->len < len) {
seq += skb->len;
len -= skb->len;
@@ -331,6 +444,8 @@ static bool tls_strp_check_no_dup(struct tls_strparser *strp)
if (TCP_SKB_CB(skb)->seq != seq)
return false;
+ if (skb_cmp_decrypted(first, skb))
+ return false;
}
return true;
@@ -411,7 +526,7 @@ static int tls_strp_read_sock(struct tls_strparser *strp)
return tls_strp_read_copy(strp, true);
}
- if (!tls_strp_check_no_dup(strp))
+ if (!tls_strp_check_queue_ok(strp))
return tls_strp_read_copy(strp, false);
strp->msg_ready = 1;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 635b8bf6b937..1a53c8f481e9 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -70,7 +70,9 @@ noinline void tls_err_abort(struct sock *sk, int err)
{
WARN_ON_ONCE(err >= 0);
/* sk->sk_err should contain a positive error code. */
- sk->sk_err = -err;
+ WRITE_ONCE(sk->sk_err, -err);
+ /* Paired with smp_rmb() in tcp_poll() */
+ smp_wmb();
sk_error_report(sk);
}
@@ -2304,10 +2306,14 @@ static void tls_data_ready(struct sock *sk)
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct sk_psock *psock;
+ gfp_t alloc_save;
trace_sk_data_ready(sk);
+ alloc_save = sk->sk_allocation;
+ sk->sk_allocation = GFP_ATOMIC;
tls_strp_data_ready(&ctx->strp);
+ sk->sk_allocation = alloc_save;
psock = sk_psock_get(sk);
if (psock) {
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index cc695c9f09ec..e7728b57a8c7 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2553,7 +2553,7 @@ static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
struct unix_sock *u = unix_sk(sk);
struct sk_buff *skb;
- int err, copied;
+ int err;
mutex_lock(&u->iolock);
skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
@@ -2561,10 +2561,7 @@ static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
if (!skb)
return err;
- copied = recv_actor(sk, skb);
- kfree_skb(skb);
-
- return copied;
+ return recv_actor(sk, skb);
}
/*
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 413407bb646c..efb8a0937a13 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1462,7 +1462,7 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr,
vsock_transport_cancel_pkt(vsk);
vsock_remove_connected(vsk);
goto out_wait;
- } else if (timeout == 0) {
+ } else if ((sk->sk_state != TCP_ESTABLISHED) && (timeout == 0)) {
err = -ETIMEDOUT;
sk->sk_state = TCP_CLOSE;
sock->state = SS_UNCONNECTED;
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index e4878551f140..b769fc258931 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -1441,7 +1441,6 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
struct sock *sk = sk_vsock(vsk);
struct sk_buff *skb;
int off = 0;
- int copied;
int err;
spin_lock_bh(&vvs->rx_lock);
@@ -1454,9 +1453,7 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
if (!skb)
return err;
- copied = recv_actor(sk, skb);
- kfree_skb(skb);
- return copied;
+ return recv_actor(sk, skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_read_skb);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 5b0c4d5b80cf..b3ec9eaec36b 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -368,12 +368,12 @@ static void cfg80211_sched_scan_stop_wk(struct work_struct *work)
rdev = container_of(work, struct cfg80211_registered_device,
sched_scan_stop_wk);
- rtnl_lock();
+ wiphy_lock(&rdev->wiphy);
list_for_each_entry_safe(req, tmp, &rdev->sched_scan_req_list, list) {
if (req->nl_owner_dead)
cfg80211_stop_sched_scan_req(rdev, req, false);
}
- rtnl_unlock();
+ wiphy_unlock(&rdev->wiphy);
}
static void cfg80211_propagate_radar_detect_wk(struct work_struct *work)
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d95f8053020d..087d60c0f6e4 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -10723,6 +10723,8 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
if (!info->attrs[NL80211_ATTR_MLD_ADDR])
return -EINVAL;
req.ap_mld_addr = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]);
+ if (!is_valid_ether_addr(req.ap_mld_addr))
+ return -EINVAL;
}
req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len,
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 2e497cf26ef2..69b508743e57 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -2,7 +2,7 @@
/*
* Portions of this file
* Copyright(c) 2016-2017 Intel Deutschland GmbH
- * Copyright (C) 2018, 2021-2022 Intel Corporation
+ * Copyright (C) 2018, 2021-2023 Intel Corporation
*/
#ifndef __CFG80211_RDEV_OPS
#define __CFG80211_RDEV_OPS
@@ -1441,8 +1441,8 @@ rdev_del_intf_link(struct cfg80211_registered_device *rdev,
unsigned int link_id)
{
trace_rdev_del_intf_link(&rdev->wiphy, wdev, link_id);
- if (rdev->ops->add_intf_link)
- rdev->ops->add_intf_link(&rdev->wiphy, wdev, link_id);
+ if (rdev->ops->del_intf_link)
+ rdev->ops->del_intf_link(&rdev->wiphy, wdev, link_id);
trace_rdev_return_void(&rdev->wiphy);
}
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 0d40d6af7e10..26f11e4746c0 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -2404,11 +2404,8 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev)
case NL80211_IFTYPE_P2P_GO:
case NL80211_IFTYPE_ADHOC:
case NL80211_IFTYPE_MESH_POINT:
- wiphy_lock(wiphy);
ret = cfg80211_reg_can_beacon_relax(wiphy, &chandef,
iftype);
- wiphy_unlock(wiphy);
-
if (!ret)
return ret;
break;
@@ -2440,11 +2437,11 @@ static void reg_leave_invalid_chans(struct wiphy *wiphy)
struct wireless_dev *wdev;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
- ASSERT_RTNL();
-
+ wiphy_lock(wiphy);
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list)
if (!reg_wdev_chan_valid(wiphy, wdev))
cfg80211_leave(rdev, wdev);
+ wiphy_unlock(wiphy);
}
static void reg_check_chans_work(struct work_struct *work)
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index a1382255fab3..c501db7bbdb3 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -5,7 +5,7 @@
* Copyright 2008 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2016 Intel Deutschland GmbH
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2023 Intel Corporation
*/
#include <linux/kernel.h>
#include <linux/slab.h>
@@ -540,6 +540,10 @@ static int cfg80211_parse_ap_info(struct cfg80211_colocated_ap *entry,
/* skip the TBTT offset */
pos++;
+ /* ignore entries with invalid BSSID */
+ if (!is_valid_ether_addr(pos))
+ return -EINVAL;
+
memcpy(entry->bssid, pos, ETH_ALEN);
pos += ETH_ALEN;
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 3bc0c3072e78..9755ef281040 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -5,7 +5,7 @@
* Copyright 2007-2009 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2023 Intel Corporation
*/
#include <linux/export.h>
#include <linux/bitops.h>
@@ -2558,6 +2558,13 @@ void cfg80211_remove_links(struct wireless_dev *wdev)
{
unsigned int link_id;
+ /*
+ * links are controlled by upper layers (userspace/cfg)
+ * only for AP mode, so only remove them here for AP
+ */
+ if (wdev->iftype != NL80211_IFTYPE_AP)
+ return;
+
wdev_lock(wdev);
if (wdev->valid_links) {
for_each_valid_link(wdev, link_id)
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index bef28c6187eb..408f5e55744e 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -378,7 +378,7 @@ int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp,
break;
default:
xdo->dev = NULL;
- dev_put(dev);
+ netdev_put(dev, &xdo->dev_tracker);
NL_SET_ERR_MSG(extack, "Unrecognized offload direction");
return -EINVAL;
}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 39fb91ff23d9..815b38080401 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -131,6 +131,7 @@ struct sec_path *secpath_set(struct sk_buff *skb)
memset(sp->ovec, 0, sizeof(sp->ovec));
sp->olen = 0;
sp->len = 0;
+ sp->verified_cnt = 0;
return sp;
}
@@ -330,11 +331,10 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x,
{
switch (x->props.mode) {
case XFRM_MODE_BEET:
- switch (XFRM_MODE_SKB_CB(skb)->protocol) {
- case IPPROTO_IPIP:
- case IPPROTO_BEETPH:
+ switch (x->sel.family) {
+ case AF_INET:
return xfrm4_remove_beet_encap(x, skb);
- case IPPROTO_IPV6:
+ case AF_INET6:
return xfrm6_remove_beet_encap(x, skb);
}
break;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 5c61ec04b839..e7617c9959c3 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1831,6 +1831,7 @@ again:
__xfrm_policy_unlink(pol, dir);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ xfrm_dev_policy_delete(pol);
cnt++;
xfrm_audit_policy_delete(pol, 1, task_valid);
xfrm_policy_kill(pol);
@@ -1869,6 +1870,7 @@ again:
__xfrm_policy_unlink(pol, dir);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ xfrm_dev_policy_delete(pol);
cnt++;
xfrm_audit_policy_delete(pol, 1, task_valid);
xfrm_policy_kill(pol);
@@ -3312,7 +3314,7 @@ xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
static inline int
xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
- unsigned short family)
+ unsigned short family, u32 if_id)
{
if (xfrm_state_kern(x))
return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
@@ -3323,7 +3325,8 @@ xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
(tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
!(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
!(x->props.mode != XFRM_MODE_TRANSPORT &&
- xfrm_state_addr_cmp(tmpl, x, family));
+ xfrm_state_addr_cmp(tmpl, x, family)) &&
+ (if_id == 0 || if_id == x->if_id);
}
/*
@@ -3335,7 +3338,7 @@ xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
*/
static inline int
xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start,
- unsigned short family)
+ unsigned short family, u32 if_id)
{
int idx = start;
@@ -3345,9 +3348,16 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star
} else
start = -1;
for (; idx < sp->len; idx++) {
- if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
+ if (xfrm_state_ok(tmpl, sp->xvec[idx], family, if_id))
return ++idx;
if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
+ if (idx < sp->verified_cnt) {
+ /* Secpath entry previously verified, consider optional and
+ * continue searching
+ */
+ continue;
+ }
+
if (start == -1)
start = -2-idx;
break;
@@ -3712,12 +3722,6 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
}
xfrm_nr = ti;
- if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK &&
- !xfrm_nr) {
- XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
- goto reject;
- }
-
if (npols > 1) {
xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
tpp = stp;
@@ -3728,9 +3732,12 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
* Order is _important_. Later we will implement
* some barriers, but at the moment barriers
* are implied between each two transformations.
+ * Upon success, marks secpath entries as having been
+ * verified to allow them to be skipped in future policy
+ * checks (e.g. nested tunnels).
*/
for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
- k = xfrm_policy_ok(tpp[i], sp, k, family);
+ k = xfrm_policy_ok(tpp[i], sp, k, family, if_id);
if (k < 0) {
if (k < -1)
/* "-2 - errored_index" returned */
@@ -3745,10 +3752,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
goto reject;
}
- if (if_id)
- secpath_reset(skb);
-
xfrm_pols_put(pols, npols);
+ sp->verified_cnt = k;
+
return 1;
}
XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index d720e163ae6e..c34a2a06ca94 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1770,7 +1770,7 @@ static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut,
}
static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family,
- struct netlink_ext_ack *extack)
+ int dir, struct netlink_ext_ack *extack)
{
u16 prev_family;
int i;
@@ -1796,6 +1796,10 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family,
switch (ut[i].mode) {
case XFRM_MODE_TUNNEL:
case XFRM_MODE_BEET:
+ if (ut[i].optional && dir == XFRM_POLICY_OUT) {
+ NL_SET_ERR_MSG(extack, "Mode in optional template not allowed in outbound policy");
+ return -EINVAL;
+ }
break;
default:
if (ut[i].family != prev_family) {
@@ -1833,7 +1837,7 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family,
}
static int copy_from_user_tmpl(struct xfrm_policy *pol, struct nlattr **attrs,
- struct netlink_ext_ack *extack)
+ int dir, struct netlink_ext_ack *extack)
{
struct nlattr *rt = attrs[XFRMA_TMPL];
@@ -1844,7 +1848,7 @@ static int copy_from_user_tmpl(struct xfrm_policy *pol, struct nlattr **attrs,
int nr = nla_len(rt) / sizeof(*utmpl);
int err;
- err = validate_tmpl(nr, utmpl, pol->family, extack);
+ err = validate_tmpl(nr, utmpl, pol->family, dir, extack);
if (err)
return err;
@@ -1921,7 +1925,7 @@ static struct xfrm_policy *xfrm_policy_construct(struct net *net,
if (err)
goto error;
- if (!(err = copy_from_user_tmpl(xp, attrs, extack)))
+ if (!(err = copy_from_user_tmpl(xp, attrs, p->dir, extack)))
err = copy_from_user_sec_ctx(xp, attrs);
if (err)
goto error;
@@ -1980,6 +1984,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err) {
xfrm_dev_policy_delete(xp);
+ xfrm_dev_policy_free(xp);
security_xfrm_policy_free(xp->security);
kfree(xp);
return err;
@@ -3499,7 +3504,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt,
return NULL;
nr = ((len - sizeof(*p)) / sizeof(*ut));
- if (validate_tmpl(nr, ut, p->sel.family, NULL))
+ if (validate_tmpl(nr, ut, p->sel.family, p->dir, NULL))
return NULL;
if (p->dir > XFRM_POLICY_OUT)
diff --git a/rust/alloc/README.md b/rust/alloc/README.md
index c89c753720b5..eb6f22e94ebf 100644
--- a/rust/alloc/README.md
+++ b/rust/alloc/README.md
@@ -10,6 +10,9 @@ upstream. In general, only additions should be performed (e.g. new
methods). Eventually, changes should make it into upstream so that,
at some point, this fork can be dropped from the kernel tree.
+The Rust upstream version on top of which these files are based matches
+the output of `scripts/min-tool-version.sh rustc`.
+
## Rationale
diff --git a/rust/alloc/alloc.rs b/rust/alloc/alloc.rs
index ca224a541770..acf22d45e6f2 100644
--- a/rust/alloc/alloc.rs
+++ b/rust/alloc/alloc.rs
@@ -22,21 +22,24 @@ use core::marker::Destruct;
mod tests;
extern "Rust" {
- // These are the magic symbols to call the global allocator. rustc generates
+ // These are the magic symbols to call the global allocator. rustc generates
// them to call `__rg_alloc` etc. if there is a `#[global_allocator]` attribute
// (the code expanding that attribute macro generates those functions), or to call
- // the default implementations in libstd (`__rdl_alloc` etc. in `library/std/src/alloc.rs`)
+ // the default implementations in std (`__rdl_alloc` etc. in `library/std/src/alloc.rs`)
// otherwise.
- // The rustc fork of LLVM also special-cases these function names to be able to optimize them
+ // The rustc fork of LLVM 14 and earlier also special-cases these function names to be able to optimize them
// like `malloc`, `realloc`, and `free`, respectively.
#[rustc_allocator]
- #[rustc_allocator_nounwind]
+ #[rustc_nounwind]
fn __rust_alloc(size: usize, align: usize) -> *mut u8;
- #[rustc_allocator_nounwind]
+ #[rustc_deallocator]
+ #[rustc_nounwind]
fn __rust_dealloc(ptr: *mut u8, size: usize, align: usize);
- #[rustc_allocator_nounwind]
+ #[rustc_reallocator]
+ #[rustc_nounwind]
fn __rust_realloc(ptr: *mut u8, old_size: usize, align: usize, new_size: usize) -> *mut u8;
- #[rustc_allocator_nounwind]
+ #[rustc_allocator_zeroed]
+ #[rustc_nounwind]
fn __rust_alloc_zeroed(size: usize, align: usize) -> *mut u8;
}
@@ -72,11 +75,14 @@ pub use std::alloc::Global;
/// # Examples
///
/// ```
-/// use std::alloc::{alloc, dealloc, Layout};
+/// use std::alloc::{alloc, dealloc, handle_alloc_error, Layout};
///
/// unsafe {
/// let layout = Layout::new::<u16>();
/// let ptr = alloc(layout);
+/// if ptr.is_null() {
+/// handle_alloc_error(layout);
+/// }
///
/// *(ptr as *mut u16) = 42;
/// assert_eq!(*(ptr as *mut u16), 42);
@@ -349,7 +355,7 @@ pub(crate) const unsafe fn box_free<T: ?Sized, A: ~const Allocator + ~const Dest
#[cfg(not(no_global_oom_handling))]
extern "Rust" {
- // This is the magic symbol to call the global alloc error handler. rustc generates
+ // This is the magic symbol to call the global alloc error handler. rustc generates
// it to call `__rg_oom` if there is a `#[alloc_error_handler]`, or to call the
// default implementations below (`__rdl_oom`) otherwise.
fn __rust_alloc_error_handler(size: usize, align: usize) -> !;
@@ -394,25 +400,24 @@ pub use std::alloc::handle_alloc_error;
#[allow(unused_attributes)]
#[unstable(feature = "alloc_internals", issue = "none")]
pub mod __alloc_error_handler {
- use crate::alloc::Layout;
-
- // called via generated `__rust_alloc_error_handler`
-
- // if there is no `#[alloc_error_handler]`
+ // called via generated `__rust_alloc_error_handler` if there is no
+ // `#[alloc_error_handler]`.
#[rustc_std_internal_symbol]
- pub unsafe extern "C-unwind" fn __rdl_oom(size: usize, _align: usize) -> ! {
- panic!("memory allocation of {size} bytes failed")
- }
-
- // if there is an `#[alloc_error_handler]`
- #[rustc_std_internal_symbol]
- pub unsafe extern "C-unwind" fn __rg_oom(size: usize, align: usize) -> ! {
- let layout = unsafe { Layout::from_size_align_unchecked(size, align) };
+ pub unsafe fn __rdl_oom(size: usize, _align: usize) -> ! {
extern "Rust" {
- #[lang = "oom"]
- fn oom_impl(layout: Layout) -> !;
+ // This symbol is emitted by rustc next to __rust_alloc_error_handler.
+ // Its value depends on the -Zoom={panic,abort} compiler option.
+ static __rust_alloc_error_handler_should_panic: u8;
+ }
+
+ #[allow(unused_unsafe)]
+ if unsafe { __rust_alloc_error_handler_should_panic != 0 } {
+ panic!("memory allocation of {size} bytes failed")
+ } else {
+ core::panicking::panic_nounwind_fmt(format_args!(
+ "memory allocation of {size} bytes failed"
+ ))
}
- unsafe { oom_impl(layout) }
}
}
diff --git a/rust/alloc/boxed.rs b/rust/alloc/boxed.rs
index dcfe87b14f3a..14af9860c36c 100644
--- a/rust/alloc/boxed.rs
+++ b/rust/alloc/boxed.rs
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
-//! A pointer type for heap allocation.
+//! The `Box<T>` type for heap allocation.
//!
//! [`Box<T>`], casually referred to as a 'box', provides the simplest form of
//! heap allocation in Rust. Boxes provide ownership for this allocation, and
@@ -124,7 +124,21 @@
//! definition is just using `T*` can lead to undefined behavior, as
//! described in [rust-lang/unsafe-code-guidelines#198][ucg#198].
//!
+//! # Considerations for unsafe code
+//!
+//! **Warning: This section is not normative and is subject to change, possibly
+//! being relaxed in the future! It is a simplified summary of the rules
+//! currently implemented in the compiler.**
+//!
+//! The aliasing rules for `Box<T>` are the same as for `&mut T`. `Box<T>`
+//! asserts uniqueness over its content. Using raw pointers derived from a box
+//! after that box has been mutated through, moved or borrowed as `&mut T`
+//! is not allowed. For more guidance on working with box from unsafe code, see
+//! [rust-lang/unsafe-code-guidelines#326][ucg#326].
+//!
+//!
//! [ucg#198]: https://github.com/rust-lang/unsafe-code-guidelines/issues/198
+//! [ucg#326]: https://github.com/rust-lang/unsafe-code-guidelines/issues/326
//! [dereferencing]: core::ops::Deref
//! [`Box::<T>::from_raw(value)`]: Box::from_raw
//! [`Global`]: crate::alloc::Global
@@ -139,12 +153,14 @@ use core::async_iter::AsyncIterator;
use core::borrow;
use core::cmp::Ordering;
use core::convert::{From, TryFrom};
+use core::error::Error;
use core::fmt;
use core::future::Future;
use core::hash::{Hash, Hasher};
#[cfg(not(no_global_oom_handling))]
use core::iter::FromIterator;
use core::iter::{FusedIterator, Iterator};
+use core::marker::Tuple;
use core::marker::{Destruct, Unpin, Unsize};
use core::mem;
use core::ops::{
@@ -163,6 +179,8 @@ use crate::raw_vec::RawVec;
#[cfg(not(no_global_oom_handling))]
use crate::str::from_boxed_utf8_unchecked;
#[cfg(not(no_global_oom_handling))]
+use crate::string::String;
+#[cfg(not(no_global_oom_handling))]
use crate::vec::Vec;
#[cfg(not(no_thin))]
@@ -172,7 +190,7 @@ pub use thin::ThinBox;
#[cfg(not(no_thin))]
mod thin;
-/// A pointer type for heap allocation.
+/// A pointer type that uniquely owns a heap allocation of type `T`.
///
/// See the [module-level documentation](../../std/boxed/index.html) for more.
#[lang = "owned_box"]
@@ -196,12 +214,13 @@ impl<T> Box<T> {
/// ```
/// let five = Box::new(5);
/// ```
- #[cfg(not(no_global_oom_handling))]
+ #[cfg(all(not(no_global_oom_handling)))]
#[inline(always)]
#[stable(feature = "rust1", since = "1.0.0")]
#[must_use]
pub fn new(x: T) -> Self {
- box x
+ #[rustc_box]
+ Box::new(x)
}
/// Constructs a new box with uninitialized contents.
@@ -256,14 +275,21 @@ impl<T> Box<T> {
Self::new_zeroed_in(Global)
}
- /// Constructs a new `Pin<Box<T>>`. If `T` does not implement `Unpin`, then
+ /// Constructs a new `Pin<Box<T>>`. If `T` does not implement [`Unpin`], then
/// `x` will be pinned in memory and unable to be moved.
+ ///
+ /// Constructing and pinning of the `Box` can also be done in two steps: `Box::pin(x)`
+ /// does the same as <code>[Box::into_pin]\([Box::new]\(x))</code>. Consider using
+ /// [`into_pin`](Box::into_pin) if you already have a `Box<T>`, or if you want to
+ /// construct a (pinned) `Box` in a different way than with [`Box::new`].
#[cfg(not(no_global_oom_handling))]
#[stable(feature = "pin", since = "1.33.0")]
#[must_use]
#[inline(always)]
pub fn pin(x: T) -> Pin<Box<T>> {
- (box x).into()
+ (#[rustc_box]
+ Box::new(x))
+ .into()
}
/// Allocates memory on the heap then places `x` into it,
@@ -543,8 +569,13 @@ impl<T, A: Allocator> Box<T, A> {
unsafe { Ok(Box::from_raw_in(ptr.as_ptr(), alloc)) }
}
- /// Constructs a new `Pin<Box<T, A>>`. If `T` does not implement `Unpin`, then
+ /// Constructs a new `Pin<Box<T, A>>`. If `T` does not implement [`Unpin`], then
/// `x` will be pinned in memory and unable to be moved.
+ ///
+ /// Constructing and pinning of the `Box` can also be done in two steps: `Box::pin_in(x, alloc)`
+ /// does the same as <code>[Box::into_pin]\([Box::new_in]\(x, alloc))</code>. Consider using
+ /// [`into_pin`](Box::into_pin) if you already have a `Box<T, A>`, or if you want to
+ /// construct a (pinned) `Box` in a different way than with [`Box::new_in`].
#[cfg(not(no_global_oom_handling))]
#[unstable(feature = "allocator_api", issue = "32838")]
#[rustc_const_unstable(feature = "const_box", issue = "92521")]
@@ -926,6 +957,7 @@ impl<T: ?Sized> Box<T> {
/// [`Layout`]: crate::Layout
#[stable(feature = "box_raw", since = "1.4.0")]
#[inline]
+ #[must_use = "call `drop(Box::from_raw(ptr))` if you intend to drop the `Box`"]
pub unsafe fn from_raw(raw: *mut T) -> Self {
unsafe { Self::from_raw_in(raw, Global) }
}
@@ -1160,19 +1192,44 @@ impl<T: ?Sized, A: Allocator> Box<T, A> {
unsafe { &mut *mem::ManuallyDrop::new(b).0.as_ptr() }
}
- /// Converts a `Box<T>` into a `Pin<Box<T>>`
+ /// Converts a `Box<T>` into a `Pin<Box<T>>`. If `T` does not implement [`Unpin`], then
+ /// `*boxed` will be pinned in memory and unable to be moved.
///
/// This conversion does not allocate on the heap and happens in place.
///
/// This is also available via [`From`].
- #[unstable(feature = "box_into_pin", issue = "62370")]
+ ///
+ /// Constructing and pinning a `Box` with <code>Box::into_pin([Box::new]\(x))</code>
+ /// can also be written more concisely using <code>[Box::pin]\(x)</code>.
+ /// This `into_pin` method is useful if you already have a `Box<T>`, or you are
+ /// constructing a (pinned) `Box` in a different way than with [`Box::new`].
+ ///
+ /// # Notes
+ ///
+ /// It's not recommended that crates add an impl like `From<Box<T>> for Pin<T>`,
+ /// as it'll introduce an ambiguity when calling `Pin::from`.
+ /// A demonstration of such a poor impl is shown below.
+ ///
+ /// ```compile_fail
+ /// # use std::pin::Pin;
+ /// struct Foo; // A type defined in this crate.
+ /// impl From<Box<()>> for Pin<Foo> {
+ /// fn from(_: Box<()>) -> Pin<Foo> {
+ /// Pin::new(Foo)
+ /// }
+ /// }
+ ///
+ /// let foo = Box::new(());
+ /// let bar = Pin::from(foo);
+ /// ```
+ #[stable(feature = "box_into_pin", since = "1.63.0")]
#[rustc_const_unstable(feature = "const_box", issue = "92521")]
pub const fn into_pin(boxed: Self) -> Pin<Self>
where
A: 'static,
{
// It's not possible to move or replace the insides of a `Pin<Box<T>>`
- // when `T: !Unpin`, so it's safe to pin it directly without any
+ // when `T: !Unpin`, so it's safe to pin it directly without any
// additional requirements.
unsafe { Pin::new_unchecked(boxed) }
}
@@ -1190,7 +1247,8 @@ unsafe impl<#[may_dangle] T: ?Sized, A: Allocator> Drop for Box<T, A> {
impl<T: Default> Default for Box<T> {
/// Creates a `Box<T>`, with the `Default` value for T.
fn default() -> Self {
- box T::default()
+ #[rustc_box]
+ Box::new(T::default())
}
}
@@ -1408,9 +1466,17 @@ impl<T: ?Sized, A: Allocator> const From<Box<T, A>> for Pin<Box<T, A>>
where
A: 'static,
{
- /// Converts a `Box<T>` into a `Pin<Box<T>>`
+ /// Converts a `Box<T>` into a `Pin<Box<T>>`. If `T` does not implement [`Unpin`], then
+ /// `*boxed` will be pinned in memory and unable to be moved.
///
/// This conversion does not allocate on the heap and happens in place.
+ ///
+ /// This is also available via [`Box::into_pin`].
+ ///
+ /// Constructing and pinning a `Box` with <code><Pin<Box\<T>>>::from([Box::new]\(x))</code>
+ /// can also be written more concisely using <code>[Box::pin]\(x)</code>.
+ /// This `From` implementation is useful if you already have a `Box<T>`, or you are
+ /// constructing a (pinned) `Box` in a different way than with [`Box::new`].
fn from(boxed: Box<T, A>) -> Self {
Box::into_pin(boxed)
}
@@ -1422,7 +1488,7 @@ impl<T: Copy> From<&[T]> for Box<[T]> {
/// Converts a `&[T]` into a `Box<[T]>`
///
/// This conversion allocates on the heap
- /// and performs a copy of `slice`.
+ /// and performs a copy of `slice` and its contents.
///
/// # Examples
/// ```rust
@@ -1554,10 +1620,27 @@ impl<T, const N: usize> From<[T; N]> for Box<[T]> {
/// println!("{boxed:?}");
/// ```
fn from(array: [T; N]) -> Box<[T]> {
- box array
+ #[rustc_box]
+ Box::new(array)
}
}
+/// Casts a boxed slice to a boxed array.
+///
+/// # Safety
+///
+/// `boxed_slice.len()` must be exactly `N`.
+unsafe fn boxed_slice_as_array_unchecked<T, A: Allocator, const N: usize>(
+ boxed_slice: Box<[T], A>,
+) -> Box<[T; N], A> {
+ debug_assert_eq!(boxed_slice.len(), N);
+
+ let (ptr, alloc) = Box::into_raw_with_allocator(boxed_slice);
+ // SAFETY: Pointer and allocator came from an existing box,
+ // and our safety condition requires that the length is exactly `N`
+ unsafe { Box::from_raw_in(ptr as *mut [T; N], alloc) }
+}
+
#[stable(feature = "boxed_slice_try_from", since = "1.43.0")]
impl<T, const N: usize> TryFrom<Box<[T]>> for Box<[T; N]> {
type Error = Box<[T]>;
@@ -1573,13 +1656,46 @@ impl<T, const N: usize> TryFrom<Box<[T]>> for Box<[T; N]> {
/// `boxed_slice.len()` does not equal `N`.
fn try_from(boxed_slice: Box<[T]>) -> Result<Self, Self::Error> {
if boxed_slice.len() == N {
- Ok(unsafe { Box::from_raw(Box::into_raw(boxed_slice) as *mut [T; N]) })
+ Ok(unsafe { boxed_slice_as_array_unchecked(boxed_slice) })
} else {
Err(boxed_slice)
}
}
}
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "boxed_array_try_from_vec", since = "1.66.0")]
+impl<T, const N: usize> TryFrom<Vec<T>> for Box<[T; N]> {
+ type Error = Vec<T>;
+
+ /// Attempts to convert a `Vec<T>` into a `Box<[T; N]>`.
+ ///
+ /// Like [`Vec::into_boxed_slice`], this is in-place if `vec.capacity() == N`,
+ /// but will require a reallocation otherwise.
+ ///
+ /// # Errors
+ ///
+ /// Returns the original `Vec<T>` in the `Err` variant if
+ /// `boxed_slice.len()` does not equal `N`.
+ ///
+ /// # Examples
+ ///
+ /// This can be used with [`vec!`] to create an array on the heap:
+ ///
+ /// ```
+ /// let state: Box<[f32; 100]> = vec![1.0; 100].try_into().unwrap();
+ /// assert_eq!(state.len(), 100);
+ /// ```
+ fn try_from(vec: Vec<T>) -> Result<Self, Self::Error> {
+ if vec.len() == N {
+ let boxed_slice = vec.into_boxed_slice();
+ Ok(unsafe { boxed_slice_as_array_unchecked(boxed_slice) })
+ } else {
+ Err(vec)
+ }
+ }
+}
+
impl<A: Allocator> Box<dyn Any, A> {
/// Attempt to downcast the box to a concrete type.
///
@@ -1869,7 +1985,7 @@ impl<I: ExactSizeIterator + ?Sized, A: Allocator> ExactSizeIterator for Box<I, A
impl<I: FusedIterator + ?Sized, A: Allocator> FusedIterator for Box<I, A> {}
#[stable(feature = "boxed_closure_impls", since = "1.35.0")]
-impl<Args, F: FnOnce<Args> + ?Sized, A: Allocator> FnOnce<Args> for Box<F, A> {
+impl<Args: Tuple, F: FnOnce<Args> + ?Sized, A: Allocator> FnOnce<Args> for Box<F, A> {
type Output = <F as FnOnce<Args>>::Output;
extern "rust-call" fn call_once(self, args: Args) -> Self::Output {
@@ -1878,20 +1994,20 @@ impl<Args, F: FnOnce<Args> + ?Sized, A: Allocator> FnOnce<Args> for Box<F, A> {
}
#[stable(feature = "boxed_closure_impls", since = "1.35.0")]
-impl<Args, F: FnMut<Args> + ?Sized, A: Allocator> FnMut<Args> for Box<F, A> {
+impl<Args: Tuple, F: FnMut<Args> + ?Sized, A: Allocator> FnMut<Args> for Box<F, A> {
extern "rust-call" fn call_mut(&mut self, args: Args) -> Self::Output {
<F as FnMut<Args>>::call_mut(self, args)
}
}
#[stable(feature = "boxed_closure_impls", since = "1.35.0")]
-impl<Args, F: Fn<Args> + ?Sized, A: Allocator> Fn<Args> for Box<F, A> {
+impl<Args: Tuple, F: Fn<Args> + ?Sized, A: Allocator> Fn<Args> for Box<F, A> {
extern "rust-call" fn call(&self, args: Args) -> Self::Output {
<F as Fn<Args>>::call(self, args)
}
}
-#[unstable(feature = "coerce_unsized", issue = "27732")]
+#[unstable(feature = "coerce_unsized", issue = "18598")]
impl<T: ?Sized + Unsize<U>, U: ?Sized, A: Allocator> CoerceUnsized<Box<U, A>> for Box<T, A> {}
#[unstable(feature = "dispatch_from_dyn", issue = "none")]
@@ -1973,8 +2089,7 @@ impl<T: ?Sized, A: Allocator> AsMut<T> for Box<T, A> {
* could have a method to project a Pin<T> from it.
*/
#[stable(feature = "pin", since = "1.33.0")]
-#[rustc_const_unstable(feature = "const_box", issue = "92521")]
-impl<T: ?Sized, A: Allocator> const Unpin for Box<T, A> where A: 'static {}
+impl<T: ?Sized, A: Allocator> Unpin for Box<T, A> where A: 'static {}
#[unstable(feature = "generator_trait", issue = "43122")]
impl<G: ?Sized + Generator<R> + Unpin, R, A: Allocator> Generator<R> for Box<G, A>
@@ -2026,3 +2141,292 @@ impl<S: ?Sized + AsyncIterator + Unpin> AsyncIterator for Box<S> {
(**self).size_hint()
}
}
+
+impl dyn Error {
+ #[inline]
+ #[stable(feature = "error_downcast", since = "1.3.0")]
+ #[rustc_allow_incoherent_impl]
+ /// Attempts to downcast the box to a concrete type.
+ pub fn downcast<T: Error + 'static>(self: Box<Self>) -> Result<Box<T>, Box<dyn Error>> {
+ if self.is::<T>() {
+ unsafe {
+ let raw: *mut dyn Error = Box::into_raw(self);
+ Ok(Box::from_raw(raw as *mut T))
+ }
+ } else {
+ Err(self)
+ }
+ }
+}
+
+impl dyn Error + Send {
+ #[inline]
+ #[stable(feature = "error_downcast", since = "1.3.0")]
+ #[rustc_allow_incoherent_impl]
+ /// Attempts to downcast the box to a concrete type.
+ pub fn downcast<T: Error + 'static>(self: Box<Self>) -> Result<Box<T>, Box<dyn Error + Send>> {
+ let err: Box<dyn Error> = self;
+ <dyn Error>::downcast(err).map_err(|s| unsafe {
+ // Reapply the `Send` marker.
+ mem::transmute::<Box<dyn Error>, Box<dyn Error + Send>>(s)
+ })
+ }
+}
+
+impl dyn Error + Send + Sync {
+ #[inline]
+ #[stable(feature = "error_downcast", since = "1.3.0")]
+ #[rustc_allow_incoherent_impl]
+ /// Attempts to downcast the box to a concrete type.
+ pub fn downcast<T: Error + 'static>(self: Box<Self>) -> Result<Box<T>, Box<Self>> {
+ let err: Box<dyn Error> = self;
+ <dyn Error>::downcast(err).map_err(|s| unsafe {
+ // Reapply the `Send + Sync` marker.
+ mem::transmute::<Box<dyn Error>, Box<dyn Error + Send + Sync>>(s)
+ })
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<'a, E: Error + 'a> From<E> for Box<dyn Error + 'a> {
+ /// Converts a type of [`Error`] into a box of dyn [`Error`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use std::fmt;
+ /// use std::mem;
+ ///
+ /// #[derive(Debug)]
+ /// struct AnError;
+ ///
+ /// impl fmt::Display for AnError {
+ /// fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ /// write!(f, "An error")
+ /// }
+ /// }
+ ///
+ /// impl Error for AnError {}
+ ///
+ /// let an_error = AnError;
+ /// assert!(0 == mem::size_of_val(&an_error));
+ /// let a_boxed_error = Box::<dyn Error>::from(an_error);
+ /// assert!(mem::size_of::<Box<dyn Error>>() == mem::size_of_val(&a_boxed_error))
+ /// ```
+ fn from(err: E) -> Box<dyn Error + 'a> {
+ Box::new(err)
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<'a, E: Error + Send + Sync + 'a> From<E> for Box<dyn Error + Send + Sync + 'a> {
+ /// Converts a type of [`Error`] + [`Send`] + [`Sync`] into a box of
+ /// dyn [`Error`] + [`Send`] + [`Sync`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use std::fmt;
+ /// use std::mem;
+ ///
+ /// #[derive(Debug)]
+ /// struct AnError;
+ ///
+ /// impl fmt::Display for AnError {
+ /// fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ /// write!(f, "An error")
+ /// }
+ /// }
+ ///
+ /// impl Error for AnError {}
+ ///
+ /// unsafe impl Send for AnError {}
+ ///
+ /// unsafe impl Sync for AnError {}
+ ///
+ /// let an_error = AnError;
+ /// assert!(0 == mem::size_of_val(&an_error));
+ /// let a_boxed_error = Box::<dyn Error + Send + Sync>::from(an_error);
+ /// assert!(
+ /// mem::size_of::<Box<dyn Error + Send + Sync>>() == mem::size_of_val(&a_boxed_error))
+ /// ```
+ fn from(err: E) -> Box<dyn Error + Send + Sync + 'a> {
+ Box::new(err)
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "rust1", since = "1.0.0")]
+impl From<String> for Box<dyn Error + Send + Sync> {
+ /// Converts a [`String`] into a box of dyn [`Error`] + [`Send`] + [`Sync`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use std::mem;
+ ///
+ /// let a_string_error = "a string error".to_string();
+ /// let a_boxed_error = Box::<dyn Error + Send + Sync>::from(a_string_error);
+ /// assert!(
+ /// mem::size_of::<Box<dyn Error + Send + Sync>>() == mem::size_of_val(&a_boxed_error))
+ /// ```
+ #[inline]
+ fn from(err: String) -> Box<dyn Error + Send + Sync> {
+ struct StringError(String);
+
+ impl Error for StringError {
+ #[allow(deprecated)]
+ fn description(&self) -> &str {
+ &self.0
+ }
+ }
+
+ impl fmt::Display for StringError {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fmt::Display::fmt(&self.0, f)
+ }
+ }
+
+ // Purposefully skip printing "StringError(..)"
+ impl fmt::Debug for StringError {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fmt::Debug::fmt(&self.0, f)
+ }
+ }
+
+ Box::new(StringError(err))
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "string_box_error", since = "1.6.0")]
+impl From<String> for Box<dyn Error> {
+ /// Converts a [`String`] into a box of dyn [`Error`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use std::mem;
+ ///
+ /// let a_string_error = "a string error".to_string();
+ /// let a_boxed_error = Box::<dyn Error>::from(a_string_error);
+ /// assert!(mem::size_of::<Box<dyn Error>>() == mem::size_of_val(&a_boxed_error))
+ /// ```
+ fn from(str_err: String) -> Box<dyn Error> {
+ let err1: Box<dyn Error + Send + Sync> = From::from(str_err);
+ let err2: Box<dyn Error> = err1;
+ err2
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<'a> From<&str> for Box<dyn Error + Send + Sync + 'a> {
+ /// Converts a [`str`] into a box of dyn [`Error`] + [`Send`] + [`Sync`].
+ ///
+ /// [`str`]: prim@str
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use std::mem;
+ ///
+ /// let a_str_error = "a str error";
+ /// let a_boxed_error = Box::<dyn Error + Send + Sync>::from(a_str_error);
+ /// assert!(
+ /// mem::size_of::<Box<dyn Error + Send + Sync>>() == mem::size_of_val(&a_boxed_error))
+ /// ```
+ #[inline]
+ fn from(err: &str) -> Box<dyn Error + Send + Sync + 'a> {
+ From::from(String::from(err))
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "string_box_error", since = "1.6.0")]
+impl From<&str> for Box<dyn Error> {
+ /// Converts a [`str`] into a box of dyn [`Error`].
+ ///
+ /// [`str`]: prim@str
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use std::mem;
+ ///
+ /// let a_str_error = "a str error";
+ /// let a_boxed_error = Box::<dyn Error>::from(a_str_error);
+ /// assert!(mem::size_of::<Box<dyn Error>>() == mem::size_of_val(&a_boxed_error))
+ /// ```
+ fn from(err: &str) -> Box<dyn Error> {
+ From::from(String::from(err))
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "cow_box_error", since = "1.22.0")]
+impl<'a, 'b> From<Cow<'b, str>> for Box<dyn Error + Send + Sync + 'a> {
+ /// Converts a [`Cow`] into a box of dyn [`Error`] + [`Send`] + [`Sync`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use std::mem;
+ /// use std::borrow::Cow;
+ ///
+ /// let a_cow_str_error = Cow::from("a str error");
+ /// let a_boxed_error = Box::<dyn Error + Send + Sync>::from(a_cow_str_error);
+ /// assert!(
+ /// mem::size_of::<Box<dyn Error + Send + Sync>>() == mem::size_of_val(&a_boxed_error))
+ /// ```
+ fn from(err: Cow<'b, str>) -> Box<dyn Error + Send + Sync + 'a> {
+ From::from(String::from(err))
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "cow_box_error", since = "1.22.0")]
+impl<'a> From<Cow<'a, str>> for Box<dyn Error> {
+ /// Converts a [`Cow`] into a box of dyn [`Error`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use std::mem;
+ /// use std::borrow::Cow;
+ ///
+ /// let a_cow_str_error = Cow::from("a str error");
+ /// let a_boxed_error = Box::<dyn Error>::from(a_cow_str_error);
+ /// assert!(mem::size_of::<Box<dyn Error>>() == mem::size_of_val(&a_boxed_error))
+ /// ```
+ fn from(err: Cow<'a, str>) -> Box<dyn Error> {
+ From::from(String::from(err))
+ }
+}
+
+#[stable(feature = "box_error", since = "1.8.0")]
+impl<T: core::error::Error> core::error::Error for Box<T> {
+ #[allow(deprecated, deprecated_in_future)]
+ fn description(&self) -> &str {
+ core::error::Error::description(&**self)
+ }
+
+ #[allow(deprecated)]
+ fn cause(&self) -> Option<&dyn core::error::Error> {
+ core::error::Error::cause(&**self)
+ }
+
+ fn source(&self) -> Option<&(dyn core::error::Error + 'static)> {
+ core::error::Error::source(&**self)
+ }
+}
diff --git a/rust/alloc/collections/mod.rs b/rust/alloc/collections/mod.rs
index 1eec265b28f8..2506065d158a 100644
--- a/rust/alloc/collections/mod.rs
+++ b/rust/alloc/collections/mod.rs
@@ -141,7 +141,7 @@ impl Display for TryReserveError {
" because the computed capacity exceeded the collection's maximum"
}
TryReserveErrorKind::AllocError { .. } => {
- " because the memory allocator returned a error"
+ " because the memory allocator returned an error"
}
};
fmt.write_str(reason)
@@ -154,3 +154,6 @@ trait SpecExtend<I: IntoIterator> {
/// Extends `self` with the contents of the given iterator.
fn spec_extend(&mut self, iter: I);
}
+
+#[stable(feature = "try_reserve", since = "1.57.0")]
+impl core::error::Error for TryReserveError {}
diff --git a/rust/alloc/lib.rs b/rust/alloc/lib.rs
index 3aebf83c9967..5f374378b0d4 100644
--- a/rust/alloc/lib.rs
+++ b/rust/alloc/lib.rs
@@ -5,7 +5,7 @@
//! This library provides smart pointers and collections for managing
//! heap-allocated values.
//!
-//! This library, like libcore, normally doesn’t need to be used directly
+//! This library, like core, normally doesn’t need to be used directly
//! since its contents are re-exported in the [`std` crate](../std/index.html).
//! Crates that use the `#![no_std]` attribute however will typically
//! not depend on `std`, so they’d use this crate instead.
@@ -58,10 +58,6 @@
//! [`Rc`]: rc
//! [`RefCell`]: core::cell
-// To run liballoc tests without x.py without ending up with two copies of liballoc, Miri needs to be
-// able to "empty" this crate. See <https://github.com/rust-lang/miri-test-libstd/issues/4>.
-// rustc itself never sets the feature, so this line has no affect there.
-#![cfg(any(not(feature = "miri-test-libstd"), test, doctest))]
#![allow(unused_attributes)]
#![stable(feature = "alloc", since = "1.36.0")]
#![doc(
@@ -75,23 +71,30 @@
any(not(feature = "miri-test-libstd"), test, doctest),
no_global_oom_handling,
not(no_global_oom_handling),
+ not(no_rc),
+ not(no_sync),
target_has_atomic = "ptr"
))]
#![no_std]
#![needs_allocator]
+// To run alloc tests without x.py without ending up with two copies of alloc, Miri needs to be
+// able to "empty" this crate. See <https://github.com/rust-lang/miri-test-libstd/issues/4>.
+// rustc itself never sets the feature, so this line has no affect there.
+#![cfg(any(not(feature = "miri-test-libstd"), test, doctest))]
//
// Lints:
#![deny(unsafe_op_in_unsafe_fn)]
+#![deny(fuzzy_provenance_casts)]
#![warn(deprecated_in_future)]
#![warn(missing_debug_implementations)]
#![warn(missing_docs)]
#![allow(explicit_outlives_requirements)]
//
// Library features:
-#![cfg_attr(not(no_global_oom_handling), feature(alloc_c_string))]
#![feature(alloc_layout_extra)]
#![feature(allocator_api)]
#![feature(array_chunks)]
+#![feature(array_into_iter_constructors)]
#![feature(array_methods)]
#![feature(array_windows)]
#![feature(assert_matches)]
@@ -99,39 +102,53 @@
#![feature(coerce_unsized)]
#![cfg_attr(not(no_global_oom_handling), feature(const_alloc_error))]
#![feature(const_box)]
-#![cfg_attr(not(no_global_oom_handling), feature(const_btree_new))]
+#![cfg_attr(not(no_global_oom_handling), feature(const_btree_len))]
#![cfg_attr(not(no_borrow), feature(const_cow_is_borrowed))]
#![feature(const_convert)]
#![feature(const_size_of_val)]
#![feature(const_align_of_val)]
#![feature(const_ptr_read)]
+#![feature(const_maybe_uninit_zeroed)]
#![feature(const_maybe_uninit_write)]
#![feature(const_maybe_uninit_as_mut_ptr)]
#![feature(const_refs_to_cell)]
-#![feature(core_c_str)]
#![feature(core_intrinsics)]
-#![feature(core_ffi_c)]
+#![feature(core_panic)]
#![feature(const_eval_select)]
#![feature(const_pin)]
+#![feature(const_waker)]
#![feature(cstr_from_bytes_until_nul)]
#![feature(dispatch_from_dyn)]
+#![feature(error_generic_member_access)]
+#![feature(error_in_core)]
#![feature(exact_size_is_empty)]
#![feature(extend_one)]
#![feature(fmt_internals)]
#![feature(fn_traits)]
#![feature(hasher_prefixfree_extras)]
+#![feature(inline_const)]
#![feature(inplace_iteration)]
+#![cfg_attr(test, feature(is_sorted))]
#![feature(iter_advance_by)]
+#![feature(iter_next_chunk)]
+#![feature(iter_repeat_n)]
#![feature(layout_for_ptr)]
#![feature(maybe_uninit_slice)]
+#![feature(maybe_uninit_uninit_array)]
+#![feature(maybe_uninit_uninit_array_transpose)]
#![cfg_attr(test, feature(new_uninit))]
#![feature(nonnull_slice_from_raw_parts)]
#![feature(pattern)]
+#![feature(pointer_byte_offsets)]
+#![feature(provide_any)]
#![feature(ptr_internals)]
#![feature(ptr_metadata)]
#![feature(ptr_sub_ptr)]
#![feature(receiver_trait)]
+#![feature(saturating_int_impl)]
#![feature(set_ptr_value)]
+#![feature(sized_type_properties)]
+#![feature(slice_from_ptr_range)]
#![feature(slice_group_by)]
#![feature(slice_ptr_get)]
#![feature(slice_ptr_len)]
@@ -141,15 +158,17 @@
#![feature(trusted_len)]
#![feature(trusted_random_access)]
#![feature(try_trait_v2)]
+#![feature(tuple_trait)]
#![feature(unchecked_math)]
#![feature(unicode_internals)]
#![feature(unsize)]
+#![feature(utf8_chunks)]
+#![feature(std_internals)]
//
// Language features:
#![feature(allocator_internals)]
#![feature(allow_internal_unstable)]
#![feature(associated_type_bounds)]
-#![feature(box_syntax)]
#![feature(cfg_sanitize)]
#![feature(const_deref)]
#![feature(const_mut_refs)]
@@ -163,19 +182,21 @@
#![cfg_attr(not(test), feature(generator_trait))]
#![feature(hashmap_internals)]
#![feature(lang_items)]
-#![feature(let_else)]
#![feature(min_specialization)]
#![feature(negative_impls)]
#![feature(never_type)]
-#![feature(nll)] // Not necessary, but here to test the `nll` feature.
#![feature(rustc_allow_const_fn_unstable)]
#![feature(rustc_attrs)]
+#![feature(pointer_is_aligned)]
#![feature(slice_internals)]
#![feature(staged_api)]
+#![feature(stmt_expr_attributes)]
#![cfg_attr(test, feature(test))]
#![feature(unboxed_closures)]
#![feature(unsized_fn_params)]
#![feature(c_unwind)]
+#![feature(with_negative_coherence)]
+#![cfg_attr(test, feature(panic_update_hook))]
//
// Rustdoc features:
#![feature(doc_cfg)]
@@ -192,6 +213,8 @@
extern crate std;
#[cfg(test)]
extern crate test;
+#[cfg(test)]
+mod testing;
// Module with internal macros used by other modules (needs to be included before other modules).
#[cfg(not(no_macros))]
@@ -218,7 +241,7 @@ mod boxed {
#[cfg(not(no_borrow))]
pub mod borrow;
pub mod collections;
-#[cfg(not(no_global_oom_handling))]
+#[cfg(all(not(no_rc), not(no_sync), not(no_global_oom_handling)))]
pub mod ffi;
#[cfg(not(no_fmt))]
pub mod fmt;
@@ -229,10 +252,9 @@ pub mod slice;
pub mod str;
#[cfg(not(no_string))]
pub mod string;
-#[cfg(not(no_sync))]
-#[cfg(target_has_atomic = "ptr")]
+#[cfg(all(not(no_rc), not(no_sync), target_has_atomic = "ptr"))]
pub mod sync;
-#[cfg(all(not(no_global_oom_handling), target_has_atomic = "ptr"))]
+#[cfg(all(not(no_global_oom_handling), not(no_rc), not(no_sync), target_has_atomic = "ptr"))]
pub mod task;
#[cfg(test)]
mod tests;
@@ -243,3 +265,20 @@ pub mod vec;
pub mod __export {
pub use core::format_args;
}
+
+#[cfg(test)]
+#[allow(dead_code)] // Not used in all configurations
+pub(crate) mod test_helpers {
+ /// Copied from `std::test_helpers::test_rng`, since these tests rely on the
+ /// seed not being the same for every RNG invocation too.
+ pub(crate) fn test_rng() -> rand_xorshift::XorShiftRng {
+ use std::hash::{BuildHasher, Hash, Hasher};
+ let mut hasher = std::collections::hash_map::RandomState::new().build_hasher();
+ std::panic::Location::caller().hash(&mut hasher);
+ let hc64 = hasher.finish();
+ let seed_vec =
+ hc64.to_le_bytes().into_iter().chain(0u8..8).collect::<crate::vec::Vec<u8>>();
+ let seed: [u8; 16] = seed_vec.as_slice().try_into().unwrap();
+ rand::SeedableRng::from_seed(seed)
+ }
+}
diff --git a/rust/alloc/raw_vec.rs b/rust/alloc/raw_vec.rs
index eb77db5def55..5db87eac53b7 100644
--- a/rust/alloc/raw_vec.rs
+++ b/rust/alloc/raw_vec.rs
@@ -5,7 +5,7 @@
use core::alloc::LayoutError;
use core::cmp;
use core::intrinsics;
-use core::mem::{self, ManuallyDrop, MaybeUninit};
+use core::mem::{self, ManuallyDrop, MaybeUninit, SizedTypeProperties};
use core::ops::Drop;
use core::ptr::{self, NonNull, Unique};
use core::slice;
@@ -177,7 +177,7 @@ impl<T, A: Allocator> RawVec<T, A> {
#[cfg(not(no_global_oom_handling))]
fn allocate_in(capacity: usize, init: AllocInit, alloc: A) -> Self {
// Don't allocate here because `Drop` will not deallocate when `capacity` is 0.
- if mem::size_of::<T>() == 0 || capacity == 0 {
+ if T::IS_ZST || capacity == 0 {
Self::new_in(alloc)
} else {
// We avoid `unwrap_or_else` here because it bloats the amount of
@@ -212,7 +212,7 @@ impl<T, A: Allocator> RawVec<T, A> {
fn try_allocate_in(capacity: usize, init: AllocInit, alloc: A) -> Result<Self, TryReserveError> {
// Don't allocate here because `Drop` will not deallocate when `capacity` is 0.
- if mem::size_of::<T>() == 0 || capacity == 0 {
+ if T::IS_ZST || capacity == 0 {
return Ok(Self::new_in(alloc));
}
@@ -262,7 +262,7 @@ impl<T, A: Allocator> RawVec<T, A> {
/// This will always be `usize::MAX` if `T` is zero-sized.
#[inline(always)]
pub fn capacity(&self) -> usize {
- if mem::size_of::<T>() == 0 { usize::MAX } else { self.cap }
+ if T::IS_ZST { usize::MAX } else { self.cap }
}
/// Returns a shared reference to the allocator backing this `RawVec`.
@@ -271,7 +271,7 @@ impl<T, A: Allocator> RawVec<T, A> {
}
fn current_memory(&self) -> Option<(NonNull<u8>, Layout)> {
- if mem::size_of::<T>() == 0 || self.cap == 0 {
+ if T::IS_ZST || self.cap == 0 {
None
} else {
// We have an allocated chunk of memory, so we can bypass runtime
@@ -419,7 +419,7 @@ impl<T, A: Allocator> RawVec<T, A> {
// This is ensured by the calling contexts.
debug_assert!(additional > 0);
- if mem::size_of::<T>() == 0 {
+ if T::IS_ZST {
// Since we return a capacity of `usize::MAX` when `elem_size` is
// 0, getting to here necessarily means the `RawVec` is overfull.
return Err(CapacityOverflow.into());
@@ -445,7 +445,7 @@ impl<T, A: Allocator> RawVec<T, A> {
// `grow_amortized`, but this method is usually instantiated less often so
// it's less critical.
fn grow_exact(&mut self, len: usize, additional: usize) -> Result<(), TryReserveError> {
- if mem::size_of::<T>() == 0 {
+ if T::IS_ZST {
// Since we return a capacity of `usize::MAX` when the type size is
// 0, getting to here necessarily means the `RawVec` is overfull.
return Err(CapacityOverflow.into());
@@ -460,7 +460,7 @@ impl<T, A: Allocator> RawVec<T, A> {
Ok(())
}
- #[allow(dead_code)]
+ #[cfg(not(no_global_oom_handling))]
fn shrink(&mut self, cap: usize) -> Result<(), TryReserveError> {
assert!(cap <= self.capacity(), "Tried to shrink to a larger capacity");
diff --git a/rust/alloc/slice.rs b/rust/alloc/slice.rs
index e444e97fa145..245e01590df7 100644
--- a/rust/alloc/slice.rs
+++ b/rust/alloc/slice.rs
@@ -1,84 +1,14 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
-//! A dynamically-sized view into a contiguous sequence, `[T]`.
+//! Utilities for the slice primitive type.
//!
//! *[See also the slice primitive type](slice).*
//!
-//! Slices are a view into a block of memory represented as a pointer and a
-//! length.
+//! Most of the structs in this module are iterator types which can only be created
+//! using a certain function. For example, `slice.iter()` yields an [`Iter`].
//!
-//! ```
-//! // slicing a Vec
-//! let vec = vec![1, 2, 3];
-//! let int_slice = &vec[..];
-//! // coercing an array to a slice
-//! let str_slice: &[&str] = &["one", "two", "three"];
-//! ```
-//!
-//! Slices are either mutable or shared. The shared slice type is `&[T]`,
-//! while the mutable slice type is `&mut [T]`, where `T` represents the element
-//! type. For example, you can mutate the block of memory that a mutable slice
-//! points to:
-//!
-//! ```
-//! let x = &mut [1, 2, 3];
-//! x[1] = 7;
-//! assert_eq!(x, &[1, 7, 3]);
-//! ```
-//!
-//! Here are some of the things this module contains:
-//!
-//! ## Structs
-//!
-//! There are several structs that are useful for slices, such as [`Iter`], which
-//! represents iteration over a slice.
-//!
-//! ## Trait Implementations
-//!
-//! There are several implementations of common traits for slices. Some examples
-//! include:
-//!
-//! * [`Clone`]
-//! * [`Eq`], [`Ord`] - for slices whose element type are [`Eq`] or [`Ord`].
-//! * [`Hash`] - for slices whose element type is [`Hash`].
-//!
-//! ## Iteration
-//!
-//! The slices implement `IntoIterator`. The iterator yields references to the
-//! slice elements.
-//!
-//! ```
-//! let numbers = &[0, 1, 2];
-//! for n in numbers {
-//! println!("{n} is a number!");
-//! }
-//! ```
-//!
-//! The mutable slice yields mutable references to the elements:
-//!
-//! ```
-//! let mut scores = [7, 8, 9];
-//! for score in &mut scores[..] {
-//! *score += 1;
-//! }
-//! ```
-//!
-//! This iterator yields mutable references to the slice's elements, so while
-//! the element type of the slice is `i32`, the element type of the iterator is
-//! `&mut i32`.
-//!
-//! * [`.iter`] and [`.iter_mut`] are the explicit methods to return the default
-//! iterators.
-//! * Further methods that return iterators are [`.split`], [`.splitn`],
-//! [`.chunks`], [`.windows`] and more.
-//!
-//! [`Hash`]: core::hash::Hash
-//! [`.iter`]: slice::iter
-//! [`.iter_mut`]: slice::iter_mut
-//! [`.split`]: slice::split
-//! [`.splitn`]: slice::splitn
-//! [`.chunks`]: slice::chunks
-//! [`.windows`]: slice::windows
+//! A few functions are provided to create a slice from a value reference
+//! or from a raw pointer.
#![stable(feature = "rust1", since = "1.0.0")]
// Many of the usings in this module are only used in the test configuration.
// It's cleaner to just turn off the unused_imports warning than to fix them.
@@ -88,20 +18,23 @@ use core::borrow::{Borrow, BorrowMut};
#[cfg(not(no_global_oom_handling))]
use core::cmp::Ordering::{self, Less};
#[cfg(not(no_global_oom_handling))]
-use core::mem;
-#[cfg(not(no_global_oom_handling))]
-use core::mem::size_of;
+use core::mem::{self, SizedTypeProperties};
#[cfg(not(no_global_oom_handling))]
use core::ptr;
+#[cfg(not(no_global_oom_handling))]
+use core::slice::sort;
use crate::alloc::Allocator;
#[cfg(not(no_global_oom_handling))]
-use crate::alloc::Global;
+use crate::alloc::{self, Global};
#[cfg(not(no_global_oom_handling))]
use crate::borrow::ToOwned;
use crate::boxed::Box;
use crate::vec::Vec;
+#[cfg(test)]
+mod tests;
+
#[unstable(feature = "slice_range", issue = "76393")]
pub use core::slice::range;
#[unstable(feature = "array_chunks", issue = "74985")]
@@ -116,6 +49,8 @@ pub use core::slice::EscapeAscii;
pub use core::slice::SliceIndex;
#[stable(feature = "from_ref", since = "1.28.0")]
pub use core::slice::{from_mut, from_ref};
+#[unstable(feature = "slice_from_ptr_range", issue = "89792")]
+pub use core::slice::{from_mut_ptr_range, from_ptr_range};
#[stable(feature = "rust1", since = "1.0.0")]
pub use core::slice::{from_raw_parts, from_raw_parts_mut};
#[stable(feature = "rust1", since = "1.0.0")]
@@ -275,7 +210,7 @@ impl<T> [T] {
where
T: Ord,
{
- merge_sort(self, |a, b| a.lt(b));
+ stable_sort(self, T::lt);
}
/// Sorts the slice with a comparator function.
@@ -331,7 +266,7 @@ impl<T> [T] {
where
F: FnMut(&T, &T) -> Ordering,
{
- merge_sort(self, |a, b| compare(a, b) == Less);
+ stable_sort(self, |a, b| compare(a, b) == Less);
}
/// Sorts the slice with a key extraction function.
@@ -374,7 +309,7 @@ impl<T> [T] {
F: FnMut(&T) -> K,
K: Ord,
{
- merge_sort(self, |a, b| f(a).lt(&f(b)));
+ stable_sort(self, |a, b| f(a).lt(&f(b)));
}
/// Sorts the slice with a key extraction function.
@@ -530,7 +465,7 @@ impl<T> [T] {
hack::into_vec(self)
}
- /// Creates a vector by repeating a slice `n` times.
+ /// Creates a vector by copying a slice `n` times.
///
/// # Panics
///
@@ -725,7 +660,7 @@ impl [u8] {
///
/// ```error
/// error[E0207]: the type parameter `T` is not constrained by the impl trait, self type, or predica
-/// --> src/liballoc/slice.rs:608:6
+/// --> library/alloc/src/slice.rs:608:6
/// |
/// 608 | impl<T: Clone, V: Borrow<[T]>> Concat for [V] {
/// | ^ unconstrained type parameter
@@ -836,14 +771,14 @@ impl<T: Clone, V: Borrow<[T]>> Join<&[T]> for [V] {
////////////////////////////////////////////////////////////////////////////////
#[stable(feature = "rust1", since = "1.0.0")]
-impl<T> Borrow<[T]> for Vec<T> {
+impl<T, A: Allocator> Borrow<[T]> for Vec<T, A> {
fn borrow(&self) -> &[T] {
&self[..]
}
}
#[stable(feature = "rust1", since = "1.0.0")]
-impl<T> BorrowMut<[T]> for Vec<T> {
+impl<T, A: Allocator> BorrowMut<[T]> for Vec<T, A> {
fn borrow_mut(&mut self) -> &mut [T] {
&mut self[..]
}
@@ -881,324 +816,52 @@ impl<T: Clone> ToOwned for [T] {
// Sorting
////////////////////////////////////////////////////////////////////////////////
-/// Inserts `v[0]` into pre-sorted sequence `v[1..]` so that whole `v[..]` becomes sorted.
-///
-/// This is the integral subroutine of insertion sort.
+#[inline]
#[cfg(not(no_global_oom_handling))]
-fn insert_head<T, F>(v: &mut [T], is_less: &mut F)
+fn stable_sort<T, F>(v: &mut [T], mut is_less: F)
where
F: FnMut(&T, &T) -> bool,
{
- if v.len() >= 2 && is_less(&v[1], &v[0]) {
- unsafe {
- // There are three ways to implement insertion here:
- //
- // 1. Swap adjacent elements until the first one gets to its final destination.
- // However, this way we copy data around more than is necessary. If elements are big
- // structures (costly to copy), this method will be slow.
- //
- // 2. Iterate until the right place for the first element is found. Then shift the
- // elements succeeding it to make room for it and finally place it into the
- // remaining hole. This is a good method.
- //
- // 3. Copy the first element into a temporary variable. Iterate until the right place
- // for it is found. As we go along, copy every traversed element into the slot
- // preceding it. Finally, copy data from the temporary variable into the remaining
- // hole. This method is very good. Benchmarks demonstrated slightly better
- // performance than with the 2nd method.
- //
- // All methods were benchmarked, and the 3rd showed best results. So we chose that one.
- let tmp = mem::ManuallyDrop::new(ptr::read(&v[0]));
-
- // Intermediate state of the insertion process is always tracked by `hole`, which
- // serves two purposes:
- // 1. Protects integrity of `v` from panics in `is_less`.
- // 2. Fills the remaining hole in `v` in the end.
- //
- // Panic safety:
- //
- // If `is_less` panics at any point during the process, `hole` will get dropped and
- // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it
- // initially held exactly once.
- let mut hole = InsertionHole { src: &*tmp, dest: &mut v[1] };
- ptr::copy_nonoverlapping(&v[1], &mut v[0], 1);
-
- for i in 2..v.len() {
- if !is_less(&v[i], &*tmp) {
- break;
- }
- ptr::copy_nonoverlapping(&v[i], &mut v[i - 1], 1);
- hole.dest = &mut v[i];
- }
- // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
- }
- }
-
- // When dropped, copies from `src` into `dest`.
- struct InsertionHole<T> {
- src: *const T,
- dest: *mut T,
- }
-
- impl<T> Drop for InsertionHole<T> {
- fn drop(&mut self) {
- unsafe {
- ptr::copy_nonoverlapping(self.src, self.dest, 1);
- }
- }
+ if T::IS_ZST {
+ // Sorting has no meaningful behavior on zero-sized types. Do nothing.
+ return;
}
-}
-
-/// Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `buf` as temporary storage, and
-/// stores the result into `v[..]`.
-///
-/// # Safety
-///
-/// The two slices must be non-empty and `mid` must be in bounds. Buffer `buf` must be long enough
-/// to hold a copy of the shorter slice. Also, `T` must not be a zero-sized type.
-#[cfg(not(no_global_oom_handling))]
-unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, is_less: &mut F)
-where
- F: FnMut(&T, &T) -> bool,
-{
- let len = v.len();
- let v = v.as_mut_ptr();
- let (v_mid, v_end) = unsafe { (v.add(mid), v.add(len)) };
- // The merge process first copies the shorter run into `buf`. Then it traces the newly copied
- // run and the longer run forwards (or backwards), comparing their next unconsumed elements and
- // copying the lesser (or greater) one into `v`.
- //
- // As soon as the shorter run is fully consumed, the process is done. If the longer run gets
- // consumed first, then we must copy whatever is left of the shorter run into the remaining
- // hole in `v`.
- //
- // Intermediate state of the process is always tracked by `hole`, which serves two purposes:
- // 1. Protects integrity of `v` from panics in `is_less`.
- // 2. Fills the remaining hole in `v` if the longer run gets consumed first.
- //
- // Panic safety:
- //
- // If `is_less` panics at any point during the process, `hole` will get dropped and fill the
- // hole in `v` with the unconsumed range in `buf`, thus ensuring that `v` still holds every
- // object it initially held exactly once.
- let mut hole;
+ let elem_alloc_fn = |len: usize| -> *mut T {
+ // SAFETY: Creating the layout is safe as long as merge_sort never calls this with len >
+ // v.len(). Alloc in general will only be used as 'shadow-region' to store temporary swap
+ // elements.
+ unsafe { alloc::alloc(alloc::Layout::array::<T>(len).unwrap_unchecked()) as *mut T }
+ };
- if mid <= len - mid {
- // The left run is shorter.
+ let elem_dealloc_fn = |buf_ptr: *mut T, len: usize| {
+ // SAFETY: Creating the layout is safe as long as merge_sort never calls this with len >
+ // v.len(). The caller must ensure that buf_ptr was created by elem_alloc_fn with the same
+ // len.
unsafe {
- ptr::copy_nonoverlapping(v, buf, mid);
- hole = MergeHole { start: buf, end: buf.add(mid), dest: v };
+ alloc::dealloc(buf_ptr as *mut u8, alloc::Layout::array::<T>(len).unwrap_unchecked());
}
+ };
- // Initially, these pointers point to the beginnings of their arrays.
- let left = &mut hole.start;
- let mut right = v_mid;
- let out = &mut hole.dest;
-
- while *left < hole.end && right < v_end {
- // Consume the lesser side.
- // If equal, prefer the left run to maintain stability.
- unsafe {
- let to_copy = if is_less(&*right, &**left) {
- get_and_increment(&mut right)
- } else {
- get_and_increment(left)
- };
- ptr::copy_nonoverlapping(to_copy, get_and_increment(out), 1);
- }
- }
- } else {
- // The right run is shorter.
+ let run_alloc_fn = |len: usize| -> *mut sort::TimSortRun {
+ // SAFETY: Creating the layout is safe as long as merge_sort never calls this with an
+ // obscene length or 0.
unsafe {
- ptr::copy_nonoverlapping(v_mid, buf, len - mid);
- hole = MergeHole { start: buf, end: buf.add(len - mid), dest: v_mid };
+ alloc::alloc(alloc::Layout::array::<sort::TimSortRun>(len).unwrap_unchecked())
+ as *mut sort::TimSortRun
}
+ };
- // Initially, these pointers point past the ends of their arrays.
- let left = &mut hole.dest;
- let right = &mut hole.end;
- let mut out = v_end;
-
- while v < *left && buf < *right {
- // Consume the greater side.
- // If equal, prefer the right run to maintain stability.
- unsafe {
- let to_copy = if is_less(&*right.offset(-1), &*left.offset(-1)) {
- decrement_and_get(left)
- } else {
- decrement_and_get(right)
- };
- ptr::copy_nonoverlapping(to_copy, decrement_and_get(&mut out), 1);
- }
- }
- }
- // Finally, `hole` gets dropped. If the shorter run was not fully consumed, whatever remains of
- // it will now be copied into the hole in `v`.
-
- unsafe fn get_and_increment<T>(ptr: &mut *mut T) -> *mut T {
- let old = *ptr;
- *ptr = unsafe { ptr.offset(1) };
- old
- }
-
- unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T {
- *ptr = unsafe { ptr.offset(-1) };
- *ptr
- }
-
- // When dropped, copies the range `start..end` into `dest..`.
- struct MergeHole<T> {
- start: *mut T,
- end: *mut T,
- dest: *mut T,
- }
-
- impl<T> Drop for MergeHole<T> {
- fn drop(&mut self) {
- // `T` is not a zero-sized type, and these are pointers into a slice's elements.
- unsafe {
- let len = self.end.sub_ptr(self.start);
- ptr::copy_nonoverlapping(self.start, self.dest, len);
- }
- }
- }
-}
-
-/// This merge sort borrows some (but not all) ideas from TimSort, which is described in detail
-/// [here](https://github.com/python/cpython/blob/main/Objects/listsort.txt).
-///
-/// The algorithm identifies strictly descending and non-descending subsequences, which are called
-/// natural runs. There is a stack of pending runs yet to be merged. Each newly found run is pushed
-/// onto the stack, and then some pairs of adjacent runs are merged until these two invariants are
-/// satisfied:
-///
-/// 1. for every `i` in `1..runs.len()`: `runs[i - 1].len > runs[i].len`
-/// 2. for every `i` in `2..runs.len()`: `runs[i - 2].len > runs[i - 1].len + runs[i].len`
-///
-/// The invariants ensure that the total running time is *O*(*n* \* log(*n*)) worst-case.
-#[cfg(not(no_global_oom_handling))]
-fn merge_sort<T, F>(v: &mut [T], mut is_less: F)
-where
- F: FnMut(&T, &T) -> bool,
-{
- // Slices of up to this length get sorted using insertion sort.
- const MAX_INSERTION: usize = 20;
- // Very short runs are extended using insertion sort to span at least this many elements.
- const MIN_RUN: usize = 10;
-
- // Sorting has no meaningful behavior on zero-sized types.
- if size_of::<T>() == 0 {
- return;
- }
-
- let len = v.len();
-
- // Short arrays get sorted in-place via insertion sort to avoid allocations.
- if len <= MAX_INSERTION {
- if len >= 2 {
- for i in (0..len - 1).rev() {
- insert_head(&mut v[i..], &mut is_less);
- }
- }
- return;
- }
-
- // Allocate a buffer to use as scratch memory. We keep the length 0 so we can keep in it
- // shallow copies of the contents of `v` without risking the dtors running on copies if
- // `is_less` panics. When merging two sorted runs, this buffer holds a copy of the shorter run,
- // which will always have length at most `len / 2`.
- let mut buf = Vec::with_capacity(len / 2);
-
- // In order to identify natural runs in `v`, we traverse it backwards. That might seem like a
- // strange decision, but consider the fact that merges more often go in the opposite direction
- // (forwards). According to benchmarks, merging forwards is slightly faster than merging
- // backwards. To conclude, identifying runs by traversing backwards improves performance.
- let mut runs = vec![];
- let mut end = len;
- while end > 0 {
- // Find the next natural run, and reverse it if it's strictly descending.
- let mut start = end - 1;
- if start > 0 {
- start -= 1;
- unsafe {
- if is_less(v.get_unchecked(start + 1), v.get_unchecked(start)) {
- while start > 0 && is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
- start -= 1;
- }
- v[start..end].reverse();
- } else {
- while start > 0 && !is_less(v.get_unchecked(start), v.get_unchecked(start - 1))
- {
- start -= 1;
- }
- }
- }
- }
-
- // Insert some more elements into the run if it's too short. Insertion sort is faster than
- // merge sort on short sequences, so this significantly improves performance.
- while start > 0 && end - start < MIN_RUN {
- start -= 1;
- insert_head(&mut v[start..end], &mut is_less);
- }
-
- // Push this run onto the stack.
- runs.push(Run { start, len: end - start });
- end = start;
-
- // Merge some pairs of adjacent runs to satisfy the invariants.
- while let Some(r) = collapse(&runs) {
- let left = runs[r + 1];
- let right = runs[r];
- unsafe {
- merge(
- &mut v[left.start..right.start + right.len],
- left.len,
- buf.as_mut_ptr(),
- &mut is_less,
- );
- }
- runs[r] = Run { start: left.start, len: left.len + right.len };
- runs.remove(r + 1);
- }
- }
-
- // Finally, exactly one run must remain in the stack.
- debug_assert!(runs.len() == 1 && runs[0].start == 0 && runs[0].len == len);
-
- // Examines the stack of runs and identifies the next pair of runs to merge. More specifically,
- // if `Some(r)` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the
- // algorithm should continue building a new run instead, `None` is returned.
- //
- // TimSort is infamous for its buggy implementations, as described here:
- // http://envisage-project.eu/timsort-specification-and-verification/
- //
- // The gist of the story is: we must enforce the invariants on the top four runs on the stack.
- // Enforcing them on just top three is not sufficient to ensure that the invariants will still
- // hold for *all* runs in the stack.
- //
- // This function correctly checks invariants for the top four runs. Additionally, if the top
- // run starts at index 0, it will always demand a merge operation until the stack is fully
- // collapsed, in order to complete the sort.
- #[inline]
- fn collapse(runs: &[Run]) -> Option<usize> {
- let n = runs.len();
- if n >= 2
- && (runs[n - 1].start == 0
- || runs[n - 2].len <= runs[n - 1].len
- || (n >= 3 && runs[n - 3].len <= runs[n - 2].len + runs[n - 1].len)
- || (n >= 4 && runs[n - 4].len <= runs[n - 3].len + runs[n - 2].len))
- {
- if n >= 3 && runs[n - 3].len < runs[n - 1].len { Some(n - 3) } else { Some(n - 2) }
- } else {
- None
+ let run_dealloc_fn = |buf_ptr: *mut sort::TimSortRun, len: usize| {
+ // SAFETY: The caller must ensure that buf_ptr was created by elem_alloc_fn with the same
+ // len.
+ unsafe {
+ alloc::dealloc(
+ buf_ptr as *mut u8,
+ alloc::Layout::array::<sort::TimSortRun>(len).unwrap_unchecked(),
+ );
}
- }
+ };
- #[derive(Clone, Copy)]
- struct Run {
- start: usize,
- len: usize,
- }
+ sort::merge_sort(v, &mut is_less, elem_alloc_fn, elem_dealloc_fn, run_alloc_fn, run_dealloc_fn);
}
diff --git a/rust/alloc/vec/drain.rs b/rust/alloc/vec/drain.rs
index b6a5f98e4fcd..d503d2f478ce 100644
--- a/rust/alloc/vec/drain.rs
+++ b/rust/alloc/vec/drain.rs
@@ -3,7 +3,7 @@
use crate::alloc::{Allocator, Global};
use core::fmt;
use core::iter::{FusedIterator, TrustedLen};
-use core::mem;
+use core::mem::{self, ManuallyDrop, SizedTypeProperties};
use core::ptr::{self, NonNull};
use core::slice::{self};
@@ -67,6 +67,77 @@ impl<'a, T, A: Allocator> Drain<'a, T, A> {
pub fn allocator(&self) -> &A {
unsafe { self.vec.as_ref().allocator() }
}
+
+ /// Keep unyielded elements in the source `Vec`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(drain_keep_rest)]
+ ///
+ /// let mut vec = vec!['a', 'b', 'c'];
+ /// let mut drain = vec.drain(..);
+ ///
+ /// assert_eq!(drain.next().unwrap(), 'a');
+ ///
+ /// // This call keeps 'b' and 'c' in the vec.
+ /// drain.keep_rest();
+ ///
+ /// // If we wouldn't call `keep_rest()`,
+ /// // `vec` would be empty.
+ /// assert_eq!(vec, ['b', 'c']);
+ /// ```
+ #[unstable(feature = "drain_keep_rest", issue = "101122")]
+ pub fn keep_rest(self) {
+ // At this moment layout looks like this:
+ //
+ // [head] [yielded by next] [unyielded] [yielded by next_back] [tail]
+ // ^-- start \_________/-- unyielded_len \____/-- self.tail_len
+ // ^-- unyielded_ptr ^-- tail
+ //
+ // Normally `Drop` impl would drop [unyielded] and then move [tail] to the `start`.
+ // Here we want to
+ // 1. Move [unyielded] to `start`
+ // 2. Move [tail] to a new start at `start + len(unyielded)`
+ // 3. Update length of the original vec to `len(head) + len(unyielded) + len(tail)`
+ // a. In case of ZST, this is the only thing we want to do
+ // 4. Do *not* drop self, as everything is put in a consistent state already, there is nothing to do
+ let mut this = ManuallyDrop::new(self);
+
+ unsafe {
+ let source_vec = this.vec.as_mut();
+
+ let start = source_vec.len();
+ let tail = this.tail_start;
+
+ let unyielded_len = this.iter.len();
+ let unyielded_ptr = this.iter.as_slice().as_ptr();
+
+ // ZSTs have no identity, so we don't need to move them around.
+ let needs_move = mem::size_of::<T>() != 0;
+
+ if needs_move {
+ let start_ptr = source_vec.as_mut_ptr().add(start);
+
+ // memmove back unyielded elements
+ if unyielded_ptr != start_ptr {
+ let src = unyielded_ptr;
+ let dst = start_ptr;
+
+ ptr::copy(src, dst, unyielded_len);
+ }
+
+ // memmove back untouched tail
+ if tail != (start + unyielded_len) {
+ let src = source_vec.as_ptr().add(tail);
+ let dst = start_ptr.add(unyielded_len);
+ ptr::copy(src, dst, this.tail_len);
+ }
+ }
+
+ source_vec.set_len(start + unyielded_len + this.tail_len);
+ }
+ }
}
#[stable(feature = "vec_drain_as_slice", since = "1.46.0")]
@@ -133,7 +204,7 @@ impl<T, A: Allocator> Drop for Drain<'_, T, A> {
let mut vec = self.vec;
- if mem::size_of::<T>() == 0 {
+ if T::IS_ZST {
// ZSTs have no identity, so we don't need to move them around, we only need to drop the correct amount.
// this can be achieved by manipulating the Vec length instead of moving values out from `iter`.
unsafe {
@@ -154,9 +225,9 @@ impl<T, A: Allocator> Drop for Drain<'_, T, A> {
}
// as_slice() must only be called when iter.len() is > 0 because
- // vec::Splice modifies vec::Drain fields and may grow the vec which would invalidate
- // the iterator's internal pointers. Creating a reference to deallocated memory
- // is invalid even when it is zero-length
+ // it also gets touched by vec::Splice which may turn it into a dangling pointer
+ // which would make it and the vec pointer point to different allocations which would
+ // lead to invalid pointer arithmetic below.
let drop_ptr = iter.as_slice().as_ptr();
unsafe {
diff --git a/rust/alloc/vec/drain_filter.rs b/rust/alloc/vec/drain_filter.rs
index b04fce041622..4b019220657d 100644
--- a/rust/alloc/vec/drain_filter.rs
+++ b/rust/alloc/vec/drain_filter.rs
@@ -1,8 +1,9 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
use crate::alloc::{Allocator, Global};
-use core::ptr::{self};
-use core::slice::{self};
+use core::mem::{self, ManuallyDrop};
+use core::ptr;
+use core::slice;
use super::Vec;
@@ -56,6 +57,61 @@ where
pub fn allocator(&self) -> &A {
self.vec.allocator()
}
+
+ /// Keep unyielded elements in the source `Vec`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(drain_filter)]
+ /// #![feature(drain_keep_rest)]
+ ///
+ /// let mut vec = vec!['a', 'b', 'c'];
+ /// let mut drain = vec.drain_filter(|_| true);
+ ///
+ /// assert_eq!(drain.next().unwrap(), 'a');
+ ///
+ /// // This call keeps 'b' and 'c' in the vec.
+ /// drain.keep_rest();
+ ///
+ /// // If we wouldn't call `keep_rest()`,
+ /// // `vec` would be empty.
+ /// assert_eq!(vec, ['b', 'c']);
+ /// ```
+ #[unstable(feature = "drain_keep_rest", issue = "101122")]
+ pub fn keep_rest(self) {
+ // At this moment layout looks like this:
+ //
+ // _____________________/-- old_len
+ // / \
+ // [kept] [yielded] [tail]
+ // \_______/ ^-- idx
+ // \-- del
+ //
+ // Normally `Drop` impl would drop [tail] (via .for_each(drop), ie still calling `pred`)
+ //
+ // 1. Move [tail] after [kept]
+ // 2. Update length of the original vec to `old_len - del`
+ // a. In case of ZST, this is the only thing we want to do
+ // 3. Do *not* drop self, as everything is put in a consistent state already, there is nothing to do
+ let mut this = ManuallyDrop::new(self);
+
+ unsafe {
+ // ZSTs have no identity, so we don't need to move them around.
+ let needs_move = mem::size_of::<T>() != 0;
+
+ if needs_move && this.idx < this.old_len && this.del > 0 {
+ let ptr = this.vec.as_mut_ptr();
+ let src = ptr.add(this.idx);
+ let dst = src.sub(this.del);
+ let tail_len = this.old_len - this.idx;
+ src.copy_to(dst, tail_len);
+ }
+
+ let new_len = this.old_len - this.del;
+ this.vec.set_len(new_len);
+ }
+ }
}
#[unstable(feature = "drain_filter", reason = "recently added", issue = "43244")]
diff --git a/rust/alloc/vec/into_iter.rs b/rust/alloc/vec/into_iter.rs
index f7a50e76691e..34a2a70d6ded 100644
--- a/rust/alloc/vec/into_iter.rs
+++ b/rust/alloc/vec/into_iter.rs
@@ -3,14 +3,16 @@
#[cfg(not(no_global_oom_handling))]
use super::AsVecIntoIter;
use crate::alloc::{Allocator, Global};
+#[cfg(not(no_global_oom_handling))]
+use crate::collections::VecDeque;
use crate::raw_vec::RawVec;
+use core::array;
use core::fmt;
-use core::intrinsics::arith_offset;
use core::iter::{
FusedIterator, InPlaceIterable, SourceIter, TrustedLen, TrustedRandomAccessNoCoerce,
};
use core::marker::PhantomData;
-use core::mem::{self, ManuallyDrop};
+use core::mem::{self, ManuallyDrop, MaybeUninit, SizedTypeProperties};
#[cfg(not(no_global_oom_handling))]
use core::ops::Deref;
use core::ptr::{self, NonNull};
@@ -40,7 +42,9 @@ pub struct IntoIter<
// to avoid dropping the allocator twice we need to wrap it into ManuallyDrop
pub(super) alloc: ManuallyDrop<A>,
pub(super) ptr: *const T,
- pub(super) end: *const T,
+ pub(super) end: *const T, // If T is a ZST, this is actually ptr+len. This encoding is picked so that
+ // ptr == end is a quick test for the Iterator being empty, that works
+ // for both ZST and non-ZST.
}
#[stable(feature = "vec_intoiter_debug", since = "1.13.0")]
@@ -97,13 +101,16 @@ impl<T, A: Allocator> IntoIter<T, A> {
}
/// Drops remaining elements and relinquishes the backing allocation.
+ /// This method guarantees it won't panic before relinquishing
+ /// the backing allocation.
///
/// This is roughly equivalent to the following, but more efficient
///
/// ```
/// # let mut into_iter = Vec::<u8>::with_capacity(10).into_iter();
+ /// let mut into_iter = std::mem::replace(&mut into_iter, Vec::new().into_iter());
/// (&mut into_iter).for_each(core::mem::drop);
- /// unsafe { core::ptr::write(&mut into_iter, Vec::new().into_iter()); }
+ /// std::mem::forget(into_iter);
/// ```
///
/// This method is used by in-place iteration, refer to the vec::in_place_collect
@@ -120,15 +127,45 @@ impl<T, A: Allocator> IntoIter<T, A> {
self.ptr = self.buf.as_ptr();
self.end = self.buf.as_ptr();
+ // Dropping the remaining elements can panic, so this needs to be
+ // done only after updating the other fields.
unsafe {
ptr::drop_in_place(remaining);
}
}
/// Forgets to Drop the remaining elements while still allowing the backing allocation to be freed.
- #[allow(dead_code)]
pub(crate) fn forget_remaining_elements(&mut self) {
- self.ptr = self.end;
+ // For th ZST case, it is crucial that we mutate `end` here, not `ptr`.
+ // `ptr` must stay aligned, while `end` may be unaligned.
+ self.end = self.ptr;
+ }
+
+ #[cfg(not(no_global_oom_handling))]
+ #[inline]
+ pub(crate) fn into_vecdeque(self) -> VecDeque<T, A> {
+ // Keep our `Drop` impl from dropping the elements and the allocator
+ let mut this = ManuallyDrop::new(self);
+
+ // SAFETY: This allocation originally came from a `Vec`, so it passes
+ // all those checks. We have `this.buf` ≤ `this.ptr` ≤ `this.end`,
+ // so the `sub_ptr`s below cannot wrap, and will produce a well-formed
+ // range. `end` ≤ `buf + cap`, so the range will be in-bounds.
+ // Taking `alloc` is ok because nothing else is going to look at it,
+ // since our `Drop` impl isn't going to run so there's no more code.
+ unsafe {
+ let buf = this.buf.as_ptr();
+ let initialized = if T::IS_ZST {
+ // All the pointers are the same for ZSTs, so it's fine to
+ // say that they're all at the beginning of the "allocation".
+ 0..this.len()
+ } else {
+ this.ptr.sub_ptr(buf)..this.end.sub_ptr(buf)
+ };
+ let cap = this.cap;
+ let alloc = ManuallyDrop::take(&mut this.alloc);
+ VecDeque::from_contiguous_raw_parts_in(buf, initialized, cap, alloc)
+ }
}
}
@@ -150,19 +187,18 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> {
#[inline]
fn next(&mut self) -> Option<T> {
- if self.ptr as *const _ == self.end {
+ if self.ptr == self.end {
None
- } else if mem::size_of::<T>() == 0 {
- // purposefully don't use 'ptr.offset' because for
- // vectors with 0-size elements this would return the
- // same pointer.
- self.ptr = unsafe { arith_offset(self.ptr as *const i8, 1) as *mut T };
+ } else if T::IS_ZST {
+ // `ptr` has to stay where it is to remain aligned, so we reduce the length by 1 by
+ // reducing the `end`.
+ self.end = self.end.wrapping_byte_sub(1);
// Make up a value of this ZST.
Some(unsafe { mem::zeroed() })
} else {
let old = self.ptr;
- self.ptr = unsafe { self.ptr.offset(1) };
+ self.ptr = unsafe { self.ptr.add(1) };
Some(unsafe { ptr::read(old) })
}
@@ -170,7 +206,7 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> {
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
- let exact = if mem::size_of::<T>() == 0 {
+ let exact = if T::IS_ZST {
self.end.addr().wrapping_sub(self.ptr.addr())
} else {
unsafe { self.end.sub_ptr(self.ptr) }
@@ -182,11 +218,9 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> {
fn advance_by(&mut self, n: usize) -> Result<(), usize> {
let step_size = self.len().min(n);
let to_drop = ptr::slice_from_raw_parts_mut(self.ptr as *mut T, step_size);
- if mem::size_of::<T>() == 0 {
- // SAFETY: due to unchecked casts of unsigned amounts to signed offsets the wraparound
- // effectively results in unsigned pointers representing positions 0..usize::MAX,
- // which is valid for ZSTs.
- self.ptr = unsafe { arith_offset(self.ptr as *const i8, step_size as isize) as *mut T }
+ if T::IS_ZST {
+ // See `next` for why we sub `end` here.
+ self.end = self.end.wrapping_byte_sub(step_size);
} else {
// SAFETY: the min() above ensures that step_size is in bounds
self.ptr = unsafe { self.ptr.add(step_size) };
@@ -206,6 +240,43 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> {
self.len()
}
+ #[inline]
+ fn next_chunk<const N: usize>(&mut self) -> Result<[T; N], core::array::IntoIter<T, N>> {
+ let mut raw_ary = MaybeUninit::uninit_array();
+
+ let len = self.len();
+
+ if T::IS_ZST {
+ if len < N {
+ self.forget_remaining_elements();
+ // Safety: ZSTs can be conjured ex nihilo, only the amount has to be correct
+ return Err(unsafe { array::IntoIter::new_unchecked(raw_ary, 0..len) });
+ }
+
+ self.end = self.end.wrapping_byte_sub(N);
+ // Safety: ditto
+ return Ok(unsafe { raw_ary.transpose().assume_init() });
+ }
+
+ if len < N {
+ // Safety: `len` indicates that this many elements are available and we just checked that
+ // it fits into the array.
+ unsafe {
+ ptr::copy_nonoverlapping(self.ptr, raw_ary.as_mut_ptr() as *mut T, len);
+ self.forget_remaining_elements();
+ return Err(array::IntoIter::new_unchecked(raw_ary, 0..len));
+ }
+ }
+
+ // Safety: `len` is larger than the array size. Copy a fixed amount here to fully initialize
+ // the array.
+ return unsafe {
+ ptr::copy_nonoverlapping(self.ptr, raw_ary.as_mut_ptr() as *mut T, N);
+ self.ptr = self.ptr.add(N);
+ Ok(raw_ary.transpose().assume_init())
+ };
+ }
+
unsafe fn __iterator_get_unchecked(&mut self, i: usize) -> Self::Item
where
Self: TrustedRandomAccessNoCoerce,
@@ -219,7 +290,7 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> {
// that `T: Copy` so reading elements from the buffer doesn't invalidate
// them for `Drop`.
unsafe {
- if mem::size_of::<T>() == 0 { mem::zeroed() } else { ptr::read(self.ptr.add(i)) }
+ if T::IS_ZST { mem::zeroed() } else { ptr::read(self.ptr.add(i)) }
}
}
}
@@ -230,14 +301,14 @@ impl<T, A: Allocator> DoubleEndedIterator for IntoIter<T, A> {
fn next_back(&mut self) -> Option<T> {
if self.end == self.ptr {
None
- } else if mem::size_of::<T>() == 0 {
+ } else if T::IS_ZST {
// See above for why 'ptr.offset' isn't used
- self.end = unsafe { arith_offset(self.end as *const i8, -1) as *mut T };
+ self.end = self.end.wrapping_byte_sub(1);
// Make up a value of this ZST.
Some(unsafe { mem::zeroed() })
} else {
- self.end = unsafe { self.end.offset(-1) };
+ self.end = unsafe { self.end.sub(1) };
Some(unsafe { ptr::read(self.end) })
}
@@ -246,14 +317,12 @@ impl<T, A: Allocator> DoubleEndedIterator for IntoIter<T, A> {
#[inline]
fn advance_back_by(&mut self, n: usize) -> Result<(), usize> {
let step_size = self.len().min(n);
- if mem::size_of::<T>() == 0 {
+ if T::IS_ZST {
// SAFETY: same as for advance_by()
- self.end = unsafe {
- arith_offset(self.end as *const i8, step_size.wrapping_neg() as isize) as *mut T
- }
+ self.end = self.end.wrapping_byte_sub(step_size);
} else {
// SAFETY: same as for advance_by()
- self.end = unsafe { self.end.offset(step_size.wrapping_neg() as isize) };
+ self.end = unsafe { self.end.sub(step_size) };
}
let to_drop = ptr::slice_from_raw_parts_mut(self.end as *mut T, step_size);
// SAFETY: same as for advance_by()
diff --git a/rust/alloc/vec/is_zero.rs b/rust/alloc/vec/is_zero.rs
index 377f3d172777..d928dcf90e80 100644
--- a/rust/alloc/vec/is_zero.rs
+++ b/rust/alloc/vec/is_zero.rs
@@ -1,10 +1,13 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
+use core::num::{Saturating, Wrapping};
+
use crate::boxed::Box;
#[rustc_specialization_trait]
pub(super) unsafe trait IsZero {
- /// Whether this value's representation is all zeros
+ /// Whether this value's representation is all zeros,
+ /// or can be represented with all zeroes.
fn is_zero(&self) -> bool;
}
@@ -19,12 +22,14 @@ macro_rules! impl_is_zero {
};
}
+impl_is_zero!(i8, |x| x == 0); // It is needed to impl for arrays and tuples of i8.
impl_is_zero!(i16, |x| x == 0);
impl_is_zero!(i32, |x| x == 0);
impl_is_zero!(i64, |x| x == 0);
impl_is_zero!(i128, |x| x == 0);
impl_is_zero!(isize, |x| x == 0);
+impl_is_zero!(u8, |x| x == 0); // It is needed to impl for arrays and tuples of u8.
impl_is_zero!(u16, |x| x == 0);
impl_is_zero!(u32, |x| x == 0);
impl_is_zero!(u64, |x| x == 0);
@@ -55,16 +60,42 @@ unsafe impl<T: IsZero, const N: usize> IsZero for [T; N] {
#[inline]
fn is_zero(&self) -> bool {
// Because this is generated as a runtime check, it's not obvious that
- // it's worth doing if the array is really long. The threshold here
- // is largely arbitrary, but was picked because as of 2022-05-01 LLVM
- // can const-fold the check in `vec![[0; 32]; n]` but not in
- // `vec![[0; 64]; n]`: https://godbolt.org/z/WTzjzfs5b
+ // it's worth doing if the array is really long. The threshold here
+ // is largely arbitrary, but was picked because as of 2022-07-01 LLVM
+ // fails to const-fold the check in `vec![[1; 32]; n]`
+ // See https://github.com/rust-lang/rust/pull/97581#issuecomment-1166628022
// Feel free to tweak if you have better evidence.
- N <= 32 && self.iter().all(IsZero::is_zero)
+ N <= 16 && self.iter().all(IsZero::is_zero)
+ }
+}
+
+// This is recursive macro.
+macro_rules! impl_for_tuples {
+ // Stopper
+ () => {
+ // No use for implementing for empty tuple because it is ZST.
+ };
+ ($first_arg:ident $(,$rest:ident)*) => {
+ unsafe impl <$first_arg: IsZero, $($rest: IsZero,)*> IsZero for ($first_arg, $($rest,)*){
+ #[inline]
+ fn is_zero(&self) -> bool{
+ // Destructure tuple to N references
+ // Rust allows to hide generic params by local variable names.
+ #[allow(non_snake_case)]
+ let ($first_arg, $($rest,)*) = self;
+
+ $first_arg.is_zero()
+ $( && $rest.is_zero() )*
+ }
+ }
+
+ impl_for_tuples!($($rest),*);
}
}
+impl_for_tuples!(A, B, C, D, E, F, G, H);
+
// `Option<&T>` and `Option<Box<T>>` are guaranteed to represent `None` as null.
// For fat pointers, the bytes that would be the pointer metadata in the `Some`
// variant are padding in the `None` variant, so ignoring them and
@@ -118,3 +149,56 @@ impl_is_zero_option_of_nonzero!(
NonZeroUsize,
NonZeroIsize,
);
+
+macro_rules! impl_is_zero_option_of_num {
+ ($($t:ty,)+) => {$(
+ unsafe impl IsZero for Option<$t> {
+ #[inline]
+ fn is_zero(&self) -> bool {
+ const {
+ let none: Self = unsafe { core::mem::MaybeUninit::zeroed().assume_init() };
+ assert!(none.is_none());
+ }
+ self.is_none()
+ }
+ }
+ )+};
+}
+
+impl_is_zero_option_of_num!(u8, u16, u32, u64, u128, i8, i16, i32, i64, i128, usize, isize,);
+
+unsafe impl<T: IsZero> IsZero for Wrapping<T> {
+ #[inline]
+ fn is_zero(&self) -> bool {
+ self.0.is_zero()
+ }
+}
+
+unsafe impl<T: IsZero> IsZero for Saturating<T> {
+ #[inline]
+ fn is_zero(&self) -> bool {
+ self.0.is_zero()
+ }
+}
+
+macro_rules! impl_for_optional_bool {
+ ($($t:ty,)+) => {$(
+ unsafe impl IsZero for $t {
+ #[inline]
+ fn is_zero(&self) -> bool {
+ // SAFETY: This is *not* a stable layout guarantee, but
+ // inside `core` we're allowed to rely on the current rustc
+ // behaviour that options of bools will be one byte with
+ // no padding, so long as they're nested less than 254 deep.
+ let raw: u8 = unsafe { core::mem::transmute(*self) };
+ raw == 0
+ }
+ }
+ )+};
+}
+impl_for_optional_bool! {
+ Option<bool>,
+ Option<Option<bool>>,
+ Option<Option<Option<bool>>>,
+ // Could go further, but not worth the metadata overhead
+}
diff --git a/rust/alloc/vec/mod.rs b/rust/alloc/vec/mod.rs
index fe4fff5064bc..94995913566b 100644
--- a/rust/alloc/vec/mod.rs
+++ b/rust/alloc/vec/mod.rs
@@ -61,12 +61,12 @@ use core::cmp::Ordering;
use core::convert::TryFrom;
use core::fmt;
use core::hash::{Hash, Hasher};
-use core::intrinsics::{arith_offset, assume};
+use core::intrinsics::assume;
use core::iter;
#[cfg(not(no_global_oom_handling))]
use core::iter::FromIterator;
use core::marker::PhantomData;
-use core::mem::{self, ManuallyDrop, MaybeUninit};
+use core::mem::{self, ManuallyDrop, MaybeUninit, SizedTypeProperties};
use core::ops::{self, Index, IndexMut, Range, RangeBounds};
use core::ptr::{self, NonNull};
use core::slice::{self, SliceIndex};
@@ -75,7 +75,7 @@ use crate::alloc::{Allocator, Global};
#[cfg(not(no_borrow))]
use crate::borrow::{Cow, ToOwned};
use crate::boxed::Box;
-use crate::collections::TryReserveError;
+use crate::collections::{TryReserveError, TryReserveErrorKind};
use crate::raw_vec::RawVec;
#[unstable(feature = "drain_filter", reason = "recently added", issue = "43244")]
@@ -127,7 +127,7 @@ use self::set_len_on_drop::SetLenOnDrop;
mod set_len_on_drop;
#[cfg(not(no_global_oom_handling))]
-use self::in_place_drop::InPlaceDrop;
+use self::in_place_drop::{InPlaceDrop, InPlaceDstBufDrop};
#[cfg(not(no_global_oom_handling))]
mod in_place_drop;
@@ -169,7 +169,7 @@ mod spec_extend;
/// vec[0] = 7;
/// assert_eq!(vec[0], 7);
///
-/// vec.extend([1, 2, 3].iter().copied());
+/// vec.extend([1, 2, 3]);
///
/// for x in &vec {
/// println!("{x}");
@@ -428,17 +428,25 @@ impl<T> Vec<T> {
Vec { buf: RawVec::NEW, len: 0 }
}
- /// Constructs a new, empty `Vec<T>` with the specified capacity.
+ /// Constructs a new, empty `Vec<T>` with at least the specified capacity.
///
- /// The vector will be able to hold exactly `capacity` elements without
- /// reallocating. If `capacity` is 0, the vector will not allocate.
+ /// The vector will be able to hold at least `capacity` elements without
+ /// reallocating. This method is allowed to allocate for more elements than
+ /// `capacity`. If `capacity` is 0, the vector will not allocate.
///
/// It is important to note that although the returned vector has the
- /// *capacity* specified, the vector will have a zero *length*. For an
- /// explanation of the difference between length and capacity, see
+ /// minimum *capacity* specified, the vector will have a zero *length*. For
+ /// an explanation of the difference between length and capacity, see
/// *[Capacity and reallocation]*.
///
+ /// If it is important to know the exact allocated capacity of a `Vec`,
+ /// always use the [`capacity`] method after construction.
+ ///
+ /// For `Vec<T>` where `T` is a zero-sized type, there will be no allocation
+ /// and the capacity will always be `usize::MAX`.
+ ///
/// [Capacity and reallocation]: #capacity-and-reallocation
+ /// [`capacity`]: Vec::capacity
///
/// # Panics
///
@@ -451,19 +459,24 @@ impl<T> Vec<T> {
///
/// // The vector contains no items, even though it has capacity for more
/// assert_eq!(vec.len(), 0);
- /// assert_eq!(vec.capacity(), 10);
+ /// assert!(vec.capacity() >= 10);
///
/// // These are all done without reallocating...
/// for i in 0..10 {
/// vec.push(i);
/// }
/// assert_eq!(vec.len(), 10);
- /// assert_eq!(vec.capacity(), 10);
+ /// assert!(vec.capacity() >= 10);
///
/// // ...but this may make the vector reallocate
/// vec.push(11);
/// assert_eq!(vec.len(), 11);
/// assert!(vec.capacity() >= 11);
+ ///
+ /// // A vector of a zero-sized type will always over-allocate, since no
+ /// // allocation is necessary
+ /// let vec_units = Vec::<()>::with_capacity(10);
+ /// assert_eq!(vec_units.capacity(), usize::MAX);
/// ```
#[cfg(not(no_global_oom_handling))]
#[inline]
@@ -473,17 +486,25 @@ impl<T> Vec<T> {
Self::with_capacity_in(capacity, Global)
}
- /// Tries to construct a new, empty `Vec<T>` with the specified capacity.
+ /// Tries to construct a new, empty `Vec<T>` with at least the specified capacity.
///
- /// The vector will be able to hold exactly `capacity` elements without
- /// reallocating. If `capacity` is 0, the vector will not allocate.
+ /// The vector will be able to hold at least `capacity` elements without
+ /// reallocating. This method is allowed to allocate for more elements than
+ /// `capacity`. If `capacity` is 0, the vector will not allocate.
///
/// It is important to note that although the returned vector has the
- /// *capacity* specified, the vector will have a zero *length*. For an
- /// explanation of the difference between length and capacity, see
+ /// minimum *capacity* specified, the vector will have a zero *length*. For
+ /// an explanation of the difference between length and capacity, see
/// *[Capacity and reallocation]*.
///
+ /// If it is important to know the exact allocated capacity of a `Vec`,
+ /// always use the [`capacity`] method after construction.
+ ///
+ /// For `Vec<T>` where `T` is a zero-sized type, there will be no allocation
+ /// and the capacity will always be `usize::MAX`.
+ ///
/// [Capacity and reallocation]: #capacity-and-reallocation
+ /// [`capacity`]: Vec::capacity
///
/// # Examples
///
@@ -492,14 +513,14 @@ impl<T> Vec<T> {
///
/// // The vector contains no items, even though it has capacity for more
/// assert_eq!(vec.len(), 0);
- /// assert_eq!(vec.capacity(), 10);
+ /// assert!(vec.capacity() >= 10);
///
/// // These are all done without reallocating...
/// for i in 0..10 {
/// vec.push(i);
/// }
/// assert_eq!(vec.len(), 10);
- /// assert_eq!(vec.capacity(), 10);
+ /// assert!(vec.capacity() >= 10);
///
/// // ...but this may make the vector reallocate
/// vec.push(11);
@@ -508,6 +529,11 @@ impl<T> Vec<T> {
///
/// let mut result = Vec::try_with_capacity(usize::MAX);
/// assert!(result.is_err());
+ ///
+ /// // A vector of a zero-sized type will always over-allocate, since no
+ /// // allocation is necessary
+ /// let vec_units = Vec::<()>::try_with_capacity(10).unwrap();
+ /// assert_eq!(vec_units.capacity(), usize::MAX);
/// ```
#[inline]
#[stable(feature = "kernel", since = "1.0.0")]
@@ -515,15 +541,15 @@ impl<T> Vec<T> {
Self::try_with_capacity_in(capacity, Global)
}
- /// Creates a `Vec<T>` directly from the raw components of another vector.
+ /// Creates a `Vec<T>` directly from a pointer, a capacity, and a length.
///
/// # Safety
///
/// This is highly unsafe, due to the number of invariants that aren't
/// checked:
///
- /// * `ptr` needs to have been previously allocated via [`String`]/`Vec<T>`
- /// (at least, it's highly likely to be incorrect if it wasn't).
+ /// * `ptr` must have been allocated using the global allocator, such as via
+ /// the [`alloc::alloc`] function.
/// * `T` needs to have the same alignment as what `ptr` was allocated with.
/// (`T` having a less strict alignment is not sufficient, the alignment really
/// needs to be equal to satisfy the [`dealloc`] requirement that memory must be
@@ -532,6 +558,14 @@ impl<T> Vec<T> {
/// to be the same size as the pointer was allocated with. (Because similar to
/// alignment, [`dealloc`] must be called with the same layout `size`.)
/// * `length` needs to be less than or equal to `capacity`.
+ /// * The first `length` values must be properly initialized values of type `T`.
+ /// * `capacity` needs to be the capacity that the pointer was allocated with.
+ /// * The allocated size in bytes must be no larger than `isize::MAX`.
+ /// See the safety documentation of [`pointer::offset`].
+ ///
+ /// These requirements are always upheld by any `ptr` that has been allocated
+ /// via `Vec<T>`. Other allocation sources are allowed if the invariants are
+ /// upheld.
///
/// Violating these may cause problems like corrupting the allocator's
/// internal data structures. For example it is normally **not** safe
@@ -552,6 +586,7 @@ impl<T> Vec<T> {
/// function.
///
/// [`String`]: crate::string::String
+ /// [`alloc::alloc`]: crate::alloc::alloc
/// [`dealloc`]: crate::alloc::GlobalAlloc::dealloc
///
/// # Examples
@@ -574,8 +609,8 @@ impl<T> Vec<T> {
///
/// unsafe {
/// // Overwrite memory with 4, 5, 6
- /// for i in 0..len as isize {
- /// ptr::write(p.offset(i), 4 + i);
+ /// for i in 0..len {
+ /// ptr::write(p.add(i), 4 + i);
/// }
///
/// // Put everything back together into a Vec
@@ -583,6 +618,32 @@ impl<T> Vec<T> {
/// assert_eq!(rebuilt, [4, 5, 6]);
/// }
/// ```
+ ///
+ /// Using memory that was allocated elsewhere:
+ ///
+ /// ```rust
+ /// #![feature(allocator_api)]
+ ///
+ /// use std::alloc::{AllocError, Allocator, Global, Layout};
+ ///
+ /// fn main() {
+ /// let layout = Layout::array::<u32>(16).expect("overflow cannot happen");
+ ///
+ /// let vec = unsafe {
+ /// let mem = match Global.allocate(layout) {
+ /// Ok(mem) => mem.cast::<u32>().as_ptr(),
+ /// Err(AllocError) => return,
+ /// };
+ ///
+ /// mem.write(1_000_000);
+ ///
+ /// Vec::from_raw_parts_in(mem, 1, 16, Global)
+ /// };
+ ///
+ /// assert_eq!(vec, &[1_000_000]);
+ /// assert_eq!(vec.capacity(), 16);
+ /// }
+ /// ```
#[inline]
#[stable(feature = "rust1", since = "1.0.0")]
pub unsafe fn from_raw_parts(ptr: *mut T, length: usize, capacity: usize) -> Self {
@@ -611,18 +672,26 @@ impl<T, A: Allocator> Vec<T, A> {
Vec { buf: RawVec::new_in(alloc), len: 0 }
}
- /// Constructs a new, empty `Vec<T, A>` with the specified capacity with the provided
- /// allocator.
+ /// Constructs a new, empty `Vec<T, A>` with at least the specified capacity
+ /// with the provided allocator.
///
- /// The vector will be able to hold exactly `capacity` elements without
- /// reallocating. If `capacity` is 0, the vector will not allocate.
+ /// The vector will be able to hold at least `capacity` elements without
+ /// reallocating. This method is allowed to allocate for more elements than
+ /// `capacity`. If `capacity` is 0, the vector will not allocate.
///
/// It is important to note that although the returned vector has the
- /// *capacity* specified, the vector will have a zero *length*. For an
- /// explanation of the difference between length and capacity, see
+ /// minimum *capacity* specified, the vector will have a zero *length*. For
+ /// an explanation of the difference between length and capacity, see
/// *[Capacity and reallocation]*.
///
+ /// If it is important to know the exact allocated capacity of a `Vec`,
+ /// always use the [`capacity`] method after construction.
+ ///
+ /// For `Vec<T, A>` where `T` is a zero-sized type, there will be no allocation
+ /// and the capacity will always be `usize::MAX`.
+ ///
/// [Capacity and reallocation]: #capacity-and-reallocation
+ /// [`capacity`]: Vec::capacity
///
/// # Panics
///
@@ -652,6 +721,11 @@ impl<T, A: Allocator> Vec<T, A> {
/// vec.push(11);
/// assert_eq!(vec.len(), 11);
/// assert!(vec.capacity() >= 11);
+ ///
+ /// // A vector of a zero-sized type will always over-allocate, since no
+ /// // allocation is necessary
+ /// let vec_units = Vec::<(), System>::with_capacity_in(10, System);
+ /// assert_eq!(vec_units.capacity(), usize::MAX);
/// ```
#[cfg(not(no_global_oom_handling))]
#[inline]
@@ -660,18 +734,26 @@ impl<T, A: Allocator> Vec<T, A> {
Vec { buf: RawVec::with_capacity_in(capacity, alloc), len: 0 }
}
- /// Tries to construct a new, empty `Vec<T, A>` with the specified capacity
+ /// Tries to construct a new, empty `Vec<T, A>` with at least the specified capacity
/// with the provided allocator.
///
- /// The vector will be able to hold exactly `capacity` elements without
- /// reallocating. If `capacity` is 0, the vector will not allocate.
+ /// The vector will be able to hold at least `capacity` elements without
+ /// reallocating. This method is allowed to allocate for more elements than
+ /// `capacity`. If `capacity` is 0, the vector will not allocate.
///
/// It is important to note that although the returned vector has the
- /// *capacity* specified, the vector will have a zero *length*. For an
- /// explanation of the difference between length and capacity, see
+ /// minimum *capacity* specified, the vector will have a zero *length*. For
+ /// an explanation of the difference between length and capacity, see
/// *[Capacity and reallocation]*.
///
+ /// If it is important to know the exact allocated capacity of a `Vec`,
+ /// always use the [`capacity`] method after construction.
+ ///
+ /// For `Vec<T, A>` where `T` is a zero-sized type, there will be no allocation
+ /// and the capacity will always be `usize::MAX`.
+ ///
/// [Capacity and reallocation]: #capacity-and-reallocation
+ /// [`capacity`]: Vec::capacity
///
/// # Examples
///
@@ -700,6 +782,11 @@ impl<T, A: Allocator> Vec<T, A> {
///
/// let mut result = Vec::try_with_capacity_in(usize::MAX, System);
/// assert!(result.is_err());
+ ///
+ /// // A vector of a zero-sized type will always over-allocate, since no
+ /// // allocation is necessary
+ /// let vec_units = Vec::<(), System>::try_with_capacity_in(10, System).unwrap();
+ /// assert_eq!(vec_units.capacity(), usize::MAX);
/// ```
#[inline]
#[stable(feature = "kernel", since = "1.0.0")]
@@ -707,21 +794,31 @@ impl<T, A: Allocator> Vec<T, A> {
Ok(Vec { buf: RawVec::try_with_capacity_in(capacity, alloc)?, len: 0 })
}
- /// Creates a `Vec<T, A>` directly from the raw components of another vector.
+ /// Creates a `Vec<T, A>` directly from a pointer, a capacity, a length,
+ /// and an allocator.
///
/// # Safety
///
/// This is highly unsafe, due to the number of invariants that aren't
/// checked:
///
- /// * `ptr` needs to have been previously allocated via [`String`]/`Vec<T>`
- /// (at least, it's highly likely to be incorrect if it wasn't).
- /// * `T` needs to have the same size and alignment as what `ptr` was allocated with.
+ /// * `ptr` must be [*currently allocated*] via the given allocator `alloc`.
+ /// * `T` needs to have the same alignment as what `ptr` was allocated with.
/// (`T` having a less strict alignment is not sufficient, the alignment really
/// needs to be equal to satisfy the [`dealloc`] requirement that memory must be
/// allocated and deallocated with the same layout.)
+ /// * The size of `T` times the `capacity` (ie. the allocated size in bytes) needs
+ /// to be the same size as the pointer was allocated with. (Because similar to
+ /// alignment, [`dealloc`] must be called with the same layout `size`.)
/// * `length` needs to be less than or equal to `capacity`.
- /// * `capacity` needs to be the capacity that the pointer was allocated with.
+ /// * The first `length` values must be properly initialized values of type `T`.
+ /// * `capacity` needs to [*fit*] the layout size that the pointer was allocated with.
+ /// * The allocated size in bytes must be no larger than `isize::MAX`.
+ /// See the safety documentation of [`pointer::offset`].
+ ///
+ /// These requirements are always upheld by any `ptr` that has been allocated
+ /// via `Vec<T, A>`. Other allocation sources are allowed if the invariants are
+ /// upheld.
///
/// Violating these may cause problems like corrupting the allocator's
/// internal data structures. For example it is **not** safe
@@ -739,6 +836,8 @@ impl<T, A: Allocator> Vec<T, A> {
///
/// [`String`]: crate::string::String
/// [`dealloc`]: crate::alloc::GlobalAlloc::dealloc
+ /// [*currently allocated*]: crate::alloc::Allocator#currently-allocated-memory
+ /// [*fit*]: crate::alloc::Allocator#memory-fitting
///
/// # Examples
///
@@ -768,8 +867,8 @@ impl<T, A: Allocator> Vec<T, A> {
///
/// unsafe {
/// // Overwrite memory with 4, 5, 6
- /// for i in 0..len as isize {
- /// ptr::write(p.offset(i), 4 + i);
+ /// for i in 0..len {
+ /// ptr::write(p.add(i), 4 + i);
/// }
///
/// // Put everything back together into a Vec
@@ -777,6 +876,29 @@ impl<T, A: Allocator> Vec<T, A> {
/// assert_eq!(rebuilt, [4, 5, 6]);
/// }
/// ```
+ ///
+ /// Using memory that was allocated elsewhere:
+ ///
+ /// ```rust
+ /// use std::alloc::{alloc, Layout};
+ ///
+ /// fn main() {
+ /// let layout = Layout::array::<u32>(16).expect("overflow cannot happen");
+ /// let vec = unsafe {
+ /// let mem = alloc(layout).cast::<u32>();
+ /// if mem.is_null() {
+ /// return;
+ /// }
+ ///
+ /// mem.write(1_000_000);
+ ///
+ /// Vec::from_raw_parts(mem, 1, 16)
+ /// };
+ ///
+ /// assert_eq!(vec, &[1_000_000]);
+ /// assert_eq!(vec.capacity(), 16);
+ /// }
+ /// ```
#[inline]
#[unstable(feature = "allocator_api", issue = "32838")]
pub unsafe fn from_raw_parts_in(ptr: *mut T, length: usize, capacity: usize, alloc: A) -> Self {
@@ -869,13 +991,14 @@ impl<T, A: Allocator> Vec<T, A> {
(ptr, len, capacity, alloc)
}
- /// Returns the number of elements the vector can hold without
+ /// Returns the total number of elements the vector can hold without
/// reallocating.
///
/// # Examples
///
/// ```
- /// let vec: Vec<i32> = Vec::with_capacity(10);
+ /// let mut vec: Vec<i32> = Vec::with_capacity(10);
+ /// vec.push(42);
/// assert_eq!(vec.capacity(), 10);
/// ```
#[inline]
@@ -885,10 +1008,10 @@ impl<T, A: Allocator> Vec<T, A> {
}
/// Reserves capacity for at least `additional` more elements to be inserted
- /// in the given `Vec<T>`. The collection may reserve more space to avoid
- /// frequent reallocations. After calling `reserve`, capacity will be
- /// greater than or equal to `self.len() + additional`. Does nothing if
- /// capacity is already sufficient.
+ /// in the given `Vec<T>`. The collection may reserve more space to
+ /// speculatively avoid frequent reallocations. After calling `reserve`,
+ /// capacity will be greater than or equal to `self.len() + additional`.
+ /// Does nothing if capacity is already sufficient.
///
/// # Panics
///
@@ -907,10 +1030,12 @@ impl<T, A: Allocator> Vec<T, A> {
self.buf.reserve(self.len, additional);
}
- /// Reserves the minimum capacity for exactly `additional` more elements to
- /// be inserted in the given `Vec<T>`. After calling `reserve_exact`,
- /// capacity will be greater than or equal to `self.len() + additional`.
- /// Does nothing if the capacity is already sufficient.
+ /// Reserves the minimum capacity for at least `additional` more elements to
+ /// be inserted in the given `Vec<T>`. Unlike [`reserve`], this will not
+ /// deliberately over-allocate to speculatively avoid frequent allocations.
+ /// After calling `reserve_exact`, capacity will be greater than or equal to
+ /// `self.len() + additional`. Does nothing if the capacity is already
+ /// sufficient.
///
/// Note that the allocator may give the collection more space than it
/// requests. Therefore, capacity can not be relied upon to be precisely
@@ -936,10 +1061,11 @@ impl<T, A: Allocator> Vec<T, A> {
}
/// Tries to reserve capacity for at least `additional` more elements to be inserted
- /// in the given `Vec<T>`. The collection may reserve more space to avoid
+ /// in the given `Vec<T>`. The collection may reserve more space to speculatively avoid
/// frequent reallocations. After calling `try_reserve`, capacity will be
- /// greater than or equal to `self.len() + additional`. Does nothing if
- /// capacity is already sufficient.
+ /// greater than or equal to `self.len() + additional` if it returns
+ /// `Ok(())`. Does nothing if capacity is already sufficient. This method
+ /// preserves the contents even if an error occurs.
///
/// # Errors
///
@@ -971,10 +1097,11 @@ impl<T, A: Allocator> Vec<T, A> {
self.buf.try_reserve(self.len, additional)
}
- /// Tries to reserve the minimum capacity for exactly `additional`
- /// elements to be inserted in the given `Vec<T>`. After calling
- /// `try_reserve_exact`, capacity will be greater than or equal to
- /// `self.len() + additional` if it returns `Ok(())`.
+ /// Tries to reserve the minimum capacity for at least `additional`
+ /// elements to be inserted in the given `Vec<T>`. Unlike [`try_reserve`],
+ /// this will not deliberately over-allocate to speculatively avoid frequent
+ /// allocations. After calling `try_reserve_exact`, capacity will be greater
+ /// than or equal to `self.len() + additional` if it returns `Ok(())`.
/// Does nothing if the capacity is already sufficient.
///
/// Note that the allocator may give the collection more space than it
@@ -1066,7 +1193,8 @@ impl<T, A: Allocator> Vec<T, A> {
/// Converts the vector into [`Box<[T]>`][owned slice].
///
- /// Note that this will drop any excess capacity.
+ /// If the vector has excess capacity, its items will be moved into a
+ /// newly-allocated buffer with exactly the right capacity.
///
/// [owned slice]: Box
///
@@ -1199,7 +1327,8 @@ impl<T, A: Allocator> Vec<T, A> {
self
}
- /// Returns a raw pointer to the vector's buffer.
+ /// Returns a raw pointer to the vector's buffer, or a dangling raw pointer
+ /// valid for zero sized reads if the vector didn't allocate.
///
/// The caller must ensure that the vector outlives the pointer this
/// function returns, or else it will end up pointing to garbage.
@@ -1236,7 +1365,8 @@ impl<T, A: Allocator> Vec<T, A> {
ptr
}
- /// Returns an unsafe mutable pointer to the vector's buffer.
+ /// Returns an unsafe mutable pointer to the vector's buffer, or a dangling
+ /// raw pointer valid for zero sized reads if the vector didn't allocate.
///
/// The caller must ensure that the vector outlives the pointer this
/// function returns, or else it will end up pointing to garbage.
@@ -1440,9 +1570,6 @@ impl<T, A: Allocator> Vec<T, A> {
}
let len = self.len();
- if index > len {
- assert_failed(index, len);
- }
// space for the new element
if len == self.buf.capacity() {
@@ -1454,9 +1581,15 @@ impl<T, A: Allocator> Vec<T, A> {
// The spot to put the new value
{
let p = self.as_mut_ptr().add(index);
- // Shift everything over to make space. (Duplicating the
- // `index`th element into two consecutive places.)
- ptr::copy(p, p.offset(1), len - index);
+ if index < len {
+ // Shift everything over to make space. (Duplicating the
+ // `index`th element into two consecutive places.)
+ ptr::copy(p, p.add(1), len - index);
+ } else if index == len {
+ // No elements need shifting.
+ } else {
+ assert_failed(index, len);
+ }
// Write it in, overwriting the first copy of the `index`th
// element.
ptr::write(p, element);
@@ -1513,7 +1646,7 @@ impl<T, A: Allocator> Vec<T, A> {
ret = ptr::read(ptr);
// Shift everything down to fill in that spot.
- ptr::copy(ptr.offset(1), ptr, len - index - 1);
+ ptr::copy(ptr.add(1), ptr, len - index - 1);
}
self.set_len(len - 1);
ret
@@ -1562,11 +1695,11 @@ impl<T, A: Allocator> Vec<T, A> {
///
/// ```
/// let mut vec = vec![1, 2, 3, 4];
- /// vec.retain_mut(|x| if *x > 3 {
- /// false
- /// } else {
+ /// vec.retain_mut(|x| if *x <= 3 {
/// *x += 1;
/// true
+ /// } else {
+ /// false
/// });
/// assert_eq!(vec, [2, 3, 4]);
/// ```
@@ -1854,6 +1987,51 @@ impl<T, A: Allocator> Vec<T, A> {
Ok(())
}
+ /// Appends an element if there is sufficient spare capacity, otherwise an error is returned
+ /// with the element.
+ ///
+ /// Unlike [`push`] this method will not reallocate when there's insufficient capacity.
+ /// The caller should use [`reserve`] or [`try_reserve`] to ensure that there is enough capacity.
+ ///
+ /// [`push`]: Vec::push
+ /// [`reserve`]: Vec::reserve
+ /// [`try_reserve`]: Vec::try_reserve
+ ///
+ /// # Examples
+ ///
+ /// A manual, panic-free alternative to [`FromIterator`]:
+ ///
+ /// ```
+ /// #![feature(vec_push_within_capacity)]
+ ///
+ /// use std::collections::TryReserveError;
+ /// fn from_iter_fallible<T>(iter: impl Iterator<Item=T>) -> Result<Vec<T>, TryReserveError> {
+ /// let mut vec = Vec::new();
+ /// for value in iter {
+ /// if let Err(value) = vec.push_within_capacity(value) {
+ /// vec.try_reserve(1)?;
+ /// // this cannot fail, the previous line either returned or added at least 1 free slot
+ /// let _ = vec.push_within_capacity(value);
+ /// }
+ /// }
+ /// Ok(vec)
+ /// }
+ /// assert_eq!(from_iter_fallible(0..100), Ok(Vec::from_iter(0..100)));
+ /// ```
+ #[inline]
+ #[unstable(feature = "vec_push_within_capacity", issue = "100486")]
+ pub fn push_within_capacity(&mut self, value: T) -> Result<(), T> {
+ if self.len == self.buf.capacity() {
+ return Err(value);
+ }
+ unsafe {
+ let end = self.as_mut_ptr().add(self.len);
+ ptr::write(end, value);
+ self.len += 1;
+ }
+ Ok(())
+ }
+
/// Removes the last element from a vector and returns it, or [`None`] if it
/// is empty.
///
@@ -1886,7 +2064,7 @@ impl<T, A: Allocator> Vec<T, A> {
///
/// # Panics
///
- /// Panics if the number of elements in the vector overflows a `usize`.
+ /// Panics if the new capacity exceeds `isize::MAX` bytes.
///
/// # Examples
///
@@ -1980,9 +2158,7 @@ impl<T, A: Allocator> Vec<T, A> {
unsafe {
// set self.vec length's to start, to be safe in case Drain is leaked
self.set_len(start);
- // Use the borrow in the IterMut to indicate borrowing behavior of the
- // whole Drain iterator (like &mut T).
- let range_slice = slice::from_raw_parts_mut(self.as_mut_ptr().add(start), end - start);
+ let range_slice = slice::from_raw_parts(self.as_ptr().add(start), end - start);
Drain {
tail_start: end,
tail_len: len - end,
@@ -2145,7 +2321,7 @@ impl<T, A: Allocator> Vec<T, A> {
{
let len = self.len();
if new_len > len {
- self.extend_with(new_len - len, ExtendFunc(f));
+ self.extend_trusted(iter::repeat_with(f).take(new_len - len));
} else {
self.truncate(new_len);
}
@@ -2174,7 +2350,6 @@ impl<T, A: Allocator> Vec<T, A> {
/// static_ref[0] += 1;
/// assert_eq!(static_ref, &[2, 2, 3]);
/// ```
- #[cfg(not(no_global_oom_handling))]
#[stable(feature = "vec_leak", since = "1.47.0")]
#[inline]
pub fn leak<'a>(self) -> &'a mut [T]
@@ -2469,7 +2644,7 @@ impl<T: Clone, A: Allocator> Vec<T, A> {
self.reserve(range.len());
// SAFETY:
- // - `slice::range` guarantees that the given range is valid for indexing self
+ // - `slice::range` guarantees that the given range is valid for indexing self
unsafe {
self.spec_extend_from_within(range);
}
@@ -2501,7 +2676,7 @@ impl<T, A: Allocator, const N: usize> Vec<[T; N], A> {
#[unstable(feature = "slice_flatten", issue = "95629")]
pub fn into_flattened(self) -> Vec<T, A> {
let (ptr, len, cap, alloc) = self.into_raw_parts_with_alloc();
- let (new_len, new_cap) = if mem::size_of::<T>() == 0 {
+ let (new_len, new_cap) = if T::IS_ZST {
(len.checked_mul(N).expect("vec len overflow"), usize::MAX)
} else {
// SAFETY:
@@ -2537,16 +2712,6 @@ impl<T: Clone> ExtendWith<T> for ExtendElement<T> {
}
}
-struct ExtendFunc<F>(F);
-impl<T, F: FnMut() -> T> ExtendWith<T> for ExtendFunc<F> {
- fn next(&mut self) -> T {
- (self.0)()
- }
- fn last(mut self) -> T {
- (self.0)()
- }
-}
-
impl<T, A: Allocator> Vec<T, A> {
#[cfg(not(no_global_oom_handling))]
/// Extend the vector by `n` values, using the given generator.
@@ -2563,7 +2728,7 @@ impl<T, A: Allocator> Vec<T, A> {
// Write all elements except the last one
for _ in 1..n {
ptr::write(ptr, value.next());
- ptr = ptr.offset(1);
+ ptr = ptr.add(1);
// Increment the length in every step in case next() panics
local_len.increment_len(1);
}
@@ -2592,7 +2757,7 @@ impl<T, A: Allocator> Vec<T, A> {
// Write all elements except the last one
for _ in 1..n {
ptr::write(ptr, value.next());
- ptr = ptr.offset(1);
+ ptr = ptr.add(1);
// Increment the length in every step in case next() panics
local_len.increment_len(1);
}
@@ -2664,7 +2829,7 @@ impl<T: Clone, A: Allocator> ExtendFromWithinSpec for Vec<T, A> {
let (this, spare, len) = unsafe { self.split_at_spare_mut_with_len() };
// SAFETY:
- // - caller guaratees that src is a valid index
+ // - caller guarantees that src is a valid index
let to_clone = unsafe { this.get_unchecked(src) };
iter::zip(to_clone, spare)
@@ -2683,13 +2848,13 @@ impl<T: Copy, A: Allocator> ExtendFromWithinSpec for Vec<T, A> {
let (init, spare) = self.split_at_spare_mut();
// SAFETY:
- // - caller guaratees that `src` is a valid index
+ // - caller guarantees that `src` is a valid index
let source = unsafe { init.get_unchecked(src) };
// SAFETY:
// - Both pointers are created from unique slice references (`&mut [_]`)
// so they are valid and do not overlap.
- // - Elements are :Copy so it's OK to to copy them, without doing
+ // - Elements are :Copy so it's OK to copy them, without doing
// anything with the original values
// - `count` is equal to the len of `source`, so source is valid for
// `count` reads
@@ -2712,6 +2877,7 @@ impl<T: Copy, A: Allocator> ExtendFromWithinSpec for Vec<T, A> {
impl<T, A: Allocator> ops::Deref for Vec<T, A> {
type Target = [T];
+ #[inline]
fn deref(&self) -> &[T] {
unsafe { slice::from_raw_parts(self.as_ptr(), self.len) }
}
@@ -2719,6 +2885,7 @@ impl<T, A: Allocator> ops::Deref for Vec<T, A> {
#[stable(feature = "rust1", since = "1.0.0")]
impl<T, A: Allocator> ops::DerefMut for Vec<T, A> {
+ #[inline]
fn deref_mut(&mut self) -> &mut [T] {
unsafe { slice::from_raw_parts_mut(self.as_mut_ptr(), self.len) }
}
@@ -2764,7 +2931,7 @@ impl<T: Clone, A: Allocator + Clone> Clone for Vec<T, A> {
// HACK(japaric): with cfg(test) the inherent `[T]::to_vec` method, which is
// required for this method definition, is not available. Instead use the
- // `slice::to_vec` function which is only available with cfg(test)
+ // `slice::to_vec` function which is only available with cfg(test)
// NB see the slice::hack module in slice.rs for more information
#[cfg(test)]
fn clone(&self) -> Self {
@@ -2845,19 +3012,22 @@ impl<T, A: Allocator> IntoIterator for Vec<T, A> {
///
/// ```
/// let v = vec!["a".to_string(), "b".to_string()];
- /// for s in v.into_iter() {
- /// // s has type String, not &String
- /// println!("{s}");
- /// }
+ /// let mut v_iter = v.into_iter();
+ ///
+ /// let first_element: Option<String> = v_iter.next();
+ ///
+ /// assert_eq!(first_element, Some("a".to_string()));
+ /// assert_eq!(v_iter.next(), Some("b".to_string()));
+ /// assert_eq!(v_iter.next(), None);
/// ```
#[inline]
- fn into_iter(self) -> IntoIter<T, A> {
+ fn into_iter(self) -> Self::IntoIter {
unsafe {
let mut me = ManuallyDrop::new(self);
let alloc = ManuallyDrop::new(ptr::read(me.allocator()));
let begin = me.as_mut_ptr();
- let end = if mem::size_of::<T>() == 0 {
- arith_offset(begin as *const i8, me.len() as isize) as *const T
+ let end = if T::IS_ZST {
+ begin.wrapping_byte_add(me.len())
} else {
begin.add(me.len()) as *const T
};
@@ -2879,7 +3049,7 @@ impl<'a, T, A: Allocator> IntoIterator for &'a Vec<T, A> {
type Item = &'a T;
type IntoIter = slice::Iter<'a, T>;
- fn into_iter(self) -> slice::Iter<'a, T> {
+ fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
@@ -2889,7 +3059,7 @@ impl<'a, T, A: Allocator> IntoIterator for &'a mut Vec<T, A> {
type Item = &'a mut T;
type IntoIter = slice::IterMut<'a, T>;
- fn into_iter(self) -> slice::IterMut<'a, T> {
+ fn into_iter(self) -> Self::IntoIter {
self.iter_mut()
}
}
@@ -2969,6 +3139,69 @@ impl<T, A: Allocator> Vec<T, A> {
Ok(())
}
+ // specific extend for `TrustedLen` iterators, called both by the specializations
+ // and internal places where resolving specialization makes compilation slower
+ #[cfg(not(no_global_oom_handling))]
+ fn extend_trusted(&mut self, iterator: impl iter::TrustedLen<Item = T>) {
+ let (low, high) = iterator.size_hint();
+ if let Some(additional) = high {
+ debug_assert_eq!(
+ low,
+ additional,
+ "TrustedLen iterator's size hint is not exact: {:?}",
+ (low, high)
+ );
+ self.reserve(additional);
+ unsafe {
+ let ptr = self.as_mut_ptr();
+ let mut local_len = SetLenOnDrop::new(&mut self.len);
+ iterator.for_each(move |element| {
+ ptr::write(ptr.add(local_len.current_len()), element);
+ // Since the loop executes user code which can panic we have to update
+ // the length every step to correctly drop what we've written.
+ // NB can't overflow since we would have had to alloc the address space
+ local_len.increment_len(1);
+ });
+ }
+ } else {
+ // Per TrustedLen contract a `None` upper bound means that the iterator length
+ // truly exceeds usize::MAX, which would eventually lead to a capacity overflow anyway.
+ // Since the other branch already panics eagerly (via `reserve()`) we do the same here.
+ // This avoids additional codegen for a fallback code path which would eventually
+ // panic anyway.
+ panic!("capacity overflow");
+ }
+ }
+
+ // specific extend for `TrustedLen` iterators, called both by the specializations
+ // and internal places where resolving specialization makes compilation slower
+ fn try_extend_trusted(&mut self, iterator: impl iter::TrustedLen<Item = T>) -> Result<(), TryReserveError> {
+ let (low, high) = iterator.size_hint();
+ if let Some(additional) = high {
+ debug_assert_eq!(
+ low,
+ additional,
+ "TrustedLen iterator's size hint is not exact: {:?}",
+ (low, high)
+ );
+ self.try_reserve(additional)?;
+ unsafe {
+ let ptr = self.as_mut_ptr();
+ let mut local_len = SetLenOnDrop::new(&mut self.len);
+ iterator.for_each(move |element| {
+ ptr::write(ptr.add(local_len.current_len()), element);
+ // Since the loop executes user code which can panic we have to update
+ // the length every step to correctly drop what we've written.
+ // NB can't overflow since we would have had to alloc the address space
+ local_len.increment_len(1);
+ });
+ }
+ Ok(())
+ } else {
+ Err(TryReserveErrorKind::CapacityOverflow.into())
+ }
+ }
+
/// Creates a splicing iterator that replaces the specified range in the vector
/// with the given `replace_with` iterator and yields the removed items.
/// `replace_with` does not need to be the same length as `range`.
@@ -3135,6 +3368,8 @@ unsafe impl<#[may_dangle] T, A: Allocator> Drop for Vec<T, A> {
#[rustc_const_unstable(feature = "const_default_impls", issue = "87864")]
impl<T> const Default for Vec<T> {
/// Creates an empty `Vec<T>`.
+ ///
+ /// The vector will not allocate until elements are pushed onto it.
fn default() -> Vec<T> {
Vec::new()
}
@@ -3227,12 +3462,15 @@ impl<T, const N: usize> From<[T; N]> for Vec<T> {
/// ```
#[cfg(not(test))]
fn from(s: [T; N]) -> Vec<T> {
- <[T]>::into_vec(box s)
+ <[T]>::into_vec(
+ #[rustc_box]
+ Box::new(s),
+ )
}
#[cfg(test)]
fn from(s: [T; N]) -> Vec<T> {
- crate::slice::into_vec(box s)
+ crate::slice::into_vec(Box::new(s))
}
}
@@ -3261,7 +3499,7 @@ where
}
}
-// note: test pulls in libstd, which causes errors here
+// note: test pulls in std, which causes errors here
#[cfg(not(test))]
#[stable(feature = "vec_from_box", since = "1.18.0")]
impl<T, A: Allocator> From<Box<[T], A>> for Vec<T, A> {
@@ -3279,7 +3517,7 @@ impl<T, A: Allocator> From<Box<[T], A>> for Vec<T, A> {
}
}
-// note: test pulls in libstd, which causes errors here
+// note: test pulls in std, which causes errors here
#[cfg(not(no_global_oom_handling))]
#[cfg(not(test))]
#[stable(feature = "box_from_vec", since = "1.20.0")]
@@ -3294,6 +3532,14 @@ impl<T, A: Allocator> From<Vec<T, A>> for Box<[T], A> {
/// ```
/// assert_eq!(Box::from(vec![1, 2, 3]), vec![1, 2, 3].into_boxed_slice());
/// ```
+ ///
+ /// Any excess capacity is removed:
+ /// ```
+ /// let mut vec = Vec::with_capacity(10);
+ /// vec.extend([1, 2, 3]);
+ ///
+ /// assert_eq!(Box::from(vec), vec![1, 2, 3].into_boxed_slice());
+ /// ```
fn from(v: Vec<T, A>) -> Self {
v.into_boxed_slice()
}
diff --git a/rust/alloc/vec/set_len_on_drop.rs b/rust/alloc/vec/set_len_on_drop.rs
index 448bf5076a0b..d3c7297b80ec 100644
--- a/rust/alloc/vec/set_len_on_drop.rs
+++ b/rust/alloc/vec/set_len_on_drop.rs
@@ -20,6 +20,11 @@ impl<'a> SetLenOnDrop<'a> {
pub(super) fn increment_len(&mut self, increment: usize) {
self.local_len += increment;
}
+
+ #[inline]
+ pub(super) fn current_len(&self) -> usize {
+ self.local_len
+ }
}
impl Drop for SetLenOnDrop<'_> {
diff --git a/rust/alloc/vec/spec_extend.rs b/rust/alloc/vec/spec_extend.rs
index 5ce2d00991bc..a6a735201e59 100644
--- a/rust/alloc/vec/spec_extend.rs
+++ b/rust/alloc/vec/spec_extend.rs
@@ -1,12 +1,11 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
use crate::alloc::Allocator;
-use crate::collections::{TryReserveError, TryReserveErrorKind};
+use crate::collections::TryReserveError;
use core::iter::TrustedLen;
-use core::ptr::{self};
use core::slice::{self};
-use super::{IntoIter, SetLenOnDrop, Vec};
+use super::{IntoIter, Vec};
// Specialization trait used for Vec::extend
#[cfg(not(no_global_oom_handling))]
@@ -44,36 +43,7 @@ where
I: TrustedLen<Item = T>,
{
default fn spec_extend(&mut self, iterator: I) {
- // This is the case for a TrustedLen iterator.
- let (low, high) = iterator.size_hint();
- if let Some(additional) = high {
- debug_assert_eq!(
- low,
- additional,
- "TrustedLen iterator's size hint is not exact: {:?}",
- (low, high)
- );
- self.reserve(additional);
- unsafe {
- let mut ptr = self.as_mut_ptr().add(self.len());
- let mut local_len = SetLenOnDrop::new(&mut self.len);
- iterator.for_each(move |element| {
- ptr::write(ptr, element);
- ptr = ptr.offset(1);
- // Since the loop executes user code which can panic we have to bump the pointer
- // after each step.
- // NB can't overflow since we would have had to alloc the address space
- local_len.increment_len(1);
- });
- }
- } else {
- // Per TrustedLen contract a `None` upper bound means that the iterator length
- // truly exceeds usize::MAX, which would eventually lead to a capacity overflow anyway.
- // Since the other branch already panics eagerly (via `reserve()`) we do the same here.
- // This avoids additional codegen for a fallback code path which would eventually
- // panic anyway.
- panic!("capacity overflow");
- }
+ self.extend_trusted(iterator)
}
}
@@ -82,32 +52,7 @@ where
I: TrustedLen<Item = T>,
{
default fn try_spec_extend(&mut self, iterator: I) -> Result<(), TryReserveError> {
- // This is the case for a TrustedLen iterator.
- let (low, high) = iterator.size_hint();
- if let Some(additional) = high {
- debug_assert_eq!(
- low,
- additional,
- "TrustedLen iterator's size hint is not exact: {:?}",
- (low, high)
- );
- self.try_reserve(additional)?;
- unsafe {
- let mut ptr = self.as_mut_ptr().add(self.len());
- let mut local_len = SetLenOnDrop::new(&mut self.len);
- iterator.for_each(move |element| {
- ptr::write(ptr, element);
- ptr = ptr.offset(1);
- // Since the loop executes user code which can panic we have to bump the pointer
- // after each step.
- // NB can't overflow since we would have had to alloc the address space
- local_len.increment_len(1);
- });
- }
- Ok(())
- } else {
- Err(TryReserveErrorKind::CapacityOverflow.into())
- }
+ self.try_extend_trusted(iterator)
}
}
diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
index 50e7a76d5455..3e601ce2548d 100644
--- a/rust/bindings/bindings_helper.h
+++ b/rust/bindings/bindings_helper.h
@@ -6,6 +6,7 @@
* Sorted alphabetically.
*/
+#include <linux/errname.h>
#include <linux/slab.h>
#include <linux/refcount.h>
#include <linux/wait.h>
diff --git a/rust/bindings/lib.rs b/rust/bindings/lib.rs
index 7b246454e009..9bcbea04dac3 100644
--- a/rust/bindings/lib.rs
+++ b/rust/bindings/lib.rs
@@ -9,7 +9,6 @@
//! using this crate.
#![no_std]
-#![feature(core_ffi_c)]
// See <https://github.com/rust-lang/rust-bindgen/issues/1651>.
#![cfg_attr(test, allow(deref_nullptr))]
#![cfg_attr(test, allow(unaligned_references))]
diff --git a/rust/helpers.c b/rust/helpers.c
index 81e80261d597..bb594da56137 100644
--- a/rust/helpers.c
+++ b/rust/helpers.c
@@ -21,6 +21,7 @@
#include <linux/bug.h>
#include <linux/build_bug.h>
#include <linux/err.h>
+#include <linux/errname.h>
#include <linux/refcount.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
@@ -110,6 +111,12 @@ long rust_helper_PTR_ERR(__force const void *ptr)
}
EXPORT_SYMBOL_GPL(rust_helper_PTR_ERR);
+const char *rust_helper_errname(int err)
+{
+ return errname(err);
+}
+EXPORT_SYMBOL_GPL(rust_helper_errname);
+
struct task_struct *rust_helper_get_current(void)
{
return current;
diff --git a/rust/kernel/build_assert.rs b/rust/kernel/build_assert.rs
index 659542393c09..9e37120bc69c 100644
--- a/rust/kernel/build_assert.rs
+++ b/rust/kernel/build_assert.rs
@@ -67,6 +67,8 @@ macro_rules! build_error {
/// assert!(n > 1); // Run-time check
/// }
/// ```
+///
+/// [`static_assert!`]: crate::static_assert!
#[macro_export]
macro_rules! build_assert {
($cond:expr $(,)?) => {{
diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs
index 5f4114b30b94..05fcab6abfe6 100644
--- a/rust/kernel/error.rs
+++ b/rust/kernel/error.rs
@@ -4,16 +4,20 @@
//!
//! C header: [`include/uapi/asm-generic/errno-base.h`](../../../include/uapi/asm-generic/errno-base.h)
+use crate::str::CStr;
+
use alloc::{
alloc::{AllocError, LayoutError},
collections::TryReserveError,
};
use core::convert::From;
+use core::fmt;
use core::num::TryFromIntError;
use core::str::Utf8Error;
/// Contains the C-compatible error codes.
+#[rustfmt::skip]
pub mod code {
macro_rules! declare_err {
($err:tt $(,)? $($doc:expr),+) => {
@@ -58,6 +62,25 @@ pub mod code {
declare_err!(EPIPE, "Broken pipe.");
declare_err!(EDOM, "Math argument out of domain of func.");
declare_err!(ERANGE, "Math result not representable.");
+ declare_err!(ERESTARTSYS, "Restart the system call.");
+ declare_err!(ERESTARTNOINTR, "System call was interrupted by a signal and will be restarted.");
+ declare_err!(ERESTARTNOHAND, "Restart if no handler.");
+ declare_err!(ENOIOCTLCMD, "No ioctl command.");
+ declare_err!(ERESTART_RESTARTBLOCK, "Restart by calling sys_restart_syscall.");
+ declare_err!(EPROBE_DEFER, "Driver requests probe retry.");
+ declare_err!(EOPENSTALE, "Open found a stale dentry.");
+ declare_err!(ENOPARAM, "Parameter not supported.");
+ declare_err!(EBADHANDLE, "Illegal NFS file handle.");
+ declare_err!(ENOTSYNC, "Update synchronization mismatch.");
+ declare_err!(EBADCOOKIE, "Cookie is stale.");
+ declare_err!(ENOTSUPP, "Operation is not supported.");
+ declare_err!(ETOOSMALL, "Buffer or request is too small.");
+ declare_err!(ESERVERFAULT, "An untranslatable error occurred.");
+ declare_err!(EBADTYPE, "Type not supported by server.");
+ declare_err!(EJUKEBOX, "Request initiated, but will not complete before timeout.");
+ declare_err!(EIOCBQUEUED, "iocb queued, will get completion event.");
+ declare_err!(ERECALLCONFLICT, "Conflict with recalled state.");
+ declare_err!(ENOGRACE, "NFS file lock reclaim refused.");
}
/// Generic integer kernel error.
@@ -113,6 +136,42 @@ impl Error {
// SAFETY: self.0 is a valid error due to its invariant.
unsafe { bindings::ERR_PTR(self.0.into()) as *mut _ }
}
+
+ /// Returns a string representing the error, if one exists.
+ #[cfg(not(testlib))]
+ pub fn name(&self) -> Option<&'static CStr> {
+ // SAFETY: Just an FFI call, there are no extra safety requirements.
+ let ptr = unsafe { bindings::errname(-self.0) };
+ if ptr.is_null() {
+ None
+ } else {
+ // SAFETY: The string returned by `errname` is static and `NUL`-terminated.
+ Some(unsafe { CStr::from_char_ptr(ptr) })
+ }
+ }
+
+ /// Returns a string representing the error, if one exists.
+ ///
+ /// When `testlib` is configured, this always returns `None` to avoid the dependency on a
+ /// kernel function so that tests that use this (e.g., by calling [`Result::unwrap`]) can still
+ /// run in userspace.
+ #[cfg(testlib)]
+ pub fn name(&self) -> Option<&'static CStr> {
+ None
+ }
+}
+
+impl fmt::Debug for Error {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match self.name() {
+ // Print out number if no name can be found.
+ None => f.debug_tuple("Error").field(&-self.0).finish(),
+ // SAFETY: These strings are ASCII-only.
+ Some(name) => f
+ .debug_tuple(unsafe { core::str::from_utf8_unchecked(name) })
+ .finish(),
+ }
+ }
}
impl From<AllocError> for Error {
@@ -177,7 +236,7 @@ impl From<core::convert::Infallible> for Error {
/// Note that even if a function does not return anything when it succeeds,
/// it should still be modeled as returning a `Result` rather than
/// just an [`Error`].
-pub type Result<T = ()> = core::result::Result<T, Error>;
+pub type Result<T = (), E = Error> = core::result::Result<T, E>;
/// Converts an integer as returned by a C kernel function to an error if it's negative, and
/// `Ok(())` otherwise.
diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs
index 4ebfb08dab11..b4332a4ec1f4 100644
--- a/rust/kernel/init.rs
+++ b/rust/kernel/init.rs
@@ -197,6 +197,7 @@
//! [`Opaque`]: kernel::types::Opaque
//! [`Opaque::ffi_init`]: kernel::types::Opaque::ffi_init
//! [`pin_data`]: ::macros::pin_data
+//! [`pin_init!`]: crate::pin_init!
use crate::{
error::{self, Error},
@@ -255,6 +256,8 @@ pub mod macros;
/// A normal `let` binding with optional type annotation. The expression is expected to implement
/// [`PinInit`]/[`Init`] with the error type [`Infallible`]. If you want to use a different error
/// type, then use [`stack_try_pin_init!`].
+///
+/// [`stack_try_pin_init!`]: crate::stack_try_pin_init!
#[macro_export]
macro_rules! stack_pin_init {
(let $var:ident $(: $t:ty)? = $val:expr) => {
@@ -804,6 +807,8 @@ macro_rules! try_pin_init {
///
/// This initializer is for initializing data in-place that might later be moved. If you want to
/// pin-initialize, use [`pin_init!`].
+///
+/// [`try_init!`]: crate::try_init!
// For a detailed example of how this macro works, see the module documentation of the hidden
// module `__internal` inside of `init/__internal.rs`.
#[macro_export]
diff --git a/rust/kernel/init/macros.rs b/rust/kernel/init/macros.rs
index 541cfad1d8be..00aa4e956c0a 100644
--- a/rust/kernel/init/macros.rs
+++ b/rust/kernel/init/macros.rs
@@ -16,8 +16,9 @@
//!
//! We will look at the following example:
//!
-//! ```rust
+//! ```rust,ignore
//! # use kernel::init::*;
+//! # use core::pin::Pin;
//! #[pin_data]
//! #[repr(C)]
//! struct Bar<T> {
@@ -71,11 +72,12 @@
//!
//! Here is the definition of `Bar` from our example:
//!
-//! ```rust
+//! ```rust,ignore
//! # use kernel::init::*;
//! #[pin_data]
//! #[repr(C)]
//! struct Bar<T> {
+//! #[pin]
//! t: T,
//! pub x: usize,
//! }
@@ -83,7 +85,7 @@
//!
//! This expands to the following code:
//!
-//! ```rust
+//! ```rust,ignore
//! // Firstly the normal definition of the struct, attributes are preserved:
//! #[repr(C)]
//! struct Bar<T> {
@@ -116,20 +118,22 @@
//! unsafe fn t<E>(
//! self,
//! slot: *mut T,
-//! init: impl ::kernel::init::Init<T, E>,
+//! // Since `t` is `#[pin]`, this is `PinInit`.
+//! init: impl ::kernel::init::PinInit<T, E>,
//! ) -> ::core::result::Result<(), E> {
-//! unsafe { ::kernel::init::Init::__init(init, slot) }
+//! unsafe { ::kernel::init::PinInit::__pinned_init(init, slot) }
//! }
//! pub unsafe fn x<E>(
//! self,
//! slot: *mut usize,
+//! // Since `x` is not `#[pin]`, this is `Init`.
//! init: impl ::kernel::init::Init<usize, E>,
//! ) -> ::core::result::Result<(), E> {
//! unsafe { ::kernel::init::Init::__init(init, slot) }
//! }
//! }
//! // Implement the internal `HasPinData` trait that associates `Bar` with the pin-data struct
-//! // that we constructed beforehand.
+//! // that we constructed above.
//! unsafe impl<T> ::kernel::init::__internal::HasPinData for Bar<T> {
//! type PinData = __ThePinData<T>;
//! unsafe fn __pin_data() -> Self::PinData {
@@ -160,6 +164,8 @@
//! struct __Unpin<'__pin, T> {
//! __phantom_pin: ::core::marker::PhantomData<fn(&'__pin ()) -> &'__pin ()>,
//! __phantom: ::core::marker::PhantomData<fn(Bar<T>) -> Bar<T>>,
+//! // Our only `#[pin]` field is `t`.
+//! t: T,
//! }
//! #[doc(hidden)]
//! impl<'__pin, T>
@@ -193,7 +199,7 @@
//!
//! Here is the impl on `Bar` defining the new function:
//!
-//! ```rust
+//! ```rust,ignore
//! impl<T> Bar<T> {
//! fn new(t: T) -> impl PinInit<Self> {
//! pin_init!(Self { t, x: 0 })
@@ -203,7 +209,7 @@
//!
//! This expands to the following code:
//!
-//! ```rust
+//! ```rust,ignore
//! impl<T> Bar<T> {
//! fn new(t: T) -> impl PinInit<Self> {
//! {
@@ -232,25 +238,31 @@
//! // that will refer to this struct instead of the one defined above.
//! struct __InitOk;
//! // This is the expansion of `t,`, which is syntactic sugar for `t: t,`.
-//! unsafe { ::core::ptr::write(&raw mut (*slot).t, t) };
+//! unsafe { ::core::ptr::write(::core::addr_of_mut!((*slot).t), t) };
//! // Since initialization could fail later (not in this case, since the error
-//! // type is `Infallible`) we will need to drop this field if it fails. This
-//! // `DropGuard` will drop the field when it gets dropped and has not yet
-//! // been forgotten. We make a reference to it, so users cannot `mem::forget`
-//! // it from the initializer, since the name is the same as the field.
+//! // type is `Infallible`) we will need to drop this field if there is an
+//! // error later. This `DropGuard` will drop the field when it gets dropped
+//! // and has not yet been forgotten. We make a reference to it, so users
+//! // cannot `mem::forget` it from the initializer, since the name is the same
+//! // as the field (including hygiene).
//! let t = &unsafe {
-//! ::kernel::init::__internal::DropGuard::new(&raw mut (*slot).t)
+//! ::kernel::init::__internal::DropGuard::new(
+//! ::core::addr_of_mut!((*slot).t),
+//! )
//! };
//! // Expansion of `x: 0,`:
//! // Since this can be an arbitrary expression we cannot place it inside of
//! // the `unsafe` block, so we bind it here.
//! let x = 0;
-//! unsafe { ::core::ptr::write(&raw mut (*slot).x, x) };
+//! unsafe { ::core::ptr::write(::core::addr_of_mut!((*slot).x), x) };
+//! // We again create a `DropGuard`.
//! let x = &unsafe {
-//! ::kernel::init::__internal::DropGuard::new(&raw mut (*slot).x)
+//! ::kernel::init::__internal::DropGuard::new(
+//! ::core::addr_of_mut!((*slot).x),
+//! )
//! };
//!
-//! // Here we use the type checker to ensuer that every field has been
+//! // Here we use the type checker to ensure that every field has been
//! // initialized exactly once, since this is `if false` it will never get
//! // executed, but still type-checked.
//! // Additionally we abuse `slot` to automatically infer the correct type for
@@ -272,7 +284,7 @@
//! };
//! }
//! // Since initialization has successfully completed, we can now forget the
-//! // guards.
+//! // guards. This is not `mem::forget`, since we only have `&DropGuard`.
//! unsafe { ::kernel::init::__internal::DropGuard::forget(t) };
//! unsafe { ::kernel::init::__internal::DropGuard::forget(x) };
//! }
@@ -280,7 +292,7 @@
//! // `__InitOk` that we need to return.
//! Ok(__InitOk)
//! });
-//! // Change the return type of the closure.
+//! // Change the return type from `__InitOk` to `()`.
//! let init = move |slot| -> ::core::result::Result<(), ::core::convert::Infallible> {
//! init(slot).map(|__InitOk| ())
//! };
@@ -299,7 +311,7 @@
//! Since we already took a look at `#[pin_data]` on `Bar`, this section will only explain the
//! differences/new things in the expansion of the `Foo` definition:
//!
-//! ```rust
+//! ```rust,ignore
//! #[pin_data(PinnedDrop)]
//! struct Foo {
//! a: usize,
@@ -310,7 +322,7 @@
//!
//! This expands to the following code:
//!
-//! ```rust
+//! ```rust,ignore
//! struct Foo {
//! a: usize,
//! b: Bar<u32>,
@@ -330,8 +342,6 @@
//! unsafe fn b<E>(
//! self,
//! slot: *mut Bar<u32>,
-//! // Note that this is `PinInit` instead of `Init`, this is because `b` is
-//! // structurally pinned, as marked by the `#[pin]` attribute.
//! init: impl ::kernel::init::PinInit<Bar<u32>, E>,
//! ) -> ::core::result::Result<(), E> {
//! unsafe { ::kernel::init::PinInit::__pinned_init(init, slot) }
@@ -359,14 +369,13 @@
//! struct __Unpin<'__pin> {
//! __phantom_pin: ::core::marker::PhantomData<fn(&'__pin ()) -> &'__pin ()>,
//! __phantom: ::core::marker::PhantomData<fn(Foo) -> Foo>,
-//! // Since this field is `#[pin]`, it is listed here.
//! b: Bar<u32>,
//! }
//! #[doc(hidden)]
//! impl<'__pin> ::core::marker::Unpin for Foo where __Unpin<'__pin>: ::core::marker::Unpin {}
//! // Since we specified `PinnedDrop` as the argument to `#[pin_data]`, we expect `Foo` to
//! // implement `PinnedDrop`. Thus we do not need to prevent `Drop` implementations like
-//! // before, instead we implement it here and delegate to `PinnedDrop`.
+//! // before, instead we implement `Drop` here and delegate to `PinnedDrop`.
//! impl ::core::ops::Drop for Foo {
//! fn drop(&mut self) {
//! // Since we are getting dropped, no one else has a reference to `self` and thus we
@@ -388,7 +397,7 @@
//!
//! Here is the `PinnedDrop` impl for `Foo`:
//!
-//! ```rust
+//! ```rust,ignore
//! #[pinned_drop]
//! impl PinnedDrop for Foo {
//! fn drop(self: Pin<&mut Self>) {
@@ -399,7 +408,7 @@
//!
//! This expands to the following code:
//!
-//! ```rust
+//! ```rust,ignore
//! // `unsafe`, full path and the token parameter are added, everything else stays the same.
//! unsafe impl ::kernel::init::PinnedDrop for Foo {
//! fn drop(self: Pin<&mut Self>, _: ::kernel::init::__internal::OnlyCallFromDrop) {
@@ -410,10 +419,10 @@
//!
//! ## `pin_init!` on `Foo`
//!
-//! Since we already took a look at `pin_init!` on `Bar`, this section will only explain the
-//! differences/new things in the expansion of `pin_init!` on `Foo`:
+//! Since we already took a look at `pin_init!` on `Bar`, this section will only show the expansion
+//! of `pin_init!` on `Foo`:
//!
-//! ```rust
+//! ```rust,ignore
//! let a = 42;
//! let initializer = pin_init!(Foo {
//! a,
@@ -423,7 +432,7 @@
//!
//! This expands to the following code:
//!
-//! ```rust
+//! ```rust,ignore
//! let a = 42;
//! let initializer = {
//! struct __InitOk;
@@ -438,13 +447,15 @@
//! >(data, move |slot| {
//! {
//! struct __InitOk;
-//! unsafe { ::core::ptr::write(&raw mut (*slot).a, a) };
-//! let a = &unsafe { ::kernel::init::__internal::DropGuard::new(&raw mut (*slot).a) };
+//! unsafe { ::core::ptr::write(::core::addr_of_mut!((*slot).a), a) };
+//! let a = &unsafe {
+//! ::kernel::init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).a))
+//! };
//! let b = Bar::new(36);
-//! // Here we use `data` to access the correct field and require that `b` is of type
-//! // `PinInit<Bar<u32>, Infallible>`.
-//! unsafe { data.b(&raw mut (*slot).b, b)? };
-//! let b = &unsafe { ::kernel::init::__internal::DropGuard::new(&raw mut (*slot).b) };
+//! unsafe { data.b(::core::addr_of_mut!((*slot).b), b)? };
+//! let b = &unsafe {
+//! ::kernel::init::__internal::DropGuard::new(::core::addr_of_mut!((*slot).b))
+//! };
//!
//! #[allow(unreachable_code, clippy::diverging_sub_expression)]
//! if false {
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index 676995d4e460..85b261209977 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -14,12 +14,8 @@
#![no_std]
#![feature(allocator_api)]
#![feature(coerce_unsized)]
-#![feature(core_ffi_c)]
#![feature(dispatch_from_dyn)]
-#![feature(explicit_generic_args_with_impl_trait)]
-#![feature(generic_associated_types)]
#![feature(new_uninit)]
-#![feature(pin_macro)]
#![feature(receiver_trait)]
#![feature(unsize)]
diff --git a/rust/kernel/std_vendor.rs b/rust/kernel/std_vendor.rs
index b3e68b24a8c6..388d6a5147a2 100644
--- a/rust/kernel/std_vendor.rs
+++ b/rust/kernel/std_vendor.rs
@@ -137,6 +137,8 @@
/// [`std::dbg`]: https://doc.rust-lang.org/std/macro.dbg.html
/// [`eprintln`]: https://doc.rust-lang.org/std/macro.eprintln.html
/// [`printk`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html
+/// [`pr_info`]: crate::pr_info!
+/// [`pr_debug`]: crate::pr_debug!
#[macro_export]
macro_rules! dbg {
// NOTE: We cannot use `concat!` to make a static string as a format argument
diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs
index cd3d2a6cf1fc..c9dd3bf59e34 100644
--- a/rust/kernel/str.rs
+++ b/rust/kernel/str.rs
@@ -2,6 +2,7 @@
//! String representations.
+use alloc::alloc::AllocError;
use alloc::vec::Vec;
use core::fmt::{self, Write};
use core::ops::{self, Deref, Index};
@@ -199,6 +200,12 @@ impl CStr {
pub unsafe fn as_str_unchecked(&self) -> &str {
unsafe { core::str::from_utf8_unchecked(self.as_bytes()) }
}
+
+ /// Convert this [`CStr`] into a [`CString`] by allocating memory and
+ /// copying over the string data.
+ pub fn to_cstring(&self) -> Result<CString, AllocError> {
+ CString::try_from(self)
+ }
}
impl fmt::Display for CStr {
@@ -584,6 +591,21 @@ impl Deref for CString {
}
}
+impl<'a> TryFrom<&'a CStr> for CString {
+ type Error = AllocError;
+
+ fn try_from(cstr: &'a CStr) -> Result<CString, AllocError> {
+ let mut buf = Vec::new();
+
+ buf.try_extend_from_slice(cstr.as_bytes_with_nul())
+ .map_err(|_| AllocError)?;
+
+ // INVARIANT: The `CStr` and `CString` types have the same invariants for
+ // the string data, and we copied it over without changes.
+ Ok(CString { buf })
+ }
+}
+
/// A convenience alias for [`core::format_args`].
#[macro_export]
macro_rules! fmt {
diff --git a/rust/kernel/sync/arc.rs b/rust/kernel/sync/arc.rs
index e6d206242465..a89843cacaad 100644
--- a/rust/kernel/sync/arc.rs
+++ b/rust/kernel/sync/arc.rs
@@ -146,13 +146,15 @@ impl<T: ?Sized + Unsize<U>, U: ?Sized> core::ops::DispatchFromDyn<Arc<U>> for Ar
// SAFETY: It is safe to send `Arc<T>` to another thread when the underlying `T` is `Sync` because
// it effectively means sharing `&T` (which is safe because `T` is `Sync`); additionally, it needs
-// `T` to be `Send` because any thread that has an `Arc<T>` may ultimately access `T` directly, for
-// example, when the reference count reaches zero and `T` is dropped.
+// `T` to be `Send` because any thread that has an `Arc<T>` may ultimately access `T` using a
+// mutable reference when the reference count reaches zero and `T` is dropped.
unsafe impl<T: ?Sized + Sync + Send> Send for Arc<T> {}
-// SAFETY: It is safe to send `&Arc<T>` to another thread when the underlying `T` is `Sync` for the
-// same reason as above. `T` needs to be `Send` as well because a thread can clone an `&Arc<T>`
-// into an `Arc<T>`, which may lead to `T` being accessed by the same reasoning as above.
+// SAFETY: It is safe to send `&Arc<T>` to another thread when the underlying `T` is `Sync`
+// because it effectively means sharing `&T` (which is safe because `T` is `Sync`); additionally,
+// it needs `T` to be `Send` because any thread that has a `&Arc<T>` may clone it and get an
+// `Arc<T>` on that thread, so the thread may ultimately access `T` using a mutable reference when
+// the reference count reaches zero and `T` is dropped.
unsafe impl<T: ?Sized + Sync + Send> Sync for Arc<T> {}
impl<T> Arc<T> {
@@ -185,7 +187,7 @@ impl<T> Arc<T> {
/// Use the given initializer to in-place initialize a `T`.
///
- /// This is equivalent to [`pin_init`], since an [`Arc`] is always pinned.
+ /// This is equivalent to [`Arc<T>::pin_init`], since an [`Arc`] is always pinned.
#[inline]
pub fn init<E>(init: impl Init<T, E>) -> error::Result<Self>
where
@@ -221,6 +223,11 @@ impl<T: ?Sized> Arc<T> {
// reference can be created.
unsafe { ArcBorrow::new(self.ptr) }
}
+
+ /// Compare whether two [`Arc`] pointers reference the same underlying object.
+ pub fn ptr_eq(this: &Self, other: &Self) -> bool {
+ core::ptr::eq(this.ptr.as_ptr(), other.ptr.as_ptr())
+ }
}
impl<T: 'static> ForeignOwnable for Arc<T> {
@@ -259,6 +266,12 @@ impl<T: ?Sized> Deref for Arc<T> {
}
}
+impl<T: ?Sized> AsRef<T> for Arc<T> {
+ fn as_ref(&self) -> &T {
+ self.deref()
+ }
+}
+
impl<T: ?Sized> Clone for Arc<T> {
fn clone(&self) -> Self {
// INVARIANT: C `refcount_inc` saturates the refcount, so it cannot overflow to zero.
diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs
index 526d29a0ae27..7eda15e5f1b3 100644
--- a/rust/kernel/task.rs
+++ b/rust/kernel/task.rs
@@ -64,8 +64,14 @@ macro_rules! current {
#[repr(transparent)]
pub struct Task(pub(crate) Opaque<bindings::task_struct>);
-// SAFETY: It's OK to access `Task` through references from other threads because we're either
-// accessing properties that don't change (e.g., `pid`, `group_leader`) or that are properly
+// SAFETY: By design, the only way to access a `Task` is via the `current` function or via an
+// `ARef<Task>` obtained through the `AlwaysRefCounted` impl. This means that the only situation in
+// which a `Task` can be accessed mutably is when the refcount drops to zero and the destructor
+// runs. It is safe for that to happen on any thread, so it is ok for this type to be `Send`.
+unsafe impl Send for Task {}
+
+// SAFETY: It's OK to access `Task` through shared references from other threads because we're
+// either accessing properties that don't change (e.g., `pid`, `group_leader`) or that are properly
// synchronised by C code (e.g., `signal_pending`).
unsafe impl Sync for Task {}
diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs
index 29db59d6119a..1e5380b16ed5 100644
--- a/rust/kernel/types.rs
+++ b/rust/kernel/types.rs
@@ -321,6 +321,19 @@ pub struct ARef<T: AlwaysRefCounted> {
_p: PhantomData<T>,
}
+// SAFETY: It is safe to send `ARef<T>` to another thread when the underlying `T` is `Sync` because
+// it effectively means sharing `&T` (which is safe because `T` is `Sync`); additionally, it needs
+// `T` to be `Send` because any thread that has an `ARef<T>` may ultimately access `T` using a
+// mutable reference, for example, when the reference count reaches zero and `T` is dropped.
+unsafe impl<T: AlwaysRefCounted + Sync + Send> Send for ARef<T> {}
+
+// SAFETY: It is safe to send `&ARef<T>` to another thread when the underlying `T` is `Sync`
+// because it effectively means sharing `&T` (which is safe because `T` is `Sync`); additionally,
+// it needs `T` to be `Send` because any thread that has a `&ARef<T>` may clone it and get an
+// `ARef<T>` on that thread, so the thread may ultimately access `T` using a mutable reference, for
+// example, when the reference count reaches zero and `T` is dropped.
+unsafe impl<T: AlwaysRefCounted + Sync + Send> Sync for ARef<T> {}
+
impl<T: AlwaysRefCounted> ARef<T> {
/// Creates a new instance of [`ARef`].
///
diff --git a/rust/macros/helpers.rs b/rust/macros/helpers.rs
index b2bdd4d8c958..afb0f2e3a36a 100644
--- a/rust/macros/helpers.rs
+++ b/rust/macros/helpers.rs
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
-use proc_macro::{token_stream, Group, TokenTree};
+use proc_macro::{token_stream, Group, Punct, Spacing, TokenStream, TokenTree};
pub(crate) fn try_ident(it: &mut token_stream::IntoIter) -> Option<String> {
if let Some(TokenTree::Ident(ident)) = it.next() {
@@ -69,3 +69,87 @@ pub(crate) fn expect_end(it: &mut token_stream::IntoIter) {
panic!("Expected end");
}
}
+
+pub(crate) struct Generics {
+ pub(crate) impl_generics: Vec<TokenTree>,
+ pub(crate) ty_generics: Vec<TokenTree>,
+}
+
+/// Parses the given `TokenStream` into `Generics` and the rest.
+///
+/// The generics are not present in the rest, but a where clause might remain.
+pub(crate) fn parse_generics(input: TokenStream) -> (Generics, Vec<TokenTree>) {
+ // `impl_generics`, the declared generics with their bounds.
+ let mut impl_generics = vec![];
+ // Only the names of the generics, without any bounds.
+ let mut ty_generics = vec![];
+ // Tokens not related to the generics e.g. the `where` token and definition.
+ let mut rest = vec![];
+ // The current level of `<`.
+ let mut nesting = 0;
+ let mut toks = input.into_iter();
+ // If we are at the beginning of a generic parameter.
+ let mut at_start = true;
+ for tt in &mut toks {
+ match tt.clone() {
+ TokenTree::Punct(p) if p.as_char() == '<' => {
+ if nesting >= 1 {
+ // This is inside of the generics and part of some bound.
+ impl_generics.push(tt);
+ }
+ nesting += 1;
+ }
+ TokenTree::Punct(p) if p.as_char() == '>' => {
+ // This is a parsing error, so we just end it here.
+ if nesting == 0 {
+ break;
+ } else {
+ nesting -= 1;
+ if nesting >= 1 {
+ // We are still inside of the generics and part of some bound.
+ impl_generics.push(tt);
+ }
+ if nesting == 0 {
+ break;
+ }
+ }
+ }
+ tt => {
+ if nesting == 1 {
+ // Here depending on the token, it might be a generic variable name.
+ match &tt {
+ // Ignore const.
+ TokenTree::Ident(i) if i.to_string() == "const" => {}
+ TokenTree::Ident(_) if at_start => {
+ ty_generics.push(tt.clone());
+ // We also already push the `,` token, this makes it easier to append
+ // generics.
+ ty_generics.push(TokenTree::Punct(Punct::new(',', Spacing::Alone)));
+ at_start = false;
+ }
+ TokenTree::Punct(p) if p.as_char() == ',' => at_start = true,
+ // Lifetimes begin with `'`.
+ TokenTree::Punct(p) if p.as_char() == '\'' && at_start => {
+ ty_generics.push(tt.clone());
+ }
+ _ => {}
+ }
+ }
+ if nesting >= 1 {
+ impl_generics.push(tt);
+ } else if nesting == 0 {
+ // If we haven't entered the generics yet, we still want to keep these tokens.
+ rest.push(tt);
+ }
+ }
+ }
+ }
+ rest.extend(toks);
+ (
+ Generics {
+ impl_generics,
+ ty_generics,
+ },
+ rest,
+ )
+}
diff --git a/rust/macros/pin_data.rs b/rust/macros/pin_data.rs
index 954149d77181..6d58cfda9872 100644
--- a/rust/macros/pin_data.rs
+++ b/rust/macros/pin_data.rs
@@ -1,79 +1,127 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
-use proc_macro::{Punct, Spacing, TokenStream, TokenTree};
+use crate::helpers::{parse_generics, Generics};
+use proc_macro::{Group, Punct, Spacing, TokenStream, TokenTree};
pub(crate) fn pin_data(args: TokenStream, input: TokenStream) -> TokenStream {
// This proc-macro only does some pre-parsing and then delegates the actual parsing to
// `kernel::__pin_data!`.
- //
- // In here we only collect the generics, since parsing them in declarative macros is very
- // elaborate. We also do not need to analyse their structure, we only need to collect them.
- // `impl_generics`, the declared generics with their bounds.
- let mut impl_generics = vec![];
- // Only the names of the generics, without any bounds.
- let mut ty_generics = vec![];
- // Tokens not related to the generics e.g. the `impl` token.
- let mut rest = vec![];
- // The current level of `<`.
- let mut nesting = 0;
- let mut toks = input.into_iter();
- // If we are at the beginning of a generic parameter.
- let mut at_start = true;
- for tt in &mut toks {
- match tt.clone() {
- TokenTree::Punct(p) if p.as_char() == '<' => {
- if nesting >= 1 {
- impl_generics.push(tt);
- }
- nesting += 1;
- }
- TokenTree::Punct(p) if p.as_char() == '>' => {
- if nesting == 0 {
- break;
- } else {
- nesting -= 1;
- if nesting >= 1 {
- impl_generics.push(tt);
- }
- if nesting == 0 {
- break;
- }
+ let (
+ Generics {
+ impl_generics,
+ ty_generics,
+ },
+ rest,
+ ) = parse_generics(input);
+ // The struct definition might contain the `Self` type. Since `__pin_data!` will define a new
+ // type with the same generics and bounds, this poses a problem, since `Self` will refer to the
+ // new type as opposed to this struct definition. Therefore we have to replace `Self` with the
+ // concrete name.
+
+ // Errors that occur when replacing `Self` with `struct_name`.
+ let mut errs = TokenStream::new();
+ // The name of the struct with ty_generics.
+ let struct_name = rest
+ .iter()
+ .skip_while(|tt| !matches!(tt, TokenTree::Ident(i) if i.to_string() == "struct"))
+ .nth(1)
+ .and_then(|tt| match tt {
+ TokenTree::Ident(_) => {
+ let tt = tt.clone();
+ let mut res = vec![tt];
+ if !ty_generics.is_empty() {
+ // We add this, so it is maximally compatible with e.g. `Self::CONST` which
+ // will be replaced by `StructName::<$generics>::CONST`.
+ res.push(TokenTree::Punct(Punct::new(':', Spacing::Joint)));
+ res.push(TokenTree::Punct(Punct::new(':', Spacing::Alone)));
+ res.push(TokenTree::Punct(Punct::new('<', Spacing::Alone)));
+ res.extend(ty_generics.iter().cloned());
+ res.push(TokenTree::Punct(Punct::new('>', Spacing::Alone)));
}
+ Some(res)
}
- tt => {
- if nesting == 1 {
- match &tt {
- TokenTree::Ident(i) if i.to_string() == "const" => {}
- TokenTree::Ident(_) if at_start => {
- ty_generics.push(tt.clone());
- ty_generics.push(TokenTree::Punct(Punct::new(',', Spacing::Alone)));
- at_start = false;
- }
- TokenTree::Punct(p) if p.as_char() == ',' => at_start = true,
- TokenTree::Punct(p) if p.as_char() == '\'' && at_start => {
- ty_generics.push(tt.clone());
- }
- _ => {}
- }
- }
- if nesting >= 1 {
- impl_generics.push(tt);
- } else if nesting == 0 {
- rest.push(tt);
- }
+ _ => None,
+ })
+ .unwrap_or_else(|| {
+ // If we did not find the name of the struct then we will use `Self` as the replacement
+ // and add a compile error to ensure it does not compile.
+ errs.extend(
+ "::core::compile_error!(\"Could not locate type name.\");"
+ .parse::<TokenStream>()
+ .unwrap(),
+ );
+ "Self".parse::<TokenStream>().unwrap().into_iter().collect()
+ });
+ let impl_generics = impl_generics
+ .into_iter()
+ .flat_map(|tt| replace_self_and_deny_type_defs(&struct_name, tt, &mut errs))
+ .collect::<Vec<_>>();
+ let mut rest = rest
+ .into_iter()
+ .flat_map(|tt| {
+ // We ignore top level `struct` tokens, since they would emit a compile error.
+ if matches!(&tt, TokenTree::Ident(i) if i.to_string() == "struct") {
+ vec![tt]
+ } else {
+ replace_self_and_deny_type_defs(&struct_name, tt, &mut errs)
}
- }
- }
- rest.extend(toks);
+ })
+ .collect::<Vec<_>>();
// This should be the body of the struct `{...}`.
let last = rest.pop();
- quote!(::kernel::__pin_data! {
+ let mut quoted = quote!(::kernel::__pin_data! {
parse_input:
@args(#args),
@sig(#(#rest)*),
@impl_generics(#(#impl_generics)*),
@ty_generics(#(#ty_generics)*),
@body(#last),
- })
+ });
+ quoted.extend(errs);
+ quoted
+}
+
+/// Replaces `Self` with `struct_name` and errors on `enum`, `trait`, `struct` `union` and `impl`
+/// keywords.
+///
+/// The error is appended to `errs` to allow normal parsing to continue.
+fn replace_self_and_deny_type_defs(
+ struct_name: &Vec<TokenTree>,
+ tt: TokenTree,
+ errs: &mut TokenStream,
+) -> Vec<TokenTree> {
+ match tt {
+ TokenTree::Ident(ref i)
+ if i.to_string() == "enum"
+ || i.to_string() == "trait"
+ || i.to_string() == "struct"
+ || i.to_string() == "union"
+ || i.to_string() == "impl" =>
+ {
+ errs.extend(
+ format!(
+ "::core::compile_error!(\"Cannot use `{i}` inside of struct definition with \
+ `#[pin_data]`.\");"
+ )
+ .parse::<TokenStream>()
+ .unwrap()
+ .into_iter()
+ .map(|mut tok| {
+ tok.set_span(tt.span());
+ tok
+ }),
+ );
+ vec![tt]
+ }
+ TokenTree::Ident(i) if i.to_string() == "Self" => struct_name.clone(),
+ TokenTree::Literal(_) | TokenTree::Punct(_) | TokenTree::Ident(_) => vec![tt],
+ TokenTree::Group(g) => vec![TokenTree::Group(Group::new(
+ g.delimiter(),
+ g.stream()
+ .into_iter()
+ .flat_map(|tt| replace_self_and_deny_type_defs(struct_name, tt, errs))
+ .collect(),
+ ))],
+ }
}
diff --git a/rust/macros/quote.rs b/rust/macros/quote.rs
index c8e08b3c1e4c..dddbb4e6f4cb 100644
--- a/rust/macros/quote.rs
+++ b/rust/macros/quote.rs
@@ -39,12 +39,14 @@ impl ToTokens for TokenStream {
/// [`quote_spanned!`](https://docs.rs/quote/latest/quote/macro.quote_spanned.html) macro from the
/// `quote` crate but provides only just enough functionality needed by the current `macros` crate.
macro_rules! quote_spanned {
- ($span:expr => $($tt:tt)*) => {
- #[allow(clippy::vec_init_then_push)]
- {
- let mut tokens = ::std::vec::Vec::new();
- let span = $span;
- quote_spanned!(@proc tokens span $($tt)*);
+ ($span:expr => $($tt:tt)*) => {{
+ let mut tokens;
+ #[allow(clippy::vec_init_then_push)]
+ {
+ tokens = ::std::vec::Vec::new();
+ let span = $span;
+ quote_spanned!(@proc tokens span $($tt)*);
+ }
::proc_macro::TokenStream::from_iter(tokens)
}};
(@proc $v:ident $span:ident) => {};
diff --git a/rust/uapi/lib.rs b/rust/uapi/lib.rs
index 29f69f3a52de..0caad902ba40 100644
--- a/rust/uapi/lib.rs
+++ b/rust/uapi/lib.rs
@@ -8,7 +8,6 @@
//! userspace APIs.
#![no_std]
-#![feature(core_ffi_c)]
// See <https://github.com/rust-lang/rust-bindgen/issues/1651>.
#![cfg_attr(test, allow(deref_nullptr))]
#![cfg_attr(test, allow(unaligned_references))]
diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c
index 6448b7826107..bf66277115e2 100644
--- a/samples/bpf/hbm.c
+++ b/samples/bpf/hbm.c
@@ -498,7 +498,6 @@ int main(int argc, char **argv)
"Option -%c requires an argument.\n\n",
optopt);
case 'h':
- __fallthrough;
default:
Usage();
return 0;
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 9f94fc83f086..78175231c969 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -277,7 +277,7 @@ $(obj)/%.lst: $(src)/%.c FORCE
# Compile Rust sources (.rs)
# ---------------------------------------------------------------------------
-rust_allowed_features := core_ffi_c,explicit_generic_args_with_impl_trait,new_uninit,pin_macro
+rust_allowed_features := new_uninit
rust_common_cmd = \
RUST_MODFILE=$(modfile) $(RUSTC_OR_CLIPPY) $(rust_flags) \
diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in
index 471300ba176c..50a92c4e9984 100644
--- a/scripts/gdb/linux/constants.py.in
+++ b/scripts/gdb/linux/constants.py.in
@@ -48,12 +48,12 @@ if IS_BUILTIN(CONFIG_COMMON_CLK):
LX_GDBPARSED(CLK_GET_RATE_NOCACHE)
/* linux/fs.h */
-LX_VALUE(SB_RDONLY)
-LX_VALUE(SB_SYNCHRONOUS)
-LX_VALUE(SB_MANDLOCK)
-LX_VALUE(SB_DIRSYNC)
-LX_VALUE(SB_NOATIME)
-LX_VALUE(SB_NODIRATIME)
+LX_GDBPARSED(SB_RDONLY)
+LX_GDBPARSED(SB_SYNCHRONOUS)
+LX_GDBPARSED(SB_MANDLOCK)
+LX_GDBPARSED(SB_DIRSYNC)
+LX_GDBPARSED(SB_NOATIME)
+LX_GDBPARSED(SB_NODIRATIME)
/* linux/htimer.h */
LX_GDBPARSED(hrtimer_resolution)
diff --git a/scripts/gfp-translate b/scripts/gfp-translate
index b2ce416d944b..6c9aed17cf56 100755
--- a/scripts/gfp-translate
+++ b/scripts/gfp-translate
@@ -63,11 +63,11 @@ fi
# Extract GFP flags from the kernel source
TMPFILE=`mktemp -t gfptranslate-XXXXXX` || exit 1
-grep -q ___GFP $SOURCE/include/linux/gfp.h
+grep -q ___GFP $SOURCE/include/linux/gfp_types.h
if [ $? -eq 0 ]; then
- grep "^#define ___GFP" $SOURCE/include/linux/gfp.h | sed -e 's/u$//' | grep -v GFP_BITS > $TMPFILE
+ grep "^#define ___GFP" $SOURCE/include/linux/gfp_types.h | sed -e 's/u$//' | grep -v GFP_BITS > $TMPFILE
else
- grep "^#define __GFP" $SOURCE/include/linux/gfp.h | sed -e 's/(__force gfp_t)//' | sed -e 's/u)/)/' | grep -v GFP_BITS | sed -e 's/)\//) \//' > $TMPFILE
+ grep "^#define __GFP" $SOURCE/include/linux/gfp_types.h | sed -e 's/(__force gfp_t)//' | sed -e 's/u)/)/' | grep -v GFP_BITS | sed -e 's/)\//) \//' > $TMPFILE
fi
# Parse the flags
diff --git a/scripts/min-tool-version.sh b/scripts/min-tool-version.sh
index 20d483ec6f5f..131be76d2130 100755
--- a/scripts/min-tool-version.sh
+++ b/scripts/min-tool-version.sh
@@ -27,7 +27,7 @@ llvm)
fi
;;
rustc)
- echo 1.62.0
+ echo 1.68.2
;;
bindgen)
echo 0.56.0
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index d4531d09984d..c12150f96b88 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1979,6 +1979,11 @@ static void add_header(struct buffer *b, struct module *mod)
buf_printf(b, "#include <linux/vermagic.h>\n");
buf_printf(b, "#include <linux/compiler.h>\n");
buf_printf(b, "\n");
+ buf_printf(b, "#ifdef CONFIG_UNWINDER_ORC\n");
+ buf_printf(b, "#include <asm/orc_header.h>\n");
+ buf_printf(b, "ORC_HEADER;\n");
+ buf_printf(b, "#endif\n");
+ buf_printf(b, "\n");
buf_printf(b, "BUILD_SALT;\n");
buf_printf(b, "BUILD_LTO_INFO;\n");
buf_printf(b, "\n");
diff --git a/scripts/orc_hash.sh b/scripts/orc_hash.sh
new file mode 100644
index 000000000000..466611aa0053
--- /dev/null
+++ b/scripts/orc_hash.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+
+set -e
+
+printf '%s' '#define ORC_HASH '
+
+awk '
+/^#define ORC_(REG|TYPE)_/ { print }
+/^struct orc_entry {$/ { p=1 }
+p { print }
+/^}/ { p=0 }' |
+ sha1sum |
+ cut -d " " -f 1 |
+ sed 's/\([0-9a-f]\{2\}\)/0x\1,/g'
diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c
index d3662f4acadc..ce541b0ee1d3 100644
--- a/security/integrity/ima/ima_api.c
+++ b/security/integrity/ima/ima_api.c
@@ -202,19 +202,19 @@ int ima_get_action(struct mnt_idmap *idmap, struct inode *inode,
allowed_algos);
}
-static int ima_get_verity_digest(struct integrity_iint_cache *iint,
- struct ima_max_digest_data *hash)
+static bool ima_get_verity_digest(struct integrity_iint_cache *iint,
+ struct ima_max_digest_data *hash)
{
- enum hash_algo verity_alg;
- int ret;
+ enum hash_algo alg;
+ int digest_len;
/*
* On failure, 'measure' policy rules will result in a file data
* hash containing 0's.
*/
- ret = fsverity_get_digest(iint->inode, hash->digest, &verity_alg);
- if (ret)
- return ret;
+ digest_len = fsverity_get_digest(iint->inode, hash->digest, NULL, &alg);
+ if (digest_len == 0)
+ return false;
/*
* Unlike in the case of actually calculating the file hash, in
@@ -223,9 +223,9 @@ static int ima_get_verity_digest(struct integrity_iint_cache *iint,
* mismatch between the verity algorithm and the xattr signature
* algorithm, if one exists, will be detected later.
*/
- hash->hdr.algo = verity_alg;
- hash->hdr.length = hash_digest_size[verity_alg];
- return 0;
+ hash->hdr.algo = alg;
+ hash->hdr.length = digest_len;
+ return true;
}
/*
@@ -276,16 +276,9 @@ int ima_collect_measurement(struct integrity_iint_cache *iint,
memset(&hash.digest, 0, sizeof(hash.digest));
if (iint->flags & IMA_VERITY_REQUIRED) {
- result = ima_get_verity_digest(iint, &hash);
- switch (result) {
- case 0:
- break;
- case -ENODATA:
+ if (!ima_get_verity_digest(iint, &hash)) {
audit_cause = "no-verity-digest";
- break;
- default:
- audit_cause = "invalid-verity-digest";
- break;
+ result = -ENODATA;
}
} else if (buf) {
result = ima_calc_buffer_hash(buf, size, &hash.hdr);
diff --git a/security/selinux/Makefile b/security/selinux/Makefile
index 0aecf9334ec3..8b21520bd4b9 100644
--- a/security/selinux/Makefile
+++ b/security/selinux/Makefile
@@ -26,5 +26,9 @@ quiet_cmd_flask = GEN $(obj)/flask.h $(obj)/av_permissions.h
cmd_flask = $< $(obj)/flask.h $(obj)/av_permissions.h
targets += flask.h av_permissions.h
-$(obj)/flask.h $(obj)/av_permissions.h &: scripts/selinux/genheaders/genheaders FORCE
+# once make >= 4.3 is required, we can use grouped targets in the rule below,
+# which basically involves adding both headers and a '&' before the colon, see
+# the example below:
+# $(obj)/flask.h $(obj)/av_permissions.h &: scripts/selinux/...
+$(obj)/flask.h: scripts/selinux/genheaders/genheaders FORCE
$(call if_changed,flask)
diff --git a/sound/core/oss/pcm_plugin.h b/sound/core/oss/pcm_plugin.h
index 46e273bd4a78..50a6b50f5db4 100644
--- a/sound/core/oss/pcm_plugin.h
+++ b/sound/core/oss/pcm_plugin.h
@@ -141,6 +141,14 @@ int snd_pcm_area_copy(const struct snd_pcm_channel_area *src_channel,
void *snd_pcm_plug_buf_alloc(struct snd_pcm_substream *plug, snd_pcm_uframes_t size);
void snd_pcm_plug_buf_unlock(struct snd_pcm_substream *plug, void *ptr);
+#else
+
+static inline snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *handle, snd_pcm_uframes_t drv_size) { return drv_size; }
+static inline snd_pcm_sframes_t snd_pcm_plug_slave_size(struct snd_pcm_substream *handle, snd_pcm_uframes_t clt_size) { return clt_size; }
+static inline int snd_pcm_plug_slave_format(int format, const struct snd_mask *format_mask) { return format; }
+
+#endif
+
snd_pcm_sframes_t snd_pcm_oss_write3(struct snd_pcm_substream *substream,
const char *ptr, snd_pcm_uframes_t size,
int in_kernel);
@@ -151,14 +159,6 @@ snd_pcm_sframes_t snd_pcm_oss_writev3(struct snd_pcm_substream *substream,
snd_pcm_sframes_t snd_pcm_oss_readv3(struct snd_pcm_substream *substream,
void **bufs, snd_pcm_uframes_t frames);
-#else
-
-static inline snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *handle, snd_pcm_uframes_t drv_size) { return drv_size; }
-static inline snd_pcm_sframes_t snd_pcm_plug_slave_size(struct snd_pcm_substream *handle, snd_pcm_uframes_t clt_size) { return clt_size; }
-static inline int snd_pcm_plug_slave_format(int format, const struct snd_mask *format_mask) { return format; }
-
-#endif
-
#ifdef PLUGIN_DEBUG
#define pdprintf(fmt, args...) printk(KERN_DEBUG "plugin: " fmt, ##args)
#else
diff --git a/sound/core/seq/oss/seq_oss_midi.c b/sound/core/seq/oss/seq_oss_midi.c
index 07efb38f58ac..f2940b29595f 100644
--- a/sound/core/seq/oss/seq_oss_midi.c
+++ b/sound/core/seq/oss/seq_oss_midi.c
@@ -37,6 +37,7 @@ struct seq_oss_midi {
struct snd_midi_event *coder; /* MIDI event coder */
struct seq_oss_devinfo *devinfo; /* assigned OSSseq device */
snd_use_lock_t use_lock;
+ struct mutex open_mutex;
};
@@ -172,6 +173,7 @@ snd_seq_oss_midi_check_new_port(struct snd_seq_port_info *pinfo)
mdev->flags = pinfo->capability;
mdev->opened = 0;
snd_use_lock_init(&mdev->use_lock);
+ mutex_init(&mdev->open_mutex);
/* copy and truncate the name of synth device */
strscpy(mdev->name, pinfo->name, sizeof(mdev->name));
@@ -322,15 +324,17 @@ snd_seq_oss_midi_open(struct seq_oss_devinfo *dp, int dev, int fmode)
int perm;
struct seq_oss_midi *mdev;
struct snd_seq_port_subscribe subs;
+ int err;
mdev = get_mididev(dp, dev);
if (!mdev)
return -ENODEV;
+ mutex_lock(&mdev->open_mutex);
/* already used? */
if (mdev->opened && mdev->devinfo != dp) {
- snd_use_lock_free(&mdev->use_lock);
- return -EBUSY;
+ err = -EBUSY;
+ goto unlock;
}
perm = 0;
@@ -340,14 +344,14 @@ snd_seq_oss_midi_open(struct seq_oss_devinfo *dp, int dev, int fmode)
perm |= PERM_READ;
perm &= mdev->flags;
if (perm == 0) {
- snd_use_lock_free(&mdev->use_lock);
- return -ENXIO;
+ err = -ENXIO;
+ goto unlock;
}
/* already opened? */
if ((mdev->opened & perm) == perm) {
- snd_use_lock_free(&mdev->use_lock);
- return 0;
+ err = 0;
+ goto unlock;
}
perm &= ~mdev->opened;
@@ -372,13 +376,17 @@ snd_seq_oss_midi_open(struct seq_oss_devinfo *dp, int dev, int fmode)
}
if (! mdev->opened) {
- snd_use_lock_free(&mdev->use_lock);
- return -ENXIO;
+ err = -ENXIO;
+ goto unlock;
}
mdev->devinfo = dp;
+ err = 0;
+
+ unlock:
+ mutex_unlock(&mdev->open_mutex);
snd_use_lock_free(&mdev->use_lock);
- return 0;
+ return err;
}
/*
@@ -393,10 +401,9 @@ snd_seq_oss_midi_close(struct seq_oss_devinfo *dp, int dev)
mdev = get_mididev(dp, dev);
if (!mdev)
return -ENODEV;
- if (! mdev->opened || mdev->devinfo != dp) {
- snd_use_lock_free(&mdev->use_lock);
- return 0;
- }
+ mutex_lock(&mdev->open_mutex);
+ if (!mdev->opened || mdev->devinfo != dp)
+ goto unlock;
memset(&subs, 0, sizeof(subs));
if (mdev->opened & PERM_WRITE) {
@@ -415,6 +422,8 @@ snd_seq_oss_midi_close(struct seq_oss_devinfo *dp, int dev)
mdev->opened = 0;
mdev->devinfo = NULL;
+ unlock:
+ mutex_unlock(&mdev->open_mutex);
snd_use_lock_free(&mdev->use_lock);
return 0;
}
diff --git a/sound/firewire/digi00x/digi00x-stream.c b/sound/firewire/digi00x/digi00x-stream.c
index a15f55b0dce3..295163bb8abb 100644
--- a/sound/firewire/digi00x/digi00x-stream.c
+++ b/sound/firewire/digi00x/digi00x-stream.c
@@ -259,8 +259,10 @@ int snd_dg00x_stream_init_duplex(struct snd_dg00x *dg00x)
return err;
err = init_stream(dg00x, &dg00x->tx_stream);
- if (err < 0)
+ if (err < 0) {
destroy_stream(dg00x, &dg00x->rx_stream);
+ return err;
+ }
err = amdtp_domain_init(&dg00x->domain);
if (err < 0) {
diff --git a/sound/hda/hdac_device.c b/sound/hda/hdac_device.c
index accc9d279ce5..6c043fbd606f 100644
--- a/sound/hda/hdac_device.c
+++ b/sound/hda/hdac_device.c
@@ -611,7 +611,7 @@ EXPORT_SYMBOL_GPL(snd_hdac_power_up_pm);
int snd_hdac_keep_power_up(struct hdac_device *codec)
{
if (!atomic_inc_not_zero(&codec->in_pm)) {
- int ret = pm_runtime_get_if_in_use(&codec->dev);
+ int ret = pm_runtime_get_if_active(&codec->dev, true);
if (!ret)
return -1;
if (ret < 0)
diff --git a/sound/isa/gus/gus_pcm.c b/sound/isa/gus/gus_pcm.c
index 230f65a0e4b0..388db5fb65bd 100644
--- a/sound/isa/gus/gus_pcm.c
+++ b/sound/isa/gus/gus_pcm.c
@@ -892,10 +892,10 @@ int snd_gf1_pcm_new(struct snd_gus_card *gus, int pcm_dev, int control_index)
kctl = snd_ctl_new1(&snd_gf1_pcm_volume_control1, gus);
else
kctl = snd_ctl_new1(&snd_gf1_pcm_volume_control, gus);
+ kctl->id.index = control_index;
err = snd_ctl_add(card, kctl);
if (err < 0)
return err;
- kctl->id.index = control_index;
return 0;
}
diff --git a/sound/pci/cmipci.c b/sound/pci/cmipci.c
index 727db6d43391..6d25c12d9ef0 100644
--- a/sound/pci/cmipci.c
+++ b/sound/pci/cmipci.c
@@ -2688,20 +2688,20 @@ static int snd_cmipci_mixer_new(struct cmipci *cm, int pcm_spdif_device)
}
if (cm->can_ac3_hw) {
kctl = snd_ctl_new1(&snd_cmipci_spdif_default, cm);
+ kctl->id.device = pcm_spdif_device;
err = snd_ctl_add(card, kctl);
if (err < 0)
return err;
- kctl->id.device = pcm_spdif_device;
kctl = snd_ctl_new1(&snd_cmipci_spdif_mask, cm);
+ kctl->id.device = pcm_spdif_device;
err = snd_ctl_add(card, kctl);
if (err < 0)
return err;
- kctl->id.device = pcm_spdif_device;
kctl = snd_ctl_new1(&snd_cmipci_spdif_stream, cm);
+ kctl->id.device = pcm_spdif_device;
err = snd_ctl_add(card, kctl);
if (err < 0)
return err;
- kctl->id.device = pcm_spdif_device;
}
if (cm->chip_version <= 37) {
sw = snd_cmipci_old_mixer_switches;
diff --git a/sound/pci/cs46xx/cs46xx_lib.c b/sound/pci/cs46xx/cs46xx_lib.c
index 62f45847b351..7d882b33d45e 100644
--- a/sound/pci/cs46xx/cs46xx_lib.c
+++ b/sound/pci/cs46xx/cs46xx_lib.c
@@ -531,7 +531,7 @@ static int load_firmware(struct snd_cs46xx *chip)
return err;
}
-int snd_cs46xx_download_image(struct snd_cs46xx *chip)
+static __maybe_unused int snd_cs46xx_download_image(struct snd_cs46xx *chip)
{
int idx, err;
unsigned int offset = 0;
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index 9f79c0ac2bda..bd19f92aeeec 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -2458,10 +2458,14 @@ int snd_hda_create_dig_out_ctls(struct hda_codec *codec,
type == HDA_PCM_TYPE_HDMI) {
/* suppose a single SPDIF device */
for (dig_mix = dig_mixes; dig_mix->name; dig_mix++) {
+ struct snd_ctl_elem_id id;
+
kctl = find_mixer_ctl(codec, dig_mix->name, 0, 0);
if (!kctl)
break;
- kctl->id.index = spdif_index;
+ id = kctl->id;
+ id.index = spdif_index;
+ snd_ctl_rename_id(codec->card, &kctl->id, &id);
}
bus->primary_dig_out_type = HDA_PCM_TYPE_HDMI;
}
diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index fc114e522480..dbf7aa88e0e3 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -1155,8 +1155,8 @@ static bool path_has_mixer(struct hda_codec *codec, int path_idx, int ctl_type)
return path && path->ctls[ctl_type];
}
-static const char * const channel_name[4] = {
- "Front", "Surround", "CLFE", "Side"
+static const char * const channel_name[] = {
+ "Front", "Surround", "CLFE", "Side", "Back",
};
/* give some appropriate ctl name prefix for the given line out channel */
@@ -1182,7 +1182,7 @@ static const char *get_line_out_pfx(struct hda_codec *codec, int ch,
/* multi-io channels */
if (ch >= cfg->line_outs)
- return channel_name[ch];
+ goto fixed_name;
switch (cfg->line_out_type) {
case AUTO_PIN_SPEAKER_OUT:
@@ -1234,6 +1234,7 @@ static const char *get_line_out_pfx(struct hda_codec *codec, int ch,
if (cfg->line_outs == 1 && !spec->multi_ios)
return "Line Out";
+ fixed_name:
if (ch >= ARRAY_SIZE(channel_name)) {
snd_BUG();
return "PCM";
diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c
index 099722ebaed8..748a3c40966e 100644
--- a/sound/pci/hda/patch_ca0132.c
+++ b/sound/pci/hda/patch_ca0132.c
@@ -1306,6 +1306,7 @@ static const struct snd_pci_quirk ca0132_quirks[] = {
SND_PCI_QUIRK(0x1458, 0xA026, "Gigabyte G1.Sniper Z97", QUIRK_R3DI),
SND_PCI_QUIRK(0x1458, 0xA036, "Gigabyte GA-Z170X-Gaming 7", QUIRK_R3DI),
SND_PCI_QUIRK(0x3842, 0x1038, "EVGA X99 Classified", QUIRK_R3DI),
+ SND_PCI_QUIRK(0x3842, 0x104b, "EVGA X299 Dark", QUIRK_R3DI),
SND_PCI_QUIRK(0x3842, 0x1055, "EVGA Z390 DARK", QUIRK_R3DI),
SND_PCI_QUIRK(0x1102, 0x0013, "Recon3D", QUIRK_R3D),
SND_PCI_QUIRK(0x1102, 0x0018, "Recon3D", QUIRK_R3D),
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 64a944016c78..5c0b1a09fd57 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -4589,6 +4589,11 @@ HDA_CODEC_ENTRY(0x10de009d, "GPU 9d HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de009e, "GPU 9e HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de009f, "GPU 9f HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de00a0, "GPU a0 HDMI/DP", patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de00a3, "GPU a3 HDMI/DP", patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de00a4, "GPU a4 HDMI/DP", patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de00a5, "GPU a5 HDMI/DP", patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de00a6, "GPU a6 HDMI/DP", patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de00a7, "GPU a7 HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de8001, "MCP73 HDMI", patch_nvhdmi_2ch),
HDA_CODEC_ENTRY(0x10de8067, "MCP67/68 HDMI", patch_nvhdmi_2ch),
HDA_CODEC_ENTRY(0x67663d82, "Arise 82 HDMI/DP", patch_gf_hdmi),
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 172ffc2c332b..dabfdecece26 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -7063,6 +7063,8 @@ enum {
ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
ALC295_FIXUP_DISABLE_DAC3,
ALC285_FIXUP_SPEAKER2_TO_DAC1,
+ ALC285_FIXUP_ASUS_SPEAKER2_TO_DAC1,
+ ALC285_FIXUP_ASUS_HEADSET_MIC,
ALC280_FIXUP_HP_HEADSET_MIC,
ALC221_FIXUP_HP_FRONT_MIC,
ALC292_FIXUP_TPT460,
@@ -8033,6 +8035,22 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC269_FIXUP_THINKPAD_ACPI
},
+ [ALC285_FIXUP_ASUS_SPEAKER2_TO_DAC1] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc285_fixup_speaker2_to_dac1,
+ .chained = true,
+ .chain_id = ALC245_FIXUP_CS35L41_SPI_2
+ },
+ [ALC285_FIXUP_ASUS_HEADSET_MIC] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x19, 0x03a11050 },
+ { 0x1b, 0x03a11c30 },
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC285_FIXUP_ASUS_SPEAKER2_TO_DAC1
+ },
[ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER] = {
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
@@ -9363,7 +9381,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x802f, "HP Z240", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x8077, "HP", ALC256_FIXUP_HP_HEADSET_MIC),
SND_PCI_QUIRK(0x103c, 0x8158, "HP", ALC256_FIXUP_HP_HEADSET_MIC),
- SND_PCI_QUIRK(0x103c, 0x820d, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3),
+ SND_PCI_QUIRK(0x103c, 0x820d, "HP Pavilion 15", ALC295_FIXUP_HP_X360),
SND_PCI_QUIRK(0x103c, 0x8256, "HP", ALC221_FIXUP_HP_FRONT_MIC),
SND_PCI_QUIRK(0x103c, 0x827e, "HP x360", ALC295_FIXUP_HP_X360),
SND_PCI_QUIRK(0x103c, 0x827f, "HP x360", ALC269_FIXUP_HP_MUTE_LED_MIC3),
@@ -9458,7 +9476,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8aa8, "HP EliteBook 640 G9 (MB 8AA6)", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8aab, "HP EliteBook 650 G9 (MB 8AA9)", ALC236_FIXUP_HP_GPIO_LED),
- SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
@@ -9469,18 +9487,25 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8b47, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b5d, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8b5e, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+ SND_PCI_QUIRK(0x103c, 0x8b63, "HP Elite Dragonfly 13.5 inch G4", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b65, "HP ProBook 455 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8b66, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+ SND_PCI_QUIRK(0x103c, 0x8b70, "HP EliteBook 835 G10", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8b72, "HP EliteBook 845 G10", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8b74, "HP EliteBook 845W G10", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8b77, "HP ElieBook 865 G10", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8b7a, "HP", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b7d, "HP", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b87, "HP", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b8a, "HP", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b8b, "HP", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b8d, "HP", ALC236_FIXUP_HP_GPIO_LED),
- SND_PCI_QUIRK(0x103c, 0x8b8f, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8b8f, "HP", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b92, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+ SND_PCI_QUIRK(0x103c, 0x8b97, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8bf0, "HP", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8c26, "HP HP EliteBook 800G11", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
@@ -9500,6 +9525,9 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x1313, "Asus K42JZ", ALC269VB_FIXUP_ASUS_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1043, 0x13b0, "ASUS Z550SA", ALC256_FIXUP_ASUS_MIC),
SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_ASUS_ZENBOOK),
+ SND_PCI_QUIRK(0x1043, 0x1473, "ASUS GU604V", ALC285_FIXUP_ASUS_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1043, 0x1483, "ASUS GU603V", ALC285_FIXUP_ASUS_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1043, 0x1493, "ASUS GV601V", ALC285_FIXUP_ASUS_HEADSET_MIC),
SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A),
SND_PCI_QUIRK(0x1043, 0x1662, "ASUS GV301QH", ALC294_FIXUP_ASUS_DUAL_SPK),
SND_PCI_QUIRK(0x1043, 0x1683, "ASUS UM3402YAR", ALC287_FIXUP_CS35L41_I2C_2),
@@ -9520,9 +9548,12 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x1a8f, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1b11, "ASUS UX431DA", ALC294_FIXUP_ASUS_COEF_1B),
SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC),
+ SND_PCI_QUIRK(0x1043, 0x1b93, "ASUS G614JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
+ SND_PCI_QUIRK(0x1043, 0x1c62, "ASUS GU603", ALC289_FIXUP_ASUS_GA401),
SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS),
+ SND_PCI_QUIRK(0x1043, 0x1caf, "ASUS G634JYR/JZR", ALC285_FIXUP_ASUS_HEADSET_MIC),
SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC),
SND_PCI_QUIRK(0x1043, 0x1d42, "ASUS Zephyrus G14 2022", ALC289_FIXUP_ASUS_GA401),
SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE),
@@ -9537,6 +9568,11 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x1f12, "ASUS UM5302", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1f92, "ASUS ROG Flow X16", ALC289_FIXUP_ASUS_GA401),
SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2),
+ SND_PCI_QUIRK(0x1043, 0x3a20, "ASUS G614JZR", ALC245_FIXUP_CS35L41_SPI_2),
+ SND_PCI_QUIRK(0x1043, 0x3a30, "ASUS G814JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2),
+ SND_PCI_QUIRK(0x1043, 0x3a40, "ASUS G814JZR", ALC245_FIXUP_CS35L41_SPI_2),
+ SND_PCI_QUIRK(0x1043, 0x3a50, "ASUS G834JYR/JZR", ALC245_FIXUP_CS35L41_SPI_2),
+ SND_PCI_QUIRK(0x1043, 0x3a60, "ASUS G634JYR/JZR", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC),
SND_PCI_QUIRK(0x1043, 0x834a, "ASUS S101", ALC269_FIXUP_STEREO_DMIC),
SND_PCI_QUIRK(0x1043, 0x8398, "ASUS P1005", ALC269_FIXUP_STEREO_DMIC),
@@ -9560,6 +9596,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x10ec, 0x124c, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
SND_PCI_QUIRK(0x10ec, 0x1252, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
SND_PCI_QUIRK(0x10ec, 0x1254, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
+ SND_PCI_QUIRK(0x10ec, 0x12cc, "Intel Reference board", ALC225_FIXUP_HEADSET_JACK),
SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE),
SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC),
SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_AMP),
@@ -9608,6 +9645,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1558, 0x5101, "Clevo S510WU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x5157, "Clevo W517GU1", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x51a1, "Clevo NS50MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x51b1, "Clevo NS50AU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x5630, "Clevo NP50RNJS", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x70a1, "Clevo NB70T[HJK]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x70b3, "Clevo NK70SB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
@@ -9618,6 +9656,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1558, 0x7716, "Clevo NS50PU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x7717, "Clevo NS70PU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x7718, "Clevo L140PU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x7724, "Clevo L140AU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x8228, "Clevo NR40BU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x8520, "Clevo NH50D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x8521, "Clevo NH77D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
@@ -9778,6 +9817,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC),
SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED),
SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10),
+ SND_PCI_QUIRK(0x8086, 0x3038, "Intel NUC 13", ALC225_FIXUP_HEADSET_JACK),
SND_PCI_QUIRK(0xf111, 0x0001, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE),
#if 0
@@ -11663,7 +11703,9 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800),
SND_PCI_QUIRK(0x103c, 0x870c, "HP", ALC897_FIXUP_HP_HSMIC_VERB),
SND_PCI_QUIRK(0x103c, 0x8719, "HP", ALC897_FIXUP_HP_HSMIC_VERB),
+ SND_PCI_QUIRK(0x103c, 0x872b, "HP", ALC897_FIXUP_HP_HSMIC_VERB),
SND_PCI_QUIRK(0x103c, 0x873e, "HP", ALC671_FIXUP_HP_HEADSET_MIC2),
+ SND_PCI_QUIRK(0x103c, 0x8768, "HP Slim Desktop S01", ALC671_FIXUP_HP_HEADSET_MIC2),
SND_PCI_QUIRK(0x103c, 0x877e, "HP 288 Pro G6", ALC671_FIXUP_HP_HEADSET_MIC2),
SND_PCI_QUIRK(0x103c, 0x885f, "HP 288 Pro G8", ALC671_FIXUP_HP_HEADSET_MIC2),
SND_PCI_QUIRK(0x1043, 0x1080, "Asus UX501VW", ALC668_FIXUP_HEADSET_MODE),
@@ -11685,10 +11727,13 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
SND_PCI_QUIRK(0x14cd, 0x5003, "USI", ALC662_FIXUP_USI_HEADSET_MODE),
SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC662_FIXUP_LENOVO_MULTI_CODECS),
SND_PCI_QUIRK(0x17aa, 0x1057, "Lenovo P360", ALC897_FIXUP_HEADSET_MIC_PIN),
+ SND_PCI_QUIRK(0x17aa, 0x1064, "Lenovo P3 Tower", ALC897_FIXUP_HEADSET_MIC_PIN),
SND_PCI_QUIRK(0x17aa, 0x32ca, "Lenovo ThinkCentre M80", ALC897_FIXUP_HEADSET_MIC_PIN),
SND_PCI_QUIRK(0x17aa, 0x32cb, "Lenovo ThinkCentre M70", ALC897_FIXUP_HEADSET_MIC_PIN),
SND_PCI_QUIRK(0x17aa, 0x32cf, "Lenovo ThinkCentre M950", ALC897_FIXUP_HEADSET_MIC_PIN),
SND_PCI_QUIRK(0x17aa, 0x32f7, "Lenovo ThinkCentre M90", ALC897_FIXUP_HEADSET_MIC_PIN),
+ SND_PCI_QUIRK(0x17aa, 0x3321, "Lenovo ThinkCentre M70 Gen4", ALC897_FIXUP_HEADSET_MIC_PIN),
+ SND_PCI_QUIRK(0x17aa, 0x331b, "Lenovo ThinkCentre M90 Gen4", ALC897_FIXUP_HEADSET_MIC_PIN),
SND_PCI_QUIRK(0x17aa, 0x3742, "Lenovo TianYi510Pro-14IOB", ALC897_FIXUP_HEADSET_MIC_PIN2),
SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo Ideapad Y550P", ALC662_FIXUP_IDEAPAD),
SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Ideapad Y550", ALC662_FIXUP_IDEAPAD),
@@ -11697,6 +11742,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
SND_PCI_QUIRK(0x1b0a, 0x01b8, "ACER Veriton", ALC662_FIXUP_ACER_VERITON),
SND_PCI_QUIRK(0x1b35, 0x1234, "CZC ET26", ALC662_FIXUP_CZC_ET26),
SND_PCI_QUIRK(0x1b35, 0x2206, "CZC P10T", ALC662_FIXUP_CZC_P10T),
+ SND_PCI_QUIRK(0x1c6c, 0x1239, "Compaq N14JP6-V2", ALC897_FIXUP_HP_HSMIC_VERB),
#if 0
/* Below is a quirk table taken from the old code.
diff --git a/sound/pci/ice1712/aureon.c b/sound/pci/ice1712/aureon.c
index 24b978234000..027849329c1b 100644
--- a/sound/pci/ice1712/aureon.c
+++ b/sound/pci/ice1712/aureon.c
@@ -1899,11 +1899,12 @@ static int aureon_add_controls(struct snd_ice1712 *ice)
else {
for (i = 0; i < ARRAY_SIZE(cs8415_controls); i++) {
struct snd_kcontrol *kctl;
- err = snd_ctl_add(ice->card, (kctl = snd_ctl_new1(&cs8415_controls[i], ice)));
- if (err < 0)
- return err;
+ kctl = snd_ctl_new1(&cs8415_controls[i], ice);
if (i > 1)
kctl->id.device = ice->pcm->device;
+ err = snd_ctl_add(ice->card, kctl);
+ if (err < 0)
+ return err;
}
}
}
diff --git a/sound/pci/ice1712/ice1712.c b/sound/pci/ice1712/ice1712.c
index a5241a287851..3b0c3e70987b 100644
--- a/sound/pci/ice1712/ice1712.c
+++ b/sound/pci/ice1712/ice1712.c
@@ -2371,22 +2371,26 @@ int snd_ice1712_spdif_build_controls(struct snd_ice1712 *ice)
if (snd_BUG_ON(!ice->pcm_pro))
return -EIO;
- err = snd_ctl_add(ice->card, kctl = snd_ctl_new1(&snd_ice1712_spdif_default, ice));
+ kctl = snd_ctl_new1(&snd_ice1712_spdif_default, ice);
+ kctl->id.device = ice->pcm_pro->device;
+ err = snd_ctl_add(ice->card, kctl);
if (err < 0)
return err;
+ kctl = snd_ctl_new1(&snd_ice1712_spdif_maskc, ice);
kctl->id.device = ice->pcm_pro->device;
- err = snd_ctl_add(ice->card, kctl = snd_ctl_new1(&snd_ice1712_spdif_maskc, ice));
+ err = snd_ctl_add(ice->card, kctl);
if (err < 0)
return err;
+ kctl = snd_ctl_new1(&snd_ice1712_spdif_maskp, ice);
kctl->id.device = ice->pcm_pro->device;
- err = snd_ctl_add(ice->card, kctl = snd_ctl_new1(&snd_ice1712_spdif_maskp, ice));
+ err = snd_ctl_add(ice->card, kctl);
if (err < 0)
return err;
+ kctl = snd_ctl_new1(&snd_ice1712_spdif_stream, ice);
kctl->id.device = ice->pcm_pro->device;
- err = snd_ctl_add(ice->card, kctl = snd_ctl_new1(&snd_ice1712_spdif_stream, ice));
+ err = snd_ctl_add(ice->card, kctl);
if (err < 0)
return err;
- kctl->id.device = ice->pcm_pro->device;
ice->spdif.stream_ctl = kctl;
return 0;
}
diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c
index 6fab2ad85bbe..1dc776acd637 100644
--- a/sound/pci/ice1712/ice1724.c
+++ b/sound/pci/ice1712/ice1724.c
@@ -2392,23 +2392,27 @@ static int snd_vt1724_spdif_build_controls(struct snd_ice1712 *ice)
if (err < 0)
return err;
- err = snd_ctl_add(ice->card, kctl = snd_ctl_new1(&snd_vt1724_spdif_default, ice));
+ kctl = snd_ctl_new1(&snd_vt1724_spdif_default, ice);
+ kctl->id.device = ice->pcm->device;
+ err = snd_ctl_add(ice->card, kctl);
if (err < 0)
return err;
+ kctl = snd_ctl_new1(&snd_vt1724_spdif_maskc, ice);
kctl->id.device = ice->pcm->device;
- err = snd_ctl_add(ice->card, kctl = snd_ctl_new1(&snd_vt1724_spdif_maskc, ice));
+ err = snd_ctl_add(ice->card, kctl);
if (err < 0)
return err;
+ kctl = snd_ctl_new1(&snd_vt1724_spdif_maskp, ice);
kctl->id.device = ice->pcm->device;
- err = snd_ctl_add(ice->card, kctl = snd_ctl_new1(&snd_vt1724_spdif_maskp, ice));
+ err = snd_ctl_add(ice->card, kctl);
if (err < 0)
return err;
- kctl->id.device = ice->pcm->device;
#if 0 /* use default only */
- err = snd_ctl_add(ice->card, kctl = snd_ctl_new1(&snd_vt1724_spdif_stream, ice));
+ kctl = snd_ctl_new1(&snd_vt1724_spdif_stream, ice);
+ kctl->id.device = ice->pcm->device;
+ err = snd_ctl_add(ice->card, kctl);
if (err < 0)
return err;
- kctl->id.device = ice->pcm->device;
ice->spdif.stream_ctl = kctl;
#endif
return 0;
diff --git a/sound/pci/ymfpci/ymfpci_main.c b/sound/pci/ymfpci/ymfpci_main.c
index 6971eec45a4d..6b8d8690b6b2 100644
--- a/sound/pci/ymfpci/ymfpci_main.c
+++ b/sound/pci/ymfpci/ymfpci_main.c
@@ -1822,20 +1822,20 @@ int snd_ymfpci_mixer(struct snd_ymfpci *chip, int rear_switch)
if (snd_BUG_ON(!chip->pcm_spdif))
return -ENXIO;
kctl = snd_ctl_new1(&snd_ymfpci_spdif_default, chip);
+ kctl->id.device = chip->pcm_spdif->device;
err = snd_ctl_add(chip->card, kctl);
if (err < 0)
return err;
- kctl->id.device = chip->pcm_spdif->device;
kctl = snd_ctl_new1(&snd_ymfpci_spdif_mask, chip);
+ kctl->id.device = chip->pcm_spdif->device;
err = snd_ctl_add(chip->card, kctl);
if (err < 0)
return err;
- kctl->id.device = chip->pcm_spdif->device;
kctl = snd_ctl_new1(&snd_ymfpci_spdif_stream, chip);
+ kctl->id.device = chip->pcm_spdif->device;
err = snd_ctl_add(chip->card, kctl);
if (err < 0)
return err;
- kctl->id.device = chip->pcm_spdif->device;
chip->spdif_pcm_ctl = kctl;
/* direct recording source */
diff --git a/sound/soc/amd/ps/pci-ps.c b/sound/soc/amd/ps/pci-ps.c
index afddb9a77ba4..b1337b96ea8d 100644
--- a/sound/soc/amd/ps/pci-ps.c
+++ b/sound/soc/amd/ps/pci-ps.c
@@ -211,8 +211,7 @@ static int create_acp63_platform_devs(struct pci_dev *pci, struct acp63_dev_data
case ACP63_PDM_DEV_MASK:
adata->pdm_dev_index = 0;
acp63_fill_platform_dev_info(&pdevinfo[0], parent, NULL, "acp_ps_pdm_dma",
- 0, adata->res, 1, &adata->acp_lock,
- sizeof(adata->acp_lock));
+ 0, adata->res, 1, NULL, 0);
acp63_fill_platform_dev_info(&pdevinfo[1], parent, NULL, "dmic-codec",
0, NULL, 0, NULL, 0);
acp63_fill_platform_dev_info(&pdevinfo[2], parent, NULL, "acp_ps_mach",
diff --git a/sound/soc/amd/ps/ps-pdm-dma.c b/sound/soc/amd/ps/ps-pdm-dma.c
index 46b91327168f..3a83dc178e7d 100644
--- a/sound/soc/amd/ps/ps-pdm-dma.c
+++ b/sound/soc/amd/ps/ps-pdm-dma.c
@@ -361,12 +361,12 @@ static int acp63_pdm_audio_probe(struct platform_device *pdev)
{
struct resource *res;
struct pdm_dev_data *adata;
+ struct acp63_dev_data *acp_data;
+ struct device *parent;
int status;
- if (!pdev->dev.platform_data) {
- dev_err(&pdev->dev, "platform_data not retrieved\n");
- return -ENODEV;
- }
+ parent = pdev->dev.parent;
+ acp_data = dev_get_drvdata(parent);
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res) {
dev_err(&pdev->dev, "IORESOURCE_MEM FAILED\n");
@@ -382,7 +382,7 @@ static int acp63_pdm_audio_probe(struct platform_device *pdev)
return -ENOMEM;
adata->capture_stream = NULL;
- adata->acp_lock = pdev->dev.platform_data;
+ adata->acp_lock = &acp_data->acp_lock;
dev_set_drvdata(&pdev->dev, adata);
status = devm_snd_soc_register_component(&pdev->dev,
&acp63_pdm_component,
diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c
index 0bc6e4066d0f..246299a178f9 100644
--- a/sound/soc/amd/yc/acp6x-mach.c
+++ b/sound/soc/amd/yc/acp6x-mach.c
@@ -175,6 +175,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
.driver_data = &acp6x_card,
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "21EF"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
DMI_MATCH(DMI_PRODUCT_NAME, "21EM"),
}
},
@@ -311,6 +318,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
DMI_MATCH(DMI_BOARD_NAME, "8A22"),
}
},
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "System76"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "pang12"),
+ }
+ },
{}
};
diff --git a/sound/soc/codecs/cs35l41-lib.c b/sound/soc/codecs/cs35l41-lib.c
index 8538e2871c5f..1e4205295a0d 100644
--- a/sound/soc/codecs/cs35l41-lib.c
+++ b/sound/soc/codecs/cs35l41-lib.c
@@ -46,7 +46,7 @@ static const struct reg_default cs35l41_reg[] = {
{ CS35L41_DSP1_RX5_SRC, 0x00000020 },
{ CS35L41_DSP1_RX6_SRC, 0x00000021 },
{ CS35L41_DSP1_RX7_SRC, 0x0000003A },
- { CS35L41_DSP1_RX8_SRC, 0x00000001 },
+ { CS35L41_DSP1_RX8_SRC, 0x0000003B },
{ CS35L41_NGATE1_SRC, 0x00000008 },
{ CS35L41_NGATE2_SRC, 0x00000009 },
{ CS35L41_AMP_DIG_VOL_CTRL, 0x00008000 },
@@ -58,8 +58,8 @@ static const struct reg_default cs35l41_reg[] = {
{ CS35L41_IRQ1_MASK2, 0xFFFFFFFF },
{ CS35L41_IRQ1_MASK3, 0xFFFF87FF },
{ CS35L41_IRQ1_MASK4, 0xFEFFFFFF },
- { CS35L41_GPIO1_CTRL1, 0xE1000001 },
- { CS35L41_GPIO2_CTRL1, 0xE1000001 },
+ { CS35L41_GPIO1_CTRL1, 0x81000001 },
+ { CS35L41_GPIO2_CTRL1, 0x81000001 },
{ CS35L41_MIXER_NGATE_CFG, 0x00000000 },
{ CS35L41_MIXER_NGATE_CH1_CFG, 0x00000303 },
{ CS35L41_MIXER_NGATE_CH2_CFG, 0x00000303 },
diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c
index 46762f7f1449..e0d2b9bb2326 100644
--- a/sound/soc/codecs/cs35l56.c
+++ b/sound/soc/codecs/cs35l56.c
@@ -704,9 +704,6 @@ static int cs35l56_sdw_dai_hw_free(struct snd_pcm_substream *substream,
static int cs35l56_sdw_dai_set_stream(struct snd_soc_dai *dai,
void *sdw_stream, int direction)
{
- if (!sdw_stream)
- return 0;
-
snd_soc_dai_dma_data_set(dai, direction, sdw_stream);
return 0;
@@ -852,10 +849,11 @@ static void cs35l56_dsp_work(struct work_struct *work)
*/
if (cs35l56->sdw_peripheral) {
cs35l56->sdw_irq_no_unmask = true;
- cancel_work_sync(&cs35l56->sdw_irq_work);
+ flush_work(&cs35l56->sdw_irq_work);
sdw_write_no_pm(cs35l56->sdw_peripheral, CS35L56_SDW_GEN_INT_MASK_1, 0);
sdw_read_no_pm(cs35l56->sdw_peripheral, CS35L56_SDW_GEN_INT_STAT_1);
sdw_write_no_pm(cs35l56->sdw_peripheral, CS35L56_SDW_GEN_INT_STAT_1, 0xFF);
+ flush_work(&cs35l56->sdw_irq_work);
}
ret = cs35l56_mbox_send(cs35l56, CS35L56_MBOX_CMD_SHUTDOWN);
diff --git a/sound/soc/codecs/lpass-tx-macro.c b/sound/soc/codecs/lpass-tx-macro.c
index da6fcf7f0991..de978c3d70b7 100644
--- a/sound/soc/codecs/lpass-tx-macro.c
+++ b/sound/soc/codecs/lpass-tx-macro.c
@@ -746,6 +746,8 @@ static int tx_macro_put_dec_enum(struct snd_kcontrol *kcontrol,
struct tx_macro *tx = snd_soc_component_get_drvdata(component);
val = ucontrol->value.enumerated.item[0];
+ if (val >= e->items)
+ return -EINVAL;
switch (e->reg) {
case CDC_TX_INP_MUX_ADC_MUX0_CFG0:
@@ -772,6 +774,9 @@ static int tx_macro_put_dec_enum(struct snd_kcontrol *kcontrol,
case CDC_TX_INP_MUX_ADC_MUX7_CFG0:
mic_sel_reg = CDC_TX7_TX_PATH_CFG0;
break;
+ default:
+ dev_err(component->dev, "Error in configuration!!\n");
+ return -EINVAL;
}
if (val != 0) {
diff --git a/sound/soc/codecs/max98363.c b/sound/soc/codecs/max98363.c
index dcce06bff756..e6b84e222b50 100644
--- a/sound/soc/codecs/max98363.c
+++ b/sound/soc/codecs/max98363.c
@@ -211,7 +211,7 @@ static int max98363_io_init(struct sdw_slave *slave)
}
#define MAX98363_RATES SNDRV_PCM_RATE_8000_192000
-#define MAX98363_FORMATS (SNDRV_PCM_FMTBIT_S32_LE)
+#define MAX98363_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE)
static int max98363_sdw_dai_hw_params(struct snd_pcm_substream *substream,
struct snd_pcm_hw_params *params,
@@ -246,7 +246,7 @@ static int max98363_sdw_dai_hw_params(struct snd_pcm_substream *substream,
stream_config.frame_rate = params_rate(params);
stream_config.bps = snd_pcm_format_width(params_format(params));
stream_config.direction = direction;
- stream_config.ch_count = params_channels(params);
+ stream_config.ch_count = 1;
if (stream_config.ch_count > runtime->hw.channels_max) {
stream_config.ch_count = runtime->hw.channels_max;
diff --git a/sound/soc/codecs/nau8824.c b/sound/soc/codecs/nau8824.c
index 4f19fd9b65d1..5a4db8944d06 100644
--- a/sound/soc/codecs/nau8824.c
+++ b/sound/soc/codecs/nau8824.c
@@ -1903,6 +1903,30 @@ static const struct dmi_system_id nau8824_quirk_table[] = {
},
.driver_data = (void *)(NAU8824_MONO_SPEAKER),
},
+ {
+ /* Positivo CW14Q01P */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Positivo Tecnologia SA"),
+ DMI_MATCH(DMI_BOARD_NAME, "CW14Q01P"),
+ },
+ .driver_data = (void *)(NAU8824_JD_ACTIVE_HIGH),
+ },
+ {
+ /* Positivo K1424G */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Positivo Tecnologia SA"),
+ DMI_MATCH(DMI_BOARD_NAME, "K1424G"),
+ },
+ .driver_data = (void *)(NAU8824_JD_ACTIVE_HIGH),
+ },
+ {
+ /* Positivo N14ZP74G */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Positivo Tecnologia SA"),
+ DMI_MATCH(DMI_BOARD_NAME, "N14ZP74G"),
+ },
+ .driver_data = (void *)(NAU8824_JD_ACTIVE_HIGH),
+ },
{}
};
diff --git a/sound/soc/codecs/rt5682-i2c.c b/sound/soc/codecs/rt5682-i2c.c
index 2935c1bb81f3..5bc46b041786 100644
--- a/sound/soc/codecs/rt5682-i2c.c
+++ b/sound/soc/codecs/rt5682-i2c.c
@@ -267,7 +267,9 @@ static int rt5682_i2c_probe(struct i2c_client *i2c)
ret = devm_request_threaded_irq(&i2c->dev, i2c->irq, NULL,
rt5682_irq, IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING
| IRQF_ONESHOT, "rt5682", rt5682);
- if (ret)
+ if (!ret)
+ rt5682->irq = i2c->irq;
+ else
dev_err(&i2c->dev, "Failed to reguest IRQ: %d\n", ret);
}
diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c
index f6c798b65c08..5d992543b791 100644
--- a/sound/soc/codecs/rt5682.c
+++ b/sound/soc/codecs/rt5682.c
@@ -2959,6 +2959,9 @@ static int rt5682_suspend(struct snd_soc_component *component)
if (rt5682->is_sdw)
return 0;
+ if (rt5682->irq)
+ disable_irq(rt5682->irq);
+
cancel_delayed_work_sync(&rt5682->jack_detect_work);
cancel_delayed_work_sync(&rt5682->jd_check_work);
if (rt5682->hs_jack && (rt5682->jack_type & SND_JACK_HEADSET) == SND_JACK_HEADSET) {
@@ -3027,6 +3030,9 @@ static int rt5682_resume(struct snd_soc_component *component)
mod_delayed_work(system_power_efficient_wq,
&rt5682->jack_detect_work, msecs_to_jiffies(0));
+ if (rt5682->irq)
+ enable_irq(rt5682->irq);
+
return 0;
}
#else
diff --git a/sound/soc/codecs/rt5682.h b/sound/soc/codecs/rt5682.h
index d568c6993c33..e8efd8a84a6c 100644
--- a/sound/soc/codecs/rt5682.h
+++ b/sound/soc/codecs/rt5682.h
@@ -1462,6 +1462,7 @@ struct rt5682_priv {
int pll_out[RT5682_PLLS];
int jack_type;
+ int irq;
int irq_work_delay_time;
};
diff --git a/sound/soc/codecs/ssm2602.c b/sound/soc/codecs/ssm2602.c
index 00b60369b029..c29324403e9d 100644
--- a/sound/soc/codecs/ssm2602.c
+++ b/sound/soc/codecs/ssm2602.c
@@ -53,6 +53,18 @@ static const struct reg_default ssm2602_reg[SSM2602_CACHEREGNUM] = {
{ .reg = 0x09, .def = 0x0000 }
};
+/*
+ * ssm2602 register patch
+ * Workaround for playback distortions after power up: activates digital
+ * core, and then powers on output, DAC, and whole chip at the same time
+ */
+
+static const struct reg_sequence ssm2602_patch[] = {
+ { SSM2602_ACTIVE, 0x01 },
+ { SSM2602_PWR, 0x07 },
+ { SSM2602_RESET, 0x00 },
+};
+
/*Appending several "None"s just for OSS mixer use*/
static const char *ssm2602_input_select[] = {
@@ -598,6 +610,9 @@ static int ssm260x_component_probe(struct snd_soc_component *component)
return ret;
}
+ regmap_register_patch(ssm2602->regmap, ssm2602_patch,
+ ARRAY_SIZE(ssm2602_patch));
+
/* set the update bits */
regmap_update_bits(ssm2602->regmap, SSM2602_LINVOL,
LINVOL_LRIN_BOTH, LINVOL_LRIN_BOTH);
diff --git a/sound/soc/codecs/wcd938x-sdw.c b/sound/soc/codecs/wcd938x-sdw.c
index 402286dfaea4..9c10200ff34b 100644
--- a/sound/soc/codecs/wcd938x-sdw.c
+++ b/sound/soc/codecs/wcd938x-sdw.c
@@ -1190,7 +1190,6 @@ static const struct regmap_config wcd938x_regmap_config = {
.readable_reg = wcd938x_readable_register,
.writeable_reg = wcd938x_writeable_register,
.volatile_reg = wcd938x_volatile_register,
- .can_multi_write = true,
};
static const struct sdw_slave_ops wcd9380_slave_ops = {
diff --git a/sound/soc/codecs/wsa881x.c b/sound/soc/codecs/wsa881x.c
index f709231b1277..97f6873a0a8c 100644
--- a/sound/soc/codecs/wsa881x.c
+++ b/sound/soc/codecs/wsa881x.c
@@ -645,7 +645,6 @@ static struct regmap_config wsa881x_regmap_config = {
.readable_reg = wsa881x_readable_register,
.reg_format_endian = REGMAP_ENDIAN_NATIVE,
.val_format_endian = REGMAP_ENDIAN_NATIVE,
- .can_multi_write = true,
};
enum {
diff --git a/sound/soc/codecs/wsa883x.c b/sound/soc/codecs/wsa883x.c
index c609cb63dae6..e80b53143569 100644
--- a/sound/soc/codecs/wsa883x.c
+++ b/sound/soc/codecs/wsa883x.c
@@ -946,7 +946,6 @@ static struct regmap_config wsa883x_regmap_config = {
.writeable_reg = wsa883x_writeable_register,
.reg_format_endian = REGMAP_ENDIAN_NATIVE,
.val_format_endian = REGMAP_ENDIAN_NATIVE,
- .can_multi_write = true,
.use_single_read = true,
};
diff --git a/sound/soc/dwc/dwc-i2s.c b/sound/soc/dwc/dwc-i2s.c
index acdf98b2ee9c..399a489f24f2 100644
--- a/sound/soc/dwc/dwc-i2s.c
+++ b/sound/soc/dwc/dwc-i2s.c
@@ -132,13 +132,13 @@ static irqreturn_t i2s_irq_handler(int irq, void *dev_id)
/* Error Handling: TX */
if (isr[i] & ISR_TXFO) {
- dev_err(dev->dev, "TX overrun (ch_id=%d)\n", i);
+ dev_err_ratelimited(dev->dev, "TX overrun (ch_id=%d)\n", i);
irq_valid = true;
}
/* Error Handling: TX */
if (isr[i] & ISR_RXFO) {
- dev_err(dev->dev, "RX overrun (ch_id=%d)\n", i);
+ dev_err_ratelimited(dev->dev, "RX overrun (ch_id=%d)\n", i);
irq_valid = true;
}
}
@@ -183,30 +183,6 @@ static void i2s_stop(struct dw_i2s_dev *dev,
}
}
-static int dw_i2s_startup(struct snd_pcm_substream *substream,
- struct snd_soc_dai *cpu_dai)
-{
- struct dw_i2s_dev *dev = snd_soc_dai_get_drvdata(cpu_dai);
- union dw_i2s_snd_dma_data *dma_data = NULL;
-
- if (!(dev->capability & DWC_I2S_RECORD) &&
- (substream->stream == SNDRV_PCM_STREAM_CAPTURE))
- return -EINVAL;
-
- if (!(dev->capability & DWC_I2S_PLAY) &&
- (substream->stream == SNDRV_PCM_STREAM_PLAYBACK))
- return -EINVAL;
-
- if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
- dma_data = &dev->play_dma_data;
- else if (substream->stream == SNDRV_PCM_STREAM_CAPTURE)
- dma_data = &dev->capture_dma_data;
-
- snd_soc_dai_set_dma_data(cpu_dai, substream, (void *)dma_data);
-
- return 0;
-}
-
static void dw_i2s_config(struct dw_i2s_dev *dev, int stream)
{
u32 ch_reg;
@@ -305,12 +281,6 @@ static int dw_i2s_hw_params(struct snd_pcm_substream *substream,
return 0;
}
-static void dw_i2s_shutdown(struct snd_pcm_substream *substream,
- struct snd_soc_dai *dai)
-{
- snd_soc_dai_set_dma_data(dai, substream, NULL);
-}
-
static int dw_i2s_prepare(struct snd_pcm_substream *substream,
struct snd_soc_dai *dai)
{
@@ -382,8 +352,6 @@ static int dw_i2s_set_fmt(struct snd_soc_dai *cpu_dai, unsigned int fmt)
}
static const struct snd_soc_dai_ops dw_i2s_dai_ops = {
- .startup = dw_i2s_startup,
- .shutdown = dw_i2s_shutdown,
.hw_params = dw_i2s_hw_params,
.prepare = dw_i2s_prepare,
.trigger = dw_i2s_trigger,
@@ -625,6 +593,14 @@ static int dw_configure_dai_by_dt(struct dw_i2s_dev *dev,
}
+static int dw_i2s_dai_probe(struct snd_soc_dai *dai)
+{
+ struct dw_i2s_dev *dev = snd_soc_dai_get_drvdata(dai);
+
+ snd_soc_dai_init_dma_data(dai, &dev->play_dma_data, &dev->capture_dma_data);
+ return 0;
+}
+
static int dw_i2s_probe(struct platform_device *pdev)
{
const struct i2s_platform_data *pdata = pdev->dev.platform_data;
@@ -643,6 +619,7 @@ static int dw_i2s_probe(struct platform_device *pdev)
return -ENOMEM;
dw_i2s_dai->ops = &dw_i2s_dai_ops;
+ dw_i2s_dai->probe = dw_i2s_dai_probe;
dev->i2s_base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
if (IS_ERR(dev->i2s_base))
diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c
index 94341e4352b3..3f08082a55be 100644
--- a/sound/soc/fsl/fsl_micfil.c
+++ b/sound/soc/fsl/fsl_micfil.c
@@ -1159,7 +1159,7 @@ static int fsl_micfil_probe(struct platform_device *pdev)
ret = devm_snd_dmaengine_pcm_register(&pdev->dev, NULL, 0);
if (ret) {
dev_err(&pdev->dev, "failed to pcm register\n");
- return ret;
+ goto err_pm_disable;
}
fsl_micfil_dai.capture.formats = micfil->soc->formats;
@@ -1169,9 +1169,20 @@ static int fsl_micfil_probe(struct platform_device *pdev)
if (ret) {
dev_err(&pdev->dev, "failed to register component %s\n",
fsl_micfil_component.name);
+ goto err_pm_disable;
}
return ret;
+
+err_pm_disable:
+ pm_runtime_disable(&pdev->dev);
+
+ return ret;
+}
+
+static void fsl_micfil_remove(struct platform_device *pdev)
+{
+ pm_runtime_disable(&pdev->dev);
}
static int __maybe_unused fsl_micfil_runtime_suspend(struct device *dev)
@@ -1232,6 +1243,7 @@ static const struct dev_pm_ops fsl_micfil_pm_ops = {
static struct platform_driver fsl_micfil_driver = {
.probe = fsl_micfil_probe,
+ .remove_new = fsl_micfil_remove,
.driver = {
.name = "fsl-micfil-dai",
.pm = &fsl_micfil_pm_ops,
diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c
index abdaffb00fbd..e3105d48fb65 100644
--- a/sound/soc/fsl/fsl_sai.c
+++ b/sound/soc/fsl/fsl_sai.c
@@ -491,14 +491,21 @@ static int fsl_sai_set_bclk(struct snd_soc_dai *dai, bool tx, u32 freq)
regmap_update_bits(sai->regmap, reg, FSL_SAI_CR2_MSEL_MASK,
FSL_SAI_CR2_MSEL(sai->mclk_id[tx]));
- if (savediv == 1)
+ if (savediv == 1) {
regmap_update_bits(sai->regmap, reg,
FSL_SAI_CR2_DIV_MASK | FSL_SAI_CR2_BYP,
FSL_SAI_CR2_BYP);
- else
+ if (fsl_sai_dir_is_synced(sai, adir))
+ regmap_update_bits(sai->regmap, FSL_SAI_xCR2(tx, ofs),
+ FSL_SAI_CR2_BCI, FSL_SAI_CR2_BCI);
+ else
+ regmap_update_bits(sai->regmap, FSL_SAI_xCR2(tx, ofs),
+ FSL_SAI_CR2_BCI, 0);
+ } else {
regmap_update_bits(sai->regmap, reg,
FSL_SAI_CR2_DIV_MASK | FSL_SAI_CR2_BYP,
savediv / 2 - 1);
+ }
if (sai->soc_data->max_register >= FSL_SAI_MCTL) {
/* SAI is in master mode at this point, so enable MCLK */
diff --git a/sound/soc/fsl/fsl_sai.h b/sound/soc/fsl/fsl_sai.h
index 197748a888d5..a53c4f0e25fa 100644
--- a/sound/soc/fsl/fsl_sai.h
+++ b/sound/soc/fsl/fsl_sai.h
@@ -116,6 +116,7 @@
/* SAI Transmit and Receive Configuration 2 Register */
#define FSL_SAI_CR2_SYNC BIT(30)
+#define FSL_SAI_CR2_BCI BIT(28)
#define FSL_SAI_CR2_MSEL_MASK (0x3 << 26)
#define FSL_SAI_CR2_MSEL_BUS 0
#define FSL_SAI_CR2_MSEL_MCLK1 BIT(26)
diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c
index 467edd96eae5..e5ff61c1e9d1 100644
--- a/sound/soc/generic/simple-card-utils.c
+++ b/sound/soc/generic/simple-card-utils.c
@@ -314,7 +314,7 @@ int asoc_simple_startup(struct snd_pcm_substream *substream)
}
ret = snd_pcm_hw_constraint_minmax(substream->runtime, SNDRV_PCM_HW_PARAM_RATE,
fixed_rate, fixed_rate);
- if (ret)
+ if (ret < 0)
goto codec_err;
}
diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c
index 6f044cc8357e..5a5e4ecd0f61 100644
--- a/sound/soc/generic/simple-card.c
+++ b/sound/soc/generic/simple-card.c
@@ -416,6 +416,7 @@ static int __simple_for_each_link(struct asoc_simple_priv *priv,
if (ret < 0) {
of_node_put(codec);
+ of_node_put(plat);
of_node_put(np);
goto error;
}
diff --git a/sound/soc/intel/avs/apl.c b/sound/soc/intel/avs/apl.c
index 02683dce277a..1860099c782a 100644
--- a/sound/soc/intel/avs/apl.c
+++ b/sound/soc/intel/avs/apl.c
@@ -169,6 +169,7 @@ static bool apl_lp_streaming(struct avs_dev *adev)
{
struct avs_path *path;
+ spin_lock(&adev->path_list_lock);
/* Any gateway without buffer allocated in LP area disqualifies D0IX. */
list_for_each_entry(path, &adev->path_list, node) {
struct avs_path_pipeline *ppl;
@@ -188,11 +189,14 @@ static bool apl_lp_streaming(struct avs_dev *adev)
if (cfg->copier.dma_type == INVALID_OBJECT_ID)
continue;
- if (!mod->gtw_attrs.lp_buffer_alloc)
+ if (!mod->gtw_attrs.lp_buffer_alloc) {
+ spin_unlock(&adev->path_list_lock);
return false;
+ }
}
}
}
+ spin_unlock(&adev->path_list_lock);
return true;
}
diff --git a/sound/soc/intel/avs/avs.h b/sound/soc/intel/avs/avs.h
index d7fccdcb9c16..0cf38c9e768e 100644
--- a/sound/soc/intel/avs/avs.h
+++ b/sound/soc/intel/avs/avs.h
@@ -283,8 +283,8 @@ void avs_release_firmwares(struct avs_dev *adev);
int avs_dsp_init_module(struct avs_dev *adev, u16 module_id, u8 ppl_instance_id,
u8 core_id, u8 domain, void *param, u32 param_size,
- u16 *instance_id);
-void avs_dsp_delete_module(struct avs_dev *adev, u16 module_id, u16 instance_id,
+ u8 *instance_id);
+void avs_dsp_delete_module(struct avs_dev *adev, u16 module_id, u8 instance_id,
u8 ppl_instance_id, u8 core_id);
int avs_dsp_create_pipeline(struct avs_dev *adev, u16 req_size, u8 priority,
bool lp, u16 attributes, u8 *instance_id);
diff --git a/sound/soc/intel/avs/board_selection.c b/sound/soc/intel/avs/board_selection.c
index b2823c2107f7..60f8fb0bff95 100644
--- a/sound/soc/intel/avs/board_selection.c
+++ b/sound/soc/intel/avs/board_selection.c
@@ -443,7 +443,7 @@ static int avs_register_i2s_boards(struct avs_dev *adev)
}
for (mach = boards->machs; mach->id[0]; mach++) {
- if (!acpi_dev_present(mach->id, NULL, -1))
+ if (!acpi_dev_present(mach->id, mach->uid, -1))
continue;
if (mach->machine_quirk)
diff --git a/sound/soc/intel/avs/control.c b/sound/soc/intel/avs/control.c
index a8b14b784f8a..3dfa2e9816db 100644
--- a/sound/soc/intel/avs/control.c
+++ b/sound/soc/intel/avs/control.c
@@ -21,17 +21,25 @@ static struct avs_dev *avs_get_kcontrol_adev(struct snd_kcontrol *kcontrol)
return to_avs_dev(w->dapm->component->dev);
}
-static struct avs_path_module *avs_get_kcontrol_module(struct avs_dev *adev, u32 id)
+static struct avs_path_module *avs_get_volume_module(struct avs_dev *adev, u32 id)
{
struct avs_path *path;
struct avs_path_pipeline *ppl;
struct avs_path_module *mod;
- list_for_each_entry(path, &adev->path_list, node)
- list_for_each_entry(ppl, &path->ppl_list, node)
- list_for_each_entry(mod, &ppl->mod_list, node)
- if (mod->template->ctl_id && mod->template->ctl_id == id)
+ spin_lock(&adev->path_list_lock);
+ list_for_each_entry(path, &adev->path_list, node) {
+ list_for_each_entry(ppl, &path->ppl_list, node) {
+ list_for_each_entry(mod, &ppl->mod_list, node) {
+ if (guid_equal(&mod->template->cfg_ext->type, &AVS_PEAKVOL_MOD_UUID)
+ && mod->template->ctl_id == id) {
+ spin_unlock(&adev->path_list_lock);
return mod;
+ }
+ }
+ }
+ }
+ spin_unlock(&adev->path_list_lock);
return NULL;
}
@@ -49,7 +57,7 @@ int avs_control_volume_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_va
/* prevent access to modules while path is being constructed */
mutex_lock(&adev->path_mutex);
- active_module = avs_get_kcontrol_module(adev, ctl_data->id);
+ active_module = avs_get_volume_module(adev, ctl_data->id);
if (active_module) {
ret = avs_ipc_peakvol_get_volume(adev, active_module->module_id,
active_module->instance_id, &dspvols,
@@ -89,7 +97,7 @@ int avs_control_volume_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_va
changed = 1;
}
- active_module = avs_get_kcontrol_module(adev, ctl_data->id);
+ active_module = avs_get_volume_module(adev, ctl_data->id);
if (active_module) {
dspvol.channel_id = AVS_ALL_CHANNELS_MASK;
dspvol.target_volume = *volume;
diff --git a/sound/soc/intel/avs/dsp.c b/sound/soc/intel/avs/dsp.c
index b881100d3e02..aa03af4473e9 100644
--- a/sound/soc/intel/avs/dsp.c
+++ b/sound/soc/intel/avs/dsp.c
@@ -225,7 +225,7 @@ err:
int avs_dsp_init_module(struct avs_dev *adev, u16 module_id, u8 ppl_instance_id,
u8 core_id, u8 domain, void *param, u32 param_size,
- u16 *instance_id)
+ u8 *instance_id)
{
struct avs_module_entry mentry;
bool was_loaded = false;
@@ -272,7 +272,7 @@ err_mod_entry:
return ret;
}
-void avs_dsp_delete_module(struct avs_dev *adev, u16 module_id, u16 instance_id,
+void avs_dsp_delete_module(struct avs_dev *adev, u16 module_id, u8 instance_id,
u8 ppl_instance_id, u8 core_id)
{
struct avs_module_entry mentry;
diff --git a/sound/soc/intel/avs/messages.h b/sound/soc/intel/avs/messages.h
index d3b60ae7d743..7f23a304b4a9 100644
--- a/sound/soc/intel/avs/messages.h
+++ b/sound/soc/intel/avs/messages.h
@@ -619,7 +619,7 @@ enum avs_channel_config {
AVS_CHANNEL_CONFIG_DUAL_MONO = 9,
AVS_CHANNEL_CONFIG_I2S_DUAL_STEREO_0 = 10,
AVS_CHANNEL_CONFIG_I2S_DUAL_STEREO_1 = 11,
- AVS_CHANNEL_CONFIG_4_CHANNEL = 12,
+ AVS_CHANNEL_CONFIG_7_1 = 12,
AVS_CHANNEL_CONFIG_INVALID
};
diff --git a/sound/soc/intel/avs/path.h b/sound/soc/intel/avs/path.h
index 197222c5e008..657f7b093e80 100644
--- a/sound/soc/intel/avs/path.h
+++ b/sound/soc/intel/avs/path.h
@@ -37,7 +37,7 @@ struct avs_path_pipeline {
struct avs_path_module {
u16 module_id;
- u16 instance_id;
+ u8 instance_id;
union avs_gtw_attributes gtw_attrs;
struct avs_tplg_module *template;
diff --git a/sound/soc/intel/avs/pcm.c b/sound/soc/intel/avs/pcm.c
index 31c032a0f7e4..1fbb2c2fadb5 100644
--- a/sound/soc/intel/avs/pcm.c
+++ b/sound/soc/intel/avs/pcm.c
@@ -468,21 +468,34 @@ static int avs_dai_fe_startup(struct snd_pcm_substream *substream, struct snd_so
host_stream = snd_hdac_ext_stream_assign(bus, substream, HDAC_EXT_STREAM_TYPE_HOST);
if (!host_stream) {
- kfree(data);
- return -EBUSY;
+ ret = -EBUSY;
+ goto err;
}
data->host_stream = host_stream;
- snd_pcm_hw_constraint_integer(runtime, SNDRV_PCM_HW_PARAM_PERIODS);
+ ret = snd_pcm_hw_constraint_integer(runtime, SNDRV_PCM_HW_PARAM_PERIODS);
+ if (ret < 0)
+ goto err;
+
/* avoid wrap-around with wall-clock */
- snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_BUFFER_TIME, 20, 178000000);
- snd_pcm_hw_constraint_list(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, &hw_rates);
+ ret = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_BUFFER_TIME, 20, 178000000);
+ if (ret < 0)
+ goto err;
+
+ ret = snd_pcm_hw_constraint_list(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, &hw_rates);
+ if (ret < 0)
+ goto err;
+
snd_pcm_set_sync(substream);
dev_dbg(dai->dev, "%s fe STARTUP tag %d str %p",
__func__, hdac_stream(host_stream)->stream_tag, substream);
return 0;
+
+err:
+ kfree(data);
+ return ret;
}
static void avs_dai_fe_shutdown(struct snd_pcm_substream *substream, struct snd_soc_dai *dai)
diff --git a/sound/soc/intel/avs/probes.c b/sound/soc/intel/avs/probes.c
index 70a94201d6a5..275928281c6c 100644
--- a/sound/soc/intel/avs/probes.c
+++ b/sound/soc/intel/avs/probes.c
@@ -18,7 +18,7 @@ static int avs_dsp_init_probe(struct avs_dev *adev, union avs_connector_node_id
{
struct avs_probe_cfg cfg = {{0}};
struct avs_module_entry mentry;
- u16 dummy;
+ u8 dummy;
avs_get_module_entry(adev, &AVS_PROBE_MOD_UUID, &mentry);
diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c
index 6faf4a43eaf5..144f082c63fd 100644
--- a/sound/soc/intel/boards/sof_sdw.c
+++ b/sound/soc/intel/boards/sof_sdw.c
@@ -1347,7 +1347,7 @@ static int sof_card_dai_links_create(struct device *dev,
if ((SDW_PART_ID(adr_link->adr_d[i].adr) !=
SDW_PART_ID(adr_link->adr_d[j].adr)) ||
(SDW_MFG_ID(adr_link->adr_d[i].adr) !=
- SDW_MFG_ID(adr_link->adr_d[i].adr))) {
+ SDW_MFG_ID(adr_link->adr_d[j].adr))) {
append_codec_type = true;
goto out;
}
diff --git a/sound/soc/jz4740/jz4740-i2s.c b/sound/soc/jz4740/jz4740-i2s.c
index 6d9cfe0a5041..d0f6c945d9ae 100644
--- a/sound/soc/jz4740/jz4740-i2s.c
+++ b/sound/soc/jz4740/jz4740-i2s.c
@@ -218,18 +218,48 @@ static int jz4740_i2s_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
return 0;
}
+static int jz4740_i2s_get_i2sdiv(unsigned long mclk, unsigned long rate,
+ unsigned long i2sdiv_max)
+{
+ unsigned long div, rate1, rate2, err1, err2;
+
+ div = mclk / (64 * rate);
+ if (div == 0)
+ div = 1;
+
+ rate1 = mclk / (64 * div);
+ rate2 = mclk / (64 * (div + 1));
+
+ err1 = abs(rate1 - rate);
+ err2 = abs(rate2 - rate);
+
+ /*
+ * Choose the divider that produces the smallest error in the
+ * output rate and reject dividers with a 5% or higher error.
+ * In the event that both dividers are outside the acceptable
+ * error margin, reject the rate to prevent distorted audio.
+ * (The number 5% is arbitrary.)
+ */
+ if (div <= i2sdiv_max && err1 <= err2 && err1 < rate/20)
+ return div;
+ if (div < i2sdiv_max && err2 < rate/20)
+ return div + 1;
+
+ return -EINVAL;
+}
+
static int jz4740_i2s_hw_params(struct snd_pcm_substream *substream,
struct snd_pcm_hw_params *params, struct snd_soc_dai *dai)
{
struct jz4740_i2s *i2s = snd_soc_dai_get_drvdata(dai);
struct regmap_field *div_field;
+ unsigned long i2sdiv_max;
unsigned int sample_size;
- uint32_t ctrl;
- int div;
+ uint32_t ctrl, conf;
+ int div = 1;
regmap_read(i2s->regmap, JZ_REG_AIC_CTRL, &ctrl);
-
- div = clk_get_rate(i2s->clk_i2s) / (64 * params_rate(params));
+ regmap_read(i2s->regmap, JZ_REG_AIC_CONF, &conf);
switch (params_format(params)) {
case SNDRV_PCM_FORMAT_S8:
@@ -258,11 +288,27 @@ static int jz4740_i2s_hw_params(struct snd_pcm_substream *substream,
ctrl &= ~JZ_AIC_CTRL_MONO_TO_STEREO;
div_field = i2s->field_i2sdiv_playback;
+ i2sdiv_max = GENMASK(i2s->soc_info->field_i2sdiv_playback.msb,
+ i2s->soc_info->field_i2sdiv_playback.lsb);
} else {
ctrl &= ~JZ_AIC_CTRL_INPUT_SAMPLE_SIZE;
ctrl |= FIELD_PREP(JZ_AIC_CTRL_INPUT_SAMPLE_SIZE, sample_size);
div_field = i2s->field_i2sdiv_capture;
+ i2sdiv_max = GENMASK(i2s->soc_info->field_i2sdiv_capture.msb,
+ i2s->soc_info->field_i2sdiv_capture.lsb);
+ }
+
+ /*
+ * Only calculate I2SDIV if we're supplying the bit or frame clock.
+ * If the codec is supplying both clocks then the divider output is
+ * unused, and we don't want it to limit the allowed sample rates.
+ */
+ if (conf & (JZ_AIC_CONF_BIT_CLK_MASTER | JZ_AIC_CONF_SYNC_CLK_MASTER)) {
+ div = jz4740_i2s_get_i2sdiv(clk_get_rate(i2s->clk_i2s),
+ params_rate(params), i2sdiv_max);
+ if (div < 0)
+ return div;
}
regmap_write(i2s->regmap, JZ_REG_AIC_CTRL, ctrl);
diff --git a/sound/soc/mediatek/mt8186/mt8186-afe-clk.c b/sound/soc/mediatek/mt8186/mt8186-afe-clk.c
index a6b4f29049bb..539e3a023bc4 100644
--- a/sound/soc/mediatek/mt8186/mt8186-afe-clk.c
+++ b/sound/soc/mediatek/mt8186/mt8186-afe-clk.c
@@ -644,9 +644,3 @@ int mt8186_init_clock(struct mtk_base_afe *afe)
return 0;
}
-
-void mt8186_deinit_clock(void *priv)
-{
- struct mtk_base_afe *afe = priv;
- mt8186_audsys_clk_unregister(afe);
-}
diff --git a/sound/soc/mediatek/mt8186/mt8186-afe-clk.h b/sound/soc/mediatek/mt8186/mt8186-afe-clk.h
index d5988717d8f2..a9d59e506d9a 100644
--- a/sound/soc/mediatek/mt8186/mt8186-afe-clk.h
+++ b/sound/soc/mediatek/mt8186/mt8186-afe-clk.h
@@ -81,7 +81,6 @@ enum {
struct mtk_base_afe;
int mt8186_set_audio_int_bus_parent(struct mtk_base_afe *afe, int clk_id);
int mt8186_init_clock(struct mtk_base_afe *afe);
-void mt8186_deinit_clock(void *priv);
int mt8186_afe_enable_cgs(struct mtk_base_afe *afe);
void mt8186_afe_disable_cgs(struct mtk_base_afe *afe);
int mt8186_afe_enable_clock(struct mtk_base_afe *afe);
diff --git a/sound/soc/mediatek/mt8186/mt8186-afe-pcm.c b/sound/soc/mediatek/mt8186/mt8186-afe-pcm.c
index 41172a82103e..a868a04ed4e7 100644
--- a/sound/soc/mediatek/mt8186/mt8186-afe-pcm.c
+++ b/sound/soc/mediatek/mt8186/mt8186-afe-pcm.c
@@ -2848,10 +2848,6 @@ static int mt8186_afe_pcm_dev_probe(struct platform_device *pdev)
return ret;
}
- ret = devm_add_action_or_reset(dev, mt8186_deinit_clock, (void *)afe);
- if (ret)
- return ret;
-
/* init memif */
afe->memif_32bit_supported = 0;
afe->memif_size = MT8186_MEMIF_NUM;
diff --git a/sound/soc/mediatek/mt8186/mt8186-audsys-clk.c b/sound/soc/mediatek/mt8186/mt8186-audsys-clk.c
index 578969ca91c8..5666be6b1bd2 100644
--- a/sound/soc/mediatek/mt8186/mt8186-audsys-clk.c
+++ b/sound/soc/mediatek/mt8186/mt8186-audsys-clk.c
@@ -84,6 +84,29 @@ static const struct afe_gate aud_clks[CLK_AUD_NR_CLK] = {
GATE_AUD2(CLK_AUD_ETDM_OUT1_BCLK, "aud_etdm_out1_bclk", "top_audio", 24),
};
+static void mt8186_audsys_clk_unregister(void *data)
+{
+ struct mtk_base_afe *afe = data;
+ struct mt8186_afe_private *afe_priv = afe->platform_priv;
+ struct clk *clk;
+ struct clk_lookup *cl;
+ int i;
+
+ if (!afe_priv)
+ return;
+
+ for (i = 0; i < CLK_AUD_NR_CLK; i++) {
+ cl = afe_priv->lookup[i];
+ if (!cl)
+ continue;
+
+ clk = cl->clk;
+ clk_unregister_gate(clk);
+
+ clkdev_drop(cl);
+ }
+}
+
int mt8186_audsys_clk_register(struct mtk_base_afe *afe)
{
struct mt8186_afe_private *afe_priv = afe->platform_priv;
@@ -124,27 +147,6 @@ int mt8186_audsys_clk_register(struct mtk_base_afe *afe)
afe_priv->lookup[i] = cl;
}
- return 0;
+ return devm_add_action_or_reset(afe->dev, mt8186_audsys_clk_unregister, afe);
}
-void mt8186_audsys_clk_unregister(struct mtk_base_afe *afe)
-{
- struct mt8186_afe_private *afe_priv = afe->platform_priv;
- struct clk *clk;
- struct clk_lookup *cl;
- int i;
-
- if (!afe_priv)
- return;
-
- for (i = 0; i < CLK_AUD_NR_CLK; i++) {
- cl = afe_priv->lookup[i];
- if (!cl)
- continue;
-
- clk = cl->clk;
- clk_unregister_gate(clk);
-
- clkdev_drop(cl);
- }
-}
diff --git a/sound/soc/mediatek/mt8186/mt8186-audsys-clk.h b/sound/soc/mediatek/mt8186/mt8186-audsys-clk.h
index b8d6a06e11e8..897a2914dc19 100644
--- a/sound/soc/mediatek/mt8186/mt8186-audsys-clk.h
+++ b/sound/soc/mediatek/mt8186/mt8186-audsys-clk.h
@@ -10,6 +10,5 @@
#define _MT8186_AUDSYS_CLK_H_
int mt8186_audsys_clk_register(struct mtk_base_afe *afe);
-void mt8186_audsys_clk_unregister(struct mtk_base_afe *afe);
#endif
diff --git a/sound/soc/mediatek/mt8188/mt8188-afe-clk.c b/sound/soc/mediatek/mt8188/mt8188-afe-clk.c
index 743d6a162cb9..0fb97517f82c 100644
--- a/sound/soc/mediatek/mt8188/mt8188-afe-clk.c
+++ b/sound/soc/mediatek/mt8188/mt8188-afe-clk.c
@@ -418,13 +418,6 @@ int mt8188_afe_init_clock(struct mtk_base_afe *afe)
return 0;
}
-void mt8188_afe_deinit_clock(void *priv)
-{
- struct mtk_base_afe *afe = priv;
-
- mt8188_audsys_clk_unregister(afe);
-}
-
int mt8188_afe_enable_clk(struct mtk_base_afe *afe, struct clk *clk)
{
int ret;
diff --git a/sound/soc/mediatek/mt8188/mt8188-afe-clk.h b/sound/soc/mediatek/mt8188/mt8188-afe-clk.h
index 084fdfb1d877..a4203a87a1e3 100644
--- a/sound/soc/mediatek/mt8188/mt8188-afe-clk.h
+++ b/sound/soc/mediatek/mt8188/mt8188-afe-clk.h
@@ -100,7 +100,6 @@ int mt8188_afe_get_mclk_source_clk_id(int sel);
int mt8188_afe_get_mclk_source_rate(struct mtk_base_afe *afe, int apll);
int mt8188_afe_get_default_mclk_source_by_rate(int rate);
int mt8188_afe_init_clock(struct mtk_base_afe *afe);
-void mt8188_afe_deinit_clock(void *priv);
int mt8188_afe_enable_clk(struct mtk_base_afe *afe, struct clk *clk);
void mt8188_afe_disable_clk(struct mtk_base_afe *afe, struct clk *clk);
int mt8188_afe_set_clk_rate(struct mtk_base_afe *afe, struct clk *clk,
diff --git a/sound/soc/mediatek/mt8188/mt8188-afe-pcm.c b/sound/soc/mediatek/mt8188/mt8188-afe-pcm.c
index e5f9373bed56..bcf7025886df 100644
--- a/sound/soc/mediatek/mt8188/mt8188-afe-pcm.c
+++ b/sound/soc/mediatek/mt8188/mt8188-afe-pcm.c
@@ -3185,10 +3185,6 @@ static int mt8188_afe_pcm_dev_probe(struct platform_device *pdev)
if (ret)
return dev_err_probe(dev, ret, "init clock error");
- ret = devm_add_action_or_reset(dev, mt8188_afe_deinit_clock, (void *)afe);
- if (ret)
- return ret;
-
spin_lock_init(&afe_priv->afe_ctrl_lock);
mutex_init(&afe->irq_alloc_lock);
diff --git a/sound/soc/mediatek/mt8188/mt8188-audsys-clk.c b/sound/soc/mediatek/mt8188/mt8188-audsys-clk.c
index be1c53bf4729..c796ad8b62ee 100644
--- a/sound/soc/mediatek/mt8188/mt8188-audsys-clk.c
+++ b/sound/soc/mediatek/mt8188/mt8188-audsys-clk.c
@@ -138,6 +138,29 @@ static const struct afe_gate aud_clks[CLK_AUD_NR_CLK] = {
GATE_AUD6(CLK_AUD_GASRC11, "aud_gasrc11", "top_asm_h", 11),
};
+static void mt8188_audsys_clk_unregister(void *data)
+{
+ struct mtk_base_afe *afe = data;
+ struct mt8188_afe_private *afe_priv = afe->platform_priv;
+ struct clk *clk;
+ struct clk_lookup *cl;
+ int i;
+
+ if (!afe_priv)
+ return;
+
+ for (i = 0; i < CLK_AUD_NR_CLK; i++) {
+ cl = afe_priv->lookup[i];
+ if (!cl)
+ continue;
+
+ clk = cl->clk;
+ clk_unregister_gate(clk);
+
+ clkdev_drop(cl);
+ }
+}
+
int mt8188_audsys_clk_register(struct mtk_base_afe *afe)
{
struct mt8188_afe_private *afe_priv = afe->platform_priv;
@@ -179,27 +202,5 @@ int mt8188_audsys_clk_register(struct mtk_base_afe *afe)
afe_priv->lookup[i] = cl;
}
- return 0;
-}
-
-void mt8188_audsys_clk_unregister(struct mtk_base_afe *afe)
-{
- struct mt8188_afe_private *afe_priv = afe->platform_priv;
- struct clk *clk;
- struct clk_lookup *cl;
- int i;
-
- if (!afe_priv)
- return;
-
- for (i = 0; i < CLK_AUD_NR_CLK; i++) {
- cl = afe_priv->lookup[i];
- if (!cl)
- continue;
-
- clk = cl->clk;
- clk_unregister_gate(clk);
-
- clkdev_drop(cl);
- }
+ return devm_add_action_or_reset(afe->dev, mt8188_audsys_clk_unregister, afe);
}
diff --git a/sound/soc/mediatek/mt8188/mt8188-audsys-clk.h b/sound/soc/mediatek/mt8188/mt8188-audsys-clk.h
index 6c5f463ad7e4..45b0948c4a06 100644
--- a/sound/soc/mediatek/mt8188/mt8188-audsys-clk.h
+++ b/sound/soc/mediatek/mt8188/mt8188-audsys-clk.h
@@ -10,6 +10,5 @@
#define _MT8188_AUDSYS_CLK_H_
int mt8188_audsys_clk_register(struct mtk_base_afe *afe);
-void mt8188_audsys_clk_unregister(struct mtk_base_afe *afe);
#endif
diff --git a/sound/soc/mediatek/mt8195/mt8195-afe-clk.c b/sound/soc/mediatek/mt8195/mt8195-afe-clk.c
index 9ca2cb8c8a9c..f35318ae0739 100644
--- a/sound/soc/mediatek/mt8195/mt8195-afe-clk.c
+++ b/sound/soc/mediatek/mt8195/mt8195-afe-clk.c
@@ -410,11 +410,6 @@ int mt8195_afe_init_clock(struct mtk_base_afe *afe)
return 0;
}
-void mt8195_afe_deinit_clock(struct mtk_base_afe *afe)
-{
- mt8195_audsys_clk_unregister(afe);
-}
-
int mt8195_afe_enable_clk(struct mtk_base_afe *afe, struct clk *clk)
{
int ret;
diff --git a/sound/soc/mediatek/mt8195/mt8195-afe-clk.h b/sound/soc/mediatek/mt8195/mt8195-afe-clk.h
index 40663e31becd..a08c0ee6c860 100644
--- a/sound/soc/mediatek/mt8195/mt8195-afe-clk.h
+++ b/sound/soc/mediatek/mt8195/mt8195-afe-clk.h
@@ -101,7 +101,6 @@ int mt8195_afe_get_mclk_source_clk_id(int sel);
int mt8195_afe_get_mclk_source_rate(struct mtk_base_afe *afe, int apll);
int mt8195_afe_get_default_mclk_source_by_rate(int rate);
int mt8195_afe_init_clock(struct mtk_base_afe *afe);
-void mt8195_afe_deinit_clock(struct mtk_base_afe *afe);
int mt8195_afe_enable_clk(struct mtk_base_afe *afe, struct clk *clk);
void mt8195_afe_disable_clk(struct mtk_base_afe *afe, struct clk *clk);
int mt8195_afe_prepare_clk(struct mtk_base_afe *afe, struct clk *clk);
diff --git a/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c b/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c
index 9e45efeada55..03dabc056b91 100644
--- a/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c
+++ b/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c
@@ -3255,15 +3255,11 @@ err_pm_put:
static void mt8195_afe_pcm_dev_remove(struct platform_device *pdev)
{
- struct mtk_base_afe *afe = platform_get_drvdata(pdev);
-
snd_soc_unregister_component(&pdev->dev);
pm_runtime_disable(&pdev->dev);
if (!pm_runtime_status_suspended(&pdev->dev))
mt8195_afe_runtime_suspend(&pdev->dev);
-
- mt8195_afe_deinit_clock(afe);
}
static const struct of_device_id mt8195_afe_pcm_dt_match[] = {
diff --git a/sound/soc/mediatek/mt8195/mt8195-audsys-clk.c b/sound/soc/mediatek/mt8195/mt8195-audsys-clk.c
index e0670e0dbd5b..38594bc3f2f7 100644
--- a/sound/soc/mediatek/mt8195/mt8195-audsys-clk.c
+++ b/sound/soc/mediatek/mt8195/mt8195-audsys-clk.c
@@ -148,6 +148,29 @@ static const struct afe_gate aud_clks[CLK_AUD_NR_CLK] = {
GATE_AUD6(CLK_AUD_GASRC19, "aud_gasrc19", "top_asm_h", 19),
};
+static void mt8195_audsys_clk_unregister(void *data)
+{
+ struct mtk_base_afe *afe = data;
+ struct mt8195_afe_private *afe_priv = afe->platform_priv;
+ struct clk *clk;
+ struct clk_lookup *cl;
+ int i;
+
+ if (!afe_priv)
+ return;
+
+ for (i = 0; i < CLK_AUD_NR_CLK; i++) {
+ cl = afe_priv->lookup[i];
+ if (!cl)
+ continue;
+
+ clk = cl->clk;
+ clk_unregister_gate(clk);
+
+ clkdev_drop(cl);
+ }
+}
+
int mt8195_audsys_clk_register(struct mtk_base_afe *afe)
{
struct mt8195_afe_private *afe_priv = afe->platform_priv;
@@ -188,27 +211,5 @@ int mt8195_audsys_clk_register(struct mtk_base_afe *afe)
afe_priv->lookup[i] = cl;
}
- return 0;
-}
-
-void mt8195_audsys_clk_unregister(struct mtk_base_afe *afe)
-{
- struct mt8195_afe_private *afe_priv = afe->platform_priv;
- struct clk *clk;
- struct clk_lookup *cl;
- int i;
-
- if (!afe_priv)
- return;
-
- for (i = 0; i < CLK_AUD_NR_CLK; i++) {
- cl = afe_priv->lookup[i];
- if (!cl)
- continue;
-
- clk = cl->clk;
- clk_unregister_gate(clk);
-
- clkdev_drop(cl);
- }
+ return devm_add_action_or_reset(afe->dev, mt8195_audsys_clk_unregister, afe);
}
diff --git a/sound/soc/mediatek/mt8195/mt8195-audsys-clk.h b/sound/soc/mediatek/mt8195/mt8195-audsys-clk.h
index 239d31016ba7..69db2dd1c9e0 100644
--- a/sound/soc/mediatek/mt8195/mt8195-audsys-clk.h
+++ b/sound/soc/mediatek/mt8195/mt8195-audsys-clk.h
@@ -10,6 +10,5 @@
#define _MT8195_AUDSYS_CLK_H_
int mt8195_audsys_clk_register(struct mtk_base_afe *afe);
-void mt8195_audsys_clk_unregister(struct mtk_base_afe *afe);
#endif
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index adb69d7820a8..4fb1ac8e1c4a 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -2405,6 +2405,9 @@ int dpcm_be_dai_prepare(struct snd_soc_pcm_runtime *fe, int stream)
if (!snd_soc_dpcm_be_can_update(fe, be, stream))
continue;
+ if (!snd_soc_dpcm_can_be_prepared(fe, be, stream))
+ continue;
+
if ((be->dpcm[stream].state != SND_SOC_DPCM_STATE_HW_PARAMS) &&
(be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP) &&
(be->dpcm[stream].state != SND_SOC_DPCM_STATE_SUSPEND) &&
@@ -3042,3 +3045,20 @@ int snd_soc_dpcm_can_be_params(struct snd_soc_pcm_runtime *fe,
return snd_soc_dpcm_check_state(fe, be, stream, state, ARRAY_SIZE(state));
}
EXPORT_SYMBOL_GPL(snd_soc_dpcm_can_be_params);
+
+/*
+ * We can only prepare a BE DAI if any of it's FE are not prepared,
+ * running or paused for the specified stream direction.
+ */
+int snd_soc_dpcm_can_be_prepared(struct snd_soc_pcm_runtime *fe,
+ struct snd_soc_pcm_runtime *be, int stream)
+{
+ const enum snd_soc_dpcm_state state[] = {
+ SND_SOC_DPCM_STATE_START,
+ SND_SOC_DPCM_STATE_PAUSED,
+ SND_SOC_DPCM_STATE_PREPARE,
+ };
+
+ return snd_soc_dpcm_check_state(fe, be, stream, state, ARRAY_SIZE(state));
+}
+EXPORT_SYMBOL_GPL(snd_soc_dpcm_can_be_prepared);
diff --git a/sound/soc/sof/amd/acp-ipc.c b/sound/soc/sof/amd/acp-ipc.c
index 4e0c48a36159..749e856dc601 100644
--- a/sound/soc/sof/amd/acp-ipc.c
+++ b/sound/soc/sof/amd/acp-ipc.c
@@ -209,7 +209,12 @@ int acp_sof_ipc_msg_data(struct snd_sof_dev *sdev, struct snd_sof_pcm_stream *sp
acp_mailbox_read(sdev, offset, p, sz);
} else {
struct snd_pcm_substream *substream = sps->substream;
- struct acp_dsp_stream *stream = substream->runtime->private_data;
+ struct acp_dsp_stream *stream;
+
+ if (!substream || !substream->runtime)
+ return -ESTRPIPE;
+
+ stream = substream->runtime->private_data;
if (!stream)
return -ESTRPIPE;
diff --git a/sound/soc/sof/debug.c b/sound/soc/sof/debug.c
index b42b5982cbbc..d547318e0d32 100644
--- a/sound/soc/sof/debug.c
+++ b/sound/soc/sof/debug.c
@@ -438,8 +438,8 @@ void snd_sof_handle_fw_exception(struct snd_sof_dev *sdev, const char *msg)
/* should we prevent DSP entering D3 ? */
if (!sdev->ipc_dump_printed)
dev_info(sdev->dev,
- "preventing DSP entering D3 state to preserve context\n");
- pm_runtime_get_noresume(sdev->dev);
+ "Attempting to prevent DSP from entering D3 state to preserve context\n");
+ pm_runtime_get_if_in_use(sdev->dev);
}
/* dump vital information to the logs */
diff --git a/sound/soc/sof/intel/hda-mlink.c b/sound/soc/sof/intel/hda-mlink.c
index 775582ab7494..b7cbf66badf5 100644
--- a/sound/soc/sof/intel/hda-mlink.c
+++ b/sound/soc/sof/intel/hda-mlink.c
@@ -19,6 +19,9 @@
#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA_MLINK)
+/* worst-case number of sublinks is used for sublink refcount array allocation only */
+#define HDAML_MAX_SUBLINKS (AZX_ML_LCTL_CPA_SHIFT - AZX_ML_LCTL_SPA_SHIFT)
+
/**
* struct hdac_ext2_link - HDAudio extended+alternate link
*
@@ -33,6 +36,7 @@
* @leptr: extended link pointer
* @eml_lock: mutual exclusion to access shared registers e.g. CPA/SPA bits
* in LCTL register
+ * @sublink_ref_count: array of refcounts, required to power-manage sublinks independently
* @base_ptr: pointer to shim/ip/shim_vs space
* @instance_offset: offset between each of @slcount instances managed by link
* @shim_offset: offset to SHIM register base
@@ -53,6 +57,7 @@ struct hdac_ext2_link {
u32 leptr;
struct mutex eml_lock; /* prevent concurrent access to e.g. CPA/SPA */
+ int sublink_ref_count[HDAML_MAX_SUBLINKS];
/* internal values computed from LCAP contents */
void __iomem *base_ptr;
@@ -68,6 +73,7 @@ struct hdac_ext2_link {
#define AZX_REG_SDW_SHIM_OFFSET 0x0
#define AZX_REG_SDW_IP_OFFSET 0x100
#define AZX_REG_SDW_VS_SHIM_OFFSET 0x6000
+#define AZX_REG_SDW_SHIM_PCMSyCM(y) (0x16 + 0x4 * (y))
/* only one instance supported */
#define AZX_REG_INTEL_DMIC_SHIM_OFFSET 0x0
@@ -91,7 +97,7 @@ struct hdac_ext2_link {
*/
static int hdaml_lnk_enum(struct device *dev, struct hdac_ext2_link *h2link,
- void __iomem *ml_addr, int link_idx)
+ void __iomem *remap_addr, void __iomem *ml_addr, int link_idx)
{
struct hdac_ext_link *hlink = &h2link->hext_link;
u32 base_offset;
@@ -126,15 +132,16 @@ static int hdaml_lnk_enum(struct device *dev, struct hdac_ext2_link *h2link,
link_idx, h2link->slcount);
/* find IP ID and offsets */
- h2link->leptr = readl(hlink->ml_addr + AZX_REG_ML_LEPTR);
+ h2link->leptr = readl(ml_addr + AZX_REG_ML_LEPTR);
h2link->elid = FIELD_GET(AZX_REG_ML_LEPTR_ID, h2link->leptr);
base_offset = FIELD_GET(AZX_REG_ML_LEPTR_PTR, h2link->leptr);
- h2link->base_ptr = hlink->ml_addr + base_offset;
+ h2link->base_ptr = remap_addr + base_offset;
switch (h2link->elid) {
case AZX_REG_ML_LEPTR_ID_SDW:
+ h2link->instance_offset = AZX_REG_SDW_INSTANCE_OFFSET;
h2link->shim_offset = AZX_REG_SDW_SHIM_OFFSET;
h2link->ip_offset = AZX_REG_SDW_IP_OFFSET;
h2link->shim_vs_offset = AZX_REG_SDW_VS_SHIM_OFFSET;
@@ -149,6 +156,7 @@ static int hdaml_lnk_enum(struct device *dev, struct hdac_ext2_link *h2link,
link_idx, base_offset);
break;
case AZX_REG_ML_LEPTR_ID_INTEL_SSP:
+ h2link->instance_offset = AZX_REG_INTEL_SSP_INSTANCE_OFFSET;
h2link->shim_offset = AZX_REG_INTEL_SSP_SHIM_OFFSET;
h2link->ip_offset = AZX_REG_INTEL_SSP_IP_OFFSET;
h2link->shim_vs_offset = AZX_REG_INTEL_SSP_VS_SHIM_OFFSET;
@@ -333,6 +341,21 @@ static void hdaml_link_set_lsdiid(u32 __iomem *lsdiid, int dev_num)
writel(val, lsdiid);
}
+static void hdaml_shim_map_stream_ch(u16 __iomem *pcmsycm, int lchan, int hchan,
+ int stream_id, int dir)
+{
+ u16 val;
+
+ val = readw(pcmsycm);
+
+ u16p_replace_bits(&val, lchan, GENMASK(3, 0));
+ u16p_replace_bits(&val, hchan, GENMASK(7, 4));
+ u16p_replace_bits(&val, stream_id, GENMASK(13, 8));
+ u16p_replace_bits(&val, dir, BIT(15));
+
+ writew(val, pcmsycm);
+}
+
static void hdaml_lctl_offload_enable(u32 __iomem *lctl, bool enable)
{
u32 val = readl(lctl);
@@ -364,7 +387,7 @@ static int hda_ml_alloc_h2link(struct hdac_bus *bus, int index)
hlink->bus = bus;
hlink->ml_addr = bus->mlcap + AZX_ML_BASE + (AZX_ML_INTERVAL * index);
- ret = hdaml_lnk_enum(bus->dev, h2link, hlink->ml_addr, index);
+ ret = hdaml_lnk_enum(bus->dev, h2link, bus->remap_addr, hlink->ml_addr, index);
if (ret < 0) {
kfree(h2link);
return ret;
@@ -641,8 +664,13 @@ static int hdac_bus_eml_power_up_base(struct hdac_bus *bus, bool alt, int elid,
if (eml_lock)
mutex_lock(&h2link->eml_lock);
- if (++hlink->ref_count > 1)
- goto skip_init;
+ if (!alt) {
+ if (++hlink->ref_count > 1)
+ goto skip_init;
+ } else {
+ if (++h2link->sublink_ref_count[sublink] > 1)
+ goto skip_init;
+ }
ret = hdaml_link_init(hlink->ml_addr + AZX_REG_ML_LCTL, sublink);
@@ -684,9 +712,13 @@ static int hdac_bus_eml_power_down_base(struct hdac_bus *bus, bool alt, int elid
if (eml_lock)
mutex_lock(&h2link->eml_lock);
- if (--hlink->ref_count > 0)
- goto skip_shutdown;
-
+ if (!alt) {
+ if (--hlink->ref_count > 0)
+ goto skip_shutdown;
+ } else {
+ if (--h2link->sublink_ref_count[sublink] > 0)
+ goto skip_shutdown;
+ }
ret = hdaml_link_shutdown(hlink->ml_addr + AZX_REG_ML_LCTL, sublink);
skip_shutdown:
@@ -740,6 +772,40 @@ int hdac_bus_eml_sdw_set_lsdiid(struct hdac_bus *bus, int sublink, int dev_num)
return 0;
} EXPORT_SYMBOL_NS(hdac_bus_eml_sdw_set_lsdiid, SND_SOC_SOF_HDA_MLINK);
+/*
+ * the 'y' parameter comes from the PCMSyCM hardware register naming. 'y' refers to the
+ * PDI index, i.e. the FIFO used for RX or TX
+ */
+int hdac_bus_eml_sdw_map_stream_ch(struct hdac_bus *bus, int sublink, int y,
+ int channel_mask, int stream_id, int dir)
+{
+ struct hdac_ext2_link *h2link;
+ u16 __iomem *pcmsycm;
+ u16 val;
+
+ h2link = find_ext2_link(bus, true, AZX_REG_ML_LEPTR_ID_SDW);
+ if (!h2link)
+ return -ENODEV;
+
+ pcmsycm = h2link->base_ptr + h2link->shim_offset +
+ h2link->instance_offset * sublink +
+ AZX_REG_SDW_SHIM_PCMSyCM(y);
+
+ mutex_lock(&h2link->eml_lock);
+
+ hdaml_shim_map_stream_ch(pcmsycm, 0, hweight32(channel_mask),
+ stream_id, dir);
+
+ mutex_unlock(&h2link->eml_lock);
+
+ val = readw(pcmsycm);
+
+ dev_dbg(bus->dev, "channel_mask %#x stream_id %d dir %d pcmscm %#x\n",
+ channel_mask, stream_id, dir, val);
+
+ return 0;
+} EXPORT_SYMBOL_NS(hdac_bus_eml_sdw_map_stream_ch, SND_SOC_SOF_HDA_MLINK);
+
void hda_bus_ml_put_all(struct hdac_bus *bus)
{
struct hdac_ext_link *hlink;
@@ -836,6 +902,18 @@ struct hdac_ext_link *hdac_bus_eml_dmic_get_hlink(struct hdac_bus *bus)
}
EXPORT_SYMBOL_NS(hdac_bus_eml_dmic_get_hlink, SND_SOC_SOF_HDA_MLINK);
+struct hdac_ext_link *hdac_bus_eml_sdw_get_hlink(struct hdac_bus *bus)
+{
+ struct hdac_ext2_link *h2link;
+
+ h2link = find_ext2_link(bus, true, AZX_REG_ML_LEPTR_ID_SDW);
+ if (!h2link)
+ return NULL;
+
+ return &h2link->hext_link;
+}
+EXPORT_SYMBOL_NS(hdac_bus_eml_sdw_get_hlink, SND_SOC_SOF_HDA_MLINK);
+
int hdac_bus_eml_enable_offload(struct hdac_bus *bus, bool alt, int elid, bool enable)
{
struct hdac_ext2_link *h2link;
diff --git a/sound/soc/sof/ipc3-topology.c b/sound/soc/sof/ipc3-topology.c
index fc1eb8e2de2c..ba4ef290b634 100644
--- a/sound/soc/sof/ipc3-topology.c
+++ b/sound/soc/sof/ipc3-topology.c
@@ -2103,10 +2103,13 @@ static int sof_ipc3_dai_config(struct snd_sof_dev *sdev, struct snd_sof_widget *
* For the case of PAUSE/HW_FREE, since there are no quirks, flags can be used as is.
*/
- if (flags & SOF_DAI_CONFIG_FLAGS_HW_PARAMS)
+ if (flags & SOF_DAI_CONFIG_FLAGS_HW_PARAMS) {
+ /* Clear stale command */
+ config->flags &= ~SOF_DAI_CONFIG_FLAGS_CMD_MASK;
config->flags |= flags;
- else
+ } else {
config->flags = flags;
+ }
/* only send the IPC if the widget is set up in the DSP */
if (swidget->use_count > 0) {
diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c
index 059eebf0a687..5abe616a2054 100644
--- a/sound/soc/sof/ipc4-topology.c
+++ b/sound/soc/sof/ipc4-topology.c
@@ -59,7 +59,7 @@ static const struct sof_topology_token ipc4_in_audio_format_tokens[] = {
audio_fmt.interleaving_style)},
{SOF_TKN_CAVS_AUDIO_FORMAT_IN_FMT_CFG, SND_SOC_TPLG_TUPLE_TYPE_WORD, get_token_u32,
offsetof(struct sof_ipc4_pin_format, audio_fmt.fmt_cfg)},
- {SOF_TKN_CAVS_AUDIO_FORMAT_PIN_INDEX, SND_SOC_TPLG_TUPLE_TYPE_WORD, get_token_u32,
+ {SOF_TKN_CAVS_AUDIO_FORMAT_INPUT_PIN_INDEX, SND_SOC_TPLG_TUPLE_TYPE_WORD, get_token_u32,
offsetof(struct sof_ipc4_pin_format, pin_index)},
{SOF_TKN_CAVS_AUDIO_FORMAT_IBS, SND_SOC_TPLG_TUPLE_TYPE_WORD, get_token_u32,
offsetof(struct sof_ipc4_pin_format, buffer_size)},
@@ -79,7 +79,7 @@ static const struct sof_topology_token ipc4_out_audio_format_tokens[] = {
audio_fmt.interleaving_style)},
{SOF_TKN_CAVS_AUDIO_FORMAT_OUT_FMT_CFG, SND_SOC_TPLG_TUPLE_TYPE_WORD, get_token_u32,
offsetof(struct sof_ipc4_pin_format, audio_fmt.fmt_cfg)},
- {SOF_TKN_CAVS_AUDIO_FORMAT_PIN_INDEX, SND_SOC_TPLG_TUPLE_TYPE_WORD, get_token_u32,
+ {SOF_TKN_CAVS_AUDIO_FORMAT_OUTPUT_PIN_INDEX, SND_SOC_TPLG_TUPLE_TYPE_WORD, get_token_u32,
offsetof(struct sof_ipc4_pin_format, pin_index)},
{SOF_TKN_CAVS_AUDIO_FORMAT_OBS, SND_SOC_TPLG_TUPLE_TYPE_WORD, get_token_u32,
offsetof(struct sof_ipc4_pin_format, buffer_size)},
diff --git a/sound/soc/sof/pcm.c b/sound/soc/sof/pcm.c
index 567db32173a8..d0ab6f390734 100644
--- a/sound/soc/sof/pcm.c
+++ b/sound/soc/sof/pcm.c
@@ -643,16 +643,17 @@ static int sof_pcm_probe(struct snd_soc_component *component)
"%s/%s",
plat_data->tplg_filename_prefix,
plat_data->tplg_filename);
- if (!tplg_filename)
- return -ENOMEM;
+ if (!tplg_filename) {
+ ret = -ENOMEM;
+ goto pm_error;
+ }
ret = snd_sof_load_topology(component, tplg_filename);
- if (ret < 0) {
+ if (ret < 0)
dev_err(component->dev, "error: failed to load DSP topology %d\n",
ret);
- return ret;
- }
+pm_error:
pm_runtime_mark_last_busy(component->dev);
pm_runtime_put_autosuspend(component->dev);
diff --git a/sound/soc/sof/pm.c b/sound/soc/sof/pm.c
index 2fdbc53ca715..2b232442e84b 100644
--- a/sound/soc/sof/pm.c
+++ b/sound/soc/sof/pm.c
@@ -164,7 +164,7 @@ static int sof_resume(struct device *dev, bool runtime_resume)
ret = tplg_ops->set_up_all_pipelines(sdev, false);
if (ret < 0) {
dev_err(sdev->dev, "Failed to restore pipeline after resume %d\n", ret);
- return ret;
+ goto setup_fail;
}
}
@@ -178,6 +178,18 @@ static int sof_resume(struct device *dev, bool runtime_resume)
dev_err(sdev->dev, "ctx_restore IPC error during resume: %d\n", ret);
}
+setup_fail:
+#if IS_ENABLED(CONFIG_SND_SOC_SOF_DEBUG_ENABLE_DEBUGFS_CACHE)
+ if (ret < 0) {
+ /*
+ * Debugfs cannot be read in runtime suspend, so cache
+ * the contents upon failure. This allows to capture
+ * possible DSP coredump information.
+ */
+ sof_cache_debugfs(sdev);
+ }
+#endif
+
return ret;
}
diff --git a/sound/soc/sof/sof-client-probes.c b/sound/soc/sof/sof-client-probes.c
index fff126808bc0..8d9e9d5f40e4 100644
--- a/sound/soc/sof/sof-client-probes.c
+++ b/sound/soc/sof/sof-client-probes.c
@@ -218,12 +218,7 @@ static ssize_t sof_probes_dfs_points_read(struct file *file, char __user *to,
ret = ipc->points_info(cdev, &desc, &num_desc);
if (ret < 0)
- goto exit;
-
- pm_runtime_mark_last_busy(dev);
- err = pm_runtime_put_autosuspend(dev);
- if (err < 0)
- dev_err_ratelimited(dev, "debugfs read failed to idle %d\n", err);
+ goto pm_error;
for (i = 0; i < num_desc; i++) {
offset = strlen(buf);
@@ -241,6 +236,13 @@ static ssize_t sof_probes_dfs_points_read(struct file *file, char __user *to,
ret = simple_read_from_buffer(to, count, ppos, buf, strlen(buf));
kfree(desc);
+
+pm_error:
+ pm_runtime_mark_last_busy(dev);
+ err = pm_runtime_put_autosuspend(dev);
+ if (err < 0)
+ dev_err_ratelimited(dev, "debugfs read failed to idle %d\n", err);
+
exit:
kfree(buf);
return ret;
diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c
index d3d536b0a8f5..f160dc454b44 100644
--- a/sound/soc/sof/topology.c
+++ b/sound/soc/sof/topology.c
@@ -586,6 +586,10 @@ static int sof_copy_tuples(struct snd_sof_dev *sdev, struct snd_soc_tplg_vendor_
if (*num_copied_tuples == tuples_size)
return 0;
}
+
+ /* stop when we've found the required token instances */
+ if (found == num_tokens * token_instance_num)
+ return 0;
}
/* next array */
@@ -1261,7 +1265,7 @@ static int sof_widget_parse_tokens(struct snd_soc_component *scomp, struct snd_s
if (num_sets > 1) {
struct snd_sof_tuple *new_tuples;
- num_tuples += token_list[object_token_list[i]].count * num_sets;
+ num_tuples += token_list[object_token_list[i]].count * (num_sets - 1);
new_tuples = krealloc(swidget->tuples,
sizeof(*new_tuples) * num_tuples, GFP_KERNEL);
if (!new_tuples) {
diff --git a/sound/soc/tegra/tegra_pcm.c b/sound/soc/tegra/tegra_pcm.c
index 468c8e77de21..0b69cebc9a33 100644
--- a/sound/soc/tegra/tegra_pcm.c
+++ b/sound/soc/tegra/tegra_pcm.c
@@ -117,6 +117,9 @@ int tegra_pcm_open(struct snd_soc_component *component,
return ret;
}
+ /* Set wait time to 500ms by default */
+ substream->wait_time = 500;
+
return 0;
}
EXPORT_SYMBOL_GPL(tegra_pcm_open);
diff --git a/sound/usb/format.c b/sound/usb/format.c
index 4b1c5ba121f3..ab5fed9f55b6 100644
--- a/sound/usb/format.c
+++ b/sound/usb/format.c
@@ -423,6 +423,7 @@ static int line6_parse_audio_format_rates_quirk(struct snd_usb_audio *chip,
case USB_ID(0x0e41, 0x4248): /* Line6 Helix >= fw 2.82 */
case USB_ID(0x0e41, 0x4249): /* Line6 Helix Rack >= fw 2.82 */
case USB_ID(0x0e41, 0x424a): /* Line6 Helix LT >= fw 2.82 */
+ case USB_ID(0x0e41, 0x424b): /* Line6 Pod Go */
case USB_ID(0x19f7, 0x0011): /* Rode Rodecaster Pro */
return set_fixed_rate(fp, 48000, SNDRV_PCM_RATE_48000);
}
diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
index eec5232f9fb2..08bf535ed163 100644
--- a/sound/usb/pcm.c
+++ b/sound/usb/pcm.c
@@ -650,6 +650,10 @@ static int snd_usb_pcm_prepare(struct snd_pcm_substream *substream)
goto unlock;
}
+ ret = snd_usb_pcm_change_state(subs, UAC3_PD_STATE_D0);
+ if (ret < 0)
+ goto unlock;
+
again:
if (subs->sync_endpoint) {
ret = snd_usb_endpoint_prepare(chip, subs->sync_endpoint);
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 3ecd1ba7fd4b..6cf55b7f7a04 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -2191,6 +2191,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
QUIRK_FLAG_DSD_RAW),
VENDOR_FLG(0x2ab6, /* T+A devices */
QUIRK_FLAG_DSD_RAW),
+ VENDOR_FLG(0x3336, /* HEM devices */
+ QUIRK_FLAG_DSD_RAW),
VENDOR_FLG(0x3353, /* Khadas devices */
QUIRK_FLAG_DSD_RAW),
VENDOR_FLG(0x3842, /* EVGA */
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index f8129c624b07..f7ddd73a8c0f 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -198,6 +198,15 @@ struct kvm_arm_copy_mte_tags {
__u64 reserved[2];
};
+/*
+ * Counter/Timer offset structure. Describe the virtual/physical offset.
+ * To be used with KVM_ARM_SET_COUNTER_OFFSET.
+ */
+struct kvm_arm_counter_offset {
+ __u64 counter_offset;
+ __u64 reserved;
+};
+
#define KVM_ARM_TAGS_TO_GUEST 0
#define KVM_ARM_TAGS_FROM_GUEST 1
@@ -372,6 +381,10 @@ enum {
#endif
};
+/* Device Control API on vm fd */
+#define KVM_ARM_VM_SMCCC_CTRL 0
+#define KVM_ARM_VM_SMCCC_FILTER 0
+
/* Device Control API: ARM VGIC */
#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
@@ -411,6 +424,8 @@ enum {
#define KVM_ARM_VCPU_TIMER_CTRL 1
#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
+#define KVM_ARM_VCPU_TIMER_IRQ_HVTIMER 2
+#define KVM_ARM_VCPU_TIMER_IRQ_HPTIMER 3
#define KVM_ARM_VCPU_PVTIME_CTRL 2
#define KVM_ARM_VCPU_PVTIME_IPA 0
@@ -469,6 +484,27 @@ enum {
/* run->fail_entry.hardware_entry_failure_reason codes. */
#define KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED (1ULL << 0)
+enum kvm_smccc_filter_action {
+ KVM_SMCCC_FILTER_HANDLE = 0,
+ KVM_SMCCC_FILTER_DENY,
+ KVM_SMCCC_FILTER_FWD_TO_USER,
+
+#ifdef __KERNEL__
+ NR_SMCCC_FILTER_ACTIONS
+#endif
+};
+
+struct kvm_smccc_filter {
+ __u32 base;
+ __u32 nr_functions;
+ __u8 action;
+ __u8 pad[15];
+};
+
+/* arm64-specific KVM_EXIT_HYPERCALL flags */
+#define KVM_HYPERCALL_EXIT_SMC (1U << 0)
+#define KVM_HYPERCALL_EXIT_16BIT (1U << 1)
+
#endif
#endif /* __ARM_KVM_H__ */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index b89005819cd5..cb8ca46213be 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -97,7 +97,7 @@
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
-#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
+/* FREE, was #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) "" LFENCE synchronizes RDTSC */
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
@@ -226,10 +226,9 @@
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
-#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
-#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
-#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
-#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
+#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* Intel FlexPriority */
+#define X86_FEATURE_EPT ( 8*32+ 2) /* Intel Extended Page Table */
+#define X86_FEATURE_VPID ( 8*32+ 3) /* Intel Virtual Processor ID */
#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */
#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
@@ -307,14 +306,21 @@
#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* "" SGX EDECCSSA user leaf function */
#define X86_FEATURE_CALL_DEPTH (11*32+19) /* "" Call depth tracking for RSB stuffing */
#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
+#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */
+#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */
+#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* "" Intel Architectural PerfMon Extension */
+#define X86_FEATURE_FZRM (12*32+10) /* "" Fast zero-length REP MOVSB */
+#define X86_FEATURE_FSRS (12*32+11) /* "" Fast short REP STOSB */
+#define X86_FEATURE_FSRC (12*32+12) /* "" Fast short REP {CMPSB,SCASB} */
#define X86_FEATURE_LKGS (12*32+18) /* "" Load "kernel" (userspace) GS */
#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */
#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */
+#define X86_FEATURE_LAM (12*32+26) /* Linear Address Masking */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
@@ -331,6 +337,7 @@
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
+#define X86_FEATURE_AMD_PSFD (13*32+28) /* "" Predictive Store Forwarding Disable */
#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */
@@ -363,6 +370,7 @@
#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
#define X86_FEATURE_X2AVIC (15*32+18) /* Virtual x2apic */
#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */
+#define X86_FEATURE_VNMI (15*32+25) /* Virtual NMI */
#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
@@ -427,6 +435,13 @@
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
+/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
+#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */
+#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* "" LFENCE always serializing / synchronizes RDTSC */
+#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* "" Null Selector Clears Base */
+#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */
+#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* "" SMM_CTL MSR is not present */
+
/*
* BUG word(s)
*/
@@ -467,5 +482,6 @@
#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */
#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */
#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
+#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 5dfa4fb76f4b..fafe9be7a6f4 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -75,6 +75,12 @@
# define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31))
#endif
+#ifdef CONFIG_ADDRESS_MASKING
+# define DISABLE_LAM 0
+#else
+# define DISABLE_LAM (1 << (X86_FEATURE_LAM & 31))
+#endif
+
#ifdef CONFIG_INTEL_IOMMU_SVM
# define DISABLE_ENQCMD 0
#else
@@ -115,7 +121,7 @@
#define DISABLED_MASK10 0
#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
DISABLE_CALL_DEPTH_TRACKING)
-#define DISABLED_MASK12 0
+#define DISABLED_MASK12 (DISABLE_LAM)
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0
#define DISABLED_MASK15 0
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index ad35355ee43e..3aedae61af4f 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -206,6 +206,8 @@
/* Abbreviated from Intel SDM name IA32_INTEGRITY_CAPABILITIES */
#define MSR_INTEGRITY_CAPS 0x000002d9
+#define MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT 2
+#define MSR_INTEGRITY_CAPS_ARRAY_BIST BIT(MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT)
#define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT 4
#define MSR_INTEGRITY_CAPS_PERIODIC_BIST BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT)
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 7f467fe05d42..1a6a1f987949 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -559,4 +559,7 @@ struct kvm_pmu_event_filter {
#define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */
#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */
+/* x86-specific KVM_EXIT_HYPERCALL flags. */
+#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0)
+
#endif /* _ASM_X86_KVM_H */
diff --git a/tools/arch/x86/include/uapi/asm/prctl.h b/tools/arch/x86/include/uapi/asm/prctl.h
index 500b96e71f18..e8d7ebbca1a4 100644
--- a/tools/arch/x86/include/uapi/asm/prctl.h
+++ b/tools/arch/x86/include/uapi/asm/prctl.h
@@ -16,8 +16,16 @@
#define ARCH_GET_XCOMP_GUEST_PERM 0x1024
#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025
+#define ARCH_XCOMP_TILECFG 17
+#define ARCH_XCOMP_TILEDATA 18
+
#define ARCH_MAP_VDSO_X32 0x2001
#define ARCH_MAP_VDSO_32 0x2002
#define ARCH_MAP_VDSO_64 0x2003
+#define ARCH_GET_UNTAG_MASK 0x4001
+#define ARCH_ENABLE_TAGGED_ADDR 0x4002
+#define ARCH_GET_MAX_TAG_BITS 0x4003
+#define ARCH_FORCE_TAGGED_SVA 0x4004
+
#endif /* _ASM_X86_PRCTL_H */
diff --git a/tools/arch/x86/include/uapi/asm/unistd_32.h b/tools/arch/x86/include/uapi/asm/unistd_32.h
index b8ddfc4c4ab0..bc48a4dabe5d 100644
--- a/tools/arch/x86/include/uapi/asm/unistd_32.h
+++ b/tools/arch/x86/include/uapi/asm/unistd_32.h
@@ -2,6 +2,9 @@
#ifndef __NR_fork
#define __NR_fork 2
#endif
+#ifndef __NR_execve
+#define __NR_execve 11
+#endif
#ifndef __NR_getppid
#define __NR_getppid 64
#endif
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index a91ac666f758..d055b82d22cc 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -10,13 +10,6 @@
.section .noinstr.text, "ax"
/*
- * We build a jump to memcpy_orig by default which gets NOPped out on
- * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
- * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
- * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
- */
-
-/*
* memcpy - Copy a memory block.
*
* Input:
@@ -26,17 +19,21 @@
*
* Output:
* rax original destination
+ *
+ * The FSRM alternative should be done inline (avoiding the call and
+ * the disgusting return handling), but that would require some help
+ * from the compiler for better calling conventions.
+ *
+ * The 'rep movsb' itself is small enough to replace the call, but the
+ * two register moves blow up the code. And one of them is "needed"
+ * only for the return value that is the same as the source input,
+ * which the compiler could/should do much better anyway.
*/
SYM_TYPED_FUNC_START(__memcpy)
- ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
- "jmp memcpy_erms", X86_FEATURE_ERMS
+ ALTERNATIVE "jmp memcpy_orig", "", X86_FEATURE_FSRM
movq %rdi, %rax
movq %rdx, %rcx
- shrq $3, %rcx
- andl $7, %edx
- rep movsq
- movl %edx, %ecx
rep movsb
RET
SYM_FUNC_END(__memcpy)
@@ -45,17 +42,6 @@ EXPORT_SYMBOL(__memcpy)
SYM_FUNC_ALIAS(memcpy, __memcpy)
EXPORT_SYMBOL(memcpy)
-/*
- * memcpy_erms() - enhanced fast string memcpy. This is faster and
- * simpler than memcpy. Use memcpy_erms when possible.
- */
-SYM_FUNC_START_LOCAL(memcpy_erms)
- movq %rdi, %rax
- movq %rdx, %rcx
- rep movsb
- RET
-SYM_FUNC_END(memcpy_erms)
-
SYM_FUNC_START_LOCAL(memcpy_orig)
movq %rdi, %rax
diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S
index 6143b1a6fa2c..7c59a704c458 100644
--- a/tools/arch/x86/lib/memset_64.S
+++ b/tools/arch/x86/lib/memset_64.S
@@ -18,27 +18,22 @@
* rdx count (bytes)
*
* rax original destination
+ *
+ * The FSRS alternative should be done inline (avoiding the call and
+ * the disgusting return handling), but that would require some help
+ * from the compiler for better calling conventions.
+ *
+ * The 'rep stosb' itself is small enough to replace the call, but all
+ * the register moves blow up the code. And two of them are "needed"
+ * only for the return value that is the same as the source input,
+ * which the compiler could/should do much better anyway.
*/
SYM_FUNC_START(__memset)
- /*
- * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
- * to use it when possible. If not available, use fast string instructions.
- *
- * Otherwise, use original memset function.
- */
- ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
- "jmp memset_erms", X86_FEATURE_ERMS
+ ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS
movq %rdi,%r9
+ movb %sil,%al
movq %rdx,%rcx
- andl $7,%edx
- shrq $3,%rcx
- /* expand byte value */
- movzbl %sil,%esi
- movabs $0x0101010101010101,%rax
- imulq %rsi,%rax
- rep stosq
- movl %edx,%ecx
rep stosb
movq %r9,%rax
RET
@@ -48,26 +43,6 @@ EXPORT_SYMBOL(__memset)
SYM_FUNC_ALIAS(memset, __memset)
EXPORT_SYMBOL(memset)
-/*
- * ISO C memset - set a memory block to a byte value. This function uses
- * enhanced rep stosb to override the fast string function.
- * The code is simpler and shorter than the fast string function as well.
- *
- * rdi destination
- * rsi value (char)
- * rdx count (bytes)
- *
- * rax original destination
- */
-SYM_FUNC_START_LOCAL(memset_erms)
- movq %rdi,%r9
- movb %sil,%al
- movq %rdx,%rcx
- rep stosb
- movq %r9,%rax
- RET
-SYM_FUNC_END(memset_erms)
-
SYM_FUNC_START_LOCAL(memset_orig)
movq %rdi,%r10
diff --git a/tools/gpio/lsgpio.c b/tools/gpio/lsgpio.c
index c61d061247e1..52a0be45410c 100644
--- a/tools/gpio/lsgpio.c
+++ b/tools/gpio/lsgpio.c
@@ -94,7 +94,7 @@ static void print_attributes(struct gpio_v2_line_info *info)
for (i = 0; i < info->num_attrs; i++) {
if (info->attrs[i].id == GPIO_V2_LINE_ATTR_ID_DEBOUNCE)
fprintf(stdout, ", debounce_period=%dusec",
- info->attrs[0].debounce_period_us);
+ info->attrs[i].debounce_period_us);
}
}
diff --git a/tools/include/asm/alternative.h b/tools/include/asm/alternative.h
index b54bd860dff6..7ce02a223732 100644
--- a/tools/include/asm/alternative.h
+++ b/tools/include/asm/alternative.h
@@ -4,7 +4,6 @@
/* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */
-#define altinstruction_entry #
-#define ALTERNATIVE_2 #
+#define ALTERNATIVE #
#endif
diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h
index cef3b1c25335..51ac441a37c3 100644
--- a/tools/include/linux/coresight-pmu.h
+++ b/tools/include/linux/coresight-pmu.h
@@ -21,19 +21,6 @@
*/
#define CORESIGHT_LEGACY_CPU_TRACE_ID(cpu) (0x10 + (cpu * 2))
-/* CoreSight trace ID is currently the bottom 7 bits of the value */
-#define CORESIGHT_TRACE_ID_VAL_MASK GENMASK(6, 0)
-
-/*
- * perf record will set the legacy meta data values as unused initially.
- * This allows perf report to manage the decoders created when dynamic
- * allocation in operation.
- */
-#define CORESIGHT_TRACE_ID_UNUSED_FLAG BIT(31)
-
-/* Value to set for unused trace ID values */
-#define CORESIGHT_TRACE_ID_UNUSED_VAL 0x7F
-
/*
* Below are the definition of bit offsets for perf option, and works as
* arbitrary values for all ETM versions.
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index 642808520d92..a87bbbbca2d4 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -972,6 +972,19 @@ extern "C" {
#define DRM_IOCTL_GET_STATS DRM_IOR( 0x06, struct drm_stats)
#define DRM_IOCTL_SET_VERSION DRM_IOWR(0x07, struct drm_set_version)
#define DRM_IOCTL_MODESET_CTL DRM_IOW(0x08, struct drm_modeset_ctl)
+/**
+ * DRM_IOCTL_GEM_CLOSE - Close a GEM handle.
+ *
+ * GEM handles are not reference-counted by the kernel. User-space is
+ * responsible for managing their lifetime. For example, if user-space imports
+ * the same memory object twice on the same DRM file description, the same GEM
+ * handle is returned by both imports, and user-space needs to ensure
+ * &DRM_IOCTL_GEM_CLOSE is performed once only. The same situation can happen
+ * when a memory object is allocated, then exported and imported again on the
+ * same DRM file description. The &DRM_IOCTL_MODE_GETFB2 IOCTL is an exception
+ * and always returns fresh new GEM handles even if an existing GEM handle
+ * already refers to the same memory object before the IOCTL is performed.
+ */
#define DRM_IOCTL_GEM_CLOSE DRM_IOW (0x09, struct drm_gem_close)
#define DRM_IOCTL_GEM_FLINK DRM_IOWR(0x0a, struct drm_gem_flink)
#define DRM_IOCTL_GEM_OPEN DRM_IOWR(0x0b, struct drm_gem_open)
@@ -1012,7 +1025,37 @@ extern "C" {
#define DRM_IOCTL_UNLOCK DRM_IOW( 0x2b, struct drm_lock)
#define DRM_IOCTL_FINISH DRM_IOW( 0x2c, struct drm_lock)
+/**
+ * DRM_IOCTL_PRIME_HANDLE_TO_FD - Convert a GEM handle to a DMA-BUF FD.
+ *
+ * User-space sets &drm_prime_handle.handle with the GEM handle to export and
+ * &drm_prime_handle.flags, and gets back a DMA-BUF file descriptor in
+ * &drm_prime_handle.fd.
+ *
+ * The export can fail for any driver-specific reason, e.g. because export is
+ * not supported for this specific GEM handle (but might be for others).
+ *
+ * Support for exporting DMA-BUFs is advertised via &DRM_PRIME_CAP_EXPORT.
+ */
#define DRM_IOCTL_PRIME_HANDLE_TO_FD DRM_IOWR(0x2d, struct drm_prime_handle)
+/**
+ * DRM_IOCTL_PRIME_FD_TO_HANDLE - Convert a DMA-BUF FD to a GEM handle.
+ *
+ * User-space sets &drm_prime_handle.fd with a DMA-BUF file descriptor to
+ * import, and gets back a GEM handle in &drm_prime_handle.handle.
+ * &drm_prime_handle.flags is unused.
+ *
+ * If an existing GEM handle refers to the memory object backing the DMA-BUF,
+ * that GEM handle is returned. Therefore user-space which needs to handle
+ * arbitrary DMA-BUFs must have a user-space lookup data structure to manually
+ * reference-count duplicated GEM handles. For more information see
+ * &DRM_IOCTL_GEM_CLOSE.
+ *
+ * The import can fail for any driver-specific reason, e.g. because import is
+ * only supported for DMA-BUFs allocated on this DRM device.
+ *
+ * Support for importing DMA-BUFs is advertised via &DRM_PRIME_CAP_IMPORT.
+ */
#define DRM_IOCTL_PRIME_FD_TO_HANDLE DRM_IOWR(0x2e, struct drm_prime_handle)
#define DRM_IOCTL_AGP_ACQUIRE DRM_IO( 0x30)
@@ -1104,8 +1147,13 @@ extern "C" {
* struct as the output.
*
* If the client is DRM master or has &CAP_SYS_ADMIN, &drm_mode_fb_cmd2.handles
- * will be filled with GEM buffer handles. Planes are valid until one has a
- * zero handle -- this can be used to compute the number of planes.
+ * will be filled with GEM buffer handles. Fresh new GEM handles are always
+ * returned, even if another GEM handle referring to the same memory object
+ * already exists on the DRM file description. The caller is responsible for
+ * removing the new handles, e.g. via the &DRM_IOCTL_GEM_CLOSE IOCTL. The same
+ * new handle will be returned for multiple planes in case they use the same
+ * memory object. Planes are valid until one has a zero handle -- this can be
+ * used to compute the number of planes.
*
* Otherwise, &drm_mode_fb_cmd2.handles will be zeroed and planes are valid
* until one has a zero &drm_mode_fb_cmd2.pitches.
@@ -1113,6 +1161,11 @@ extern "C" {
* If the framebuffer has a format modifier, &DRM_MODE_FB_MODIFIERS will be set
* in &drm_mode_fb_cmd2.flags and &drm_mode_fb_cmd2.modifier will contain the
* modifier. Otherwise, user-space must ignore &drm_mode_fb_cmd2.modifier.
+ *
+ * To obtain DMA-BUF FDs for each plane without leaking GEM handles, user-space
+ * can export each handle via &DRM_IOCTL_PRIME_HANDLE_TO_FD, then immediately
+ * close each unique handle via &DRM_IOCTL_GEM_CLOSE, making sure to not
+ * double-close handles which are specified multiple times in the array.
*/
#define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2)
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 8df261c5ab9b..dba7c5a5b25e 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -2491,7 +2491,7 @@ struct i915_context_param_engines {
#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
#define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */
#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */
- struct i915_engine_class_instance engines[0];
+ struct i915_engine_class_instance engines[];
} __attribute__((packed));
#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \
@@ -2676,6 +2676,10 @@ enum drm_i915_oa_format {
I915_OAR_FORMAT_A32u40_A4u32_B8_C8,
I915_OA_FORMAT_A24u40_A14u32_B8_C8,
+ /* MTL OAM */
+ I915_OAM_FORMAT_MPEC8u64_B8_C8,
+ I915_OAM_FORMAT_MPEC8u32_B8_C8,
+
I915_OA_FORMAT_MAX /* non-ABI */
};
@@ -2758,6 +2762,25 @@ enum drm_i915_perf_property_id {
*/
DRM_I915_PERF_PROP_POLL_OA_PERIOD,
+ /**
+ * Multiple engines may be mapped to the same OA unit. The OA unit is
+ * identified by class:instance of any engine mapped to it.
+ *
+ * This parameter specifies the engine class and must be passed along
+ * with DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE.
+ *
+ * This property is available in perf revision 6.
+ */
+ DRM_I915_PERF_PROP_OA_ENGINE_CLASS,
+
+ /**
+ * This parameter specifies the engine instance and must be passed along
+ * with DRM_I915_PERF_PROP_OA_ENGINE_CLASS.
+ *
+ * This property is available in perf revision 6.
+ */
+ DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE,
+
DRM_I915_PERF_PROP_MAX /* non-ABI */
};
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 1bb11a6ee667..c994ff5b157c 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1035,6 +1035,7 @@ enum bpf_attach_type {
BPF_TRACE_KPROBE_MULTI,
BPF_LSM_CGROUP,
BPF_STRUCT_OPS,
+ BPF_NETFILTER,
__MAX_BPF_ATTACH_TYPE
};
diff --git a/tools/include/uapi/linux/const.h b/tools/include/uapi/linux/const.h
index af2a44c08683..a429381e7ca5 100644
--- a/tools/include/uapi/linux/const.h
+++ b/tools/include/uapi/linux/const.h
@@ -28,7 +28,7 @@
#define _BITUL(x) (_UL(1) << (x))
#define _BITULL(x) (_ULL(1) << (x))
-#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
+#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1)
#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask))
#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
index 07a4cb149305..e682ab628dfa 100644
--- a/tools/include/uapi/linux/in.h
+++ b/tools/include/uapi/linux/in.h
@@ -162,6 +162,8 @@ struct in_addr {
#define MCAST_MSFILTER 48
#define IP_MULTICAST_ALL 49
#define IP_UNICAST_IF 50
+#define IP_LOCAL_PORT_RANGE 51
+#define IP_PROTOCOL 52
#define MCAST_EXCLUDE 0
#define MCAST_INCLUDE 1
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 4003a166328c..737318b1c1d9 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -341,8 +341,13 @@ struct kvm_run {
__u64 nr;
__u64 args[6];
__u64 ret;
- __u32 longmode;
- __u32 pad;
+
+ union {
+#ifndef __KERNEL__
+ __u32 longmode;
+#endif
+ __u64 flags;
+ };
} hypercall;
/* KVM_EXIT_TPR_ACCESS */
struct {
@@ -1184,6 +1189,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224
#define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
#define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226
+#define KVM_CAP_COUNTER_OFFSET 227
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1543,6 +1549,8 @@ struct kvm_s390_ucas_mapping {
#define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter)
#define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3)
#define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags)
+/* Available with KVM_CAP_COUNTER_OFFSET */
+#define KVM_ARM_SET_COUNTER_OFFSET _IOW(KVMIO, 0xb5, struct kvm_arm_counter_offset)
/* ioctl for vm fd */
#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device)
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index 759b3f53e53f..f23d9a16507f 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -290,6 +290,8 @@ struct prctl_mm_map {
#define PR_SET_VMA 0x53564d41
# define PR_SET_VMA_ANON_NAME 0
+#define PR_GET_AUXV 0x41555856
+
#define PR_SET_MEMORY_MERGE 67
#define PR_GET_MEMORY_MERGE 68
#endif /* _LINUX_PRCTL_H */
diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h
index de6810e94abe..0aa955aa8246 100644
--- a/tools/include/uapi/sound/asound.h
+++ b/tools/include/uapi/sound/asound.h
@@ -429,9 +429,14 @@ struct snd_pcm_sw_params {
snd_pcm_uframes_t avail_min; /* min avail frames for wakeup */
snd_pcm_uframes_t xfer_align; /* obsolete: xfer size need to be a multiple */
snd_pcm_uframes_t start_threshold; /* min hw_avail frames for automatic start */
- snd_pcm_uframes_t stop_threshold; /* min avail frames for automatic stop */
- snd_pcm_uframes_t silence_threshold; /* min distance from noise for silence filling */
- snd_pcm_uframes_t silence_size; /* silence block size */
+ /*
+ * The following two thresholds alleviate playback buffer underruns; when
+ * hw_avail drops below the threshold, the respective action is triggered:
+ */
+ snd_pcm_uframes_t stop_threshold; /* - stop playback */
+ snd_pcm_uframes_t silence_threshold; /* - pre-fill buffer with silence */
+ snd_pcm_uframes_t silence_size; /* max size of silence pre-fill; when >= boundary,
+ * fill played area with silence immediately */
snd_pcm_uframes_t boundary; /* pointers wrap point */
unsigned int proto; /* protocol version */
unsigned int tstamp_type; /* timestamp type (req. proto >= 2.0.12) */
@@ -570,7 +575,8 @@ struct __snd_pcm_mmap_status64 {
struct __snd_pcm_mmap_control64 {
__pad_before_uframe __pad1;
snd_pcm_uframes_t appl_ptr; /* RW: appl ptr (0...boundary-1) */
- __pad_before_uframe __pad2;
+ __pad_before_uframe __pad2; // This should be __pad_after_uframe, but binary
+ // backwards compatibility constraints prevent a fix.
__pad_before_uframe __pad3;
snd_pcm_uframes_t avail_min; /* RW: min available frames for wakeup */
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index ad1ec893b41b..a27f6e9ccce7 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -117,6 +117,7 @@ static const char * const attach_type_name[] = {
[BPF_PERF_EVENT] = "perf_event",
[BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi",
[BPF_STRUCT_OPS] = "struct_ops",
+ [BPF_NETFILTER] = "netfilter",
};
static const char * const link_type_name[] = {
@@ -8712,7 +8713,7 @@ static const struct bpf_sec_def section_defs[] = {
SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE),
SEC_DEF("struct_ops.s+", STRUCT_OPS, 0, SEC_SLEEPABLE),
SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE),
- SEC_DEF("netfilter", NETFILTER, 0, SEC_NONE),
+ SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE),
};
static size_t custom_sec_def_cnt;
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 6065f408a59c..b7d443129f1c 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -180,7 +180,9 @@ static int probe_prog_load(enum bpf_prog_type prog_type,
case BPF_PROG_TYPE_SK_REUSEPORT:
case BPF_PROG_TYPE_FLOW_DISSECTOR:
case BPF_PROG_TYPE_CGROUP_SYSCTL:
+ break;
case BPF_PROG_TYPE_NETFILTER:
+ opts.expected_attach_type = BPF_NETFILTER;
break;
default:
return -EOPNOTSUPP;
diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py
index aa77bcae4807..3144f33196be 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/lib/ynl.py
@@ -591,8 +591,9 @@ class YnlFamily(SpecFamily):
print('Unexpected message: ' + repr(gm))
continue
- rsp.append(self._decode(gm.raw_attrs, op.attr_set.name)
- | gm.fixed_header_attrs)
+ rsp_msg = self._decode(gm.raw_attrs, op.attr_set.name)
+ rsp_msg.update(gm.fixed_header_attrs)
+ rsp.append(rsp_msg)
if not rsp:
return None
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 0fcf99c91400..c19b1103e4ec 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -204,7 +204,6 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
"__ubsan_handle_builtin_unreachable",
"arch_call_rest_init",
"arch_cpu_idle_dead",
- "btrfs_assertfail",
"cpu_bringup_and_idle",
"cpu_startup_entry",
"do_exit",
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 4884520f954f..a794d9eca93d 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -216,6 +216,12 @@ ifeq ($(call get-executable,$(BISON)),)
dummy := $(error Error: $(BISON) is missing on this system, please install it)
endif
+ifeq ($(BUILD_BPF_SKEL),1)
+ ifeq ($(call get-executable,$(CLANG)),)
+ dummy := $(error $(CLANG) is missing on this system, please install it to be able to build with BUILD_BPF_SKEL=1)
+ endif
+endif
+
ifneq ($(OUTPUT),)
ifeq ($(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\).\([0-9]\+\)/\1\2\3/g') \>\= 371), 1)
BISON_FILE_PREFIX_MAP := --file-prefix-map=$(OUTPUT)=
@@ -921,6 +927,7 @@ ifndef NO_DEMANGLE
EXTLIBS += -lstdc++
CFLAGS += -DHAVE_CXA_DEMANGLE_SUPPORT
CXXFLAGS += -DHAVE_CXA_DEMANGLE_SUPPORT
+ $(call detected,CONFIG_CXX_DEMANGLE)
endif
ifdef BUILD_NONDISTRO
ifeq ($(filter -liberty,$(EXTLIBS)),)
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index a42a6a99c2bc..f48794816d82 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -181,7 +181,6 @@ HOSTCC ?= gcc
HOSTLD ?= ld
HOSTAR ?= ar
CLANG ?= clang
-LLVM_STRIP ?= llvm-strip
PKG_CONFIG = $(CROSS_COMPILE)pkg-config
@@ -1057,15 +1056,33 @@ $(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_
ifdef BUILD_BPF_SKEL
BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool
-BPF_INCLUDE := -I$(SKEL_TMP_OUT)/.. -I$(LIBBPF_INCLUDE)
+# Get Clang's default includes on this system, as opposed to those seen by
+# '-target bpf'. This fixes "missing" files on some architectures/distros,
+# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
+#
+# Use '-idirafter': Don't interfere with include mechanics except where the
+# build would have failed anyways.
+define get_sys_includes
+$(shell $(1) $(2) -v -E - </dev/null 2>&1 \
+ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
+$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
+endef
+
+ifneq ($(CROSS_COMPILE),)
+CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%))
+endif
+
+CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
+BPF_INCLUDE := -I$(SKEL_TMP_OUT)/.. -I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES)
+TOOLS_UAPI_INCLUDE := -I$(srctree)/tools/include/uapi
$(BPFTOOL): | $(SKEL_TMP_OUT)
$(Q)CFLAGS= $(MAKE) -C ../bpf/bpftool \
OUTPUT=$(SKEL_TMP_OUT)/ bootstrap
$(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(LIBBPF) | $(SKEL_TMP_OUT)
- $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf -Wall -Werror $(BPF_INCLUDE) \
- -c $(filter util/bpf_skel/%.bpf.c,$^) -o $@ && $(LLVM_STRIP) -g $@
+ $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf -Wall -Werror $(BPF_INCLUDE) $(TOOLS_UAPI_INCLUDE) \
+ -c $(filter util/bpf_skel/%.bpf.c,$^) -o $@
$(SKEL_OUT)/%.skel.h: $(SKEL_TMP_OUT)/%.bpf.o | $(BPFTOOL)
$(QUIET_GENSKEL)$(BPFTOOL) gen skeleton $< > $@
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 77cb03e6ff87..9ca040bfb1aa 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -78,9 +78,9 @@ static int cs_etm_validate_context_id(struct auxtrace_record *itr,
char path[PATH_MAX];
int err;
u32 val;
- u64 contextid =
- evsel->core.attr.config &
- (perf_pmu__format_bits(&cs_etm_pmu->format, "contextid1") |
+ u64 contextid = evsel->core.attr.config &
+ (perf_pmu__format_bits(&cs_etm_pmu->format, "contextid") |
+ perf_pmu__format_bits(&cs_etm_pmu->format, "contextid1") |
perf_pmu__format_bits(&cs_etm_pmu->format, "contextid2"));
if (!contextid)
@@ -114,8 +114,7 @@ static int cs_etm_validate_context_id(struct auxtrace_record *itr,
* 0b00100 Maximum of 32-bit Context ID size.
* All other values are reserved.
*/
- val = BMVAL(val, 5, 9);
- if (!val || val != 0x4) {
+ if (BMVAL(val, 5, 9) != 0x4) {
pr_err("%s: CONTEXTIDR_EL1 isn't supported, disable with %s/contextid1=0/\n",
CORESIGHT_ETM_PMU_NAME, CORESIGHT_ETM_PMU_NAME);
return -EINVAL;
diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c
index 860a8b42b4b5..a9623b128ece 100644
--- a/tools/perf/arch/arm/util/pmu.c
+++ b/tools/perf/arch/arm/util/pmu.c
@@ -12,7 +12,7 @@
#include "arm-spe.h"
#include "hisi-ptt.h"
#include "../../../util/pmu.h"
-#include "../cs-etm.h"
+#include "../../../util/cs-etm.h"
struct perf_event_attr
*perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c
index d730666ab95d..80b9f6287fe2 100644
--- a/tools/perf/arch/arm64/util/header.c
+++ b/tools/perf/arch/arm64/util/header.c
@@ -29,8 +29,8 @@ static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus)
char path[PATH_MAX];
FILE *file;
- scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d"MIDR,
- sysfs, cpus->map[cpu]);
+ scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d" MIDR,
+ sysfs, RC_CHK_ACCESS(cpus)->map[cpu].cpu);
file = fopen(path, "r");
if (!file) {
diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c
index fa143acb4c8d..ef1ed645097c 100644
--- a/tools/perf/arch/arm64/util/pmu.c
+++ b/tools/perf/arch/arm64/util/pmu.c
@@ -18,7 +18,7 @@ static struct perf_pmu *pmu__find_core_pmu(void)
* The cpumap should cover all CPUs. Otherwise, some CPUs may
* not support some events or have different event IDs.
*/
- if (pmu->cpus->nr != cpu__max_cpu().cpu)
+ if (RC_CHK_ACCESS(pmu->cpus)->nr != cpu__max_cpu().cpu)
return NULL;
return pmu;
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index 799147658dee..b68f47541169 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -449,7 +449,7 @@
444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self
-# 447 reserved for memfd_secret
+447 common memfd_secret sys_memfd_secret sys_memfd_secret
448 common process_mrelease sys_process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
index 50ae8bd58296..6188e19d3129 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
@@ -7,7 +7,3 @@ MEMCPY_FN(memcpy_orig,
MEMCPY_FN(__memcpy,
"x86-64-movsq",
"movsq-based memcpy() in arch/x86/lib/memcpy_64.S")
-
-MEMCPY_FN(memcpy_erms,
- "x86-64-movsb",
- "movsb-based memcpy() in arch/x86/lib/memcpy_64.S")
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
index 6eb45a2aa8db..1b9fef7efcdc 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm.S
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S
@@ -2,7 +2,7 @@
/* Various wrappers to make the kernel .S file build in user-space: */
-// memcpy_orig and memcpy_erms are being defined as SYM_L_LOCAL but we need it
+// memcpy_orig is being defined as SYM_L_LOCAL but we need it
#define SYM_FUNC_START_LOCAL(name) \
SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
#define memcpy MEMCPY /* don't hide glibc's memcpy() */
diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h
index dac6d2b7c39b..247c72fdfb9d 100644
--- a/tools/perf/bench/mem-memset-x86-64-asm-def.h
+++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h
@@ -7,7 +7,3 @@ MEMSET_FN(memset_orig,
MEMSET_FN(__memset,
"x86-64-stosq",
"movsq-based memset() in arch/x86/lib/memset_64.S")
-
-MEMSET_FN(memset_erms,
- "x86-64-stosb",
- "movsb-based memset() in arch/x86/lib/memset_64.S")
diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S
index 6f093c483842..abd26c95f1aa 100644
--- a/tools/perf/bench/mem-memset-x86-64-asm.S
+++ b/tools/perf/bench/mem-memset-x86-64-asm.S
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// memset_orig and memset_erms are being defined as SYM_L_LOCAL but we need it
+// memset_orig is being defined as SYM_L_LOCAL but we need it
#define SYM_FUNC_START_LOCAL(name) \
SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
#define memset MEMSET /* don't hide glibc's memset() */
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 810e3376c7d6..f9906f52e4fa 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -1175,7 +1175,7 @@ int cmd_ftrace(int argc, const char **argv)
OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf,
"Use BPF to measure function latency"),
#endif
- OPT_BOOLEAN('n', "--use-nsec", &ftrace.use_nsec,
+ OPT_BOOLEAN('n', "use-nsec", &ftrace.use_nsec,
"Use nano-second histogram"),
OPT_PARENT(common_options),
};
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 006f522d0e7f..c57be48d65bb 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -3647,6 +3647,13 @@ static int process_stat_config_event(struct perf_session *session __maybe_unused
union perf_event *event)
{
perf_event__read_stat_config(&stat_config, &event->stat_config);
+
+ /*
+ * Aggregation modes are not used since post-processing scripts are
+ * supposed to take care of such requirements
+ */
+ stat_config.aggr_mode = AGGR_NONE;
+
return 0;
}
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index cc9fa48d636f..b9ad32f21e57 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -667,6 +667,13 @@ static enum counter_recovery stat_handle_error(struct evsel *counter)
evsel_list->core.threads->err_thread = -1;
return COUNTER_RETRY;
}
+ } else if (counter->skippable) {
+ if (verbose > 0)
+ ui__warning("skipping event %s that kernel failed to open .\n",
+ evsel__name(counter));
+ counter->supported = false;
+ counter->errored = true;
+ return COUNTER_SKIP;
}
evsel__open_strerror(counter, &target, errno, msg, sizeof(msg));
@@ -1890,15 +1897,28 @@ static int add_default_attributes(void)
* caused by exposing latent bugs. This is fixed properly in:
* https://lore.kernel.org/lkml/bff481ba-e60a-763f-0aa0-3ee53302c480@linux.intel.com/
*/
- if (metricgroup__has_metric("TopdownL1") && !perf_pmu__has_hybrid() &&
- metricgroup__parse_groups(evsel_list, "TopdownL1",
- /*metric_no_group=*/false,
- /*metric_no_merge=*/false,
- /*metric_no_threshold=*/true,
- stat_config.user_requested_cpu_list,
- stat_config.system_wide,
- &stat_config.metric_events) < 0)
- return -1;
+ if (metricgroup__has_metric("TopdownL1") && !perf_pmu__has_hybrid()) {
+ struct evlist *metric_evlist = evlist__new();
+ struct evsel *metric_evsel;
+
+ if (!metric_evlist)
+ return -1;
+
+ if (metricgroup__parse_groups(metric_evlist, "TopdownL1",
+ /*metric_no_group=*/false,
+ /*metric_no_merge=*/false,
+ /*metric_no_threshold=*/true,
+ stat_config.user_requested_cpu_list,
+ stat_config.system_wide,
+ &stat_config.metric_events) < 0)
+ return -1;
+
+ evlist__for_each_entry(metric_evlist, metric_evsel) {
+ metric_evsel->skippable = true;
+ }
+ evlist__splice_list_tail(evsel_list, &metric_evlist->core.entries);
+ evlist__delete(metric_evlist);
+ }
/* Platform specific attrs */
if (evlist__add_default_attrs(evsel_list, default_null_attrs) < 0)
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
index 75d80e70e5cd..1f9047553942 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
@@ -133,6 +133,7 @@
"MetricGroup": "TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.1",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. The rest of these subevents count backend stalls, in cycles, due to an outstanding request which is memory bound vs core bound. The subevents are not slot based events and therefore can not be precisely added or subtracted from the Backend_Bound_Aux subevents which are slot based.",
"ScaleUnit": "100%",
"Unit": "cpu_atom"
@@ -143,6 +144,7 @@
"MetricGroup": "TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound_aux",
"MetricThreshold": "tma_backend_bound_aux > 0.2",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that UOPS must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. All of these subevents count backend stalls, in slots, due to a resource limitation. These are not cycle based events and therefore can not be precisely added or subtracted from the Backend_Bound subevents which are cycle based. These subevents are supplementary to Backend_Bound and can be used to analyze results from a resource perspective at allocation.",
"ScaleUnit": "100%",
"Unit": "cpu_atom"
@@ -153,6 +155,7 @@
"MetricGroup": "TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the instruction queue (IQ). Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.",
"ScaleUnit": "100%",
"Unit": "cpu_atom"
@@ -163,6 +166,7 @@
"MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_base",
"MetricThreshold": "tma_base > 0.6",
+ "MetricgroupNoGroup": "TopdownL2",
"ScaleUnit": "100%",
"Unit": "cpu_atom"
},
@@ -182,6 +186,7 @@
"MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.05",
+ "MetricgroupNoGroup": "TopdownL2",
"ScaleUnit": "100%",
"Unit": "cpu_atom"
},
@@ -209,6 +214,7 @@
"MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_core_bound",
"MetricThreshold": "tma_core_bound > 0.1",
+ "MetricgroupNoGroup": "TopdownL2",
"ScaleUnit": "100%",
"Unit": "cpu_atom"
},
@@ -255,6 +261,7 @@
"MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_bandwidth",
"MetricThreshold": "tma_fetch_bandwidth > 0.1",
+ "MetricgroupNoGroup": "TopdownL2",
"ScaleUnit": "100%",
"Unit": "cpu_atom"
},
@@ -264,6 +271,7 @@
"MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"ScaleUnit": "100%",
"Unit": "cpu_atom"
},
@@ -291,6 +299,7 @@
"MetricGroup": "TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL1",
"ScaleUnit": "100%",
"Unit": "cpu_atom"
},
@@ -593,6 +602,7 @@
"MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group",
"MetricName": "tma_machine_clears",
"MetricThreshold": "tma_machine_clears > 0.05",
+ "MetricgroupNoGroup": "TopdownL2",
"ScaleUnit": "100%",
"Unit": "cpu_atom"
},
@@ -611,6 +621,7 @@
"MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"ScaleUnit": "100%",
"Unit": "cpu_atom"
},
@@ -629,6 +640,7 @@
"MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_ms_uops",
"MetricThreshold": "tma_ms_uops > 0.05",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.",
"ScaleUnit": "100%",
"Unit": "cpu_atom"
@@ -729,6 +741,7 @@
"MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_aux_group",
"MetricName": "tma_resource_bound",
"MetricThreshold": "tma_resource_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count.",
"ScaleUnit": "100%",
"Unit": "cpu_atom"
@@ -739,6 +752,7 @@
"MetricGroup": "TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.75",
+ "MetricgroupNoGroup": "TopdownL1",
"ScaleUnit": "100%",
"Unit": "cpu_atom"
},
@@ -848,6 +862,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. Sample with: TOPDOWN.BACKEND_BOUND_SLOTS",
"ScaleUnit": "100%",
"Unit": "cpu_core"
@@ -858,6 +873,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
"ScaleUnit": "100%",
"Unit": "cpu_core"
@@ -868,6 +884,7 @@
"MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: TOPDOWN.BR_MISPREDICT_SLOTS. Related metrics: tma_info_branch_misprediction_cost, tma_info_mispredictions, tma_mispredicts_resteers",
"ScaleUnit": "100%",
"Unit": "cpu_core"
@@ -919,6 +936,7 @@
"MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_core_bound",
"MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).",
"ScaleUnit": "100%",
"Unit": "cpu_core"
@@ -1031,6 +1049,7 @@
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
"MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 6 > 0.35",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp",
"ScaleUnit": "100%",
"Unit": "cpu_core"
@@ -1041,6 +1060,7 @@
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: FRONTEND_RETIRED.LATENCY_GE_16_PS;FRONTEND_RETIRED.LATENCY_GE_8_PS",
"ScaleUnit": "100%",
"Unit": "cpu_core"
@@ -1121,6 +1141,7 @@
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS",
"ScaleUnit": "100%",
"Unit": "cpu_core"
@@ -1141,6 +1162,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. Sample with: UOPS_RETIRED.HEAVY",
"ScaleUnit": "100%",
"Unit": "cpu_core"
@@ -2023,6 +2045,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_light_operations",
"MetricThreshold": "tma_light_operations > 0.6",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
"ScaleUnit": "100%",
"Unit": "cpu_core"
@@ -2082,6 +2105,7 @@
"MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn",
"MetricName": "tma_machine_clears",
"MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache",
"ScaleUnit": "100%",
"Unit": "cpu_core"
@@ -2112,6 +2136,7 @@
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).",
"ScaleUnit": "100%",
"Unit": "cpu_core"
@@ -2310,6 +2335,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.SLOTS",
"ScaleUnit": "100%",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
index 1a85d935c733..0402adbf7d92 100644
--- a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
@@ -98,6 +98,7 @@
"MetricGroup": "TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.1",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. The rest of these subevents count backend stalls, in cycles, due to an outstanding request which is memory bound vs core bound. The subevents are not slot based events and therefore can not be precisely added or subtracted from the Backend_Bound_Aux subevents which are slot based.",
"ScaleUnit": "100%"
},
@@ -107,6 +108,7 @@
"MetricGroup": "TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound_aux",
"MetricThreshold": "tma_backend_bound_aux > 0.2",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that UOPS must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. All of these subevents count backend stalls, in slots, due to a resource limitation. These are not cycle based events and therefore can not be precisely added or subtracted from the Backend_Bound subevents which are cycle based. These subevents are supplementary to Backend_Bound and can be used to analyze results from a resource perspective at allocation.",
"ScaleUnit": "100%"
},
@@ -116,6 +118,7 @@
"MetricGroup": "TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the instruction queue (IQ). Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.",
"ScaleUnit": "100%"
},
@@ -125,6 +128,7 @@
"MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_base",
"MetricThreshold": "tma_base > 0.6",
+ "MetricgroupNoGroup": "TopdownL2",
"ScaleUnit": "100%"
},
{
@@ -142,6 +146,7 @@
"MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.05",
+ "MetricgroupNoGroup": "TopdownL2",
"ScaleUnit": "100%"
},
{
@@ -166,6 +171,7 @@
"MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_core_bound",
"MetricThreshold": "tma_core_bound > 0.1",
+ "MetricgroupNoGroup": "TopdownL2",
"ScaleUnit": "100%"
},
{
@@ -207,6 +213,7 @@
"MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_bandwidth",
"MetricThreshold": "tma_fetch_bandwidth > 0.1",
+ "MetricgroupNoGroup": "TopdownL2",
"ScaleUnit": "100%"
},
{
@@ -215,6 +222,7 @@
"MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"ScaleUnit": "100%"
},
{
@@ -239,6 +247,7 @@
"MetricGroup": "TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL1",
"ScaleUnit": "100%"
},
{
@@ -499,6 +508,7 @@
"MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group",
"MetricName": "tma_machine_clears",
"MetricThreshold": "tma_machine_clears > 0.05",
+ "MetricgroupNoGroup": "TopdownL2",
"ScaleUnit": "100%"
},
{
@@ -515,6 +525,7 @@
"MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"ScaleUnit": "100%"
},
{
@@ -531,6 +542,7 @@
"MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_ms_uops",
"MetricThreshold": "tma_ms_uops > 0.05",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.",
"ScaleUnit": "100%"
},
@@ -620,6 +632,7 @@
"MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_aux_group",
"MetricName": "tma_resource_bound",
"MetricThreshold": "tma_resource_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count.",
"ScaleUnit": "100%"
},
@@ -629,6 +642,7 @@
"MetricGroup": "TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.75",
+ "MetricgroupNoGroup": "TopdownL1",
"ScaleUnit": "100%"
},
{
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
index 51cf8560a8d3..f9e2316601e1 100644
--- a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
@@ -103,6 +103,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.",
"ScaleUnit": "100%"
},
@@ -112,6 +113,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
"ScaleUnit": "100%"
},
@@ -122,6 +124,7 @@
"MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
@@ -170,6 +173,7 @@
"MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_core_bound",
"MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).",
"ScaleUnit": "100%"
},
@@ -263,6 +267,7 @@
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
"MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_iptb, tma_lcp",
"ScaleUnit": "100%"
},
@@ -272,6 +277,7 @@
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: RS_EVENTS.EMPTY_END",
"ScaleUnit": "100%"
},
@@ -326,6 +332,7 @@
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.",
"ScaleUnit": "100%"
},
@@ -335,6 +342,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
"ScaleUnit": "100%"
},
@@ -828,6 +836,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_light_operations",
"MetricThreshold": "tma_light_operations > 0.6",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
"ScaleUnit": "100%"
},
@@ -858,6 +867,7 @@
"MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn",
"MetricName": "tma_machine_clears",
"MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache",
"ScaleUnit": "100%"
},
@@ -886,6 +896,7 @@
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).",
"ScaleUnit": "100%"
},
@@ -1048,6 +1059,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.RETIRE_SLOTS",
"ScaleUnit": "100%"
},
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
index fb57c7382408..e9c46d336a8e 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
@@ -97,6 +97,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. Sample with: TOPDOWN.BACKEND_BOUND_SLOTS",
"ScaleUnit": "100%"
},
@@ -106,6 +107,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
"ScaleUnit": "100%"
},
@@ -116,6 +118,7 @@
"MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: TOPDOWN.BR_MISPREDICT_SLOTS. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
@@ -164,6 +167,7 @@
"MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_core_bound",
"MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).",
"ScaleUnit": "100%"
},
@@ -248,6 +252,7 @@
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
"MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_iptb, tma_lcp",
"ScaleUnit": "100%"
},
@@ -257,6 +262,7 @@
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: FRONTEND_RETIRED.LATENCY_GE_16_PS;FRONTEND_RETIRED.LATENCY_GE_8_PS",
"ScaleUnit": "100%"
},
@@ -311,6 +317,7 @@
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS",
"ScaleUnit": "100%"
},
@@ -320,6 +327,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
"ScaleUnit": "100%"
},
@@ -795,6 +803,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_light_operations",
"MetricThreshold": "tma_light_operations > 0.6",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
"ScaleUnit": "100%"
},
@@ -825,6 +834,7 @@
"MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn",
"MetricName": "tma_machine_clears",
"MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache",
"ScaleUnit": "100%"
},
@@ -853,6 +863,7 @@
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).",
"ScaleUnit": "100%"
},
@@ -1013,6 +1024,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.SLOTS",
"ScaleUnit": "100%"
},
diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
index 65ec0c9e55d1..437b9867acb9 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
@@ -103,6 +103,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.",
"ScaleUnit": "100%"
},
@@ -112,6 +113,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
"ScaleUnit": "100%"
},
@@ -122,6 +124,7 @@
"MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
@@ -170,6 +173,7 @@
"MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_core_bound",
"MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).",
"ScaleUnit": "100%"
},
@@ -263,6 +267,7 @@
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
"MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_iptb, tma_lcp",
"ScaleUnit": "100%"
},
@@ -272,6 +277,7 @@
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: RS_EVENTS.EMPTY_END",
"ScaleUnit": "100%"
},
@@ -326,6 +332,7 @@
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.",
"ScaleUnit": "100%"
},
@@ -335,6 +342,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
"ScaleUnit": "100%"
},
@@ -829,6 +837,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_light_operations",
"MetricThreshold": "tma_light_operations > 0.6",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
"ScaleUnit": "100%"
},
@@ -869,6 +878,7 @@
"MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn",
"MetricName": "tma_machine_clears",
"MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache",
"ScaleUnit": "100%"
},
@@ -897,6 +907,7 @@
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).",
"ScaleUnit": "100%"
},
@@ -1079,6 +1090,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.RETIRE_SLOTS",
"ScaleUnit": "100%"
},
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
index 8f7dc72accd0..875c766222e3 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
@@ -101,6 +101,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.",
"ScaleUnit": "100%"
},
@@ -110,6 +111,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
"ScaleUnit": "100%"
},
@@ -120,6 +122,7 @@
"MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_info_mispredictions, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
@@ -167,6 +170,7 @@
"MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_core_bound",
"MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).",
"ScaleUnit": "100%"
},
@@ -271,6 +275,7 @@
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
"MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp",
"ScaleUnit": "100%"
},
@@ -280,6 +285,7 @@
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: FRONTEND_RETIRED.LATENCY_GE_16_PS;FRONTEND_RETIRED.LATENCY_GE_8_PS",
"ScaleUnit": "100%"
},
@@ -354,6 +360,7 @@
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS",
"ScaleUnit": "100%"
},
@@ -372,6 +379,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
"ScaleUnit": "100%"
},
@@ -1142,6 +1150,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_light_operations",
"MetricThreshold": "tma_light_operations > 0.6",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
"ScaleUnit": "100%"
},
@@ -1196,6 +1205,7 @@
"MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn",
"MetricName": "tma_machine_clears",
"MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache",
"ScaleUnit": "100%"
},
@@ -1224,6 +1234,7 @@
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).",
"ScaleUnit": "100%"
},
@@ -1458,6 +1469,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.RETIRE_SLOTS",
"ScaleUnit": "100%"
},
diff --git a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
index 2528418200bb..9570a88d6d1c 100644
--- a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
@@ -103,6 +103,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.",
"ScaleUnit": "100%"
},
@@ -112,6 +113,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
"ScaleUnit": "100%"
},
@@ -122,6 +124,7 @@
"MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
@@ -161,6 +164,7 @@
"MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_core_bound",
"MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).",
"ScaleUnit": "100%"
},
@@ -254,6 +258,7 @@
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
"MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_iptb, tma_lcp",
"ScaleUnit": "100%"
},
@@ -263,6 +268,7 @@
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: RS_EVENTS.EMPTY_END",
"ScaleUnit": "100%"
},
@@ -272,6 +278,7 @@
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.",
"ScaleUnit": "100%"
},
@@ -281,6 +288,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
"ScaleUnit": "100%"
},
@@ -663,6 +671,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_light_operations",
"MetricThreshold": "tma_light_operations > 0.6",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
"ScaleUnit": "100%"
},
@@ -693,6 +702,7 @@
"MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn",
"MetricName": "tma_machine_clears",
"MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache",
"ScaleUnit": "100%"
},
@@ -721,6 +731,7 @@
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).",
"ScaleUnit": "100%"
},
@@ -874,6 +885,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.RETIRE_SLOTS",
"ScaleUnit": "100%"
},
diff --git a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
index 11f152c346eb..a522202cf684 100644
--- a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
@@ -103,6 +103,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.",
"ScaleUnit": "100%"
},
@@ -112,6 +113,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
"ScaleUnit": "100%"
},
@@ -122,6 +124,7 @@
"MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
@@ -161,6 +164,7 @@
"MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_core_bound",
"MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).",
"ScaleUnit": "100%"
},
@@ -254,6 +258,7 @@
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
"MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_iptb, tma_lcp",
"ScaleUnit": "100%"
},
@@ -263,6 +268,7 @@
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: RS_EVENTS.EMPTY_END",
"ScaleUnit": "100%"
},
@@ -272,6 +278,7 @@
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.",
"ScaleUnit": "100%"
},
@@ -281,6 +288,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
"ScaleUnit": "100%"
},
@@ -664,6 +672,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_light_operations",
"MetricThreshold": "tma_light_operations > 0.6",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
"ScaleUnit": "100%"
},
@@ -704,6 +713,7 @@
"MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn",
"MetricName": "tma_machine_clears",
"MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache",
"ScaleUnit": "100%"
},
@@ -732,6 +742,7 @@
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).",
"ScaleUnit": "100%"
},
@@ -905,6 +916,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.RETIRE_SLOTS",
"ScaleUnit": "100%"
},
diff --git a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
index f45ae3483df4..1a2154f28b7b 100644
--- a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
@@ -115,6 +115,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. Sample with: TOPDOWN.BACKEND_BOUND_SLOTS",
"ScaleUnit": "100%"
},
@@ -124,6 +125,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
"ScaleUnit": "100%"
},
@@ -141,6 +143,7 @@
"MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_info_mispredictions, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
@@ -187,6 +190,7 @@
"MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_core_bound",
"MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).",
"ScaleUnit": "100%"
},
@@ -288,6 +292,7 @@
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
"MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 5 > 0.35",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp",
"ScaleUnit": "100%"
},
@@ -297,6 +302,7 @@
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: FRONTEND_RETIRED.LATENCY_GE_16_PS;FRONTEND_RETIRED.LATENCY_GE_8_PS",
"ScaleUnit": "100%"
},
@@ -369,6 +375,7 @@
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS",
"ScaleUnit": "100%"
},
@@ -378,6 +385,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
"ScaleUnit": "100%"
},
@@ -1111,6 +1119,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_light_operations",
"MetricThreshold": "tma_light_operations > 0.6",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
"ScaleUnit": "100%"
},
@@ -1164,6 +1173,7 @@
"MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn",
"MetricName": "tma_machine_clears",
"MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache",
"ScaleUnit": "100%"
},
@@ -1191,6 +1201,7 @@
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).",
"ScaleUnit": "100%"
},
@@ -1360,6 +1371,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.SLOTS",
"ScaleUnit": "100%"
},
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
index 0f9b174dfc22..1ef772b40e04 100644
--- a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
@@ -80,6 +80,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. Sample with: TOPDOWN.BACKEND_BOUND_SLOTS",
"ScaleUnit": "100%"
},
@@ -89,6 +90,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
"ScaleUnit": "100%"
},
@@ -106,6 +108,7 @@
"MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_info_mispredictions, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
@@ -152,6 +155,7 @@
"MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_core_bound",
"MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).",
"ScaleUnit": "100%"
},
@@ -253,6 +257,7 @@
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
"MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 5 > 0.35",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp",
"ScaleUnit": "100%"
},
@@ -262,6 +267,7 @@
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: FRONTEND_RETIRED.LATENCY_GE_16_PS;FRONTEND_RETIRED.LATENCY_GE_8_PS",
"ScaleUnit": "100%"
},
@@ -334,6 +340,7 @@
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS",
"ScaleUnit": "100%"
},
@@ -343,6 +350,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
"ScaleUnit": "100%"
},
@@ -1134,6 +1142,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_light_operations",
"MetricThreshold": "tma_light_operations > 0.6",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
"ScaleUnit": "100%"
},
@@ -1187,6 +1196,7 @@
"MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn",
"MetricName": "tma_machine_clears",
"MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache",
"ScaleUnit": "100%"
},
@@ -1214,6 +1224,7 @@
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).",
"ScaleUnit": "100%"
},
@@ -1410,6 +1421,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.SLOTS",
"ScaleUnit": "100%"
},
diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
index 5247f69c13b6..11080ccffd51 100644
--- a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
@@ -103,6 +103,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.",
"ScaleUnit": "100%"
},
@@ -112,6 +113,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
"ScaleUnit": "100%"
},
@@ -122,6 +124,7 @@
"MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
@@ -161,6 +164,7 @@
"MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_core_bound",
"MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).",
"ScaleUnit": "100%"
},
@@ -254,6 +258,7 @@
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
"MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_iptb, tma_lcp",
"ScaleUnit": "100%"
},
@@ -263,6 +268,7 @@
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: RS_EVENTS.EMPTY_END",
"ScaleUnit": "100%"
},
@@ -299,6 +305,7 @@
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.",
"ScaleUnit": "100%"
},
@@ -308,6 +315,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
"ScaleUnit": "100%"
},
@@ -724,6 +732,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_light_operations",
"MetricThreshold": "tma_light_operations > 0.6",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
"ScaleUnit": "100%"
},
@@ -754,6 +763,7 @@
"MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn",
"MetricName": "tma_machine_clears",
"MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache",
"ScaleUnit": "100%"
},
@@ -782,6 +792,7 @@
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).",
"ScaleUnit": "100%"
},
@@ -917,6 +928,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.RETIRE_SLOTS",
"ScaleUnit": "100%"
},
diff --git a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
index 89469b10fa30..65a46d659c0a 100644
--- a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
@@ -103,6 +103,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.",
"ScaleUnit": "100%"
},
@@ -112,6 +113,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
"ScaleUnit": "100%"
},
@@ -122,6 +124,7 @@
"MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
@@ -161,6 +164,7 @@
"MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_core_bound",
"MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).",
"ScaleUnit": "100%"
},
@@ -254,6 +258,7 @@
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
"MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_iptb, tma_lcp",
"ScaleUnit": "100%"
},
@@ -263,6 +268,7 @@
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: RS_EVENTS.EMPTY_END",
"ScaleUnit": "100%"
},
@@ -299,6 +305,7 @@
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.",
"ScaleUnit": "100%"
},
@@ -308,6 +315,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
"ScaleUnit": "100%"
},
@@ -725,6 +733,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_light_operations",
"MetricThreshold": "tma_light_operations > 0.6",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
"ScaleUnit": "100%"
},
@@ -765,6 +774,7 @@
"MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn",
"MetricName": "tma_machine_clears",
"MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache",
"ScaleUnit": "100%"
},
@@ -793,6 +803,7 @@
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).",
"ScaleUnit": "100%"
},
@@ -948,6 +959,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.RETIRE_SLOTS",
"ScaleUnit": "100%"
},
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
index e8f4e5c01c9f..66a6f657bd6f 100644
--- a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
@@ -76,6 +76,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.",
"ScaleUnit": "100%"
},
@@ -85,6 +86,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
"ScaleUnit": "100%"
},
@@ -95,6 +97,7 @@
"MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
@@ -114,6 +117,7 @@
"MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_core_bound",
"MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).",
"ScaleUnit": "100%"
},
@@ -160,6 +164,7 @@
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
"MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_lcp",
"ScaleUnit": "100%"
},
@@ -169,6 +174,7 @@
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: RS_EVENTS.EMPTY_END",
"ScaleUnit": "100%"
},
@@ -205,6 +211,7 @@
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.",
"ScaleUnit": "100%"
},
@@ -214,6 +221,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
"ScaleUnit": "100%"
},
@@ -412,6 +420,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_light_operations",
"MetricThreshold": "tma_light_operations > 0.6",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
"ScaleUnit": "100%"
},
@@ -422,6 +431,7 @@
"MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn",
"MetricName": "tma_machine_clears",
"MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache",
"ScaleUnit": "100%"
},
@@ -450,6 +460,7 @@
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).",
"ScaleUnit": "100%"
},
@@ -487,6 +498,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.RETIRE_SLOTS",
"ScaleUnit": "100%"
},
diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
index 4a99fe515f4b..4b8bc19392a4 100644
--- a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
@@ -76,6 +76,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.",
"ScaleUnit": "100%"
},
@@ -85,6 +86,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
"ScaleUnit": "100%"
},
@@ -95,6 +97,7 @@
"MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
@@ -114,6 +117,7 @@
"MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_core_bound",
"MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).",
"ScaleUnit": "100%"
},
@@ -160,6 +164,7 @@
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
"MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_lcp",
"ScaleUnit": "100%"
},
@@ -169,6 +174,7 @@
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: RS_EVENTS.EMPTY_END",
"ScaleUnit": "100%"
},
@@ -205,6 +211,7 @@
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.",
"ScaleUnit": "100%"
},
@@ -214,6 +221,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
"ScaleUnit": "100%"
},
@@ -411,6 +419,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_light_operations",
"MetricThreshold": "tma_light_operations > 0.6",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
"ScaleUnit": "100%"
},
@@ -421,6 +430,7 @@
"MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn",
"MetricName": "tma_machine_clears",
"MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache",
"ScaleUnit": "100%"
},
@@ -449,6 +459,7 @@
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).",
"ScaleUnit": "100%"
},
@@ -486,6 +497,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.RETIRE_SLOTS",
"ScaleUnit": "100%"
},
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
index 126300b7ae77..620fc5bd2217 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
@@ -87,6 +87,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. Sample with: TOPDOWN.BACKEND_BOUND_SLOTS",
"ScaleUnit": "100%"
},
@@ -96,6 +97,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
"ScaleUnit": "100%"
},
@@ -105,6 +107,7 @@
"MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: TOPDOWN.BR_MISPREDICT_SLOTS. Related metrics: tma_info_branch_misprediction_cost, tma_info_mispredictions, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
@@ -151,6 +154,7 @@
"MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_core_bound",
"MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).",
"ScaleUnit": "100%"
},
@@ -252,6 +256,7 @@
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
"MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 6 > 0.35",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp",
"ScaleUnit": "100%"
},
@@ -261,6 +266,7 @@
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: FRONTEND_RETIRED.LATENCY_GE_16_PS;FRONTEND_RETIRED.LATENCY_GE_8_PS",
"ScaleUnit": "100%"
},
@@ -351,6 +357,7 @@
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS",
"ScaleUnit": "100%"
},
@@ -369,6 +376,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. Sample with: UOPS_RETIRED.HEAVY",
"ScaleUnit": "100%"
},
@@ -1216,6 +1224,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_light_operations",
"MetricThreshold": "tma_light_operations > 0.6",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
"ScaleUnit": "100%"
},
@@ -1269,6 +1278,7 @@
"MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn",
"MetricName": "tma_machine_clears",
"MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache",
"ScaleUnit": "100%"
},
@@ -1304,6 +1314,7 @@
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).",
"ScaleUnit": "100%"
},
@@ -1509,6 +1520,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.SLOTS",
"ScaleUnit": "100%"
},
diff --git a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
index a6d212b349f5..21ef6c9be816 100644
--- a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
@@ -101,6 +101,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.",
"ScaleUnit": "100%"
},
@@ -110,6 +111,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
"ScaleUnit": "100%"
},
@@ -120,6 +122,7 @@
"MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_info_mispredictions, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
@@ -167,6 +170,7 @@
"MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_core_bound",
"MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).",
"ScaleUnit": "100%"
},
@@ -271,6 +275,7 @@
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
"MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp",
"ScaleUnit": "100%"
},
@@ -280,6 +285,7 @@
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: FRONTEND_RETIRED.LATENCY_GE_16_PS;FRONTEND_RETIRED.LATENCY_GE_8_PS",
"ScaleUnit": "100%"
},
@@ -345,6 +351,7 @@
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS",
"ScaleUnit": "100%"
},
@@ -363,6 +370,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
"ScaleUnit": "100%"
},
@@ -1065,6 +1073,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_light_operations",
"MetricThreshold": "tma_light_operations > 0.6",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
"ScaleUnit": "100%"
},
@@ -1110,6 +1119,7 @@
"MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn",
"MetricName": "tma_machine_clears",
"MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache",
"ScaleUnit": "100%"
},
@@ -1138,6 +1148,7 @@
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).",
"ScaleUnit": "100%"
},
@@ -1343,6 +1354,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.RETIRE_SLOTS",
"ScaleUnit": "100%"
},
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
index fa2f7f126a30..eb6f12c0343d 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
@@ -101,6 +101,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.",
"ScaleUnit": "100%"
},
@@ -110,6 +111,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
"ScaleUnit": "100%"
},
@@ -120,6 +122,7 @@
"MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_info_mispredictions, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
@@ -167,6 +170,7 @@
"MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_core_bound",
"MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).",
"ScaleUnit": "100%"
},
@@ -271,6 +275,7 @@
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
"MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp",
"ScaleUnit": "100%"
},
@@ -280,6 +285,7 @@
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: FRONTEND_RETIRED.LATENCY_GE_16_PS;FRONTEND_RETIRED.LATENCY_GE_8_PS",
"ScaleUnit": "100%"
},
@@ -354,6 +360,7 @@
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS",
"ScaleUnit": "100%"
},
@@ -372,6 +379,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
"ScaleUnit": "100%"
},
@@ -1123,6 +1131,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_light_operations",
"MetricThreshold": "tma_light_operations > 0.6",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
"ScaleUnit": "100%"
},
@@ -1177,6 +1186,7 @@
"MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn",
"MetricName": "tma_machine_clears",
"MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache",
"ScaleUnit": "100%"
},
@@ -1205,6 +1215,7 @@
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).",
"ScaleUnit": "100%"
},
@@ -1429,6 +1440,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.RETIRE_SLOTS",
"ScaleUnit": "100%"
},
diff --git a/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json b/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json
index 4c80d6be6cf1..b442ed4acfbb 100644
--- a/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json
@@ -109,6 +109,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. Sample with: TOPDOWN.BACKEND_BOUND_SLOTS",
"ScaleUnit": "100%"
},
@@ -118,6 +119,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
"ScaleUnit": "100%"
},
@@ -135,6 +137,7 @@
"MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_info_mispredictions, tma_mispredicts_resteers",
"ScaleUnit": "100%"
},
@@ -181,6 +184,7 @@
"MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_core_bound",
"MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).",
"ScaleUnit": "100%"
},
@@ -282,6 +286,7 @@
"MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
"MetricName": "tma_fetch_bandwidth",
"MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 5 > 0.35",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp",
"ScaleUnit": "100%"
},
@@ -291,6 +296,7 @@
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: FRONTEND_RETIRED.LATENCY_GE_16_PS;FRONTEND_RETIRED.LATENCY_GE_8_PS",
"ScaleUnit": "100%"
},
@@ -363,6 +369,7 @@
"MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS",
"ScaleUnit": "100%"
},
@@ -372,6 +379,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
"ScaleUnit": "100%"
},
@@ -1125,6 +1133,7 @@
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_light_operations",
"MetricThreshold": "tma_light_operations > 0.6",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
"ScaleUnit": "100%"
},
@@ -1178,6 +1187,7 @@
"MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn",
"MetricName": "tma_machine_clears",
"MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache",
"ScaleUnit": "100%"
},
@@ -1205,6 +1215,7 @@
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
+ "MetricgroupNoGroup": "TopdownL2",
"PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).",
"ScaleUnit": "100%"
},
@@ -1374,6 +1385,7 @@
"MetricGroup": "TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
+ "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.SLOTS",
"ScaleUnit": "100%"
},
diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
index ca99b9cfe4ad..f57a8f274025 100755
--- a/tools/perf/pmu-events/jevents.py
+++ b/tools/perf/pmu-events/jevents.py
@@ -52,7 +52,8 @@ _json_event_attributes = [
# Attributes that are in pmu_metric rather than pmu_event.
_json_metric_attributes = [
'metric_name', 'metric_group', 'metric_expr', 'metric_threshold', 'desc',
- 'long_desc', 'unit', 'compat', 'aggr_mode', 'event_grouping'
+ 'long_desc', 'unit', 'compat', 'metricgroup_no_group', 'aggr_mode',
+ 'event_grouping'
]
# Attributes that are bools or enum int values, encoded as '0', '1',...
_json_enum_attributes = ['aggr_mode', 'deprecated', 'event_grouping', 'perpkg']
@@ -303,6 +304,7 @@ class JsonEvent:
self.deprecated = jd.get('Deprecated')
self.metric_name = jd.get('MetricName')
self.metric_group = jd.get('MetricGroup')
+ self.metricgroup_no_group = jd.get('MetricgroupNoGroup')
self.event_grouping = convert_metric_constraint(jd.get('MetricConstraint'))
self.metric_expr = None
if 'MetricExpr' in jd:
diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h
index b7dff8f1021f..80349685cf4d 100644
--- a/tools/perf/pmu-events/pmu-events.h
+++ b/tools/perf/pmu-events/pmu-events.h
@@ -59,6 +59,7 @@ struct pmu_metric {
const char *compat;
const char *desc;
const char *long_desc;
+ const char *metricgroup_no_group;
enum aggr_mode_class aggr_mode;
enum metric_event_groups event_grouping;
};
diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py
index ccfef861e931..e890c261ad26 100644
--- a/tools/perf/tests/attr.py
+++ b/tools/perf/tests/attr.py
@@ -152,7 +152,7 @@ def parse_version(version):
# - expected values assignments
class Test(object):
def __init__(self, path, options):
- parser = configparser.SafeConfigParser()
+ parser = configparser.ConfigParser()
parser.read(path)
log.warning("running '%s'" % path)
@@ -247,7 +247,7 @@ class Test(object):
return True
def load_events(self, path, events):
- parser_event = configparser.SafeConfigParser()
+ parser_event = configparser.ConfigParser()
parser_event.read(path)
# The event record section header contains 'event' word,
@@ -261,7 +261,7 @@ class Test(object):
# Read parent event if there's any
if (':' in section):
base = section[section.index(':') + 1:]
- parser_base = configparser.SafeConfigParser()
+ parser_base = configparser.ConfigParser()
parser_base.read(self.test_dir + '/' + base)
base_items = parser_base.items('event')
diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat
index a21fb65bc012..fccd8ec4d1b0 100644
--- a/tools/perf/tests/attr/base-stat
+++ b/tools/perf/tests/attr/base-stat
@@ -16,7 +16,7 @@ pinned=0
exclusive=0
exclude_user=0
exclude_kernel=0|1
-exclude_hv=0
+exclude_hv=0|1
exclude_idle=0
mmap=0
comm=0
diff --git a/tools/perf/tests/attr/test-stat-default b/tools/perf/tests/attr/test-stat-default
index d8ea6a88163f..a1e2da0a9a6d 100644
--- a/tools/perf/tests/attr/test-stat-default
+++ b/tools/perf/tests/attr/test-stat-default
@@ -40,7 +40,6 @@ fd=6
type=0
config=7
optional=1
-
# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_BACKEND
[event7:base-stat]
fd=7
@@ -89,79 +88,98 @@ enable_on_exec=0
read_format=15
optional=1
-# PERF_TYPE_RAW / topdown-bad-spec (0x8100)
+# PERF_TYPE_RAW / topdown-fe-bound (0x8200)
[event13:base-stat]
fd=13
group_fd=11
type=4
-config=33024
+config=33280
disabled=0
enable_on_exec=0
read_format=15
optional=1
-# PERF_TYPE_RAW / topdown-fe-bound (0x8200)
+# PERF_TYPE_RAW / topdown-be-bound (0x8300)
[event14:base-stat]
fd=14
group_fd=11
type=4
-config=33280
+config=33536
disabled=0
enable_on_exec=0
read_format=15
optional=1
-# PERF_TYPE_RAW / topdown-be-bound (0x8300)
+# PERF_TYPE_RAW / topdown-bad-spec (0x8100)
[event15:base-stat]
fd=15
group_fd=11
type=4
-config=33536
+config=33024
disabled=0
enable_on_exec=0
read_format=15
optional=1
-# PERF_TYPE_RAW / topdown-heavy-ops (0x8400)
+# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING
[event16:base-stat]
fd=16
-group_fd=11
type=4
-config=33792
-disabled=0
-enable_on_exec=0
-read_format=15
+config=4109
optional=1
-# PERF_TYPE_RAW / topdown-br-mispredict (0x8500)
+# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/
[event17:base-stat]
fd=17
-group_fd=11
type=4
-config=34048
-disabled=0
-enable_on_exec=0
-read_format=15
+config=17039629
optional=1
-# PERF_TYPE_RAW / topdown-fetch-lat (0x8600)
+# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD
[event18:base-stat]
fd=18
-group_fd=11
type=4
-config=34304
-disabled=0
-enable_on_exec=0
-read_format=15
+config=60
optional=1
-# PERF_TYPE_RAW / topdown-mem-bound (0x8700)
+# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY
[event19:base-stat]
fd=19
-group_fd=11
type=4
-config=34560
-disabled=0
-enable_on_exec=0
-read_format=15
+config=2097421
+optional=1
+
+# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK
+[event20:base-stat]
+fd=20
+type=4
+config=316
+optional=1
+
+# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE
+[event21:base-stat]
+fd=21
+type=4
+config=412
+optional=1
+
+# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE
+[event22:base-stat]
+fd=22
+type=4
+config=572
+optional=1
+
+# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS
+[event23:base-stat]
+fd=23
+type=4
+config=706
+optional=1
+
+# PERF_TYPE_RAW / UOPS_ISSUED.ANY
+[event24:base-stat]
+fd=24
+type=4
+config=270
optional=1
diff --git a/tools/perf/tests/attr/test-stat-detailed-1 b/tools/perf/tests/attr/test-stat-detailed-1
index b656ab93c5bf..1c52cb05c900 100644
--- a/tools/perf/tests/attr/test-stat-detailed-1
+++ b/tools/perf/tests/attr/test-stat-detailed-1
@@ -90,89 +90,108 @@ enable_on_exec=0
read_format=15
optional=1
-# PERF_TYPE_RAW / topdown-bad-spec (0x8100)
+# PERF_TYPE_RAW / topdown-fe-bound (0x8200)
[event13:base-stat]
fd=13
group_fd=11
type=4
-config=33024
+config=33280
disabled=0
enable_on_exec=0
read_format=15
optional=1
-# PERF_TYPE_RAW / topdown-fe-bound (0x8200)
+# PERF_TYPE_RAW / topdown-be-bound (0x8300)
[event14:base-stat]
fd=14
group_fd=11
type=4
-config=33280
+config=33536
disabled=0
enable_on_exec=0
read_format=15
optional=1
-# PERF_TYPE_RAW / topdown-be-bound (0x8300)
+# PERF_TYPE_RAW / topdown-bad-spec (0x8100)
[event15:base-stat]
fd=15
group_fd=11
type=4
-config=33536
+config=33024
disabled=0
enable_on_exec=0
read_format=15
optional=1
-# PERF_TYPE_RAW / topdown-heavy-ops (0x8400)
+# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING
[event16:base-stat]
fd=16
-group_fd=11
type=4
-config=33792
-disabled=0
-enable_on_exec=0
-read_format=15
+config=4109
optional=1
-# PERF_TYPE_RAW / topdown-br-mispredict (0x8500)
+# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/
[event17:base-stat]
fd=17
-group_fd=11
type=4
-config=34048
-disabled=0
-enable_on_exec=0
-read_format=15
+config=17039629
optional=1
-# PERF_TYPE_RAW / topdown-fetch-lat (0x8600)
+# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD
[event18:base-stat]
fd=18
-group_fd=11
type=4
-config=34304
-disabled=0
-enable_on_exec=0
-read_format=15
+config=60
optional=1
-# PERF_TYPE_RAW / topdown-mem-bound (0x8700)
+# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY
[event19:base-stat]
fd=19
-group_fd=11
type=4
-config=34560
-disabled=0
-enable_on_exec=0
-read_format=15
+config=2097421
+optional=1
+
+# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK
+[event20:base-stat]
+fd=20
+type=4
+config=316
+optional=1
+
+# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE
+[event21:base-stat]
+fd=21
+type=4
+config=412
+optional=1
+
+# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE
+[event22:base-stat]
+fd=22
+type=4
+config=572
+optional=1
+
+# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS
+[event23:base-stat]
+fd=23
+type=4
+config=706
+optional=1
+
+# PERF_TYPE_RAW / UOPS_ISSUED.ANY
+[event24:base-stat]
+fd=24
+type=4
+config=270
optional=1
# PERF_TYPE_HW_CACHE /
# PERF_COUNT_HW_CACHE_L1D << 0 |
# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
-[event20:base-stat]
-fd=20
+[event25:base-stat]
+fd=25
type=3
config=0
optional=1
@@ -181,8 +200,8 @@ optional=1
# PERF_COUNT_HW_CACHE_L1D << 0 |
# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
-[event21:base-stat]
-fd=21
+[event26:base-stat]
+fd=26
type=3
config=65536
optional=1
@@ -191,8 +210,8 @@ optional=1
# PERF_COUNT_HW_CACHE_LL << 0 |
# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
-[event22:base-stat]
-fd=22
+[event27:base-stat]
+fd=27
type=3
config=2
optional=1
@@ -201,8 +220,8 @@ optional=1
# PERF_COUNT_HW_CACHE_LL << 0 |
# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
-[event23:base-stat]
-fd=23
+[event28:base-stat]
+fd=28
type=3
config=65538
optional=1
diff --git a/tools/perf/tests/attr/test-stat-detailed-2 b/tools/perf/tests/attr/test-stat-detailed-2
index 97625090a1c4..7e961d24a885 100644
--- a/tools/perf/tests/attr/test-stat-detailed-2
+++ b/tools/perf/tests/attr/test-stat-detailed-2
@@ -90,89 +90,108 @@ enable_on_exec=0
read_format=15
optional=1
-# PERF_TYPE_RAW / topdown-bad-spec (0x8100)
+# PERF_TYPE_RAW / topdown-fe-bound (0x8200)
[event13:base-stat]
fd=13
group_fd=11
type=4
-config=33024
+config=33280
disabled=0
enable_on_exec=0
read_format=15
optional=1
-# PERF_TYPE_RAW / topdown-fe-bound (0x8200)
+# PERF_TYPE_RAW / topdown-be-bound (0x8300)
[event14:base-stat]
fd=14
group_fd=11
type=4
-config=33280
+config=33536
disabled=0
enable_on_exec=0
read_format=15
optional=1
-# PERF_TYPE_RAW / topdown-be-bound (0x8300)
+# PERF_TYPE_RAW / topdown-bad-spec (0x8100)
[event15:base-stat]
fd=15
group_fd=11
type=4
-config=33536
+config=33024
disabled=0
enable_on_exec=0
read_format=15
optional=1
-# PERF_TYPE_RAW / topdown-heavy-ops (0x8400)
+# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING
[event16:base-stat]
fd=16
-group_fd=11
type=4
-config=33792
-disabled=0
-enable_on_exec=0
-read_format=15
+config=4109
optional=1
-# PERF_TYPE_RAW / topdown-br-mispredict (0x8500)
+# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/
[event17:base-stat]
fd=17
-group_fd=11
type=4
-config=34048
-disabled=0
-enable_on_exec=0
-read_format=15
+config=17039629
optional=1
-# PERF_TYPE_RAW / topdown-fetch-lat (0x8600)
+# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD
[event18:base-stat]
fd=18
-group_fd=11
type=4
-config=34304
-disabled=0
-enable_on_exec=0
-read_format=15
+config=60
optional=1
-# PERF_TYPE_RAW / topdown-mem-bound (0x8700)
+# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY
[event19:base-stat]
fd=19
-group_fd=11
type=4
-config=34560
-disabled=0
-enable_on_exec=0
-read_format=15
+config=2097421
+optional=1
+
+# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK
+[event20:base-stat]
+fd=20
+type=4
+config=316
+optional=1
+
+# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE
+[event21:base-stat]
+fd=21
+type=4
+config=412
+optional=1
+
+# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE
+[event22:base-stat]
+fd=22
+type=4
+config=572
+optional=1
+
+# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS
+[event23:base-stat]
+fd=23
+type=4
+config=706
+optional=1
+
+# PERF_TYPE_RAW / UOPS_ISSUED.ANY
+[event24:base-stat]
+fd=24
+type=4
+config=270
optional=1
# PERF_TYPE_HW_CACHE /
# PERF_COUNT_HW_CACHE_L1D << 0 |
# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
-[event20:base-stat]
-fd=20
+[event25:base-stat]
+fd=25
type=3
config=0
optional=1
@@ -181,8 +200,8 @@ optional=1
# PERF_COUNT_HW_CACHE_L1D << 0 |
# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
-[event21:base-stat]
-fd=21
+[event26:base-stat]
+fd=26
type=3
config=65536
optional=1
@@ -191,8 +210,8 @@ optional=1
# PERF_COUNT_HW_CACHE_LL << 0 |
# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
-[event22:base-stat]
-fd=22
+[event27:base-stat]
+fd=27
type=3
config=2
optional=1
@@ -201,8 +220,8 @@ optional=1
# PERF_COUNT_HW_CACHE_LL << 0 |
# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
-[event23:base-stat]
-fd=23
+[event28:base-stat]
+fd=28
type=3
config=65538
optional=1
@@ -211,8 +230,8 @@ optional=1
# PERF_COUNT_HW_CACHE_L1I << 0 |
# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
-[event24:base-stat]
-fd=24
+[event29:base-stat]
+fd=29
type=3
config=1
optional=1
@@ -221,8 +240,8 @@ optional=1
# PERF_COUNT_HW_CACHE_L1I << 0 |
# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
-[event25:base-stat]
-fd=25
+[event30:base-stat]
+fd=30
type=3
config=65537
optional=1
@@ -231,8 +250,8 @@ optional=1
# PERF_COUNT_HW_CACHE_DTLB << 0 |
# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
-[event26:base-stat]
-fd=26
+[event31:base-stat]
+fd=31
type=3
config=3
optional=1
@@ -241,8 +260,8 @@ optional=1
# PERF_COUNT_HW_CACHE_DTLB << 0 |
# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
-[event27:base-stat]
-fd=27
+[event32:base-stat]
+fd=32
type=3
config=65539
optional=1
@@ -251,8 +270,8 @@ optional=1
# PERF_COUNT_HW_CACHE_ITLB << 0 |
# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
-[event28:base-stat]
-fd=28
+[event33:base-stat]
+fd=33
type=3
config=4
optional=1
@@ -261,8 +280,8 @@ optional=1
# PERF_COUNT_HW_CACHE_ITLB << 0 |
# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
-[event29:base-stat]
-fd=29
+[event34:base-stat]
+fd=34
type=3
config=65540
optional=1
diff --git a/tools/perf/tests/attr/test-stat-detailed-3 b/tools/perf/tests/attr/test-stat-detailed-3
index d555042e3fbf..e50535f45977 100644
--- a/tools/perf/tests/attr/test-stat-detailed-3
+++ b/tools/perf/tests/attr/test-stat-detailed-3
@@ -90,89 +90,108 @@ enable_on_exec=0
read_format=15
optional=1
-# PERF_TYPE_RAW / topdown-bad-spec (0x8100)
+# PERF_TYPE_RAW / topdown-fe-bound (0x8200)
[event13:base-stat]
fd=13
group_fd=11
type=4
-config=33024
+config=33280
disabled=0
enable_on_exec=0
read_format=15
optional=1
-# PERF_TYPE_RAW / topdown-fe-bound (0x8200)
+# PERF_TYPE_RAW / topdown-be-bound (0x8300)
[event14:base-stat]
fd=14
group_fd=11
type=4
-config=33280
+config=33536
disabled=0
enable_on_exec=0
read_format=15
optional=1
-# PERF_TYPE_RAW / topdown-be-bound (0x8300)
+# PERF_TYPE_RAW / topdown-bad-spec (0x8100)
[event15:base-stat]
fd=15
group_fd=11
type=4
-config=33536
+config=33024
disabled=0
enable_on_exec=0
read_format=15
optional=1
-# PERF_TYPE_RAW / topdown-heavy-ops (0x8400)
+# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING
[event16:base-stat]
fd=16
-group_fd=11
type=4
-config=33792
-disabled=0
-enable_on_exec=0
-read_format=15
+config=4109
optional=1
-# PERF_TYPE_RAW / topdown-br-mispredict (0x8500)
+# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/
[event17:base-stat]
fd=17
-group_fd=11
type=4
-config=34048
-disabled=0
-enable_on_exec=0
-read_format=15
+config=17039629
optional=1
-# PERF_TYPE_RAW / topdown-fetch-lat (0x8600)
+# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD
[event18:base-stat]
fd=18
-group_fd=11
type=4
-config=34304
-disabled=0
-enable_on_exec=0
-read_format=15
+config=60
optional=1
-# PERF_TYPE_RAW / topdown-mem-bound (0x8700)
+# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY
[event19:base-stat]
fd=19
-group_fd=11
type=4
-config=34560
-disabled=0
-enable_on_exec=0
-read_format=15
+config=2097421
+optional=1
+
+# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK
+[event20:base-stat]
+fd=20
+type=4
+config=316
+optional=1
+
+# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE
+[event21:base-stat]
+fd=21
+type=4
+config=412
+optional=1
+
+# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE
+[event22:base-stat]
+fd=22
+type=4
+config=572
+optional=1
+
+# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS
+[event23:base-stat]
+fd=23
+type=4
+config=706
+optional=1
+
+# PERF_TYPE_RAW / UOPS_ISSUED.ANY
+[event24:base-stat]
+fd=24
+type=4
+config=270
optional=1
# PERF_TYPE_HW_CACHE /
# PERF_COUNT_HW_CACHE_L1D << 0 |
# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
-[event20:base-stat]
-fd=20
+[event25:base-stat]
+fd=25
type=3
config=0
optional=1
@@ -181,8 +200,8 @@ optional=1
# PERF_COUNT_HW_CACHE_L1D << 0 |
# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
-[event21:base-stat]
-fd=21
+[event26:base-stat]
+fd=26
type=3
config=65536
optional=1
@@ -191,8 +210,8 @@ optional=1
# PERF_COUNT_HW_CACHE_LL << 0 |
# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
-[event22:base-stat]
-fd=22
+[event27:base-stat]
+fd=27
type=3
config=2
optional=1
@@ -201,8 +220,8 @@ optional=1
# PERF_COUNT_HW_CACHE_LL << 0 |
# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
-[event23:base-stat]
-fd=23
+[event28:base-stat]
+fd=28
type=3
config=65538
optional=1
@@ -211,8 +230,8 @@ optional=1
# PERF_COUNT_HW_CACHE_L1I << 0 |
# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
-[event24:base-stat]
-fd=24
+[event29:base-stat]
+fd=29
type=3
config=1
optional=1
@@ -221,8 +240,8 @@ optional=1
# PERF_COUNT_HW_CACHE_L1I << 0 |
# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
-[event25:base-stat]
-fd=25
+[event30:base-stat]
+fd=30
type=3
config=65537
optional=1
@@ -231,8 +250,8 @@ optional=1
# PERF_COUNT_HW_CACHE_DTLB << 0 |
# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
-[event26:base-stat]
-fd=26
+[event31:base-stat]
+fd=31
type=3
config=3
optional=1
@@ -241,8 +260,8 @@ optional=1
# PERF_COUNT_HW_CACHE_DTLB << 0 |
# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
-[event27:base-stat]
-fd=27
+[event32:base-stat]
+fd=32
type=3
config=65539
optional=1
@@ -251,8 +270,8 @@ optional=1
# PERF_COUNT_HW_CACHE_ITLB << 0 |
# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
-[event28:base-stat]
-fd=28
+[event33:base-stat]
+fd=33
type=3
config=4
optional=1
@@ -261,8 +280,8 @@ optional=1
# PERF_COUNT_HW_CACHE_ITLB << 0 |
# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
-[event29:base-stat]
-fd=29
+[event34:base-stat]
+fd=34
type=3
config=65540
optional=1
@@ -271,8 +290,8 @@ optional=1
# PERF_COUNT_HW_CACHE_L1D << 0 |
# (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
-[event30:base-stat]
-fd=30
+[event35:base-stat]
+fd=35
type=3
config=512
optional=1
@@ -281,8 +300,8 @@ optional=1
# PERF_COUNT_HW_CACHE_L1D << 0 |
# (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
-[event31:base-stat]
-fd=31
+[event36:base-stat]
+fd=36
type=3
config=66048
optional=1
diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
index cbf0e0c74906..733ead151c63 100644
--- a/tools/perf/tests/expr.c
+++ b/tools/perf/tests/expr.c
@@ -120,7 +120,8 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u
p = "FOO/0";
ret = expr__parse(&val, ctx, p);
- TEST_ASSERT_VAL("division by zero", ret == -1);
+ TEST_ASSERT_VAL("division by zero", ret == 0);
+ TEST_ASSERT_VAL("division by zero", isnan(val));
p = "BAR/";
ret = expr__parse(&val, ctx, p);
diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c
index 1185b79e6274..c05148ea400c 100644
--- a/tools/perf/tests/parse-metric.c
+++ b/tools/perf/tests/parse-metric.c
@@ -38,6 +38,7 @@ static void load_runtime_stat(struct evlist *evlist, struct value *vals)
evlist__alloc_aggr_stats(evlist, 1);
evlist__for_each_entry(evlist, evsel) {
count = find_value(evsel->name, vals);
+ evsel->supported = true;
evsel->stats->aggr->counts.val = count;
if (evsel__name_is(evsel, "duration_time"))
update_stats(&walltime_nsecs_stats, count);
diff --git a/tools/perf/tests/shell/stat.sh b/tools/perf/tests/shell/stat.sh
index 2c1d3f704995..b154fbb15d54 100755
--- a/tools/perf/tests/shell/stat.sh
+++ b/tools/perf/tests/shell/stat.sh
@@ -28,6 +28,18 @@ test_stat_record_report() {
echo "stat record and report test [Success]"
}
+test_stat_record_script() {
+ echo "stat record and script test"
+ if ! perf stat record -o - true | perf script -i - 2>&1 | \
+ grep -E -q "CPU[[:space:]]+THREAD[[:space:]]+VAL[[:space:]]+ENA[[:space:]]+RUN[[:space:]]+TIME[[:space:]]+EVENT"
+ then
+ echo "stat record and script test [Failed]"
+ err=1
+ return
+ fi
+ echo "stat record and script test [Success]"
+}
+
test_stat_repeat_weak_groups() {
echo "stat repeat weak groups test"
if ! perf stat -e '{cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles}' \
@@ -93,6 +105,7 @@ test_topdown_weak_groups() {
test_default_stat
test_stat_record_report
+test_stat_record_script
test_stat_repeat_weak_groups
test_topdown_groups
test_topdown_weak_groups
diff --git a/tools/perf/tests/shell/test_intel_pt.sh b/tools/perf/tests/shell/test_intel_pt.sh
index 4ddb17cb83c5..3a8b9bffa022 100755
--- a/tools/perf/tests/shell/test_intel_pt.sh
+++ b/tools/perf/tests/shell/test_intel_pt.sh
@@ -506,6 +506,13 @@ test_sample()
echo "perf record failed with --aux-sample"
return 1
fi
+ # Check with event with PMU name
+ if perf_record_no_decode -o "${perfdatafile}" -e br_misp_retired.all_branches:u uname ; then
+ if ! perf_record_no_decode -o "${perfdatafile}" -e '{intel_pt//,br_misp_retired.all_branches/aux-sample-size=8192/}:u' uname ; then
+ echo "perf record failed with --aux-sample-size"
+ return 1
+ fi
+ fi
echo OK
return 0
}
diff --git a/tools/perf/tests/shell/test_java_symbol.sh b/tools/perf/tests/shell/test_java_symbol.sh
index 90cea8811926..499539d1c479 100755
--- a/tools/perf/tests/shell/test_java_symbol.sh
+++ b/tools/perf/tests/shell/test_java_symbol.sh
@@ -56,7 +56,7 @@ if [ $? -ne 0 ]; then
exit 1
fi
-if ! perf inject -i $PERF_DATA -o $PERF_INJ_DATA -j; then
+if ! DEBUGINFOD_URLS='' perf inject -i $PERF_DATA -o $PERF_INJ_DATA -j; then
echo "Fail to inject samples"
exit 1
fi
diff --git a/tools/perf/trace/beauty/arch_prctl.c b/tools/perf/trace/beauty/arch_prctl.c
index fe022ca67e60..a211348d3204 100644
--- a/tools/perf/trace/beauty/arch_prctl.c
+++ b/tools/perf/trace/beauty/arch_prctl.c
@@ -12,10 +12,12 @@
static DEFINE_STRARRAY_OFFSET(x86_arch_prctl_codes_1, "ARCH_", x86_arch_prctl_codes_1_offset);
static DEFINE_STRARRAY_OFFSET(x86_arch_prctl_codes_2, "ARCH_", x86_arch_prctl_codes_2_offset);
+static DEFINE_STRARRAY_OFFSET(x86_arch_prctl_codes_3, "ARCH_", x86_arch_prctl_codes_3_offset);
static struct strarray *x86_arch_prctl_codes[] = {
&strarray__x86_arch_prctl_codes_1,
&strarray__x86_arch_prctl_codes_2,
+ &strarray__x86_arch_prctl_codes_3,
};
static DEFINE_STRARRAYS(x86_arch_prctl_codes);
diff --git a/tools/perf/trace/beauty/x86_arch_prctl.sh b/tools/perf/trace/beauty/x86_arch_prctl.sh
index 57fa6aaffe70..fd5c740512c5 100755
--- a/tools/perf/trace/beauty/x86_arch_prctl.sh
+++ b/tools/perf/trace/beauty/x86_arch_prctl.sh
@@ -24,3 +24,4 @@ print_range () {
print_range 1 0x1 0x1001
print_range 2 0x2 0x2001
+print_range 3 0x4 0x4001
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index bd18fe5f2719..f9df1df1eec0 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -214,7 +214,7 @@ perf-$(CONFIG_ZSTD) += zstd.o
perf-$(CONFIG_LIBCAP) += cap.o
-perf-y += demangle-cxx.o
+perf-$(CONFIG_CXX_DEMANGLE) += demangle-cxx.o
perf-y += demangle-ocaml.o
perf-y += demangle-java.o
perf-y += demangle-rust.o
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index 8d3cfbb3cc65..1d48226ae75d 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -416,6 +416,8 @@ int contention_end(u64 *ctx)
return 0;
}
+struct rq {};
+
extern struct rq runqueues __ksym;
struct rq___old {
diff --git a/tools/perf/util/bpf_skel/sample_filter.bpf.c b/tools/perf/util/bpf_skel/sample_filter.bpf.c
index cffe493af1ed..fb94f5280626 100644
--- a/tools/perf/util/bpf_skel/sample_filter.bpf.c
+++ b/tools/perf/util/bpf_skel/sample_filter.bpf.c
@@ -25,7 +25,7 @@ struct perf_sample_data___new {
} __attribute__((preserve_access_index));
/* new kernel perf_mem_data_src definition */
-union perf_mem_data_src__new {
+union perf_mem_data_src___new {
__u64 val;
struct {
__u64 mem_op:5, /* type of opcode */
@@ -108,7 +108,7 @@ static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx,
if (entry->part == 7)
return kctx->data->data_src.mem_blk;
if (entry->part == 8) {
- union perf_mem_data_src__new *data = (void *)&kctx->data->data_src;
+ union perf_mem_data_src___new *data = (void *)&kctx->data->data_src;
if (bpf_core_field_exists(data->mem_hops))
return data->mem_hops;
diff --git a/tools/perf/util/bpf_skel/vmlinux.h b/tools/perf/util/bpf_skel/vmlinux.h
index 449b1ea91fc4..c7ed51b0c1ef 100644
--- a/tools/perf/util/bpf_skel/vmlinux.h
+++ b/tools/perf/util/bpf_skel/vmlinux.h
@@ -1,6 +1,7 @@
#ifndef __VMLINUX_H
#define __VMLINUX_H
+#include <linux/stddef.h> // for define __always_inline
#include <linux/bpf.h>
#include <linux/types.h>
#include <linux/perf_event.h>
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index 70cac0375b34..ecca40787ac9 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -227,6 +227,19 @@ struct cs_etm_packet_queue {
#define INFO_HEADER_SIZE (sizeof(((struct perf_record_auxtrace_info *)0)->type) + \
sizeof(((struct perf_record_auxtrace_info *)0)->reserved__))
+/* CoreSight trace ID is currently the bottom 7 bits of the value */
+#define CORESIGHT_TRACE_ID_VAL_MASK GENMASK(6, 0)
+
+/*
+ * perf record will set the legacy meta data values as unused initially.
+ * This allows perf report to manage the decoders created when dynamic
+ * allocation in operation.
+ */
+#define CORESIGHT_TRACE_ID_UNUSED_FLAG BIT(31)
+
+/* Value to set for unused trace ID values */
+#define CORESIGHT_TRACE_ID_UNUSED_VAL 0x7F
+
int cs_etm__process_auxtrace_info(union perf_event *event,
struct perf_session *session);
struct perf_event_attr *cs_etm_get_default_config(struct perf_pmu *pmu);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 356c07f03be6..c2dbb5647e75 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -282,6 +282,7 @@ void evsel__init(struct evsel *evsel,
evsel->bpf_fd = -1;
INIT_LIST_HEAD(&evsel->config_terms);
INIT_LIST_HEAD(&evsel->bpf_counter_list);
+ INIT_LIST_HEAD(&evsel->bpf_filters);
perf_evsel__object.init(evsel);
evsel->sample_size = __evsel__sample_size(attr->sample_type);
evsel__calc_id_pos(evsel);
@@ -290,6 +291,7 @@ void evsel__init(struct evsel *evsel,
evsel->per_pkg_mask = NULL;
evsel->collect_stat = false;
evsel->pmu_name = NULL;
+ evsel->skippable = false;
}
struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx)
@@ -828,26 +830,26 @@ bool evsel__name_is(struct evsel *evsel, const char *name)
const char *evsel__group_pmu_name(const struct evsel *evsel)
{
- const struct evsel *leader;
+ struct evsel *leader = evsel__leader(evsel);
+ struct evsel *pos;
- /* If the pmu_name is set use it. pmu_name isn't set for CPU and software events. */
- if (evsel->pmu_name)
- return evsel->pmu_name;
/*
* Software events may be in a group with other uncore PMU events. Use
- * the pmu_name of the group leader to avoid breaking the software event
- * out of the group.
+ * the pmu_name of the first non-software event to avoid breaking the
+ * software event out of the group.
*
* Aux event leaders, like intel_pt, expect a group with events from
* other PMUs, so substitute the AUX event's PMU in this case.
*/
- leader = evsel__leader(evsel);
- if ((evsel->core.attr.type == PERF_TYPE_SOFTWARE || evsel__is_aux_event(leader)) &&
- leader->pmu_name) {
- return leader->pmu_name;
+ if (evsel->core.attr.type == PERF_TYPE_SOFTWARE || evsel__is_aux_event(leader)) {
+ /* Starting with the leader, find the first event with a named PMU. */
+ for_each_group_evsel(pos, leader) {
+ if (pos->pmu_name)
+ return pos->pmu_name;
+ }
}
- return "cpu";
+ return evsel->pmu_name ?: "cpu";
}
const char *evsel__metric_id(const struct evsel *evsel)
@@ -1725,9 +1727,13 @@ static int get_group_fd(struct evsel *evsel, int cpu_map_idx, int thread)
return -1;
fd = FD(leader, cpu_map_idx, thread);
- BUG_ON(fd == -1);
+ BUG_ON(fd == -1 && !leader->skippable);
- return fd;
+ /*
+ * When the leader has been skipped, return -2 to distinguish from no
+ * group leader case.
+ */
+ return fd == -1 ? -2 : fd;
}
static void evsel__remove_fd(struct evsel *pos, int nr_cpus, int nr_threads, int thread_idx)
@@ -2109,6 +2115,12 @@ retry_open:
group_fd = get_group_fd(evsel, idx, thread);
+ if (group_fd == -2) {
+ pr_debug("broken group leader for %s\n", evsel->name);
+ err = -EINVAL;
+ goto out_close;
+ }
+
test_attr__ready();
/* Debug message used by test scripts */
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index d575390d80bc..0f54f28a69c2 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -95,6 +95,7 @@ struct evsel {
bool weak_group;
bool bpf_counter;
bool use_config_name;
+ bool skippable;
int bpf_fd;
struct bpf_object *bpf_obj;
struct list_head config_terms;
@@ -150,10 +151,8 @@ struct evsel {
*/
struct bpf_counter_ops *bpf_counter_ops;
- union {
- struct list_head bpf_counter_list; /* for perf-stat -b */
- struct list_head bpf_filters; /* for perf-record --filter */
- };
+ struct list_head bpf_counter_list; /* for perf-stat -b */
+ struct list_head bpf_filters; /* for perf-record --filter */
/* for perf-stat --use-bpf */
int bperf_leader_prog_fd;
diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y
index 250e444bf032..4ce931cccb63 100644
--- a/tools/perf/util/expr.y
+++ b/tools/perf/util/expr.y
@@ -225,7 +225,11 @@ expr: NUMBER
{
if (fpclassify($3.val) == FP_ZERO) {
pr_debug("division by zero\n");
- YYABORT;
+ assert($3.ids == NULL);
+ if (compute_ids)
+ ids__free($1.ids);
+ $$.val = NAN;
+ $$.ids = NULL;
} else if (!compute_ids || (is_const($1.val) && is_const($3.val))) {
assert($1.ids == NULL);
assert($3.ids == NULL);
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index c566c6859302..5e9c657dd3f7 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -1144,12 +1144,12 @@ static int metricgroup__add_metric_callback(const struct pmu_metric *pm,
struct metricgroup__add_metric_data *data = vdata;
int ret = 0;
- if (pm->metric_expr &&
- (match_metric(pm->metric_group, data->metric_name) ||
- match_metric(pm->metric_name, data->metric_name))) {
+ if (pm->metric_expr && match_pm_metric(pm, data->metric_name)) {
+ bool metric_no_group = data->metric_no_group ||
+ match_metric(data->metric_name, pm->metricgroup_no_group);
data->has_match = true;
- ret = add_metric(data->list, pm, data->modifier, data->metric_no_group,
+ ret = add_metric(data->list, pm, data->modifier, metric_no_group,
data->metric_no_threshold, data->user_requested_cpu_list,
data->system_wide, /*root_metric=*/NULL,
/*visited_metrics=*/NULL, table);
@@ -1672,7 +1672,7 @@ static int metricgroup__topdown_max_level_callback(const struct pmu_metric *pm,
{
unsigned int *max_level = data;
unsigned int level;
- const char *p = strstr(pm->metric_group, "TopdownL");
+ const char *p = strstr(pm->metric_group ?: "", "TopdownL");
if (!p || p[8] == '\0')
return 0;
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index d71019dcd614..34ba840ae19a 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -2140,25 +2140,32 @@ static int evlist__cmp(void *state, const struct list_head *l, const struct list
int *leader_idx = state;
int lhs_leader_idx = *leader_idx, rhs_leader_idx = *leader_idx, ret;
const char *lhs_pmu_name, *rhs_pmu_name;
+ bool lhs_has_group = false, rhs_has_group = false;
/*
* First sort by grouping/leader. Read the leader idx only if the evsel
* is part of a group, as -1 indicates no group.
*/
- if (lhs_core->leader != lhs_core || lhs_core->nr_members > 1)
+ if (lhs_core->leader != lhs_core || lhs_core->nr_members > 1) {
+ lhs_has_group = true;
lhs_leader_idx = lhs_core->leader->idx;
- if (rhs_core->leader != rhs_core || rhs_core->nr_members > 1)
+ }
+ if (rhs_core->leader != rhs_core || rhs_core->nr_members > 1) {
+ rhs_has_group = true;
rhs_leader_idx = rhs_core->leader->idx;
+ }
if (lhs_leader_idx != rhs_leader_idx)
return lhs_leader_idx - rhs_leader_idx;
- /* Group by PMU. Groups can't span PMUs. */
- lhs_pmu_name = evsel__group_pmu_name(lhs);
- rhs_pmu_name = evsel__group_pmu_name(rhs);
- ret = strcmp(lhs_pmu_name, rhs_pmu_name);
- if (ret)
- return ret;
+ /* Group by PMU if there is a group. Groups can't span PMUs. */
+ if (lhs_has_group && rhs_has_group) {
+ lhs_pmu_name = evsel__group_pmu_name(lhs);
+ rhs_pmu_name = evsel__group_pmu_name(rhs);
+ ret = strcmp(lhs_pmu_name, rhs_pmu_name);
+ if (ret)
+ return ret;
+ }
/* Architecture specific sorting. */
return arch_evlist__cmp(lhs, rhs);
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 73b2ff2ddf29..bf5a6c14dfcd 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -431,7 +431,7 @@ static void print_metric_json(struct perf_stat_config *config __maybe_unused,
struct outstate *os = ctx;
FILE *out = os->fh;
- fprintf(out, "\"metric-value\" : %f, ", val);
+ fprintf(out, "\"metric-value\" : \"%f\", ", val);
fprintf(out, "\"metric-unit\" : \"%s\"", unit);
if (!config->metric_only)
fprintf(out, "}");
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index eeccab6751d7..1566a206ba42 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -403,12 +403,25 @@ static int prepare_metric(struct evsel **metric_events,
if (!aggr)
break;
- /*
- * If an event was scaled during stat gathering, reverse
- * the scale before computing the metric.
- */
- val = aggr->counts.val * (1.0 / metric_events[i]->scale);
- source_count = evsel__source_count(metric_events[i]);
+ if (!metric_events[i]->supported) {
+ /*
+ * Not supported events will have a count of 0,
+ * which can be confusing in a
+ * metric. Explicitly set the value to NAN. Not
+ * counted events (enable time of 0) are read as
+ * 0.
+ */
+ val = NAN;
+ source_count = 0;
+ } else {
+ /*
+ * If an event was scaled during stat gathering,
+ * reverse the scale before computing the
+ * metric.
+ */
+ val = aggr->counts.val * (1.0 / metric_events[i]->scale);
+ source_count = evsel__source_count(metric_events[i]);
+ }
}
n = strdup(evsel__metric_id(metric_events[i]));
if (!n)
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index b2ed9cc52265..63882a4db5c7 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -31,6 +31,13 @@
#include <bfd.h>
#endif
+#if defined(HAVE_LIBBFD_SUPPORT) || defined(HAVE_CPLUS_DEMANGLE_SUPPORT)
+#ifndef DMGL_PARAMS
+#define DMGL_PARAMS (1 << 0) /* Include function args */
+#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
+#endif
+#endif
+
#ifndef EM_AARCH64
#define EM_AARCH64 183 /* ARM 64 bit */
#endif
@@ -271,6 +278,26 @@ static bool want_demangle(bool is_kernel_sym)
return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
}
+/*
+ * Demangle C++ function signature, typically replaced by demangle-cxx.cpp
+ * version.
+ */
+__weak char *cxx_demangle_sym(const char *str __maybe_unused, bool params __maybe_unused,
+ bool modifiers __maybe_unused)
+{
+#ifdef HAVE_LIBBFD_SUPPORT
+ int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0);
+
+ return bfd_demangle(NULL, str, flags);
+#elif defined(HAVE_CPLUS_DEMANGLE_SUPPORT)
+ int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0);
+
+ return cplus_demangle(str, flags);
+#else
+ return NULL;
+#endif
+}
+
static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
{
char *demangled = NULL;
diff --git a/tools/power/cpupower/lib/powercap.c b/tools/power/cpupower/lib/powercap.c
index 0ce29ee4c2e4..a7a59c6bacda 100644
--- a/tools/power/cpupower/lib/powercap.c
+++ b/tools/power/cpupower/lib/powercap.c
@@ -40,25 +40,34 @@ static int sysfs_get_enabled(char *path, int *mode)
{
int fd;
char yes_no;
+ int ret = 0;
*mode = 0;
fd = open(path, O_RDONLY);
- if (fd == -1)
- return -1;
+ if (fd == -1) {
+ ret = -1;
+ goto out;
+ }
if (read(fd, &yes_no, 1) != 1) {
- close(fd);
- return -1;
+ ret = -1;
+ goto out_close;
}
if (yes_no == '1') {
*mode = 1;
- return 0;
+ goto out_close;
} else if (yes_no == '0') {
- return 0;
+ goto out_close;
+ } else {
+ ret = -1;
+ goto out_close;
}
- return -1;
+out_close:
+ close(fd);
+out:
+ return ret;
}
int powercap_get_enabled(int *mode)
diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
index e7d48cb563c0..ae6af354a81d 100644
--- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
@@ -70,8 +70,8 @@ static int max_freq_mode;
*/
static unsigned long max_frequency;
-static unsigned long long tsc_at_measure_start;
-static unsigned long long tsc_at_measure_end;
+static unsigned long long *tsc_at_measure_start;
+static unsigned long long *tsc_at_measure_end;
static unsigned long long *mperf_previous_count;
static unsigned long long *aperf_previous_count;
static unsigned long long *mperf_current_count;
@@ -169,7 +169,7 @@ static int mperf_get_count_percent(unsigned int id, double *percent,
aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu];
if (max_freq_mode == MAX_FREQ_TSC_REF) {
- tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
+ tsc_diff = tsc_at_measure_end[cpu] - tsc_at_measure_start[cpu];
*percent = 100.0 * mperf_diff / tsc_diff;
dprint("%s: TSC Ref - mperf_diff: %llu, tsc_diff: %llu\n",
mperf_cstates[id].name, mperf_diff, tsc_diff);
@@ -206,7 +206,7 @@ static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
if (max_freq_mode == MAX_FREQ_TSC_REF) {
/* Calculate max_freq from TSC count */
- tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
+ tsc_diff = tsc_at_measure_end[cpu] - tsc_at_measure_start[cpu];
time_diff = timespec_diff_us(time_start, time_end);
max_frequency = tsc_diff / time_diff;
}
@@ -225,33 +225,27 @@ static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
static int mperf_start(void)
{
int cpu;
- unsigned long long dbg;
clock_gettime(CLOCK_REALTIME, &time_start);
- mperf_get_tsc(&tsc_at_measure_start);
- for (cpu = 0; cpu < cpu_count; cpu++)
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ mperf_get_tsc(&tsc_at_measure_start[cpu]);
mperf_init_stats(cpu);
+ }
- mperf_get_tsc(&dbg);
- dprint("TSC diff: %llu\n", dbg - tsc_at_measure_start);
return 0;
}
static int mperf_stop(void)
{
- unsigned long long dbg;
int cpu;
- for (cpu = 0; cpu < cpu_count; cpu++)
+ for (cpu = 0; cpu < cpu_count; cpu++) {
mperf_measure_stats(cpu);
+ mperf_get_tsc(&tsc_at_measure_end[cpu]);
+ }
- mperf_get_tsc(&tsc_at_measure_end);
clock_gettime(CLOCK_REALTIME, &time_end);
-
- mperf_get_tsc(&dbg);
- dprint("TSC diff: %llu\n", dbg - tsc_at_measure_end);
-
return 0;
}
@@ -353,7 +347,8 @@ struct cpuidle_monitor *mperf_register(void)
aperf_previous_count = calloc(cpu_count, sizeof(unsigned long long));
mperf_current_count = calloc(cpu_count, sizeof(unsigned long long));
aperf_current_count = calloc(cpu_count, sizeof(unsigned long long));
-
+ tsc_at_measure_start = calloc(cpu_count, sizeof(unsigned long long));
+ tsc_at_measure_end = calloc(cpu_count, sizeof(unsigned long long));
mperf_monitor.name_len = strlen(mperf_monitor.name);
return &mperf_monitor;
}
@@ -364,6 +359,8 @@ void mperf_unregister(void)
free(aperf_previous_count);
free(mperf_current_count);
free(aperf_current_count);
+ free(tsc_at_measure_start);
+ free(tsc_at_measure_end);
free(is_valid);
}
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index fba7bec96acd..6f9347ade82c 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -6,6 +6,7 @@ ldflags-y += --wrap=acpi_pci_find_root
ldflags-y += --wrap=nvdimm_bus_register
ldflags-y += --wrap=devm_cxl_port_enumerate_dports
ldflags-y += --wrap=devm_cxl_setup_hdm
+ldflags-y += --wrap=devm_cxl_enable_hdm
ldflags-y += --wrap=devm_cxl_add_passthrough_decoder
ldflags-y += --wrap=devm_cxl_enumerate_decoders
ldflags-y += --wrap=cxl_await_media_ready
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index ba572d03c687..34b48027b3de 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -1256,6 +1256,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
if (rc)
return rc;
+ cxlds->media_ready = true;
rc = cxl_dev_state_identify(cxlds);
if (rc)
return rc;
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index de3933a776fd..284416527644 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -149,6 +149,21 @@ struct cxl_hdm *__wrap_devm_cxl_setup_hdm(struct cxl_port *port,
}
EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_setup_hdm, CXL);
+int __wrap_devm_cxl_enable_hdm(struct cxl_port *port, struct cxl_hdm *cxlhdm)
+{
+ int index, rc;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+ if (ops && ops->is_mock_port(port->uport))
+ rc = 0;
+ else
+ rc = devm_cxl_enable_hdm(port, cxlhdm);
+ put_cxl_mock_ops(index);
+
+ return rc;
+}
+EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_enable_hdm, CXL);
+
int __wrap_devm_cxl_add_passthrough_decoder(struct cxl_port *port)
{
int rc, index;
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index caf32a9b9608..7527f738b4a1 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -I. -I../../include -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address \
- -fsanitize=undefined
+CFLAGS += -I. -I../../include -I../../../lib -g -Og -Wall \
+ -D_LGPL_SOURCE -fsanitize=address -fsanitize=undefined
LDFLAGS += -fsanitize=address -fsanitize=undefined
LDLIBS+= -lpthread -lurcu
TARGETS = main idr-test multiorder xarray maple
@@ -49,6 +49,7 @@ $(OFILES): Makefile *.h */*.h generated/map-shift.h generated/bit-length.h \
../../../include/linux/xarray.h \
../../../include/linux/maple_tree.h \
../../../include/linux/radix-tree.h \
+ ../../../lib/radix-tree.h \
../../../include/linux/idr.h
radix-tree.c: ../../../lib/radix-tree.c
diff --git a/tools/testing/selftests/alsa/pcm-test.c b/tools/testing/selftests/alsa/pcm-test.c
index 3e390fe67eb9..b7eef32addb4 100644
--- a/tools/testing/selftests/alsa/pcm-test.c
+++ b/tools/testing/selftests/alsa/pcm-test.c
@@ -381,7 +381,7 @@ __format:
goto __close;
}
if (rrate != rate) {
- snprintf(msg, sizeof(msg), "rate mismatch %ld != %ld", rate, rrate);
+ snprintf(msg, sizeof(msg), "rate mismatch %ld != %d", rate, rrate);
goto __close;
}
rperiod_size = period_size;
@@ -447,24 +447,24 @@ __format:
frames = snd_pcm_writei(handle, samples, rate);
if (frames < 0) {
snprintf(msg, sizeof(msg),
- "Write failed: expected %d, wrote %li", rate, frames);
+ "Write failed: expected %ld, wrote %li", rate, frames);
goto __close;
}
if (frames < rate) {
snprintf(msg, sizeof(msg),
- "expected %d, wrote %li", rate, frames);
+ "expected %ld, wrote %li", rate, frames);
goto __close;
}
} else {
frames = snd_pcm_readi(handle, samples, rate);
if (frames < 0) {
snprintf(msg, sizeof(msg),
- "expected %d, wrote %li", rate, frames);
+ "expected %ld, wrote %li", rate, frames);
goto __close;
}
if (frames < rate) {
snprintf(msg, sizeof(msg),
- "expected %d, wrote %li", rate, frames);
+ "expected %ld, wrote %li", rate, frames);
goto __close;
}
}
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index c49e5403ad0e..28d2c77262be 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -197,7 +197,7 @@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_r
$(OUTPUT)/sign-file: ../../../../scripts/sign-file.c
$(call msg,SIGN-FILE,,$@)
- $(Q)$(CC) $(shell $(HOSTPKG_CONFIG)--cflags libcrypto 2> /dev/null) \
+ $(Q)$(CC) $(shell $(HOSTPKG_CONFIG) --cflags libcrypto 2> /dev/null) \
$< -o $@ \
$(shell $(HOSTPKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto)
diff --git a/tools/testing/selftests/bpf/prog_tests/inner_array_lookup.c b/tools/testing/selftests/bpf/prog_tests/inner_array_lookup.c
new file mode 100644
index 000000000000..9ab4cd195108
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/inner_array_lookup.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <test_progs.h>
+
+#include "inner_array_lookup.skel.h"
+
+void test_inner_array_lookup(void)
+{
+ int map1_fd, err;
+ int key = 3;
+ int val = 1;
+ struct inner_array_lookup *skel;
+
+ skel = inner_array_lookup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_load_skeleton"))
+ return;
+
+ err = inner_array_lookup__attach(skel);
+ if (!ASSERT_OK(err, "skeleton_attach"))
+ goto cleanup;
+
+ map1_fd = bpf_map__fd(skel->maps.inner_map1);
+ bpf_map_update_elem(map1_fd, &key, &val, 0);
+
+ /* Probe should have set the element at index 3 to 2 */
+ bpf_map_lookup_elem(map1_fd, &key, &val);
+ ASSERT_EQ(val, 2, "value_is_2");
+
+cleanup:
+ inner_array_lookup__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 0ce25a967481..064cc5e8d9ad 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -2,6 +2,7 @@
// Copyright (c) 2020 Cloudflare
#include <error.h>
#include <netinet/tcp.h>
+#include <sys/epoll.h>
#include "test_progs.h"
#include "test_skmsg_load_helpers.skel.h"
@@ -9,8 +10,12 @@
#include "test_sockmap_invalid_update.skel.h"
#include "test_sockmap_skb_verdict_attach.skel.h"
#include "test_sockmap_progs_query.skel.h"
+#include "test_sockmap_pass_prog.skel.h"
+#include "test_sockmap_drop_prog.skel.h"
#include "bpf_iter_sockmap.skel.h"
+#include "sockmap_helpers.h"
+
#define TCP_REPAIR 19 /* TCP sock is under repair right now */
#define TCP_REPAIR_ON 1
@@ -350,6 +355,126 @@ out:
test_sockmap_progs_query__destroy(skel);
}
+#define MAX_EVENTS 10
+static void test_sockmap_skb_verdict_shutdown(void)
+{
+ struct epoll_event ev, events[MAX_EVENTS];
+ int n, err, map, verdict, s, c1, p1;
+ struct test_sockmap_pass_prog *skel;
+ int epollfd;
+ int zero = 0;
+ char b;
+
+ skel = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ map = bpf_map__fd(skel->maps.sock_map_rx);
+
+ err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out;
+
+ s = socket_loopback(AF_INET, SOCK_STREAM);
+ if (s < 0)
+ goto out;
+ err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1);
+ if (err < 0)
+ goto out;
+
+ err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
+ if (err < 0)
+ goto out_close;
+
+ shutdown(p1, SHUT_WR);
+
+ ev.events = EPOLLIN;
+ ev.data.fd = c1;
+
+ epollfd = epoll_create1(0);
+ if (!ASSERT_GT(epollfd, -1, "epoll_create(0)"))
+ goto out_close;
+ err = epoll_ctl(epollfd, EPOLL_CTL_ADD, c1, &ev);
+ if (!ASSERT_OK(err, "epoll_ctl(EPOLL_CTL_ADD)"))
+ goto out_close;
+ err = epoll_wait(epollfd, events, MAX_EVENTS, -1);
+ if (!ASSERT_EQ(err, 1, "epoll_wait(fd)"))
+ goto out_close;
+
+ n = recv(c1, &b, 1, SOCK_NONBLOCK);
+ ASSERT_EQ(n, 0, "recv_timeout(fin)");
+out_close:
+ close(c1);
+ close(p1);
+out:
+ test_sockmap_pass_prog__destroy(skel);
+}
+
+static void test_sockmap_skb_verdict_fionread(bool pass_prog)
+{
+ int expected, zero = 0, sent, recvd, avail;
+ int err, map, verdict, s, c0, c1, p0, p1;
+ struct test_sockmap_pass_prog *pass;
+ struct test_sockmap_drop_prog *drop;
+ char buf[256] = "0123456789";
+
+ if (pass_prog) {
+ pass = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(pass, "open_and_load"))
+ return;
+ verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
+ map = bpf_map__fd(pass->maps.sock_map_rx);
+ expected = sizeof(buf);
+ } else {
+ drop = test_sockmap_drop_prog__open_and_load();
+ if (!ASSERT_OK_PTR(drop, "open_and_load"))
+ return;
+ verdict = bpf_program__fd(drop->progs.prog_skb_verdict);
+ map = bpf_map__fd(drop->maps.sock_map_rx);
+ /* On drop data is consumed immediately and copied_seq inc'd */
+ expected = 0;
+ }
+
+
+ err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out;
+
+ s = socket_loopback(AF_INET, SOCK_STREAM);
+ if (!ASSERT_GT(s, -1, "socket_loopback(s)"))
+ goto out;
+ err = create_socket_pairs(s, AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1);
+ if (!ASSERT_OK(err, "create_socket_pairs(s)"))
+ goto out;
+
+ err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c1)"))
+ goto out_close;
+
+ sent = xsend(p1, &buf, sizeof(buf), 0);
+ ASSERT_EQ(sent, sizeof(buf), "xsend(p0)");
+ err = ioctl(c1, FIONREAD, &avail);
+ ASSERT_OK(err, "ioctl(FIONREAD) error");
+ ASSERT_EQ(avail, expected, "ioctl(FIONREAD)");
+ /* On DROP test there will be no data to read */
+ if (pass_prog) {
+ recvd = recv_timeout(c1, &buf, sizeof(buf), SOCK_NONBLOCK, IO_TIMEOUT_SEC);
+ ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(c0)");
+ }
+
+out_close:
+ close(c0);
+ close(p0);
+ close(c1);
+ close(p1);
+out:
+ if (pass_prog)
+ test_sockmap_pass_prog__destroy(pass);
+ else
+ test_sockmap_drop_prog__destroy(drop);
+}
+
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
@@ -384,4 +509,10 @@ void test_sockmap_basic(void)
test_sockmap_progs_query(BPF_SK_SKB_STREAM_VERDICT);
if (test__start_subtest("sockmap skb_verdict progs query"))
test_sockmap_progs_query(BPF_SK_SKB_VERDICT);
+ if (test__start_subtest("sockmap skb_verdict shutdown"))
+ test_sockmap_skb_verdict_shutdown();
+ if (test__start_subtest("sockmap skb_verdict fionread"))
+ test_sockmap_skb_verdict_fionread(true);
+ if (test__start_subtest("sockmap skb_verdict fionread on drop"))
+ test_sockmap_skb_verdict_fionread(false);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
new file mode 100644
index 000000000000..d12665490a90
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
@@ -0,0 +1,390 @@
+#ifndef __SOCKMAP_HELPERS__
+#define __SOCKMAP_HELPERS__
+
+#include <linux/vm_sockets.h>
+
+#define IO_TIMEOUT_SEC 30
+#define MAX_STRERR_LEN 256
+#define MAX_TEST_NAME 80
+
+/* workaround for older vm_sockets.h */
+#ifndef VMADDR_CID_LOCAL
+#define VMADDR_CID_LOCAL 1
+#endif
+
+#define __always_unused __attribute__((__unused__))
+
+#define _FAIL(errnum, fmt...) \
+ ({ \
+ error_at_line(0, (errnum), __func__, __LINE__, fmt); \
+ CHECK_FAIL(true); \
+ })
+#define FAIL(fmt...) _FAIL(0, fmt)
+#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
+#define FAIL_LIBBPF(err, msg) \
+ ({ \
+ char __buf[MAX_STRERR_LEN]; \
+ libbpf_strerror((err), __buf, sizeof(__buf)); \
+ FAIL("%s: %s", (msg), __buf); \
+ })
+
+/* Wrappers that fail the test on error and report it. */
+
+#define xaccept_nonblock(fd, addr, len) \
+ ({ \
+ int __ret = \
+ accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \
+ if (__ret == -1) \
+ FAIL_ERRNO("accept"); \
+ __ret; \
+ })
+
+#define xbind(fd, addr, len) \
+ ({ \
+ int __ret = bind((fd), (addr), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("bind"); \
+ __ret; \
+ })
+
+#define xclose(fd) \
+ ({ \
+ int __ret = close((fd)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("close"); \
+ __ret; \
+ })
+
+#define xconnect(fd, addr, len) \
+ ({ \
+ int __ret = connect((fd), (addr), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("connect"); \
+ __ret; \
+ })
+
+#define xgetsockname(fd, addr, len) \
+ ({ \
+ int __ret = getsockname((fd), (addr), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("getsockname"); \
+ __ret; \
+ })
+
+#define xgetsockopt(fd, level, name, val, len) \
+ ({ \
+ int __ret = getsockopt((fd), (level), (name), (val), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("getsockopt(" #name ")"); \
+ __ret; \
+ })
+
+#define xlisten(fd, backlog) \
+ ({ \
+ int __ret = listen((fd), (backlog)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("listen"); \
+ __ret; \
+ })
+
+#define xsetsockopt(fd, level, name, val, len) \
+ ({ \
+ int __ret = setsockopt((fd), (level), (name), (val), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("setsockopt(" #name ")"); \
+ __ret; \
+ })
+
+#define xsend(fd, buf, len, flags) \
+ ({ \
+ ssize_t __ret = send((fd), (buf), (len), (flags)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("send"); \
+ __ret; \
+ })
+
+#define xrecv_nonblock(fd, buf, len, flags) \
+ ({ \
+ ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \
+ IO_TIMEOUT_SEC); \
+ if (__ret == -1) \
+ FAIL_ERRNO("recv"); \
+ __ret; \
+ })
+
+#define xsocket(family, sotype, flags) \
+ ({ \
+ int __ret = socket(family, sotype, flags); \
+ if (__ret == -1) \
+ FAIL_ERRNO("socket"); \
+ __ret; \
+ })
+
+#define xbpf_map_delete_elem(fd, key) \
+ ({ \
+ int __ret = bpf_map_delete_elem((fd), (key)); \
+ if (__ret < 0) \
+ FAIL_ERRNO("map_delete"); \
+ __ret; \
+ })
+
+#define xbpf_map_lookup_elem(fd, key, val) \
+ ({ \
+ int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
+ if (__ret < 0) \
+ FAIL_ERRNO("map_lookup"); \
+ __ret; \
+ })
+
+#define xbpf_map_update_elem(fd, key, val, flags) \
+ ({ \
+ int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
+ if (__ret < 0) \
+ FAIL_ERRNO("map_update"); \
+ __ret; \
+ })
+
+#define xbpf_prog_attach(prog, target, type, flags) \
+ ({ \
+ int __ret = \
+ bpf_prog_attach((prog), (target), (type), (flags)); \
+ if (__ret < 0) \
+ FAIL_ERRNO("prog_attach(" #type ")"); \
+ __ret; \
+ })
+
+#define xbpf_prog_detach2(prog, target, type) \
+ ({ \
+ int __ret = bpf_prog_detach2((prog), (target), (type)); \
+ if (__ret < 0) \
+ FAIL_ERRNO("prog_detach2(" #type ")"); \
+ __ret; \
+ })
+
+#define xpthread_create(thread, attr, func, arg) \
+ ({ \
+ int __ret = pthread_create((thread), (attr), (func), (arg)); \
+ errno = __ret; \
+ if (__ret) \
+ FAIL_ERRNO("pthread_create"); \
+ __ret; \
+ })
+
+#define xpthread_join(thread, retval) \
+ ({ \
+ int __ret = pthread_join((thread), (retval)); \
+ errno = __ret; \
+ if (__ret) \
+ FAIL_ERRNO("pthread_join"); \
+ __ret; \
+ })
+
+static inline int poll_read(int fd, unsigned int timeout_sec)
+{
+ struct timeval timeout = { .tv_sec = timeout_sec };
+ fd_set rfds;
+ int r;
+
+ FD_ZERO(&rfds);
+ FD_SET(fd, &rfds);
+
+ r = select(fd + 1, &rfds, NULL, NULL, &timeout);
+ if (r == 0)
+ errno = ETIME;
+
+ return r == 1 ? 0 : -1;
+}
+
+static inline int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
+ unsigned int timeout_sec)
+{
+ if (poll_read(fd, timeout_sec))
+ return -1;
+
+ return accept(fd, addr, len);
+}
+
+static inline int recv_timeout(int fd, void *buf, size_t len, int flags,
+ unsigned int timeout_sec)
+{
+ if (poll_read(fd, timeout_sec))
+ return -1;
+
+ return recv(fd, buf, len, flags);
+}
+
+static inline void init_addr_loopback4(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
+
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = 0;
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ *len = sizeof(*addr4);
+}
+
+static inline void init_addr_loopback6(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));
+
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = 0;
+ addr6->sin6_addr = in6addr_loopback;
+ *len = sizeof(*addr6);
+}
+
+static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss));
+
+ addr->svm_family = AF_VSOCK;
+ addr->svm_port = VMADDR_PORT_ANY;
+ addr->svm_cid = VMADDR_CID_LOCAL;
+ *len = sizeof(*addr);
+}
+
+static inline void init_addr_loopback(int family, struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ switch (family) {
+ case AF_INET:
+ init_addr_loopback4(ss, len);
+ return;
+ case AF_INET6:
+ init_addr_loopback6(ss, len);
+ return;
+ case AF_VSOCK:
+ init_addr_loopback_vsock(ss, len);
+ return;
+ default:
+ FAIL("unsupported address family %d", family);
+ }
+}
+
+static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
+{
+ return (struct sockaddr *)ss;
+}
+
+static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
+{
+ u64 value;
+ u32 key;
+ int err;
+
+ key = 0;
+ value = fd1;
+ err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ if (err)
+ return err;
+
+ key = 1;
+ value = fd2;
+ return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+}
+
+static inline int create_pair(int s, int family, int sotype, int *c, int *p)
+{
+ struct sockaddr_storage addr;
+ socklen_t len;
+ int err = 0;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ return err;
+
+ *c = xsocket(family, sotype, 0);
+ if (*c < 0)
+ return errno;
+ err = xconnect(*c, sockaddr(&addr), len);
+ if (err) {
+ err = errno;
+ goto close_cli0;
+ }
+
+ *p = xaccept_nonblock(s, NULL, NULL);
+ if (*p < 0) {
+ err = errno;
+ goto close_cli0;
+ }
+ return err;
+close_cli0:
+ close(*c);
+ return err;
+}
+
+static inline int create_socket_pairs(int s, int family, int sotype,
+ int *c0, int *c1, int *p0, int *p1)
+{
+ int err;
+
+ err = create_pair(s, family, sotype, c0, p0);
+ if (err)
+ return err;
+
+ err = create_pair(s, family, sotype, c1, p1);
+ if (err) {
+ close(*c0);
+ close(*p0);
+ }
+ return err;
+}
+
+static inline int enable_reuseport(int s, int progfd)
+{
+ int err, one = 1;
+
+ err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+ if (err)
+ return -1;
+ err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
+ sizeof(progfd));
+ if (err)
+ return -1;
+
+ return 0;
+}
+
+static inline int socket_loopback_reuseport(int family, int sotype, int progfd)
+{
+ struct sockaddr_storage addr;
+ socklen_t len;
+ int err, s;
+
+ init_addr_loopback(family, &addr, &len);
+
+ s = xsocket(family, sotype, 0);
+ if (s == -1)
+ return -1;
+
+ if (progfd >= 0)
+ enable_reuseport(s, progfd);
+
+ err = xbind(s, sockaddr(&addr), len);
+ if (err)
+ goto close;
+
+ if (sotype & SOCK_DGRAM)
+ return s;
+
+ err = xlisten(s, SOMAXCONN);
+ if (err)
+ goto close;
+
+ return s;
+close:
+ xclose(s);
+ return -1;
+}
+
+static inline int socket_loopback(int family, int sotype)
+{
+ return socket_loopback_reuseport(family, sotype, -1);
+}
+
+
+#endif // __SOCKMAP_HELPERS__
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 141c1e5944ee..b4f6f3a50ae5 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -20,11 +20,6 @@
#include <unistd.h>
#include <linux/vm_sockets.h>
-/* workaround for older vm_sockets.h */
-#ifndef VMADDR_CID_LOCAL
-#define VMADDR_CID_LOCAL 1
-#endif
-
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
@@ -32,315 +27,7 @@
#include "test_progs.h"
#include "test_sockmap_listen.skel.h"
-#define IO_TIMEOUT_SEC 30
-#define MAX_STRERR_LEN 256
-#define MAX_TEST_NAME 80
-
-#define __always_unused __attribute__((__unused__))
-
-#define _FAIL(errnum, fmt...) \
- ({ \
- error_at_line(0, (errnum), __func__, __LINE__, fmt); \
- CHECK_FAIL(true); \
- })
-#define FAIL(fmt...) _FAIL(0, fmt)
-#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
-#define FAIL_LIBBPF(err, msg) \
- ({ \
- char __buf[MAX_STRERR_LEN]; \
- libbpf_strerror((err), __buf, sizeof(__buf)); \
- FAIL("%s: %s", (msg), __buf); \
- })
-
-/* Wrappers that fail the test on error and report it. */
-
-#define xaccept_nonblock(fd, addr, len) \
- ({ \
- int __ret = \
- accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \
- if (__ret == -1) \
- FAIL_ERRNO("accept"); \
- __ret; \
- })
-
-#define xbind(fd, addr, len) \
- ({ \
- int __ret = bind((fd), (addr), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("bind"); \
- __ret; \
- })
-
-#define xclose(fd) \
- ({ \
- int __ret = close((fd)); \
- if (__ret == -1) \
- FAIL_ERRNO("close"); \
- __ret; \
- })
-
-#define xconnect(fd, addr, len) \
- ({ \
- int __ret = connect((fd), (addr), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("connect"); \
- __ret; \
- })
-
-#define xgetsockname(fd, addr, len) \
- ({ \
- int __ret = getsockname((fd), (addr), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("getsockname"); \
- __ret; \
- })
-
-#define xgetsockopt(fd, level, name, val, len) \
- ({ \
- int __ret = getsockopt((fd), (level), (name), (val), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("getsockopt(" #name ")"); \
- __ret; \
- })
-
-#define xlisten(fd, backlog) \
- ({ \
- int __ret = listen((fd), (backlog)); \
- if (__ret == -1) \
- FAIL_ERRNO("listen"); \
- __ret; \
- })
-
-#define xsetsockopt(fd, level, name, val, len) \
- ({ \
- int __ret = setsockopt((fd), (level), (name), (val), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("setsockopt(" #name ")"); \
- __ret; \
- })
-
-#define xsend(fd, buf, len, flags) \
- ({ \
- ssize_t __ret = send((fd), (buf), (len), (flags)); \
- if (__ret == -1) \
- FAIL_ERRNO("send"); \
- __ret; \
- })
-
-#define xrecv_nonblock(fd, buf, len, flags) \
- ({ \
- ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \
- IO_TIMEOUT_SEC); \
- if (__ret == -1) \
- FAIL_ERRNO("recv"); \
- __ret; \
- })
-
-#define xsocket(family, sotype, flags) \
- ({ \
- int __ret = socket(family, sotype, flags); \
- if (__ret == -1) \
- FAIL_ERRNO("socket"); \
- __ret; \
- })
-
-#define xbpf_map_delete_elem(fd, key) \
- ({ \
- int __ret = bpf_map_delete_elem((fd), (key)); \
- if (__ret < 0) \
- FAIL_ERRNO("map_delete"); \
- __ret; \
- })
-
-#define xbpf_map_lookup_elem(fd, key, val) \
- ({ \
- int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
- if (__ret < 0) \
- FAIL_ERRNO("map_lookup"); \
- __ret; \
- })
-
-#define xbpf_map_update_elem(fd, key, val, flags) \
- ({ \
- int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
- if (__ret < 0) \
- FAIL_ERRNO("map_update"); \
- __ret; \
- })
-
-#define xbpf_prog_attach(prog, target, type, flags) \
- ({ \
- int __ret = \
- bpf_prog_attach((prog), (target), (type), (flags)); \
- if (__ret < 0) \
- FAIL_ERRNO("prog_attach(" #type ")"); \
- __ret; \
- })
-
-#define xbpf_prog_detach2(prog, target, type) \
- ({ \
- int __ret = bpf_prog_detach2((prog), (target), (type)); \
- if (__ret < 0) \
- FAIL_ERRNO("prog_detach2(" #type ")"); \
- __ret; \
- })
-
-#define xpthread_create(thread, attr, func, arg) \
- ({ \
- int __ret = pthread_create((thread), (attr), (func), (arg)); \
- errno = __ret; \
- if (__ret) \
- FAIL_ERRNO("pthread_create"); \
- __ret; \
- })
-
-#define xpthread_join(thread, retval) \
- ({ \
- int __ret = pthread_join((thread), (retval)); \
- errno = __ret; \
- if (__ret) \
- FAIL_ERRNO("pthread_join"); \
- __ret; \
- })
-
-static int poll_read(int fd, unsigned int timeout_sec)
-{
- struct timeval timeout = { .tv_sec = timeout_sec };
- fd_set rfds;
- int r;
-
- FD_ZERO(&rfds);
- FD_SET(fd, &rfds);
-
- r = select(fd + 1, &rfds, NULL, NULL, &timeout);
- if (r == 0)
- errno = ETIME;
-
- return r == 1 ? 0 : -1;
-}
-
-static int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
- unsigned int timeout_sec)
-{
- if (poll_read(fd, timeout_sec))
- return -1;
-
- return accept(fd, addr, len);
-}
-
-static int recv_timeout(int fd, void *buf, size_t len, int flags,
- unsigned int timeout_sec)
-{
- if (poll_read(fd, timeout_sec))
- return -1;
-
- return recv(fd, buf, len, flags);
-}
-
-static void init_addr_loopback4(struct sockaddr_storage *ss, socklen_t *len)
-{
- struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
-
- addr4->sin_family = AF_INET;
- addr4->sin_port = 0;
- addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
- *len = sizeof(*addr4);
-}
-
-static void init_addr_loopback6(struct sockaddr_storage *ss, socklen_t *len)
-{
- struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));
-
- addr6->sin6_family = AF_INET6;
- addr6->sin6_port = 0;
- addr6->sin6_addr = in6addr_loopback;
- *len = sizeof(*addr6);
-}
-
-static void init_addr_loopback_vsock(struct sockaddr_storage *ss, socklen_t *len)
-{
- struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss));
-
- addr->svm_family = AF_VSOCK;
- addr->svm_port = VMADDR_PORT_ANY;
- addr->svm_cid = VMADDR_CID_LOCAL;
- *len = sizeof(*addr);
-}
-
-static void init_addr_loopback(int family, struct sockaddr_storage *ss,
- socklen_t *len)
-{
- switch (family) {
- case AF_INET:
- init_addr_loopback4(ss, len);
- return;
- case AF_INET6:
- init_addr_loopback6(ss, len);
- return;
- case AF_VSOCK:
- init_addr_loopback_vsock(ss, len);
- return;
- default:
- FAIL("unsupported address family %d", family);
- }
-}
-
-static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
-{
- return (struct sockaddr *)ss;
-}
-
-static int enable_reuseport(int s, int progfd)
-{
- int err, one = 1;
-
- err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
- if (err)
- return -1;
- err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
- sizeof(progfd));
- if (err)
- return -1;
-
- return 0;
-}
-
-static int socket_loopback_reuseport(int family, int sotype, int progfd)
-{
- struct sockaddr_storage addr;
- socklen_t len;
- int err, s;
-
- init_addr_loopback(family, &addr, &len);
-
- s = xsocket(family, sotype, 0);
- if (s == -1)
- return -1;
-
- if (progfd >= 0)
- enable_reuseport(s, progfd);
-
- err = xbind(s, sockaddr(&addr), len);
- if (err)
- goto close;
-
- if (sotype & SOCK_DGRAM)
- return s;
-
- err = xlisten(s, SOMAXCONN);
- if (err)
- goto close;
-
- return s;
-close:
- xclose(s);
- return -1;
-}
-
-static int socket_loopback(int family, int sotype)
-{
- return socket_loopback_reuseport(family, sotype, -1);
-}
+#include "sockmap_helpers.h"
static void test_insert_invalid(struct test_sockmap_listen *skel __always_unused,
int family, int sotype, int mapfd)
@@ -984,31 +671,12 @@ static const char *redir_mode_str(enum redir_mode mode)
}
}
-static int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
-{
- u64 value;
- u32 key;
- int err;
-
- key = 0;
- value = fd1;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
- if (err)
- return err;
-
- key = 1;
- value = fd2;
- return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
-}
-
static void redir_to_connected(int family, int sotype, int sock_mapfd,
int verd_mapfd, enum redir_mode mode)
{
const char *log_prefix = redir_mode_str(mode);
- struct sockaddr_storage addr;
int s, c0, c1, p0, p1;
unsigned int pass;
- socklen_t len;
int err, n;
u32 key;
char b;
@@ -1019,36 +687,13 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
if (s < 0)
return;
- len = sizeof(addr);
- err = xgetsockname(s, sockaddr(&addr), &len);
+ err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
if (err)
goto close_srv;
- c0 = xsocket(family, sotype, 0);
- if (c0 < 0)
- goto close_srv;
- err = xconnect(c0, sockaddr(&addr), len);
- if (err)
- goto close_cli0;
-
- p0 = xaccept_nonblock(s, NULL, NULL);
- if (p0 < 0)
- goto close_cli0;
-
- c1 = xsocket(family, sotype, 0);
- if (c1 < 0)
- goto close_peer0;
- err = xconnect(c1, sockaddr(&addr), len);
- if (err)
- goto close_cli1;
-
- p1 = xaccept_nonblock(s, NULL, NULL);
- if (p1 < 0)
- goto close_cli1;
-
err = add_to_sockmap(sock_mapfd, p0, p1);
if (err)
- goto close_peer1;
+ goto close;
n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1);
if (n < 0)
@@ -1056,12 +701,12 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
if (n == 0)
FAIL("%s: incomplete write", log_prefix);
if (n < 1)
- goto close_peer1;
+ goto close;
key = SK_PASS;
err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
if (err)
- goto close_peer1;
+ goto close;
if (pass != 1)
FAIL("%s: want pass count 1, have %d", log_prefix, pass);
n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC);
@@ -1070,13 +715,10 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
if (n == 0)
FAIL("%s: incomplete recv", log_prefix);
-close_peer1:
+close:
xclose(p1);
-close_cli1:
xclose(c1);
-close_peer0:
xclose(p0);
-close_cli0:
xclose(c0);
close_srv:
xclose(s);
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
index 4512dd808c33..05d0e07da394 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
@@ -209,7 +209,7 @@ static int getsetsockopt(void)
err, errno);
goto err;
}
- ASSERT_EQ(optlen, 4, "Unexpected NETLINK_LIST_MEMBERSHIPS value");
+ ASSERT_EQ(optlen, 8, "Unexpected NETLINK_LIST_MEMBERSHIPS value");
free(big_buf);
close(fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/subprogs_extable.c b/tools/testing/selftests/bpf/prog_tests/subprogs_extable.c
new file mode 100644
index 000000000000..3afd9f775f68
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/subprogs_extable.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "test_subprogs_extable.skel.h"
+
+void test_subprogs_extable(void)
+{
+ const int read_sz = 456;
+ struct test_subprogs_extable *skel;
+ int err;
+
+ skel = test_subprogs_extable__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ err = test_subprogs_extable__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ ASSERT_OK(trigger_module_test_read(read_sz), "trigger_read");
+
+ ASSERT_NEQ(skel->bss->triggered, 0, "verify at least one program ran");
+
+ test_subprogs_extable__detach(skel);
+
+cleanup:
+ test_subprogs_extable__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/inner_array_lookup.c b/tools/testing/selftests/bpf/progs/inner_array_lookup.c
new file mode 100644
index 000000000000..c2c8f2fa451d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/inner_array_lookup.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct inner_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 5);
+ __type(key, int);
+ __type(value, int);
+} inner_map1 SEC(".maps");
+
+struct outer_map {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 3);
+ __type(key, int);
+ __array(values, struct inner_map);
+} outer_map1 SEC(".maps") = {
+ .values = {
+ [2] = &inner_map1,
+ },
+};
+
+SEC("raw_tp/sys_enter")
+int handle__sys_enter(void *ctx)
+{
+ int outer_key = 2, inner_key = 3;
+ int *val;
+ void *map;
+
+ map = bpf_map_lookup_elem(&outer_map1, &outer_key);
+ if (!map)
+ return 1;
+
+ val = bpf_map_lookup_elem(map, &inner_key);
+ if (!val)
+ return 1;
+
+ if (*val == 1)
+ *val = 2;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_drop_prog.c b/tools/testing/selftests/bpf/progs/test_sockmap_drop_prog.c
new file mode 100644
index 000000000000..29314805ce42
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_drop_prog.c
@@ -0,0 +1,32 @@
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_rx SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_tx SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_msg SEC(".maps");
+
+SEC("sk_skb")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ return SK_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
index baf9ebc6d903..99d2ea9fb658 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
@@ -191,7 +191,7 @@ SEC("sockops")
int bpf_sockmap(struct bpf_sock_ops *skops)
{
__u32 lport, rport;
- int op, err, ret;
+ int op, ret;
op = (int) skops->op;
@@ -203,10 +203,10 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
if (lport == 10000) {
ret = 1;
#ifdef SOCKMAP
- err = bpf_sock_map_update(skops, &sock_map, &ret,
+ bpf_sock_map_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#else
- err = bpf_sock_hash_update(skops, &sock_map, &ret,
+ bpf_sock_hash_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#endif
}
@@ -218,10 +218,10 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
if (bpf_ntohl(rport) == 10001) {
ret = 10;
#ifdef SOCKMAP
- err = bpf_sock_map_update(skops, &sock_map, &ret,
+ bpf_sock_map_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#else
- err = bpf_sock_hash_update(skops, &sock_map, &ret,
+ bpf_sock_hash_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#endif
}
@@ -230,8 +230,6 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
break;
}
- __sink(err);
-
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c b/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c
new file mode 100644
index 000000000000..1d86a717a290
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c
@@ -0,0 +1,32 @@
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_rx SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_tx SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_msg SEC(".maps");
+
+SEC("sk_skb")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_subprogs_extable.c b/tools/testing/selftests/bpf/progs/test_subprogs_extable.c
new file mode 100644
index 000000000000..e2a21fbd4e44
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_subprogs_extable.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 8);
+ __type(key, __u32);
+ __type(value, __u64);
+} test_array SEC(".maps");
+
+unsigned int triggered;
+
+static __u64 test_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *data)
+{
+ return 1;
+}
+
+SEC("fexit/bpf_testmod_return_ptr")
+int BPF_PROG(handle_fexit_ret_subprogs, int arg, struct file *ret)
+{
+ *(volatile long *)ret;
+ *(volatile int *)&ret->f_mode;
+ bpf_for_each_map_elem(&test_array, test_cb, NULL, 0);
+ triggered++;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_return_ptr")
+int BPF_PROG(handle_fexit_ret_subprogs2, int arg, struct file *ret)
+{
+ *(volatile long *)ret;
+ *(volatile int *)&ret->f_mode;
+ bpf_for_each_map_elem(&test_array, test_cb, NULL, 0);
+ triggered++;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_return_ptr")
+int BPF_PROG(handle_fexit_ret_subprogs3, int arg, struct file *ret)
+{
+ *(volatile long *)ret;
+ *(volatile int *)&ret->f_mode;
+ bpf_for_each_map_elem(&test_array, test_cb, NULL, 0);
+ triggered++;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
index 136e5530b72c..6115520154e3 100644
--- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
+++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
@@ -371,4 +371,83 @@ __naked void and_then_at_fp_8(void)
" ::: __clobber_all);
}
+SEC("xdp")
+__description("32-bit spill of 64-bit reg should clear ID")
+__failure __msg("math between ctx pointer and 4294967295 is not allowed")
+__naked void spill_32bit_of_64bit_fail(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ /* Roll one bit to force the verifier to track both branches. */\
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x8; \
+ /* Put a large number into r1. */ \
+ r1 = 0xffffffff; \
+ r1 <<= 32; \
+ r1 += r0; \
+ /* Assign an ID to r1. */ \
+ r2 = r1; \
+ /* 32-bit spill r1 to stack - should clear the ID! */\
+ *(u32*)(r10 - 8) = r1; \
+ /* 32-bit fill r2 from stack. */ \
+ r2 = *(u32*)(r10 - 8); \
+ /* Compare r2 with another register to trigger find_equal_scalars.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. If the ID was mistakenly preserved on spill, this would\
+ * cause the verifier to think that r1 is also equal to zero in one of\
+ * the branches, and equal to eight on the other branch.\
+ */ \
+ r3 = 0; \
+ if r2 != r3 goto l0_%=; \
+l0_%=: r1 >>= 32; \
+ /* At this point, if the verifier thinks that r1 is 0, an out-of-bounds\
+ * read will happen, because it actually contains 0xffffffff.\
+ */ \
+ r6 += r1; \
+ r0 = *(u32*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("16-bit spill of 32-bit reg should clear ID")
+__failure __msg("dereference of modified ctx ptr R6 off=65535 disallowed")
+__naked void spill_16bit_of_32bit_fail(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ /* Roll one bit to force the verifier to track both branches. */\
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x8; \
+ /* Put a large number into r1. */ \
+ w1 = 0xffff0000; \
+ r1 += r0; \
+ /* Assign an ID to r1. */ \
+ r2 = r1; \
+ /* 16-bit spill r1 to stack - should clear the ID! */\
+ *(u16*)(r10 - 8) = r1; \
+ /* 16-bit fill r2 from stack. */ \
+ r2 = *(u16*)(r10 - 8); \
+ /* Compare r2 with another register to trigger find_equal_scalars.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. If the ID was mistakenly preserved on spill, this would\
+ * cause the verifier to think that r1 is also equal to zero in one of\
+ * the branches, and equal to eight on the other branch.\
+ */ \
+ r3 = 0; \
+ if r2 != r3 goto l0_%=; \
+l0_%=: r1 >>= 16; \
+ /* At this point, if the verifier thinks that r1 is 0, an out-of-bounds\
+ * read will happen, because it actually contains 0xffff.\
+ */ \
+ r6 += r1; \
+ r0 = *(u32*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile
index d6e106fbce11..a1e955d2de4c 100644
--- a/tools/testing/selftests/ftrace/Makefile
+++ b/tools/testing/selftests/ftrace/Makefile
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
all:
-TEST_PROGS := ftracetest
+TEST_PROGS_EXTENDED := ftracetest
+TEST_PROGS := ftracetest-ktap
TEST_FILES := test.d settings
EXTRA_CLEAN := $(OUTPUT)/logs/*
diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest
index c3311c8c4089..2506621e75df 100755
--- a/tools/testing/selftests/ftrace/ftracetest
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -13,6 +13,7 @@ echo "Usage: ftracetest [options] [testcase(s)] [testcase-directory(s)]"
echo " Options:"
echo " -h|--help Show help message"
echo " -k|--keep Keep passed test logs"
+echo " -K|--ktap Output in KTAP format"
echo " -v|--verbose Increase verbosity of test messages"
echo " -vv Alias of -v -v (Show all results in stdout)"
echo " -vvv Alias of -v -v -v (Show all commands immediately)"
@@ -85,6 +86,10 @@ parse_opts() { # opts
KEEP_LOG=1
shift 1
;;
+ --ktap|-K)
+ KTAP=1
+ shift 1
+ ;;
--verbose|-v|-vv|-vvv)
if [ $VERBOSE -eq -1 ]; then
usage "--console can not use with --verbose"
@@ -178,6 +183,7 @@ TEST_DIR=$TOP_DIR/test.d
TEST_CASES=`find_testcases $TEST_DIR`
LOG_DIR=$TOP_DIR/logs/`date +%Y%m%d-%H%M%S`/
KEEP_LOG=0
+KTAP=0
DEBUG=0
VERBOSE=0
UNSUPPORTED_RESULT=0
@@ -229,7 +235,7 @@ prlog() { # messages
newline=
shift
fi
- printf "$*$newline"
+ [ "$KTAP" != "1" ] && printf "$*$newline"
[ "$LOG_FILE" ] && printf "$*$newline" | strip_esc >> $LOG_FILE
}
catlog() { #file
@@ -260,11 +266,11 @@ TOTAL_RESULT=0
INSTANCE=
CASENO=0
+CASENAME=
testcase() { # testfile
CASENO=$((CASENO+1))
- desc=`grep "^#[ \t]*description:" $1 | cut -f2- -d:`
- prlog -n "[$CASENO]$INSTANCE$desc"
+ CASENAME=`grep "^#[ \t]*description:" $1 | cut -f2- -d:`
}
checkreq() { # testfile
@@ -277,40 +283,68 @@ test_on_instance() { # testfile
grep -q "^#[ \t]*flags:.*instance" $1
}
+ktaptest() { # result comment
+ if [ "$KTAP" != "1" ]; then
+ return
+ fi
+
+ local result=
+ if [ "$1" = "1" ]; then
+ result="ok"
+ else
+ result="not ok"
+ fi
+ shift
+
+ local comment=$*
+ if [ "$comment" != "" ]; then
+ comment="# $comment"
+ fi
+
+ echo $CASENO $result $INSTANCE$CASENAME $comment
+}
+
eval_result() { # sigval
case $1 in
$PASS)
prlog " [${color_green}PASS${color_reset}]"
+ ktaptest 1
PASSED_CASES="$PASSED_CASES $CASENO"
return 0
;;
$FAIL)
prlog " [${color_red}FAIL${color_reset}]"
+ ktaptest 0
FAILED_CASES="$FAILED_CASES $CASENO"
return 1 # this is a bug.
;;
$UNRESOLVED)
prlog " [${color_blue}UNRESOLVED${color_reset}]"
+ ktaptest 0 UNRESOLVED
UNRESOLVED_CASES="$UNRESOLVED_CASES $CASENO"
return $UNRESOLVED_RESULT # depends on use case
;;
$UNTESTED)
prlog " [${color_blue}UNTESTED${color_reset}]"
+ ktaptest 1 SKIP
UNTESTED_CASES="$UNTESTED_CASES $CASENO"
return 0
;;
$UNSUPPORTED)
prlog " [${color_blue}UNSUPPORTED${color_reset}]"
+ ktaptest 1 SKIP
UNSUPPORTED_CASES="$UNSUPPORTED_CASES $CASENO"
return $UNSUPPORTED_RESULT # depends on use case
;;
$XFAIL)
prlog " [${color_green}XFAIL${color_reset}]"
+ ktaptest 1 XFAIL
XFAILED_CASES="$XFAILED_CASES $CASENO"
return 0
;;
*)
prlog " [${color_blue}UNDEFINED${color_reset}]"
+ ktaptest 0 error
UNDEFINED_CASES="$UNDEFINED_CASES $CASENO"
return 1 # this must be a test bug
;;
@@ -371,6 +405,7 @@ __run_test() { # testfile
run_test() { # testfile
local testname=`basename $1`
testcase $1
+ prlog -n "[$CASENO]$INSTANCE$CASENAME"
if [ ! -z "$LOG_FILE" ] ; then
local testlog=`mktemp $LOG_DIR/${CASENO}-${testname}-log.XXXXXX`
else
@@ -405,6 +440,17 @@ run_test() { # testfile
# load in the helper functions
. $TEST_DIR/functions
+if [ "$KTAP" = "1" ]; then
+ echo "TAP version 13"
+
+ casecount=`echo $TEST_CASES | wc -w`
+ for t in $TEST_CASES; do
+ test_on_instance $t || continue
+ casecount=$((casecount+1))
+ done
+ echo "1..${casecount}"
+fi
+
# Main loop
for t in $TEST_CASES; do
run_test $t
@@ -439,6 +485,17 @@ prlog "# of unsupported: " `echo $UNSUPPORTED_CASES | wc -w`
prlog "# of xfailed: " `echo $XFAILED_CASES | wc -w`
prlog "# of undefined(test bug): " `echo $UNDEFINED_CASES | wc -w`
+if [ "$KTAP" = "1" ]; then
+ echo -n "# Totals:"
+ echo -n " pass:"`echo $PASSED_CASES | wc -w`
+ echo -n " faii:"`echo $FAILED_CASES | wc -w`
+ echo -n " xfail:"`echo $XFAILED_CASES | wc -w`
+ echo -n " xpass:0"
+ echo -n " skip:"`echo $UNTESTED_CASES $UNSUPPORTED_CASES | wc -w`
+ echo -n " error:"`echo $UNRESOLVED_CASES $UNDEFINED_CASES | wc -w`
+ echo
+fi
+
cleanup
# if no error, return 0
diff --git a/tools/testing/selftests/ftrace/ftracetest-ktap b/tools/testing/selftests/ftrace/ftracetest-ktap
new file mode 100755
index 000000000000..b3284679ef3a
--- /dev/null
+++ b/tools/testing/selftests/ftrace/ftracetest-ktap
@@ -0,0 +1,8 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# ftracetest-ktap: Wrapper to integrate ftracetest with the kselftest runner
+#
+# Copyright (C) Arm Ltd., 2023
+
+./ftracetest -K
diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
index e2ff3bf4df80..2de7c61d1ae3 100644
--- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
+++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
@@ -9,18 +9,33 @@ fail() { #msg
exit_fail
}
-echo "Test event filter function name"
+sample_events() {
+ echo > trace
+ echo 1 > events/kmem/kmem_cache_free/enable
+ echo 1 > tracing_on
+ ls > /dev/null
+ echo 0 > tracing_on
+ echo 0 > events/kmem/kmem_cache_free/enable
+}
+
echo 0 > tracing_on
echo 0 > events/enable
+
+echo "Get the most frequently calling function"
+sample_events
+
+target_func=`cut -d: -f3 trace | sed 's/call_site=\([^+]*\)+0x.*/\1/' | sort | uniq -c | sort | tail -n 1 | sed 's/^[ 0-9]*//'`
+if [ -z "$target_func" ]; then
+ exit_fail
+fi
echo > trace
-echo 'call_site.function == exit_mmap' > events/kmem/kmem_cache_free/filter
-echo 1 > events/kmem/kmem_cache_free/enable
-echo 1 > tracing_on
-ls > /dev/null
-echo 0 > events/kmem/kmem_cache_free/enable
-hitcnt=`grep kmem_cache_free trace| grep exit_mmap | wc -l`
-misscnt=`grep kmem_cache_free trace| grep -v exit_mmap | wc -l`
+echo "Test event filter function name"
+echo "call_site.function == $target_func" > events/kmem/kmem_cache_free/filter
+sample_events
+
+hitcnt=`grep kmem_cache_free trace| grep $target_func | wc -l`
+misscnt=`grep kmem_cache_free trace| grep -v $target_func | wc -l`
if [ $hitcnt -eq 0 ]; then
exit_fail
@@ -30,20 +45,14 @@ if [ $misscnt -gt 0 ]; then
exit_fail
fi
-address=`grep ' exit_mmap$' /proc/kallsyms | cut -d' ' -f1`
+address=`grep " ${target_func}\$" /proc/kallsyms | cut -d' ' -f1`
echo "Test event filter function address"
-echo 0 > tracing_on
-echo 0 > events/enable
-echo > trace
echo "call_site.function == 0x$address" > events/kmem/kmem_cache_free/filter
-echo 1 > events/kmem/kmem_cache_free/enable
-echo 1 > tracing_on
-sleep 1
-echo 0 > events/kmem/kmem_cache_free/enable
+sample_events
-hitcnt=`grep kmem_cache_free trace| grep exit_mmap | wc -l`
-misscnt=`grep kmem_cache_free trace| grep -v exit_mmap | wc -l`
+hitcnt=`grep kmem_cache_free trace| grep $target_func | wc -l`
+misscnt=`grep kmem_cache_free trace| grep -v $target_func | wc -l`
if [ $hitcnt -eq 0 ]; then
exit_fail
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack-legacy.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack-legacy.tc
new file mode 100644
index 000000000000..d0cd91a93069
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack-legacy.tc
@@ -0,0 +1,24 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test inter-event histogram trigger trace action with dynamic string param (legacy stack)
+# requires: set_event synthetic_events events/sched/sched_process_exec/hist "long[] stack' >> synthetic_events":README
+
+fail() { #msg
+ echo $1
+ exit_fail
+}
+
+echo "Test create synthetic event with stack"
+
+# Test the old stacktrace keyword (for backward compatibility)
+echo 's:wake_lat pid_t pid; u64 delta; unsigned long[] stack;' > dynamic_events
+echo 'hist:keys=next_pid:ts=common_timestamp.usecs,st=stacktrace if prev_state == 1||prev_state == 2' >> events/sched/sched_switch/trigger
+echo 'hist:keys=prev_pid:delta=common_timestamp.usecs-$ts,s=$st:onmax($delta).trace(wake_lat,prev_pid,$delta,$s)' >> events/sched/sched_switch/trigger
+echo 1 > events/synthetic/wake_lat/enable
+sleep 1
+
+if ! grep -q "=>.*sched" trace; then
+ fail "Failed to create synthetic event with stack"
+fi
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack.tc
index 755dbe94ccf4..8f1cc9a86a06 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack.tc
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger trace action with dynamic string param
-# requires: set_event synthetic_events events/sched/sched_process_exec/hist "long[]' >> synthetic_events":README
+# requires: set_event synthetic_events events/sched/sched_process_exec/hist "can be any field, or the special string 'common_stacktrace'":README
fail() { #msg
echo $1
@@ -10,9 +10,8 @@ fail() { #msg
echo "Test create synthetic event with stack"
-
echo 's:wake_lat pid_t pid; u64 delta; unsigned long[] stack;' > dynamic_events
-echo 'hist:keys=next_pid:ts=common_timestamp.usecs,st=stacktrace if prev_state == 1||prev_state == 2' >> events/sched/sched_switch/trigger
+echo 'hist:keys=next_pid:ts=common_timestamp.usecs,st=common_stacktrace if prev_state == 1||prev_state == 2' >> events/sched/sched_switch/trigger
echo 'hist:keys=prev_pid:delta=common_timestamp.usecs-$ts,s=$st:onmax($delta).trace(wake_lat,prev_pid,$delta,$s)' >> events/sched/sched_switch/trigger
echo 1 > events/synthetic/wake_lat/enable
sleep 1
diff --git a/tools/testing/selftests/gpio/gpio-sim.sh b/tools/testing/selftests/gpio/gpio-sim.sh
index 9f539d454ee4..fa2ce2b9dd5f 100755
--- a/tools/testing/selftests/gpio/gpio-sim.sh
+++ b/tools/testing/selftests/gpio/gpio-sim.sh
@@ -389,6 +389,9 @@ create_chip chip
create_bank chip bank
set_num_lines chip bank 8
enable_chip chip
+DEVNAME=`configfs_dev_name chip`
+CHIPNAME=`configfs_chip_name chip bank`
+SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value"
$BASE_DIR/gpio-mockup-cdev -b pull-up /dev/`configfs_chip_name chip bank` 0
test `cat $SYSFS_PATH` = "1" || fail "bias setting does not work"
remove_chip chip
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index d8bff2005dfc..5fd49ad0c696 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -249,7 +249,7 @@
/**
* FIXTURE_SETUP() - Prepares the setup function for the fixture.
- * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly.
+ * *_metadata* is included so that EXPECT_*, ASSERT_* etc. work correctly.
*
* @fixture_name: fixture name
*
@@ -275,7 +275,7 @@
/**
* FIXTURE_TEARDOWN()
- * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly.
+ * *_metadata* is included so that EXPECT_*, ASSERT_* etc. work correctly.
*
* @fixture_name: fixture name
*
@@ -388,7 +388,7 @@
if (setjmp(_metadata->env) == 0) { \
fixture_name##_setup(_metadata, &self, variant->data); \
/* Let setup failure terminate early. */ \
- if (!_metadata->passed) \
+ if (!_metadata->passed || _metadata->skip) \
return; \
_metadata->setup_completed = true; \
fixture_name##_##test_name(_metadata, &self, variant->data); \
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 7a5ff646e7e7..4761b768b773 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -116,6 +116,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
TEST_GEN_PROGS_x86_64 += x86_64/amx_test
TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test
TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test
+TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test
TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
diff --git a/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c b/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c
new file mode 100644
index 000000000000..4c416ebe7d66
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test edge cases and race conditions in kvm_recalculate_apic_map().
+ */
+
+#include <sys/ioctl.h>
+#include <pthread.h>
+#include <time.h>
+
+#include "processor.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "apic.h"
+
+#define TIMEOUT 5 /* seconds */
+
+#define LAPIC_DISABLED 0
+#define LAPIC_X2APIC (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)
+#define MAX_XAPIC_ID 0xff
+
+static void *race(void *arg)
+{
+ struct kvm_lapic_state lapic = {};
+ struct kvm_vcpu *vcpu = arg;
+
+ while (1) {
+ /* Trigger kvm_recalculate_apic_map(). */
+ vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic);
+ pthread_testcancel();
+ }
+
+ return NULL;
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+ struct kvm_vcpu *vcpuN;
+ struct kvm_vm *vm;
+ pthread_t thread;
+ time_t t;
+ int i;
+
+ kvm_static_assert(KVM_MAX_VCPUS > MAX_XAPIC_ID);
+
+ /*
+ * Create the max number of vCPUs supported by selftests so that KVM
+ * has decent amount of work to do when recalculating the map, i.e. to
+ * make the problematic window large enough to hit.
+ */
+ vm = vm_create_with_vcpus(KVM_MAX_VCPUS, NULL, vcpus);
+
+ /*
+ * Enable x2APIC on all vCPUs so that KVM doesn't bail from the recalc
+ * due to vCPUs having aliased xAPIC IDs (truncated to 8 bits).
+ */
+ for (i = 0; i < KVM_MAX_VCPUS; i++)
+ vcpu_set_msr(vcpus[i], MSR_IA32_APICBASE, LAPIC_X2APIC);
+
+ ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0);
+
+ vcpuN = vcpus[KVM_MAX_VCPUS - 1];
+ for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
+ vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_X2APIC);
+ vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_DISABLED);
+ }
+
+ ASSERT_EQ(pthread_cancel(thread), 0);
+ ASSERT_EQ(pthread_join(thread, NULL), 0);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index 23af4633f0f4..4f0c50c33ba7 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -5,12 +5,15 @@ LOCAL_HDRS += $(selfdir)/mm/local_config.h $(top_srcdir)/mm/gup_test.h
include local_config.mk
+ifeq ($(ARCH),)
+
ifeq ($(CROSS_COMPILE),)
uname_M := $(shell uname -m 2>/dev/null || echo not)
else
uname_M := $(shell echo $(CROSS_COMPILE) | grep -o '^[a-z0-9]\+')
endif
-MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/ppc64/')
+ARCH ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/ppc64/')
+endif
# Without this, failed build products remain, with up-to-date timestamps,
# thus tricking Make (and you!) into believing that All Is Well, in subsequent
@@ -65,7 +68,7 @@ TEST_GEN_PROGS += ksm_tests
TEST_GEN_PROGS += ksm_functional_tests
TEST_GEN_PROGS += mdwe_test
-ifeq ($(MACHINE),x86_64)
+ifeq ($(ARCH),x86_64)
CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_program.c -m32)
CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_program.c)
CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie)
@@ -87,13 +90,13 @@ TEST_GEN_PROGS += $(BINARIES_64)
endif
else
-ifneq (,$(findstring $(MACHINE),ppc64))
+ifneq (,$(findstring $(ARCH),ppc64))
TEST_GEN_PROGS += protection_keys
endif
endif
-ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sparc64 x86_64))
+ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sparc64 x86_64))
TEST_GEN_PROGS += va_high_addr_switch
TEST_GEN_PROGS += virtual_address_range
TEST_GEN_PROGS += write_to_hugetlbfs
@@ -112,7 +115,7 @@ $(TEST_GEN_PROGS): vm_util.c
$(OUTPUT)/uffd-stress: uffd-common.c
$(OUTPUT)/uffd-unit-tests: uffd-common.c
-ifeq ($(MACHINE),x86_64)
+ifeq ($(ARCH),x86_64)
BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 80f06aa62034..f27a7338b60e 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -8,8 +8,10 @@ diag_uid
fin_ack_lat
gro
hwtstamp_config
+io_uring_zerocopy_tx
ioam6_parser
ip_defrag
+ip_local_port_range
ipsec
ipv6_flowlabel
ipv6_flowlabel_mgr
@@ -26,6 +28,7 @@ reuseport_bpf_cpu
reuseport_bpf_numa
reuseport_dualstack
rxtimestamp
+sctp_hello
sk_bind_sendto_listen
sk_connect_zero_addr
socket
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index 21ca91473c09..ee6880ac3e5e 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -92,6 +92,13 @@ NSC_CMD="ip netns exec ${NSC}"
which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+# Check if FIPS mode is enabled
+if [ -f /proc/sys/crypto/fips_enabled ]; then
+ fips_enabled=`cat /proc/sys/crypto/fips_enabled`
+else
+ fips_enabled=0
+fi
+
################################################################################
# utilities
@@ -1216,7 +1223,7 @@ ipv4_tcp_novrf()
run_cmd nettest -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 1 "No server, device client, local conn"
- ipv4_tcp_md5_novrf
+ [ "$fips_enabled" = "1" ] || ipv4_tcp_md5_novrf
}
ipv4_tcp_vrf()
@@ -1270,9 +1277,11 @@ ipv4_tcp_vrf()
log_test_addr ${a} $? 1 "Global server, local connection"
# run MD5 tests
- setup_vrf_dup
- ipv4_tcp_md5
- cleanup_vrf_dup
+ if [ "$fips_enabled" = "0" ]; then
+ setup_vrf_dup
+ ipv4_tcp_md5
+ cleanup_vrf_dup
+ fi
#
# enable VRF global server
@@ -2772,7 +2781,7 @@ ipv6_tcp_novrf()
log_test_addr ${a} $? 1 "No server, device client, local conn"
done
- ipv6_tcp_md5_novrf
+ [ "$fips_enabled" = "1" ] || ipv6_tcp_md5_novrf
}
ipv6_tcp_vrf()
@@ -2842,9 +2851,11 @@ ipv6_tcp_vrf()
log_test_addr ${a} $? 1 "Global server, local connection"
# run MD5 tests
- setup_vrf_dup
- ipv6_tcp_md5
- cleanup_vrf_dup
+ if [ "$fips_enabled" = "0" ]; then
+ setup_vrf_dup
+ ipv6_tcp_md5
+ cleanup_vrf_dup
+ fi
#
# enable VRF global server
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index a47b26ab48f2..0f5e88c8f4ff 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -2283,7 +2283,7 @@ EOF
################################################################################
# main
-while getopts :t:pP46hv:w: o
+while getopts :t:pP46hvw: o
do
case $o in
t) TESTS=$OPTARG;;
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 7da8ec838c63..35d89dfa6f11 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -68,7 +68,7 @@ setup()
cleanup()
{
$IP link del dev dummy0 &> /dev/null
- ip netns del ns1
+ ip netns del ns1 &> /dev/null
ip netns del ns2 &> /dev/null
}
diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
index 432fe8469851..48584a51388f 100755
--- a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
+++ b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
@@ -84,8 +84,9 @@ h2_destroy()
router_rp1_200_create()
{
- ip link add name $rp1.200 up \
- link $rp1 addrgenmode eui64 type vlan id 200
+ ip link add name $rp1.200 link $rp1 type vlan id 200
+ ip link set dev $rp1.200 addrgenmode eui64
+ ip link set dev $rp1.200 up
ip address add dev $rp1.200 192.0.2.2/28
ip address add dev $rp1.200 2001:db8:1::2/64
ip stats set dev $rp1.200 l3_stats on
@@ -256,9 +257,11 @@ reapply_config()
router_rp1_200_destroy
- ip link add name $rp1.200 link $rp1 addrgenmode none type vlan id 200
+ ip link add name $rp1.200 link $rp1 type vlan id 200
+ ip link set dev $rp1.200 addrgenmode none
ip stats set dev $rp1.200 l3_stats on
- ip link set dev $rp1.200 up addrgenmode eui64
+ ip link set dev $rp1.200 addrgenmode eui64
+ ip link set dev $rp1.200 up
ip address add dev $rp1.200 192.0.2.2/28
ip address add dev $rp1.200 2001:db8:1::2/64
}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
index c5095da7f6bf..aec752a22e9e 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
@@ -93,12 +93,16 @@ cleanup()
test_gretap()
{
+ ip neigh replace 192.0.2.130 lladdr $(mac_get $h3) \
+ nud permanent dev br2
full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
}
test_ip6gretap()
{
+ ip neigh replace 2001:db8:2::2 lladdr $(mac_get $h3) \
+ nud permanent dev br2
full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
index 9ff22f28032d..0cf4c47a46f9 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
@@ -90,12 +90,16 @@ cleanup()
test_gretap()
{
+ ip neigh replace 192.0.2.130 lladdr $(mac_get $h3) \
+ nud permanent dev br1
full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
}
test_ip6gretap()
{
+ ip neigh replace 2001:db8:2::2 lladdr $(mac_get $h3) \
+ nud permanent dev br1
full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
}
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index 43a723626126..7b936a926859 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -9,7 +9,7 @@ TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq
-TEST_FILES := settings
+TEST_FILES := mptcp_lib.sh settings
EXTRA_CLEAN := *.pcap
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index 38021a0dd527..6032f9b23c4c 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -1,3 +1,4 @@
+CONFIG_KALLSYMS=y
CONFIG_MPTCP=y
CONFIG_IPV6=y
CONFIG_MPTCP_IPV6=y
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index ef628b16fe9b..fa9e09ad97d9 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -1,6 +1,8 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+. "$(dirname "${0}")/mptcp_lib.sh"
+
sec=$(date +%s)
rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
ns="ns1-$rndh"
@@ -31,6 +33,8 @@ cleanup()
ip netns del $ns
}
+mptcp_lib_check_mptcp
+
ip -Version > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without ip tool"
@@ -51,16 +55,20 @@ __chk_nr()
{
local command="$1"
local expected=$2
- local msg nr
+ local msg="$3"
+ local skip="${4:-SKIP}"
+ local nr
- shift 2
- msg=$*
nr=$(eval $command)
printf "%-50s" "$msg"
if [ $nr != $expected ]; then
- echo "[ fail ] expected $expected found $nr"
- ret=$test_cnt
+ if [ $nr = "$skip" ] && ! mptcp_lib_expect_all_features; then
+ echo "[ skip ] Feature probably not supported"
+ else
+ echo "[ fail ] expected $expected found $nr"
+ ret=$test_cnt
+ fi
else
echo "[ ok ]"
fi
@@ -72,12 +80,12 @@ __chk_msk_nr()
local condition=$1
shift 1
- __chk_nr "ss -inmHMN $ns | $condition" $*
+ __chk_nr "ss -inmHMN $ns | $condition" "$@"
}
chk_msk_nr()
{
- __chk_msk_nr "grep -c token:" $*
+ __chk_msk_nr "grep -c token:" "$@"
}
wait_msk_nr()
@@ -115,37 +123,26 @@ wait_msk_nr()
chk_msk_fallback_nr()
{
- __chk_msk_nr "grep -c fallback" $*
+ __chk_msk_nr "grep -c fallback" "$@"
}
chk_msk_remote_key_nr()
{
- __chk_msk_nr "grep -c remote_key" $*
+ __chk_msk_nr "grep -c remote_key" "$@"
}
__chk_listen()
{
local filter="$1"
local expected=$2
+ local msg="$3"
- shift 2
- msg=$*
-
- nr=$(ss -N $ns -Ml "$filter" | grep -c LISTEN)
- printf "%-50s" "$msg"
-
- if [ $nr != $expected ]; then
- echo "[ fail ] expected $expected found $nr"
- ret=$test_cnt
- else
- echo "[ ok ]"
- fi
+ __chk_nr "ss -N $ns -Ml '$filter' | grep -c LISTEN" "$expected" "$msg" 0
}
chk_msk_listen()
{
lport=$1
- local msg="check for listen socket"
# destination port search should always return empty list
__chk_listen "dport $lport" 0 "listen match for dport $lport"
@@ -163,10 +160,9 @@ chk_msk_listen()
chk_msk_inuse()
{
local expected=$1
+ local msg="$2"
local listen_nr
- shift 1
-
listen_nr=$(ss -N "${ns}" -Ml | grep -c LISTEN)
expected=$((expected + listen_nr))
@@ -177,7 +173,7 @@ chk_msk_inuse()
sleep 0.1
done
- __chk_nr get_msk_inuse $expected $*
+ __chk_nr get_msk_inuse $expected "$msg" 0
}
# $1: ns, $2: port
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index a43d3e2f59bb..773dd770a567 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -1,6 +1,8 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+. "$(dirname "${0}")/mptcp_lib.sh"
+
time_start=$(date +%s)
optstring="S:R:d:e:l:r:h4cm:f:tC"
@@ -141,6 +143,9 @@ cleanup()
done
}
+mptcp_lib_check_mptcp
+mptcp_lib_check_kallsyms
+
ip -Version > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without ip tool"
@@ -691,6 +696,15 @@ run_test_transparent()
return 0
fi
+ # IP(V6)_TRANSPARENT has been added after TOS support which came with
+ # the required infrastructure in MPTCP sockopt code. To support TOS, the
+ # following function has been exported (T). Not great but better than
+ # checking for a specific kernel version.
+ if ! mptcp_lib_kallsyms_has "T __ip_sock_set_tos$"; then
+ echo "INFO: ${msg} not supported by the kernel: SKIP"
+ return
+ fi
+
ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF"
flush ruleset
table inet mangle {
@@ -763,6 +777,11 @@ run_tests_peekmode()
run_tests_mptfo()
{
+ if ! mptcp_lib_kallsyms_has "mptcp_fastopen_"; then
+ echo "INFO: TFO not supported by the kernel: SKIP"
+ return
+ fi
+
echo "INFO: with MPTFO start"
ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=2
ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=1
@@ -783,6 +802,11 @@ run_tests_disconnect()
local old_cin=$cin
local old_sin=$sin
+ if ! mptcp_lib_kallsyms_has "mptcp_pm_data_reset$"; then
+ echo "INFO: Full disconnect not supported: SKIP"
+ return
+ fi
+
cat $cin $cin $cin > "$cin".disconnect
# force do_transfer to cope with the multiple tranmissions
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 26310c17b4c6..0ae8cafde439 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -10,6 +10,8 @@
# because it's invoked by variable name, see how the "tests" array is used
#shellcheck disable=SC2317
+. "$(dirname "${0}")/mptcp_lib.sh"
+
ret=0
sin=""
sinfail=""
@@ -17,11 +19,14 @@ sout=""
cin=""
cinfail=""
cinsent=""
+tmpfile=""
cout=""
capout=""
ns1=""
ns2=""
ksft_skip=4
+iptables="iptables"
+ip6tables="ip6tables"
timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
capture=0
@@ -79,7 +84,7 @@ init_partial()
ip netns add $netns || exit $ksft_skip
ip -net $netns link set lo up
ip netns exec $netns sysctl -q net.mptcp.enabled=1
- ip netns exec $netns sysctl -q net.mptcp.pm_type=0
+ ip netns exec $netns sysctl -q net.mptcp.pm_type=0 2>/dev/null || true
ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
if [ $checksum -eq 1 ]; then
@@ -136,12 +141,19 @@ cleanup_partial()
check_tools()
{
+ mptcp_lib_check_mptcp
+ mptcp_lib_check_kallsyms
+
if ! ip -Version &> /dev/null; then
echo "SKIP: Could not run test without ip tool"
exit $ksft_skip
fi
- if ! iptables -V &> /dev/null; then
+ # Use the legacy version if available to support old kernel versions
+ if iptables-legacy -V &> /dev/null; then
+ iptables="iptables-legacy"
+ ip6tables="ip6tables-legacy"
+ elif ! iptables -V &> /dev/null; then
echo "SKIP: Could not run all tests without iptables tool"
exit $ksft_skip
fi
@@ -175,10 +187,37 @@ cleanup()
{
rm -f "$cin" "$cout" "$sinfail"
rm -f "$sin" "$sout" "$cinsent" "$cinfail"
+ rm -f "$tmpfile"
rm -rf $evts_ns1 $evts_ns2
cleanup_partial
}
+# $1: msg
+print_title()
+{
+ printf "%03u %-36s %s" "${TEST_COUNT}" "${TEST_NAME}" "${1}"
+}
+
+# [ $1: fail msg ]
+mark_as_skipped()
+{
+ local msg="${1:-"Feature not supported"}"
+
+ mptcp_lib_fail_if_expected_feature "${msg}"
+
+ print_title "[ skip ] ${msg}"
+ printf "\n"
+}
+
+# $@: condition
+continue_if()
+{
+ if ! "${@}"; then
+ mark_as_skipped
+ return 1
+ fi
+}
+
skip_test()
{
if [ "${#only_tests_ids[@]}" -eq 0 ] && [ "${#only_tests_names[@]}" -eq 0 ]; then
@@ -222,6 +261,19 @@ reset()
return 0
}
+# $1: test name ; $2: counter to check
+reset_check_counter()
+{
+ reset "${1}" || return 1
+
+ local counter="${2}"
+
+ if ! nstat -asz "${counter}" | grep -wq "${counter}"; then
+ mark_as_skipped "counter '${counter}' is not available"
+ return 1
+ fi
+}
+
# $1: test name
reset_with_cookies()
{
@@ -241,17 +293,21 @@ reset_with_add_addr_timeout()
reset "${1}" || return 1
- tables="iptables"
+ tables="${iptables}"
if [ $ip -eq 6 ]; then
- tables="ip6tables"
+ tables="${ip6tables}"
fi
ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1
- ip netns exec $ns2 $tables -A OUTPUT -p tcp \
- -m tcp --tcp-option 30 \
- -m bpf --bytecode \
- "$CBPF_MPTCP_SUBOPTION_ADD_ADDR" \
- -j DROP
+
+ if ! ip netns exec $ns2 $tables -A OUTPUT -p tcp \
+ -m tcp --tcp-option 30 \
+ -m bpf --bytecode \
+ "$CBPF_MPTCP_SUBOPTION_ADD_ADDR" \
+ -j DROP; then
+ mark_as_skipped "unable to set the 'add addr' rule"
+ return 1
+ fi
}
# $1: test name
@@ -295,22 +351,17 @@ reset_with_allow_join_id0()
# tc action pedit offset 162 out of bounds
#
# Netfilter is used to mark packets with enough data.
-reset_with_fail()
+setup_fail_rules()
{
- reset "${1}" || return 1
-
- ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=1
- ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=1
-
check_invert=1
validate_checksum=1
- local i="$2"
- local ip="${3:-4}"
+ local i="$1"
+ local ip="${2:-4}"
local tables
- tables="iptables"
+ tables="${iptables}"
if [ $ip -eq 6 ]; then
- tables="ip6tables"
+ tables="${ip6tables}"
fi
ip netns exec $ns2 $tables \
@@ -320,15 +371,32 @@ reset_with_fail()
-p tcp \
-m length --length 150:9999 \
-m statistic --mode nth --packet 1 --every 99999 \
- -j MARK --set-mark 42 || exit 1
+ -j MARK --set-mark 42 || return ${ksft_skip}
- tc -n $ns2 qdisc add dev ns2eth$i clsact || exit 1
+ tc -n $ns2 qdisc add dev ns2eth$i clsact || return ${ksft_skip}
tc -n $ns2 filter add dev ns2eth$i egress \
protocol ip prio 1000 \
handle 42 fw \
action pedit munge offset 148 u8 invert \
pipe csum tcp \
- index 100 || exit 1
+ index 100 || return ${ksft_skip}
+}
+
+reset_with_fail()
+{
+ reset_check_counter "${1}" "MPTcpExtInfiniteMapTx" || return 1
+ shift
+
+ ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=1
+ ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=1
+
+ local rc=0
+ setup_fail_rules "${@}" || rc=$?
+
+ if [ ${rc} -eq ${ksft_skip} ]; then
+ mark_as_skipped "unable to set the 'fail' rules"
+ return 1
+ fi
}
reset_with_events()
@@ -343,6 +411,25 @@ reset_with_events()
evts_ns2_pid=$!
}
+reset_with_tcp_filter()
+{
+ reset "${1}" || return 1
+ shift
+
+ local ns="${!1}"
+ local src="${2}"
+ local target="${3}"
+
+ if ! ip netns exec "${ns}" ${iptables} \
+ -A INPUT \
+ -s "${src}" \
+ -p tcp \
+ -j "${target}"; then
+ mark_as_skipped "unable to set the filter rules"
+ return 1
+ fi
+}
+
fail_test()
{
ret=1
@@ -383,9 +470,16 @@ check_transfer()
fail_test
return 1
fi
- bytes="--bytes=${bytes}"
+
+ # note: BusyBox's "cmp" command doesn't support --bytes
+ tmpfile=$(mktemp)
+ head --bytes="$bytes" "$in" > "$tmpfile"
+ mv "$tmpfile" "$in"
+ head --bytes="$bytes" "$out" > "$tmpfile"
+ mv "$tmpfile" "$out"
+ tmpfile=""
fi
- cmp -l "$in" "$out" ${bytes} | while read -r i a b; do
+ cmp -l "$in" "$out" | while read -r i a b; do
local sum=$((0${a} + 0${b}))
if [ $check_invert -eq 0 ] || [ $sum -ne $((0xff)) ]; then
echo "[ FAIL ] $what does not match (in, out):"
@@ -454,11 +548,25 @@ wait_local_port_listen()
done
}
-rm_addr_count()
+# $1: ns ; $2: counter
+get_counter()
{
- local ns=${1}
+ local ns="${1}"
+ local counter="${2}"
+ local count
- ip netns exec ${ns} nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'
+ count=$(ip netns exec ${ns} nstat -asz "${counter}" | awk 'NR==1 {next} {print $2}')
+ if [ -z "${count}" ]; then
+ mptcp_lib_fail_if_expected_feature "${counter} counter"
+ return 1
+ fi
+
+ echo "${count}"
+}
+
+rm_addr_count()
+{
+ get_counter "${1}" "MPTcpExtRmAddr"
}
# $1: ns, $2: old rm_addr counter in $ns
@@ -481,11 +589,11 @@ wait_mpj()
local ns="${1}"
local cnt old_cnt
- old_cnt=$(ip netns exec ${ns} nstat -as | grep MPJoinAckRx | awk '{print $2}')
+ old_cnt=$(get_counter ${ns} "MPTcpExtMPJoinAckRx")
local i
for i in $(seq 10); do
- cnt=$(ip netns exec ${ns} nstat -as | grep MPJoinAckRx | awk '{print $2}')
+ cnt=$(get_counter ${ns} "MPTcpExtMPJoinAckRx")
[ "$cnt" = "${old_cnt}" ] || break
sleep 0.1
done
@@ -685,15 +793,6 @@ pm_nl_check_endpoint()
fi
}
-filter_tcp_from()
-{
- local ns="${1}"
- local src="${2}"
- local target="${3}"
-
- ip netns exec "${ns}" iptables -A INPUT -s "${src}" -p tcp -j "${target}"
-}
-
do_transfer()
{
local listener_ns="$1"
@@ -849,7 +948,15 @@ do_transfer()
sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q')
ip netns exec ${listener_ns} ./pm_nl_ctl ann $addr token $tk id $id
sleep 1
+ sp=$(grep "type:10" "$evts_ns1" |
+ sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q')
+ da=$(grep "type:10" "$evts_ns1" |
+ sed -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q')
+ dp=$(grep "type:10" "$evts_ns1" |
+ sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q')
ip netns exec ${listener_ns} ./pm_nl_ctl rem token $tk id $id
+ ip netns exec ${listener_ns} ./pm_nl_ctl dsf lip "::ffff:$addr" \
+ lport $sp rip $da rport $dp token $tk
fi
counter=$((counter + 1))
@@ -915,6 +1022,7 @@ do_transfer()
sleep 1
sp=$(grep "type:10" "$evts_ns2" |
sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q')
+ ip netns exec ${connector_ns} ./pm_nl_ctl rem token $tk id $id
ip netns exec ${connector_ns} ./pm_nl_ctl dsf lip $addr lport $sp \
rip $da rport $dp token $tk
fi
@@ -1135,12 +1243,13 @@ chk_csum_nr()
fi
printf "%-${nr_blank}s %s" " " "sum"
- count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}')
- [ -z "$count" ] && count=0
+ count=$(get_counter ${ns1} "MPTcpExtDataCsumErr")
if [ "$count" != "$csum_ns1" ]; then
extra_msg="$extra_msg ns1=$count"
fi
- if { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } ||
+ if [ -z "$count" ]; then
+ echo -n "[skip]"
+ elif { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } ||
{ [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then
echo "[fail] got $count data checksum error[s] expected $csum_ns1"
fail_test
@@ -1149,12 +1258,13 @@ chk_csum_nr()
echo -n "[ ok ]"
fi
echo -n " - csum "
- count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}')
- [ -z "$count" ] && count=0
+ count=$(get_counter ${ns2} "MPTcpExtDataCsumErr")
if [ "$count" != "$csum_ns2" ]; then
extra_msg="$extra_msg ns2=$count"
fi
- if { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } ||
+ if [ -z "$count" ]; then
+ echo -n "[skip]"
+ elif { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } ||
{ [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then
echo "[fail] got $count data checksum error[s] expected $csum_ns2"
fail_test
@@ -1196,12 +1306,13 @@ chk_fail_nr()
fi
printf "%-${nr_blank}s %s" " " "ftx"
- count=$(ip netns exec $ns_tx nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}')
- [ -z "$count" ] && count=0
+ count=$(get_counter ${ns_tx} "MPTcpExtMPFailTx")
if [ "$count" != "$fail_tx" ]; then
extra_msg="$extra_msg,tx=$count"
fi
- if { [ "$count" != "$fail_tx" ] && [ $allow_tx_lost -eq 0 ]; } ||
+ if [ -z "$count" ]; then
+ echo -n "[skip]"
+ elif { [ "$count" != "$fail_tx" ] && [ $allow_tx_lost -eq 0 ]; } ||
{ [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then
echo "[fail] got $count MP_FAIL[s] TX expected $fail_tx"
fail_test
@@ -1211,12 +1322,13 @@ chk_fail_nr()
fi
echo -n " - failrx"
- count=$(ip netns exec $ns_rx nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}')
- [ -z "$count" ] && count=0
+ count=$(get_counter ${ns_rx} "MPTcpExtMPFailRx")
if [ "$count" != "$fail_rx" ]; then
extra_msg="$extra_msg,rx=$count"
fi
- if { [ "$count" != "$fail_rx" ] && [ $allow_rx_lost -eq 0 ]; } ||
+ if [ -z "$count" ]; then
+ echo -n "[skip]"
+ elif { [ "$count" != "$fail_rx" ] && [ $allow_rx_lost -eq 0 ]; } ||
{ [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then
echo "[fail] got $count MP_FAIL[s] RX expected $fail_rx"
fail_test
@@ -1248,10 +1360,11 @@ chk_fclose_nr()
fi
printf "%-${nr_blank}s %s" " " "ctx"
- count=$(ip netns exec $ns_tx nstat -as | grep MPTcpExtMPFastcloseTx | awk '{print $2}')
- [ -z "$count" ] && count=0
- [ "$count" != "$fclose_tx" ] && extra_msg="$extra_msg,tx=$count"
- if [ "$count" != "$fclose_tx" ]; then
+ count=$(get_counter ${ns_tx} "MPTcpExtMPFastcloseTx")
+ if [ -z "$count" ]; then
+ echo -n "[skip]"
+ elif [ "$count" != "$fclose_tx" ]; then
+ extra_msg="$extra_msg,tx=$count"
echo "[fail] got $count MP_FASTCLOSE[s] TX expected $fclose_tx"
fail_test
dump_stats=1
@@ -1260,10 +1373,11 @@ chk_fclose_nr()
fi
echo -n " - fclzrx"
- count=$(ip netns exec $ns_rx nstat -as | grep MPTcpExtMPFastcloseRx | awk '{print $2}')
- [ -z "$count" ] && count=0
- [ "$count" != "$fclose_rx" ] && extra_msg="$extra_msg,rx=$count"
- if [ "$count" != "$fclose_rx" ]; then
+ count=$(get_counter ${ns_rx} "MPTcpExtMPFastcloseRx")
+ if [ -z "$count" ]; then
+ echo -n "[skip]"
+ elif [ "$count" != "$fclose_rx" ]; then
+ extra_msg="$extra_msg,rx=$count"
echo "[fail] got $count MP_FASTCLOSE[s] RX expected $fclose_rx"
fail_test
dump_stats=1
@@ -1294,9 +1408,10 @@ chk_rst_nr()
fi
printf "%-${nr_blank}s %s" " " "rtx"
- count=$(ip netns exec $ns_tx nstat -as | grep MPTcpExtMPRstTx | awk '{print $2}')
- [ -z "$count" ] && count=0
- if [ $count -lt $rst_tx ]; then
+ count=$(get_counter ${ns_tx} "MPTcpExtMPRstTx")
+ if [ -z "$count" ]; then
+ echo -n "[skip]"
+ elif [ $count -lt $rst_tx ]; then
echo "[fail] got $count MP_RST[s] TX expected $rst_tx"
fail_test
dump_stats=1
@@ -1305,9 +1420,10 @@ chk_rst_nr()
fi
echo -n " - rstrx "
- count=$(ip netns exec $ns_rx nstat -as | grep MPTcpExtMPRstRx | awk '{print $2}')
- [ -z "$count" ] && count=0
- if [ "$count" -lt "$rst_rx" ]; then
+ count=$(get_counter ${ns_rx} "MPTcpExtMPRstRx")
+ if [ -z "$count" ]; then
+ echo -n "[skip]"
+ elif [ "$count" -lt "$rst_rx" ]; then
echo "[fail] got $count MP_RST[s] RX expected $rst_rx"
fail_test
dump_stats=1
@@ -1328,9 +1444,10 @@ chk_infi_nr()
local dump_stats
printf "%-${nr_blank}s %s" " " "itx"
- count=$(ip netns exec $ns2 nstat -as | grep InfiniteMapTx | awk '{print $2}')
- [ -z "$count" ] && count=0
- if [ "$count" != "$infi_tx" ]; then
+ count=$(get_counter ${ns2} "MPTcpExtInfiniteMapTx")
+ if [ -z "$count" ]; then
+ echo -n "[skip]"
+ elif [ "$count" != "$infi_tx" ]; then
echo "[fail] got $count infinite map[s] TX expected $infi_tx"
fail_test
dump_stats=1
@@ -1339,9 +1456,10 @@ chk_infi_nr()
fi
echo -n " - infirx"
- count=$(ip netns exec $ns1 nstat -as | grep InfiniteMapRx | awk '{print $2}')
- [ -z "$count" ] && count=0
- if [ "$count" != "$infi_rx" ]; then
+ count=$(get_counter ${ns1} "MPTcpExtInfiniteMapRx")
+ if [ -z "$count" ]; then
+ echo "[skip]"
+ elif [ "$count" != "$infi_rx" ]; then
echo "[fail] got $count infinite map[s] RX expected $infi_rx"
fail_test
dump_stats=1
@@ -1373,9 +1491,10 @@ chk_join_nr()
fi
printf "%03u %-36s %s" "${TEST_COUNT}" "${title}" "syn"
- count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}')
- [ -z "$count" ] && count=0
- if [ "$count" != "$syn_nr" ]; then
+ count=$(get_counter ${ns1} "MPTcpExtMPJoinSynRx")
+ if [ -z "$count" ]; then
+ echo -n "[skip]"
+ elif [ "$count" != "$syn_nr" ]; then
echo "[fail] got $count JOIN[s] syn expected $syn_nr"
fail_test
dump_stats=1
@@ -1385,9 +1504,10 @@ chk_join_nr()
echo -n " - synack"
with_cookie=$(ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies)
- count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinSynAckRx | awk '{print $2}')
- [ -z "$count" ] && count=0
- if [ "$count" != "$syn_ack_nr" ]; then
+ count=$(get_counter ${ns2} "MPTcpExtMPJoinSynAckRx")
+ if [ -z "$count" ]; then
+ echo -n "[skip]"
+ elif [ "$count" != "$syn_ack_nr" ]; then
# simult connections exceeding the limit with cookie enabled could go up to
# synack validation as the conn limit can be enforced reliably only after
# the subflow creation
@@ -1403,9 +1523,10 @@ chk_join_nr()
fi
echo -n " - ack"
- count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinAckRx | awk '{print $2}')
- [ -z "$count" ] && count=0
- if [ "$count" != "$ack_nr" ]; then
+ count=$(get_counter ${ns1} "MPTcpExtMPJoinAckRx")
+ if [ -z "$count" ]; then
+ echo "[skip]"
+ elif [ "$count" != "$ack_nr" ]; then
echo "[fail] got $count JOIN[s] ack expected $ack_nr"
fail_test
dump_stats=1
@@ -1437,12 +1558,12 @@ chk_stale_nr()
local recover_nr
printf "%-${nr_blank}s %-18s" " " "stale"
- stale_nr=$(ip netns exec $ns nstat -as | grep MPTcpExtSubflowStale | awk '{print $2}')
- [ -z "$stale_nr" ] && stale_nr=0
- recover_nr=$(ip netns exec $ns nstat -as | grep MPTcpExtSubflowRecover | awk '{print $2}')
- [ -z "$recover_nr" ] && recover_nr=0
- if [ $stale_nr -lt $stale_min ] ||
+ stale_nr=$(get_counter ${ns} "MPTcpExtSubflowStale")
+ recover_nr=$(get_counter ${ns} "MPTcpExtSubflowRecover")
+ if [ -z "$stale_nr" ] || [ -z "$recover_nr" ]; then
+ echo "[skip]"
+ elif [ $stale_nr -lt $stale_min ] ||
{ [ $stale_max -gt 0 ] && [ $stale_nr -gt $stale_max ]; } ||
[ $((stale_nr - recover_nr)) -ne $stale_delta ]; then
echo "[fail] got $stale_nr stale[s] $recover_nr recover[s], " \
@@ -1478,12 +1599,12 @@ chk_add_nr()
timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
printf "%-${nr_blank}s %s" " " "add"
- count=$(ip netns exec $ns2 nstat -as MPTcpExtAddAddr | grep MPTcpExtAddAddr | awk '{print $2}')
- [ -z "$count" ] && count=0
-
+ count=$(get_counter ${ns2} "MPTcpExtAddAddr")
+ if [ -z "$count" ]; then
+ echo -n "[skip]"
# if the test configured a short timeout tolerate greater then expected
# add addrs options, due to retransmissions
- if [ "$count" != "$add_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then
+ elif [ "$count" != "$add_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then
echo "[fail] got $count ADD_ADDR[s] expected $add_nr"
fail_test
dump_stats=1
@@ -1492,9 +1613,10 @@ chk_add_nr()
fi
echo -n " - echo "
- count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtEchoAdd | awk '{print $2}')
- [ -z "$count" ] && count=0
- if [ "$count" != "$echo_nr" ]; then
+ count=$(get_counter ${ns1} "MPTcpExtEchoAdd")
+ if [ -z "$count" ]; then
+ echo -n "[skip]"
+ elif [ "$count" != "$echo_nr" ]; then
echo "[fail] got $count ADD_ADDR echo[s] expected $echo_nr"
fail_test
dump_stats=1
@@ -1504,9 +1626,10 @@ chk_add_nr()
if [ $port_nr -gt 0 ]; then
echo -n " - pt "
- count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtPortAdd | awk '{print $2}')
- [ -z "$count" ] && count=0
- if [ "$count" != "$port_nr" ]; then
+ count=$(get_counter ${ns2} "MPTcpExtPortAdd")
+ if [ -z "$count" ]; then
+ echo "[skip]"
+ elif [ "$count" != "$port_nr" ]; then
echo "[fail] got $count ADD_ADDR[s] with a port-number expected $port_nr"
fail_test
dump_stats=1
@@ -1515,10 +1638,10 @@ chk_add_nr()
fi
printf "%-${nr_blank}s %s" " " "syn"
- count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinPortSynRx |
- awk '{print $2}')
- [ -z "$count" ] && count=0
- if [ "$count" != "$syn_nr" ]; then
+ count=$(get_counter ${ns1} "MPTcpExtMPJoinPortSynRx")
+ if [ -z "$count" ]; then
+ echo -n "[skip]"
+ elif [ "$count" != "$syn_nr" ]; then
echo "[fail] got $count JOIN[s] syn with a different \
port-number expected $syn_nr"
fail_test
@@ -1528,10 +1651,10 @@ chk_add_nr()
fi
echo -n " - synack"
- count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinPortSynAckRx |
- awk '{print $2}')
- [ -z "$count" ] && count=0
- if [ "$count" != "$syn_ack_nr" ]; then
+ count=$(get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx")
+ if [ -z "$count" ]; then
+ echo -n "[skip]"
+ elif [ "$count" != "$syn_ack_nr" ]; then
echo "[fail] got $count JOIN[s] synack with a different \
port-number expected $syn_ack_nr"
fail_test
@@ -1541,10 +1664,10 @@ chk_add_nr()
fi
echo -n " - ack"
- count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinPortAckRx |
- awk '{print $2}')
- [ -z "$count" ] && count=0
- if [ "$count" != "$ack_nr" ]; then
+ count=$(get_counter ${ns1} "MPTcpExtMPJoinPortAckRx")
+ if [ -z "$count" ]; then
+ echo "[skip]"
+ elif [ "$count" != "$ack_nr" ]; then
echo "[fail] got $count JOIN[s] ack with a different \
port-number expected $ack_nr"
fail_test
@@ -1554,10 +1677,10 @@ chk_add_nr()
fi
printf "%-${nr_blank}s %s" " " "syn"
- count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMismatchPortSynRx |
- awk '{print $2}')
- [ -z "$count" ] && count=0
- if [ "$count" != "$mis_syn_nr" ]; then
+ count=$(get_counter ${ns1} "MPTcpExtMismatchPortSynRx")
+ if [ -z "$count" ]; then
+ echo -n "[skip]"
+ elif [ "$count" != "$mis_syn_nr" ]; then
echo "[fail] got $count JOIN[s] syn with a mismatched \
port-number expected $mis_syn_nr"
fail_test
@@ -1567,10 +1690,10 @@ chk_add_nr()
fi
echo -n " - ack "
- count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMismatchPortAckRx |
- awk '{print $2}')
- [ -z "$count" ] && count=0
- if [ "$count" != "$mis_ack_nr" ]; then
+ count=$(get_counter ${ns1} "MPTcpExtMismatchPortAckRx")
+ if [ -z "$count" ]; then
+ echo "[skip]"
+ elif [ "$count" != "$mis_ack_nr" ]; then
echo "[fail] got $count JOIN[s] ack with a mismatched \
port-number expected $mis_ack_nr"
fail_test
@@ -1614,9 +1737,10 @@ chk_rm_nr()
fi
printf "%-${nr_blank}s %s" " " "rm "
- count=$(ip netns exec $addr_ns nstat -as | grep MPTcpExtRmAddr | awk '{print $2}')
- [ -z "$count" ] && count=0
- if [ "$count" != "$rm_addr_nr" ]; then
+ count=$(get_counter ${addr_ns} "MPTcpExtRmAddr")
+ if [ -z "$count" ]; then
+ echo -n "[skip]"
+ elif [ "$count" != "$rm_addr_nr" ]; then
echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr"
fail_test
dump_stats=1
@@ -1625,29 +1749,27 @@ chk_rm_nr()
fi
echo -n " - rmsf "
- count=$(ip netns exec $subflow_ns nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}')
- [ -z "$count" ] && count=0
- if [ -n "$simult" ]; then
+ count=$(get_counter ${subflow_ns} "MPTcpExtRmSubflow")
+ if [ -z "$count" ]; then
+ echo -n "[skip]"
+ elif [ -n "$simult" ]; then
local cnt suffix
- cnt=$(ip netns exec $addr_ns nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}')
+ cnt=$(get_counter ${addr_ns} "MPTcpExtRmSubflow")
# in case of simult flush, the subflow removal count on each side is
# unreliable
- [ -z "$cnt" ] && cnt=0
count=$((count + cnt))
[ "$count" != "$rm_subflow_nr" ] && suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]"
if [ $count -ge "$rm_subflow_nr" ] && \
[ "$count" -le "$((rm_subflow_nr *2 ))" ]; then
- echo "[ ok ] $suffix"
+ echo -n "[ ok ] $suffix"
else
echo "[fail] got $count RM_SUBFLOW[s] expected in range [$rm_subflow_nr:$((rm_subflow_nr*2))]"
fail_test
dump_stats=1
fi
- return
- fi
- if [ "$count" != "$rm_subflow_nr" ]; then
+ elif [ "$count" != "$rm_subflow_nr" ]; then
echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr"
fail_test
dump_stats=1
@@ -1668,9 +1790,10 @@ chk_prio_nr()
local dump_stats
printf "%-${nr_blank}s %s" " " "ptx"
- count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}')
- [ -z "$count" ] && count=0
- if [ "$count" != "$mp_prio_nr_tx" ]; then
+ count=$(get_counter ${ns1} "MPTcpExtMPPrioTx")
+ if [ -z "$count" ]; then
+ echo -n "[skip]"
+ elif [ "$count" != "$mp_prio_nr_tx" ]; then
echo "[fail] got $count MP_PRIO[s] TX expected $mp_prio_nr_tx"
fail_test
dump_stats=1
@@ -1679,9 +1802,10 @@ chk_prio_nr()
fi
echo -n " - prx "
- count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}')
- [ -z "$count" ] && count=0
- if [ "$count" != "$mp_prio_nr_rx" ]; then
+ count=$(get_counter ${ns1} "MPTcpExtMPPrioRx")
+ if [ -z "$count" ]; then
+ echo "[skip]"
+ elif [ "$count" != "$mp_prio_nr_rx" ]; then
echo "[fail] got $count MP_PRIO[s] RX expected $mp_prio_nr_rx"
fail_test
dump_stats=1
@@ -1797,7 +1921,7 @@ wait_attempt_fail()
while [ $time -lt $timeout_ms ]; do
local cnt
- cnt=$(ip netns exec $ns nstat -as TcpAttemptFails | grep TcpAttemptFails | awk '{print $2}')
+ cnt=$(get_counter ${ns} "TcpAttemptFails")
[ "$cnt" = 1 ] && return 1
time=$((time + 100))
@@ -1890,23 +2014,23 @@ subflows_error_tests()
fi
# multiple subflows, with subflow creation error
- if reset "multi subflows, with failing subflow"; then
+ if reset_with_tcp_filter "multi subflows, with failing subflow" ns1 10.0.3.2 REJECT &&
+ continue_if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 0 2
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
- filter_tcp_from $ns1 10.0.3.2 REJECT
run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
chk_join_nr 1 1 1
fi
# multiple subflows, with subflow timeout on MPJ
- if reset "multi subflows, with subflow timeout"; then
+ if reset_with_tcp_filter "multi subflows, with subflow timeout" ns1 10.0.3.2 DROP &&
+ continue_if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 0 2
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
- filter_tcp_from $ns1 10.0.3.2 DROP
run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
chk_join_nr 1 1 1
fi
@@ -1914,11 +2038,11 @@ subflows_error_tests()
# multiple subflows, check that the endpoint corresponding to
# closed subflow (due to reset) is not reused if additional
# subflows are added later
- if reset "multi subflows, fair usage on close"; then
+ if reset_with_tcp_filter "multi subflows, fair usage on close" ns1 10.0.3.2 REJECT &&
+ continue_if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- filter_tcp_from $ns1 10.0.3.2 REJECT
run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow &
# mpj subflow will be in TW after the reset
@@ -2018,11 +2142,18 @@ signal_address_tests()
# the peer could possibly miss some addr notification, allow retransmission
ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1
run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
- chk_join_nr 3 3 3
- # the server will not signal the address terminating
- # the MPC subflow
- chk_add_nr 3 3
+ # It is not directly linked to the commit introducing this
+ # symbol but for the parent one which is linked anyway.
+ if ! mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then
+ chk_join_nr 3 3 2
+ chk_add_nr 4 4
+ else
+ chk_join_nr 3 3 3
+ # the server will not signal the address terminating
+ # the MPC subflow
+ chk_add_nr 3 3
+ fi
fi
}
@@ -2263,7 +2394,12 @@ remove_tests()
pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
chk_join_nr 3 3 3
- chk_rm_nr 0 3 simult
+
+ if mptcp_lib_kversion_ge 5.18; then
+ chk_rm_nr 0 3 simult
+ else
+ chk_rm_nr 3 3
+ fi
fi
# addresses flush
@@ -2501,7 +2637,8 @@ v4mapped_tests()
mixed_tests()
{
- if reset "IPv4 sockets do not use IPv6 addresses"; then
+ if reset "IPv4 sockets do not use IPv6 addresses" &&
+ continue_if mptcp_lib_kversion_ge 6.3; then
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
@@ -2510,7 +2647,8 @@ mixed_tests()
fi
# Need an IPv6 mptcp socket to allow subflows of both families
- if reset "simult IPv4 and IPv6 subflows"; then
+ if reset "simult IPv4 and IPv6 subflows" &&
+ continue_if mptcp_lib_kversion_ge 6.3; then
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
@@ -2519,7 +2657,8 @@ mixed_tests()
fi
# cross families subflows will not be created even in fullmesh mode
- if reset "simult IPv4 and IPv6 subflows, fullmesh 1x1"; then
+ if reset "simult IPv4 and IPv6 subflows, fullmesh 1x1" &&
+ continue_if mptcp_lib_kversion_ge 6.3; then
pm_nl_set_limits $ns1 0 4
pm_nl_set_limits $ns2 1 4
pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow,fullmesh
@@ -2530,7 +2669,8 @@ mixed_tests()
# fullmesh still tries to create all the possibly subflows with
# matching family
- if reset "simult IPv4 and IPv6 subflows, fullmesh 2x2"; then
+ if reset "simult IPv4 and IPv6 subflows, fullmesh 2x2" &&
+ continue_if mptcp_lib_kversion_ge 6.3; then
pm_nl_set_limits $ns1 0 4
pm_nl_set_limits $ns2 2 4
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
@@ -2543,7 +2683,8 @@ mixed_tests()
backup_tests()
{
# single subflow, backup
- if reset "single subflow, backup"; then
+ if reset "single subflow, backup" &&
+ continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
@@ -2553,7 +2694,8 @@ backup_tests()
fi
# single address, backup
- if reset "single address, backup"; then
+ if reset "single address, backup" &&
+ continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
pm_nl_set_limits $ns1 0 1
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
pm_nl_set_limits $ns2 1 1
@@ -2564,7 +2706,8 @@ backup_tests()
fi
# single address with port, backup
- if reset "single address with port, backup"; then
+ if reset "single address with port, backup" &&
+ continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
pm_nl_set_limits $ns1 0 1
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
pm_nl_set_limits $ns2 1 1
@@ -2574,14 +2717,16 @@ backup_tests()
chk_prio_nr 1 1
fi
- if reset "mpc backup"; then
+ if reset "mpc backup" &&
+ continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup
run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
chk_join_nr 0 0 0
chk_prio_nr 0 1
fi
- if reset "mpc backup both sides"; then
+ if reset "mpc backup both sides" &&
+ continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then
pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup
run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
@@ -2589,14 +2734,16 @@ backup_tests()
chk_prio_nr 1 1
fi
- if reset "mpc switch to backup"; then
+ if reset "mpc switch to backup" &&
+ continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
chk_join_nr 0 0 0
chk_prio_nr 0 1
fi
- if reset "mpc switch to backup both sides"; then
+ if reset "mpc switch to backup both sides" &&
+ continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then
pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
@@ -2622,38 +2769,41 @@ verify_listener_events()
local family
local saddr
local sport
+ local name
if [ $e_type = $LISTENER_CREATED ]; then
- stdbuf -o0 -e0 printf "\t\t\t\t\t CREATE_LISTENER %s:%s"\
- $e_saddr $e_sport
+ name="LISTENER_CREATED"
elif [ $e_type = $LISTENER_CLOSED ]; then
- stdbuf -o0 -e0 printf "\t\t\t\t\t CLOSE_LISTENER %s:%s "\
- $e_saddr $e_sport
+ name="LISTENER_CLOSED"
+ else
+ name="$e_type"
fi
- type=$(grep "type:$e_type," $evt |
- sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q')
- family=$(grep "type:$e_type," $evt |
- sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q')
- sport=$(grep "type:$e_type," $evt |
- sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q')
+ printf "%-${nr_blank}s %s %s:%s " " " "$name" "$e_saddr" "$e_sport"
+
+ if ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then
+ printf "[skip]: event not supported\n"
+ return
+ fi
+
+ type=$(grep "type:$e_type," $evt | sed -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q')
+ family=$(grep "type:$e_type," $evt | sed -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q')
+ sport=$(grep "type:$e_type," $evt | sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q')
if [ $family ] && [ $family = $AF_INET6 ]; then
- saddr=$(grep "type:$e_type," $evt |
- sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q')
+ saddr=$(grep "type:$e_type," $evt | sed -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q')
else
- saddr=$(grep "type:$e_type," $evt |
- sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q')
+ saddr=$(grep "type:$e_type," $evt | sed -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q')
fi
if [ $type ] && [ $type = $e_type ] &&
[ $family ] && [ $family = $e_family ] &&
[ $saddr ] && [ $saddr = $e_saddr ] &&
[ $sport ] && [ $sport = $e_sport ]; then
- stdbuf -o0 -e0 printf "[ ok ]\n"
+ echo "[ ok ]"
return 0
fi
fail_test
- stdbuf -o0 -e0 printf "[fail]\n"
+ echo "[fail]"
}
add_addr_ports_tests()
@@ -2959,7 +3109,8 @@ fullmesh_tests()
fi
# set fullmesh flag
- if reset "set fullmesh flag test"; then
+ if reset "set fullmesh flag test" &&
+ continue_if mptcp_lib_kversion_ge 5.18; then
pm_nl_set_limits $ns1 4 4
pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow
pm_nl_set_limits $ns2 4 4
@@ -2969,7 +3120,8 @@ fullmesh_tests()
fi
# set nofullmesh flag
- if reset "set nofullmesh flag test"; then
+ if reset "set nofullmesh flag test" &&
+ continue_if mptcp_lib_kversion_ge 5.18; then
pm_nl_set_limits $ns1 4 4
pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow,fullmesh
pm_nl_set_limits $ns2 4 4
@@ -2979,7 +3131,8 @@ fullmesh_tests()
fi
# set backup,fullmesh flags
- if reset "set backup,fullmesh flags test"; then
+ if reset "set backup,fullmesh flags test" &&
+ continue_if mptcp_lib_kversion_ge 5.18; then
pm_nl_set_limits $ns1 4 4
pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow
pm_nl_set_limits $ns2 4 4
@@ -2990,7 +3143,8 @@ fullmesh_tests()
fi
# set nobackup,nofullmesh flags
- if reset "set nobackup,nofullmesh flags test"; then
+ if reset "set nobackup,nofullmesh flags test" &&
+ continue_if mptcp_lib_kversion_ge 5.18; then
pm_nl_set_limits $ns1 4 4
pm_nl_set_limits $ns2 4 4
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup,fullmesh
@@ -3003,14 +3157,14 @@ fullmesh_tests()
fastclose_tests()
{
- if reset "fastclose test"; then
+ if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then
run_tests $ns1 $ns2 10.0.1.1 1024 0 fastclose_client
chk_join_nr 0 0 0
chk_fclose_nr 1 1
chk_rst_nr 1 1 invert
fi
- if reset "fastclose server test"; then
+ if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then
run_tests $ns1 $ns2 10.0.1.1 1024 0 fastclose_server
chk_join_nr 0 0 0
chk_fclose_nr 1 1 invert
@@ -3048,7 +3202,8 @@ fail_tests()
userspace_tests()
{
# userspace pm type prevents add_addr
- if reset "userspace pm type prevents add_addr"; then
+ if reset "userspace pm type prevents add_addr" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns1
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 0 2
@@ -3059,7 +3214,8 @@ userspace_tests()
fi
# userspace pm type does not echo add_addr without daemon
- if reset "userspace pm no echo w/o daemon"; then
+ if reset "userspace pm no echo w/o daemon" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns2
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 0 2
@@ -3070,7 +3226,8 @@ userspace_tests()
fi
# userspace pm type rejects join
- if reset "userspace pm type rejects join"; then
+ if reset "userspace pm type rejects join" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns1
pm_nl_set_limits $ns1 1 1
pm_nl_set_limits $ns2 1 1
@@ -3080,7 +3237,8 @@ userspace_tests()
fi
# userspace pm type does not send join
- if reset "userspace pm type does not send join"; then
+ if reset "userspace pm type does not send join" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns2
pm_nl_set_limits $ns1 1 1
pm_nl_set_limits $ns2 1 1
@@ -3090,7 +3248,8 @@ userspace_tests()
fi
# userspace pm type prevents mp_prio
- if reset "userspace pm type prevents mp_prio"; then
+ if reset "userspace pm type prevents mp_prio" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns1
pm_nl_set_limits $ns1 1 1
pm_nl_set_limits $ns2 1 1
@@ -3101,7 +3260,8 @@ userspace_tests()
fi
# userspace pm type prevents rm_addr
- if reset "userspace pm type prevents rm_addr"; then
+ if reset "userspace pm type prevents rm_addr" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns1
set_userspace_pm $ns2
pm_nl_set_limits $ns1 0 1
@@ -3113,7 +3273,8 @@ userspace_tests()
fi
# userspace pm add & remove address
- if reset_with_events "userspace pm add & remove address"; then
+ if reset_with_events "userspace pm add & remove address" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns1
pm_nl_set_limits $ns2 1 1
run_tests $ns1 $ns2 10.0.1.1 0 userspace_1 0 slow
@@ -3124,20 +3285,23 @@ userspace_tests()
fi
# userspace pm create destroy subflow
- if reset_with_events "userspace pm create destroy subflow"; then
+ if reset_with_events "userspace pm create destroy subflow" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns2
pm_nl_set_limits $ns1 0 1
run_tests $ns1 $ns2 10.0.1.1 0 0 userspace_1 slow
chk_join_nr 1 1 1
- chk_rm_nr 0 1
+ chk_rm_nr 1 1
kill_events_pids
fi
}
endpoint_tests()
{
+ # subflow_rebuild_header is needed to support the implicit flag
# userspace pm type prevents add_addr
- if reset "implicit EP"; then
+ if reset "implicit EP" &&
+ mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
pm_nl_set_limits $ns1 2 2
pm_nl_set_limits $ns2 2 2
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
@@ -3157,7 +3321,8 @@ endpoint_tests()
kill_tests_wait
fi
- if reset "delete and re-add"; then
+ if reset "delete and re-add" &&
+ mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
pm_nl_set_limits $ns1 1 1
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
new file mode 100644
index 000000000000..f32045b23b89
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -0,0 +1,104 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly KSFT_FAIL=1
+readonly KSFT_SKIP=4
+
+# SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var can be set when validating all
+# features using the last version of the kernel and the selftests to make sure
+# a test is not being skipped by mistake.
+mptcp_lib_expect_all_features() {
+ [ "${SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES:-}" = "1" ]
+}
+
+# $1: msg
+mptcp_lib_fail_if_expected_feature() {
+ if mptcp_lib_expect_all_features; then
+ echo "ERROR: missing feature: ${*}"
+ exit ${KSFT_FAIL}
+ fi
+
+ return 1
+}
+
+# $1: file
+mptcp_lib_has_file() {
+ local f="${1}"
+
+ if [ -f "${f}" ]; then
+ return 0
+ fi
+
+ mptcp_lib_fail_if_expected_feature "${f} file not found"
+}
+
+mptcp_lib_check_mptcp() {
+ if ! mptcp_lib_has_file "/proc/sys/net/mptcp/enabled"; then
+ echo "SKIP: MPTCP support is not available"
+ exit ${KSFT_SKIP}
+ fi
+}
+
+mptcp_lib_check_kallsyms() {
+ if ! mptcp_lib_has_file "/proc/kallsyms"; then
+ echo "SKIP: CONFIG_KALLSYMS is missing"
+ exit ${KSFT_SKIP}
+ fi
+}
+
+# Internal: use mptcp_lib_kallsyms_has() instead
+__mptcp_lib_kallsyms_has() {
+ local sym="${1}"
+
+ mptcp_lib_check_kallsyms
+
+ grep -q " ${sym}" /proc/kallsyms
+}
+
+# $1: part of a symbol to look at, add '$' at the end for full name
+mptcp_lib_kallsyms_has() {
+ local sym="${1}"
+
+ if __mptcp_lib_kallsyms_has "${sym}"; then
+ return 0
+ fi
+
+ mptcp_lib_fail_if_expected_feature "${sym} symbol not found"
+}
+
+# $1: part of a symbol to look at, add '$' at the end for full name
+mptcp_lib_kallsyms_doesnt_have() {
+ local sym="${1}"
+
+ if ! __mptcp_lib_kallsyms_has "${sym}"; then
+ return 0
+ fi
+
+ mptcp_lib_fail_if_expected_feature "${sym} symbol has been found"
+}
+
+# !!!AVOID USING THIS!!!
+# Features might not land in the expected version and features can be backported
+#
+# $1: kernel version, e.g. 6.3
+mptcp_lib_kversion_ge() {
+ local exp_maj="${1%.*}"
+ local exp_min="${1#*.}"
+ local v maj min
+
+ # If the kernel has backported features, set this env var to 1:
+ if [ "${SELFTESTS_MPTCP_LIB_NO_KVERSION_CHECK:-}" = "1" ]; then
+ return 0
+ fi
+
+ v=$(uname -r | cut -d'.' -f1,2)
+ maj=${v%.*}
+ min=${v#*.}
+
+ if [ "${maj}" -gt "${exp_maj}" ] ||
+ { [ "${maj}" -eq "${exp_maj}" ] && [ "${min}" -ge "${exp_min}" ]; }; then
+ return 0
+ fi
+
+ mptcp_lib_fail_if_expected_feature "kernel version ${1} lower than ${v}"
+}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
index ae61f39556ca..b35148edbf02 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
@@ -87,6 +87,10 @@ struct so_state {
uint64_t tcpi_rcv_delta;
};
+#ifndef MIN
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
static void die_perror(const char *msg)
{
perror(msg);
@@ -349,13 +353,14 @@ static void do_getsockopt_tcp_info(struct so_state *s, int fd, size_t r, size_t
xerror("getsockopt MPTCP_TCPINFO (tries %d, %m)");
assert(olen <= sizeof(ti));
- assert(ti.d.size_user == ti.d.size_kernel);
- assert(ti.d.size_user == sizeof(struct tcp_info));
+ assert(ti.d.size_kernel > 0);
+ assert(ti.d.size_user ==
+ MIN(ti.d.size_kernel, sizeof(struct tcp_info)));
assert(ti.d.num_subflows == 1);
assert(olen > (socklen_t)sizeof(struct mptcp_subflow_data));
olen -= sizeof(struct mptcp_subflow_data);
- assert(olen == sizeof(struct tcp_info));
+ assert(olen == ti.d.size_user);
if (ti.ti[0].tcpi_bytes_sent == w &&
ti.ti[0].tcpi_bytes_received == r)
@@ -401,13 +406,14 @@ static void do_getsockopt_subflow_addrs(int fd)
die_perror("getsockopt MPTCP_SUBFLOW_ADDRS");
assert(olen <= sizeof(addrs));
- assert(addrs.d.size_user == addrs.d.size_kernel);
- assert(addrs.d.size_user == sizeof(struct mptcp_subflow_addrs));
+ assert(addrs.d.size_kernel > 0);
+ assert(addrs.d.size_user ==
+ MIN(addrs.d.size_kernel, sizeof(struct mptcp_subflow_addrs)));
assert(addrs.d.num_subflows == 1);
assert(olen > (socklen_t)sizeof(struct mptcp_subflow_data));
olen -= sizeof(struct mptcp_subflow_data);
- assert(olen == sizeof(struct mptcp_subflow_addrs));
+ assert(olen == addrs.d.size_user);
llen = sizeof(local);
ret = getsockname(fd, (struct sockaddr *)&local, &llen);
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index 1b70c0a304ce..f295a371ff14 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -1,6 +1,8 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+. "$(dirname "${0}")/mptcp_lib.sh"
+
ret=0
sin=""
sout=""
@@ -84,6 +86,9 @@ cleanup()
rm -f "$sin" "$sout"
}
+mptcp_lib_check_mptcp
+mptcp_lib_check_kallsyms
+
ip -Version > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without ip tool"
@@ -182,9 +187,14 @@ do_transfer()
local_addr="0.0.0.0"
fi
+ cmsg="TIMESTAMPNS"
+ if mptcp_lib_kallsyms_has "mptcp_ioctl$"; then
+ cmsg+=",TCPINQ"
+ fi
+
timeout ${timeout_test} \
ip netns exec ${listener_ns} \
- $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c TIMESTAMPNS,TCPINQ \
+ $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \
${local_addr} < "$sin" > "$sout" &
local spid=$!
@@ -192,7 +202,7 @@ do_transfer()
timeout ${timeout_test} \
ip netns exec ${connector_ns} \
- $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c TIMESTAMPNS,TCPINQ \
+ $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c "${cmsg}" \
$connect_addr < "$cin" > "$cout" &
local cpid=$!
@@ -249,6 +259,11 @@ do_mptcp_sockopt_tests()
{
local lret=0
+ if ! mptcp_lib_kallsyms_has "mptcp_diag_fill_info$"; then
+ echo "INFO: MPTCP sockopt not supported: SKIP"
+ return
+ fi
+
ip netns exec "$ns_sbox" ./mptcp_sockopt
lret=$?
@@ -303,6 +318,11 @@ do_tcpinq_tests()
{
local lret=0
+ if ! mptcp_lib_kallsyms_has "mptcp_ioctl$"; then
+ echo "INFO: TCP_INQ not supported: SKIP"
+ return
+ fi
+
local args
for args in "-t tcp" "-r tcp"; do
do_tcpinq_test $args
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index 89839d1ff9d8..d02e0d63a8f9 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -1,6 +1,8 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+. "$(dirname "${0}")/mptcp_lib.sh"
+
ksft_skip=4
ret=0
@@ -34,6 +36,8 @@ cleanup()
ip netns del $ns1
}
+mptcp_lib_check_mptcp
+
ip -Version > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without ip tool"
@@ -69,8 +73,12 @@ check()
}
check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "defaults addr list"
-check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
+
+default_limits="$(ip netns exec $ns1 ./pm_nl_ctl limits)"
+if mptcp_lib_expect_all_features; then
+ check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
subflows 2" "defaults limits"
+fi
ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1
ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 flags subflow dev lo
@@ -117,12 +125,10 @@ ip netns exec $ns1 ./pm_nl_ctl flush
check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "flush addrs"
ip netns exec $ns1 ./pm_nl_ctl limits 9 1
-check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
-subflows 2" "rcv addrs above hard limit"
+check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "rcv addrs above hard limit"
ip netns exec $ns1 ./pm_nl_ctl limits 1 9
-check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
-subflows 2" "subflows above hard limit"
+check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "subflows above hard limit"
ip netns exec $ns1 ./pm_nl_ctl limits 8 8
check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 8
@@ -172,14 +178,19 @@ subflow,backup 10.0.1.1" "set flags (backup)"
ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags nobackup
check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
subflow 10.0.1.1" " (nobackup)"
+
+# fullmesh support has been added later
ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+if ip netns exec $ns1 ./pm_nl_ctl dump | grep -q "fullmesh" ||
+ mptcp_lib_expect_all_features; then
+ check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
subflow,fullmesh 10.0.1.1" " (fullmesh)"
-ip netns exec $ns1 ./pm_nl_ctl set id 1 flags nofullmesh
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+ ip netns exec $ns1 ./pm_nl_ctl set id 1 flags nofullmesh
+ check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
subflow 10.0.1.1" " (nofullmesh)"
-ip netns exec $ns1 ./pm_nl_ctl set id 1 flags backup,fullmesh
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+ ip netns exec $ns1 ./pm_nl_ctl set id 1 flags backup,fullmesh
+ check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
subflow,backup,fullmesh 10.0.1.1" " (backup,fullmesh)"
+fi
exit $ret
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 9f22f7e5027d..36a3c9d92e20 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -1,6 +1,8 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+. "$(dirname "${0}")/mptcp_lib.sh"
+
sec=$(date +%s)
rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
ns1="ns1-$rndh"
@@ -34,6 +36,8 @@ cleanup()
done
}
+mptcp_lib_check_mptcp
+
ip -Version > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without ip tool"
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index b1eb7bce599d..98d9e4d2d3fc 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -1,10 +1,20 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+. "$(dirname "${0}")/mptcp_lib.sh"
+
+mptcp_lib_check_mptcp
+mptcp_lib_check_kallsyms
+
+if ! mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+ echo "userspace pm tests are not supported by the kernel: SKIP"
+ exit ${KSFT_SKIP}
+fi
+
ip -Version > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Cannot not run test without ip tool"
- exit 1
+ exit ${KSFT_SKIP}
fi
ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED
@@ -905,6 +915,11 @@ test_listener()
{
print_title "Listener tests"
+ if ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then
+ stdbuf -o0 -e0 printf "LISTENER events \t[SKIP] Not supported\n"
+ return
+ fi
+
# Capture events on the network namespace running the client
:>$client_evts
diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
index 1003119773e5..f96282362811 100755
--- a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
@@ -232,10 +232,14 @@ setup_rt_networking()
local nsname=rt-${rt}
ip netns add ${nsname}
+
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
ip link set veth-rt-${rt} netns ${nsname}
ip -netns ${nsname} link set veth-rt-${rt} name veth0
- ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0
+ ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad
ip -netns ${nsname} link set veth0 up
ip -netns ${nsname} link set lo up
@@ -254,6 +258,12 @@ setup_hs()
# set the networking for the host
ip netns add ${hsname}
+
+ # disable the rp_filter otherwise the kernel gets confused about how
+ # to route decap ipv4 packets.
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0
+
ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
ip -netns ${hsname} link set ${rtveth} netns ${rtname}
ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0
@@ -272,11 +282,6 @@ setup_hs()
ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1
- # disable the rp_filter otherwise the kernel gets confused about how
- # to route decap ipv4 packets.
- ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.rp_filter=0
-
ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
}
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index e699548d4247..ff36844d14b4 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -25,6 +25,8 @@
#define TLS_PAYLOAD_MAX_LEN 16384
#define SOL_TLS 282
+static int fips_enabled;
+
struct tls_crypto_info_keys {
union {
struct tls12_crypto_info_aes_gcm_128 aes128;
@@ -235,7 +237,7 @@ FIXTURE_VARIANT(tls)
{
uint16_t tls_version;
uint16_t cipher_type;
- bool nopad;
+ bool nopad, fips_non_compliant;
};
FIXTURE_VARIANT_ADD(tls, 12_aes_gcm)
@@ -254,24 +256,28 @@ FIXTURE_VARIANT_ADD(tls, 12_chacha)
{
.tls_version = TLS_1_2_VERSION,
.cipher_type = TLS_CIPHER_CHACHA20_POLY1305,
+ .fips_non_compliant = true,
};
FIXTURE_VARIANT_ADD(tls, 13_chacha)
{
.tls_version = TLS_1_3_VERSION,
.cipher_type = TLS_CIPHER_CHACHA20_POLY1305,
+ .fips_non_compliant = true,
};
FIXTURE_VARIANT_ADD(tls, 13_sm4_gcm)
{
.tls_version = TLS_1_3_VERSION,
.cipher_type = TLS_CIPHER_SM4_GCM,
+ .fips_non_compliant = true,
};
FIXTURE_VARIANT_ADD(tls, 13_sm4_ccm)
{
.tls_version = TLS_1_3_VERSION,
.cipher_type = TLS_CIPHER_SM4_CCM,
+ .fips_non_compliant = true,
};
FIXTURE_VARIANT_ADD(tls, 12_aes_ccm)
@@ -311,6 +317,9 @@ FIXTURE_SETUP(tls)
int one = 1;
int ret;
+ if (fips_enabled && variant->fips_non_compliant)
+ SKIP(return, "Unsupported cipher in FIPS mode");
+
tls_crypto_info_init(variant->tls_version, variant->cipher_type,
&tls12);
@@ -1865,4 +1874,17 @@ TEST(prequeue) {
close(cfd);
}
+static void __attribute__((constructor)) fips_check(void) {
+ int res;
+ FILE *f;
+
+ f = fopen("/proc/sys/crypto/fips_enabled", "r");
+ if (f) {
+ res = fscanf(f, "%d", &fips_enabled);
+ if (res != 1)
+ ksft_print_msg("ERROR: Couldn't read /proc/sys/crypto/fips_enabled\n");
+ fclose(f);
+ }
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/vrf-xfrm-tests.sh b/tools/testing/selftests/net/vrf-xfrm-tests.sh
index 184da81f554f..452638ae8aed 100755
--- a/tools/testing/selftests/net/vrf-xfrm-tests.sh
+++ b/tools/testing/selftests/net/vrf-xfrm-tests.sh
@@ -264,60 +264,60 @@ setup_xfrm()
ip -netns host1 xfrm state add src ${HOST1_4} dst ${HOST2_4} \
proto esp spi ${SPI_1} reqid 0 mode tunnel \
replay-window 4 replay-oseq 0x4 \
- auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
- enc 'cbc(des3_ede)' ${ENC_1} \
+ auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \
+ enc 'cbc(aes)' ${ENC_1} \
sel src ${h1_4} dst ${h2_4} ${devarg}
ip -netns host2 xfrm state add src ${HOST1_4} dst ${HOST2_4} \
proto esp spi ${SPI_1} reqid 0 mode tunnel \
replay-window 4 replay-oseq 0x4 \
- auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
- enc 'cbc(des3_ede)' ${ENC_1} \
+ auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \
+ enc 'cbc(aes)' ${ENC_1} \
sel src ${h1_4} dst ${h2_4}
ip -netns host1 xfrm state add src ${HOST2_4} dst ${HOST1_4} \
proto esp spi ${SPI_2} reqid 0 mode tunnel \
replay-window 4 replay-oseq 0x4 \
- auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
- enc 'cbc(des3_ede)' ${ENC_2} \
+ auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \
+ enc 'cbc(aes)' ${ENC_2} \
sel src ${h2_4} dst ${h1_4} ${devarg}
ip -netns host2 xfrm state add src ${HOST2_4} dst ${HOST1_4} \
proto esp spi ${SPI_2} reqid 0 mode tunnel \
replay-window 4 replay-oseq 0x4 \
- auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
- enc 'cbc(des3_ede)' ${ENC_2} \
+ auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \
+ enc 'cbc(aes)' ${ENC_2} \
sel src ${h2_4} dst ${h1_4}
ip -6 -netns host1 xfrm state add src ${HOST1_6} dst ${HOST2_6} \
proto esp spi ${SPI_1} reqid 0 mode tunnel \
replay-window 4 replay-oseq 0x4 \
- auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
- enc 'cbc(des3_ede)' ${ENC_1} \
+ auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \
+ enc 'cbc(aes)' ${ENC_1} \
sel src ${h1_6} dst ${h2_6} ${devarg}
ip -6 -netns host2 xfrm state add src ${HOST1_6} dst ${HOST2_6} \
proto esp spi ${SPI_1} reqid 0 mode tunnel \
replay-window 4 replay-oseq 0x4 \
- auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
- enc 'cbc(des3_ede)' ${ENC_1} \
+ auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \
+ enc 'cbc(aes)' ${ENC_1} \
sel src ${h1_6} dst ${h2_6}
ip -6 -netns host1 xfrm state add src ${HOST2_6} dst ${HOST1_6} \
proto esp spi ${SPI_2} reqid 0 mode tunnel \
replay-window 4 replay-oseq 0x4 \
- auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
- enc 'cbc(des3_ede)' ${ENC_2} \
+ auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \
+ enc 'cbc(aes)' ${ENC_2} \
sel src ${h2_6} dst ${h1_6} ${devarg}
ip -6 -netns host2 xfrm state add src ${HOST2_6} dst ${HOST1_6} \
proto esp spi ${SPI_2} reqid 0 mode tunnel \
replay-window 4 replay-oseq 0x4 \
- auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
- enc 'cbc(des3_ede)' ${ENC_2} \
+ auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \
+ enc 'cbc(aes)' ${ENC_2} \
sel src ${h2_6} dst ${h1_6}
}
diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
index 198ad5f32187..cfa9562f3cd8 100644
--- a/tools/testing/selftests/ptp/testptp.c
+++ b/tools/testing/selftests/ptp/testptp.c
@@ -502,11 +502,11 @@ int main(int argc, char *argv[])
interval = t2 - t1;
offset = (t2 + t1) / 2 - tp;
- printf("system time: %lld.%u\n",
+ printf("system time: %lld.%09u\n",
(pct+2*i)->sec, (pct+2*i)->nsec);
- printf("phc time: %lld.%u\n",
+ printf("phc time: %lld.%09u\n",
(pct+2*i+1)->sec, (pct+2*i+1)->nsec);
- printf("system time: %lld.%u\n",
+ printf("system time: %lld.%09u\n",
(pct+2*i+2)->sec, (pct+2*i+2)->nsec);
printf("system/phc clock time offset is %" PRId64 " ns\n"
"system clock time delay is %" PRId64 " ns\n",
diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile
index 75af864e07b6..50aab6b57da3 100644
--- a/tools/testing/selftests/sgx/Makefile
+++ b/tools/testing/selftests/sgx/Makefile
@@ -17,6 +17,7 @@ ENCL_CFLAGS := -Wall -Werror -static -nostdlib -nostartfiles -fPIC \
-fno-stack-protector -mrdrnd $(INCLUDES)
TEST_CUSTOM_PROGS := $(OUTPUT)/test_sgx
+TEST_FILES := $(OUTPUT)/test_encl.elf
ifeq ($(CAN_BUILD_X86_64), 1)
all: $(TEST_CUSTOM_PROGS) $(OUTPUT)/test_encl.elf
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index 4638c63a339f..6e73b09c20c8 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -6,20 +6,18 @@ CONFIG_NF_CONNTRACK_MARK=y
CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CONNTRACK_LABELS=y
CONFIG_NF_NAT=m
+CONFIG_NETFILTER_XT_TARGET_LOG=m
CONFIG_NET_SCHED=y
#
# Queueing/Scheduling
#
-CONFIG_NET_SCH_ATM=m
CONFIG_NET_SCH_CAKE=m
-CONFIG_NET_SCH_CBQ=m
CONFIG_NET_SCH_CBS=m
CONFIG_NET_SCH_CHOKE=m
CONFIG_NET_SCH_CODEL=m
CONFIG_NET_SCH_DRR=m
-CONFIG_NET_SCH_DSMARK=m
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_FQ=m
CONFIG_NET_SCH_FQ_CODEL=m
@@ -57,8 +55,6 @@ CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_FLOWER=m
CONFIG_NET_CLS_MATCHALL=m
CONFIG_NET_CLS_ROUTE4=m
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_EMATCH=y
CONFIG_NET_EMATCH_STACK=32
CONFIG_NET_EMATCH_CMP=m
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfb.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfb.json
index ba2f5e79cdbf..e21c7f22c6d4 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfb.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfb.json
@@ -58,10 +58,10 @@
"setup": [
"$IP link add dev $DUMMY type dummy || /bin/true"
],
- "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb db 10",
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb db 100",
"expExitCode": "0",
"verifyCmd": "$TC qdisc show dev $DUMMY",
- "matchPattern": "qdisc sfb 1: root refcnt [0-9]+ rehash 600s db 10ms",
+ "matchPattern": "qdisc sfb 1: root refcnt [0-9]+ rehash 600s db 100ms",
"matchCount": "1",
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root",
diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh
index afb0cd86fa3d..eb357bd7923c 100755
--- a/tools/testing/selftests/tc-testing/tdc.sh
+++ b/tools/testing/selftests/tc-testing/tdc.sh
@@ -2,5 +2,6 @@
# SPDX-License-Identifier: GPL-2.0
modprobe netdevsim
+modprobe sch_teql
./tdc.py -c actions --nobuildebpf
./tdc.py -c qdisc
diff --git a/tools/testing/selftests/user_events/dyn_test.c b/tools/testing/selftests/user_events/dyn_test.c
index 8879a7b04c6a..d6979a48478f 100644
--- a/tools/testing/selftests/user_events/dyn_test.c
+++ b/tools/testing/selftests/user_events/dyn_test.c
@@ -16,42 +16,140 @@
#include "../kselftest_harness.h"
-const char *dyn_file = "/sys/kernel/tracing/dynamic_events";
-const char *clear = "!u:__test_event";
+const char *abi_file = "/sys/kernel/tracing/user_events_data";
+const char *enable_file = "/sys/kernel/tracing/events/user_events/__test_event/enable";
-static int Append(const char *value)
+static bool wait_for_delete(void)
{
- int fd = open(dyn_file, O_RDWR | O_APPEND);
- int ret = write(fd, value, strlen(value));
+ int i;
+
+ for (i = 0; i < 1000; ++i) {
+ int fd = open(enable_file, O_RDONLY);
+
+ if (fd == -1)
+ return true;
+
+ close(fd);
+ usleep(1000);
+ }
+
+ return false;
+}
+
+static int reg_event(int fd, int *check, int bit, const char *value)
+{
+ struct user_reg reg = {0};
+
+ reg.size = sizeof(reg);
+ reg.name_args = (__u64)value;
+ reg.enable_bit = bit;
+ reg.enable_addr = (__u64)check;
+ reg.enable_size = sizeof(*check);
+
+ if (ioctl(fd, DIAG_IOCSREG, &reg) == -1)
+ return -1;
+
+ return 0;
+}
+
+static int unreg_event(int fd, int *check, int bit)
+{
+ struct user_unreg unreg = {0};
+
+ unreg.size = sizeof(unreg);
+ unreg.disable_bit = bit;
+ unreg.disable_addr = (__u64)check;
+
+ return ioctl(fd, DIAG_IOCSUNREG, &unreg);
+}
+
+static int parse(int *check, const char *value)
+{
+ int fd = open(abi_file, O_RDWR);
+ int ret;
+
+ if (fd == -1)
+ return -1;
+
+ /* Until we have persist flags via dynamic events, use the base name */
+ if (value[0] != 'u' || value[1] != ':') {
+ close(fd);
+ return -1;
+ }
+
+ ret = reg_event(fd, check, 31, value + 2);
+
+ if (ret != -1) {
+ if (unreg_event(fd, check, 31) == -1)
+ printf("WARN: Couldn't unreg event\n");
+ }
close(fd);
+
return ret;
}
-#define CLEAR() \
+static int check_match(int *check, const char *first, const char *second, bool *match)
+{
+ int fd = open(abi_file, O_RDWR);
+ int ret = -1;
+
+ if (fd == -1)
+ return -1;
+
+ if (reg_event(fd, check, 31, first) == -1)
+ goto cleanup;
+
+ if (reg_event(fd, check, 30, second) == -1) {
+ if (errno == EADDRINUSE) {
+ /* Name is in use, with different fields */
+ *match = false;
+ ret = 0;
+ }
+
+ goto cleanup;
+ }
+
+ *match = true;
+ ret = 0;
+cleanup:
+ unreg_event(fd, check, 31);
+ unreg_event(fd, check, 30);
+
+ close(fd);
+
+ wait_for_delete();
+
+ return ret;
+}
+
+#define TEST_MATCH(x, y) \
do { \
- int ret = Append(clear); \
- if (ret == -1) \
- ASSERT_EQ(ENOENT, errno); \
+ bool match; \
+ ASSERT_NE(-1, check_match(&self->check, x, y, &match)); \
+ ASSERT_EQ(true, match); \
} while (0)
-#define TEST_PARSE(x) \
+#define TEST_NMATCH(x, y) \
do { \
- ASSERT_NE(-1, Append(x)); \
- CLEAR(); \
+ bool match; \
+ ASSERT_NE(-1, check_match(&self->check, x, y, &match)); \
+ ASSERT_EQ(false, match); \
} while (0)
-#define TEST_NPARSE(x) ASSERT_EQ(-1, Append(x))
+#define TEST_PARSE(x) ASSERT_NE(-1, parse(&self->check, x))
+
+#define TEST_NPARSE(x) ASSERT_EQ(-1, parse(&self->check, x))
FIXTURE(user) {
+ int check;
};
FIXTURE_SETUP(user) {
- CLEAR();
}
FIXTURE_TEARDOWN(user) {
- CLEAR();
+ wait_for_delete();
}
TEST_F(user, basic_types) {
@@ -95,33 +193,30 @@ TEST_F(user, size_types) {
TEST_NPARSE("u:__test_event char a 20");
}
-TEST_F(user, flags) {
- /* Should work */
- TEST_PARSE("u:__test_event:BPF_ITER u32 a");
- /* Forward compat */
- TEST_PARSE("u:__test_event:BPF_ITER,FLAG_FUTURE u32 a");
-}
-
TEST_F(user, matching) {
- /* Register */
- ASSERT_NE(-1, Append("u:__test_event struct custom a 20"));
- /* Should not match */
- TEST_NPARSE("!u:__test_event struct custom b");
- /* Should match */
- TEST_PARSE("!u:__test_event struct custom a");
- /* Multi field reg */
- ASSERT_NE(-1, Append("u:__test_event u32 a; u32 b"));
- /* Non matching cases */
- TEST_NPARSE("!u:__test_event u32 a");
- TEST_NPARSE("!u:__test_event u32 b");
- TEST_NPARSE("!u:__test_event u32 a; u32 ");
- TEST_NPARSE("!u:__test_event u32 a; u32 a");
- /* Matching case */
- TEST_PARSE("!u:__test_event u32 a; u32 b");
- /* Register */
- ASSERT_NE(-1, Append("u:__test_event u32 a; u32 b"));
- /* Ensure trailing semi-colon case */
- TEST_PARSE("!u:__test_event u32 a; u32 b;");
+ /* Single name matches */
+ TEST_MATCH("__test_event u32 a",
+ "__test_event u32 a");
+
+ /* Multiple names match */
+ TEST_MATCH("__test_event u32 a; u32 b",
+ "__test_event u32 a; u32 b");
+
+ /* Multiple names match with dangling ; */
+ TEST_MATCH("__test_event u32 a; u32 b",
+ "__test_event u32 a; u32 b;");
+
+ /* Single name doesn't match */
+ TEST_NMATCH("__test_event u32 a",
+ "__test_event u32 b");
+
+ /* Multiple names don't match */
+ TEST_NMATCH("__test_event u32 a; u32 b",
+ "__test_event u32 b; u32 a");
+
+ /* Types don't match */
+ TEST_NMATCH("__test_event u64 a; u64 b",
+ "__test_event u32 a; u32 b");
}
int main(int argc, char **argv)
diff --git a/tools/testing/selftests/user_events/ftrace_test.c b/tools/testing/selftests/user_events/ftrace_test.c
index 7c99cef94a65..eb6904d89f14 100644
--- a/tools/testing/selftests/user_events/ftrace_test.c
+++ b/tools/testing/selftests/user_events/ftrace_test.c
@@ -102,30 +102,56 @@ err:
return -1;
}
+static bool wait_for_delete(void)
+{
+ int i;
+
+ for (i = 0; i < 1000; ++i) {
+ int fd = open(enable_file, O_RDONLY);
+
+ if (fd == -1)
+ return true;
+
+ close(fd);
+ usleep(1000);
+ }
+
+ return false;
+}
+
static int clear(int *check)
{
struct user_unreg unreg = {0};
+ int fd;
unreg.size = sizeof(unreg);
unreg.disable_bit = 31;
unreg.disable_addr = (__u64)check;
- int fd = open(data_file, O_RDWR);
+ fd = open(data_file, O_RDWR);
if (fd == -1)
return -1;
if (ioctl(fd, DIAG_IOCSUNREG, &unreg) == -1)
if (errno != ENOENT)
- return -1;
-
- if (ioctl(fd, DIAG_IOCSDEL, "__test_event") == -1)
- if (errno != ENOENT)
- return -1;
+ goto fail;
+
+ if (ioctl(fd, DIAG_IOCSDEL, "__test_event") == -1) {
+ if (errno == EBUSY) {
+ if (!wait_for_delete())
+ goto fail;
+ } else if (errno != ENOENT)
+ goto fail;
+ }
close(fd);
return 0;
+fail:
+ close(fd);
+
+ return -1;
}
static int check_print_fmt(const char *event, const char *expected, int *check)
@@ -155,9 +181,8 @@ static int check_print_fmt(const char *event, const char *expected, int *check)
/* Register should work */
ret = ioctl(fd, DIAG_IOCSREG, &reg);
- close(fd);
-
if (ret != 0) {
+ close(fd);
printf("Reg failed in fmt\n");
return ret;
}
@@ -165,6 +190,8 @@ static int check_print_fmt(const char *event, const char *expected, int *check)
/* Ensure correct print_fmt */
ret = get_print_fmt(print_fmt, sizeof(print_fmt));
+ close(fd);
+
if (ret != 0)
return ret;
@@ -228,6 +255,12 @@ TEST_F(user, register_events) {
ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
ASSERT_EQ(0, reg.write_index);
+ /* Multiple registers to same name but different args should fail */
+ reg.enable_bit = 29;
+ reg.name_args = (__u64)"__test_event u32 field1;";
+ ASSERT_EQ(-1, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
+ ASSERT_EQ(EADDRINUSE, errno);
+
/* Ensure disabled */
self->enable_fd = open(enable_file, O_RDWR);
ASSERT_NE(-1, self->enable_fd);
@@ -250,10 +283,10 @@ TEST_F(user, register_events) {
unreg.disable_bit = 30;
ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSUNREG, &unreg));
- /* Delete should work only after close and unregister */
+ /* Delete should have been auto-done after close and unregister */
close(self->data_fd);
- self->data_fd = open(data_file, O_RDWR);
- ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSDEL, "__test_event"));
+
+ ASSERT_EQ(true, wait_for_delete());
}
TEST_F(user, write_events) {
@@ -310,6 +343,39 @@ TEST_F(user, write_events) {
ASSERT_EQ(EINVAL, errno);
}
+TEST_F(user, write_empty_events) {
+ struct user_reg reg = {0};
+ struct iovec io[1];
+ int before = 0, after = 0;
+
+ reg.size = sizeof(reg);
+ reg.name_args = (__u64)"__test_event";
+ reg.enable_bit = 31;
+ reg.enable_addr = (__u64)&self->check;
+ reg.enable_size = sizeof(self->check);
+
+ io[0].iov_base = &reg.write_index;
+ io[0].iov_len = sizeof(reg.write_index);
+
+ /* Register should work */
+ ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
+ ASSERT_EQ(0, reg.write_index);
+ ASSERT_EQ(0, self->check);
+
+ /* Enable event */
+ self->enable_fd = open(enable_file, O_RDWR);
+ ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1")))
+
+ /* Event should now be enabled */
+ ASSERT_EQ(1 << reg.enable_bit, self->check);
+
+ /* Write should make it out to ftrace buffers */
+ before = trace_bytes();
+ ASSERT_NE(-1, writev(self->data_fd, (const struct iovec *)io, 1));
+ after = trace_bytes();
+ ASSERT_GT(after, before);
+}
+
TEST_F(user, write_fault) {
struct user_reg reg = {0};
struct iovec io[2];
diff --git a/tools/testing/selftests/user_events/perf_test.c b/tools/testing/selftests/user_events/perf_test.c
index a070258d4449..8b09be566fa2 100644
--- a/tools/testing/selftests/user_events/perf_test.c
+++ b/tools/testing/selftests/user_events/perf_test.c
@@ -81,6 +81,32 @@ static int get_offset(void)
return offset;
}
+static int clear(int *check)
+{
+ struct user_unreg unreg = {0};
+
+ unreg.size = sizeof(unreg);
+ unreg.disable_bit = 31;
+ unreg.disable_addr = (__u64)check;
+
+ int fd = open(data_file, O_RDWR);
+
+ if (fd == -1)
+ return -1;
+
+ if (ioctl(fd, DIAG_IOCSUNREG, &unreg) == -1)
+ if (errno != ENOENT)
+ return -1;
+
+ if (ioctl(fd, DIAG_IOCSDEL, "__test_event") == -1)
+ if (errno != ENOENT)
+ return -1;
+
+ close(fd);
+
+ return 0;
+}
+
FIXTURE(user) {
int data_fd;
int check;
@@ -93,6 +119,9 @@ FIXTURE_SETUP(user) {
FIXTURE_TEARDOWN(user) {
close(self->data_fd);
+
+ if (clear(&self->check) != 0)
+ printf("WARNING: Clear didn't work!\n");
}
TEST_F(user, perf_write) {
@@ -160,6 +189,59 @@ TEST_F(user, perf_write) {
ASSERT_EQ(0, self->check);
}
+TEST_F(user, perf_empty_events) {
+ struct perf_event_attr pe = {0};
+ struct user_reg reg = {0};
+ struct perf_event_mmap_page *perf_page;
+ int page_size = sysconf(_SC_PAGESIZE);
+ int id, fd;
+ __u32 *val;
+
+ reg.size = sizeof(reg);
+ reg.name_args = (__u64)"__test_event";
+ reg.enable_bit = 31;
+ reg.enable_addr = (__u64)&self->check;
+ reg.enable_size = sizeof(self->check);
+
+ /* Register should work */
+ ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
+ ASSERT_EQ(0, reg.write_index);
+ ASSERT_EQ(0, self->check);
+
+ /* Id should be there */
+ id = get_id();
+ ASSERT_NE(-1, id);
+
+ pe.type = PERF_TYPE_TRACEPOINT;
+ pe.size = sizeof(pe);
+ pe.config = id;
+ pe.sample_type = PERF_SAMPLE_RAW;
+ pe.sample_period = 1;
+ pe.wakeup_events = 1;
+
+ /* Tracepoint attach should work */
+ fd = perf_event_open(&pe, 0, -1, -1, 0);
+ ASSERT_NE(-1, fd);
+
+ perf_page = mmap(NULL, page_size * 2, PROT_READ, MAP_SHARED, fd, 0);
+ ASSERT_NE(MAP_FAILED, perf_page);
+
+ /* Status should be updated */
+ ASSERT_EQ(1 << reg.enable_bit, self->check);
+
+ /* Ensure write shows up at correct offset */
+ ASSERT_NE(-1, write(self->data_fd, &reg.write_index,
+ sizeof(reg.write_index)));
+ val = (void *)(((char *)perf_page) + perf_page->data_offset);
+ ASSERT_EQ(PERF_RECORD_SAMPLE, *val);
+
+ munmap(perf_page, page_size * 2);
+ close(fd);
+
+ /* Status should be updated */
+ ASSERT_EQ(0, self->check);
+}
+
int main(int argc, char **argv)
{
return test_harness_run(argc, argv);
diff --git a/tools/virtio/ringtest/.gitignore b/tools/virtio/ringtest/.gitignore
new file mode 100644
index 000000000000..100b9e30c0f4
--- /dev/null
+++ b/tools/virtio/ringtest/.gitignore
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+/noring
+/ptr_ring
+/ring
+/virtio_ring_0_9
+/virtio_ring_inorder
+/virtio_ring_poll
diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
index b68920d52750..d18dd317e27f 100644
--- a/tools/virtio/ringtest/main.h
+++ b/tools/virtio/ringtest/main.h
@@ -8,6 +8,7 @@
#ifndef MAIN_H
#define MAIN_H
+#include <assert.h>
#include <stdbool.h>
extern int param;
@@ -95,6 +96,8 @@ extern unsigned ring_size;
#define cpu_relax() asm ("rep; nop" ::: "memory")
#elif defined(__s390x__)
#define cpu_relax() barrier()
+#elif defined(__aarch64__)
+#define cpu_relax() asm ("yield" ::: "memory")
#else
#define cpu_relax() assert(0)
#endif
@@ -112,6 +115,8 @@ static inline void busy_wait(void)
#if defined(__x86_64__) || defined(__i386__)
#define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
+#elif defined(__aarch64__)
+#define smp_mb() asm volatile("dmb ish" ::: "memory")
#else
/*
* Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
@@ -136,10 +141,16 @@ static inline void busy_wait(void)
#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)
#define smp_wmb() barrier()
+#elif defined(__aarch64__)
+#define smp_wmb() asm volatile("dmb ishst" ::: "memory")
#else
#define smp_wmb() smp_release()
#endif
+#ifndef __always_inline
+#define __always_inline inline __attribute__((always_inline))
+#endif
+
static __always_inline
void __read_once_size(const volatile void *p, void *res, int size)
{
diff --git a/tools/virtio/virtio-trace/README b/tools/virtio/virtio-trace/README
index 4fb9368bf751..0127ff0c54b0 100644
--- a/tools/virtio/virtio-trace/README
+++ b/tools/virtio/virtio-trace/README
@@ -95,7 +95,7 @@ Run
1) Enable ftrace in the guest
<Example>
- # echo 1 > /sys/kernel/debug/tracing/events/sched/enable
+ # echo 1 > /sys/kernel/tracing/events/sched/enable
2) Run trace agent in the guest
This agent must be operated as root.
diff --git a/tools/virtio/virtio-trace/trace-agent.c b/tools/virtio/virtio-trace/trace-agent.c
index cdfe77c2b4c8..7e2d9bbf0b84 100644
--- a/tools/virtio/virtio-trace/trace-agent.c
+++ b/tools/virtio/virtio-trace/trace-agent.c
@@ -18,8 +18,9 @@
#define PIPE_DEF_BUFS 16
#define PIPE_MIN_SIZE (PAGE_SIZE*PIPE_DEF_BUFS)
#define PIPE_MAX_SIZE (1024*1024)
-#define READ_PATH_FMT \
- "/sys/kernel/debug/tracing/per_cpu/cpu%d/trace_pipe_raw"
+#define TRACEFS "/sys/kernel/tracing"
+#define DEBUGFS "/sys/kernel/debug/tracing"
+#define READ_PATH_FMT "%s/per_cpu/cpu%d/trace_pipe_raw"
#define WRITE_PATH_FMT "/dev/virtio-ports/trace-path-cpu%d"
#define CTL_PATH "/dev/virtio-ports/agent-ctl-path"
@@ -120,9 +121,12 @@ static const char *make_path(int cpu_num, bool this_is_write_path)
if (this_is_write_path)
/* write(output) path */
ret = snprintf(buf, PATH_MAX, WRITE_PATH_FMT, cpu_num);
- else
+ else {
/* read(input) path */
- ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, cpu_num);
+ ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, TRACEFS, cpu_num);
+ if (ret > 0 && access(buf, F_OK) != 0)
+ ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, DEBUGFS, cpu_num);
+ }
if (ret <= 0) {
pr_err("Failed to generate %s path(CPU#%d):%d\n",
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index cb5c13eee193..65f94f592ff8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -686,6 +686,24 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn
return __kvm_handle_hva_range(kvm, &range);
}
+
+static bool kvm_change_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+ /*
+ * Skipping invalid memslots is correct if and only change_pte() is
+ * surrounded by invalidate_range_{start,end}(), which is currently
+ * guaranteed by the primary MMU. If that ever changes, KVM needs to
+ * unmap the memslot instead of skipping the memslot to ensure that KVM
+ * doesn't hold references to the old PFN.
+ */
+ WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count));
+
+ if (range->slot->flags & KVM_MEMSLOT_INVALID)
+ return false;
+
+ return kvm_set_spte_gfn(kvm, range);
+}
+
static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long address,
@@ -707,7 +725,7 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
if (!READ_ONCE(kvm->mmu_invalidate_in_progress))
return;
- kvm_handle_hva_range(mn, address, address + 1, pte, kvm_set_spte_gfn);
+ kvm_handle_hva_range(mn, address, address + 1, pte, kvm_change_spte_gfn);
}
void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
@@ -3962,18 +3980,19 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
}
vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus);
- r = xa_insert(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, GFP_KERNEL_ACCOUNT);
- BUG_ON(r == -EBUSY);
+ r = xa_reserve(&kvm->vcpu_array, vcpu->vcpu_idx, GFP_KERNEL_ACCOUNT);
if (r)
goto unlock_vcpu_destroy;
/* Now it's all set up, let userspace reach it */
kvm_get_kvm(kvm);
r = create_vcpu_fd(vcpu);
- if (r < 0) {
- xa_erase(&kvm->vcpu_array, vcpu->vcpu_idx);
- kvm_put_kvm_no_destroy(kvm);
- goto unlock_vcpu_destroy;
+ if (r < 0)
+ goto kvm_put_xa_release;
+
+ if (KVM_BUG_ON(!!xa_store(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, 0), kvm)) {
+ r = -EINVAL;
+ goto kvm_put_xa_release;
}
/*
@@ -3988,6 +4007,9 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
kvm_create_vcpu_debugfs(vcpu);
return r;
+kvm_put_xa_release:
+ kvm_put_kvm_no_destroy(kvm);
+ xa_release(&kvm->vcpu_array, vcpu->vcpu_idx);
unlock_vcpu_destroy:
mutex_unlock(&kvm->lock);
kvm_dirty_ring_free(&vcpu->dirty_ring);
@@ -5184,7 +5206,20 @@ static void hardware_disable_all(void)
static int hardware_enable_all(void)
{
atomic_t failed = ATOMIC_INIT(0);
- int r = 0;
+ int r;
+
+ /*
+ * Do not enable hardware virtualization if the system is going down.
+ * If userspace initiated a forced reboot, e.g. reboot -f, then it's
+ * possible for an in-flight KVM_CREATE_VM to trigger hardware enabling
+ * after kvm_reboot() is called. Note, this relies on system_state
+ * being set _before_ kvm_reboot(), which is why KVM uses a syscore ops
+ * hook instead of registering a dedicated reboot notifier (the latter
+ * runs before system_state is updated).
+ */
+ if (system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF ||
+ system_state == SYSTEM_RESTART)
+ return -EBUSY;
/*
* When onlining a CPU, cpu_online_mask is set before kvm_online_cpu()
@@ -5197,6 +5232,8 @@ static int hardware_enable_all(void)
cpus_read_lock();
mutex_lock(&kvm_lock);
+ r = 0;
+
kvm_usage_count++;
if (kvm_usage_count == 1) {
on_each_cpu(hardware_enable_nolock, &failed, 1);
@@ -5213,26 +5250,24 @@ static int hardware_enable_all(void)
return r;
}
-static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
- void *v)
+static void kvm_shutdown(void)
{
/*
- * Some (well, at least mine) BIOSes hang on reboot if
- * in vmx root mode.
- *
- * And Intel TXT required VMX off for all cpu when system shutdown.
+ * Disable hardware virtualization and set kvm_rebooting to indicate
+ * that KVM has asynchronously disabled hardware virtualization, i.e.
+ * that relevant errors and exceptions aren't entirely unexpected.
+ * Some flavors of hardware virtualization need to be disabled before
+ * transferring control to firmware (to perform shutdown/reboot), e.g.
+ * on x86, virtualization can block INIT interrupts, which are used by
+ * firmware to pull APs back under firmware control. Note, this path
+ * is used for both shutdown and reboot scenarios, i.e. neither name is
+ * 100% comprehensive.
*/
pr_info("kvm: exiting hardware virtualization\n");
kvm_rebooting = true;
on_each_cpu(hardware_disable_nolock, NULL, 1);
- return NOTIFY_OK;
}
-static struct notifier_block kvm_reboot_notifier = {
- .notifier_call = kvm_reboot,
- .priority = 0,
-};
-
static int kvm_suspend(void)
{
/*
@@ -5263,6 +5298,7 @@ static void kvm_resume(void)
static struct syscore_ops kvm_syscore_ops = {
.suspend = kvm_suspend,
.resume = kvm_resume,
+ .shutdown = kvm_shutdown,
};
#else /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */
static int hardware_enable_all(void)
@@ -5967,7 +6003,6 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
if (r)
return r;
- register_reboot_notifier(&kvm_reboot_notifier);
register_syscore_ops(&kvm_syscore_ops);
#endif
@@ -6039,7 +6074,6 @@ err_cpu_kick_mask:
err_vcpu_cache:
#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
unregister_syscore_ops(&kvm_syscore_ops);
- unregister_reboot_notifier(&kvm_reboot_notifier);
cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE);
#endif
return r;
@@ -6065,7 +6099,6 @@ void kvm_exit(void)
kvm_async_pf_deinit();
#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
unregister_syscore_ops(&kvm_syscore_ops);
- unregister_reboot_notifier(&kvm_reboot_notifier);
cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE);
#endif
kvm_irqfd_exit();