summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.mailmap1
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst6
-rw-r--r--Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml3
-rw-r--r--MAINTAINERS4
-rw-r--r--Makefile2
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi3
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi1
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts20
-rw-r--r--arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts12
-rw-r--r--arch/arm64/boot/dts/freescale/imx95.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts2
-rw-r--r--arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi1
-rw-r--r--arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi23
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts28
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3576.dtsi2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi20
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi5
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi35
-rw-r--r--arch/arm64/configs/defconfig1
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/include/asm/msr-index.h1
-rw-r--r--arch/x86/kernel/cpu/amd.c10
-rw-r--r--drivers/base/power/main.c19
-rw-r--r--drivers/block/nbd.c6
-rw-r--r--drivers/char/agp/amd64-agp.c16
-rw-r--r--drivers/clk/clk-scmi.c18
-rw-r--r--drivers/clk/imx/clk-imx95-blk-ctl.c12
-rw-r--r--drivers/gpu/drm/drm_framebuffer.c31
-rw-r--r--drivers/gpu/drm/drm_gem.c48
-rw-r--r--drivers/gpu/drm/drm_gem_framebuffer_helper.c16
-rw-r--r--drivers/gpu/drm/drm_internal.h2
-rw-r--r--drivers/gpu/drm/drm_panic_qr.rs2
-rw-r--r--drivers/gpu/drm/i915/display/intel_bios.c8
-rw-r--r--drivers/gpu/drm/imagination/pvr_power.c4
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_debugfs.c6
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_debugfs.h5
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drm.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c27
-rw-r--r--drivers/gpu/drm/tegra/nvdec.c6
-rw-r--r--drivers/gpu/drm/xe/xe_devcoredump.c38
-rw-r--r--drivers/gpu/drm/xe/xe_gt_pagefault.c1
-rw-r--r--drivers/gpu/drm/xe/xe_lmtt.c11
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c2
-rw-r--r--drivers/gpu/drm/xe/xe_module.c2
-rw-r--r--drivers/gpu/drm/xe/xe_pci.c1
-rw-r--r--drivers/gpu/drm/xe/xe_pm.c11
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw.c6
-rw-r--r--drivers/gpu/drm/xe/xe_wa_oob.rules4
-rw-r--r--drivers/hid/hid-core.c19
-rw-r--r--drivers/hid/hid-debug.c2
-rw-r--r--drivers/md/dm-bufio.c6
-rw-r--r--drivers/md/md-bitmap.c3
-rw-r--r--drivers/md/raid1.c4
-rw-r--r--drivers/md/raid10.c12
-rw-r--r--drivers/pci/controller/pci-host-common.c4
-rw-r--r--drivers/pci/controller/pcie-apple.c53
-rw-r--r--drivers/pci/ecam.c2
-rw-r--r--drivers/pci/msi/msi.c4
-rw-r--r--drivers/soc/aspeed/aspeed-lpc-snoop.c13
-rw-r--r--fs/bcachefs/btree_cache.c26
-rw-r--r--fs/bcachefs/btree_cache.h1
-rw-r--r--fs/bcachefs/btree_io.c8
-rw-r--r--fs/bcachefs/btree_node_scan.c84
-rw-r--r--fs/bcachefs/debug.c11
-rw-r--r--fs/bcachefs/errcode.h1
-rw-r--r--fs/bcachefs/error.c6
-rw-r--r--fs/bcachefs/extents.c16
-rw-r--r--fs/bcachefs/fsck.c33
-rw-r--r--fs/bcachefs/io_misc.c27
-rw-r--r--fs/bcachefs/io_misc.h2
-rw-r--r--fs/bcachefs/journal_reclaim.c6
-rw-r--r--fs/bcachefs/recovery.c23
-rw-r--r--fs/bcachefs/recovery_passes.c2
-rw-r--r--fs/erofs/data.c21
-rw-r--r--fs/erofs/decompressor.c12
-rw-r--r--fs/erofs/dir.c6
-rw-r--r--fs/erofs/fileio.c14
-rw-r--r--fs/erofs/internal.h6
-rw-r--r--fs/erofs/zdata.c8
-rw-r--r--fs/erofs/zmap.c9
-rw-r--r--fs/eventpoll.c458
-rw-r--r--fs/proc/task_mmu.c14
-rw-r--r--fs/smb/server/smb2pdu.c29
-rw-r--r--fs/smb/server/transport_rdma.c5
-rw-r--r--fs/smb/server/vfs.c1
-rw-r--r--include/drm/drm_file.h3
-rw-r--r--include/drm/drm_framebuffer.h7
-rw-r--r--include/linux/blkdev.h5
-rw-r--r--include/linux/io_uring_types.h2
-rw-r--r--include/linux/irqchip/irq-msi-lib.h1
-rw-r--r--include/linux/mm.h5
-rw-r--r--include/linux/sched.h4
-rw-r--r--include/linux/vm_event_item.h2
-rw-r--r--io_uring/io_uring.c3
-rw-r--r--io_uring/msg_ring.c4
-rw-r--r--io_uring/zcrx.c3
-rw-r--r--kernel/events/core.c14
-rw-r--r--kernel/power/suspend.c5
-rw-r--r--kernel/sched/core.c9
-rw-r--r--kernel/sched/debug.c4
-rw-r--r--kernel/trace/trace_probe.c2
-rw-r--r--lib/alloc_tag.c3
-rw-r--r--lib/maple_tree.c1
-rw-r--r--mm/damon/core.c8
-rw-r--r--mm/hugetlb.c9
-rw-r--r--mm/kasan/report.c45
-rw-r--r--mm/memcontrol.c2
-rw-r--r--mm/migrate.c14
-rw-r--r--mm/rmap.c46
-rw-r--r--mm/vmalloc.c22
-rw-r--r--mm/vmstat.c2
-rw-r--r--rust/kernel/drm/device.rs12
-rw-r--r--rust/kernel/drm/driver.rs1
-rw-r--r--samples/damon/mtier.c8
-rw-r--r--samples/damon/prcl.c8
-rw-r--r--samples/damon/wsse.c8
-rw-r--r--scripts/gdb/linux/constants.py.in7
-rw-r--r--scripts/gdb/linux/interrupts.py16
-rw-r--r--scripts/gdb/linux/mapletree.py252
-rw-r--r--scripts/gdb/linux/vfs.py2
-rw-r--r--scripts/gdb/linux/xarray.py28
-rw-r--r--tools/arch/x86/include/asm/msr-index.h1
-rw-r--r--tools/include/linux/kallsyms.h4
-rw-r--r--tools/testing/selftests/hid/tests/test_mouse.py70
131 files changed, 1482 insertions, 612 deletions
diff --git a/.mailmap b/.mailmap
index b0ace71968ab..85ad46d20220 100644
--- a/.mailmap
+++ b/.mailmap
@@ -416,6 +416,7 @@ Kenneth W Chen <kenneth.w.chen@intel.com>
Kenneth Westfield <quic_kwestfie@quicinc.com> <kwestfie@codeaurora.org>
Kiran Gunda <quic_kgunda@quicinc.com> <kgunda@codeaurora.org>
Kirill Tkhai <tkhai@ya.ru> <ktkhai@virtuozzo.com>
+Kirill A. Shutemov <kas@kernel.org> <kirill.shutemov@linux.intel.com>
Kishon Vijay Abraham I <kishon@kernel.org> <kishon@ti.com>
Konrad Dybcio <konradybcio@kernel.org> <konrad.dybcio@linaro.org>
Konrad Dybcio <konradybcio@kernel.org> <konrad.dybcio@somainline.org>
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 0cc35a14afbe..bd98ea3175ec 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1732,12 +1732,6 @@ The following nested keys are defined.
numa_hint_faults (npn)
Number of NUMA hinting faults.
- numa_task_migrated (npn)
- Number of task migration by NUMA balancing.
-
- numa_task_swapped (npn)
- Number of task swap by NUMA balancing.
-
pgdemote_kswapd
Number of pages demoted by kswapd.
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml
index 2985c8c717d7..5403242545ab 100644
--- a/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml
@@ -52,6 +52,9 @@ properties:
'#clock-cells':
const: 1
+ '#reset-cells':
+ const: 1
+
required:
- compatible
- reg
diff --git a/MAINTAINERS b/MAINTAINERS
index 05841dc1d372..60bba48f5479 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10506,7 +10506,7 @@ S: Maintained
F: block/partitions/efi.*
HABANALABS PCI DRIVER
-M: Ofir Bitton <obitton@habana.ai>
+M: Yaron Avizrat <yaron.avizrat@intel.com>
L: dri-devel@lists.freedesktop.org
S: Supported
C: irc://irc.oftc.net/dri-devel
@@ -26939,7 +26939,7 @@ F: arch/x86/kernel/stacktrace.c
F: arch/x86/kernel/unwind_*.c
X86 TRUST DOMAIN EXTENSIONS (TDX)
-M: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+M: Kirill A. Shutemov <kas@kernel.org>
R: Dave Hansen <dave.hansen@linux.intel.com>
L: x86@kernel.org
L: linux-coco@lists.linux.dev
diff --git a/Makefile b/Makefile
index 7eea2a41c905..c09766beb7ef 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 16
SUBLEVEL = 0
-EXTRAVERSION = -rc5
+EXTRAVERSION = -rc6
NAME = Baby Opossum Posse
# *DOCUMENTATION*
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
index 0baf256b4400..983b2f0e8797 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
@@ -687,11 +687,12 @@
};
wdog0: watchdog@2ad0000 {
- compatible = "fsl,imx21-wdt";
+ compatible = "fsl,ls1046a-wdt", "fsl,imx21-wdt";
reg = <0x0 0x2ad0000 0x0 0x10000>;
interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
QORIQ_CLK_PLL_DIV(2)>;
+ big-endian;
};
edma0: dma-controller@2c00000 {
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
index d29710772569..1594ce9182a5 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
@@ -464,6 +464,7 @@
};
reg_nvcc_sd: LDO5 {
+ regulator-always-on;
regulator-max-microvolt = <3300000>;
regulator-min-microvolt = <1800000>;
regulator-name = "On-module +V3.3_1.8_SD (LDO5)";
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi
index 2f740d74707b..4bf818873fe3 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw71xx.dtsi
@@ -70,7 +70,7 @@
tpm@1 {
compatible = "atmel,attpm20p", "tcg,tpm_tis-spi";
reg = <0x1>;
- spi-max-frequency = <36000000>;
+ spi-max-frequency = <25000000>;
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
index 5ab3ffe9931d..cf747ec6fa16 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
@@ -110,7 +110,7 @@
tpm@1 {
compatible = "atmel,attpm20p", "tcg,tpm_tis-spi";
reg = <0x1>;
- spi-max-frequency = <36000000>;
+ spi-max-frequency = <25000000>;
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
index e2b5e7ac3e46..5eb114d2360a 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
@@ -122,7 +122,7 @@
tpm@1 {
compatible = "atmel,attpm20p", "tcg,tpm_tis-spi";
reg = <0x1>;
- spi-max-frequency = <36000000>;
+ spi-max-frequency = <25000000>;
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts
index 6daa2313f879..568d24265ddf 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts
@@ -201,7 +201,7 @@
tpm@0 {
compatible = "atmel,attpm20p", "tcg,tpm_tis-spi";
reg = <0x0>;
- spi-max-frequency = <36000000>;
+ spi-max-frequency = <25000000>;
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts b/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts
index 6c47f4b47356..9f4d0899a94d 100644
--- a/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx95-15x15-evk.dts
@@ -574,17 +574,17 @@
&scmi_iomuxc {
pinctrl_emdio: emdiogrp {
fsl,pins = <
- IMX95_PAD_ENET2_MDC__NETCMIX_TOP_NETC_MDC 0x57e
- IMX95_PAD_ENET2_MDIO__NETCMIX_TOP_NETC_MDIO 0x97e
+ IMX95_PAD_ENET2_MDC__NETCMIX_TOP_NETC_MDC 0x50e
+ IMX95_PAD_ENET2_MDIO__NETCMIX_TOP_NETC_MDIO 0x90e
>;
};
pinctrl_enetc0: enetc0grp {
fsl,pins = <
- IMX95_PAD_ENET1_TD3__NETCMIX_TOP_ETH0_RGMII_TD3 0x57e
- IMX95_PAD_ENET1_TD2__NETCMIX_TOP_ETH0_RGMII_TD2 0x57e
- IMX95_PAD_ENET1_TD1__NETCMIX_TOP_ETH0_RGMII_TD1 0x57e
- IMX95_PAD_ENET1_TD0__NETCMIX_TOP_ETH0_RGMII_TD0 0x57e
+ IMX95_PAD_ENET1_TD3__NETCMIX_TOP_ETH0_RGMII_TD3 0x50e
+ IMX95_PAD_ENET1_TD2__NETCMIX_TOP_ETH0_RGMII_TD2 0x50e
+ IMX95_PAD_ENET1_TD1__NETCMIX_TOP_ETH0_RGMII_TD1 0x50e
+ IMX95_PAD_ENET1_TD0__NETCMIX_TOP_ETH0_RGMII_TD0 0x50e
IMX95_PAD_ENET1_TX_CTL__NETCMIX_TOP_ETH0_RGMII_TX_CTL 0x57e
IMX95_PAD_ENET1_TXC__NETCMIX_TOP_ETH0_RGMII_TX_CLK 0x58e
IMX95_PAD_ENET1_RX_CTL__NETCMIX_TOP_ETH0_RGMII_RX_CTL 0x57e
@@ -598,10 +598,10 @@
pinctrl_enetc1: enetc1grp {
fsl,pins = <
- IMX95_PAD_ENET2_TD3__NETCMIX_TOP_ETH1_RGMII_TD3 0x57e
- IMX95_PAD_ENET2_TD2__NETCMIX_TOP_ETH1_RGMII_TD2 0x57e
- IMX95_PAD_ENET2_TD1__NETCMIX_TOP_ETH1_RGMII_TD1 0x57e
- IMX95_PAD_ENET2_TD0__NETCMIX_TOP_ETH1_RGMII_TD0 0x57e
+ IMX95_PAD_ENET2_TD3__NETCMIX_TOP_ETH1_RGMII_TD3 0x50e
+ IMX95_PAD_ENET2_TD2__NETCMIX_TOP_ETH1_RGMII_TD2 0x50e
+ IMX95_PAD_ENET2_TD1__NETCMIX_TOP_ETH1_RGMII_TD1 0x50e
+ IMX95_PAD_ENET2_TD0__NETCMIX_TOP_ETH1_RGMII_TD0 0x50e
IMX95_PAD_ENET2_TX_CTL__NETCMIX_TOP_ETH1_RGMII_TX_CTL 0x57e
IMX95_PAD_ENET2_TXC__NETCMIX_TOP_ETH1_RGMII_TX_CLK 0x58e
IMX95_PAD_ENET2_RX_CTL__NETCMIX_TOP_ETH1_RGMII_RX_CTL 0x57e
diff --git a/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts b/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts
index 6886ea766655..d7d845231312 100644
--- a/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx95-19x19-evk.dts
@@ -566,17 +566,17 @@
&scmi_iomuxc {
pinctrl_emdio: emdiogrp{
fsl,pins = <
- IMX95_PAD_ENET1_MDC__NETCMIX_TOP_NETC_MDC 0x57e
- IMX95_PAD_ENET1_MDIO__NETCMIX_TOP_NETC_MDIO 0x97e
+ IMX95_PAD_ENET1_MDC__NETCMIX_TOP_NETC_MDC 0x50e
+ IMX95_PAD_ENET1_MDIO__NETCMIX_TOP_NETC_MDIO 0x90e
>;
};
pinctrl_enetc0: enetc0grp {
fsl,pins = <
- IMX95_PAD_ENET1_TD3__NETCMIX_TOP_ETH0_RGMII_TD3 0x57e
- IMX95_PAD_ENET1_TD2__NETCMIX_TOP_ETH0_RGMII_TD2 0x57e
- IMX95_PAD_ENET1_TD1__NETCMIX_TOP_ETH0_RGMII_TD1 0x57e
- IMX95_PAD_ENET1_TD0__NETCMIX_TOP_ETH0_RGMII_TD0 0x57e
+ IMX95_PAD_ENET1_TD3__NETCMIX_TOP_ETH0_RGMII_TD3 0x50e
+ IMX95_PAD_ENET1_TD2__NETCMIX_TOP_ETH0_RGMII_TD2 0x50e
+ IMX95_PAD_ENET1_TD1__NETCMIX_TOP_ETH0_RGMII_TD1 0x50e
+ IMX95_PAD_ENET1_TD0__NETCMIX_TOP_ETH0_RGMII_TD0 0x50e
IMX95_PAD_ENET1_TX_CTL__NETCMIX_TOP_ETH0_RGMII_TX_CTL 0x57e
IMX95_PAD_ENET1_TXC__NETCMIX_TOP_ETH0_RGMII_TX_CLK 0x58e
IMX95_PAD_ENET1_RX_CTL__NETCMIX_TOP_ETH0_RGMII_RX_CTL 0x57e
diff --git a/arch/arm64/boot/dts/freescale/imx95.dtsi b/arch/arm64/boot/dts/freescale/imx95.dtsi
index 632631a29112..5aecdd9b62ff 100644
--- a/arch/arm64/boot/dts/freescale/imx95.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx95.dtsi
@@ -1708,7 +1708,7 @@
<0x9 0 1 0>;
reg-names = "dbi","atu", "dbi2", "app", "dma", "addr_space";
num-lanes = <1>;
- interrupts = <GIC_SPI 317 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 311 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "dma";
clocks = <&scmi_clk IMX95_CLK_HSIO>,
<&scmi_clk IMX95_CLK_HSIOPLL>,
diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
index ae7a275fd223..cefecb7a23cf 100644
--- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
+++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
@@ -1090,6 +1090,8 @@
};
&pmk8280_rtc {
+ qcom,uefi-rtc-info;
+
status = "okay";
};
diff --git a/arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi b/arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi
index c02fd4d15c96..e3888bc143a0 100644
--- a/arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi
+++ b/arch/arm64/boot/dts/qcom/x1e80100-pmics.dtsi
@@ -224,6 +224,7 @@
reg-names = "rtc", "alarm";
interrupts = <0x0 0x62 0x1 IRQ_TYPE_EDGE_RISING>;
qcom,no-alarm; /* alarm owned by ADSP */
+ qcom,uefi-rtc-info;
};
pmk8550_sdam_2: nvram@7100 {
diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi
index ab232e5c7ad6..4203b335a263 100644
--- a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi
+++ b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi
@@ -379,6 +379,18 @@
<0 RK_PA7 RK_FUNC_GPIO &pcfg_pull_up>;
};
};
+
+ spi1 {
+ spi1_csn0_gpio_pin: spi1-csn0-gpio-pin {
+ rockchip,pins =
+ <3 RK_PB1 RK_FUNC_GPIO &pcfg_pull_up_4ma>;
+ };
+
+ spi1_csn1_gpio_pin: spi1-csn1-gpio-pin {
+ rockchip,pins =
+ <3 RK_PB2 RK_FUNC_GPIO &pcfg_pull_up_4ma>;
+ };
+ };
};
&pmu_io_domains {
@@ -396,6 +408,17 @@
vqmmc-supply = <&vccio_sd>;
};
+&spi1 {
+ /*
+ * Hardware CS has a very slow rise time of about 6us,
+ * causing transmission errors.
+ * With cs-gpios we have a rise time of about 20ns.
+ */
+ cs-gpios = <&gpio3 RK_PB1 GPIO_ACTIVE_LOW>, <&gpio3 RK_PB2 GPIO_ACTIVE_LOW>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&spi1_clk &spi1_csn0_gpio_pin &spi1_csn1_gpio_pin &spi1_miso &spi1_mosi>;
+};
+
&tsadc {
status = "okay";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts
index 3c127c5c2607..a9021c524afb 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts
@@ -30,6 +30,7 @@
fan: gpio_fan {
compatible = "gpio-fan";
+ fan-supply = <&vcc12v_dcin>;
gpios = <&gpio0 RK_PD5 GPIO_ACTIVE_HIGH>;
gpio-fan,speed-map =
< 0 0>,
diff --git a/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts b/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts
index b09e789c75c4..801b40fea4e8 100644
--- a/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts
@@ -211,10 +211,38 @@
status = "okay";
};
+&cpu_b0 {
+ cpu-supply = <&vdd_cpu_big_s0>;
+};
+
+&cpu_b1 {
+ cpu-supply = <&vdd_cpu_big_s0>;
+};
+
+&cpu_b2 {
+ cpu-supply = <&vdd_cpu_big_s0>;
+};
+
+&cpu_b3 {
+ cpu-supply = <&vdd_cpu_big_s0>;
+};
+
&cpu_l0 {
cpu-supply = <&vdd_cpu_lit_s0>;
};
+&cpu_l1 {
+ cpu-supply = <&vdd_cpu_lit_s0>;
+};
+
+&cpu_l2 {
+ cpu-supply = <&vdd_cpu_lit_s0>;
+};
+
+&cpu_l3 {
+ cpu-supply = <&vdd_cpu_lit_s0>;
+};
+
&gmac0 {
phy-mode = "rgmii-id";
clock_in_out = "output";
diff --git a/arch/arm64/boot/dts/rockchip/rk3576.dtsi b/arch/arm64/boot/dts/rockchip/rk3576.dtsi
index 1086482f0479..64812e3bcb61 100644
--- a/arch/arm64/boot/dts/rockchip/rk3576.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3576.dtsi
@@ -615,7 +615,7 @@
<0 0 0 2 &pcie1_intc 1>,
<0 0 0 3 &pcie1_intc 2>,
<0 0 0 4 &pcie1_intc 3>;
- linux,pci-domain = <0>;
+ linux,pci-domain = <1>;
max-link-speed = <2>;
num-ib-windows = <8>;
num-viewport = <8>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi
index 7f874c77410c..6584d73660f6 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-base-pinctrl.dtsi
@@ -578,14 +578,14 @@
hdmim0_tx0_scl: hdmim0-tx0-scl {
rockchip,pins =
/* hdmim0_tx0_scl */
- <4 RK_PB7 5 &pcfg_pull_none>;
+ <4 RK_PB7 5 &pcfg_pull_none_drv_level_5_smt>;
};
/omit-if-no-ref/
hdmim0_tx0_sda: hdmim0-tx0-sda {
rockchip,pins =
/* hdmim0_tx0_sda */
- <4 RK_PC0 5 &pcfg_pull_none>;
+ <4 RK_PC0 5 &pcfg_pull_none_drv_level_1_smt>;
};
/omit-if-no-ref/
@@ -640,14 +640,14 @@
hdmim1_tx0_scl: hdmim1-tx0-scl {
rockchip,pins =
/* hdmim1_tx0_scl */
- <0 RK_PD5 11 &pcfg_pull_none>;
+ <0 RK_PD5 11 &pcfg_pull_none_drv_level_5_smt>;
};
/omit-if-no-ref/
hdmim1_tx0_sda: hdmim1-tx0-sda {
rockchip,pins =
/* hdmim1_tx0_sda */
- <0 RK_PD4 11 &pcfg_pull_none>;
+ <0 RK_PD4 11 &pcfg_pull_none_drv_level_1_smt>;
};
/omit-if-no-ref/
@@ -668,14 +668,14 @@
hdmim1_tx1_scl: hdmim1-tx1-scl {
rockchip,pins =
/* hdmim1_tx1_scl */
- <3 RK_PC6 5 &pcfg_pull_none>;
+ <3 RK_PC6 5 &pcfg_pull_none_drv_level_5_smt>;
};
/omit-if-no-ref/
hdmim1_tx1_sda: hdmim1-tx1-sda {
rockchip,pins =
/* hdmim1_tx1_sda */
- <3 RK_PC5 5 &pcfg_pull_none>;
+ <3 RK_PC5 5 &pcfg_pull_none_drv_level_1_smt>;
};
/omit-if-no-ref/
hdmim2_rx_cec: hdmim2-rx-cec {
@@ -709,14 +709,14 @@
hdmim2_tx0_scl: hdmim2-tx0-scl {
rockchip,pins =
/* hdmim2_tx0_scl */
- <3 RK_PC7 5 &pcfg_pull_none>;
+ <3 RK_PC7 5 &pcfg_pull_none_drv_level_5_smt>;
};
/omit-if-no-ref/
hdmim2_tx0_sda: hdmim2-tx0-sda {
rockchip,pins =
/* hdmim2_tx0_sda */
- <3 RK_PD0 5 &pcfg_pull_none>;
+ <3 RK_PD0 5 &pcfg_pull_none_drv_level_1_smt>;
};
/omit-if-no-ref/
@@ -730,14 +730,14 @@
hdmim2_tx1_scl: hdmim2-tx1-scl {
rockchip,pins =
/* hdmim2_tx1_scl */
- <1 RK_PA4 5 &pcfg_pull_none>;
+ <1 RK_PA4 5 &pcfg_pull_none_drv_level_5_smt>;
};
/omit-if-no-ref/
hdmim2_tx1_sda: hdmim2-tx1-sda {
rockchip,pins =
/* hdmim2_tx1_sda */
- <1 RK_PA3 5 &pcfg_pull_none>;
+ <1 RK_PA3 5 &pcfg_pull_none_drv_level_1_smt>;
};
/omit-if-no-ref/
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi
index cc37f082adea..b07543315f87 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi
@@ -321,6 +321,7 @@
bus-width = <4>;
cap-mmc-highspeed;
cap-sd-highspeed;
+ cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
disable-wp;
max-frequency = <150000000>;
no-sdio;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi
index 244c66faa161..fb48ddc04bcb 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-extra-pinctrl.dtsi
@@ -160,14 +160,15 @@
hdmim0_tx1_scl: hdmim0-tx1-scl {
rockchip,pins =
/* hdmim0_tx1_scl */
- <2 RK_PB5 4 &pcfg_pull_none>;
+ <2 RK_PB5 4 &pcfg_pull_none_drv_level_3_smt>;
};
/omit-if-no-ref/
hdmim0_tx1_sda: hdmim0-tx1-sda {
rockchip,pins =
/* hdmim0_tx1_sda */
- <2 RK_PB4 4 &pcfg_pull_none>;
+ <2 RK_PB4 4 &pcfg_pull_none_drv_level_1_smt>;
+
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts
index 8b717c4017a4..b2947b36fada 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts
@@ -474,6 +474,7 @@
bus-width = <4>;
cap-mmc-highspeed;
cap-sd-highspeed;
+ cd-gpios = <&gpio0 RK_PA4 GPIO_ACTIVE_LOW>;
disable-wp;
max-frequency = <150000000>;
no-sdio;
diff --git a/arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi b/arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi
index 5c645437b507..b0475b7c655a 100644
--- a/arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rockchip-pinconf.dtsi
@@ -333,6 +333,41 @@
};
/omit-if-no-ref/
+ pcfg_pull_none_drv_level_1_smt: pcfg-pull-none-drv-level-1-smt {
+ bias-disable;
+ drive-strength = <1>;
+ input-schmitt-enable;
+ };
+
+ /omit-if-no-ref/
+ pcfg_pull_none_drv_level_2_smt: pcfg-pull-none-drv-level-2-smt {
+ bias-disable;
+ drive-strength = <2>;
+ input-schmitt-enable;
+ };
+
+ /omit-if-no-ref/
+ pcfg_pull_none_drv_level_3_smt: pcfg-pull-none-drv-level-3-smt {
+ bias-disable;
+ drive-strength = <3>;
+ input-schmitt-enable;
+ };
+
+ /omit-if-no-ref/
+ pcfg_pull_none_drv_level_4_smt: pcfg-pull-none-drv-level-4-smt {
+ bias-disable;
+ drive-strength = <4>;
+ input-schmitt-enable;
+ };
+
+ /omit-if-no-ref/
+ pcfg_pull_none_drv_level_5_smt: pcfg-pull-none-drv-level-5-smt {
+ bias-disable;
+ drive-strength = <5>;
+ input-schmitt-enable;
+ };
+
+ /omit-if-no-ref/
pcfg_output_high: pcfg-output-high {
output-high;
};
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 7e04a2905ce4..eb5c17d4c7ec 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1444,6 +1444,7 @@ CONFIG_PLATFORM_MHU=y
CONFIG_BCM2835_MBOX=y
CONFIG_QCOM_APCS_IPC=y
CONFIG_MTK_ADSP_MBOX=m
+CONFIG_QCOM_CPUCP_MBOX=m
CONFIG_QCOM_IPCC=y
CONFIG_ROCKCHIP_IOMMU=y
CONFIG_TEGRA_IOMMU_SMMU=y
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5b3362af7d65..8bed9030ad47 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -147,7 +147,7 @@ config X86
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
select ARCH_WANTS_NO_INSTR
select ARCH_WANT_GENERAL_HUGETLB
- select ARCH_WANT_HUGE_PMD_SHARE
+ select ARCH_WANT_HUGE_PMD_SHARE if X86_64
select ARCH_WANT_LD_ORPHAN_WARN
select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP if X86_64
select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP if X86_64
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b7dded3c8113..5cfb5d74dd5f 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -628,6 +628,7 @@
#define MSR_AMD64_OSVW_STATUS 0xc0010141
#define MSR_AMD_PPIN_CTL 0xc00102f0
#define MSR_AMD_PPIN 0xc00102f1
+#define MSR_AMD64_CPUID_FN_7 0xc0011002
#define MSR_AMD64_CPUID_FN_1 0xc0011004
#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index b750ac008b78..329ee185d8cc 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -974,6 +974,16 @@ static void init_amd_zen2(struct cpuinfo_x86 *c)
init_spectral_chicken(c);
fix_erratum_1386(c);
zen2_zenbleed_check(c);
+
+ /* Disable RDSEED on AMD Cyan Skillfish because of an error. */
+ if (c->x86_model == 0x47 && c->x86_stepping == 0x0) {
+ clear_cpu_cap(c, X86_FEATURE_RDSEED);
+ msr_clear_bit(MSR_AMD64_CPUID_FN_7, 18);
+ pr_emerg("RDSEED is not reliable on this platform; disabling.\n");
+ }
+
+ /* Correct misconfigured CPUID on some clients. */
+ clear_cpu_cap(c, X86_FEATURE_INVLPGB);
}
static void init_amd_zen3(struct cpuinfo_x86 *c)
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index a6ab666ef48a..7a50af416cac 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1280,6 +1280,22 @@ static void dpm_async_suspend_parent(struct device *dev, async_func_t func)
dpm_async_with_cleanup(dev->parent, func);
}
+static void dpm_async_suspend_complete_all(struct list_head *device_list)
+{
+ struct device *dev;
+
+ guard(mutex)(&async_wip_mtx);
+
+ list_for_each_entry_reverse(dev, device_list, power.entry) {
+ /*
+ * In case the device is being waited for and async processing
+ * has not started for it yet, let the waiters make progress.
+ */
+ if (!dev->power.work_in_progress)
+ complete_all(&dev->power.completion);
+ }
+}
+
/**
* resume_event - Return a "resume" message for given "suspend" sleep state.
* @sleep_state: PM message representing a sleep state.
@@ -1456,6 +1472,7 @@ static int dpm_noirq_suspend_devices(pm_message_t state)
mutex_lock(&dpm_list_mtx);
if (error || async_error) {
+ dpm_async_suspend_complete_all(&dpm_late_early_list);
/*
* Move all devices to the target list to resume them
* properly.
@@ -1658,6 +1675,7 @@ int dpm_suspend_late(pm_message_t state)
mutex_lock(&dpm_list_mtx);
if (error || async_error) {
+ dpm_async_suspend_complete_all(&dpm_suspended_list);
/*
* Move all devices to the target list to resume them
* properly.
@@ -1951,6 +1969,7 @@ int dpm_suspend(pm_message_t state)
mutex_lock(&dpm_list_mtx);
if (error || async_error) {
+ dpm_async_suspend_complete_all(&dpm_prepared_list);
/*
* Move all devices to the target list to resume them
* properly.
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 7bdc7eb808ea..2592bd19ebc1 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -2198,9 +2198,7 @@ again:
goto out;
}
}
- ret = nbd_start_device(nbd);
- if (ret)
- goto out;
+
if (info->attrs[NBD_ATTR_BACKEND_IDENTIFIER]) {
nbd->backend = nla_strdup(info->attrs[NBD_ATTR_BACKEND_IDENTIFIER],
GFP_KERNEL);
@@ -2216,6 +2214,8 @@ again:
goto out;
}
set_bit(NBD_RT_HAS_BACKEND_FILE, &config->runtime_flags);
+
+ ret = nbd_start_device(nbd);
out:
mutex_unlock(&nbd->config_lock);
if (!ret) {
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index bf490967241a..2505df1f4e69 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -720,11 +720,6 @@ static const struct pci_device_id agp_amd64_pci_table[] = {
MODULE_DEVICE_TABLE(pci, agp_amd64_pci_table);
-static const struct pci_device_id agp_amd64_pci_promisc_table[] = {
- { PCI_DEVICE_CLASS(0, 0) },
- { }
-};
-
static DEFINE_SIMPLE_DEV_PM_OPS(agp_amd64_pm_ops, NULL, agp_amd64_resume);
static struct pci_driver agp_amd64_pci_driver = {
@@ -739,6 +734,7 @@ static struct pci_driver agp_amd64_pci_driver = {
/* Not static due to IOMMU code calling it early. */
int __init agp_amd64_init(void)
{
+ struct pci_dev *pdev = NULL;
int err = 0;
if (agp_off)
@@ -767,9 +763,13 @@ int __init agp_amd64_init(void)
}
/* Look for any AGP bridge */
- agp_amd64_pci_driver.id_table = agp_amd64_pci_promisc_table;
- err = driver_attach(&agp_amd64_pci_driver.driver);
- if (err == 0 && agp_bridges_found == 0) {
+ for_each_pci_dev(pdev)
+ if (pci_find_capability(pdev, PCI_CAP_ID_AGP))
+ pci_add_dynid(&agp_amd64_pci_driver,
+ pdev->vendor, pdev->device,
+ pdev->subsystem_vendor,
+ pdev->subsystem_device, 0, 0, 0);
+ if (agp_bridges_found == 0) {
pci_unregister_driver(&agp_amd64_pci_driver);
err = -ENODEV;
}
diff --git a/drivers/clk/clk-scmi.c b/drivers/clk/clk-scmi.c
index 15510c2ff21c..1b1561c84127 100644
--- a/drivers/clk/clk-scmi.c
+++ b/drivers/clk/clk-scmi.c
@@ -404,6 +404,7 @@ static int scmi_clocks_probe(struct scmi_device *sdev)
const struct scmi_handle *handle = sdev->handle;
struct scmi_protocol_handle *ph;
const struct clk_ops *scmi_clk_ops_db[SCMI_MAX_CLK_OPS] = {};
+ struct scmi_clk *sclks;
if (!handle)
return -ENODEV;
@@ -430,18 +431,21 @@ static int scmi_clocks_probe(struct scmi_device *sdev)
transport_is_atomic = handle->is_transport_atomic(handle,
&atomic_threshold_us);
+ sclks = devm_kcalloc(dev, count, sizeof(*sclks), GFP_KERNEL);
+ if (!sclks)
+ return -ENOMEM;
+
+ for (idx = 0; idx < count; idx++)
+ hws[idx] = &sclks[idx].hw;
+
for (idx = 0; idx < count; idx++) {
- struct scmi_clk *sclk;
+ struct scmi_clk *sclk = &sclks[idx];
const struct clk_ops *scmi_ops;
- sclk = devm_kzalloc(dev, sizeof(*sclk), GFP_KERNEL);
- if (!sclk)
- return -ENOMEM;
-
sclk->info = scmi_proto_clk_ops->info_get(ph, idx);
if (!sclk->info) {
dev_dbg(dev, "invalid clock info for idx %d\n", idx);
- devm_kfree(dev, sclk);
+ hws[idx] = NULL;
continue;
}
@@ -479,13 +483,11 @@ static int scmi_clocks_probe(struct scmi_device *sdev)
if (err) {
dev_err(dev, "failed to register clock %d\n", idx);
devm_kfree(dev, sclk->parent_data);
- devm_kfree(dev, sclk);
hws[idx] = NULL;
} else {
dev_dbg(dev, "Registered clock:%s%s\n",
sclk->info->name,
scmi_ops->enable ? " (atomic ops)" : "");
- hws[idx] = &sclk->hw;
}
}
diff --git a/drivers/clk/imx/clk-imx95-blk-ctl.c b/drivers/clk/imx/clk-imx95-blk-ctl.c
index 25974947ad0c..cc2ee2be1819 100644
--- a/drivers/clk/imx/clk-imx95-blk-ctl.c
+++ b/drivers/clk/imx/clk-imx95-blk-ctl.c
@@ -219,11 +219,15 @@ static const struct imx95_blk_ctl_dev_data lvds_csr_dev_data = {
.clk_reg_offset = 0,
};
+static const char * const disp_engine_parents[] = {
+ "videopll1", "dsi_pll", "ldb_pll_div7"
+};
+
static const struct imx95_blk_ctl_clk_dev_data dispmix_csr_clk_dev_data[] = {
[IMX95_CLK_DISPMIX_ENG0_SEL] = {
.name = "disp_engine0_sel",
- .parent_names = (const char *[]){"videopll1", "dsi_pll", "ldb_pll_div7", },
- .num_parents = 4,
+ .parent_names = disp_engine_parents,
+ .num_parents = ARRAY_SIZE(disp_engine_parents),
.reg = 0,
.bit_idx = 0,
.bit_width = 2,
@@ -232,8 +236,8 @@ static const struct imx95_blk_ctl_clk_dev_data dispmix_csr_clk_dev_data[] = {
},
[IMX95_CLK_DISPMIX_ENG1_SEL] = {
.name = "disp_engine1_sel",
- .parent_names = (const char *[]){"videopll1", "dsi_pll", "ldb_pll_div7", },
- .num_parents = 4,
+ .parent_names = disp_engine_parents,
+ .num_parents = ARRAY_SIZE(disp_engine_parents),
.reg = 0,
.bit_idx = 2,
.bit_width = 2,
diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c
index b781601946db..63a70f285cce 100644
--- a/drivers/gpu/drm/drm_framebuffer.c
+++ b/drivers/gpu/drm/drm_framebuffer.c
@@ -862,11 +862,23 @@ EXPORT_SYMBOL_FOR_TESTS_ONLY(drm_framebuffer_free);
int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb,
const struct drm_framebuffer_funcs *funcs)
{
+ unsigned int i;
int ret;
+ bool exists;
if (WARN_ON_ONCE(fb->dev != dev || !fb->format))
return -EINVAL;
+ for (i = 0; i < fb->format->num_planes; i++) {
+ if (drm_WARN_ON_ONCE(dev, fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i)))
+ fb->internal_flags &= ~DRM_FRAMEBUFFER_HAS_HANDLE_REF(i);
+ if (fb->obj[i]) {
+ exists = drm_gem_object_handle_get_if_exists_unlocked(fb->obj[i]);
+ if (exists)
+ fb->internal_flags |= DRM_FRAMEBUFFER_HAS_HANDLE_REF(i);
+ }
+ }
+
INIT_LIST_HEAD(&fb->filp_head);
fb->funcs = funcs;
@@ -875,7 +887,7 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb,
ret = __drm_mode_object_add(dev, &fb->base, DRM_MODE_OBJECT_FB,
false, drm_framebuffer_free);
if (ret)
- goto out;
+ goto err;
mutex_lock(&dev->mode_config.fb_lock);
dev->mode_config.num_fb++;
@@ -883,7 +895,16 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb,
mutex_unlock(&dev->mode_config.fb_lock);
drm_mode_object_register(dev, &fb->base);
-out:
+
+ return 0;
+
+err:
+ for (i = 0; i < fb->format->num_planes; i++) {
+ if (fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i)) {
+ drm_gem_object_handle_put_unlocked(fb->obj[i]);
+ fb->internal_flags &= ~DRM_FRAMEBUFFER_HAS_HANDLE_REF(i);
+ }
+ }
return ret;
}
EXPORT_SYMBOL(drm_framebuffer_init);
@@ -960,6 +981,12 @@ EXPORT_SYMBOL(drm_framebuffer_unregister_private);
void drm_framebuffer_cleanup(struct drm_framebuffer *fb)
{
struct drm_device *dev = fb->dev;
+ unsigned int i;
+
+ for (i = 0; i < fb->format->num_planes; i++) {
+ if (fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i))
+ drm_gem_object_handle_put_unlocked(fb->obj[i]);
+ }
mutex_lock(&dev->mode_config.fb_lock);
list_del(&fb->head);
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 4bf0a76bb35e..ac0524595bd6 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -223,23 +223,34 @@ static void drm_gem_object_handle_get(struct drm_gem_object *obj)
}
/**
- * drm_gem_object_handle_get_unlocked - acquire reference on user-space handles
+ * drm_gem_object_handle_get_if_exists_unlocked - acquire reference on user-space handle, if any
* @obj: GEM object
*
- * Acquires a reference on the GEM buffer object's handle. Required
- * to keep the GEM object alive. Call drm_gem_object_handle_put_unlocked()
- * to release the reference.
+ * Acquires a reference on the GEM buffer object's handle. Required to keep
+ * the GEM object alive. Call drm_gem_object_handle_put_if_exists_unlocked()
+ * to release the reference. Does nothing if the buffer object has no handle.
+ *
+ * Returns:
+ * True if a handle exists, or false otherwise
*/
-void drm_gem_object_handle_get_unlocked(struct drm_gem_object *obj)
+bool drm_gem_object_handle_get_if_exists_unlocked(struct drm_gem_object *obj)
{
struct drm_device *dev = obj->dev;
guard(mutex)(&dev->object_name_lock);
- drm_WARN_ON(dev, !obj->handle_count); /* first ref taken in create-tail helper */
+ /*
+ * First ref taken during GEM object creation, if any. Some
+ * drivers set up internal framebuffers with GEM objects that
+ * do not have a GEM handle. Hence, this counter can be zero.
+ */
+ if (!obj->handle_count)
+ return false;
+
drm_gem_object_handle_get(obj);
+
+ return true;
}
-EXPORT_SYMBOL(drm_gem_object_handle_get_unlocked);
/**
* drm_gem_object_handle_free - release resources bound to userspace handles
@@ -272,7 +283,7 @@ static void drm_gem_object_exported_dma_buf_free(struct drm_gem_object *obj)
}
/**
- * drm_gem_object_handle_put_unlocked - releases reference on user-space handles
+ * drm_gem_object_handle_put_unlocked - releases reference on user-space handle
* @obj: GEM object
*
* Releases a reference on the GEM buffer object's handle. Possibly releases
@@ -283,14 +294,14 @@ void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj)
struct drm_device *dev = obj->dev;
bool final = false;
- if (WARN_ON(READ_ONCE(obj->handle_count) == 0))
+ if (drm_WARN_ON(dev, READ_ONCE(obj->handle_count) == 0))
return;
/*
- * Must bump handle count first as this may be the last
- * ref, in which case the object would disappear before we
- * checked for a name
- */
+ * Must bump handle count first as this may be the last
+ * ref, in which case the object would disappear before
+ * we checked for a name.
+ */
mutex_lock(&dev->object_name_lock);
if (--obj->handle_count == 0) {
@@ -303,7 +314,6 @@ void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj)
if (final)
drm_gem_object_put(obj);
}
-EXPORT_SYMBOL(drm_gem_object_handle_put_unlocked);
/*
* Called at device or object close to release the file's
@@ -315,6 +325,9 @@ drm_gem_object_release_handle(int id, void *ptr, void *data)
struct drm_file *file_priv = data;
struct drm_gem_object *obj = ptr;
+ if (drm_WARN_ON(obj->dev, !data))
+ return 0;
+
if (obj->funcs->close)
obj->funcs->close(obj, file_priv);
@@ -435,7 +448,7 @@ drm_gem_handle_create_tail(struct drm_file *file_priv,
idr_preload(GFP_KERNEL);
spin_lock(&file_priv->table_lock);
- ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_NOWAIT);
+ ret = idr_alloc(&file_priv->object_idr, NULL, 1, 0, GFP_NOWAIT);
spin_unlock(&file_priv->table_lock);
idr_preload_end();
@@ -456,6 +469,11 @@ drm_gem_handle_create_tail(struct drm_file *file_priv,
goto err_revoke;
}
+ /* mirrors drm_gem_handle_delete to avoid races */
+ spin_lock(&file_priv->table_lock);
+ obj = idr_replace(&file_priv->object_idr, obj, handle);
+ WARN_ON(obj != NULL);
+ spin_unlock(&file_priv->table_lock);
*handlep = handle;
return 0;
diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c
index 14a87788695d..6f72e7a0f427 100644
--- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c
+++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c
@@ -99,7 +99,7 @@ void drm_gem_fb_destroy(struct drm_framebuffer *fb)
unsigned int i;
for (i = 0; i < fb->format->num_planes; i++)
- drm_gem_object_handle_put_unlocked(fb->obj[i]);
+ drm_gem_object_put(fb->obj[i]);
drm_framebuffer_cleanup(fb);
kfree(fb);
@@ -182,10 +182,8 @@ int drm_gem_fb_init_with_funcs(struct drm_device *dev,
if (!objs[i]) {
drm_dbg_kms(dev, "Failed to lookup GEM object\n");
ret = -ENOENT;
- goto err_gem_object_handle_put_unlocked;
+ goto err_gem_object_put;
}
- drm_gem_object_handle_get_unlocked(objs[i]);
- drm_gem_object_put(objs[i]);
min_size = (height - 1) * mode_cmd->pitches[i]
+ drm_format_info_min_pitch(info, i, width)
@@ -195,22 +193,22 @@ int drm_gem_fb_init_with_funcs(struct drm_device *dev,
drm_dbg_kms(dev,
"GEM object size (%zu) smaller than minimum size (%u) for plane %d\n",
objs[i]->size, min_size, i);
- drm_gem_object_handle_put_unlocked(objs[i]);
+ drm_gem_object_put(objs[i]);
ret = -EINVAL;
- goto err_gem_object_handle_put_unlocked;
+ goto err_gem_object_put;
}
}
ret = drm_gem_fb_init(dev, fb, mode_cmd, objs, i, funcs);
if (ret)
- goto err_gem_object_handle_put_unlocked;
+ goto err_gem_object_put;
return 0;
-err_gem_object_handle_put_unlocked:
+err_gem_object_put:
while (i > 0) {
--i;
- drm_gem_object_handle_put_unlocked(objs[i]);
+ drm_gem_object_put(objs[i]);
}
return ret;
}
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index be77d61a16ce..60c282881958 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -161,7 +161,7 @@ void drm_sysfs_lease_event(struct drm_device *dev);
/* drm_gem.c */
int drm_gem_init(struct drm_device *dev);
-void drm_gem_object_handle_get_unlocked(struct drm_gem_object *obj);
+bool drm_gem_object_handle_get_if_exists_unlocked(struct drm_gem_object *obj);
void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj);
int drm_gem_handle_create_tail(struct drm_file *file_priv,
struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs
index dd55b1cb764d..18492daae4b3 100644
--- a/drivers/gpu/drm/drm_panic_qr.rs
+++ b/drivers/gpu/drm/drm_panic_qr.rs
@@ -27,7 +27,7 @@
//! * <https://github.com/erwanvivien/fast_qr>
//! * <https://github.com/bjguillot/qr>
-use kernel::{prelude::*, str::CStr};
+use kernel::prelude::*;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd)]
struct Version(usize);
diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index ba7b8938b17c..166ee11831ab 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -1938,7 +1938,7 @@ static int get_init_otp_deassert_fragment_len(struct intel_display *display,
int index, len;
if (drm_WARN_ON(display->drm,
- !data || panel->vbt.dsi.seq_version != 1))
+ !data || panel->vbt.dsi.seq_version >= 3))
return 0;
/* index = 1 to skip sequence byte */
@@ -1961,7 +1961,7 @@ static int get_init_otp_deassert_fragment_len(struct intel_display *display,
}
/*
- * Some v1 VBT MIPI sequences do the deassert in the init OTP sequence.
+ * Some v1/v2 VBT MIPI sequences do the deassert in the init OTP sequence.
* The deassert must be done before calling intel_dsi_device_ready, so for
* these devices we split the init OTP sequence into a deassert sequence and
* the actual init OTP part.
@@ -1972,9 +1972,9 @@ static void vlv_fixup_mipi_sequences(struct intel_display *display,
u8 *init_otp;
int len;
- /* Limit this to v1 vid-mode sequences */
+ /* Limit this to v1/v2 vid-mode sequences */
if (panel->vbt.dsi.config->is_cmd_mode ||
- panel->vbt.dsi.seq_version != 1)
+ panel->vbt.dsi.seq_version >= 3)
return;
/* Only do this if there are otp and assert seqs and no deassert seq */
diff --git a/drivers/gpu/drm/imagination/pvr_power.c b/drivers/gpu/drm/imagination/pvr_power.c
index 41f5d89e78b8..3e349d039fc0 100644
--- a/drivers/gpu/drm/imagination/pvr_power.c
+++ b/drivers/gpu/drm/imagination/pvr_power.c
@@ -386,13 +386,13 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset)
if (!err) {
if (hard_reset) {
pvr_dev->fw_dev.booted = false;
- WARN_ON(pm_runtime_force_suspend(from_pvr_device(pvr_dev)->dev));
+ WARN_ON(pvr_power_device_suspend(from_pvr_device(pvr_dev)->dev));
err = pvr_fw_hard_reset(pvr_dev);
if (err)
goto err_device_lost;
- err = pm_runtime_force_resume(from_pvr_device(pvr_dev)->dev);
+ err = pvr_power_device_resume(from_pvr_device(pvr_dev)->dev);
pvr_dev->fw_dev.booted = true;
if (err)
goto err_device_lost;
diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
index 200e65a7cefc..c7869a639bef 100644
--- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c
+++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
@@ -314,14 +314,10 @@ nouveau_debugfs_fini(struct nouveau_drm *drm)
drm->debugfs = NULL;
}
-int
+void
nouveau_module_debugfs_init(void)
{
nouveau_debugfs_root = debugfs_create_dir("nouveau", NULL);
- if (IS_ERR(nouveau_debugfs_root))
- return PTR_ERR(nouveau_debugfs_root);
-
- return 0;
}
void
diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.h b/drivers/gpu/drm/nouveau/nouveau_debugfs.h
index b7617b344ee2..d05ed0e641c4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_debugfs.h
+++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.h
@@ -24,7 +24,7 @@ extern void nouveau_debugfs_fini(struct nouveau_drm *);
extern struct dentry *nouveau_debugfs_root;
-int nouveau_module_debugfs_init(void);
+void nouveau_module_debugfs_init(void);
void nouveau_module_debugfs_fini(void);
#else
static inline void
@@ -42,10 +42,9 @@ nouveau_debugfs_fini(struct nouveau_drm *drm)
{
}
-static inline int
+static inline void
nouveau_module_debugfs_init(void)
{
- return 0;
}
static inline void
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 0c82a63cd49d..1527b801f013 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -1461,9 +1461,7 @@ nouveau_drm_init(void)
if (!nouveau_modeset)
return 0;
- ret = nouveau_module_debugfs_init();
- if (ret)
- return ret;
+ nouveau_module_debugfs_init();
#ifdef CONFIG_NOUVEAU_PLATFORM_DRIVER
platform_driver_register(&nouveau_platform_driver);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c
index baf42339f93e..588cb4ab85cb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c
@@ -719,7 +719,6 @@ r535_gsp_acpi_caps(acpi_handle handle, CAPS_METHOD_DATA *caps)
union acpi_object argv4 = {
.buffer.type = ACPI_TYPE_BUFFER,
.buffer.length = 4,
- .buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL),
}, *obj;
caps->status = 0xffff;
@@ -727,17 +726,22 @@ r535_gsp_acpi_caps(acpi_handle handle, CAPS_METHOD_DATA *caps)
if (!acpi_check_dsm(handle, &NVOP_DSM_GUID, NVOP_DSM_REV, BIT_ULL(0x1a)))
return;
+ argv4.buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL);
+ if (!argv4.buffer.pointer)
+ return;
+
obj = acpi_evaluate_dsm(handle, &NVOP_DSM_GUID, NVOP_DSM_REV, 0x1a, &argv4);
if (!obj)
- return;
+ goto done;
if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) ||
WARN_ON(obj->buffer.length != 4))
- return;
+ goto done;
caps->status = 0;
caps->optimusCaps = *(u32 *)obj->buffer.pointer;
+done:
ACPI_FREE(obj);
kfree(argv4.buffer.pointer);
@@ -754,24 +758,28 @@ r535_gsp_acpi_jt(acpi_handle handle, JT_METHOD_DATA *jt)
union acpi_object argv4 = {
.buffer.type = ACPI_TYPE_BUFFER,
.buffer.length = sizeof(caps),
- .buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL),
}, *obj;
jt->status = 0xffff;
+ argv4.buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL);
+ if (!argv4.buffer.pointer)
+ return;
+
obj = acpi_evaluate_dsm(handle, &JT_DSM_GUID, JT_DSM_REV, 0x1, &argv4);
if (!obj)
- return;
+ goto done;
if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) ||
WARN_ON(obj->buffer.length != 4))
- return;
+ goto done;
jt->status = 0;
jt->jtCaps = *(u32 *)obj->buffer.pointer;
jt->jtRevId = (jt->jtCaps & 0xfff00000) >> 20;
jt->bSBIOSCaps = 0;
+done:
ACPI_FREE(obj);
kfree(argv4.buffer.pointer);
@@ -1744,6 +1752,13 @@ r535_gsp_fini(struct nvkm_gsp *gsp, bool suspend)
nvkm_gsp_sg_free(gsp->subdev.device, &gsp->sr.sgt);
return ret;
}
+
+ /*
+ * TODO: Debug the GSP firmware / RPC handling to find out why
+ * without this Turing (but none of the other architectures)
+ * ends up resetting all channels after resume.
+ */
+ msleep(50);
}
ret = r535_gsp_rpc_unloading_guest_driver(gsp, suspend);
diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c
index 2d9a0a3f6c38..7a38664e890e 100644
--- a/drivers/gpu/drm/tegra/nvdec.c
+++ b/drivers/gpu/drm/tegra/nvdec.c
@@ -261,10 +261,8 @@ static int nvdec_load_falcon_firmware(struct nvdec *nvdec)
if (!client->group) {
virt = dma_alloc_coherent(nvdec->dev, size, &iova, GFP_KERNEL);
-
- err = dma_mapping_error(nvdec->dev, iova);
- if (err < 0)
- return err;
+ if (!virt)
+ return -ENOMEM;
} else {
virt = tegra_drm_alloc(tegra, size, &iova);
if (IS_ERR(virt))
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index 7a8af2311318..11e60d687572 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -171,14 +171,32 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss)
#define XE_DEVCOREDUMP_CHUNK_MAX (SZ_512M + SZ_1G)
+/**
+ * xe_devcoredump_read() - Read data from the Xe device coredump snapshot
+ * @buffer: Destination buffer to copy the coredump data into
+ * @offset: Offset in the coredump data to start reading from
+ * @count: Number of bytes to read
+ * @data: Pointer to the xe_devcoredump structure
+ * @datalen: Length of the data (unused)
+ *
+ * Reads a chunk of the coredump snapshot data into the provided buffer.
+ * If the devcoredump is smaller than 1.5 GB (XE_DEVCOREDUMP_CHUNK_MAX),
+ * it is read directly from a pre-written buffer. For larger devcoredumps,
+ * the pre-written buffer must be periodically repopulated from the snapshot
+ * state due to kmalloc size limitations.
+ *
+ * Return: Number of bytes copied on success, or a negative error code on failure.
+ */
static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
size_t count, void *data, size_t datalen)
{
struct xe_devcoredump *coredump = data;
struct xe_devcoredump_snapshot *ss;
- ssize_t byte_copied;
+ ssize_t byte_copied = 0;
u32 chunk_offset;
ssize_t new_chunk_position;
+ bool pm_needed = false;
+ int ret = 0;
if (!coredump)
return -ENODEV;
@@ -188,20 +206,19 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
/* Ensure delayed work is captured before continuing */
flush_work(&ss->work);
- if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX)
+ pm_needed = ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX;
+ if (pm_needed)
xe_pm_runtime_get(gt_to_xe(ss->gt));
mutex_lock(&coredump->lock);
if (!ss->read.buffer) {
- mutex_unlock(&coredump->lock);
- return -ENODEV;
+ ret = -ENODEV;
+ goto unlock;
}
- if (offset >= ss->read.size) {
- mutex_unlock(&coredump->lock);
- return 0;
- }
+ if (offset >= ss->read.size)
+ goto unlock;
new_chunk_position = div_u64_rem(offset,
XE_DEVCOREDUMP_CHUNK_MAX,
@@ -221,12 +238,13 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
ss->read.size - offset;
memcpy(buffer, ss->read.buffer + chunk_offset, byte_copied);
+unlock:
mutex_unlock(&coredump->lock);
- if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX)
+ if (pm_needed)
xe_pm_runtime_put(gt_to_xe(ss->gt));
- return byte_copied;
+ return byte_copied ? byte_copied : ret;
}
static void xe_devcoredump_free(void *data)
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 10622ca471a2..6717a636b1d9 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -444,6 +444,7 @@ static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue)
#define PF_MULTIPLIER 8
pf_queue->num_dw =
(num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER;
+ pf_queue->num_dw = roundup_pow_of_two(pf_queue->num_dw);
#undef PF_MULTIPLIER
pf_queue->gt = gt;
diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
index 63db66df064b..023ed6a6b49d 100644
--- a/drivers/gpu/drm/xe/xe_lmtt.c
+++ b/drivers/gpu/drm/xe/xe_lmtt.c
@@ -78,6 +78,9 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level
}
lmtt_assert(lmtt, xe_bo_is_vram(bo));
+ lmtt_debug(lmtt, "level=%u addr=%#llx\n", level, (u64)xe_bo_main_addr(bo, XE_PAGE_SIZE));
+
+ xe_map_memset(lmtt_to_xe(lmtt), &bo->vmap, 0, 0, bo->size);
pt->level = level;
pt->bo = bo;
@@ -91,6 +94,9 @@ out:
static void lmtt_pt_free(struct xe_lmtt_pt *pt)
{
+ lmtt_debug(&pt->bo->tile->sriov.pf.lmtt, "level=%u addr=%llx\n",
+ pt->level, (u64)xe_bo_main_addr(pt->bo, XE_PAGE_SIZE));
+
xe_bo_unpin_map_no_vm(pt->bo);
kfree(pt);
}
@@ -226,9 +232,14 @@ static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt,
switch (lmtt->ops->lmtt_pte_size(level)) {
case sizeof(u32):
+ lmtt_assert(lmtt, !overflows_type(pte, u32));
+ lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u32), u32));
+
xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u32), u32, pte);
break;
case sizeof(u64):
+ lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u64), u64));
+
xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u64), u64, pte);
break;
default:
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 7acdc4c78866..66bc02302c55 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -863,7 +863,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it))
xe_res_next(&src_it, src_L0);
else
- emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs,
+ emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs || use_comp_pat,
&src_it, src_L0, src);
if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it))
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index e4742e27e2cd..da6793c2f991 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -20,7 +20,7 @@
struct xe_modparam xe_modparam = {
.probe_display = true,
- .guc_log_level = 3,
+ .guc_log_level = IS_ENABLED(CONFIG_DRM_XE_DEBUG) ? 3 : 1,
.force_probe = CONFIG_DRM_XE_FORCE_PROBE,
.wedged_mode = 1,
.svm_notifier_size = 512,
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index ac4beaed58ff..278af53c74dc 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -140,7 +140,6 @@ static const struct xe_graphics_desc graphics_xelpg = {
.has_asid = 1, \
.has_atomic_enable_pte_bit = 1, \
.has_flat_ccs = 1, \
- .has_indirect_ring_state = 1, \
.has_range_tlb_invalidation = 1, \
.has_usm = 1, \
.has_64bit_timestamp = 1, \
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index ff749edc005b..ad263de44111 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -134,7 +134,7 @@ int xe_pm_suspend(struct xe_device *xe)
/* FIXME: Super racey... */
err = xe_bo_evict_all(xe);
if (err)
- goto err_pxp;
+ goto err_display;
for_each_gt(gt, xe, id) {
err = xe_gt_suspend(gt);
@@ -151,7 +151,6 @@ int xe_pm_suspend(struct xe_device *xe)
err_display:
xe_display_pm_resume(xe);
-err_pxp:
xe_pxp_pm_resume(xe->pxp);
err:
drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
@@ -753,11 +752,13 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
}
/**
- * xe_pm_set_vram_threshold - Set a vram threshold for allowing/blocking D3Cold
+ * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold
* @xe: xe device instance
- * @threshold: VRAM size in bites for the D3cold threshold
+ * @threshold: VRAM size in MiB for the D3cold threshold
*
- * Returns 0 for success, negative error code otherwise.
+ * Return:
+ * * 0 - success
+ * * -EINVAL - invalid argument
*/
int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
{
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 2741849bbf4d..a6612105201a 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -114,10 +114,10 @@ struct fw_blobs_by_type {
#define XE_GT_TYPE_ANY XE_GT_TYPE_UNINITIALIZED
#define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \
- fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 44, 1)) \
- fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 44, 1)) \
+ fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 45, 2)) \
+ fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 45, 2)) \
fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 44, 1)) \
- fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 44, 1)) \
+ fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 45, 2)) \
fw_def(DG1, GT_TYPE_ANY, major_ver(i915, guc, dg1, 70, 44, 1)) \
fw_def(ALDERLAKE_N, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \
fw_def(ALDERLAKE_P, GT_TYPE_ANY, major_ver(i915, guc, adlp, 70, 44, 1)) \
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 69c1d7fc695e..6d70109fcc43 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -38,10 +38,10 @@
GRAPHICS_VERSION(2004)
GRAPHICS_VERSION_RANGE(3000, 3001)
22019338487 MEDIA_VERSION(2000)
- GRAPHICS_VERSION(2001)
+ GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf)
MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf)
22019338487_display PLATFORM(LUNARLAKE)
-16023588340 GRAPHICS_VERSION(2001)
+16023588340 GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf)
14019789679 GRAPHICS_VERSION(1255)
GRAPHICS_VERSION_RANGE(1270, 2004)
no_media_l3 MEDIA_VERSION(3000)
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index b348d0464314..b31b8a2fd540 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1883,9 +1883,12 @@ u8 *hid_alloc_report_buf(struct hid_report *report, gfp_t flags)
/*
* 7 extra bytes are necessary to achieve proper functionality
* of implement() working on 8 byte chunks
+ * 1 extra byte for the report ID if it is null (not used) so
+ * we can reserve that extra byte in the first position of the buffer
+ * when sending it to .raw_request()
*/
- u32 len = hid_report_len(report) + 7;
+ u32 len = hid_report_len(report) + 7 + (report->id == 0);
return kzalloc(len, flags);
}
@@ -1973,7 +1976,7 @@ static struct hid_report *hid_get_report(struct hid_report_enum *report_enum,
int __hid_request(struct hid_device *hid, struct hid_report *report,
enum hid_class_request reqtype)
{
- char *buf;
+ char *buf, *data_buf;
int ret;
u32 len;
@@ -1981,13 +1984,19 @@ int __hid_request(struct hid_device *hid, struct hid_report *report,
if (!buf)
return -ENOMEM;
+ data_buf = buf;
len = hid_report_len(report);
+ if (report->id == 0) {
+ /* reserve the first byte for the report ID */
+ data_buf++;
+ len++;
+ }
+
if (reqtype == HID_REQ_SET_REPORT)
- hid_output_report(report, buf);
+ hid_output_report(report, data_buf);
- ret = hid->ll_driver->raw_request(hid, report->id, buf, len,
- report->type, reqtype);
+ ret = hid_hw_raw_request(hid, report->id, buf, len, report->type, reqtype);
if (ret < 0) {
dbg_hid("unable to complete request: %d\n", ret);
goto out;
diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index c6b6b1029540..4424c0512bae 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -3299,7 +3299,7 @@ static const char *keys[KEY_MAX + 1] = {
[BTN_STYLUS2] = "Stylus2", [BTN_TOOL_DOUBLETAP] = "ToolDoubleTap",
[BTN_TOOL_TRIPLETAP] = "ToolTripleTap", [BTN_TOOL_QUADTAP] = "ToolQuadrupleTap",
[BTN_GEAR_DOWN] = "BtnGearDown", [BTN_GEAR_UP] = "BtnGearUp",
- [BTN_WHEEL] = "BtnWheel", [KEY_OK] = "Ok",
+ [KEY_OK] = "Ok",
[KEY_SELECT] = "Select", [KEY_GOTO] = "Goto",
[KEY_CLEAR] = "Clear", [KEY_POWER2] = "Power2",
[KEY_OPTION] = "Option", [KEY_INFO] = "Info",
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index ec84ba5e93e5..ff7595caf440 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -2742,7 +2742,11 @@ static unsigned long __evict_a_few(unsigned long nr_buffers)
__make_buffer_clean(b);
__free_buffer_wake(b);
- cond_resched();
+ if (need_resched()) {
+ dm_bufio_unlock(c);
+ cond_resched();
+ dm_bufio_lock(c);
+ }
}
dm_bufio_unlock(c);
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index bd694910b01b..7f524a26cebc 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -2366,8 +2366,7 @@ static int bitmap_get_stats(void *data, struct md_bitmap_stats *stats)
if (!bitmap)
return -ENOENT;
- if (!bitmap->mddev->bitmap_info.external &&
- !bitmap->storage.sb_page)
+ if (!bitmap->storage.sb_page)
return -EINVAL;
sb = kmap_local_page(bitmap->storage.sb_page);
stats->sync_size = le64_to_cpu(sb->sync_size);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 19c5a0ce5a40..64b8176907a9 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1399,7 +1399,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
}
read_bio = bio_alloc_clone(mirror->rdev->bdev, bio, gfp,
&mddev->bio_set);
-
+ read_bio->bi_opf &= ~REQ_NOWAIT;
r1_bio->bios[rdisk] = read_bio;
read_bio->bi_iter.bi_sector = r1_bio->sector +
@@ -1649,6 +1649,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
wait_for_serialization(rdev, r1_bio);
}
+ mbio->bi_opf &= ~REQ_NOWAIT;
r1_bio->bios[i] = mbio;
mbio->bi_iter.bi_sector = (r1_bio->sector + rdev->data_offset);
@@ -3428,6 +3429,7 @@ static int raid1_reshape(struct mddev *mddev)
/* ok, everything is stopped */
oldpool = conf->r1bio_pool;
conf->r1bio_pool = newpool;
+ init_waitqueue_head(&conf->r1bio_pool.wait);
for (d = d2 = 0; d < conf->raid_disks; d++) {
struct md_rdev *rdev = conf->mirrors[d].rdev;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index b74780af4c22..c9bd2005bfd0 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1182,8 +1182,11 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
}
}
- if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors))
+ if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors)) {
+ raid_end_bio_io(r10_bio);
return;
+ }
+
rdev = read_balance(conf, r10_bio, &max_sectors);
if (!rdev) {
if (err_rdev) {
@@ -1221,6 +1224,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
r10_bio->master_bio = bio;
}
read_bio = bio_alloc_clone(rdev->bdev, bio, gfp, &mddev->bio_set);
+ read_bio->bi_opf &= ~REQ_NOWAIT;
r10_bio->devs[slot].bio = read_bio;
r10_bio->devs[slot].rdev = rdev;
@@ -1256,6 +1260,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
conf->mirrors[devnum].rdev;
mbio = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO, &mddev->bio_set);
+ mbio->bi_opf &= ~REQ_NOWAIT;
if (replacement)
r10_bio->devs[n_copy].repl_bio = mbio;
else
@@ -1370,8 +1375,11 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
}
sectors = r10_bio->sectors;
- if (!regular_request_wait(mddev, conf, bio, sectors))
+ if (!regular_request_wait(mddev, conf, bio, sectors)) {
+ raid_end_bio_io(r10_bio);
return;
+ }
+
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
(mddev->reshape_backwards
? (bio->bi_iter.bi_sector < conf->reshape_safe &&
diff --git a/drivers/pci/controller/pci-host-common.c b/drivers/pci/controller/pci-host-common.c
index b0992325dd65..b37052863847 100644
--- a/drivers/pci/controller/pci-host-common.c
+++ b/drivers/pci/controller/pci-host-common.c
@@ -64,13 +64,13 @@ int pci_host_common_init(struct platform_device *pdev,
of_pci_check_probe_only();
+ platform_set_drvdata(pdev, bridge);
+
/* Parse and map our Configuration Space windows */
cfg = gen_pci_init(dev, bridge, ops);
if (IS_ERR(cfg))
return PTR_ERR(cfg);
- platform_set_drvdata(pdev, bridge);
-
bridge->sysdata = cfg;
bridge->ops = (struct pci_ops *)&ops->pci_ops;
bridge->enable_device = ops->enable_device;
diff --git a/drivers/pci/controller/pcie-apple.c b/drivers/pci/controller/pcie-apple.c
index 77fe73976654..0380d300adca 100644
--- a/drivers/pci/controller/pcie-apple.c
+++ b/drivers/pci/controller/pcie-apple.c
@@ -187,6 +187,7 @@ struct apple_pcie {
const struct hw_info *hw;
unsigned long *bitmap;
struct list_head ports;
+ struct list_head entry;
struct completion event;
struct irq_fwspec fwspec;
u32 nvecs;
@@ -205,6 +206,9 @@ struct apple_pcie_port {
int idx;
};
+static LIST_HEAD(pcie_list);
+static DEFINE_MUTEX(pcie_list_lock);
+
static void rmw_set(u32 set, void __iomem *addr)
{
writel_relaxed(readl_relaxed(addr) | set, addr);
@@ -720,13 +724,45 @@ static int apple_msi_init(struct apple_pcie *pcie)
return 0;
}
+static void apple_pcie_register(struct apple_pcie *pcie)
+{
+ guard(mutex)(&pcie_list_lock);
+
+ list_add_tail(&pcie->entry, &pcie_list);
+}
+
+static void apple_pcie_unregister(struct apple_pcie *pcie)
+{
+ guard(mutex)(&pcie_list_lock);
+
+ list_del(&pcie->entry);
+}
+
+static struct apple_pcie *apple_pcie_lookup(struct device *dev)
+{
+ struct apple_pcie *pcie;
+
+ guard(mutex)(&pcie_list_lock);
+
+ list_for_each_entry(pcie, &pcie_list, entry) {
+ if (pcie->dev == dev)
+ return pcie;
+ }
+
+ return NULL;
+}
+
static struct apple_pcie_port *apple_pcie_get_port(struct pci_dev *pdev)
{
struct pci_config_window *cfg = pdev->sysdata;
- struct apple_pcie *pcie = cfg->priv;
+ struct apple_pcie *pcie;
struct pci_dev *port_pdev;
struct apple_pcie_port *port;
+ pcie = apple_pcie_lookup(cfg->parent);
+ if (WARN_ON(!pcie))
+ return NULL;
+
/* Find the root port this device is on */
port_pdev = pcie_find_root_port(pdev);
@@ -806,10 +842,14 @@ static void apple_pcie_disable_device(struct pci_host_bridge *bridge, struct pci
static int apple_pcie_init(struct pci_config_window *cfg)
{
- struct apple_pcie *pcie = cfg->priv;
struct device *dev = cfg->parent;
+ struct apple_pcie *pcie;
int ret;
+ pcie = apple_pcie_lookup(dev);
+ if (WARN_ON(!pcie))
+ return -ENOENT;
+
for_each_available_child_of_node_scoped(dev->of_node, of_port) {
ret = apple_pcie_setup_port(pcie, of_port);
if (ret) {
@@ -852,13 +892,18 @@ static int apple_pcie_probe(struct platform_device *pdev)
mutex_init(&pcie->lock);
INIT_LIST_HEAD(&pcie->ports);
- dev_set_drvdata(dev, pcie);
ret = apple_msi_init(pcie);
if (ret)
return ret;
- return pci_host_common_init(pdev, &apple_pcie_cfg_ecam_ops);
+ apple_pcie_register(pcie);
+
+ ret = pci_host_common_init(pdev, &apple_pcie_cfg_ecam_ops);
+ if (ret)
+ apple_pcie_unregister(pcie);
+
+ return ret;
}
static const struct of_device_id apple_pcie_of_match[] = {
diff --git a/drivers/pci/ecam.c b/drivers/pci/ecam.c
index 2c5e6446e00e..260b7de2dbd5 100644
--- a/drivers/pci/ecam.c
+++ b/drivers/pci/ecam.c
@@ -84,8 +84,6 @@ struct pci_config_window *pci_ecam_create(struct device *dev,
goto err_exit_iomap;
}
- cfg->priv = dev_get_drvdata(dev);
-
if (ops->init) {
err = ops->init(cfg);
if (err)
diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c
index 6ede55a7c5e6..d686488f4111 100644
--- a/drivers/pci/msi/msi.c
+++ b/drivers/pci/msi/msi.c
@@ -934,10 +934,12 @@ int pci_msix_write_tph_tag(struct pci_dev *pdev, unsigned int index, u16 tag)
if (!pdev->msix_enabled)
return -ENXIO;
- guard(msi_descs_lock)(&pdev->dev);
virq = msi_get_virq(&pdev->dev, index);
if (!virq)
return -ENXIO;
+
+ guard(msi_descs_lock)(&pdev->dev);
+
/*
* This is a horrible hack, but short of implementing a PCI
* specific interrupt chip callback and a huge pile of
diff --git a/drivers/soc/aspeed/aspeed-lpc-snoop.c b/drivers/soc/aspeed/aspeed-lpc-snoop.c
index ef8f355589a5..fc3a2c41cc10 100644
--- a/drivers/soc/aspeed/aspeed-lpc-snoop.c
+++ b/drivers/soc/aspeed/aspeed-lpc-snoop.c
@@ -58,6 +58,7 @@ struct aspeed_lpc_snoop_model_data {
};
struct aspeed_lpc_snoop_channel {
+ bool enabled;
struct kfifo fifo;
wait_queue_head_t wq;
struct miscdevice miscdev;
@@ -190,6 +191,9 @@ static int aspeed_lpc_enable_snoop(struct aspeed_lpc_snoop *lpc_snoop,
const struct aspeed_lpc_snoop_model_data *model_data =
of_device_get_match_data(dev);
+ if (WARN_ON(lpc_snoop->chan[channel].enabled))
+ return -EBUSY;
+
init_waitqueue_head(&lpc_snoop->chan[channel].wq);
/* Create FIFO datastructure */
rc = kfifo_alloc(&lpc_snoop->chan[channel].fifo,
@@ -236,6 +240,8 @@ static int aspeed_lpc_enable_snoop(struct aspeed_lpc_snoop *lpc_snoop,
regmap_update_bits(lpc_snoop->regmap, HICRB,
hicrb_en, hicrb_en);
+ lpc_snoop->chan[channel].enabled = true;
+
return 0;
err_misc_deregister:
@@ -248,6 +254,9 @@ err_free_fifo:
static void aspeed_lpc_disable_snoop(struct aspeed_lpc_snoop *lpc_snoop,
int channel)
{
+ if (!lpc_snoop->chan[channel].enabled)
+ return;
+
switch (channel) {
case 0:
regmap_update_bits(lpc_snoop->regmap, HICR5,
@@ -263,8 +272,10 @@ static void aspeed_lpc_disable_snoop(struct aspeed_lpc_snoop *lpc_snoop,
return;
}
- kfifo_free(&lpc_snoop->chan[channel].fifo);
+ lpc_snoop->chan[channel].enabled = false;
+ /* Consider improving safety wrt concurrent reader(s) */
misc_deregister(&lpc_snoop->chan[channel].miscdev);
+ kfifo_free(&lpc_snoop->chan[channel].fifo);
}
static int aspeed_lpc_snoop_probe(struct platform_device *pdev)
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 91e0aa796e6b..83c9860e6b82 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -85,7 +85,7 @@ void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
six_unlock_intent(&b->c.lock);
}
-static void __btree_node_data_free(struct btree_cache *bc, struct btree *b)
+void __btree_node_data_free(struct btree *b)
{
BUG_ON(!list_empty(&b->list));
BUG_ON(btree_node_hashed(b));
@@ -112,16 +112,17 @@ static void __btree_node_data_free(struct btree_cache *bc, struct btree *b)
munmap(b->aux_data, btree_aux_data_bytes(b));
#endif
b->aux_data = NULL;
-
- btree_node_to_freedlist(bc, b);
}
static void btree_node_data_free(struct btree_cache *bc, struct btree *b)
{
BUG_ON(list_empty(&b->list));
list_del_init(&b->list);
+
+ __btree_node_data_free(b);
+
--bc->nr_freeable;
- __btree_node_data_free(bc, b);
+ btree_node_to_freedlist(bc, b);
}
static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg,
@@ -185,10 +186,7 @@ static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp)
struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
{
- struct btree_cache *bc = &c->btree_cache;
- struct btree *b;
-
- b = __btree_node_mem_alloc(c, GFP_KERNEL);
+ struct btree *b = __btree_node_mem_alloc(c, GFP_KERNEL);
if (!b)
return NULL;
@@ -198,8 +196,6 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
}
bch2_btree_lock_init(&b->c, 0, GFP_KERNEL);
-
- __bch2_btree_node_to_freelist(bc, b);
return b;
}
@@ -524,7 +520,8 @@ restart:
--touched;;
} else if (!btree_node_reclaim(c, b)) {
__bch2_btree_node_hash_remove(bc, b);
- __btree_node_data_free(bc, b);
+ __btree_node_data_free(b);
+ btree_node_to_freedlist(bc, b);
freed++;
bc->nr_freed++;
@@ -652,9 +649,12 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)
bch2_recalc_btree_reserve(c);
- for (i = 0; i < bc->nr_reserve; i++)
- if (!__bch2_btree_node_mem_alloc(c))
+ for (i = 0; i < bc->nr_reserve; i++) {
+ struct btree *b = __bch2_btree_node_mem_alloc(c);
+ if (!b)
goto err;
+ __bch2_btree_node_to_freelist(bc, b);
+ }
list_splice_init(&bc->live[0].list, &bc->freeable);
diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h
index ca3c1b145330..be275f87a60e 100644
--- a/fs/bcachefs/btree_cache.h
+++ b/fs/bcachefs/btree_cache.h
@@ -30,6 +30,7 @@ void bch2_btree_node_update_key_early(struct btree_trans *, enum btree_id, unsig
void bch2_btree_cache_cannibalize_unlock(struct btree_trans *);
int bch2_btree_cache_cannibalize_lock(struct btree_trans *, struct closure *);
+void __btree_node_data_free(struct btree *);
struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *);
struct btree *bch2_btree_node_mem_alloc(struct btree_trans *, bool);
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index e874a4357f64..a4cc72986e36 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -568,9 +568,9 @@ static int __btree_err(int ret,
bch2_mark_btree_validate_failure(failed, ca->dev_idx);
struct extent_ptr_decoded pick;
- have_retry = !bch2_bkey_pick_read_device(c,
+ have_retry = bch2_bkey_pick_read_device(c,
bkey_i_to_s_c(&b->key),
- failed, &pick, -1);
+ failed, &pick, -1) == 1;
}
if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry)
@@ -615,7 +615,6 @@ static int __btree_err(int ret,
goto out;
case -BCH_ERR_btree_node_read_err_bad_node:
prt_str(&out, ", ");
- ret = __bch2_topology_error(c, &out);
break;
}
@@ -644,7 +643,6 @@ static int __btree_err(int ret,
goto out;
case -BCH_ERR_btree_node_read_err_bad_node:
prt_str(&out, ", ");
- ret = __bch2_topology_error(c, &out);
break;
}
print:
@@ -1408,7 +1406,7 @@ static void btree_node_read_work(struct work_struct *work)
ret = bch2_bkey_pick_read_device(c,
bkey_i_to_s_c(&b->key),
&failed, &rb->pick, -1);
- if (ret) {
+ if (ret <= 0) {
set_btree_node_read_error(b);
break;
}
diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c
index 23d8c62ea4b6..42c9eb2c786e 100644
--- a/fs/bcachefs/btree_node_scan.c
+++ b/fs/bcachefs/btree_node_scan.c
@@ -75,39 +75,6 @@ static inline u64 bkey_journal_seq(struct bkey_s_c k)
}
}
-static bool found_btree_node_is_readable(struct btree_trans *trans,
- struct found_btree_node *f)
-{
- struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } tmp;
-
- found_btree_node_to_key(&tmp.k, f);
-
- struct btree *b = bch2_btree_node_get_noiter(trans, &tmp.k, f->btree_id, f->level, false);
- bool ret = !IS_ERR_OR_NULL(b);
- if (!ret)
- return ret;
-
- f->sectors_written = b->written;
- f->journal_seq = le64_to_cpu(b->data->keys.journal_seq);
-
- struct bkey_s_c k;
- struct bkey unpacked;
- struct btree_node_iter iter;
- for_each_btree_node_key_unpack(b, k, &iter, &unpacked)
- f->journal_seq = max(f->journal_seq, bkey_journal_seq(k));
-
- six_unlock_read(&b->c.lock);
-
- /*
- * We might update this node's range; if that happens, we need the node
- * to be re-read so the read path can trim keys that are no longer in
- * this node
- */
- if (b != btree_node_root(trans->c, b))
- bch2_btree_node_evict(trans, &tmp.k);
- return ret;
-}
-
static int found_btree_node_cmp_cookie(const void *_l, const void *_r)
{
const struct found_btree_node *l = _l;
@@ -159,17 +126,17 @@ static const struct min_heap_callbacks found_btree_node_heap_cbs = {
};
static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
- struct bio *bio, struct btree_node *bn, u64 offset)
+ struct btree *b, struct bio *bio, u64 offset)
{
struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes);
+ struct btree_node *bn = b->data;
bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ);
bio->bi_iter.bi_sector = offset;
- bch2_bio_map(bio, bn, PAGE_SIZE);
+ bch2_bio_map(bio, b->data, c->opts.block_size);
u64 submit_time = local_clock();
submit_bio_wait(bio);
-
bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status);
if (bio->bi_status) {
@@ -201,6 +168,14 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
if (BTREE_NODE_ID(bn) >= BTREE_ID_NR_MAX)
return;
+ bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ);
+ bio->bi_iter.bi_sector = offset;
+ bch2_bio_map(bio, b->data, c->opts.btree_node_size);
+
+ submit_time = local_clock();
+ submit_bio_wait(bio);
+ bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status);
+
rcu_read_lock();
struct found_btree_node n = {
.btree_id = BTREE_NODE_ID(bn),
@@ -217,7 +192,20 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
};
rcu_read_unlock();
- if (bch2_trans_run(c, found_btree_node_is_readable(trans, &n))) {
+ found_btree_node_to_key(&b->key, &n);
+
+ CLASS(printbuf, buf)();
+ if (!bch2_btree_node_read_done(c, ca, b, NULL, &buf)) {
+ /* read_done will swap out b->data for another buffer */
+ bn = b->data;
+ /*
+ * Grab journal_seq here because we want the max journal_seq of
+ * any bset; read_done sorts down to a single set and picks the
+ * max journal_seq
+ */
+ n.journal_seq = le64_to_cpu(bn->keys.journal_seq),
+ n.sectors_written = b->written;
+
mutex_lock(&f->lock);
if (BSET_BIG_ENDIAN(&bn->keys) != CPU_BIG_ENDIAN) {
bch_err(c, "try_read_btree_node() can't handle endian conversion");
@@ -237,12 +225,20 @@ static int read_btree_nodes_worker(void *p)
struct find_btree_nodes_worker *w = p;
struct bch_fs *c = container_of(w->f, struct bch_fs, found_btree_nodes);
struct bch_dev *ca = w->ca;
- void *buf = (void *) __get_free_page(GFP_KERNEL);
- struct bio *bio = bio_alloc(NULL, 1, 0, GFP_KERNEL);
unsigned long last_print = jiffies;
+ struct btree *b = NULL;
+ struct bio *bio = NULL;
+
+ b = __bch2_btree_node_mem_alloc(c);
+ if (!b) {
+ bch_err(c, "read_btree_nodes_worker: error allocating buf");
+ w->f->ret = -ENOMEM;
+ goto err;
+ }
- if (!buf || !bio) {
- bch_err(c, "read_btree_nodes_worker: error allocating bio/buf");
+ bio = bio_alloc(NULL, buf_pages(b->data, c->opts.btree_node_size), 0, GFP_KERNEL);
+ if (!bio) {
+ bch_err(c, "read_btree_nodes_worker: error allocating bio");
w->f->ret = -ENOMEM;
goto err;
}
@@ -266,11 +262,13 @@ static int read_btree_nodes_worker(void *p)
!bch2_dev_btree_bitmap_marked_sectors(ca, sector, btree_sectors(c)))
continue;
- try_read_btree_node(w->f, ca, bio, buf, sector);
+ try_read_btree_node(w->f, ca, b, bio, sector);
}
err:
+ if (b)
+ __btree_node_data_free(b);
+ kfree(b);
bio_put(bio);
- free_page((unsigned long) buf);
enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan);
closure_put(w->cl);
kfree(w);
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
index 901f643ead83..07c2a0f73cc2 100644
--- a/fs/bcachefs/debug.c
+++ b/fs/bcachefs/debug.c
@@ -153,8 +153,6 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
c->verify_data = __bch2_btree_node_mem_alloc(c);
if (!c->verify_data)
goto out;
-
- list_del_init(&c->verify_data->list);
}
BUG_ON(b->nsets != 1);
@@ -586,6 +584,8 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
i->ubuf = buf;
i->size = size;
i->ret = 0;
+
+ int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
restart:
seqmutex_lock(&c->btree_trans_lock);
list_sort(&c->btree_trans_list, list_ptr_order_cmp);
@@ -599,6 +599,11 @@ restart:
if (!closure_get_not_zero(&trans->ref))
continue;
+ if (!trans->srcu_held) {
+ closure_put(&trans->ref);
+ continue;
+ }
+
u32 seq = seqmutex_unlock(&c->btree_trans_lock);
bch2_btree_trans_to_text(&i->buf, trans);
@@ -620,6 +625,8 @@ restart:
}
seqmutex_unlock(&c->btree_trans_lock);
unlocked:
+ srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
+
if (i->buf.allocation_failure)
ret = -ENOMEM;
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index 86a842f1e88e..acc3b7b67704 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -282,7 +282,6 @@
x(EIO, sb_not_downgraded) \
x(EIO, btree_node_write_all_failed) \
x(EIO, btree_node_read_error) \
- x(EIO, btree_node_read_validate_error) \
x(EIO, btree_need_topology_repair) \
x(EIO, bucket_ref_update) \
x(EIO, trigger_alloc) \
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index b2a6c041e165..267e73d9d7e6 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -103,7 +103,7 @@ int __bch2_topology_error(struct bch_fs *c, struct printbuf *out)
return bch_err_throw(c, btree_need_topology_repair);
} else {
return bch2_run_explicit_recovery_pass(c, out, BCH_RECOVERY_PASS_check_topology, 0) ?:
- bch_err_throw(c, btree_node_read_validate_error);
+ bch_err_throw(c, btree_need_topology_repair);
}
}
@@ -633,7 +633,9 @@ err:
* log_fsck_err()s: that would require us to track for every error type
* which recovery pass corrects it, to get the fsck exit status correct:
*/
- if (bch2_err_matches(ret, BCH_ERR_fsck_fix)) {
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
+ /* nothing */
+ } else if (bch2_err_matches(ret, BCH_ERR_fsck_fix)) {
set_bit(BCH_FS_errors_fixed, &c->flags);
} else {
set_bit(BCH_FS_errors_not_fixed, &c->flags);
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 036e4ad95987..83cbd77dcb9c 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -50,19 +50,17 @@ void bch2_io_failures_to_text(struct printbuf *out,
struct bch_io_failures *failed)
{
static const char * const error_types[] = {
- "io", "checksum", "ec reconstruct", NULL
+ "btree validate", "io", "checksum", "ec reconstruct", NULL
};
for (struct bch_dev_io_failures *f = failed->devs;
f < failed->devs + failed->nr;
f++) {
unsigned errflags =
- ((!!f->failed_io) << 0) |
- ((!!f->failed_csum_nr) << 1) |
- ((!!f->failed_ec) << 2);
-
- if (!errflags)
- continue;
+ ((!!f->failed_btree_validate) << 0) |
+ ((!!f->failed_io) << 1) |
+ ((!!f->failed_csum_nr) << 2) |
+ ((!!f->failed_ec) << 3);
bch2_printbuf_make_room(out, 1024);
out->atomic++;
@@ -77,7 +75,9 @@ void bch2_io_failures_to_text(struct printbuf *out,
prt_char(out, ' ');
- if (is_power_of_2(errflags)) {
+ if (!errflags) {
+ prt_str(out, "no error - confused");
+ } else if (is_power_of_2(errflags)) {
prt_bitflags(out, error_types, errflags);
prt_str(out, " error");
} else {
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index dbf161e4311a..856eb2b41896 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -12,6 +12,7 @@
#include "fs.h"
#include "fsck.h"
#include "inode.h"
+#include "io_misc.h"
#include "keylist.h"
#include "namei.h"
#include "recovery_passes.h"
@@ -1919,33 +1920,11 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
"extent type past end of inode %llu:%u, i_size %llu\n%s",
i->inode.bi_inum, i->inode.bi_snapshot, i->inode.bi_size,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
- struct bkey_i *whiteout = bch2_trans_kmalloc(trans, sizeof(*whiteout));
- ret = PTR_ERR_OR_ZERO(whiteout);
- if (ret)
- goto err;
-
- bkey_init(&whiteout->k);
- whiteout->k.p = SPOS(k.k->p.inode,
- last_block,
- i->inode.bi_snapshot);
- bch2_key_resize(&whiteout->k,
- min(KEY_SIZE_MAX & (~0 << c->block_bits),
- U64_MAX - whiteout->k.p.offset));
-
-
- /*
- * Need a normal (not BTREE_ITER_all_snapshots)
- * iterator, if we're deleting in a different
- * snapshot and need to emit a whiteout
- */
- struct btree_iter iter2;
- bch2_trans_iter_init(trans, &iter2, BTREE_ID_extents,
- bkey_start_pos(&whiteout->k),
- BTREE_ITER_intent);
- ret = bch2_btree_iter_traverse(trans, &iter2) ?:
- bch2_trans_update(trans, &iter2, whiteout,
- BTREE_UPDATE_internal_snapshot_node);
- bch2_trans_iter_exit(trans, &iter2);
+ ret = bch2_fpunch_snapshot(trans,
+ SPOS(i->inode.bi_inum,
+ last_block,
+ i->inode.bi_snapshot),
+ POS(i->inode.bi_inum, U64_MAX));
if (ret)
goto err;
diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c
index bf72b1d2e2cb..07023667a475 100644
--- a/fs/bcachefs/io_misc.c
+++ b/fs/bcachefs/io_misc.c
@@ -135,6 +135,33 @@ err_noprint:
return ret;
}
+/* For fsck */
+int bch2_fpunch_snapshot(struct btree_trans *trans, struct bpos start, struct bpos end)
+{
+ u32 restart_count = trans->restart_count;
+ struct bch_fs *c = trans->c;
+ struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0);
+ unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits);
+ struct bkey_i delete;
+
+ int ret = for_each_btree_key_max_commit(trans, iter, BTREE_ID_extents,
+ start, end, 0, k,
+ &disk_res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+ bkey_init(&delete.k);
+ delete.k.p = iter.pos;
+
+ /* create the biggest key we can */
+ bch2_key_resize(&delete.k, max_sectors);
+ bch2_cut_back(end, &delete);
+
+ bch2_extent_trim_atomic(trans, &iter, &delete) ?:
+ bch2_trans_update(trans, &iter, &delete, 0);
+ }));
+
+ bch2_disk_reservation_put(c, &disk_res);
+ return ret ?: trans_was_restarted(trans, restart_count);
+}
+
/*
* Returns -BCH_ERR_transacton_restart if we had to drop locks:
*/
diff --git a/fs/bcachefs/io_misc.h b/fs/bcachefs/io_misc.h
index 9cb44a7c43c1..b93e4d4b3c0c 100644
--- a/fs/bcachefs/io_misc.h
+++ b/fs/bcachefs/io_misc.h
@@ -5,6 +5,8 @@
int bch2_extent_fallocate(struct btree_trans *, subvol_inum, struct btree_iter *,
u64, struct bch_io_opts, s64 *,
struct write_point_specifier);
+
+int bch2_fpunch_snapshot(struct btree_trans *, struct bpos, struct bpos);
int bch2_fpunch_at(struct btree_trans *, struct btree_iter *,
subvol_inum, u64, s64 *);
int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *);
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index cd6201741c59..0042d43b8e57 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -170,6 +170,12 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne
return (struct journal_space) { 0, 0 };
/*
+ * It's possible for bucket size to be misaligned w.r.t. the filesystem
+ * block size:
+ */
+ min_bucket_size = round_down(min_bucket_size, block_sectors(c));
+
+ /*
* We sorted largest to smallest, and we want the smallest out of the
* @nr_devs_want largest devices:
*/
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index d0b7e3a36a54..c94debb12d2f 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -273,24 +273,35 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
goto out;
struct btree_path *path = btree_iter_path(trans, &iter);
- if (unlikely(!btree_path_node(path, k->level) &&
- !k->allocated)) {
+ if (unlikely(!btree_path_node(path, k->level))) {
struct bch_fs *c = trans->c;
+ CLASS(printbuf, buf)();
+ prt_str(&buf, "btree=");
+ bch2_btree_id_to_text(&buf, k->btree_id);
+ prt_printf(&buf, " level=%u ", k->level);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k->k));
+
if (!(c->recovery.passes_complete & (BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes)|
BIT_ULL(BCH_RECOVERY_PASS_check_topology)))) {
- bch_err(c, "have key in journal replay for btree depth that does not exist, confused");
+ bch_err(c, "have key in journal replay for btree depth that does not exist, confused\n%s",
+ buf.buf);
ret = -EINVAL;
}
-#if 0
+
+ if (!k->allocated) {
+ bch_notice(c, "dropping key in journal replay for depth that does not exist because we're recovering from scan\n%s",
+ buf.buf);
+ k->overwritten = true;
+ goto out;
+ }
+
bch2_trans_iter_exit(trans, &iter);
bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
BTREE_MAX_DEPTH, 0, iter_flags);
ret = bch2_btree_iter_traverse(trans, &iter) ?:
bch2_btree_increase_depth(trans, iter.path, 0) ?:
-BCH_ERR_transaction_restart_nested;
-#endif
- k->overwritten = true;
goto out;
}
diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c
index c09ed2dd4639..6a039e011064 100644
--- a/fs/bcachefs/recovery_passes.c
+++ b/fs/bcachefs/recovery_passes.c
@@ -360,7 +360,7 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c,
!(r->passes_complete & BIT_ULL(pass));
bool ratelimit = flags & RUN_RECOVERY_PASS_ratelimit;
- if (!(in_recovery && (flags & RUN_RECOVERY_PASS_nopersistent))) {
+ if (!(flags & RUN_RECOVERY_PASS_nopersistent)) {
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
__set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required);
}
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 6a329c329f43..16e4a6bd9b97 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -214,9 +214,11 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
/*
* bit 30: I/O error occurred on this folio
+ * bit 29: CPU has dirty data in D-cache (needs aliasing handling);
* bit 0 - 29: remaining parts to complete this folio
*/
-#define EROFS_ONLINEFOLIO_EIO (1 << 30)
+#define EROFS_ONLINEFOLIO_EIO 30
+#define EROFS_ONLINEFOLIO_DIRTY 29
void erofs_onlinefolio_init(struct folio *folio)
{
@@ -233,19 +235,23 @@ void erofs_onlinefolio_split(struct folio *folio)
atomic_inc((atomic_t *)&folio->private);
}
-void erofs_onlinefolio_end(struct folio *folio, int err)
+void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty)
{
int orig, v;
do {
orig = atomic_read((atomic_t *)&folio->private);
- v = (orig - 1) | (err ? EROFS_ONLINEFOLIO_EIO : 0);
+ DBG_BUGON(orig <= 0);
+ v = dirty << EROFS_ONLINEFOLIO_DIRTY;
+ v |= (orig - 1) | (!!err << EROFS_ONLINEFOLIO_EIO);
} while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig);
- if (v & ~EROFS_ONLINEFOLIO_EIO)
+ if (v & (BIT(EROFS_ONLINEFOLIO_DIRTY) - 1))
return;
folio->private = 0;
- folio_end_read(folio, !(v & EROFS_ONLINEFOLIO_EIO));
+ if (v & BIT(EROFS_ONLINEFOLIO_DIRTY))
+ flush_dcache_folio(folio);
+ folio_end_read(folio, !(v & BIT(EROFS_ONLINEFOLIO_EIO)));
}
static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
@@ -351,11 +357,16 @@ int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
*/
static int erofs_read_folio(struct file *file, struct folio *folio)
{
+ trace_erofs_read_folio(folio, true);
+
return iomap_read_folio(folio, &erofs_iomap_ops);
}
static void erofs_readahead(struct readahead_control *rac)
{
+ trace_erofs_readahead(rac->mapping->host, readahead_index(rac),
+ readahead_count(rac), true);
+
return iomap_readahead(rac, &erofs_iomap_ops);
}
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index bf62e2836b60..358061d7b660 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -301,13 +301,11 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
cur = min(cur, rq->outputsize);
if (cur && rq->out[0]) {
kin = kmap_local_page(rq->in[nrpages_in - 1]);
- if (rq->out[0] == rq->in[nrpages_in - 1]) {
+ if (rq->out[0] == rq->in[nrpages_in - 1])
memmove(kin + rq->pageofs_out, kin + pi, cur);
- flush_dcache_page(rq->out[0]);
- } else {
+ else
memcpy_to_page(rq->out[0], rq->pageofs_out,
kin + pi, cur);
- }
kunmap_local(kin);
}
rq->outputsize -= cur;
@@ -325,14 +323,12 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
po = (rq->pageofs_out + cur + pi) & ~PAGE_MASK;
DBG_BUGON(no >= nrpages_out);
cnt = min(insz - pi, PAGE_SIZE - po);
- if (rq->out[no] == rq->in[ni]) {
+ if (rq->out[no] == rq->in[ni])
memmove(kin + po,
kin + rq->pageofs_in + pi, cnt);
- flush_dcache_page(rq->out[no]);
- } else if (rq->out[no]) {
+ else if (rq->out[no])
memcpy_to_page(rq->out[no], po,
kin + rq->pageofs_in + pi, cnt);
- }
pi += cnt;
} while (pi < insz);
kunmap_local(kin);
diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c
index 2fae209d0274..3e4b38bec0aa 100644
--- a/fs/erofs/dir.c
+++ b/fs/erofs/dir.c
@@ -58,6 +58,11 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
struct erofs_dirent *de;
unsigned int nameoff, maxsize;
+ if (fatal_signal_pending(current)) {
+ err = -ERESTARTSYS;
+ break;
+ }
+
de = erofs_bread(&buf, dbstart, true);
if (IS_ERR(de)) {
erofs_err(sb, "failed to readdir of logical block %llu of nid %llu",
@@ -88,6 +93,7 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
break;
ctx->pos = dbstart + maxsize;
ofs = 0;
+ cond_resched();
}
erofs_put_metabuf(&buf);
if (EROFS_I(dir)->dot_omitted && ctx->pos == dir->i_size) {
diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c
index df5cc63f2c01..91781718199e 100644
--- a/fs/erofs/fileio.c
+++ b/fs/erofs/fileio.c
@@ -38,7 +38,7 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret)
} else {
bio_for_each_folio_all(fi, &rq->bio) {
DBG_BUGON(folio_test_uptodate(fi.folio));
- erofs_onlinefolio_end(fi.folio, ret);
+ erofs_onlinefolio_end(fi.folio, ret, false);
}
}
bio_uninit(&rq->bio);
@@ -96,8 +96,6 @@ static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio)
struct erofs_map_blocks *map = &io->map;
unsigned int cur = 0, end = folio_size(folio), len, attached = 0;
loff_t pos = folio_pos(folio), ofs;
- struct iov_iter iter;
- struct bio_vec bv;
int err = 0;
erofs_onlinefolio_init(folio);
@@ -122,13 +120,7 @@ static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio)
err = PTR_ERR(src);
break;
}
- bvec_set_folio(&bv, folio, len, cur);
- iov_iter_bvec(&iter, ITER_DEST, &bv, 1, len);
- if (copy_to_iter(src, len, &iter) != len) {
- erofs_put_metabuf(&buf);
- err = -EIO;
- break;
- }
+ memcpy_to_folio(folio, cur, src, len);
erofs_put_metabuf(&buf);
} else if (!(map->m_flags & EROFS_MAP_MAPPED)) {
folio_zero_segment(folio, cur, cur + len);
@@ -162,7 +154,7 @@ io_retry:
}
cur += len;
}
- erofs_onlinefolio_end(folio, err);
+ erofs_onlinefolio_end(folio, err, false);
return err;
}
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index a32c03a80c70..06b867d2fc3b 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -315,10 +315,12 @@ static inline struct folio *erofs_grab_folio_nowait(struct address_space *as,
/* The length of extent is full */
#define EROFS_MAP_FULL_MAPPED 0x0008
/* Located in the special packed inode */
-#define EROFS_MAP_FRAGMENT 0x0010
+#define __EROFS_MAP_FRAGMENT 0x0010
/* The extent refers to partial decompressed data */
#define EROFS_MAP_PARTIAL_REF 0x0020
+#define EROFS_MAP_FRAGMENT (EROFS_MAP_MAPPED | __EROFS_MAP_FRAGMENT)
+
struct erofs_map_blocks {
struct erofs_buf buf;
@@ -390,7 +392,7 @@ int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map);
void erofs_onlinefolio_init(struct folio *folio);
void erofs_onlinefolio_split(struct folio *folio);
-void erofs_onlinefolio_end(struct folio *folio, int err);
+void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty);
struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid);
int erofs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask,
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index fe8071844724..e3f28a1bb945 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1034,7 +1034,7 @@ static int z_erofs_scan_folio(struct z_erofs_frontend *f,
if (!(map->m_flags & EROFS_MAP_MAPPED)) {
folio_zero_segment(folio, cur, end);
tight = false;
- } else if (map->m_flags & EROFS_MAP_FRAGMENT) {
+ } else if (map->m_flags & __EROFS_MAP_FRAGMENT) {
erofs_off_t fpos = offset + cur - map->m_la;
err = z_erofs_read_fragment(inode->i_sb, folio, cur,
@@ -1091,7 +1091,7 @@ static int z_erofs_scan_folio(struct z_erofs_frontend *f,
tight = (bs == PAGE_SIZE);
}
} while ((end = cur) > 0);
- erofs_onlinefolio_end(folio, err);
+ erofs_onlinefolio_end(folio, err, false);
return err;
}
@@ -1196,7 +1196,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_backend *be, int err)
cur += len;
}
kunmap_local(dst);
- erofs_onlinefolio_end(page_folio(bvi->bvec.page), err);
+ erofs_onlinefolio_end(page_folio(bvi->bvec.page), err, true);
list_del(p);
kfree(bvi);
}
@@ -1355,7 +1355,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err)
DBG_BUGON(z_erofs_page_is_invalidated(page));
if (!z_erofs_is_shortlived_page(page)) {
- erofs_onlinefolio_end(page_folio(page), err);
+ erofs_onlinefolio_end(page_folio(page), err, true);
continue;
}
if (pcl->algorithmformat != Z_EROFS_COMPRESSION_LZ4) {
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 0bebc6e3a4d7..f1a15ff22147 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -413,8 +413,7 @@ static int z_erofs_map_blocks_fo(struct inode *inode,
!vi->z_tailextent_headlcn) {
map->m_la = 0;
map->m_llen = inode->i_size;
- map->m_flags = EROFS_MAP_MAPPED |
- EROFS_MAP_FULL_MAPPED | EROFS_MAP_FRAGMENT;
+ map->m_flags = EROFS_MAP_FRAGMENT;
return 0;
}
initial_lcn = ofs >> lclusterbits;
@@ -489,7 +488,7 @@ static int z_erofs_map_blocks_fo(struct inode *inode,
goto unmap_out;
}
} else if (fragment && m.lcn == vi->z_tailextent_headlcn) {
- map->m_flags |= EROFS_MAP_FRAGMENT;
+ map->m_flags = EROFS_MAP_FRAGMENT;
} else {
map->m_pa = erofs_pos(sb, m.pblk);
err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
@@ -617,7 +616,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode,
if (lstart < lend) {
map->m_la = lstart;
if (last && (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) {
- map->m_flags |= EROFS_MAP_MAPPED | EROFS_MAP_FRAGMENT;
+ map->m_flags = EROFS_MAP_FRAGMENT;
vi->z_fragmentoff = map->m_plen;
if (recsz > offsetof(struct z_erofs_extent, pstart_lo))
vi->z_fragmentoff |= map->m_pa << 32;
@@ -797,7 +796,7 @@ static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset,
iomap->length = map.m_llen;
if (map.m_flags & EROFS_MAP_MAPPED) {
iomap->type = IOMAP_MAPPED;
- iomap->addr = map.m_flags & EROFS_MAP_FRAGMENT ?
+ iomap->addr = map.m_flags & __EROFS_MAP_FRAGMENT ?
IOMAP_NULL_ADDR : map.m_pa;
} else {
iomap->type = IOMAP_HOLE;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 895256cd2786..0fbf5dfedb24 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -137,7 +137,13 @@ struct epitem {
};
/* List header used to link this structure to the eventpoll ready list */
- struct llist_node rdllink;
+ struct list_head rdllink;
+
+ /*
+ * Works together "struct eventpoll"->ovflist in keeping the
+ * single linked chain of items.
+ */
+ struct epitem *next;
/* The file descriptor information this item refers to */
struct epoll_filefd ffd;
@@ -185,15 +191,22 @@ struct eventpoll {
/* Wait queue used by file->poll() */
wait_queue_head_t poll_wait;
- /*
- * List of ready file descriptors. Adding to this list is lockless. Items can be removed
- * only with eventpoll::mtx
- */
- struct llist_head rdllist;
+ /* List of ready file descriptors */
+ struct list_head rdllist;
+
+ /* Lock which protects rdllist and ovflist */
+ rwlock_t lock;
/* RB tree root used to store monitored fd structs */
struct rb_root_cached rbr;
+ /*
+ * This is a single linked list that chains all the "struct epitem" that
+ * happened while transferring ready events to userspace w/out
+ * holding ->lock.
+ */
+ struct epitem *ovflist;
+
/* wakeup_source used when ep_send_events or __ep_eventpoll_poll is running */
struct wakeup_source *ws;
@@ -348,14 +361,10 @@ static inline int ep_cmp_ffd(struct epoll_filefd *p1,
(p1->file < p2->file ? -1 : p1->fd - p2->fd));
}
-/*
- * Add the item to its container eventpoll's rdllist; do nothing if the item is already on rdllist.
- */
-static void epitem_ready(struct epitem *epi)
+/* Tells us if the item is currently linked */
+static inline int ep_is_linked(struct epitem *epi)
{
- if (&epi->rdllink == cmpxchg(&epi->rdllink.next, &epi->rdllink, NULL))
- llist_add(&epi->rdllink, &epi->ep->rdllist);
-
+ return !list_empty(&epi->rdllink);
}
static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_entry_t *p)
@@ -374,26 +383,13 @@ static inline struct epitem *ep_item_from_wait(wait_queue_entry_t *p)
*
* @ep: Pointer to the eventpoll context.
*
- * Return: true if ready events might be available, false otherwise.
+ * Return: a value different than %zero if ready events are available,
+ * or %zero otherwise.
*/
-static inline bool ep_events_available(struct eventpoll *ep)
+static inline int ep_events_available(struct eventpoll *ep)
{
- bool available;
- int locked;
-
- locked = mutex_trylock(&ep->mtx);
- if (!locked) {
- /*
- * The lock held and someone might have removed all items while inspecting it. The
- * llist_empty() check in this case is futile. Assume that something is enqueued and
- * let ep_try_send_events() figure it out.
- */
- return true;
- }
-
- available = !llist_empty(&ep->rdllist);
- mutex_unlock(&ep->mtx);
- return available;
+ return !list_empty_careful(&ep->rdllist) ||
+ READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR;
}
#ifdef CONFIG_NET_RX_BUSY_POLL
@@ -728,6 +724,77 @@ static inline void ep_pm_stay_awake_rcu(struct epitem *epi)
rcu_read_unlock();
}
+
+/*
+ * ep->mutex needs to be held because we could be hit by
+ * eventpoll_release_file() and epoll_ctl().
+ */
+static void ep_start_scan(struct eventpoll *ep, struct list_head *txlist)
+{
+ /*
+ * Steal the ready list, and re-init the original one to the
+ * empty list. Also, set ep->ovflist to NULL so that events
+ * happening while looping w/out locks, are not lost. We cannot
+ * have the poll callback to queue directly on ep->rdllist,
+ * because we want the "sproc" callback to be able to do it
+ * in a lockless way.
+ */
+ lockdep_assert_irqs_enabled();
+ write_lock_irq(&ep->lock);
+ list_splice_init(&ep->rdllist, txlist);
+ WRITE_ONCE(ep->ovflist, NULL);
+ write_unlock_irq(&ep->lock);
+}
+
+static void ep_done_scan(struct eventpoll *ep,
+ struct list_head *txlist)
+{
+ struct epitem *epi, *nepi;
+
+ write_lock_irq(&ep->lock);
+ /*
+ * During the time we spent inside the "sproc" callback, some
+ * other events might have been queued by the poll callback.
+ * We re-insert them inside the main ready-list here.
+ */
+ for (nepi = READ_ONCE(ep->ovflist); (epi = nepi) != NULL;
+ nepi = epi->next, epi->next = EP_UNACTIVE_PTR) {
+ /*
+ * We need to check if the item is already in the list.
+ * During the "sproc" callback execution time, items are
+ * queued into ->ovflist but the "txlist" might already
+ * contain them, and the list_splice() below takes care of them.
+ */
+ if (!ep_is_linked(epi)) {
+ /*
+ * ->ovflist is LIFO, so we have to reverse it in order
+ * to keep in FIFO.
+ */
+ list_add(&epi->rdllink, &ep->rdllist);
+ ep_pm_stay_awake(epi);
+ }
+ }
+ /*
+ * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after
+ * releasing the lock, events will be queued in the normal way inside
+ * ep->rdllist.
+ */
+ WRITE_ONCE(ep->ovflist, EP_UNACTIVE_PTR);
+
+ /*
+ * Quickly re-inject items left on "txlist".
+ */
+ list_splice(txlist, &ep->rdllist);
+ __pm_relax(ep->ws);
+
+ if (!list_empty(&ep->rdllist)) {
+ if (waitqueue_active(&ep->wq))
+ wake_up(&ep->wq);
+ }
+
+ write_unlock_irq(&ep->lock);
+}
+
static void ep_get(struct eventpoll *ep)
{
refcount_inc(&ep->refcount);
@@ -765,12 +832,10 @@ static void ep_free(struct eventpoll *ep)
static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
{
struct file *file = epi->ffd.file;
- struct llist_node *put_back_last;
struct epitems_head *to_free;
struct hlist_head *head;
- LLIST_HEAD(put_back);
- lockdep_assert_held(&ep->mtx);
+ lockdep_assert_irqs_enabled();
/*
* Removes poll wait queue hooks.
@@ -802,20 +867,10 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
rb_erase_cached(&epi->rbn, &ep->rbr);
- if (llist_on_list(&epi->rdllink)) {
- put_back_last = NULL;
- while (true) {
- struct llist_node *n = llist_del_first(&ep->rdllist);
-
- if (&epi->rdllink == n || WARN_ON(!n))
- break;
- if (!put_back_last)
- put_back_last = n;
- __llist_add(n, &put_back);
- }
- if (put_back_last)
- llist_add_batch(put_back.first, put_back_last, &ep->rdllist);
- }
+ write_lock_irq(&ep->lock);
+ if (ep_is_linked(epi))
+ list_del_init(&epi->rdllink);
+ write_unlock_irq(&ep->lock);
wakeup_source_unregister(ep_wakeup_source(epi));
/*
@@ -917,9 +972,8 @@ static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt, int depth
static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int depth)
{
struct eventpoll *ep = file->private_data;
- struct wakeup_source *ws;
- struct llist_node *n;
- struct epitem *epi;
+ LIST_HEAD(txlist);
+ struct epitem *epi, *tmp;
poll_table pt;
__poll_t res = 0;
@@ -933,39 +987,22 @@ static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int dep
* the ready list.
*/
mutex_lock_nested(&ep->mtx, depth);
- while (true) {
- n = llist_del_first_init(&ep->rdllist);
- if (!n)
- break;
-
- epi = llist_entry(n, struct epitem, rdllink);
-
+ ep_start_scan(ep, &txlist);
+ list_for_each_entry_safe(epi, tmp, &txlist, rdllink) {
if (ep_item_poll(epi, &pt, depth + 1)) {
res = EPOLLIN | EPOLLRDNORM;
- epitem_ready(epi);
break;
} else {
/*
- * We need to activate ep before deactivating epi, to prevent autosuspend
- * just in case epi becomes active after ep_item_poll() above.
- *
- * This is similar to ep_send_events().
+ * Item has been dropped into the ready list by the poll
+ * callback, but it's not actually ready, as far as
+ * caller requested events goes. We can remove it here.
*/
- ws = ep_wakeup_source(epi);
- if (ws) {
- if (ws->active)
- __pm_stay_awake(ep->ws);
- __pm_relax(ws);
- }
__pm_relax(ep_wakeup_source(epi));
-
- /* Just in case epi becomes active right before __pm_relax() */
- if (unlikely(ep_item_poll(epi, &pt, depth + 1)))
- ep_pm_stay_awake(epi);
-
- __pm_relax(ep->ws);
+ list_del_init(&epi->rdllink);
}
}
+ ep_done_scan(ep, &txlist);
mutex_unlock(&ep->mtx);
return res;
}
@@ -1114,10 +1151,12 @@ static int ep_alloc(struct eventpoll **pep)
return -ENOMEM;
mutex_init(&ep->mtx);
+ rwlock_init(&ep->lock);
init_waitqueue_head(&ep->wq);
init_waitqueue_head(&ep->poll_wait);
- init_llist_head(&ep->rdllist);
+ INIT_LIST_HEAD(&ep->rdllist);
ep->rbr = RB_ROOT_CACHED;
+ ep->ovflist = EP_UNACTIVE_PTR;
ep->user = get_current_user();
refcount_set(&ep->refcount, 1);
@@ -1200,10 +1239,93 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd,
#endif /* CONFIG_KCMP */
/*
+ * Adds a new entry to the tail of the list in a lockless way, i.e.
+ * multiple CPUs are allowed to call this function concurrently.
+ *
+ * Beware: it is necessary to prevent any other modifications of the
+ * existing list until all changes are completed, in other words
+ * concurrent list_add_tail_lockless() calls should be protected
+ * with a read lock, where write lock acts as a barrier which
+ * makes sure all list_add_tail_lockless() calls are fully
+ * completed.
+ *
+ * Also an element can be locklessly added to the list only in one
+ * direction i.e. either to the tail or to the head, otherwise
+ * concurrent access will corrupt the list.
+ *
+ * Return: %false if element has been already added to the list, %true
+ * otherwise.
+ */
+static inline bool list_add_tail_lockless(struct list_head *new,
+ struct list_head *head)
+{
+ struct list_head *prev;
+
+ /*
+ * This is simple 'new->next = head' operation, but cmpxchg()
+ * is used in order to detect that same element has been just
+ * added to the list from another CPU: the winner observes
+ * new->next == new.
+ */
+ if (!try_cmpxchg(&new->next, &new, head))
+ return false;
+
+ /*
+ * Initially ->next of a new element must be updated with the head
+ * (we are inserting to the tail) and only then pointers are atomically
+ * exchanged. XCHG guarantees memory ordering, thus ->next should be
+ * updated before pointers are actually swapped and pointers are
+ * swapped before prev->next is updated.
+ */
+
+ prev = xchg(&head->prev, new);
+
+ /*
+ * It is safe to modify prev->next and new->prev, because a new element
+ * is added only to the tail and new->next is updated before XCHG.
+ */
+
+ prev->next = new;
+ new->prev = prev;
+
+ return true;
+}
+
+/*
+ * Chains a new epi entry to the tail of the ep->ovflist in a lockless way,
+ * i.e. multiple CPUs are allowed to call this function concurrently.
+ *
+ * Return: %false if epi element has been already chained, %true otherwise.
+ */
+static inline bool chain_epi_lockless(struct epitem *epi)
+{
+ struct eventpoll *ep = epi->ep;
+
+ /* Fast preliminary check */
+ if (epi->next != EP_UNACTIVE_PTR)
+ return false;
+
+ /* Check that the same epi has not been just chained from another CPU */
+ if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR)
+ return false;
+
+ /* Atomically exchange tail */
+ epi->next = xchg(&ep->ovflist, epi);
+
+ return true;
+}
+
+/*
* This is the callback that is passed to the wait queue wakeup
* mechanism. It is called by the stored file descriptors when they
* have events to report.
*
+ * This callback takes a read lock in order not to contend with concurrent
+ * events from another file descriptor, thus all modifications to ->rdllist
+ * or ->ovflist are lockless. Read lock is paired with the write lock from
+ * ep_start/done_scan(), which stops all list modifications and guarantees
+ * that lists state is seen correctly.
+ *
* Another thing worth to mention is that ep_poll_callback() can be called
* concurrently for the same @epi from different CPUs if poll table was inited
* with several wait queues entries. Plural wakeup from different CPUs of a
@@ -1213,11 +1335,15 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd,
*/
static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
+ int pwake = 0;
struct epitem *epi = ep_item_from_wait(wait);
struct eventpoll *ep = epi->ep;
__poll_t pollflags = key_to_poll(key);
+ unsigned long flags;
int ewake = 0;
+ read_lock_irqsave(&ep->lock, flags);
+
ep_set_busy_poll_napi_id(epi);
/*
@@ -1227,7 +1353,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
* until the next EPOLL_CTL_MOD will be issued.
*/
if (!(epi->event.events & ~EP_PRIVATE_BITS))
- goto out;
+ goto out_unlock;
/*
* Check the events coming with the callback. At this stage, not
@@ -1236,10 +1362,22 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
* test for "key" != NULL before the event match test.
*/
if (pollflags && !(pollflags & epi->event.events))
- goto out;
+ goto out_unlock;
- ep_pm_stay_awake_rcu(epi);
- epitem_ready(epi);
+ /*
+ * If we are transferring events to userspace, we can hold no locks
+ * (because we're accessing user memory, and because of linux f_op->poll()
+ * semantics). All the events that happen during that period of time are
+ * chained in ep->ovflist and requeued later on.
+ */
+ if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) {
+ if (chain_epi_lockless(epi))
+ ep_pm_stay_awake_rcu(epi);
+ } else if (!ep_is_linked(epi)) {
+ /* In the usual case, add event to ready list. */
+ if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist))
+ ep_pm_stay_awake_rcu(epi);
+ }
/*
* Wake up ( if active ) both the eventpoll wait list and the ->poll()
@@ -1268,9 +1406,15 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
wake_up(&ep->wq);
}
if (waitqueue_active(&ep->poll_wait))
+ pwake++;
+
+out_unlock:
+ read_unlock_irqrestore(&ep->lock, flags);
+
+ /* We have to call this outside the lock */
+ if (pwake)
ep_poll_safewake(ep, epi, pollflags & EPOLL_URING_WAKE);
-out:
if (!(epi->event.events & EPOLLEXCLUSIVE))
ewake = 1;
@@ -1515,6 +1659,8 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
if (is_file_epoll(tfile))
tep = tfile->private_data;
+ lockdep_assert_irqs_enabled();
+
if (unlikely(percpu_counter_compare(&ep->user->epoll_watches,
max_user_watches) >= 0))
return -ENOSPC;
@@ -1526,10 +1672,11 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
}
/* Item initialization follow here ... */
- init_llist_node(&epi->rdllink);
+ INIT_LIST_HEAD(&epi->rdllink);
epi->ep = ep;
ep_set_ffd(&epi->ffd, tfile, fd);
epi->event = *event;
+ epi->next = EP_UNACTIVE_PTR;
if (tep)
mutex_lock_nested(&tep->mtx, 1);
@@ -1596,13 +1743,16 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
return -ENOMEM;
}
+ /* We have to drop the new item inside our item list to keep track of it */
+ write_lock_irq(&ep->lock);
+
/* record NAPI ID of new item if present */
ep_set_busy_poll_napi_id(epi);
/* If the file is already "ready" we drop it inside the ready list */
- if (revents) {
+ if (revents && !ep_is_linked(epi)) {
+ list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake(epi);
- epitem_ready(epi);
/* Notify waiting tasks that events are available */
if (waitqueue_active(&ep->wq))
@@ -1611,6 +1761,8 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
pwake++;
}
+ write_unlock_irq(&ep->lock);
+
/* We have to call this outside the lock */
if (pwake)
ep_poll_safewake(ep, NULL, 0);
@@ -1625,8 +1777,11 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
static int ep_modify(struct eventpoll *ep, struct epitem *epi,
const struct epoll_event *event)
{
+ int pwake = 0;
poll_table pt;
+ lockdep_assert_irqs_enabled();
+
init_poll_funcptr(&pt, NULL);
/*
@@ -1670,16 +1825,24 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
* list, push it inside.
*/
if (ep_item_poll(epi, &pt, 1)) {
- ep_pm_stay_awake(epi);
- epitem_ready(epi);
+ write_lock_irq(&ep->lock);
+ if (!ep_is_linked(epi)) {
+ list_add_tail(&epi->rdllink, &ep->rdllist);
+ ep_pm_stay_awake(epi);
- /* Notify waiting tasks that events are available */
- if (waitqueue_active(&ep->wq))
- wake_up(&ep->wq);
- if (waitqueue_active(&ep->poll_wait))
- ep_poll_safewake(ep, NULL, 0);
+ /* Notify waiting tasks that events are available */
+ if (waitqueue_active(&ep->wq))
+ wake_up(&ep->wq);
+ if (waitqueue_active(&ep->poll_wait))
+ pwake++;
+ }
+ write_unlock_irq(&ep->lock);
}
+ /* We have to call this outside the lock */
+ if (pwake)
+ ep_poll_safewake(ep, NULL, 0);
+
return 0;
}
@@ -1687,7 +1850,7 @@ static int ep_send_events(struct eventpoll *ep,
struct epoll_event __user *events, int maxevents)
{
struct epitem *epi, *tmp;
- LLIST_HEAD(txlist);
+ LIST_HEAD(txlist);
poll_table pt;
int res = 0;
@@ -1702,18 +1865,19 @@ static int ep_send_events(struct eventpoll *ep,
init_poll_funcptr(&pt, NULL);
mutex_lock(&ep->mtx);
+ ep_start_scan(ep, &txlist);
- while (res < maxevents) {
+ /*
+ * We can loop without lock because we are passed a task private list.
+ * Items cannot vanish during the loop we are holding ep->mtx.
+ */
+ list_for_each_entry_safe(epi, tmp, &txlist, rdllink) {
struct wakeup_source *ws;
- struct llist_node *n;
__poll_t revents;
- n = llist_del_first(&ep->rdllist);
- if (!n)
+ if (res >= maxevents)
break;
- epi = llist_entry(n, struct epitem, rdllink);
-
/*
* Activate ep->ws before deactivating epi->ws to prevent
* triggering auto-suspend here (in case we reactive epi->ws
@@ -1730,30 +1894,21 @@ static int ep_send_events(struct eventpoll *ep,
__pm_relax(ws);
}
+ list_del_init(&epi->rdllink);
+
/*
* If the event mask intersect the caller-requested one,
* deliver the event to userspace. Again, we are holding ep->mtx,
* so no operations coming from userspace can change the item.
*/
revents = ep_item_poll(epi, &pt, 1);
- if (!revents) {
- init_llist_node(n);
-
- /*
- * Just in case epi becomes ready after ep_item_poll() above, but before
- * init_llist_node(). Make sure to add it to the ready list, otherwise an
- * event may be lost.
- */
- if (unlikely(ep_item_poll(epi, &pt, 1))) {
- ep_pm_stay_awake(epi);
- epitem_ready(epi);
- }
+ if (!revents)
continue;
- }
events = epoll_put_uevent(revents, epi->event.data, events);
if (!events) {
- llist_add(&epi->rdllink, &ep->rdllist);
+ list_add(&epi->rdllink, &txlist);
+ ep_pm_stay_awake(epi);
if (!res)
res = -EFAULT;
break;
@@ -1761,31 +1916,25 @@ static int ep_send_events(struct eventpoll *ep,
res++;
if (epi->event.events & EPOLLONESHOT)
epi->event.events &= EP_PRIVATE_BITS;
- __llist_add(n, &txlist);
- }
-
- llist_for_each_entry_safe(epi, tmp, txlist.first, rdllink) {
- init_llist_node(&epi->rdllink);
-
- if (!(epi->event.events & EPOLLET)) {
+ else if (!(epi->event.events & EPOLLET)) {
/*
- * If this file has been added with Level Trigger mode, we need to insert
- * back inside the ready list, so that the next call to epoll_wait() will
- * check again the events availability.
+ * If this file has been added with Level
+ * Trigger mode, we need to insert back inside
+ * the ready list, so that the next call to
+ * epoll_wait() will check again the events
+ * availability. At this point, no one can insert
+ * into ep->rdllist besides us. The epoll_ctl()
+ * callers are locked out by
+ * ep_send_events() holding "mtx" and the
+ * poll callback will queue them in ep->ovflist.
*/
+ list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake(epi);
- epitem_ready(epi);
}
}
-
- __pm_relax(ep->ws);
+ ep_done_scan(ep, &txlist);
mutex_unlock(&ep->mtx);
- if (!llist_empty(&ep->rdllist)) {
- if (waitqueue_active(&ep->wq))
- wake_up(&ep->wq);
- }
-
return res;
}
@@ -1878,6 +2027,8 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
wait_queue_entry_t wait;
ktime_t expires, *to = NULL;
+ lockdep_assert_irqs_enabled();
+
if (timeout && (timeout->tv_sec | timeout->tv_nsec)) {
slack = select_estimate_accuracy(timeout);
to = &expires;
@@ -1937,15 +2088,54 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
init_wait(&wait);
wait.func = ep_autoremove_wake_function;
- prepare_to_wait_exclusive(&ep->wq, &wait, TASK_INTERRUPTIBLE);
+ write_lock_irq(&ep->lock);
+ /*
+ * Barrierless variant, waitqueue_active() is called under
+ * the same lock on wakeup ep_poll_callback() side, so it
+ * is safe to avoid an explicit barrier.
+ */
+ __set_current_state(TASK_INTERRUPTIBLE);
- if (!ep_events_available(ep))
+ /*
+ * Do the final check under the lock. ep_start/done_scan()
+ * plays with two lists (->rdllist and ->ovflist) and there
+ * is always a race when both lists are empty for short
+ * period of time although events are pending, so lock is
+ * important.
+ */
+ eavail = ep_events_available(ep);
+ if (!eavail)
+ __add_wait_queue_exclusive(&ep->wq, &wait);
+
+ write_unlock_irq(&ep->lock);
+
+ if (!eavail)
timed_out = !ep_schedule_timeout(to) ||
!schedule_hrtimeout_range(to, slack,
HRTIMER_MODE_ABS);
+ __set_current_state(TASK_RUNNING);
- finish_wait(&ep->wq, &wait);
- eavail = ep_events_available(ep);
+ /*
+ * We were woken up, thus go and try to harvest some events.
+ * If timed out and still on the wait queue, recheck eavail
+ * carefully under lock, below.
+ */
+ eavail = 1;
+
+ if (!list_empty_careful(&wait.entry)) {
+ write_lock_irq(&ep->lock);
+ /*
+ * If the thread timed out and is not on the wait queue,
+ * it means that the thread was woken up after its
+ * timeout expired before it could reacquire the lock.
+ * Thus, when wait.entry is empty, it needs to harvest
+ * events.
+ */
+ if (timed_out)
+ eavail = list_empty(&wait.entry);
+ __remove_wait_queue(&ep->wq, &wait);
+ write_unlock_irq(&ep->lock);
+ }
}
}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 4be91eb6ea5c..751479eb128f 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -36,9 +36,9 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
unsigned long text, lib, swap, anon, file, shmem;
unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
- anon = get_mm_counter(mm, MM_ANONPAGES);
- file = get_mm_counter(mm, MM_FILEPAGES);
- shmem = get_mm_counter(mm, MM_SHMEMPAGES);
+ anon = get_mm_counter_sum(mm, MM_ANONPAGES);
+ file = get_mm_counter_sum(mm, MM_FILEPAGES);
+ shmem = get_mm_counter_sum(mm, MM_SHMEMPAGES);
/*
* Note: to minimize their overhead, mm maintains hiwater_vm and
@@ -59,7 +59,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
text = min(text, mm->exec_vm << PAGE_SHIFT);
lib = (mm->exec_vm << PAGE_SHIFT) - text;
- swap = get_mm_counter(mm, MM_SWAPENTS);
+ swap = get_mm_counter_sum(mm, MM_SWAPENTS);
SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
@@ -92,12 +92,12 @@ unsigned long task_statm(struct mm_struct *mm,
unsigned long *shared, unsigned long *text,
unsigned long *data, unsigned long *resident)
{
- *shared = get_mm_counter(mm, MM_FILEPAGES) +
- get_mm_counter(mm, MM_SHMEMPAGES);
+ *shared = get_mm_counter_sum(mm, MM_FILEPAGES) +
+ get_mm_counter_sum(mm, MM_SHMEMPAGES);
*text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
>> PAGE_SHIFT;
*data = mm->data_vm + mm->stack_vm;
- *resident = *shared + get_mm_counter(mm, MM_ANONPAGES);
+ *resident = *shared + get_mm_counter_sum(mm, MM_ANONPAGES);
return mm->total_vm;
}
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index fafa86273f12..63d17cea2e95 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -8573,11 +8573,6 @@ static void smb20_oplock_break_ack(struct ksmbd_work *work)
goto err_out;
}
- opinfo->op_state = OPLOCK_STATE_NONE;
- wake_up_interruptible_all(&opinfo->oplock_q);
- opinfo_put(opinfo);
- ksmbd_fd_put(work, fp);
-
rsp->StructureSize = cpu_to_le16(24);
rsp->OplockLevel = rsp_oplevel;
rsp->Reserved = 0;
@@ -8585,16 +8580,15 @@ static void smb20_oplock_break_ack(struct ksmbd_work *work)
rsp->VolatileFid = volatile_id;
rsp->PersistentFid = persistent_id;
ret = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_oplock_break));
- if (!ret)
- return;
-
+ if (ret) {
err_out:
+ smb2_set_err_rsp(work);
+ }
+
opinfo->op_state = OPLOCK_STATE_NONE;
wake_up_interruptible_all(&opinfo->oplock_q);
-
opinfo_put(opinfo);
ksmbd_fd_put(work, fp);
- smb2_set_err_rsp(work);
}
static int check_lease_state(struct lease *lease, __le32 req_state)
@@ -8724,11 +8718,6 @@ static void smb21_lease_break_ack(struct ksmbd_work *work)
}
lease_state = lease->state;
- opinfo->op_state = OPLOCK_STATE_NONE;
- wake_up_interruptible_all(&opinfo->oplock_q);
- atomic_dec(&opinfo->breaking_cnt);
- wake_up_interruptible_all(&opinfo->oplock_brk);
- opinfo_put(opinfo);
rsp->StructureSize = cpu_to_le16(36);
rsp->Reserved = 0;
@@ -8737,16 +8726,16 @@ static void smb21_lease_break_ack(struct ksmbd_work *work)
rsp->LeaseState = lease_state;
rsp->LeaseDuration = 0;
ret = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_lease_ack));
- if (!ret)
- return;
-
+ if (ret) {
err_out:
+ smb2_set_err_rsp(work);
+ }
+
+ opinfo->op_state = OPLOCK_STATE_NONE;
wake_up_interruptible_all(&opinfo->oplock_q);
atomic_dec(&opinfo->breaking_cnt);
wake_up_interruptible_all(&opinfo->oplock_brk);
-
opinfo_put(opinfo);
- smb2_set_err_rsp(work);
}
/**
diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
index 64a428a06ace..c6cbe0d56e32 100644
--- a/fs/smb/server/transport_rdma.c
+++ b/fs/smb/server/transport_rdma.c
@@ -433,7 +433,8 @@ static void free_transport(struct smb_direct_transport *t)
if (t->qp) {
ib_drain_qp(t->qp);
ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs);
- ib_destroy_qp(t->qp);
+ t->qp = NULL;
+ rdma_destroy_qp(t->cm_id);
}
ksmbd_debug(RDMA, "drain the reassembly queue\n");
@@ -1940,8 +1941,8 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t,
return 0;
err:
if (t->qp) {
- ib_destroy_qp(t->qp);
t->qp = NULL;
+ rdma_destroy_qp(t->cm_id);
}
if (t->recv_cq) {
ib_destroy_cq(t->recv_cq);
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index 0f3aad12e495..d3437f6644e3 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -1282,6 +1282,7 @@ out1:
err = ksmbd_vfs_lock_parent(parent_path->dentry, path->dentry);
if (err) {
+ mnt_drop_write(parent_path->mnt);
path_put(path);
path_put(parent_path);
}
diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h
index 5c3b2aa3e69d..d344d41e6cfe 100644
--- a/include/drm/drm_file.h
+++ b/include/drm/drm_file.h
@@ -300,6 +300,9 @@ struct drm_file {
*
* Mapping of mm object handles to object pointers. Used by the GEM
* subsystem. Protected by @table_lock.
+ *
+ * Note that allocated entries might be NULL as a transient state when
+ * creating or deleting a handle.
*/
struct idr object_idr;
diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h
index 668077009fce..38b24fc8978d 100644
--- a/include/drm/drm_framebuffer.h
+++ b/include/drm/drm_framebuffer.h
@@ -23,6 +23,7 @@
#ifndef __DRM_FRAMEBUFFER_H__
#define __DRM_FRAMEBUFFER_H__
+#include <linux/bits.h>
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/sched.h>
@@ -100,6 +101,8 @@ struct drm_framebuffer_funcs {
unsigned num_clips);
};
+#define DRM_FRAMEBUFFER_HAS_HANDLE_REF(_i) BIT(0u + (_i))
+
/**
* struct drm_framebuffer - frame buffer object
*
@@ -189,6 +192,10 @@ struct drm_framebuffer {
*/
int flags;
/**
+ * @internal_flags: Framebuffer flags like DRM_FRAMEBUFFER_HAS_HANDLE_REF.
+ */
+ unsigned int internal_flags;
+ /**
* @filp_head: Placed on &drm_file.fbs, protected by &drm_file.fbs_lock.
*/
struct list_head filp_head;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a59880c809c7..181a0deadc9e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -269,11 +269,16 @@ static inline dev_t disk_devt(struct gendisk *disk)
return MKDEV(disk->major, disk->first_minor);
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* We should strive for 1 << (PAGE_SHIFT + MAX_PAGECACHE_ORDER)
* however we constrain this to what we can validate and test.
*/
#define BLK_MAX_BLOCK_SIZE SZ_64K
+#else
+#define BLK_MAX_BLOCK_SIZE PAGE_SIZE
+#endif
+
/* blk_validate_limits() validates bsize, so drivers don't usually need to */
static inline int blk_validate_block_size(unsigned long bsize)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 2922635986f5..a7efcec2e3d0 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -698,6 +698,8 @@ struct io_kiocb {
struct hlist_node hash_node;
/* For IOPOLL setup queues, with hybrid polling */
u64 iopoll_start;
+ /* for private io_kiocb freeing */
+ struct rcu_head rcu_head;
};
/* internal polling, see IORING_FEAT_FAST_POLL */
struct async_poll *apoll;
diff --git a/include/linux/irqchip/irq-msi-lib.h b/include/linux/irqchip/irq-msi-lib.h
index dd8d1d138544..224ac28e88d7 100644
--- a/include/linux/irqchip/irq-msi-lib.h
+++ b/include/linux/irqchip/irq-msi-lib.h
@@ -17,6 +17,7 @@
#define MATCH_PLATFORM_MSI BIT(DOMAIN_BUS_PLATFORM_MSI)
+struct msi_domain_info;
int msi_lib_irq_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec,
enum irq_domain_bus_token bus_token);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0ef2ba0c667a..fa538feaa8d9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2568,6 +2568,11 @@ static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)
return percpu_counter_read_positive(&mm->rss_stat[member]);
}
+static inline unsigned long get_mm_counter_sum(struct mm_struct *mm, int member)
+{
+ return percpu_counter_sum_positive(&mm->rss_stat[member]);
+}
+
void mm_trace_rss_stat(struct mm_struct *mm, int member);
static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4f78a64beb52..aa9c5be7a632 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -548,10 +548,6 @@ struct sched_statistics {
u64 nr_failed_migrations_running;
u64 nr_failed_migrations_hot;
u64 nr_forced_migrations;
-#ifdef CONFIG_NUMA_BALANCING
- u64 numa_task_migrated;
- u64 numa_task_swapped;
-#endif
u64 nr_wakeups;
u64 nr_wakeups_sync;
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 91a3ce9a2687..9e15a088ba38 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -66,8 +66,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
NUMA_HINT_FAULTS,
NUMA_HINT_FAULTS_LOCAL,
NUMA_PAGE_MIGRATE,
- NUMA_TASK_MIGRATE,
- NUMA_TASK_SWAP,
#endif
#ifdef CONFIG_MIGRATION
PGMIGRATE_SUCCESS, PGMIGRATE_FAIL,
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 73648d26a622..5111ec040c53 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1666,12 +1666,11 @@ static void io_iopoll_req_issued(struct io_kiocb *req, unsigned int issue_flags)
io_req_flags_t io_file_get_flags(struct file *file)
{
- struct inode *inode = file_inode(file);
io_req_flags_t res = 0;
BUILD_BUG_ON(REQ_F_ISREG_BIT != REQ_F_SUPPORT_NOWAIT_BIT + 1);
- if (S_ISREG(inode->i_mode) && !(inode->i_flags & S_ANON_INODE))
+ if (S_ISREG(file_inode(file)->i_mode))
res |= REQ_F_ISREG;
if ((file->f_flags & O_NONBLOCK) || (file->f_mode & FMODE_NOWAIT))
res |= REQ_F_SUPPORT_NOWAIT;
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index 71400d6cefc8..4c2578f2efcb 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -82,7 +82,7 @@ static void io_msg_tw_complete(struct io_kiocb *req, io_tw_token_t tw)
spin_unlock(&ctx->msg_lock);
}
if (req)
- kmem_cache_free(req_cachep, req);
+ kfree_rcu(req, rcu_head);
percpu_ref_put(&ctx->refs);
}
@@ -90,7 +90,7 @@ static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req,
int res, u32 cflags, u64 user_data)
{
if (!READ_ONCE(ctx->submitter_task)) {
- kmem_cache_free(req_cachep, req);
+ kfree_rcu(req, rcu_head);
return -EOWNERDEAD;
}
req->opcode = IORING_OP_NOP;
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 085eeed8cd50..00d0064b22a5 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -863,10 +863,7 @@ static int io_pp_zc_init(struct page_pool *pp)
static void io_pp_zc_destroy(struct page_pool *pp)
{
struct io_zcrx_ifq *ifq = io_pp_to_ifq(pp);
- struct io_zcrx_area *area = ifq->area;
- if (WARN_ON_ONCE(area->free_count != area->nia.num_niovs))
- return;
percpu_ref_put(&ifq->ctx->refs);
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0db36b2b2448..22fdf0c187cd 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7204,18 +7204,18 @@ void perf_event_wakeup(struct perf_event *event)
static void perf_sigtrap(struct perf_event *event)
{
/*
- * We'd expect this to only occur if the irq_work is delayed and either
- * ctx->task or current has changed in the meantime. This can be the
- * case on architectures that do not implement arch_irq_work_raise().
+ * Both perf_pending_task() and perf_pending_irq() can race with the
+ * task exiting.
*/
- if (WARN_ON_ONCE(event->ctx->task != current))
+ if (current->flags & PF_EXITING)
return;
/*
- * Both perf_pending_task() and perf_pending_irq() can race with the
- * task exiting.
+ * We'd expect this to only occur if the irq_work is delayed and either
+ * ctx->task or current has changed in the meantime. This can be the
+ * case on architectures that do not implement arch_irq_work_raise().
*/
- if (current->flags & PF_EXITING)
+ if (WARN_ON_ONCE(event->ctx->task != current))
return;
send_sig_perf((void __user *)event->pending_addr,
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index bb608b68fb30..b4ca17c2fecf 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -384,6 +384,7 @@ static int suspend_prepare(suspend_state_t state)
return 0;
dpm_save_failed_step(SUSPEND_FREEZE);
+ filesystems_thaw();
pm_notifier_call_chain(PM_POST_SUSPEND);
Restore:
pm_restore_console();
@@ -540,7 +541,6 @@ int suspend_devices_and_enter(suspend_state_t state)
return error;
Recover_platform:
- pm_restore_gfp_mask();
platform_recover(state);
goto Resume_devices;
}
@@ -593,8 +593,6 @@ static int enter_state(suspend_state_t state)
ksys_sync_helper();
trace_suspend_resume(TPS("sync_filesystems"), 0, false);
}
- if (filesystem_freeze_enabled)
- filesystems_freeze();
pm_pr_dbg("Preparing system for sleep (%s)\n", mem_sleep_labels[state]);
pm_suspend_clear_flags();
@@ -614,7 +612,6 @@ static int enter_state(suspend_state_t state)
pm_pr_dbg("Finishing wakeup.\n");
suspend_finish();
Unlock:
- filesystems_thaw();
mutex_unlock(&system_transition_mutex);
return error;
}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ec68fc686bd7..81c6df746df1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3362,10 +3362,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
#ifdef CONFIG_NUMA_BALANCING
static void __migrate_swap_task(struct task_struct *p, int cpu)
{
- __schedstat_inc(p->stats.numa_task_swapped);
- count_vm_numa_event(NUMA_TASK_SWAP);
- count_memcg_event_mm(p->mm, NUMA_TASK_SWAP);
-
if (task_on_rq_queued(p)) {
struct rq *src_rq, *dst_rq;
struct rq_flags srf, drf;
@@ -7939,9 +7935,8 @@ int migrate_task_to(struct task_struct *p, int target_cpu)
if (!cpumask_test_cpu(target_cpu, p->cpus_ptr))
return -EINVAL;
- __schedstat_inc(p->stats.numa_task_migrated);
- count_vm_numa_event(NUMA_TASK_MIGRATE);
- count_memcg_event_mm(p->mm, NUMA_TASK_MIGRATE);
+ /* TODO: This is not properly updating schedstats */
+
trace_sched_move_numa(p, curr_cpu, target_cpu);
return stop_one_cpu(curr_cpu, migration_cpu_stop, &arg);
}
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 9d71baf08075..557246880a7e 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -1210,10 +1210,6 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
P_SCHEDSTAT(nr_failed_migrations_running);
P_SCHEDSTAT(nr_failed_migrations_hot);
P_SCHEDSTAT(nr_forced_migrations);
-#ifdef CONFIG_NUMA_BALANCING
- P_SCHEDSTAT(numa_task_migrated);
- P_SCHEDSTAT(numa_task_swapped);
-#endif
P_SCHEDSTAT(nr_wakeups);
P_SCHEDSTAT(nr_wakeups_sync);
P_SCHEDSTAT(nr_wakeups_migrate);
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 424751cdf31f..40830a3ecd96 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -657,7 +657,7 @@ static int parse_btf_arg(char *varname,
ret = query_btf_context(ctx);
if (ret < 0 || ctx->nr_params == 0) {
trace_probe_log_err(ctx->offset, NO_BTF_ENTRY);
- return PTR_ERR(params);
+ return -ENOENT;
}
}
params = ctx->params;
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index 3a74d63a959e..0142bc916f73 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -135,6 +135,9 @@ size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sl
struct codetag_bytes n;
unsigned int i, nr = 0;
+ if (IS_ERR_OR_NULL(alloc_tag_cttype))
+ return 0;
+
if (can_sleep)
codetag_lock_module_list(alloc_tag_cttype, true);
else if (!codetag_trylock_module_list(alloc_tag_cttype))
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 00524e55a21e..ef66be963798 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -5319,6 +5319,7 @@ static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,
struct maple_enode *start;
if (mte_is_leaf(enode)) {
+ mte_set_node_dead(enode);
node->type = mte_node_type(enode);
goto free_leaf;
}
diff --git a/mm/damon/core.c b/mm/damon/core.c
index b217e0120e09..979b29e16ef4 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1449,6 +1449,7 @@ static unsigned long damon_get_intervals_score(struct damon_ctx *c)
}
}
target_access_events = max_access_events * goal_bp / 10000;
+ target_access_events = target_access_events ? : 1;
return access_events * 10000 / target_access_events;
}
@@ -2355,9 +2356,8 @@ static void kdamond_usleep(unsigned long usecs)
*
* If there is a &struct damon_call_control request that registered via
* &damon_call() on @ctx, do or cancel the invocation of the function depending
- * on @cancel. @cancel is set when the kdamond is deactivated by DAMOS
- * watermarks, or the kdamond is already out of the main loop and therefore
- * will be terminated.
+ * on @cancel. @cancel is set when the kdamond is already out of the main loop
+ * and therefore will be terminated.
*/
static void kdamond_call(struct damon_ctx *ctx, bool cancel)
{
@@ -2405,7 +2405,7 @@ static int kdamond_wait_activation(struct damon_ctx *ctx)
if (ctx->callback.after_wmarks_check &&
ctx->callback.after_wmarks_check(ctx))
break;
- kdamond_call(ctx, true);
+ kdamond_call(ctx, false);
damos_walk_cancel(ctx);
}
return -EBUSY;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 9dc95eac558c..a0d285d20992 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2340,12 +2340,15 @@ struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid,
struct folio *folio;
spin_lock_irq(&hugetlb_lock);
+ if (!h->resv_huge_pages) {
+ spin_unlock_irq(&hugetlb_lock);
+ return NULL;
+ }
+
folio = dequeue_hugetlb_folio_nodemask(h, gfp_mask, preferred_nid,
nmask);
- if (folio) {
- VM_BUG_ON(!h->resv_huge_pages);
+ if (folio)
h->resv_huge_pages--;
- }
spin_unlock_irq(&hugetlb_lock);
return folio;
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 8357e1a33699..b0877035491f 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -370,36 +370,6 @@ static inline bool init_task_stack_addr(const void *addr)
sizeof(init_thread_union.stack));
}
-/*
- * This function is invoked with report_lock (a raw_spinlock) held. A
- * PREEMPT_RT kernel cannot call find_vm_area() as it will acquire a sleeping
- * rt_spinlock.
- *
- * For !RT kernel, the PROVE_RAW_LOCK_NESTING config option will print a
- * lockdep warning for this raw_spinlock -> spinlock dependency. This config
- * option is enabled by default to ensure better test coverage to expose this
- * kind of RT kernel problem. This lockdep splat, however, can be suppressed
- * by using DEFINE_WAIT_OVERRIDE_MAP() if it serves a useful purpose and the
- * invalid PREEMPT_RT case has been taken care of.
- */
-static inline struct vm_struct *kasan_find_vm_area(void *addr)
-{
- static DEFINE_WAIT_OVERRIDE_MAP(vmalloc_map, LD_WAIT_SLEEP);
- struct vm_struct *va;
-
- if (IS_ENABLED(CONFIG_PREEMPT_RT))
- return NULL;
-
- /*
- * Suppress lockdep warning and fetch vmalloc area of the
- * offending address.
- */
- lock_map_acquire_try(&vmalloc_map);
- va = find_vm_area(addr);
- lock_map_release(&vmalloc_map);
- return va;
-}
-
static void print_address_description(void *addr, u8 tag,
struct kasan_report_info *info)
{
@@ -429,19 +399,8 @@ static void print_address_description(void *addr, u8 tag,
}
if (is_vmalloc_addr(addr)) {
- struct vm_struct *va = kasan_find_vm_area(addr);
-
- if (va) {
- pr_err("The buggy address belongs to the virtual mapping at\n"
- " [%px, %px) created by:\n"
- " %pS\n",
- va->addr, va->addr + va->size, va->caller);
- pr_err("\n");
-
- page = vmalloc_to_page(addr);
- } else {
- pr_err("The buggy address %px belongs to a vmalloc virtual mapping\n", addr);
- }
+ pr_err("The buggy address %px belongs to a vmalloc virtual mapping\n", addr);
+ page = vmalloc_to_page(addr);
}
if (page) {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 902da8a9c643..70fdeda1120b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -474,8 +474,6 @@ static const unsigned int memcg_vm_event_stat[] = {
NUMA_PAGE_MIGRATE,
NUMA_PTE_UPDATES,
NUMA_HINT_FAULTS,
- NUMA_TASK_MIGRATE,
- NUMA_TASK_SWAP,
#endif
};
diff --git a/mm/migrate.c b/mm/migrate.c
index 8cf0f9c9599d..2c88f3b33833 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2399,6 +2399,7 @@ set_status:
static int get_compat_pages_array(const void __user *chunk_pages[],
const void __user * __user *pages,
+ unsigned long chunk_offset,
unsigned long chunk_nr)
{
compat_uptr_t __user *pages32 = (compat_uptr_t __user *)pages;
@@ -2406,7 +2407,7 @@ static int get_compat_pages_array(const void __user *chunk_pages[],
int i;
for (i = 0; i < chunk_nr; i++) {
- if (get_user(p, pages32 + i))
+ if (get_user(p, pages32 + chunk_offset + i))
return -EFAULT;
chunk_pages[i] = compat_ptr(p);
}
@@ -2425,27 +2426,28 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
#define DO_PAGES_STAT_CHUNK_NR 16UL
const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
int chunk_status[DO_PAGES_STAT_CHUNK_NR];
+ unsigned long chunk_offset = 0;
while (nr_pages) {
unsigned long chunk_nr = min(nr_pages, DO_PAGES_STAT_CHUNK_NR);
if (in_compat_syscall()) {
if (get_compat_pages_array(chunk_pages, pages,
- chunk_nr))
+ chunk_offset, chunk_nr))
break;
} else {
- if (copy_from_user(chunk_pages, pages,
+ if (copy_from_user(chunk_pages, pages + chunk_offset,
chunk_nr * sizeof(*chunk_pages)))
break;
}
do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
- if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status)))
+ if (copy_to_user(status + chunk_offset, chunk_status,
+ chunk_nr * sizeof(*status)))
break;
- pages += chunk_nr;
- status += chunk_nr;
+ chunk_offset += chunk_nr;
nr_pages -= chunk_nr;
}
return nr_pages ? -EFAULT : 0;
diff --git a/mm/rmap.c b/mm/rmap.c
index fb63d9256f09..1320b88fab74 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1845,23 +1845,32 @@ void folio_remove_rmap_pud(struct folio *folio, struct page *page,
#endif
}
-/* We support batch unmapping of PTEs for lazyfree large folios */
-static inline bool can_batch_unmap_folio_ptes(unsigned long addr,
- struct folio *folio, pte_t *ptep)
+static inline unsigned int folio_unmap_pte_batch(struct folio *folio,
+ struct page_vma_mapped_walk *pvmw,
+ enum ttu_flags flags, pte_t pte)
{
const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
- int max_nr = folio_nr_pages(folio);
- pte_t pte = ptep_get(ptep);
+ unsigned long end_addr, addr = pvmw->address;
+ struct vm_area_struct *vma = pvmw->vma;
+ unsigned int max_nr;
+
+ if (flags & TTU_HWPOISON)
+ return 1;
+ if (!folio_test_large(folio))
+ return 1;
+ /* We may only batch within a single VMA and a single page table. */
+ end_addr = pmd_addr_end(addr, vma->vm_end);
+ max_nr = (end_addr - addr) >> PAGE_SHIFT;
+
+ /* We only support lazyfree batching for now ... */
if (!folio_test_anon(folio) || folio_test_swapbacked(folio))
- return false;
+ return 1;
if (pte_unused(pte))
- return false;
- if (pte_pfn(pte) != folio_pfn(folio))
- return false;
+ return 1;
- return folio_pte_batch(folio, addr, ptep, pte, max_nr, fpb_flags, NULL,
- NULL, NULL) == max_nr;
+ return folio_pte_batch(folio, addr, pvmw->pte, pte, max_nr, fpb_flags,
+ NULL, NULL, NULL);
}
/*
@@ -2024,9 +2033,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
if (pte_dirty(pteval))
folio_mark_dirty(folio);
} else if (likely(pte_present(pteval))) {
- if (folio_test_large(folio) && !(flags & TTU_HWPOISON) &&
- can_batch_unmap_folio_ptes(address, folio, pvmw.pte))
- nr_pages = folio_nr_pages(folio);
+ nr_pages = folio_unmap_pte_batch(folio, &pvmw, flags, pteval);
end_addr = address + nr_pages * PAGE_SIZE;
flush_cache_range(vma, address, end_addr);
@@ -2206,13 +2213,16 @@ discard:
hugetlb_remove_rmap(folio);
} else {
folio_remove_rmap_ptes(folio, subpage, nr_pages, vma);
- folio_ref_sub(folio, nr_pages - 1);
}
if (vma->vm_flags & VM_LOCKED)
mlock_drain_local();
- folio_put(folio);
- /* We have already batched the entire folio */
- if (nr_pages > 1)
+ folio_put_refs(folio, nr_pages);
+
+ /*
+ * If we are sure that we batched the entire folio and cleared
+ * all PTEs, we can just optimize and stop right here.
+ */
+ if (nr_pages == folio_nr_pages(folio))
goto walk_done;
continue;
walk_abort:
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ab986dd09b6a..6dbcdceecae1 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -514,6 +514,7 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr,
unsigned long end, pgprot_t prot, struct page **pages, int *nr,
pgtbl_mod_mask *mask)
{
+ int err = 0;
pte_t *pte;
/*
@@ -530,12 +531,18 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr,
do {
struct page *page = pages[*nr];
- if (WARN_ON(!pte_none(ptep_get(pte))))
- return -EBUSY;
- if (WARN_ON(!page))
- return -ENOMEM;
- if (WARN_ON(!pfn_valid(page_to_pfn(page))))
- return -EINVAL;
+ if (WARN_ON(!pte_none(ptep_get(pte)))) {
+ err = -EBUSY;
+ break;
+ }
+ if (WARN_ON(!page)) {
+ err = -ENOMEM;
+ break;
+ }
+ if (WARN_ON(!pfn_valid(page_to_pfn(page)))) {
+ err = -EINVAL;
+ break;
+ }
set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
(*nr)++;
@@ -543,7 +550,8 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr,
arch_leave_lazy_mmu_mode();
*mask |= PGTBL_PTE_MODIFIED;
- return 0;
+
+ return err;
}
static int vmap_pages_pmd_range(pud_t *pud, unsigned long addr,
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 429ae5339bfe..a78d70ddeacd 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1346,8 +1346,6 @@ const char * const vmstat_text[] = {
"numa_hint_faults",
"numa_hint_faults_local",
"numa_pages_migrated",
- "numa_task_migrated",
- "numa_task_swapped",
#endif
#ifdef CONFIG_MIGRATION
"pgmigrate_success",
diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs
index 624d7a4c83ea..14c1aa402951 100644
--- a/rust/kernel/drm/device.rs
+++ b/rust/kernel/drm/device.rs
@@ -66,7 +66,7 @@ impl<T: drm::Driver> Device<T> {
open: Some(drm::File::<T::File>::open_callback),
postclose: Some(drm::File::<T::File>::postclose_callback),
unload: None,
- release: None,
+ release: Some(Self::release),
master_set: None,
master_drop: None,
debugfs_init: None,
@@ -162,6 +162,16 @@ impl<T: drm::Driver> Device<T> {
// SAFETY: `ptr` is valid by the safety requirements of this function.
unsafe { &*ptr.cast() }
}
+
+ extern "C" fn release(ptr: *mut bindings::drm_device) {
+ // SAFETY: `ptr` is a valid pointer to a `struct drm_device` and embedded in `Self`.
+ let this = unsafe { Self::from_drm_device(ptr) };
+
+ // SAFETY:
+ // - When `release` runs it is guaranteed that there is no further access to `this`.
+ // - `this` is valid for dropping.
+ unsafe { core::ptr::drop_in_place(this) };
+ }
}
impl<T: drm::Driver> Deref for Device<T> {
diff --git a/rust/kernel/drm/driver.rs b/rust/kernel/drm/driver.rs
index acb638086131..af93d46d03d3 100644
--- a/rust/kernel/drm/driver.rs
+++ b/rust/kernel/drm/driver.rs
@@ -10,7 +10,6 @@ use crate::{
drm,
error::{to_result, Result},
prelude::*,
- str::CStr,
types::ARef,
};
use macros::vtable;
diff --git a/samples/damon/mtier.c b/samples/damon/mtier.c
index 36d2cd933f5a..c94254b77fc9 100644
--- a/samples/damon/mtier.c
+++ b/samples/damon/mtier.c
@@ -164,8 +164,12 @@ static int damon_sample_mtier_enable_store(
if (enable == enabled)
return 0;
- if (enable)
- return damon_sample_mtier_start();
+ if (enable) {
+ err = damon_sample_mtier_start();
+ if (err)
+ enable = false;
+ return err;
+ }
damon_sample_mtier_stop();
return 0;
}
diff --git a/samples/damon/prcl.c b/samples/damon/prcl.c
index 056b1b21a0fe..5597e6a08ab2 100644
--- a/samples/damon/prcl.c
+++ b/samples/damon/prcl.c
@@ -122,8 +122,12 @@ static int damon_sample_prcl_enable_store(
if (enable == enabled)
return 0;
- if (enable)
- return damon_sample_prcl_start();
+ if (enable) {
+ err = damon_sample_prcl_start();
+ if (err)
+ enable = false;
+ return err;
+ }
damon_sample_prcl_stop();
return 0;
}
diff --git a/samples/damon/wsse.c b/samples/damon/wsse.c
index 11be25803274..e20238a249e7 100644
--- a/samples/damon/wsse.c
+++ b/samples/damon/wsse.c
@@ -102,8 +102,12 @@ static int damon_sample_wsse_enable_store(
if (enable == enabled)
return 0;
- if (enable)
- return damon_sample_wsse_start();
+ if (enable) {
+ err = damon_sample_wsse_start();
+ if (err)
+ enable = false;
+ return err;
+ }
damon_sample_wsse_stop();
return 0;
}
diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in
index fd6bd69c5096..f795302ddfa8 100644
--- a/scripts/gdb/linux/constants.py.in
+++ b/scripts/gdb/linux/constants.py.in
@@ -20,6 +20,7 @@
#include <linux/of_fdt.h>
#include <linux/page_ext.h>
#include <linux/radix-tree.h>
+#include <linux/maple_tree.h>
#include <linux/slab.h>
#include <linux/threads.h>
#include <linux/vmalloc.h>
@@ -93,6 +94,12 @@ LX_GDBPARSED(RADIX_TREE_MAP_SIZE)
LX_GDBPARSED(RADIX_TREE_MAP_SHIFT)
LX_GDBPARSED(RADIX_TREE_MAP_MASK)
+/* linux/maple_tree.h */
+LX_VALUE(MAPLE_NODE_SLOTS)
+LX_VALUE(MAPLE_RANGE64_SLOTS)
+LX_VALUE(MAPLE_ARANGE64_SLOTS)
+LX_GDBPARSED(MAPLE_NODE_MASK)
+
/* linux/vmalloc.h */
LX_VALUE(VM_IOREMAP)
LX_VALUE(VM_ALLOC)
diff --git a/scripts/gdb/linux/interrupts.py b/scripts/gdb/linux/interrupts.py
index 616a5f26377a..f4f715a8f0e3 100644
--- a/scripts/gdb/linux/interrupts.py
+++ b/scripts/gdb/linux/interrupts.py
@@ -7,7 +7,7 @@ import gdb
from linux import constants
from linux import cpus
from linux import utils
-from linux import radixtree
+from linux import mapletree
irq_desc_type = utils.CachedType("struct irq_desc")
@@ -23,12 +23,12 @@ def irqd_is_level(desc):
def show_irq_desc(prec, irq):
text = ""
- desc = radixtree.lookup(gdb.parse_and_eval("&irq_desc_tree"), irq)
+ desc = mapletree.mtree_load(gdb.parse_and_eval("&sparse_irqs"), irq)
if desc is None:
return text
- desc = desc.cast(irq_desc_type.get_type())
- if desc is None:
+ desc = desc.cast(irq_desc_type.get_type().pointer())
+ if desc == 0:
return text
if irq_settings_is_hidden(desc):
@@ -110,7 +110,7 @@ def x86_show_mce(prec, var, pfx, desc):
pvar = gdb.parse_and_eval(var)
text = "%*s: " % (prec, pfx)
for cpu in cpus.each_online_cpu():
- text += "%10u " % (cpus.per_cpu(pvar, cpu))
+ text += "%10u " % (cpus.per_cpu(pvar, cpu).dereference())
text += " %s\n" % (desc)
return text
@@ -142,7 +142,7 @@ def x86_show_interupts(prec):
if constants.LX_CONFIG_X86_MCE:
text += x86_show_mce(prec, "&mce_exception_count", "MCE", "Machine check exceptions")
- text == x86_show_mce(prec, "&mce_poll_count", "MCP", "Machine check polls")
+ text += x86_show_mce(prec, "&mce_poll_count", "MCP", "Machine check polls")
text += show_irq_err_count(prec)
@@ -221,8 +221,8 @@ class LxInterruptList(gdb.Command):
gdb.write("CPU%-8d" % cpu)
gdb.write("\n")
- if utils.gdb_eval_or_none("&irq_desc_tree") is None:
- return
+ if utils.gdb_eval_or_none("&sparse_irqs") is None:
+ raise gdb.GdbError("Unable to find the sparse IRQ tree, is CONFIG_SPARSE_IRQ enabled?")
for irq in range(nr_irqs):
gdb.write(show_irq_desc(prec, irq))
diff --git a/scripts/gdb/linux/mapletree.py b/scripts/gdb/linux/mapletree.py
new file mode 100644
index 000000000000..d52d51c0a03f
--- /dev/null
+++ b/scripts/gdb/linux/mapletree.py
@@ -0,0 +1,252 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Maple tree helpers
+#
+# Copyright (c) 2025 Broadcom
+#
+# Authors:
+# Florian Fainelli <florian.fainelli@broadcom.com>
+
+import gdb
+
+from linux import utils
+from linux import constants
+from linux import xarray
+
+maple_tree_root_type = utils.CachedType("struct maple_tree")
+maple_node_type = utils.CachedType("struct maple_node")
+maple_enode_type = utils.CachedType("void")
+
+maple_dense = 0
+maple_leaf_64 = 1
+maple_range_64 = 2
+maple_arange_64 = 3
+
+class Mas(object):
+ ma_active = 0
+ ma_start = 1
+ ma_root = 2
+ ma_none = 3
+ ma_pause = 4
+ ma_overflow = 5
+ ma_underflow = 6
+ ma_error = 7
+
+ def __init__(self, mt, first, end):
+ if mt.type == maple_tree_root_type.get_type().pointer():
+ self.tree = mt.dereference()
+ elif mt.type != maple_tree_root_type.get_type():
+ raise gdb.GdbError("must be {} not {}"
+ .format(maple_tree_root_type.get_type().pointer(), mt.type))
+ self.tree = mt
+ self.index = first
+ self.last = end
+ self.node = None
+ self.status = self.ma_start
+ self.min = 0
+ self.max = -1
+
+ def is_start(self):
+ # mas_is_start()
+ return self.status == self.ma_start
+
+ def is_ptr(self):
+ # mas_is_ptr()
+ return self.status == self.ma_root
+
+ def is_none(self):
+ # mas_is_none()
+ return self.status == self.ma_none
+
+ def root(self):
+ # mas_root()
+ return self.tree['ma_root'].cast(maple_enode_type.get_type().pointer())
+
+ def start(self):
+ # mas_start()
+ if self.is_start() is False:
+ return None
+
+ self.min = 0
+ self.max = ~0
+
+ while True:
+ self.depth = 0
+ root = self.root()
+ if xarray.xa_is_node(root):
+ self.depth = 0
+ self.status = self.ma_active
+ self.node = mte_safe_root(root)
+ self.offset = 0
+ if mte_dead_node(self.node) is True:
+ continue
+
+ return None
+
+ self.node = None
+ # Empty tree
+ if root is None:
+ self.status = self.ma_none
+ self.offset = constants.LX_MAPLE_NODE_SLOTS
+ return None
+
+ # Single entry tree
+ self.status = self.ma_root
+ self.offset = constants.LX_MAPLE_NODE_SLOTS
+
+ if self.index != 0:
+ return None
+
+ return root
+
+ return None
+
+ def reset(self):
+ # mas_reset()
+ self.status = self.ma_start
+ self.node = None
+
+def mte_safe_root(node):
+ if node.type != maple_enode_type.get_type().pointer():
+ raise gdb.GdbError("{} must be {} not {}"
+ .format(mte_safe_root.__name__, maple_enode_type.get_type().pointer(), node.type))
+ ulong_type = utils.get_ulong_type()
+ indirect_ptr = node.cast(ulong_type) & ~0x2
+ val = indirect_ptr.cast(maple_enode_type.get_type().pointer())
+ return val
+
+def mte_node_type(entry):
+ ulong_type = utils.get_ulong_type()
+ val = None
+ if entry.type == maple_enode_type.get_type().pointer():
+ val = entry.cast(ulong_type)
+ elif entry.type == ulong_type:
+ val = entry
+ else:
+ raise gdb.GdbError("{} must be {} not {}"
+ .format(mte_node_type.__name__, maple_enode_type.get_type().pointer(), entry.type))
+ return (val >> 0x3) & 0xf
+
+def ma_dead_node(node):
+ if node.type != maple_node_type.get_type().pointer():
+ raise gdb.GdbError("{} must be {} not {}"
+ .format(ma_dead_node.__name__, maple_node_type.get_type().pointer(), node.type))
+ ulong_type = utils.get_ulong_type()
+ parent = node['parent']
+ indirect_ptr = node['parent'].cast(ulong_type) & ~constants.LX_MAPLE_NODE_MASK
+ return indirect_ptr == node
+
+def mte_to_node(enode):
+ ulong_type = utils.get_ulong_type()
+ if enode.type == maple_enode_type.get_type().pointer():
+ indirect_ptr = enode.cast(ulong_type)
+ elif enode.type == ulong_type:
+ indirect_ptr = enode
+ else:
+ raise gdb.GdbError("{} must be {} not {}"
+ .format(mte_to_node.__name__, maple_enode_type.get_type().pointer(), enode.type))
+ indirect_ptr = indirect_ptr & ~constants.LX_MAPLE_NODE_MASK
+ return indirect_ptr.cast(maple_node_type.get_type().pointer())
+
+def mte_dead_node(enode):
+ if enode.type != maple_enode_type.get_type().pointer():
+ raise gdb.GdbError("{} must be {} not {}"
+ .format(mte_dead_node.__name__, maple_enode_type.get_type().pointer(), enode.type))
+ node = mte_to_node(enode)
+ return ma_dead_node(node)
+
+def ma_is_leaf(tp):
+ result = tp < maple_range_64
+ return tp < maple_range_64
+
+def mt_pivots(t):
+ if t == maple_dense:
+ return 0
+ elif t == maple_leaf_64 or t == maple_range_64:
+ return constants.LX_MAPLE_RANGE64_SLOTS - 1
+ elif t == maple_arange_64:
+ return constants.LX_MAPLE_ARANGE64_SLOTS - 1
+
+def ma_pivots(node, t):
+ if node.type != maple_node_type.get_type().pointer():
+ raise gdb.GdbError("{}: must be {} not {}"
+ .format(ma_pivots.__name__, maple_node_type.get_type().pointer(), node.type))
+ if t == maple_arange_64:
+ return node['ma64']['pivot']
+ elif t == maple_leaf_64 or t == maple_range_64:
+ return node['mr64']['pivot']
+ else:
+ return None
+
+def ma_slots(node, tp):
+ if node.type != maple_node_type.get_type().pointer():
+ raise gdb.GdbError("{}: must be {} not {}"
+ .format(ma_slots.__name__, maple_node_type.get_type().pointer(), node.type))
+ if tp == maple_arange_64:
+ return node['ma64']['slot']
+ elif tp == maple_range_64 or tp == maple_leaf_64:
+ return node['mr64']['slot']
+ elif tp == maple_dense:
+ return node['slot']
+ else:
+ return None
+
+def mt_slot(mt, slots, offset):
+ ulong_type = utils.get_ulong_type()
+ return slots[offset].cast(ulong_type)
+
+def mtree_lookup_walk(mas):
+ ulong_type = utils.get_ulong_type()
+ n = mas.node
+
+ while True:
+ node = mte_to_node(n)
+ tp = mte_node_type(n)
+ pivots = ma_pivots(node, tp)
+ end = mt_pivots(tp)
+ offset = 0
+ while True:
+ if pivots[offset] >= mas.index:
+ break
+ if offset >= end:
+ break
+ offset += 1
+
+ slots = ma_slots(node, tp)
+ n = mt_slot(mas.tree, slots, offset)
+ if ma_dead_node(node) is True:
+ mas.reset()
+ return None
+ break
+
+ if ma_is_leaf(tp) is True:
+ break
+
+ return n
+
+def mtree_load(mt, index):
+ ulong_type = utils.get_ulong_type()
+ # MT_STATE(...)
+ mas = Mas(mt, index, index)
+ entry = None
+
+ while True:
+ entry = mas.start()
+ if mas.is_none():
+ return None
+
+ if mas.is_ptr():
+ if index != 0:
+ entry = None
+ return entry
+
+ entry = mtree_lookup_walk(mas)
+ if entry is None and mas.is_start():
+ continue
+ else:
+ break
+
+ if xarray.xa_is_zero(entry):
+ return None
+
+ return entry
diff --git a/scripts/gdb/linux/vfs.py b/scripts/gdb/linux/vfs.py
index b5fbb18ccb77..9e921b645a68 100644
--- a/scripts/gdb/linux/vfs.py
+++ b/scripts/gdb/linux/vfs.py
@@ -22,7 +22,7 @@ def dentry_name(d):
if parent == d or parent == 0:
return ""
p = dentry_name(d['d_parent']) + "/"
- return p + d['d_shortname']['string'].string()
+ return p + d['d_name']['name'].string()
class DentryName(gdb.Function):
"""Return string of the full path of a dentry.
diff --git a/scripts/gdb/linux/xarray.py b/scripts/gdb/linux/xarray.py
new file mode 100644
index 000000000000..f4477b5def75
--- /dev/null
+++ b/scripts/gdb/linux/xarray.py
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Xarray helpers
+#
+# Copyright (c) 2025 Broadcom
+#
+# Authors:
+# Florian Fainelli <florian.fainelli@broadcom.com>
+
+import gdb
+
+from linux import utils
+from linux import constants
+
+def xa_is_internal(entry):
+ ulong_type = utils.get_ulong_type()
+ return ((entry.cast(ulong_type) & 3) == 2)
+
+def xa_mk_internal(v):
+ return ((v << 2) | 2)
+
+def xa_is_zero(entry):
+ ulong_type = utils.get_ulong_type()
+ return entry.cast(ulong_type) == xa_mk_internal(257)
+
+def xa_is_node(entry):
+ ulong_type = utils.get_ulong_type()
+ return xa_is_internal(entry) and (entry.cast(ulong_type) > 4096)
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index b7dded3c8113..5cfb5d74dd5f 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -628,6 +628,7 @@
#define MSR_AMD64_OSVW_STATUS 0xc0010141
#define MSR_AMD_PPIN_CTL 0xc00102f0
#define MSR_AMD_PPIN 0xc00102f1
+#define MSR_AMD64_CPUID_FN_7 0xc0011002
#define MSR_AMD64_CPUID_FN_1 0xc0011004
#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
diff --git a/tools/include/linux/kallsyms.h b/tools/include/linux/kallsyms.h
index 5a37ccbec54f..f61a01dd7eb7 100644
--- a/tools/include/linux/kallsyms.h
+++ b/tools/include/linux/kallsyms.h
@@ -18,6 +18,7 @@ static inline const char *kallsyms_lookup(unsigned long addr,
return NULL;
}
+#ifdef HAVE_BACKTRACE_SUPPORT
#include <execinfo.h>
#include <stdlib.h>
static inline void print_ip_sym(const char *loglvl, unsigned long ip)
@@ -30,5 +31,8 @@ static inline void print_ip_sym(const char *loglvl, unsigned long ip)
free(name);
}
+#else
+static inline void print_ip_sym(const char *loglvl, unsigned long ip) {}
+#endif
#endif
diff --git a/tools/testing/selftests/hid/tests/test_mouse.py b/tools/testing/selftests/hid/tests/test_mouse.py
index 66daf7e5975c..eb4e15a0e53b 100644
--- a/tools/testing/selftests/hid/tests/test_mouse.py
+++ b/tools/testing/selftests/hid/tests/test_mouse.py
@@ -439,6 +439,68 @@ class BadResolutionMultiplierMouse(ResolutionMultiplierMouse):
return 32 # EPIPE
+class BadReportDescriptorMouse(BaseMouse):
+ """
+ This "device" was one autogenerated by syzbot. There are a lot of issues in
+ it, and the most problematic is that it declares features that have no
+ size.
+
+ This leads to report->size being set to 0 and can mess up with usbhid
+ internals. Fortunately, uhid merely passes the incoming buffer, without
+ touching it so a buffer of size 0 will be translated to [] without
+ triggering a kernel oops.
+
+ Because the report descriptor is wrong, no input are created, and we need
+ to tweak a little bit the parameters to make it look correct.
+ """
+
+ # fmt: off
+ report_descriptor = [
+ 0x96, 0x01, 0x00, # Report Count (1) 0
+ 0x06, 0x01, 0x00, # Usage Page (Generic Desktop) 3
+ # 0x03, 0x00, 0x00, 0x00, 0x00, # Ignored by the kernel somehow
+ 0x2a, 0x90, 0xa0, # Usage Maximum (41104) 6
+ 0x27, 0x00, 0x00, 0x00, 0x00, # Logical Maximum (0) 9
+ 0xb3, 0x81, 0x3e, 0x25, 0x03, # Feature (Cnst,Arr,Abs,Vol) 14
+ 0x1b, 0xdd, 0xe8, 0x40, 0x50, # Usage Minimum (1346431197) 19
+ 0x3b, 0x5d, 0x8c, 0x3d, 0xda, # Designator Index 24
+ ]
+ # fmt: on
+
+ def __init__(
+ self, rdesc=report_descriptor, name=None, input_info=(3, 0x045E, 0x07DA)
+ ):
+ super().__init__(rdesc, name, input_info)
+ self.high_resolution_report_called = False
+
+ def get_evdev(self, application=None):
+ assert self._input_nodes is None
+ return (
+ "Ok" # should be a list or None, but both would fail, so abusing the system
+ )
+
+ def next_sync_events(self, application=None):
+ # there are no evdev nodes, so no events
+ return []
+
+ def is_ready(self):
+ # we wait for the SET_REPORT command to come
+ return self.high_resolution_report_called
+
+ def set_report(self, req, rnum, rtype, data):
+ if rtype != self.UHID_FEATURE_REPORT:
+ raise InvalidHIDCommunication(f"Unexpected report type: {rtype}")
+ if rnum != 0x0:
+ raise InvalidHIDCommunication(f"Unexpected report number: {rnum}")
+
+ if len(data) != 1:
+ raise InvalidHIDCommunication(f"Unexpected data: {data}, expected '[0]'")
+
+ self.high_resolution_report_called = True
+
+ return 0
+
+
class ResolutionMultiplierHWheelMouse(TwoWheelMouse):
# fmt: off
report_descriptor = [
@@ -975,3 +1037,11 @@ class TestMiMouse(TestWheelMouse):
# assert below print out the real error
pass
assert remaining == []
+
+
+class TestBadReportDescriptorMouse(base.BaseTestCase.TestUhid):
+ def create_device(self):
+ return BadReportDescriptorMouse()
+
+ def assertName(self, uhdev):
+ pass