35 files changed, 948 insertions, 123 deletions
diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block
index 4ba771b56b3b..803f578dc023 100644
--- a/Documentation/ABI/stable/sysfs-block
+++ b/Documentation/ABI/stable/sysfs-block
@@ -778,6 +778,39 @@ Description:
 		0, write zeroes is not supported by the device.
 
 
+What:		/sys/block/<disk>/queue/write_zeroes_unmap_max_hw_bytes
+Date:		January 2025
+Contact:	Zhang Yi <yi.zhang@huawei.com>
+Description:
+		[RO] This file indicates whether a device supports zeroing data
+		in a specified block range without incurring the cost of
+		physically writing zeroes to the media for each individual
+		block. If this parameter is set to write_zeroes_max_bytes, the
+		device implements a zeroing operation which opportunistically
+		avoids writing zeroes to media while still guaranteeing that
+		subsequent reads from the specified block range will return
+		zeroed data. This operation is a best-effort optimization, a
+		device may fall back to physically writing zeroes to the media
+		due to other factors such as misalignment or being asked to
+		clear a block range smaller than the device's internal
+		allocation unit. If this parameter is set to 0, the device may
+		have to write each logical block media during a zeroing
+		operation.
+
+
+What:		/sys/block/<disk>/queue/write_zeroes_unmap_max_bytes
+Date:		January 2025
+Contact:	Zhang Yi <yi.zhang@huawei.com>
+Description:
+		[RW] While write_zeroes_unmap_max_hw_bytes is the hardware limit
+		for the device, this setting is the software limit. Since the
+		unmap write zeroes operation is a best-effort optimization, some
+		devices may still physically writing zeroes to media. So the
+		speed of this operation is not guaranteed. Writing a value of
+		'0' to this file disables this operation. Otherwise, this
+		parameter should be equal to write_zeroes_unmap_max_hw_bytes.
+
+
 What:		/sys/block/<disk>/queue/zone_append_max_bytes
 Date:		May 2020
 Contact:	linux-block@vger.kernel.org
diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power
index 54195530e97a..d3da88b26a53 100644
--- a/Documentation/ABI/testing/sysfs-devices-power
+++ b/Documentation/ABI/testing/sysfs-devices-power
@@ -56,7 +56,7 @@ Date:		January 2009
 Contact:	Rafael J. Wysocki <rjw@rjwysocki.net>
 Description:
 		The /sys/devices/.../async attribute allows the user space to
-		enable or diasble the device's suspend and resume callbacks to
+		enable or disable the device's suspend and resume callbacks to
 		be executed asynchronously (ie. in separate threads, in parallel
 		with the main suspend/resume thread) during system-wide power
 		transitions (eg. suspend to RAM, hibernation).
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index bf85f4de6862..ab8cd337f43a 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -584,6 +584,7 @@ What:		/sys/devices/system/cpu/vulnerabilities
 		/sys/devices/system/cpu/vulnerabilities/spectre_v1
 		/sys/devices/system/cpu/vulnerabilities/spectre_v2
 		/sys/devices/system/cpu/vulnerabilities/srbds
+		/sys/devices/system/cpu/vulnerabilities/tsa
 		/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
 Date:		January 2018
 Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
diff --git a/Documentation/ABI/testing/sysfs-driver-ufs b/Documentation/ABI/testing/sysfs-driver-ufs
index d4140dc6c5ba..615453fcc9ff 100644
--- a/Documentation/ABI/testing/sysfs-driver-ufs
+++ b/Documentation/ABI/testing/sysfs-driver-ufs
@@ -711,7 +711,7 @@ Description:	This file shows the thin provisioning type. This is one of
 
 		The file is read only.
 
-What:		/sys/class/scsi_device/*/device/unit_descriptor/physical_memory_resourse_count
+What:		/sys/class/scsi_device/*/device/unit_descriptor/physical_memory_resource_count
 Date:		February 2018
 Contact:	Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
 Description:	This file shows the total physical memory resources. This is
diff --git a/Documentation/ABI/testing/sysfs-fs-erofs b/Documentation/ABI/testing/sysfs-fs-erofs
index bf3b6299c15e..76d9808ed581 100644
--- a/Documentation/ABI/testing/sysfs-fs-erofs
+++ b/Documentation/ABI/testing/sysfs-fs-erofs
@@ -5,7 +5,7 @@ Description:	Shows all enabled kernel features.
 		Supported features:
 		zero_padding, compr_cfgs, big_pcluster, chunked_file,
 		device_table, compr_head2, sb_chksum, ztailpacking,
-		dedupe, fragments.
+		dedupe, fragments, 48bit, metabox.
 
 What:		/sys/fs/erofs/<disk>/sync_decompress
 Date:		November 2021
@@ -35,3 +35,11 @@ Description:	Used to set or show hardware accelerators in effect
 		and multiple accelerators are separated by '\n'.
 		Supported accelerator(s): qat_deflate.
 		Disable all accelerators with an empty string (echo > accel).
+
+What:		/sys/fs/erofs/<disk>/dir_ra_bytes
+Date:		July 2025
+Contact:	"Chao Yu" <chao@kernel.org>
+Description:	Used to set or show readahead bytes during readdir(), by
+		default the value is 16384.
+
+		- 0: disable readahead.
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 0cc35a14afbe..bd98ea3175ec 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1732,12 +1732,6 @@ The following nested keys are defined.
 	  numa_hint_faults (npn)
 		Number of NUMA hinting faults.
 
-	  numa_task_migrated (npn)
-		Number of task migration by NUMA balancing.
-
-	  numa_task_swapped (npn)
-		Number of task swap by NUMA balancing.
-
 	  pgdemote_kswapd
 		Number of pages demoted by kswapd.
 
diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
index 1302fd1b55e8..6dba18dbb9ab 100644
--- a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
+++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
@@ -157,9 +157,7 @@ This is achieved by using the otherwise unused and obsolete VERW instruction in
 combination with a microcode update. The microcode clears the affected CPU
 buffers when the VERW instruction is executed.
 
-Kernel reuses the MDS function to invoke the buffer clearing:
-
-	mds_clear_cpu_buffers()
+Kernel does the buffer clearing with x86_clear_cpu_buffers().
 
 On MDS affected CPUs, the kernel already invokes CPU buffer clear on
 kernel/userspace, hypervisor/guest and C-state (idle) transitions. No
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f1f2c0874da9..07e22ba5bfe3 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -7488,6 +7488,19 @@
 			having this key zero'ed is acceptable. E.g. in testing
 			scenarios.
 
+	tsa=		[X86] Control mitigation for Transient Scheduler
+			Attacks on AMD CPUs. Search the following in your
+			favourite search engine for more details:
+
+			"Technical guidance for mitigating transient scheduler
+			attacks".
+
+			off		- disable the mitigation
+			on		- enable the mitigation (default)
+			user		- mitigate only user/kernel transitions
+			vm		- mitigate only guest/host transitions
+
+
 	tsc=		Disable clocksource stability checks for TSC.
 			Format: <string>
 			[x86] reliable: mark tsc clocksource as reliable, this
diff --git a/Documentation/arch/x86/mds.rst b/Documentation/arch/x86/mds.rst
index 5a2e6c0ef04a..3518671e1a85 100644
--- a/Documentation/arch/x86/mds.rst
+++ b/Documentation/arch/x86/mds.rst
@@ -93,7 +93,7 @@ enters a C-state.
 
 The kernel provides a function to invoke the buffer clearing:
 
-    mds_clear_cpu_buffers()
+    x86_clear_cpu_buffers()
 
 Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path.
 Other than CFLAGS.ZF, this macro doesn't clobber any registers.
@@ -185,9 +185,9 @@ Mitigation points
    idle clearing would be a window dressing exercise and is therefore not
    activated.
 
-   The invocation is controlled by the static key mds_idle_clear which is
-   switched depending on the chosen mitigation mode and the SMT state of
-   the system.
+   The invocation is controlled by the static key cpu_buf_idle_clear which is
+   switched depending on the chosen mitigation mode and the SMT state of the
+   system.
 
    The buffer clear is only invoked before entering the C-State to prevent
    that stale data from the idling CPU from spilling to the Hyper-Thread
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml
index 2985c8c717d7..5403242545ab 100644
--- a/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml
@@ -52,6 +52,9 @@ properties:
   '#clock-cells':
     const: 1
 
+  '#reset-cells':
+    const: 1
+
 required:
   - compatible
   - reg
diff --git a/Documentation/devicetree/bindings/i2c/realtek,rtl9301-i2c.yaml b/Documentation/devicetree/bindings/i2c/realtek,rtl9301-i2c.yaml
index eddfd329c67b..69ac5db8b914 100644
--- a/Documentation/devicetree/bindings/i2c/realtek,rtl9301-i2c.yaml
+++ b/Documentation/devicetree/bindings/i2c/realtek,rtl9301-i2c.yaml
@@ -26,7 +26,8 @@ properties:
       - const: realtek,rtl9301-i2c
 
   reg:
-    description: Register offset and size this I2C controller.
+    items:
+      - description: Register offset and size this I2C controller.
 
   "#address-cells":
     const: 1
diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml
index 29f12d650442..1a5209139e13 100644
--- a/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml
@@ -223,12 +223,6 @@ allOf:
       - required:
           - pwms
 
-  - oneOf:
-      - required:
-          - interrupts
-      - required:
-          - io-backends
-
   - if:
       properties:
         compatible:
diff --git a/Documentation/devicetree/bindings/iio/gyroscope/invensense,mpu3050.yaml b/Documentation/devicetree/bindings/iio/gyroscope/invensense,mpu3050.yaml
index d1a6103fc37a..f3242dc0e7e6 100644
--- a/Documentation/devicetree/bindings/iio/gyroscope/invensense,mpu3050.yaml
+++ b/Documentation/devicetree/bindings/iio/gyroscope/invensense,mpu3050.yaml
@@ -21,7 +21,7 @@ properties:
   vlogic-supply: true
 
   interrupts:
-    minItems: 1
+    maxItems: 1
     description:
       Interrupt mapping for the trigger interrupt from the internal oscillator.
 
diff --git a/Documentation/devicetree/bindings/input/elan,ekth6915.yaml b/Documentation/devicetree/bindings/input/elan,ekth6915.yaml
index cb3e1801b0d3..0840e4ab28b7 100644
--- a/Documentation/devicetree/bindings/input/elan,ekth6915.yaml
+++ b/Documentation/devicetree/bindings/input/elan,ekth6915.yaml
@@ -4,14 +4,14 @@
 $id: http://devicetree.org/schemas/input/elan,ekth6915.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Elan eKTH6915 touchscreen controller
+title: Elan I2C-HID touchscreen controllers
 
 maintainers:
   - Douglas Anderson <dianders@chromium.org>
 
 description:
-  Supports the Elan eKTH6915 touchscreen controller.
-  This touchscreen controller uses the i2c-hid protocol with a reset GPIO.
+  Supports the Elan eKTH6915 and other I2C-HID touchscreen controllers.
+  These touchscreen controller use the i2c-hid protocol with a reset GPIO.
 
 allOf:
   - $ref: /schemas/input/touchscreen/touchscreen.yaml#
@@ -23,12 +23,14 @@ properties:
           - enum:
               - elan,ekth5015m
           - const: elan,ekth6915
+      - items:
+          - const: elan,ekth8d18
+          - const: elan,ekth6a12nay
       - enum:
           - elan,ekth6915
           - elan,ekth6a12nay
 
-  reg:
-    const: 0x10
+  reg: true
 
   interrupts:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-csi.yaml b/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-csi.yaml
index b3d6db922693..1aa5775ba2bc 100644
--- a/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-csi.yaml
+++ b/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-csi.yaml
@@ -110,7 +110,7 @@ examples:
         reg = <0x01cb4000 0x1000>;
         interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
         clocks = <&ccu CLK_BUS_CSI>,
-                 <&ccu CLK_CSI1_SCLK>,
+                 <&ccu CLK_CSI_SCLK>,
                  <&ccu CLK_DRAM_CSI>;
         clock-names = "bus",
                       "mod",
diff --git a/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml b/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml
index a61a76bb611c..3ea4a4290f23 100644
--- a/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml
+++ b/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml
@@ -79,7 +79,7 @@ examples:
         reg = <0x01cb8000 0x1000>;
         interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
         clocks = <&ccu CLK_BUS_CSI>,
-             <&ccu CLK_CSI1_SCLK>,
+             <&ccu CLK_CSI_SCLK>,
              <&ccu CLK_DRAM_CSI>;
         clock-names = "bus", "mod", "ram";
         resets = <&ccu RST_BUS_CSI>;
diff --git a/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-mipi-csi2.yaml b/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-mipi-csi2.yaml
index 54e15ab8a7f5..627b28e94354 100644
--- a/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-mipi-csi2.yaml
+++ b/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-mipi-csi2.yaml
@@ -103,7 +103,7 @@ examples:
         reg = <0x01cb1000 0x1000>;
         interrupts = <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>;
         clocks = <&ccu CLK_BUS_CSI>,
-                 <&ccu CLK_CSI1_SCLK>;
+                 <&ccu CLK_CSI_SCLK>;
         clock-names = "bus", "mod";
         resets = <&ccu RST_BUS_CSI>;
 
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
index 7b6a2fde8175..19934d5c24e5 100644
--- a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
+++ b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
@@ -23,7 +23,7 @@ properties:
               - allwinner,sun20i-d1-emac
               - allwinner,sun50i-h6-emac
               - allwinner,sun50i-h616-emac0
-              - allwinner,sun55i-a523-emac0
+              - allwinner,sun55i-a523-gmac0
           - const: allwinner,sun50i-a64-emac
 
   reg:
diff --git a/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml b/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml
index 4dd2dc9c678b..8afbd9ebd73f 100644
--- a/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml
@@ -80,6 +80,8 @@ examples:
       interrupt-parent = <&intc>;
       interrupts = <296 IRQ_TYPE_LEVEL_HIGH>;
       interrupt-names = "macirq";
+      phy-handle = <&phy0>;
+      phy-mode = "rgmii-id";
       resets = <&rst 30>;
       reset-names = "stmmaceth";
       snps,multicast-filter-bins = <0>;
@@ -91,7 +93,6 @@ examples:
       snps,mtl-rx-config = <&gmac0_mtl_rx_setup>;
       snps,mtl-tx-config = <&gmac0_mtl_tx_setup>;
       snps,axi-config = <&gmac0_stmmac_axi_setup>;
-      status = "disabled";
 
       gmac0_mtl_rx_setup: rx-queues-config {
         snps,rx-queues-to-use = <8>;
diff --git a/Documentation/filesystems/iomap/design.rst b/Documentation/filesystems/iomap/design.rst
index f2df9b6df988..0f7672676c0b 100644
--- a/Documentation/filesystems/iomap/design.rst
+++ b/Documentation/filesystems/iomap/design.rst
@@ -167,7 +167,6 @@ structure below:
      struct dax_device   *dax_dev;
      void                *inline_data;
      void                *private;
-     const struct iomap_folio_ops *folio_ops;
      u64                 validity_cookie;
  };
 
@@ -292,8 +291,6 @@ The fields are as follows:
    <https://lore.kernel.org/all/20180619164137.13720-7-hch@lst.de/>`_.
    This value will be passed unchanged to ``->iomap_end``.
 
- * ``folio_ops`` will be covered in the section on pagecache operations.
-
  * ``validity_cookie`` is a magic freshness value set by the filesystem
    that should be used to detect stale mappings.
    For pagecache operations this is critical for correct operation
diff --git a/Documentation/filesystems/iomap/operations.rst b/Documentation/filesystems/iomap/operations.rst
index 3b628e370d88..067ed8e14ef3 100644
--- a/Documentation/filesystems/iomap/operations.rst
+++ b/Documentation/filesystems/iomap/operations.rst
@@ -57,21 +57,19 @@ The following address space operations can be wrapped easily:
  * ``bmap``
  * ``swap_activate``
 
-``struct iomap_folio_ops``
+``struct iomap_write_ops``
 --------------------------
 
-The ``->iomap_begin`` function for pagecache operations may set the
-``struct iomap::folio_ops`` field to an ops structure to override
-default behaviors of iomap:
-
 .. code-block:: c
 
- struct iomap_folio_ops {
+ struct iomap_write_ops {
      struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos,
                                 unsigned len);
      void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied,
                        struct folio *folio);
      bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap);
+     int (*read_folio_range)(const struct iomap_iter *iter,
+     			struct folio *folio, loff_t pos, size_t len);
  };
 
 iomap calls these functions:
@@ -127,6 +125,10 @@ iomap calls these functions:
     ``->iomap_valid``, then the iomap should considered stale and the
     validation failed.
 
+  - ``read_folio_range``: Called to synchronously read in the range that will
+    be written to. If this function is not provided, iomap will default to
+    submitting a bio read request.
+
 These ``struct kiocb`` flags are significant for buffered I/O with iomap:
 
  * ``IOCB_NOWAIT``: Turns on ``IOMAP_NOWAIT``.
@@ -271,7 +273,7 @@ writeback.
 It does not lock ``i_rwsem`` or ``invalidate_lock``.
 
 The dirty bit will be cleared for all folios run through the
-``->map_blocks`` machinery described below even if the writeback fails.
+``->writeback_range`` machinery described below even if the writeback fails.
 This is to prevent dirty folio clots when storage devices fail; an
 ``-EIO`` is recorded for userspace to collect via ``fsync``.
 
@@ -283,15 +285,14 @@ The ``ops`` structure must be specified and is as follows:
 .. code-block:: c
 
  struct iomap_writeback_ops {
-     int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode,
-                       loff_t offset, unsigned len);
-     int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status);
-     void (*discard_folio)(struct folio *folio, loff_t pos);
+    int (*writeback_range)(struct iomap_writepage_ctx *wpc,
+        struct folio *folio, u64 pos, unsigned int len, u64 end_pos);
+    int (*writeback_submit)(struct iomap_writepage_ctx *wpc, int error);
  };
 
 The fields are as follows:
 
-  - ``map_blocks``: Sets ``wpc->iomap`` to the space mapping of the file
+  - ``writeback_range``: Sets ``wpc->iomap`` to the space mapping of the file
     range (in bytes) given by ``offset`` and ``len``.
     iomap calls this function for each dirty fs block in each dirty folio,
     though it will `reuse mappings
@@ -306,27 +307,26 @@ The fields are as follows:
     This revalidation must be open-coded by the filesystem; it is
     unclear if ``iomap::validity_cookie`` can be reused for this
     purpose.
-    This function must be supplied by the filesystem.
-
-  - ``submit_ioend``: Allows the file systems to hook into writeback bio
-    submission.
-    This might include pre-write space accounting updates, or installing
-    a custom ``->bi_end_io`` function for internal purposes, such as
-    deferring the ioend completion to a workqueue to run metadata update
-    transactions from process context before submitting the bio.
-    This function is optional.
 
-  - ``discard_folio``: iomap calls this function after ``->map_blocks``
-    fails to schedule I/O for any part of a dirty folio.
-    The function should throw away any reservations that may have been
-    made for the write.
+    If this methods fails to schedule I/O for any part of a dirty folio, it
+    should throw away any reservations that may have been made for the write.
     The folio will be marked clean and an ``-EIO`` recorded in the
     pagecache.
     Filesystems can use this callback to `remove
     <https://lore.kernel.org/all/20201029163313.1766967-1-bfoster@redhat.com/>`_
     delalloc reservations to avoid having delalloc reservations for
     clean pagecache.
-    This function is optional.
+    This function must be supplied by the filesystem.
+
+  - ``writeback_submit``: Submit the previous built writeback context.
+    Block based file systems should use the iomap_ioend_writeback_submit
+    helper, other file system can implement their own.
+    File systems can optionall to hook into writeback bio submission.
+    This might include pre-write space accounting updates, or installing
+    a custom ``->bi_end_io`` function for internal purposes, such as
+    deferring the ioend completion to a workqueue to run metadata update
+    transactions from process context before submitting the bio.
+    This function must be supplied by the filesystem.
 
 Pagecache Writeback Completion
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -340,10 +340,9 @@ If the write failed, it will also set the error bits on the folios and
 the address space.
 This can happen in interrupt or process context, depending on the
 storage device.
-
 Filesystems that need to update internal bookkeeping (e.g. unwritten
-extent conversions) should provide a ``->submit_ioend`` function to
-set ``struct iomap_end::bio::bi_end_io`` to its own function.
+extent conversions) should set their own bi_end_io on the bios
+submitted by ``->submit_writeback``
 This function should call ``iomap_finish_ioends`` after finishing its
 own work (e.g. unwritten extent conversion).
 
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 2e567e341c3b..aa287ccdac2f 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -87,8 +87,8 @@ prototypes::
 	int (*tmpfile) (struct mnt_idmap *, struct inode *,
 			struct file *, umode_t);
 	int (*fileattr_set)(struct mnt_idmap *idmap,
-			    struct dentry *dentry, struct fileattr *fa);
-	int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
+			    struct dentry *dentry, struct file_kattr *fa);
+	int (*fileattr_get)(struct dentry *dentry, struct file_kattr *fa);
 	struct posix_acl * (*get_acl)(struct mnt_idmap *, struct dentry *, int);
 	struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
 
@@ -253,10 +253,10 @@ prototypes::
 	int (*writepages)(struct address_space *, struct writeback_control *);
 	bool (*dirty_folio)(struct address_space *, struct folio *folio);
 	void (*readahead)(struct readahead_control *);
-	int (*write_begin)(struct file *, struct address_space *mapping,
+	int (*write_begin)(const struct kiocb *, struct address_space *mapping,
 				loff_t pos, unsigned len,
 				struct folio **foliop, void **fsdata);
-	int (*write_end)(struct file *, struct address_space *mapping,
+	int (*write_end)(const struct kiocb *, struct address_space *mapping,
 				loff_t pos, unsigned len, unsigned copied,
 				struct folio *folio, void *fsdata);
 	sector_t (*bmap)(struct address_space *, sector_t);
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index a5734bdd1cc7..85f590254f07 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -1224,9 +1224,6 @@ lookup_noperm_unlocked(), lookup_noperm_positive_unlocked().  They now
 take a qstr instead of separate name and length.  QSTR() can be used
 when strlen() is needed for the length.
 
-For try_lookup_noperm() a reference to the qstr is passed in case the
-hash might subsequently be needed.
-
 These function no longer do any permission checking - they previously
 checked that the caller has 'X' permission on the parent.  They must
 ONLY be used internally by a filesystem on itself when it knows that
@@ -1258,3 +1255,33 @@ iterator needed.  Instead of a cloned mount tree, the new interface returns
 an array of struct path, one for each mount collect_mounts() would've
 created.  These struct path point to locations in the caller's namespace
 that would be roots of the cloned mounts.
+
+---
+
+**mandatory**
+
+If your filesystem sets the default dentry_operations, use set_default_d_op()
+rather than manually setting sb->s_d_op.
+
+---
+
+**mandatory**
+
+d_set_d_op() is no longer exported (or public, for that matter); _if_
+your filesystem really needed that, make use of d_splice_alias_ops()
+to have them set.  Better yet, think hard whether you need different
+->d_op for different dentries - if not, just use set_default_d_op()
+at mount time and be done with that.  Currently procfs is the only
+thing that really needs ->d_op varying between dentries.
+
+---
+
+**highly recommended**
+
+The file operations mmap() callback is deprecated in favour of
+mmap_prepare(). This passes a pointer to a vm_area_desc to the callback
+rather than a VMA, as the VMA at this stage is not yet valid.
+
+The vm_area_desc provides the minimum required information for a filesystem
+to initialise state upon memory mapping of a file-backed region, and output
+parameters for the file system to set this state.
diff --git a/Documentation/filesystems/propagate_umount.txt b/Documentation/filesystems/propagate_umount.txt
new file mode 100644
index 000000000000..c90349e5b889
--- /dev/null
+++ b/Documentation/filesystems/propagate_umount.txt
@@ -0,0 +1,484 @@
+	Notes on propagate_umount()
+
+Umount propagation starts with a set of mounts we are already going to
+take out.  Ideally, we would like to add all downstream cognates to
+that set - anything with the same mountpoint as one of the removed
+mounts and with parent that would receive events from the parent of that
+mount.  However, there are some constraints the resulting set must
+satisfy.
+
+It is convenient to define several properties of sets of mounts:
+
+1) A set S of mounts is non-shifting if for any mount X belonging
+to S all subtrees mounted strictly inside of X (i.e. not overmounting
+the root of X) contain only elements of S.
+
+2) A set S is non-revealing if all locked mounts that belong to S have
+parents that also belong to S.
+
+3) A set S is closed if it contains all children of its elements.
+
+The set of mounts taken out by umount(2) must be non-shifting and
+non-revealing; the first constraint is what allows to reparent
+any remaining mounts and the second is what prevents the exposure
+of any concealed mountpoints.
+
+propagate_umount() takes the original set as an argument and tries to
+extend that set.  The original set is a full subtree and its root is
+unlocked; what matters is that it's closed and non-revealing.
+Resulting set may not be closed; there might still be mounts outside
+of that set, but only on top of stacks of root-overmounting elements
+of set.  They can be reparented to the place where the bottom of
+stack is attached to a mount that will survive.  NOTE: doing that
+will violate a constraint on having no more than one mount with
+the same parent/mountpoint pair; however, the caller (umount_tree())
+will immediately remedy that - it may keep unmounted element attached
+to parent, but only if the parent itself is unmounted.  Since all
+conflicts created by reparenting have common parent *not* in the
+set and one side of the conflict (bottom of the stack of overmounts)
+is in the set, it will be resolved.  However, we rely upon umount_tree()
+doing that pretty much immediately after the call of propagate_umount().
+
+Algorithm is based on two statements:
+	1) for any set S, there is a maximal non-shifting subset of S
+and it can be calculated in O(#S) time.
+	2) for any non-shifting set S, there is a maximal non-revealing
+subset of S.  That subset is also non-shifting and it can be calculated
+in O(#S) time.
+
+		Finding candidates.
+
+We are given a closed set U and we want to find all mounts that have
+the same mountpoint as some mount m in U *and* whose parent receives
+propagation from the parent of the same mount m.  Naive implementation
+would be
+	S = {}
+	for each m in U
+		add m to S
+		p = parent(m)
+		for each q in Propagation(p) - {p}
+			child = look_up(q, mountpoint(m))
+			if child
+				add child to S
+but that can lead to excessive work - there might be propagation among the
+subtrees of U, in which case we'd end up examining the same candidates
+many times.  Since propagation is transitive, the same will happen to
+everything downstream of that candidate and it's not hard to construct
+cases where the approach above leads to the time quadratic by the actual
+number of candidates.
+
+Note that if we run into a candidate we'd already seen, it must've been
+added on an earlier iteration of the outer loop - all additions made
+during one iteration of the outer loop have different parents.  So
+if we find a child already added to the set, we know that everything
+in Propagation(parent(child)) with the same mountpoint has been already
+added.
+	S = {}
+	for each m in U
+		if m in S
+			continue
+		add m to S
+		p = parent(m)
+		q = propagation_next(p, p)
+		while q
+			child = look_up(q, mountpoint(m))
+			if child
+				if child in S
+					q = skip_them(q, p)
+					continue;
+				add child to S
+			q = propagation_next(q, p)
+where
+skip_them(q, p)
+	keep walking Propagation(p) from q until we find something
+	not in Propagation(q)
+
+would get rid of that problem, but we need a sane implementation of
+skip_them().  That's not hard to do - split propagation_next() into
+"down into mnt_slave_list" and "forward-and-up" parts, with the
+skip_them() being "repeat the forward-and-up part until we get NULL
+or something that isn't a peer of the one we are skipping".
+
+Note that there can be no absolute roots among the extra candidates -
+they all come from mount lookups.  Absolute root among the original
+set is _currently_ impossible, but it might be worth protecting
+against.
+
+		Maximal non-shifting subsets.
+
+Let's call a mount m in a set S forbidden in that set if there is a
+subtree mounted strictly inside m and containing mounts that do not
+belong to S.
+
+The set is non-shifting when none of its elements are forbidden in it.
+
+If mount m is forbidden in a set S, it is forbidden in any subset S' it
+belongs to.  In other words, it can't belong to any of the non-shifting
+subsets of S.  If we had a way to find a forbidden mount or show that
+there's none, we could use it to find the maximal non-shifting subset
+simply by finding and removing them until none remain.
+
+Suppose mount m is forbidden in S; then any mounts forbidden in S - {m}
+must have been forbidden in S itself.  Indeed, since m has descendents
+that do not belong to S, any subtree that fits into S will fit into
+S - {m} as well.
+
+So in principle we could go through elements of S, checking if they
+are forbidden in S and removing the ones that are.  Removals will
+not invalidate the checks done for earlier mounts - if they were not
+forbidden at the time we checked, they won't become forbidden later.
+It's too costly to be practical, but there is a similar approach that
+is linear by size of S.
+
+Let's say that mount x in a set S is forbidden by mount y, if
+	* both x and y belong to S.
+	* there is a chain of mounts starting at x and leaving S
+	  immediately after passing through y, with the first
+	  mountpoint strictly inside x.
+Note 1: x may be equal to y - that's the case when something not
+belonging to S is mounted strictly inside x.
+Note 2: if y does not belong to S, it can't forbid anything in S.
+Note 3: if y has no children outside of S, it can't forbid anything in S.
+
+It's easy to show that mount x is forbidden in S if and only if x is
+forbidden in S by some mount y.  And it's easy to find all mounts in S
+forbidden by a given mount.
+
+Consider the following operation:
+	Trim(S, m) = S - {x : x is forbidden by m in S}
+
+Note that if m does not belong to S or has no children outside of S we
+are guaranteed that Trim(S, m) is equal to S.
+
+The following is true: if x is forbidden by y in Trim(S, m), it was
+already forbidden by y in S.
+
+Proof: Suppose x is forbidden by y in Trim(S, m).  Then there is a
+chain of mounts (x_0 = x, ..., x_k = y, x_{k+1} = r), such that x_{k+1}
+is the first element that doesn't belong to Trim(S, m) and the
+mountpoint of x_1 is strictly inside x.  If mount r belongs to S, it must
+have been removed by Trim(S, m), i.e. it was forbidden in S by m.
+Then there was a mount chain from r to some child of m that stayed in
+S all the way until m, but that's impossible since x belongs to Trim(S, m)
+and prepending (x_0, ..., x_k) to that chain demonstrates that x is also
+forbidden in S by m, and thus can't belong to Trim(S, m).
+Therefore r can not belong to S and our chain demonstrates that
+x is forbidden by y in S.  QED.
+
+Corollary: no mount is forbidden by m in Trim(S, m).  Indeed, any
+such mount would have been forbidden by m in S and thus would have been
+in the part of S removed in Trim(S, m).
+
+Corollary: no mount is forbidden by m in Trim(Trim(S, m), n).  Indeed,
+any such would have to have been forbidden by m in Trim(S, m), which
+is impossible.
+
+Corollary: after
+	S = Trim(S, x_1)
+	S = Trim(S, x_2)
+	...
+	S = Trim(S, x_k)
+no mount remaining in S will be forbidden by either of x_1,...,x_k.
+
+The following will reduce S to its maximal non-shifting subset:
+	visited = {}
+	while S contains elements not belonging to visited
+		let m be an arbitrary such element of S
+		S = Trim(S, m)
+		add m to visited
+
+S never grows, so the number of elements of S not belonging to visited
+decreases at least by one on each iteration.  When the loop terminates,
+all mounts remaining in S belong to visited.  It's easy to see that at
+the beginning of each iteration no mount remaining in S will be forbidden
+by any element of visited.  In other words, no mount remaining in S will
+be forbidden, i.e. final value of S will be non-shifting.  It will be
+the maximal non-shifting subset, since we were removing only forbidden
+elements.
+
+	There are two difficulties in implementing the above in linear
+time, both due to the fact that Trim() might need to remove more than one
+element.  Naive implementation of Trim() is vulnerable to running into a
+long chain of mounts, each mounted on top of parent's root.  Nothing in
+that chain is forbidden, so nothing gets removed from it.  We need to
+recognize such chains and avoid walking them again on subsequent calls of
+Trim(), otherwise we will end up with worst-case time being quadratic by
+the number of elements in S.  Another difficulty is in implementing the
+outer loop - we need to iterate through all elements of a shrinking set.
+That would be trivial if we never removed more than one element at a time
+(linked list, with list_for_each_entry_safe for iterator), but we may
+need to remove more than one entry, possibly including the ones we have
+already visited.
+
+	Let's start with naive algorithm for Trim():
+
+Trim_one(m)
+	found = false
+	for each n in children(m)
+		if n not in S
+			found = true
+			if (mountpoint(n) != root(m))
+				remove m from S
+				break
+	if found
+		Trim_ancestors(m)
+
+Trim_ancestors(m)
+	for (; parent(m) in S; m = parent(m)) {
+		if (mountpoint(m) != root(parent(m)))
+			remove parent(m) from S
+	}
+
+If m belongs to S, Trim_one(m) will replace S with Trim(S, m).
+Proof:
+	Consider the chains excluding elements from Trim(S, m).  The last
+two elements in such chain are m and some child of m that does not belong
+to S.  If m has no such children, Trim(S, m) is equal to S.
+	m itself is removed if and only if the chain has exactly two
+elements, i.e. when the last element does not overmount the root of m.
+In other words, that happens when m has a child not in S that does not
+overmount the root of m.
+	All other elements to remove will be ancestors of m, such that
+the entire descent chain from them to m is contained in S.  Let
+(x_0, x_1, ..., x_k = m) be the longest such chain.  x_i needs to be
+removed if and only if x_{i+1} does not overmount its root.  It's easy
+to see that Trim_ancestors(m) will iterate through that chain from
+x_k to x_1 and that it will remove exactly the elements that need to be
+removed.
+
+	Note that if the loop in Trim_ancestors() walks into an already
+visited element, we are guaranteed that remaining iterations will see
+only elements that had already been visited and remove none of them.
+That's the weakness that makes it vulnerable to long chains of full
+overmounts.
+
+	It's easy to deal with, if we can afford setting marks on
+elements of S; we would mark all elements already visited by
+Trim_ancestors() and have it bail out as soon as it sees an already
+marked element.
+
+	The problems with iterating through the set can be dealt with in
+several ways, depending upon the representation we choose for our set.
+One useful observation is that we are given a closed subset in S - the
+original set passed to propagate_umount().  Its elements can neither
+forbid anything nor be forbidden by anything - all their descendents
+belong to S, so they can not occur anywhere in any excluding chain.
+In other words, the elements of that subset will remain in S until
+the end and Trim_one(S, m) is a no-op for all m from that subset.
+
+	That suggests keeping S as a disjoint union of a closed set U
+('will be unmounted, no matter what') and the set of all elements of
+S that do not belong to U.  That set ('candidates') is all we need
+to iterate through.  Let's represent it as a subset in a cyclic list,
+consisting of all list elements that are marked as candidates (initially -
+all of them).  Then we could have Trim_ancestors() only remove the mark,
+leaving the elements on the list.  Then Trim_one() would never remove
+anything other than its argument from the containing list, allowing to
+use list_for_each_entry_safe() as iterator.
+
+	Assuming that representation we get the following:
+
+	list_for_each_entry_safe(m, ..., Candidates, ...)
+		Trim_one(m)
+where
+Trim_one(m)
+	if (m is not marked as a candidate)
+		strip the "seen by Trim_ancestors" mark from m
+		remove m from the Candidates list
+		return
+		
+	remove_this = false
+	found = false
+	for each n in children(m)
+		if n not in S
+			found = true
+			if (mountpoint(n) != root(m))
+				remove_this = true
+				break
+	if found
+		Trim_ancestors(m)
+	if remove_this
+		strip the "seen by Trim_ancestors" mark from m
+		strip the "candidate" mark from m
+		remove m from the Candidate list
+
+Trim_ancestors(m)
+	for (p = parent(m); p is marked as candidate ; m = p, p = parent(p)) {
+		if m is marked as seen by Trim_ancestors
+			return
+		mark m as seen by Trim_ancestors
+		if (mountpoint(m) != root(p))
+			strip the "candidate" mark from p
+	}
+
+	Terminating condition in the loop in Trim_ancestors() is correct,
+since that that loop will never run into p belonging to U - p is always
+an ancestor of argument of Trim_one() and since U is closed, the argument
+of Trim_one() would also have to belong to U.  But Trim_one() is never
+called for elements of U.  In other words, p belongs to S if and only
+if it belongs to candidates.
+
+	Time complexity:
+* we get no more than O(#S) calls of Trim_one()
+* the loop over children in Trim_one() never looks at the same child
+twice through all the calls.
+* iterations of that loop for children in S are no more than O(#S)
+in the worst case
+* at most two children that are not elements of S are considered per
+call of Trim_one().
+* the loop in Trim_ancestors() sets its mark once per iteration and
+no element of S has is set more than once.
+
+	In the end we may have some elements excluded from S by
+Trim_ancestors() still stuck on the list.  We could do a separate
+loop removing them from the list (also no worse than O(#S) time),
+but it's easier to leave that until the next phase - there we will
+iterate through the candidates anyway.
+
+	The caller has already removed all elements of U from their parents'
+lists of children, which means that checking if child belongs to S is
+equivalent to checking if it's marked as a candidate; we'll never see
+the elements of U in the loop over children in Trim_one().
+
+	What's more, if we see that children(m) is empty and m is not
+locked, we can immediately move m into the committed subset (remove
+from the parent's list of children, etc.).  That's one fewer mount we'll
+have to look into when we check the list of children of its parent *and*
+when we get to building the non-revealing subset.
+
+		Maximal non-revealing subsets
+
+If S is not a non-revealing subset, there is a locked element x in S
+such that parent of x is not in S.
+
+Obviously, no non-revealing subset of S may contain x.  Removing such
+elements one by one will obviously end with the maximal non-revealing
+subset (possibly empty one).  Note that removal of an element will
+require removal of all its locked children, etc.
+
+If the set had been non-shifting, it will remain non-shifting after
+such removals.
+Proof: suppose S was non-shifting, x is a locked element of S, parent of x
+is not in S and S - {x} is not non-shifting.  Then there is an element m
+in S - {x} and a subtree mounted strictly inside m, such that m contains
+an element not in in S - {x}.  Since S is non-shifting, everything in
+that subtree must belong to S.  But that means that this subtree must
+contain x somewhere *and* that parent of x either belongs that subtree
+or is equal to m.  Either way it must belong to S.  Contradiction.
+
+// same representation as for finding maximal non-shifting subsets:
+// S is a disjoint union of a non-revealing set U (the ones we are committed
+// to unmount) and a set of candidates, represented as a subset of list
+// elements that have "is a candidate" mark on them.
+// Elements of U are removed from their parents' lists of children.
+// In the end candidates becomes empty and maximal non-revealing non-shifting
+// subset of S is now in U
+	while (Candidates list is non-empty)
+		handle_locked(first(Candidates))
+
+handle_locked(m)
+	if m is not marked as a candidate
+		strip the "seen by Trim_ancestors" mark from m
+		remove m from the list
+		return
+	cutoff = m
+	for (p = m; p in candidates; p = parent(p)) {
+		strip the "seen by Trim_ancestors" mark from p
+		strip the "candidate" mark from p
+		remove p from the Candidates list
+		if (!locked(p))
+			cutoff = parent(p)
+	}
+	if p in U
+		cutoff = p
+	while m != cutoff
+		remove m from children(parent(m))
+		add m to U
+		m = parent(m)
+
+Let (x_0, ..., x_n = m) be the maximal chain of descent of m within S.
+* If it contains some elements of U, let x_k be the last one of those.
+Then union of U with {x_{k+1}, ..., x_n} is obviously non-revealing.
+* otherwise if all its elements are locked, then none of {x_0, ..., x_n}
+may be elements of a non-revealing subset of S.
+* otherwise let x_k be the first unlocked element of the chain.  Then none
+of {x_0, ..., x_{k-1}} may be an element of a non-revealing subset of
+S and union of U and {x_k, ..., x_n} is non-revealing.
+
+handle_locked(m) finds which of these cases applies and adjusts Candidates
+and U accordingly.  U remains non-revealing, union of Candidates and
+U still contains any non-revealing subset of S and after the call of
+handle_locked(m) m is guaranteed to be not in Candidates list.  So having
+it called for each element of S would suffice to empty Candidates,
+leaving U the maximal non-revealing subset of S.
+
+However, handle_locked(m) is a no-op when m belongs to U, so it's enough
+to have it called for elements of Candidates list until none remain.
+
+Time complexity: number of calls of handle_locked() is limited by
+#Candidates, each iteration of the first loop in handle_locked() removes
+an element from the list, so their total number of executions is also
+limited by #Candidates; number of iterations in the second loop is no
+greater than the number of iterations of the first loop.
+
+
+		Reparenting
+
+After we'd calculated the final set, we still need to deal with
+reparenting - if an element of the final set has a child not in it,
+we need to reparent such child.
+
+Such children can only be root-overmounting (otherwise the set wouldn't
+be non-shifting) and their parents can not belong to the original set,
+since the original is guaranteed to be closed.
+
+
+		Putting all of that together
+
+The plan is to
+	* find all candidates
+	* trim down to maximal non-shifting subset
+	* trim down to maximal non-revealing subset
+	* reparent anything that needs to be reparented
+	* return the resulting set to the caller
+
+For the 2nd and 3rd steps we want to separate the set into growing
+non-revealing subset, initially containing the original set ("U" in
+terms of the pseudocode above) and everything we are still not sure about
+("candidates").  It means that for the output of the 1st step we'd like
+the extra candidates separated from the stuff already in the original set.
+For the 4th step we would like the additions to U separate from the
+original set.
+
+So let's go for
+	* original set ("set").  Linkage via mnt_list
+	* undecided candidates ("candidates").  Subset of a list,
+consisting of all its elements marked with a new flag (T_UMOUNT_CANDIDATE).
+Initially all elements of the list will be marked that way; in the
+end the list will become empty and no mounts will remain marked with
+that flag.
+	* Reuse T_MARKED for "has been already seen by trim_ancestors()".
+	* anything in U that hadn't been in the original set - elements of
+candidates will gradually be either discarded or moved there.  In other
+words, it's the candidates we have already decided to unmount.	Its role
+is reasonably close to the old "to_umount", so let's use that name.
+Linkage via mnt_list.
+
+For gather_candidates() we'll need to maintain both candidates (S -
+set) and intersection of S with set.  Use T_UMOUNT_CANDIDATE for
+all elements we encounter, putting the ones not already in the original
+set into the list of candidates.  When we are done, strip that flag from
+all elements of the original set.  That gives a cheap way to check
+if element belongs to S (in gather_candidates) and to candidates
+itself (at later stages).  Call that predicate is_candidate(); it would
+be m->mnt_t_flags & T_UMOUNT_CANDIDATE.
+
+All elements of the original set are marked with MNT_UMOUNT and we'll
+need the same for elements added when joining the contents of to_umount
+to set in the end.  Let's set MNT_UMOUNT at the time we add an element
+to to_umount; that's close to what the old 'umount_one' is doing, so
+let's keep that name.  It also gives us another predicate we need -
+"belongs to union of set and to_umount"; will_be_unmounted() for now.
+
+Removals from the candidates list should strip both T_MARKED and
+T_UMOUNT_CANDIDATE; call it remove_from_candidates_list().
diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index fd32a9a17bfb..486a91633474 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -515,8 +515,8 @@ As of kernel 2.6.22, the following members are defined:
 		struct posix_acl * (*get_acl)(struct mnt_idmap *, struct dentry *, int);
 	        int (*set_acl)(struct mnt_idmap *, struct dentry *, struct posix_acl *, int);
 		int (*fileattr_set)(struct mnt_idmap *idmap,
-				    struct dentry *dentry, struct fileattr *fa);
-		int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
+				    struct dentry *dentry, struct file_kattr *fa);
+		int (*fileattr_get)(struct dentry *dentry, struct file_kattr *fa);
 	        struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
 	};
 
@@ -758,8 +758,9 @@ process is more complicated and uses write_begin/write_end or
 dirty_folio to write data into the address_space, and
 writepages to writeback data to storage.
 
-Adding and removing pages to/from an address_space is protected by the
-inode's i_mutex.
+Removing pages from an address_space requires holding the inode's i_rwsem
+exclusively, while adding pages to the address_space requires holding the
+inode's i_mapping->invalidate_lock exclusively.
 
 When data is written to a page, the PG_Dirty flag should be set.  It
 typically remains set until writepages asks for it to be written.  This
@@ -822,10 +823,10 @@ cache in your filesystem.  The following members are defined:
 		int (*writepages)(struct address_space *, struct writeback_control *);
 		bool (*dirty_folio)(struct address_space *, struct folio *);
 		void (*readahead)(struct readahead_control *);
-		int (*write_begin)(struct file *, struct address_space *mapping,
+		int (*write_begin)(const struct kiocb *, struct address_space *mapping,
 				   loff_t pos, unsigned len,
-				struct page **pagep, void **fsdata);
-		int (*write_end)(struct file *, struct address_space *mapping,
+				   struct page **pagep, void **fsdata);
+		int (*write_end)(const struct kiocb *, struct address_space *mapping,
 				 loff_t pos, unsigned len, unsigned copied,
 				 struct folio *folio, void *fsdata);
 		sector_t (*bmap)(struct address_space *, sector_t);
@@ -1071,12 +1072,14 @@ This describes how the VFS can manipulate an open file.  As of kernel
 
 	struct file_operations {
 		struct module *owner;
+		fop_flags_t fop_flags;
 		loff_t (*llseek) (struct file *, loff_t, int);
 		ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
 		ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
 		ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
 		ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
-		int (*iopoll)(struct kiocb *kiocb, bool spin);
+		int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *,
+				unsigned int flags);
 		int (*iterate_shared) (struct file *, struct dir_context *);
 		__poll_t (*poll) (struct file *, struct poll_table_struct *);
 		long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
@@ -1093,18 +1096,24 @@ This describes how the VFS can manipulate an open file.  As of kernel
 		int (*flock) (struct file *, int, struct file_lock *);
 		ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
 		ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
-		int (*setlease)(struct file *, long, struct file_lock **, void **);
+		void (*splice_eof)(struct file *file);
+		int (*setlease)(struct file *, int, struct file_lease **, void **);
 		long (*fallocate)(struct file *file, int mode, loff_t offset,
 				  loff_t len);
 		void (*show_fdinfo)(struct seq_file *m, struct file *f);
 	#ifndef CONFIG_MMU
 		unsigned (*mmap_capabilities)(struct file *);
 	#endif
-		ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int);
+		ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
+				loff_t, size_t, unsigned int);
 		loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
 					   struct file *file_out, loff_t pos_out,
 					   loff_t len, unsigned int remap_flags);
 		int (*fadvise)(struct file *, loff_t, loff_t, int);
+		int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
+		int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *,
+					unsigned int poll_flags);
+		int (*mmap_prepare)(struct vm_area_desc *);
 	};
 
 Again, all methods are called without any locks being held, unless
@@ -1144,7 +1153,8 @@ otherwise noted.
 	 used on 64 bit kernels.
 
 ``mmap``
-	called by the mmap(2) system call
+	called by the mmap(2) system call. Deprecated in favour of
+	``mmap_prepare``.
 
 ``open``
 	called by the VFS when an inode should be opened.  When the VFS
@@ -1221,6 +1231,11 @@ otherwise noted.
 ``fadvise``
 	possibly called by the fadvise64() system call.
 
+``mmap_prepare``
+	Called by the mmap(2) system call. Allows a VFS to set up a
+	file-backed memory mapping, most notably establishing relevant
+	private state and VMA callbacks.
+
 Note that the file operations are implemented by the specific
 filesystem in which the inode resides.  When opening a device node
 (character or block special) most filesystems will call special
diff --git a/Documentation/hwmon/ina238.rst b/Documentation/hwmon/ina238.rst
index d1b93cf8627f..9a24da4786a4 100644
--- a/Documentation/hwmon/ina238.rst
+++ b/Documentation/hwmon/ina238.rst
@@ -65,7 +65,7 @@ Additional sysfs entries for sq52206
 ------------------------------------
 
 ======================= =======================================================
-energy1_input		Energy measurement (mJ)
+energy1_input		Energy measurement (uJ)
 
 power1_input_highest	Peak Power (uW)
 ======================= =======================================================
diff --git a/Documentation/netlink/specs/ovpn.yaml b/Documentation/netlink/specs/ovpn.yaml
index 096c51f0c69a..ba76426a542d 100644
--- a/Documentation/netlink/specs/ovpn.yaml
+++ b/Documentation/netlink/specs/ovpn.yaml
@@ -161,6 +161,66 @@ attribute-sets:
         type: uint
         doc: Number of packets transmitted at the transport level
   -
+    name: peer-new-input
+    subset-of: peer
+    attributes:
+      -
+        name: id
+      -
+        name: remote-ipv4
+      -
+        name: remote-ipv6
+      -
+        name: remote-ipv6-scope-id
+      -
+        name: remote-port
+      -
+        name: socket
+      -
+        name: vpn-ipv4
+      -
+        name: vpn-ipv6
+      -
+        name: local-ipv4
+      -
+        name: local-ipv6
+      -
+        name: keepalive-interval
+      -
+        name: keepalive-timeout
+  -
+    name: peer-set-input
+    subset-of: peer
+    attributes:
+      -
+        name: id
+      -
+        name: remote-ipv4
+      -
+        name: remote-ipv6
+      -
+        name: remote-ipv6-scope-id
+      -
+        name: remote-port
+      -
+        name: vpn-ipv4
+      -
+        name: vpn-ipv6
+      -
+        name: local-ipv4
+      -
+        name: local-ipv6
+      -
+        name: keepalive-interval
+      -
+        name: keepalive-timeout
+  -
+    name: peer-del-input
+    subset-of: peer
+    attributes:
+      -
+        name: id
+  -
     name: keyconf
     attributes:
       -
@@ -216,6 +276,33 @@ attribute-sets:
           obtain the actual cipher IV
         checks:
           exact-len: nonce-tail-size
+
+  -
+    name: keyconf-get
+    subset-of: keyconf
+    attributes:
+      -
+        name: peer-id
+      -
+        name: slot
+      -
+        name: key-id
+      -
+        name: cipher-alg
+  -
+    name: keyconf-swap-input
+    subset-of: keyconf
+    attributes:
+      -
+        name: peer-id
+  -
+    name: keyconf-del-input
+    subset-of: keyconf
+    attributes:
+      -
+        name: peer-id
+      -
+        name: slot
   -
     name: ovpn
     attributes:
@@ -235,12 +322,66 @@ attribute-sets:
         type: nest
         doc: Peer specific cipher configuration
         nested-attributes: keyconf
+  -
+    name: ovpn-peer-new-input
+    subset-of: ovpn
+    attributes:
+      -
+        name: ifindex
+      -
+        name: peer
+        nested-attributes: peer-new-input
+  -
+    name: ovpn-peer-set-input
+    subset-of: ovpn
+    attributes:
+      -
+        name: ifindex
+      -
+        name: peer
+        nested-attributes: peer-set-input
+  -
+    name: ovpn-peer-del-input
+    subset-of: ovpn
+    attributes:
+      -
+        name: ifindex
+      -
+        name: peer
+        nested-attributes: peer-del-input
+  -
+    name: ovpn-keyconf-get
+    subset-of: ovpn
+    attributes:
+      -
+        name: ifindex
+      -
+        name: keyconf
+        nested-attributes: keyconf-get
+  -
+    name: ovpn-keyconf-swap-input
+    subset-of: ovpn
+    attributes:
+      -
+        name: ifindex
+      -
+        name: keyconf
+        nested-attributes: keyconf-swap-input
+  -
+    name: ovpn-keyconf-del-input
+    subset-of: ovpn
+    attributes:
+      -
+        name: ifindex
+      -
+        name: keyconf
+        nested-attributes: keyconf-del-input
 
 operations:
   list:
     -
       name: peer-new
-      attribute-set: ovpn
+      attribute-set: ovpn-peer-new-input
       flags: [ admin-perm ]
       doc: Add a remote peer
       do:
@@ -252,7 +393,7 @@ operations:
             - peer
     -
       name: peer-set
-      attribute-set: ovpn
+      attribute-set: ovpn-peer-set-input
       flags: [ admin-perm ]
       doc: modify a remote peer
       do:
@@ -286,7 +427,7 @@ operations:
             - peer
     -
       name: peer-del
-      attribute-set: ovpn
+      attribute-set: ovpn-peer-del-input
       flags: [ admin-perm ]
       doc: Delete existing remote peer
       do:
@@ -316,7 +457,7 @@ operations:
             - keyconf
     -
       name: key-get
-      attribute-set: ovpn
+      attribute-set: ovpn-keyconf-get
       flags: [ admin-perm ]
       doc: Retrieve non-sensitive data about peer key and cipher
       do:
@@ -331,7 +472,7 @@ operations:
             - keyconf
     -
       name: key-swap
-      attribute-set: ovpn
+      attribute-set: ovpn-keyconf-swap-input
       flags: [ admin-perm ]
       doc: Swap primary and secondary session keys for a specific peer
       do:
@@ -350,7 +491,7 @@ operations:
       mcgrp: peers
     -
       name: key-del
-      attribute-set: ovpn
+      attribute-set: ovpn-keyconf-del-input
       flags: [ admin-perm ]
       doc: Delete cipher key for a specific peer
       do:
diff --git a/Documentation/networking/tls.rst b/Documentation/networking/tls.rst
index c7904a1bc167..36cc7afc2527 100644
--- a/Documentation/networking/tls.rst
+++ b/Documentation/networking/tls.rst
@@ -16,11 +16,13 @@ User interface
 Creating a TLS connection
 -------------------------
 
-First create a new TCP socket and set the TLS ULP.
+First create a new TCP socket and once the connection is established set the
+TLS ULP.
 
 .. code-block:: c
 
   sock = socket(AF_INET, SOCK_STREAM, 0);
+  connect(sock, addr, addrlen);
   setsockopt(sock, SOL_TCP, TCP_ULP, "tls", sizeof("tls"));
 
 Setting the TLS ULP allows us to set/get TLS socket options. Currently
diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst
index 1ac62dc3a66f..e1755610b4bc 100644
--- a/Documentation/process/maintainer-netdev.rst
+++ b/Documentation/process/maintainer-netdev.rst
@@ -312,7 +312,7 @@ Posting as one thread is discouraged because it confuses patchwork
 (as of patchwork 2.2.2).
 
 Co-posting selftests
---------------------
+~~~~~~~~~~~~~~~~~~~~
 
 Selftests should be part of the same series as the code changes.
 Specifically for fixes both code change and related test should go into
diff --git a/Documentation/security/credentials.rst b/Documentation/security/credentials.rst
index 2aa0791bcefe..d0191c8b8060 100644
--- a/Documentation/security/credentials.rst
+++ b/Documentation/security/credentials.rst
@@ -555,5 +555,5 @@ the VFS, and that can be done by calling into such as ``vfs_mkdir()`` with a
 different set of credentials.  This is done in the following places:
 
  * ``sys_faccessat()``.
- * ``do_coredump()``.
+ * ``vfs_coredump()``.
  * nfs4recover.c.
diff --git a/Documentation/translations/zh_CN/security/credentials.rst b/Documentation/translations/zh_CN/security/credentials.rst
index 91c353dfb622..88fcd9152ffe 100644
--- a/Documentation/translations/zh_CN/security/credentials.rst
+++ b/Documentation/translations/zh_CN/security/credentials.rst
@@ -475,5 +475,5 @@ const指针上操作，因此不需要进行类型转换，但需要临时放弃
 如 ``vfs_mkdir()`` 来实现。以下是一些进行此操作的位置:
 
  * ``sys_faccessat()``.
- * ``do_coredump()``.
+ * ``vfs_coredump()``.
  * nfs4recover.c.
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 9abf93ee5f65..544fb11351d9 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -2008,6 +2008,13 @@ If the KVM_CAP_VM_TSC_CONTROL capability is advertised, this can also
 be used as a vm ioctl to set the initial tsc frequency of subsequently
 created vCPUs.
 
+For TSC protected Confidential Computing (CoCo) VMs where TSC frequency
+is configured once at VM scope and remains unchanged during VM's
+lifetime, the vm ioctl should be used to configure the TSC frequency
+and the vcpu ioctl is not supported.
+
+Example of such CoCo VMs: TDX guests.
+
 4.56 KVM_GET_TSC_KHZ
 --------------------
 
@@ -7196,6 +7203,10 @@ The valid value for 'flags' is:
 					u64 leaf;
 					u64 r11, r12, r13, r14;
 				} get_tdvmcall_info;
+				struct {
+					u64 ret;
+					u64 vector;
+				} setup_event_notify;
 			};
 		} tdx;
 
@@ -7210,21 +7221,24 @@ number from register R11.  The remaining field of the union provide the
 inputs and outputs of the TDVMCALL.  Currently the following values of
 ``nr`` are defined:
 
-* ``TDVMCALL_GET_QUOTE``: the guest has requested to generate a TD-Quote
-signed by a service hosting TD-Quoting Enclave operating on the host.
-Parameters and return value are in the ``get_quote`` field of the union.
-The ``gpa`` field and ``size`` specify the guest physical address
-(without the shared bit set) and the size of a shared-memory buffer, in
-which the TDX guest passes a TD Report.  The ``ret`` field represents
-the return value of the GetQuote request.  When the request has been
-queued successfully, the TDX guest can poll the status field in the
-shared-memory area to check whether the Quote generation is completed or
-not. When completed, the generated Quote is returned via the same buffer.
-
-* ``TDVMCALL_GET_TD_VM_CALL_INFO``: the guest has requested the support
-status of TDVMCALLs.  The output values for the given leaf should be
-placed in fields from ``r11`` to ``r14`` of the ``get_tdvmcall_info``
-field of the union.
+ * ``TDVMCALL_GET_QUOTE``: the guest has requested to generate a TD-Quote
+   signed by a service hosting TD-Quoting Enclave operating on the host.
+   Parameters and return value are in the ``get_quote`` field of the union.
+   The ``gpa`` field and ``size`` specify the guest physical address
+   (without the shared bit set) and the size of a shared-memory buffer, in
+   which the TDX guest passes a TD Report.  The ``ret`` field represents
+   the return value of the GetQuote request.  When the request has been
+   queued successfully, the TDX guest can poll the status field in the
+   shared-memory area to check whether the Quote generation is completed or
+   not. When completed, the generated Quote is returned via the same buffer.
+
+ * ``TDVMCALL_GET_TD_VM_CALL_INFO``: the guest has requested the support
+   status of TDVMCALLs.  The output values for the given leaf should be
+   placed in fields from ``r11`` to ``r14`` of the ``get_tdvmcall_info``
+   field of the union.
+
+ * ``TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT``: the guest has requested to
+   set up a notification interrupt for vector ``vector``.
 
 KVM may add support for more values in the future that may cause a userspace
 exit, even without calls to ``KVM_ENABLE_CAP`` or similar.  In this case,
diff --git a/Documentation/virt/kvm/review-checklist.rst b/Documentation/virt/kvm/review-checklist.rst
index dc01aea4057b..debac54e14e7 100644
--- a/Documentation/virt/kvm/review-checklist.rst
+++ b/Documentation/virt/kvm/review-checklist.rst
@@ -7,7 +7,7 @@ Review checklist for kvm patches
 1.  The patch must follow Documentation/process/coding-style.rst and
     Documentation/process/submitting-patches.rst.
 
-2.  Patches should be against kvm.git master branch.
+2.  Patches should be against kvm.git master or next branches.
 
 3.  If the patch introduces or modifies a new userspace API:
     - the API must be documented in Documentation/virt/kvm/api.rst
@@ -18,10 +18,10 @@ Review checklist for kvm patches
 5.  New features must default to off (userspace should explicitly request them).
     Performance improvements can and should default to on.
 
-6.  New cpu features should be exposed via KVM_GET_SUPPORTED_CPUID2
+6.  New cpu features should be exposed via KVM_GET_SUPPORTED_CPUID2,
+    or its equivalent for non-x86 architectures
 
-7.  Emulator changes should be accompanied by unit tests for qemu-kvm.git
-    kvm/test directory.
+7.  The feature should be testable (see below).
 
 8.  Changes should be vendor neutral when possible.  Changes to common code
     are better than duplicating changes to vendor code.
@@ -36,6 +36,87 @@ Review checklist for kvm patches
 11. New guest visible features must either be documented in a hardware manual
     or be accompanied by documentation.
 
-12. Features must be robust against reset and kexec - for example, shared
-    host/guest memory must be unshared to prevent the host from writing to
-    guest memory that the guest has not reserved for this purpose.
+Testing of KVM code
+-------------------
+
+All features contributed to KVM, and in many cases bugfixes too, should be
+accompanied by some kind of tests and/or enablement in open source guests
+and VMMs.  KVM is covered by multiple test suites:
+
+*Selftests*
+  These are low level tests that allow granular testing of kernel APIs.
+  This includes API failure scenarios, invoking APIs after specific
+  guest instructions, and testing multiple calls to ``KVM_CREATE_VM``
+  within a single test.  They are included in the kernel tree at
+  ``tools/testing/selftests/kvm``.
+
+``kvm-unit-tests``
+  A collection of small guests that test CPU and emulated device features
+  from a guest's perspective.  They run under QEMU or ``kvmtool``, and
+  are generally not KVM-specific: they can be run with any accelerator
+  that QEMU support or even on bare metal, making it possible to compare
+  behavior across hypervisors and processor families.
+
+Functional test suites
+  Various sets of functional tests exist, such as QEMU's ``tests/functional``
+  suite and `avocado-vt <https://avocado-vt.readthedocs.io/en/latest/>`__.
+  These typically involve running a full operating system in a virtual
+  machine.
+
+The best testing approach depends on the feature's complexity and
+operation. Here are some examples and guidelines:
+
+New instructions (no new registers or APIs)
+  The corresponding CPU features (if applicable) should be made available
+  in QEMU.  If the instructions require emulation support or other code in
+  KVM, it is worth adding coverage to ``kvm-unit-tests`` or selftests;
+  the latter can be a better choice if the instructions relate to an API
+  that already has good selftest coverage.
+
+New hardware features (new registers, no new APIs)
+  These should be tested via ``kvm-unit-tests``; this more or less implies
+  supporting them in QEMU and/or ``kvmtool``.  In some cases selftests
+  can be used instead, similar to the previous case, or specifically to
+  test corner cases in guest state save/restore.
+
+Bug fixes and performance improvements
+  These usually do not introduce new APIs, but it's worth sharing
+  any benchmarks and tests that will validate your contribution,
+  ideally in the form of regression tests.  Tests and benchmarks
+  can be included in either ``kvm-unit-tests`` or selftests, depending
+  on the specifics of your change.  Selftests are especially useful for
+  regression tests because they are included directly in Linux's tree.
+
+Large scale internal changes
+  While it's difficult to provide a single policy, you should ensure that
+  the changed code is covered by either ``kvm-unit-tests`` or selftests.
+  In some cases the affected code is run for any guests and functional
+  tests suffice.  Explain your testing process in the cover letter,
+  as that can help identify gaps in existing test suites.
+
+New APIs
+  It is important to demonstrate your use case.  This can be as simple as
+  explaining that the feature is already in use on bare metal, or it can be
+  a proof-of-concept implementation in userspace.  The latter need not be
+  open source, though that is of course preferrable for easier testing.
+  Selftests should test corner cases of the APIs, and should also cover
+  basic host and guest operation if no open source VMM uses the feature.
+
+Bigger features, usually spanning host and guest
+  These should be supported by Linux guests, with limited exceptions for
+  Hyper-V features that are testable on Windows guests.  It is strongly
+  suggested that the feature be usable with an open source host VMM, such
+  as at least one of QEMU or crosvm, and guest firmware.  Selftests should
+  test at least API error cases.  Guest operation can be covered by
+  either selftests of ``kvm-unit-tests`` (this is especially important for
+  paravirtualized and Windows-only features).  Strong selftest coverage
+  can also be a replacement for implementation in an open source VMM,
+  but this is generally not recommended.
+
+Following the above suggestions for testing in selftests and
+``kvm-unit-tests`` will make it easier for the maintainers to review
+and accept your code.  In fact, even before you contribute your changes
+upstream it will make it easier for you to develop for KVM.
+
+Of course, the KVM maintainers reserve the right to require more tests,
+though they may also waive the requirement from time to time.
diff --git a/Documentation/virt/kvm/x86/intel-tdx.rst b/Documentation/virt/kvm/x86/intel-tdx.rst
index 76bdd95334d6..5efac62c92c7 100644
--- a/Documentation/virt/kvm/x86/intel-tdx.rst
+++ b/Documentation/virt/kvm/x86/intel-tdx.rst
@@ -79,7 +79,20 @@ to be configured to the TDX guest.
   struct kvm_tdx_capabilities {
         __u64 supported_attrs;
         __u64 supported_xfam;
-        __u64 reserved[254];
+
+        /* TDG.VP.VMCALL hypercalls executed in kernel and forwarded to
+         * userspace, respectively
+         */
+        __u64 kernel_tdvmcallinfo_1_r11;
+        __u64 user_tdvmcallinfo_1_r11;
+
+        /* TDG.VP.VMCALL instruction executions subfunctions executed in kernel
+         * and forwarded to userspace, respectively
+         */
+        __u64 kernel_tdvmcallinfo_1_r12;
+        __u64 user_tdvmcallinfo_1_r12;
+
+        __u64 reserved[250];
 
         /* Configurable CPUID bits for userspace */
         struct kvm_cpuid2 cpuid;
diff --git a/Documentation/wmi/acpi-interface.rst b/Documentation/wmi/acpi-interface.rst
index f1b28835d23c..1ef003b033bf 100644
--- a/Documentation/wmi/acpi-interface.rst
+++ b/Documentation/wmi/acpi-interface.rst
@@ -36,7 +36,7 @@ Offset  Size (in bytes) Content
 
 The WMI object flags control whether the method or notification ID is used:
 
-- 0x1: Data block usage is expensive and must be explicitly enabled/disabled.
+- 0x1: Data block is expensive to collect.
 - 0x2: Data block contains WMI methods.
 - 0x4: Data block contains ASCIZ string.
 - 0x8: Data block describes a WMI event, use notification ID instead
@@ -83,14 +83,18 @@ event as hexadecimal value. Their first parameter is an integer with a value
 of 0 if the WMI event should be disabled, other values will enable
 the WMI event.
 
+Those ACPI methods are always called even for WMI events not registered as
+being expensive to collect to match the behavior of the Windows driver.
+
 WCxx ACPI methods
 -----------------
-Similar to the ``WExx`` ACPI methods, except that it controls data collection
-instead of events and thus the last two characters of the ACPI method name are
-the method ID of the data block to enable/disable.
+Similar to the ``WExx`` ACPI methods, except that instead of WMI events it controls
+data collection of data blocks registered as being expensive to collect. Thus the
+last two characters of the ACPI method name are the method ID of the data block
+to enable/disable.
 
 Those ACPI methods are also called before setting data blocks to match the
-behaviour of the Windows driver.
+behavior of the Windows driver.
 
 _WED ACPI method
 ----------------