1036 files changed, 25543 insertions, 9456 deletions
diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 000000000000..854773350cc5
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+root = true
+
+[{*.{awk,c,dts,dtsi,dtso,h,mk,s,S},Kconfig,Makefile,Makefile.*}]
+charset = utf-8
+end_of_line = lf
+trim_trailing_whitespace = true
+insert_final_newline = true
+indent_style = tab
+indent_size = 8
+
+[*.{json,py,rs}]
+charset = utf-8
+end_of_line = lf
+trim_trailing_whitespace = true
+insert_final_newline = true
+indent_style = space
+indent_size = 4
+
+# this must be below the general *.py section to override it
+[tools/{perf,power,rcu,testing/kunit}/**.py,]
+indent_style = tab
+indent_size = 8
+
+[*.yaml]
+charset = utf-8
+end_of_line = lf
+trim_trailing_whitespace = unset
+insert_final_newline = true
+indent_style = space
+indent_size = 2
diff --git a/.gitignore b/.gitignore
index 98274e1160d7..689a4fa3f547 100644
--- a/.gitignore
+++ b/.gitignore
@@ -96,6 +96,7 @@ modules.order
 #
 !.clang-format
 !.cocciconfig
+!.editorconfig
 !.get_maintainer.ignore
 !.gitattributes
 !.gitignore
diff --git a/Documentation/admin-guide/cifs/todo.rst b/Documentation/admin-guide/cifs/todo.rst
index 2646ed2e2d3e..9a65c670774e 100644
--- a/Documentation/admin-guide/cifs/todo.rst
+++ b/Documentation/admin-guide/cifs/todo.rst
@@ -2,7 +2,8 @@
 TODO
 ====
 
-Version 2.14 December 21, 2018
+As of the 6.7 kernel. See https://wiki.samba.org/index.php/LinuxCIFSKernel
+for a list of features added by release.
 
 A Partial List of Missing Features
 ==================================
@@ -12,22 +13,22 @@ for visible, important contributions to this module.  Here
 is a partial list of the known problems and missing features:
 
 a) SMB3 (and SMB3.1.1) missing optional features:
+   multichannel performance optimizations, algorithmic channel selection,
+   directory lease optimizations,
+   support for faster packet signing (GMAC),
+   support for compression over the network,
+   T10 copy offload ie "ODX" (copy chunk, and "Duplicate Extents" ioctl
+   are currently the only two server side copy mechanisms supported)
 
-   - multichannel (partially integrated), integration of multichannel with RDMA
-   - directory leases (improved metadata caching). Currently only implemented for root dir
-   - T10 copy offload ie "ODX" (copy chunk, and "Duplicate Extents" ioctl
-     currently the only two server side copy mechanisms supported)
+b) Better optimized compounding and error handling for sparse file support,
+   perhaps addition of new optional SMB3.1.1 fsctls to make collapse range
+   and insert range more atomic
 
-b) improved sparse file support (fiemap and SEEK_HOLE are implemented
-   but additional features would be supportable by the protocol such
-   as FALLOC_FL_COLLAPSE_RANGE and FALLOC_FL_INSERT_RANGE)
-
-c) Directory entry caching relies on a 1 second timer, rather than
-   using Directory Leases, currently only the root file handle is cached longer
-   by leveraging Directory Leases
+c) Support for SMB3.1.1 over QUIC (and perhaps other socket based protocols
+   like SCTP)
 
 d) quota support (needs minor kernel change since quota calls otherwise
-    won't make it to network filesystems or deviceless filesystems).
+   won't make it to network filesystems or deviceless filesystems).
 
 e) Additional use cases can be optimized to use "compounding" (e.g.
    open/query/close and open/setinfo/close) to reduce the number of
@@ -92,23 +93,20 @@ t) split cifs and smb3 support into separate modules so legacy (and less
 
 v) Additional testing of POSIX Extensions for SMB3.1.1
 
-w) Add support for additional strong encryption types, and additional spnego
-   authentication mechanisms (see MS-SMB2).  GCM-256 is now partially implemented.
+w) Support for the Mac SMB3.1.1 extensions to improve interop with Apple servers
+
+x) Support for additional authentication options (e.g. IAKERB, peer-to-peer
+   Kerberos, SCRAM and others supported by existing servers)
 
-x) Finish support for SMB3.1.1 compression
+y) Improved tracing, more eBPF trace points, better scripts for performance
+   analysis
 
 Known Bugs
 ==========
 
 See https://bugzilla.samba.org - search on product "CifsVFS" for
 current bug list.  Also check http://bugzilla.kernel.org (Product = File System, Component = CIFS)
-
-1) existing symbolic links (Windows reparse points) are recognized but
-   can not be created remotely. They are implemented for Samba and those that
-   support the CIFS Unix extensions, although earlier versions of Samba
-   overly restrict the pathnames.
-2) follow_link and readdir code does not follow dfs junctions
-   but recognizes them
+and xfstest results e.g. https://wiki.samba.org/index.php/Xfstest-results-smb3
 
 Misc testing to do
 ==================
diff --git a/Documentation/admin-guide/cifs/usage.rst b/Documentation/admin-guide/cifs/usage.rst
index 5f936b4b6018..aa8290a29dc8 100644
--- a/Documentation/admin-guide/cifs/usage.rst
+++ b/Documentation/admin-guide/cifs/usage.rst
@@ -81,7 +81,7 @@ much older and less secure than the default dialect SMB3 which includes
 many advanced security features such as downgrade attack detection
 and encrypted shares and stronger signing and authentication algorithms.
 There are additional mount options that may be helpful for SMB3 to get
-improved POSIX behavior (NB: can use vers=3.0 to force only SMB3, never 2.1):
+improved POSIX behavior (NB: can use vers=3 to force SMB3 or later, never 2.1):
 
    ``mfsymlinks`` and either ``cifsacl`` or ``modefromsid`` (usually with ``idsfromsid``)
 
@@ -715,6 +715,7 @@ DebugData		Displays information about active CIFS sessions and
 Stats			Lists summary resource usage information as well as per
 			share statistics.
 open_files		List all the open file handles on all active SMB sessions.
+mount_params            List of all mount parameters available for the module
 ======================= =======================================================
 
 Configuration pseudo-files:
@@ -864,6 +865,11 @@ i.e.::
 
     echo "value" > /sys/module/cifs/parameters/<param>
 
+More detailed descriptions of the available module parameters and their values
+can be seen by doing::
+
+    modinfo cifs (or modinfo smb3)
+
 ================= ==========================================================
 1. enable_oplocks Enable or disable oplocks. Oplocks are enabled by default.
 		  [Y/y/1]. To disable use any of [N/n/0].
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 2e76c3476e2a..31b3a25680d0 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -888,9 +888,9 @@
 			memory region [offset, offset + size] for that kernel
 			image. If '@offset' is omitted, then a suitable offset
 			is selected automatically.
-			[KNL, X86-64, ARM64, RISCV] Select a region under 4G first, and
-			fall back to reserve region above 4G when '@offset'
-			hasn't been specified.
+			[KNL, X86-64, ARM64, RISCV, LoongArch] Select a region
+			under 4G first, and fall back to reserve region above
+			4G when '@offset' hasn't been specified.
 			See Documentation/admin-guide/kdump/kdump.rst for further details.
 
 	crashkernel=range1:size1[,range2:size2,...][@offset]
@@ -901,25 +901,27 @@
 			Documentation/admin-guide/kdump/kdump.rst for an example.
 
 	crashkernel=size[KMG],high
-			[KNL, X86-64, ARM64, RISCV] range could be above 4G.
+			[KNL, X86-64, ARM64, RISCV, LoongArch] range could be
+			above 4G.
 			Allow kernel to allocate physical memory region from top,
 			so could be above 4G if system have more than 4G ram
 			installed. Otherwise memory region will be allocated
 			below 4G, if available.
 			It will be ignored if crashkernel=X is specified.
 	crashkernel=size[KMG],low
-			[KNL, X86-64, ARM64, RISCV] range under 4G. When crashkernel=X,high
-			is passed, kernel could allocate physical memory region
-			above 4G, that cause second kernel crash on system
-			that require some amount of low memory, e.g. swiotlb
-			requires at least 64M+32K low memory, also enough extra
-			low memory is needed to make sure DMA buffers for 32-bit
-			devices won't run out. Kernel would try to allocate
+			[KNL, X86-64, ARM64, RISCV, LoongArch] range under 4G.
+			When crashkernel=X,high is passed, kernel could allocate
+			physical memory region above 4G, which can cause the second
+			kernel to crash on systems that require some amount of low
+			memory, e.g. swiotlb requires at least 64M+32K low memory; also
+			enough extra low memory is needed to make sure DMA buffers
+			for 32-bit devices won't run out. Kernel would try to allocate
 			default	size of memory below 4G automatically. The default
 			size is	platform dependent.
 			  --> x86: max(swiotlb_size_or_default() + 8MiB, 256MiB)
 			  --> arm64: 128MiB
 			  --> riscv: 128MiB
+			  --> loongarch: 128MiB
 			This one lets the user specify own low range under 4G
 			for second kernel instead.
 			0: to disable low allocation.
diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst
index bfdf236e2af3..e8c2ce1f9df6 100644
--- a/Documentation/arch/arm64/silicon-errata.rst
+++ b/Documentation/arch/arm64/silicon-errata.rst
@@ -71,6 +71,8 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A510     | #2658417        | ARM64_ERRATUM_2658417       |
 +----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A510     | #3117295        | ARM64_ERRATUM_3117295       |
++----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A520     | #2966298        | ARM64_ERRATUM_2966298       |
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A53      | #826319         | ARM64_ERRATUM_826319        |
@@ -235,11 +237,9 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | Rockchip       | RK3588          | #3588001        | ROCKCHIP_ERRATUM_3588001    |
 +----------------+-----------------+-----------------+-----------------------------+
-
 +----------------+-----------------+-----------------+-----------------------------+
 | Fujitsu        | A64FX           | E#010001        | FUJITSU_ERRATUM_010001      |
 +----------------+-----------------+-----------------+-----------------------------+
-
 +----------------+-----------------+-----------------+-----------------------------+
 | ASR            | ASR8601         | #8601001        | N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
diff --git a/Documentation/block/ioprio.rst b/Documentation/block/ioprio.rst
index a25c6d5df87b..4662e1ff3d81 100644
--- a/Documentation/block/ioprio.rst
+++ b/Documentation/block/ioprio.rst
@@ -6,17 +6,16 @@ Block io priorities
 Intro
 -----
 
-With the introduction of cfq v3 (aka cfq-ts or time sliced cfq), basic io
-priorities are supported for reads on files.  This enables users to io nice
-processes or process groups, similar to what has been possible with cpu
-scheduling for ages.  This document mainly details the current possibilities
-with cfq; other io schedulers do not support io priorities thus far.
+The io priority feature enables users to io nice processes or process groups,
+similar to what has been possible with cpu scheduling for ages. Support for io
+priorities is io scheduler dependent and currently supported by bfq and
+mq-deadline.
 
 Scheduling classes
 ------------------
 
-CFQ implements three generic scheduling classes that determine how io is
-served for a process.
+Three generic scheduling classes are implemented for io priorities that
+determine how io is served for a process.
 
 IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
 higher priority than any other in the system, processes from this class are
diff --git a/Documentation/dev-tools/checkuapi.rst b/Documentation/dev-tools/checkuapi.rst
new file mode 100644
index 000000000000..9072f21b50b0
--- /dev/null
+++ b/Documentation/dev-tools/checkuapi.rst
@@ -0,0 +1,477 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+============
+UAPI Checker
+============
+
+The UAPI checker (``scripts/check-uapi.sh``) is a shell script which
+checks UAPI header files for userspace backwards-compatibility across
+the git tree.
+
+Options
+=======
+
+This section will describe the options with which ``check-uapi.sh``
+can be run.
+
+Usage::
+
+    check-uapi.sh [-b BASE_REF] [-p PAST_REF] [-j N] [-l ERROR_LOG] [-i] [-q] [-v]
+
+Available options::
+
+    -b BASE_REF    Base git reference to use for comparison. If unspecified or empty,
+                   will use any dirty changes in tree to UAPI files. If there are no
+                   dirty changes, HEAD will be used.
+    -p PAST_REF    Compare BASE_REF to PAST_REF (e.g. -p v6.1). If unspecified or empty,
+                   will use BASE_REF^1. Must be an ancestor of BASE_REF. Only headers
+                   that exist on PAST_REF will be checked for compatibility.
+    -j JOBS        Number of checks to run in parallel (default: number of CPU cores).
+    -l ERROR_LOG   Write error log to file (default: no error log is generated).
+    -i             Ignore ambiguous changes that may or may not break UAPI compatibility.
+    -q             Quiet operation.
+    -v             Verbose operation (print more information about each header being checked).
+
+Environmental args::
+
+    ABIDIFF  Custom path to abidiff binary
+    CC       C compiler (default is "gcc")
+    ARCH     Target architecture of C compiler (default is host arch)
+
+Exit codes::
+
+    0) Success
+    1) ABI difference detected
+    2) Prerequisite not met
+
+Examples
+========
+
+Basic Usage
+-----------
+
+First, let's try making a change to a UAPI header file that obviously
+won't break userspace::
+
+    cat << 'EOF' | patch -l -p1
+    --- a/include/uapi/linux/acct.h
+    +++ b/include/uapi/linux/acct.h
+    @@ -21,7 +21,9 @@
+     #include <asm/param.h>
+     #include <asm/byteorder.h>
+
+    -/*
+    +#define FOO
+    +
+    +/*
+      *  comp_t is a 16-bit "floating" point number with a 3-bit base 8
+      *  exponent and a 13-bit fraction.
+      *  comp2_t is 24-bit with 5-bit base 2 exponent and 20 bit fraction
+    diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
+    EOF
+
+Now, let's use the script to validate::
+
+    % ./scripts/check-uapi.sh
+    Installing user-facing UAPI headers from dirty tree... OK
+    Installing user-facing UAPI headers from HEAD... OK
+    Checking changes to UAPI headers between HEAD and dirty tree...
+    All 912 UAPI headers compatible with x86 appear to be backwards compatible
+
+Let's add another change that *might* break userspace::
+
+    cat << 'EOF' | patch -l -p1
+    --- a/include/uapi/linux/bpf.h
+    +++ b/include/uapi/linux/bpf.h
+    @@ -74,7 +74,7 @@ struct bpf_insn {
+            __u8    dst_reg:4;      /* dest register */
+            __u8    src_reg:4;      /* source register */
+            __s16   off;            /* signed offset */
+    -       __s32   imm;            /* signed immediate constant */
+    +       __u32   imm;            /* unsigned immediate constant */
+     };
+
+     /* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */
+    EOF
+
+The script will catch this::
+
+    % ./scripts/check-uapi.sh
+    Installing user-facing UAPI headers from dirty tree... OK
+    Installing user-facing UAPI headers from HEAD... OK
+    Checking changes to UAPI headers between HEAD and dirty tree...
+    ==== ABI differences detected in include/linux/bpf.h from HEAD -> dirty tree ====
+        [C] 'struct bpf_insn' changed:
+          type size hasn't changed
+          1 data member change:
+            type of '__s32 imm' changed:
+              typedef name changed from __s32 to __u32 at int-ll64.h:27:1
+              underlying type 'int' changed:
+                type name changed from 'int' to 'unsigned int'
+                type size hasn't changed
+    ==================================================================================
+
+    error - 1/912 UAPI headers compatible with x86 appear _not_ to be backwards compatible
+
+In this case, the script is reporting the type change because it could
+break a userspace program that passes in a negative number. Now, let's
+say you know that no userspace program could possibly be using a negative
+value in ``imm``, so changing to an unsigned type there shouldn't hurt
+anything. You can pass the ``-i`` flag to the script to ignore changes
+in which the userspace backwards compatibility is ambiguous::
+
+    % ./scripts/check-uapi.sh -i
+    Installing user-facing UAPI headers from dirty tree... OK
+    Installing user-facing UAPI headers from HEAD... OK
+    Checking changes to UAPI headers between HEAD and dirty tree...
+    All 912 UAPI headers compatible with x86 appear to be backwards compatible
+
+Now, let's make a similar change that *will* break userspace::
+
+    cat << 'EOF' | patch -l -p1
+    --- a/include/uapi/linux/bpf.h
+    +++ b/include/uapi/linux/bpf.h
+    @@ -71,8 +71,8 @@ enum {
+
+     struct bpf_insn {
+            __u8    code;           /* opcode */
+    -       __u8    dst_reg:4;      /* dest register */
+            __u8    src_reg:4;      /* source register */
+    +       __u8    dst_reg:4;      /* dest register */
+            __s16   off;            /* signed offset */
+            __s32   imm;            /* signed immediate constant */
+     };
+    EOF
+
+Since we're re-ordering an existing struct member, there's no ambiguity,
+and the script will report the breakage even if you pass ``-i``::
+
+    % ./scripts/check-uapi.sh -i
+    Installing user-facing UAPI headers from dirty tree... OK
+    Installing user-facing UAPI headers from HEAD... OK
+    Checking changes to UAPI headers between HEAD and dirty tree...
+    ==== ABI differences detected in include/linux/bpf.h from HEAD -> dirty tree ====
+        [C] 'struct bpf_insn' changed:
+          type size hasn't changed
+          2 data member changes:
+            '__u8 dst_reg' offset changed from 8 to 12 (in bits) (by +4 bits)
+            '__u8 src_reg' offset changed from 12 to 8 (in bits) (by -4 bits)
+    ==================================================================================
+
+    error - 1/912 UAPI headers compatible with x86 appear _not_ to be backwards compatible
+
+Let's commit the breaking change, then commit the innocuous change::
+
+    % git commit -m 'Breaking UAPI change' include/uapi/linux/bpf.h
+    [detached HEAD f758e574663a] Breaking UAPI change
+     1 file changed, 1 insertion(+), 1 deletion(-)
+    % git commit -m 'Innocuous UAPI change' include/uapi/linux/acct.h
+    [detached HEAD 2e87df769081] Innocuous UAPI change
+     1 file changed, 3 insertions(+), 1 deletion(-)
+
+Now, let's run the script again with no arguments::
+
+    % ./scripts/check-uapi.sh
+    Installing user-facing UAPI headers from HEAD... OK
+    Installing user-facing UAPI headers from HEAD^1... OK
+    Checking changes to UAPI headers between HEAD^1 and HEAD...
+    All 912 UAPI headers compatible with x86 appear to be backwards compatible
+
+It doesn't catch any breaking change because, by default, it only
+compares ``HEAD`` to ``HEAD^1``. The breaking change was committed on
+``HEAD~2``. If we wanted the search scope to go back further, we'd have to
+use the ``-p`` option to pass a different past reference. In this case,
+let's pass ``-p HEAD~2`` to the script so it checks UAPI changes between
+``HEAD~2`` and ``HEAD``::
+
+    % ./scripts/check-uapi.sh -p HEAD~2
+    Installing user-facing UAPI headers from HEAD... OK
+    Installing user-facing UAPI headers from HEAD~2... OK
+    Checking changes to UAPI headers between HEAD~2 and HEAD...
+    ==== ABI differences detected in include/linux/bpf.h from HEAD~2 -> HEAD ====
+        [C] 'struct bpf_insn' changed:
+          type size hasn't changed
+          2 data member changes:
+            '__u8 dst_reg' offset changed from 8 to 12 (in bits) (by +4 bits)
+            '__u8 src_reg' offset changed from 12 to 8 (in bits) (by -4 bits)
+    ==============================================================================
+
+    error - 1/912 UAPI headers compatible with x86 appear _not_ to be backwards compatible
+
+Alternatively, we could have also run with ``-b HEAD~``. This would set the
+base reference to ``HEAD~`` so then the script would compare it to ``HEAD~^1``.
+
+Architecture-specific Headers
+-----------------------------
+
+Consider this change::
+
+    cat << 'EOF' | patch -l -p1
+    --- a/arch/arm64/include/uapi/asm/sigcontext.h
+    +++ b/arch/arm64/include/uapi/asm/sigcontext.h
+    @@ -70,6 +70,7 @@ struct sigcontext {
+     struct _aarch64_ctx {
+            __u32 magic;
+            __u32 size;
+    +       __u32 new_var;
+     };
+
+     #define FPSIMD_MAGIC   0x46508001
+    EOF
+
+This is a change to an arm64-specific UAPI header file. In this example, I'm
+running the script from an x86 machine with an x86 compiler, so, by default,
+the script only checks x86-compatible UAPI header files::
+
+    % ./scripts/check-uapi.sh
+    Installing user-facing UAPI headers from dirty tree... OK
+    Installing user-facing UAPI headers from HEAD... OK
+    No changes to UAPI headers were applied between HEAD and dirty tree
+
+With an x86 compiler, we can't check header files in ``arch/arm64``, so the
+script doesn't even try.
+
+If we want to check the header file, we'll have to use an arm64 compiler and
+set ``ARCH`` accordingly::
+
+    % CC=aarch64-linux-gnu-gcc ARCH=arm64 ./scripts/check-uapi.sh
+    Installing user-facing UAPI headers from dirty tree... OK
+    Installing user-facing UAPI headers from HEAD... OK
+    Checking changes to UAPI headers between HEAD and dirty tree...
+    ==== ABI differences detected in include/asm/sigcontext.h from HEAD -> dirty tree ====
+        [C] 'struct _aarch64_ctx' changed:
+          type size changed from 64 to 96 (in bits)
+          1 data member insertion:
+            '__u32 new_var', at offset 64 (in bits) at sigcontext.h:73:1
+        -- snip --
+        [C] 'struct zt_context' changed:
+          type size changed from 128 to 160 (in bits)
+          2 data member changes (1 filtered):
+            '__u16 nregs' offset changed from 64 to 96 (in bits) (by +32 bits)
+            '__u16 __reserved[3]' offset changed from 80 to 112 (in bits) (by +32 bits)
+    =======================================================================================
+
+    error - 1/884 UAPI headers compatible with arm64 appear _not_ to be backwards compatible
+
+We can see with ``ARCH`` and ``CC`` set properly for the file, the ABI
+change is reported properly. Also notice that the total number of UAPI
+header files checked by the script changes. This is because the number
+of headers installed for arm64 platforms is different than x86.
+
+Cross-Dependency Breakages
+--------------------------
+
+Consider this change::
+
+    cat << 'EOF' | patch -l -p1
+    --- a/include/uapi/linux/types.h
+    +++ b/include/uapi/linux/types.h
+    @@ -52,7 +52,7 @@ typedef __u32 __bitwise __wsum;
+     #define __aligned_be64 __be64 __attribute__((aligned(8)))
+     #define __aligned_le64 __le64 __attribute__((aligned(8)))
+
+    -typedef unsigned __bitwise __poll_t;
+    +typedef unsigned short __bitwise __poll_t;
+
+     #endif /*  __ASSEMBLY__ */
+     #endif /* _UAPI_LINUX_TYPES_H */
+    EOF
+
+Here, we're changing a ``typedef`` in ``types.h``. This doesn't break
+a UAPI in ``types.h``, but other UAPIs in the tree may break due to
+this change::
+
+    % ./scripts/check-uapi.sh
+    Installing user-facing UAPI headers from dirty tree... OK
+    Installing user-facing UAPI headers from HEAD... OK
+    Checking changes to UAPI headers between HEAD and dirty tree...
+    ==== ABI differences detected in include/linux/eventpoll.h from HEAD -> dirty tree ====
+        [C] 'struct epoll_event' changed:
+          type size changed from 96 to 80 (in bits)
+          2 data member changes:
+            type of '__poll_t events' changed:
+              underlying type 'unsigned int' changed:
+                type name changed from 'unsigned int' to 'unsigned short int'
+                type size changed from 32 to 16 (in bits)
+            '__u64 data' offset changed from 32 to 16 (in bits) (by -16 bits)
+    ========================================================================================
+    include/linux/eventpoll.h did not change between HEAD and dirty tree...
+    It's possible a change to one of the headers it includes caused this error:
+    #include <linux/fcntl.h>
+    #include <linux/types.h>
+
+Note that the script noticed the failing header file did not change,
+so it assumes one of its includes must have caused the breakage. Indeed,
+we can see ``linux/types.h`` is used from ``eventpoll.h``.
+
+UAPI Header Removals
+--------------------
+
+Consider this change::
+
+    cat << 'EOF' | patch -l -p1
+    diff --git a/include/uapi/asm-generic/Kbuild b/include/uapi/asm-generic/Kbuild
+    index ebb180aac74e..a9c88b0a8b3b 100644
+    --- a/include/uapi/asm-generic/Kbuild
+    +++ b/include/uapi/asm-generic/Kbuild
+    @@ -31,6 +31,6 @@ mandatory-y += stat.h
+     mandatory-y += statfs.h
+     mandatory-y += swab.h
+     mandatory-y += termbits.h
+    -mandatory-y += termios.h
+    +#mandatory-y += termios.h
+     mandatory-y += types.h
+     mandatory-y += unistd.h
+    EOF
+
+This change removes a UAPI header file from the install list. Let's run
+the script::
+
+    % ./scripts/check-uapi.sh
+    Installing user-facing UAPI headers from dirty tree... OK
+    Installing user-facing UAPI headers from HEAD... OK
+    Checking changes to UAPI headers between HEAD and dirty tree...
+    ==== UAPI header include/asm/termios.h was removed between HEAD and dirty tree ====
+
+    error - 1/912 UAPI headers compatible with x86 appear _not_ to be backwards compatible
+
+Removing a UAPI header is considered a breaking change, and the script
+will flag it as such.
+
+Checking Historic UAPI Compatibility
+------------------------------------
+
+You can use the ``-b`` and ``-p`` options to examine different chunks of your
+git tree. For example, to check all changed UAPI header files between tags
+v6.0 and v6.1, you'd run::
+
+    % ./scripts/check-uapi.sh -b v6.1 -p v6.0
+    Installing user-facing UAPI headers from v6.1... OK
+    Installing user-facing UAPI headers from v6.0... OK
+    Checking changes to UAPI headers between v6.0 and v6.1...
+
+    --- snip ---
+    error - 37/907 UAPI headers compatible with x86 appear _not_ to be backwards compatible
+
+Note: Before v5.3, a header file needed by the script is not present,
+so the script is unable to check changes before then.
+
+You'll notice that the script detected many UAPI changes that are not
+backwards compatible. Knowing that kernel UAPIs are supposed to be stable
+forever, this is an alarming result. This brings us to the next section:
+caveats.
+
+Caveats
+=======
+
+The UAPI checker makes no assumptions about the author's intention, so some
+types of changes may be flagged even though they intentionally break UAPI.
+
+Removals For Refactoring or Deprecation
+---------------------------------------
+
+Sometimes drivers for very old hardware are removed, such as in this example::
+
+    % ./scripts/check-uapi.sh -b ba47652ba655
+    Installing user-facing UAPI headers from ba47652ba655... OK
+    Installing user-facing UAPI headers from ba47652ba655^1... OK
+    Checking changes to UAPI headers between ba47652ba655^1 and ba47652ba655...
+    ==== UAPI header include/linux/meye.h was removed between ba47652ba655^1 and ba47652ba655 ====
+
+    error - 1/910 UAPI headers compatible with x86 appear _not_ to be backwards compatible
+
+The script will always flag removals (even if they're intentional).
+
+Struct Expansions
+-----------------
+
+Depending on how a structure is handled in kernelspace, a change which
+expands a struct could be non-breaking.
+
+If a struct is used as the argument to an ioctl, then the kernel driver
+must be able to handle ioctl commands of any size. Beyond that, you need
+to be careful when copying data from the user. Say, for example, that
+``struct foo`` is changed like this::
+
+    struct foo {
+        __u64 a; /* added in version 1 */
+    +   __u32 b; /* added in version 2 */
+    +   __u32 c; /* added in version 2 */
+    }
+
+By default, the script will flag this kind of change for further review::
+
+    [C] 'struct foo' changed:
+      type size changed from 64 to 128 (in bits)
+      2 data member insertions:
+        '__u32 b', at offset 64 (in bits)
+        '__u32 c', at offset 96 (in bits)
+
+However, it is possible that this change was made safely.
+
+If a userspace program was built with version 1, it will think
+``sizeof(struct foo)`` is 8. That size will be encoded in the
+ioctl value that gets sent to the kernel. If the kernel is built
+with version 2, it will think the ``sizeof(struct foo)`` is 16.
+
+The kernel can use the ``_IOC_SIZE`` macro to get the size encoded
+in the ioctl code that the user passed in and then use
+``copy_struct_from_user()`` to safely copy the value::
+
+    int handle_ioctl(unsigned long cmd, unsigned long arg)
+    {
+        switch (_IOC_NR(cmd)) {
+        case 0x01: {
+            struct foo my_cmd;  /* size 16 in the kernel */
+
+            ret = copy_struct_from_user(&my_cmd, sizeof(struct foo), (void __user *)arg, _IOC_SIZE(cmd));
+            ...
+
+``copy_struct_from_user`` will zero the struct in the kernel and then copy
+only the bytes passed in from the user (leaving new members zeroized). If the
+user passed in a larger struct, the extra bytes must be zero or it returns ``-E2BIG``.
+
+If you know this situation is accounted for in the kernel code, you can
+pass ``-i`` to the script, and struct expansions like this will be ignored.
+
+Flex Array Migration
+--------------------
+
+While the script handles expansion into an existing flex array, it does
+still flag initial migration to flex arrays from 1-element fake flex
+arrays. For example::
+
+    struct foo {
+          __u32 x;
+    -     __u32 flex[1]; /* fake flex */
+    +     __u32 flex[];  /* real flex */
+    };
+
+This change would be flagged by the script::
+
+    [C] 'struct foo' changed:
+      type size changed from 64 to 32 (in bits)
+      1 data member change:
+        type of '__u32 flex[1]' changed:
+          type name changed from '__u32[1]' to '__u32[]'
+          array type size changed from 32 to 'unknown'
+          array type subrange 1 changed length from 1 to 'unknown'
+
+At this time, there's no way to filter these types of changes, so be
+aware of this possible false positive.
+
+Summary
+-------
+
+While many types of false positives are filtered out by the script,
+it's possible there are some cases where the script flags a change
+which does not break UAPI. It's also possible a change which *does*
+break userspace would not be flagged by this script. While the script
+has been run on much of the kernel history, there could still be corner
+cases that are not accounted for.
+
+The intention is for this script to be used as a quick check for
+maintainers or automated tooling, not as the end-all authority on
+patch compatibility. It's best to remember: use your best judgment
+(and ideally a unit test in userspace) to make sure your UAPI changes
+are backwards-compatible!
diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst
index 3d2286c683bc..efa49cdc8e2e 100644
--- a/Documentation/dev-tools/index.rst
+++ b/Documentation/dev-tools/index.rst
@@ -31,6 +31,7 @@ Documentation/dev-tools/testing-overview.rst
    kselftest
    kunit/index
    ktap
+   checkuapi
 
 
 .. only::  subproject and html
diff --git a/Documentation/devicetree/bindings/dma/dma-controller.yaml b/Documentation/devicetree/bindings/dma/dma-controller.yaml
index 04d150d4d15d..e6afca558c2d 100644
--- a/Documentation/devicetree/bindings/dma/dma-controller.yaml
+++ b/Documentation/devicetree/bindings/dma/dma-controller.yaml
@@ -19,19 +19,4 @@ properties:
 
 additionalProperties: true
 
-examples:
-  - |
-    dma: dma-controller@48000000 {
-        compatible = "ti,omap-sdma";
-        reg = <0x48000000 0x1000>;
-        interrupts = <0 12 0x4>,
-                     <0 13 0x4>,
-                     <0 14 0x4>,
-                     <0 15 0x4>;
-        #dma-cells = <1>;
-        dma-channels = <32>;
-        dma-requests = <127>;
-        dma-channel-mask = <0xfffe>;
-    };
-
 ...
diff --git a/Documentation/devicetree/bindings/dma/dma-router.yaml b/Documentation/devicetree/bindings/dma/dma-router.yaml
index 346fe0fa4460..5ad2febc581e 100644
--- a/Documentation/devicetree/bindings/dma/dma-router.yaml
+++ b/Documentation/devicetree/bindings/dma/dma-router.yaml
@@ -40,15 +40,4 @@ required:
 
 additionalProperties: true
 
-examples:
-  - |
-    sdma_xbar: dma-router@4a002b78 {
-        compatible = "ti,dra7-dma-crossbar";
-        reg = <0x4a002b78 0xfc>;
-        #dma-cells = <1>;
-        dma-requests = <205>;
-        ti,dma-safe-map = <0>;
-        dma-masters = <&sdma>;
-    };
-
 ...
diff --git a/Documentation/devicetree/bindings/dma/loongson,ls2x-apbdma.yaml b/Documentation/devicetree/bindings/dma/loongson,ls2x-apbdma.yaml
new file mode 100644
index 000000000000..6a1b49a49a64
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/loongson,ls2x-apbdma.yaml
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/dma/loongson,ls2x-apbdma.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Loongson LS2X APB DMA controller
+
+description:
+  The Loongson LS2X APB DMA controller is used for transferring data
+  between system memory and the peripherals on the APB bus.
+
+maintainers:
+  - Binbin Zhou <zhoubinbin@loongson.cn>
+
+allOf:
+  - $ref: dma-controller.yaml#
+
+properties:
+  compatible:
+    oneOf:
+      - const: loongson,ls2k1000-apbdma
+      - items:
+          - const: loongson,ls2k0500-apbdma
+          - const: loongson,ls2k1000-apbdma
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  '#dma-cells':
+    const: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - '#dma-cells'
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/clock/loongson,ls2k-clk.h>
+
+    dma-controller@1fe00c00 {
+        compatible = "loongson,ls2k1000-apbdma";
+        reg = <0x1fe00c00 0x8>;
+        interrupt-parent = <&liointc1>;
+        interrupts = <12 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&clk LOONGSON2_APB_CLK>;
+        #dma-cells = <1>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.yaml b/Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.yaml
index 4003dbe94940..877147e95ecc 100644
--- a/Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.yaml
+++ b/Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.yaml
@@ -53,6 +53,9 @@ properties:
       ADMA_CHn_CTRL register.
     const: 1
 
+  dma-channel-mask:
+    maxItems: 1
+
 required:
   - compatible
   - reg
diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
index 88d0de3d1b46..deb64cb9ca3e 100644
--- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
+++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
@@ -32,6 +32,8 @@ properties:
               - qcom,sm8350-gpi-dma
               - qcom,sm8450-gpi-dma
               - qcom,sm8550-gpi-dma
+              - qcom,sm8650-gpi-dma
+              - qcom,x1e80100-gpi-dma
           - const: qcom,sm6350-gpi-dma
       - items:
           - enum:
diff --git a/Documentation/devicetree/bindings/dma/renesas,rz-dmac.yaml b/Documentation/devicetree/bindings/dma/renesas,rz-dmac.yaml
index c284abc6784a..a42b6a26a6d3 100644
--- a/Documentation/devicetree/bindings/dma/renesas,rz-dmac.yaml
+++ b/Documentation/devicetree/bindings/dma/renesas,rz-dmac.yaml
@@ -16,7 +16,7 @@ properties:
   compatible:
     items:
       - enum:
-          - renesas,r9a07g043-dmac # RZ/G2UL
+          - renesas,r9a07g043-dmac # RZ/G2UL and RZ/Five
           - renesas,r9a07g044-dmac # RZ/G2{L,LC}
           - renesas,r9a07g054-dmac # RZ/V2L
       - const: renesas,rz-dmac
diff --git a/Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml b/Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml
index a1af0b906365..3b22183a1a37 100644
--- a/Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml
+++ b/Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml
@@ -29,6 +29,7 @@ properties:
   compatible:
     items:
       - enum:
+          - microchip,mpfs-pdma
           - sifive,fu540-c000-pdma
       - const: sifive,pdma0
     description:
diff --git a/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml
index 4ca300a42a99..27b8e1636560 100644
--- a/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml
+++ b/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml
@@ -37,11 +37,11 @@ properties:
 
   reg:
     minItems: 3
-    maxItems: 5
+    maxItems: 9
 
   reg-names:
     minItems: 3
-    maxItems: 5
+    maxItems: 9
 
   "#dma-cells":
     const: 3
@@ -141,7 +141,10 @@ allOf:
         ti,sci-rm-range-tchan: false
 
         reg:
-          maxItems: 3
+          items:
+            - description: BCDMA Control/Status Registers region
+            - description: RX Channel Realtime Registers region
+            - description: Ring Realtime Registers region
 
         reg-names:
           items:
@@ -161,14 +164,29 @@ allOf:
       properties:
         reg:
           minItems: 5
+          items:
+            - description: BCDMA Control/Status Registers region
+            - description: Block Copy Channel Realtime Registers region
+            - description: RX Channel Realtime Registers region
+            - description: TX Channel Realtime Registers region
+            - description: Ring Realtime Registers region
+            - description: Ring Configuration Registers region
+            - description: TX Channel Configuration Registers region
+            - description: RX Channel Configuration Registers region
+            - description: Block Copy Channel Configuration Registers region
 
         reg-names:
+          minItems: 5
           items:
             - const: gcfg
             - const: bchanrt
             - const: rchanrt
             - const: tchanrt
             - const: ringrt
+            - const: ring
+            - const: tchan
+            - const: rchan
+            - const: bchan
 
       required:
         - ti,sci-rm-range-bchan
@@ -184,7 +202,11 @@ allOf:
         ti,sci-rm-range-bchan: false
 
         reg:
-          maxItems: 4
+          items:
+            - description: BCDMA Control/Status Registers region
+            - description: RX Channel Realtime Registers region
+            - description: TX Channel Realtime Registers region
+            - description: Ring Realtime Registers region
 
         reg-names:
           items:
@@ -220,8 +242,13 @@ examples:
                       <0x0 0x4c000000 0x0 0x20000>,
                       <0x0 0x4a820000 0x0 0x20000>,
                       <0x0 0x4aa40000 0x0 0x20000>,
-                      <0x0 0x4bc00000 0x0 0x100000>;
-                reg-names = "gcfg", "bchanrt", "rchanrt", "tchanrt", "ringrt";
+                      <0x0 0x4bc00000 0x0 0x100000>,
+                      <0x0 0x48600000 0x0 0x8000>,
+                      <0x0 0x484a4000 0x0 0x2000>,
+                      <0x0 0x484c2000 0x0 0x2000>,
+                      <0x0 0x48420000 0x0 0x2000>;
+                reg-names = "gcfg", "bchanrt", "rchanrt", "tchanrt", "ringrt",
+                            "ring", "tchan", "rchan", "bchan";
                 msi-parent = <&inta_main_dmss>;
                 #dma-cells = <3>;
 
diff --git a/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml
index a69f62f854d8..11e064c02994 100644
--- a/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml
+++ b/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml
@@ -45,14 +45,28 @@ properties:
       The second cell is the ASEL value for the channel
 
   reg:
-    maxItems: 4
+    minItems: 4
+    items:
+      - description: Packet DMA Control/Status Registers region
+      - description: RX Channel Realtime Registers region
+      - description: TX Channel Realtime Registers region
+      - description: Ring Realtime Registers region
+      - description: Ring Configuration Registers region
+      - description: TX Configuration Registers region
+      - description: RX Configuration Registers region
+      - description: RX Flow Configuration Registers region
 
   reg-names:
+    minItems: 4
     items:
       - const: gcfg
       - const: rchanrt
       - const: tchanrt
       - const: ringrt
+      - const: ring
+      - const: tchan
+      - const: rchan
+      - const: rflow
 
   msi-parent: true
 
@@ -136,8 +150,14 @@ examples:
                 reg = <0x0 0x485c0000 0x0 0x100>,
                       <0x0 0x4a800000 0x0 0x20000>,
                       <0x0 0x4aa00000 0x0 0x40000>,
-                      <0x0 0x4b800000 0x0 0x400000>;
-                reg-names = "gcfg", "rchanrt", "tchanrt", "ringrt";
+                      <0x0 0x4b800000 0x0 0x400000>,
+                      <0x0 0x485e0000 0x0 0x20000>,
+                      <0x0 0x484a0000 0x0 0x4000>,
+                      <0x0 0x484c0000 0x0 0x2000>,
+                      <0x0 0x48430000 0x0 0x4000>;
+                reg-names = "gcfg", "rchanrt", "tchanrt", "ringrt",
+                            "ring", "tchan", "rchan", "rflow";
+
                 msi-parent = <&inta_main_dmss>;
                 #dma-cells = <2>;
 
diff --git a/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml
index 22f6c5e2f7f4..b18cf2bfdb5b 100644
--- a/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml
+++ b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml
@@ -69,13 +69,24 @@ properties:
       - ti,j721e-navss-mcu-udmap
 
   reg:
-    maxItems: 3
+    minItems: 3
+    items:
+      - description: UDMA-P Control/Status Registers region
+      - description: RX Channel Realtime Registers region
+      - description: TX Channel Realtime Registers region
+      - description: TX Configuration Registers region
+      - description: RX Configuration Registers region
+      - description: RX Flow Configuration Registers region
 
   reg-names:
+    minItems: 3
     items:
       - const: gcfg
       - const: rchanrt
       - const: tchanrt
+      - const: tchan
+      - const: rchan
+      - const: rflow
 
   msi-parent: true
 
@@ -158,8 +169,11 @@ examples:
                 compatible = "ti,am654-navss-main-udmap";
                 reg = <0x0 0x31150000 0x0 0x100>,
                       <0x0 0x34000000 0x0 0x100000>,
-                      <0x0 0x35000000 0x0 0x100000>;
-                reg-names = "gcfg", "rchanrt", "tchanrt";
+                      <0x0 0x35000000 0x0 0x100000>,
+                      <0x0 0x30b00000 0x0 0x20000>,
+                      <0x0 0x30c00000 0x0 0x8000>,
+                      <0x0 0x30d00000 0x0 0x4000>;
+                reg-names = "gcfg", "rchanrt", "tchanrt", "tchan", "rchan", "rflow";
                 #dma-cells = <1>;
 
                 ti,ringacc = <&ringacc>;
diff --git a/Documentation/devicetree/bindings/interrupt-controller/loongson,liointc.yaml b/Documentation/devicetree/bindings/interrupt-controller/loongson,liointc.yaml
index 00b570c82903..60441f0c5d72 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/loongson,liointc.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/loongson,liointc.yaml
@@ -11,8 +11,13 @@ maintainers:
 
 description: |
   This interrupt controller is found in the Loongson-3 family of chips and
-  Loongson-2K1000 chip, as the primary package interrupt controller which
+  Loongson-2K series chips, as the primary package interrupt controller which
   can route local I/O interrupt to interrupt lines of cores.
+  Be aware of the following points.
+  1. The Loongson-2K0500 is a single-core CPU;
+  2. The Loongson-2K0500/2K1000 has 64 device interrupt sources as inputs, so we
+    need to define two nodes in dts{i} to describe the "0-31" and "32-63" interrupt
+    sources respectively.
 
 allOf:
   - $ref: /schemas/interrupt-controller.yaml#
@@ -33,6 +38,7 @@ properties:
       - const: main
       - const: isr0
       - const: isr1
+    minItems: 2
 
   interrupt-controller: true
 
@@ -45,11 +51,9 @@ properties:
   interrupt-names:
     description: List of names for the parent interrupts.
     items:
-      - const: int0
-      - const: int1
-      - const: int2
-      - const: int3
+      pattern: int[0-3]
     minItems: 1
+    maxItems: 4
 
   '#interrupt-cells':
     const: 2
@@ -69,6 +73,7 @@ required:
   - compatible
   - reg
   - interrupts
+  - interrupt-names
   - interrupt-controller
   - '#interrupt-cells'
   - loongson,parent_int_map
@@ -86,7 +91,8 @@ if:
 then:
   properties:
     reg:
-      minItems: 3
+      minItems: 2
+      maxItems: 3
 
   required:
     - reg-names
diff --git a/Documentation/devicetree/bindings/loongarch/cpus.yaml b/Documentation/devicetree/bindings/loongarch/cpus.yaml
new file mode 100644
index 000000000000..f175872995e1
--- /dev/null
+++ b/Documentation/devicetree/bindings/loongarch/cpus.yaml
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/loongarch/cpus.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: LoongArch CPUs
+
+maintainers:
+  - Binbin Zhou <zhoubinbin@loongson.cn>
+
+description:
+  This document describes the list of LoongArch CPU cores that support FDT.
+  It describes the layout of CPUs in a system through the "cpus" node.
+
+allOf:
+  - $ref: /schemas/cpu.yaml#
+
+properties:
+  compatible:
+    enum:
+      - loongson,la264
+      - loongson,la364
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - clocks
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/loongson,ls2k-clk.h>
+
+    cpus {
+        #size-cells = <0>;
+        #address-cells = <1>;
+
+        cpu@0 {
+            compatible = "loongson,la264";
+            device_type = "cpu";
+            reg = <0>;
+            clocks = <&clk LOONGSON2_NODE_CLK>;
+        };
+
+        cpu@1 {
+            compatible = "loongson,la264";
+            device_type = "cpu";
+            reg = <1>;
+            clocks = <&clk LOONGSON2_NODE_CLK>;
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/loongarch/loongson.yaml b/Documentation/devicetree/bindings/loongarch/loongson.yaml
new file mode 100644
index 000000000000..e1a4a97b7576
--- /dev/null
+++ b/Documentation/devicetree/bindings/loongarch/loongson.yaml
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/loongarch/loongson.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Loongson SoC-based boards
+
+maintainers:
+  - Binbin Zhou <zhoubinbin@loongson.cn>
+
+properties:
+  $nodename:
+    const: '/'
+  compatible:
+    oneOf:
+      - description: Loongson-2K0500 processor based boards
+        items:
+          - const: loongson,ls2k0500-ref
+          - const: loongson,ls2k0500
+
+      - description: Loongson-2K1000 processor based boards
+        items:
+          - const: loongson,ls2k1000-ref
+          - const: loongson,ls2k1000
+
+      - description: Loongson-2K2000 processor based boards
+        items:
+          - const: loongson,ls2k2000-ref
+          - const: loongson,ls2k2000
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/power/reset/nvmem-reboot-mode.yaml b/Documentation/devicetree/bindings/power/reset/nvmem-reboot-mode.yaml
index 14a262bcbf7c..627f8a6078c2 100644
--- a/Documentation/devicetree/bindings/power/reset/nvmem-reboot-mode.yaml
+++ b/Documentation/devicetree/bindings/power/reset/nvmem-reboot-mode.yaml
@@ -28,17 +28,15 @@ properties:
     items:
       - const: reboot-mode
 
-patternProperties:
-  "^mode-.+":
-    $ref: /schemas/types.yaml#/definitions/uint32
-    description: Vendor-specific mode value written to the mode register
+allOf:
+  - $ref: reboot-mode.yaml#
 
 required:
   - compatible
   - nvmem-cells
   - nvmem-cell-names
 
-additionalProperties: false
+unevaluatedProperties: false
 
 examples:
   - |
diff --git a/Documentation/devicetree/bindings/power/reset/qcom,pon.yaml b/Documentation/devicetree/bindings/power/reset/qcom,pon.yaml
index 5e460128b0d1..fc8105a7b9b2 100644
--- a/Documentation/devicetree/bindings/power/reset/qcom,pon.yaml
+++ b/Documentation/devicetree/bindings/power/reset/qcom,pon.yaml
@@ -111,21 +111,24 @@ examples:
    #include <dt-bindings/interrupt-controller/irq.h>
    #include <dt-bindings/input/linux-event-codes.h>
    #include <dt-bindings/spmi/spmi.h>
-   spmi_bus: spmi@c440000 {
+
+   spmi@c440000 {
      reg = <0x0c440000 0x1100>;
      #address-cells = <2>;
      #size-cells = <0>;
-     pmk8350: pmic@0 {
+
+     pmic@0 {
        reg = <0x0 SPMI_USID>;
        #address-cells = <1>;
        #size-cells = <0>;
-       pmk8350_pon: pon_hlos@1300 {
-         reg = <0x1300>;
+
+       pon@800 {
          compatible = "qcom,pm8998-pon";
+         reg = <0x800>;
 
          pwrkey {
             compatible = "qcom,pm8941-pwrkey";
-            interrupts = < 0x0 0x8 0 IRQ_TYPE_EDGE_BOTH >;
+            interrupts = <0x0 0x8 0 IRQ_TYPE_EDGE_BOTH>;
             debounce = <15625>;
             bias-pull-up;
             linux,code = <KEY_POWER>;
diff --git a/Documentation/devicetree/bindings/power/reset/syscon-reboot-mode.yaml b/Documentation/devicetree/bindings/power/reset/syscon-reboot-mode.yaml
index 9b1ffceefe3d..b6acff199cde 100644
--- a/Documentation/devicetree/bindings/power/reset/syscon-reboot-mode.yaml
+++ b/Documentation/devicetree/bindings/power/reset/syscon-reboot-mode.yaml
@@ -29,12 +29,10 @@ properties:
     $ref: /schemas/types.yaml#/definitions/uint32
     description: Offset in the register map for the mode register (in bytes)
 
-patternProperties:
-  "^mode-.+":
-    $ref: /schemas/types.yaml#/definitions/uint32
-    description: Vendor-specific mode value written to the mode register
+allOf:
+  - $ref: reboot-mode.yaml#
 
-additionalProperties: false
+unevaluatedProperties: false
 
 required:
   - compatible
diff --git a/Documentation/devicetree/bindings/power/reset/xlnx,zynqmp-power.yaml b/Documentation/devicetree/bindings/power/reset/xlnx,zynqmp-power.yaml
index 45792e216981..799831636194 100644
--- a/Documentation/devicetree/bindings/power/reset/xlnx,zynqmp-power.yaml
+++ b/Documentation/devicetree/bindings/power/reset/xlnx,zynqmp-power.yaml
@@ -57,7 +57,7 @@ examples:
 
     firmware {
       zynqmp-firmware {
-        zynqmp-power {
+        power-management {
           compatible = "xlnx,zynqmp-power";
           interrupts = <0 35 4>;
         };
@@ -70,7 +70,7 @@ examples:
 
     firmware {
       zynqmp-firmware {
-        zynqmp-power {
+        power-management {
           compatible = "xlnx,zynqmp-power";
           interrupt-parent = <&gic>;
           interrupts = <0 35 4>;
diff --git a/Documentation/devicetree/bindings/power/supply/bq24190.yaml b/Documentation/devicetree/bindings/power/supply/bq24190.yaml
index d3ebc9de8c0b..131b7e57d22f 100644
--- a/Documentation/devicetree/bindings/power/supply/bq24190.yaml
+++ b/Documentation/devicetree/bindings/power/supply/bq24190.yaml
@@ -20,6 +20,7 @@ properties:
       - ti,bq24192
       - ti,bq24192i
       - ti,bq24196
+      - ti,bq24296
 
   reg:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml
index 23646b684ea2..9d8670c00e3b 100644
--- a/Documentation/devicetree/bindings/riscv/cpus.yaml
+++ b/Documentation/devicetree/bindings/riscv/cpus.yaml
@@ -63,8 +63,8 @@ properties:
 
   mmu-type:
     description:
-      Identifies the MMU address translation mode used on this
-      hart.  These values originate from the RISC-V Privileged
+      Identifies the largest MMU address translation mode supported by
+      this hart.  These values originate from the RISC-V Privileged
       Specification document, available from
       https://riscv.org/specifications/
     $ref: /schemas/types.yaml#/definitions/string
@@ -80,6 +80,11 @@ properties:
     description:
       The blocksize in bytes for the Zicbom cache operations.
 
+  riscv,cbop-block-size:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      The blocksize in bytes for the Zicbop cache operations.
+
   riscv,cboz-block-size:
     $ref: /schemas/types.yaml#/definitions/uint32
     description:
diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml
index 27beedb98198..63d81dc895e5 100644
--- a/Documentation/devicetree/bindings/riscv/extensions.yaml
+++ b/Documentation/devicetree/bindings/riscv/extensions.yaml
@@ -48,7 +48,7 @@ properties:
       insensitive, letters in the riscv,isa string must be all
       lowercase.
     $ref: /schemas/types.yaml#/definitions/string
-    pattern: ^rv(?:64|32)imaf?d?q?c?b?k?j?p?v?h?(?:[hsxz](?:[a-z])+)?(?:_[hsxz](?:[a-z])+)*$
+    pattern: ^rv(?:64|32)imaf?d?q?c?b?k?j?p?v?h?(?:[hsxz](?:[0-9a-z])+)?(?:_[hsxz](?:[0-9a-z])+)*$
     deprecated: true
 
   riscv,isa-base:
diff --git a/Documentation/devicetree/bindings/sound/tas2562.yaml b/Documentation/devicetree/bindings/sound/tas2562.yaml
index f01c0dde0cf7..d28c102c0ce7 100644
--- a/Documentation/devicetree/bindings/sound/tas2562.yaml
+++ b/Documentation/devicetree/bindings/sound/tas2562.yaml
@@ -18,7 +18,6 @@ description: |
 
   Specifications about the audio amplifier can be found at:
     https://www.ti.com/lit/gpn/tas2562
-    https://www.ti.com/lit/gpn/tas2563
     https://www.ti.com/lit/gpn/tas2564
     https://www.ti.com/lit/gpn/tas2110
 
@@ -29,7 +28,6 @@ properties:
   compatible:
     enum:
       - ti,tas2562
-      - ti,tas2563
       - ti,tas2564
       - ti,tas2110
 
diff --git a/Documentation/devicetree/bindings/sound/ti,tas2781.yaml b/Documentation/devicetree/bindings/sound/ti,tas2781.yaml
index a69e6c223308..976238689249 100644
--- a/Documentation/devicetree/bindings/sound/ti,tas2781.yaml
+++ b/Documentation/devicetree/bindings/sound/ti,tas2781.yaml
@@ -5,36 +5,46 @@
 $id: http://devicetree.org/schemas/sound/ti,tas2781.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Texas Instruments TAS2781 SmartAMP
+title: Texas Instruments TAS2563/TAS2781 SmartAMP
 
 maintainers:
   - Shenghao Ding <shenghao-ding@ti.com>
 
-description:
-  The TAS2781 is a mono, digital input Class-D audio amplifier
-  optimized for efficiently driving high peak power into small
-  loudspeakers. An integrated on-chip DSP supports Texas Instruments
-  Smart Amp speaker protection algorithm. The integrated speaker
-  voltage and current sense provides for real time
+description: |
+  The TAS2563/TAS2781 is a mono, digital input Class-D audio
+  amplifier optimized for efficiently driving high peak power into
+  small loudspeakers. An integrated on-chip DSP supports Texas
+  Instruments Smart Amp speaker protection algorithm. The
+  integrated speaker voltage and current sense provides for real time
   monitoring of loudspeaker behavior.
 
-allOf:
-  - $ref: dai-common.yaml#
+  Specifications about the audio amplifier can be found at:
+    https://www.ti.com/lit/gpn/tas2563
+    https://www.ti.com/lit/gpn/tas2781
 
 properties:
   compatible:
-    enum:
-      - ti,tas2781
+    description: |
+      ti,tas2563: 6.1-W Boosted Class-D Audio Amplifier With Integrated
+      DSP and IV Sense, 16/20/24/32bit stereo I2S or multichannel TDM.
+
+      ti,tas2781: 24-V Class-D Amplifier with Real Time Integrated Speaker
+      Protection and Audio Processing, 16/20/24/32bit stereo I2S or
+      multichannel TDM.
+    oneOf:
+      - items:
+          - enum:
+              - ti,tas2563
+          - const: ti,tas2781
+      - enum:
+          - ti,tas2781
 
   reg:
     description:
-      I2C address, in multiple tas2781s case, all the i2c address
+      I2C address, in multiple-AMP case, all the i2c address
       aggregate as one Audio Device to support multiple audio slots.
     maxItems: 8
     minItems: 1
-    items:
-      minimum: 0x38
-      maximum: 0x3f
 
   reset-gpios:
     maxItems: 1
@@ -49,6 +59,44 @@ required:
   - compatible
   - reg
 
+allOf:
+  - $ref: dai-common.yaml#
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - ti,tas2563
+    then:
+      properties:
+        reg:
+          description:
+            I2C address. In the multiple-AMP case, all the I2C addresses are
+            aggregated as one audio device to support multiple audio slots.
+          maxItems: 4
+          minItems: 1
+          items:
+            minimum: 0x4c
+            maximum: 0x4f
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - ti,tas2781
+    then:
+      properties:
+        reg:
+          description:
+            I2C address. In the multiple-AMP case, all the I2C addresses are
+            aggregated as one audio device to support multiple audio slots.
+          maxItems: 8
+          minItems: 1
+          items:
+            minimum: 0x38
+            maximum: 0x3f
+
 additionalProperties: false
 
 examples:
diff --git a/Documentation/devicetree/bindings/timer/sifive,clint.yaml b/Documentation/devicetree/bindings/timer/sifive,clint.yaml
index 4b6c20fc8194..fced6f2d8ecb 100644
--- a/Documentation/devicetree/bindings/timer/sifive,clint.yaml
+++ b/Documentation/devicetree/bindings/timer/sifive,clint.yaml
@@ -33,6 +33,7 @@ properties:
               - sifive,fu540-c000-clint # SiFive FU540
               - starfive,jh7100-clint   # StarFive JH7100
               - starfive,jh7110-clint   # StarFive JH7110
+              - starfive,jh8100-clint   # StarFive JH8100
           - const: sifive,clint0        # SiFive CLINT v0 IP block
       - items:
           - enum:
diff --git a/Documentation/devicetree/bindings/timer/thead,c900-aclint-mtimer.yaml b/Documentation/devicetree/bindings/timer/thead,c900-aclint-mtimer.yaml
index fbd235650e52..2e92bcdeb423 100644
--- a/Documentation/devicetree/bindings/timer/thead,c900-aclint-mtimer.yaml
+++ b/Documentation/devicetree/bindings/timer/thead,c900-aclint-mtimer.yaml
@@ -17,7 +17,12 @@ properties:
       - const: thead,c900-aclint-mtimer
 
   reg:
-    maxItems: 1
+    items:
+      - description: MTIMECMP Registers
+
+  reg-names:
+    items:
+      - const: mtimecmp
 
   interrupts-extended:
     minItems: 1
@@ -28,6 +33,7 @@ additionalProperties: false
 required:
   - compatible
   - reg
+  - reg-names
   - interrupts-extended
 
 examples:
@@ -39,5 +45,6 @@ examples:
                             <&cpu3intc 7>,
                             <&cpu4intc 7>;
       reg = <0xac000000 0x00010000>;
+      reg-names = "mtimecmp";
     };
 ...
diff --git a/Documentation/features/vm/TLB/arch-support.txt b/Documentation/features/vm/TLB/arch-support.txt
index 8fd22073a847..d222bd3ee749 100644
--- a/Documentation/features/vm/TLB/arch-support.txt
+++ b/Documentation/features/vm/TLB/arch-support.txt
@@ -20,7 +20,7 @@
     |    openrisc: |  ..  |
     |      parisc: | TODO |
     |     powerpc: | TODO |
-    |       riscv: | TODO |
+    |       riscv: |  ok  |
     |        s390: | TODO |
     |          sh: | TODO |
     |       sparc: | TODO |
diff --git a/Documentation/filesystems/netfs_library.rst b/Documentation/filesystems/netfs_library.rst
index 48b95d04f72d..4cc657d743f7 100644
--- a/Documentation/filesystems/netfs_library.rst
+++ b/Documentation/filesystems/netfs_library.rst
@@ -295,7 +295,6 @@ through which it can issue requests and negotiate::
 	struct netfs_request_ops {
 		void (*init_request)(struct netfs_io_request *rreq, struct file *file);
 		void (*free_request)(struct netfs_io_request *rreq);
-		int (*begin_cache_operation)(struct netfs_io_request *rreq);
 		void (*expand_readahead)(struct netfs_io_request *rreq);
 		bool (*clamp_length)(struct netfs_io_subrequest *subreq);
 		void (*issue_read)(struct netfs_io_subrequest *subreq);
@@ -317,20 +316,6 @@ The operations are as follows:
    [Optional] This is called as the request is being deallocated so that the
    filesystem can clean up any state it has attached there.
 
- * ``begin_cache_operation()``
-
-   [Optional] This is called to ask the network filesystem to call into the
-   cache (if present) to initialise the caching state for this read.  The netfs
-   library module cannot access the cache directly, so the cache should call
-   something like fscache_begin_read_operation() to do this.
-
-   The cache gets to store its state in ->cache_resources and must set a table
-   of operations of its own there (though of a different type).
-
-   This should return 0 on success and an error code otherwise.  If an error is
-   reported, the operation may proceed anyway, just without local caching (only
-   out of memory and interruption errors cause failure here).
-
  * ``expand_readahead()``
 
    [Optional] This is called to allow the filesystem to expand the size of a
@@ -460,14 +445,14 @@ When implementing a local cache to be used by the read helpers, two things are
 required: some way for the network filesystem to initialise the caching for a
 read request and a table of operations for the helpers to call.
 
-The network filesystem's ->begin_cache_operation() method is called to set up a
-cache and this must call into the cache to do the work.  If using fscache, for
-example, the cache would call::
+To begin a cache operation on an fscache object, the following function is
+called::
 
 	int fscache_begin_read_operation(struct netfs_io_request *rreq,
 					 struct fscache_cookie *cookie);
 
-passing in the request pointer and the cookie corresponding to the file.
+passing in the request pointer and the cookie corresponding to the file.  This
+fills in the cache resources mentioned below.
 
 The netfs_io_request object contains a place for the cache to hang its
 state::
diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst
index 1c244866041a..165514401441 100644
--- a/Documentation/filesystems/overlayfs.rst
+++ b/Documentation/filesystems/overlayfs.rst
@@ -145,7 +145,9 @@ filesystem, an overlay filesystem needs to record in the upper filesystem
 that files have been removed.  This is done using whiteouts and opaque
 directories (non-directories are always opaque).
 
-A whiteout is created as a character device with 0/0 device number.
+A whiteout is created as a character device with 0/0 device number or
+as a zero-size regular file with the xattr "trusted.overlay.whiteout".
+
 When a whiteout is found in the upper level of a merged directory, any
 matching name in the lower level is ignored, and the whiteout itself
 is also hidden.
@@ -154,6 +156,13 @@ A directory is made opaque by setting the xattr "trusted.overlay.opaque"
 to "y".  Where the upper filesystem contains an opaque directory, any
 directory in the lower filesystem with the same name is ignored.
 
+An opaque directory should not contain any whiteouts, because they do not
+serve any purpose.  A merge directory containing regular files with the xattr
+"trusted.overlay.whiteout" should additionally be marked by setting the xattr
+"trusted.overlay.opaque" to "x" on the merge directory itself.
+This is needed to avoid the overhead of checking the "trusted.overlay.whiteout"
+on all entries during readdir in the common case.
+
 readdir
 -------
 
@@ -534,8 +543,9 @@ A lower dir with a regular whiteout will always be handled by the overlayfs
 mount, so to support storing an effective whiteout file in an overlayfs mount an
 alternative form of whiteout is supported. This form is a regular, zero-size
 file with the "overlay.whiteout" xattr set, inside a directory with the
-"overlay.whiteouts" xattr set. Such whiteouts are never created by overlayfs,
-but can be used by userspace tools (like containers) that generate lower layers.
+"overlay.opaque" xattr set to "x" (see `whiteouts and opaque directories`_).
+These alternative whiteouts are never created by overlayfs, but can be used by
+userspace tools (like containers) that generate lower layers.
 These alternative whiteouts can be escaped using the standard xattr escape
 mechanism in order to properly nest to any depth.
 
diff --git a/Documentation/filesystems/smb/ksmbd.rst b/Documentation/filesystems/smb/ksmbd.rst
index 7bed96d794fc..6b30e43a0d11 100644
--- a/Documentation/filesystems/smb/ksmbd.rst
+++ b/Documentation/filesystems/smb/ksmbd.rst
@@ -73,15 +73,14 @@ Auto Negotiation               Supported.
 Compound Request               Supported.
 Oplock Cache Mechanism         Supported.
 SMB2 leases(v1 lease)          Supported.
-Directory leases(v2 lease)     Planned for future.
+Directory leases(v2 lease)     Supported.
 Multi-credits                  Supported.
 NTLM/NTLMv2                    Supported.
 HMAC-SHA256 Signing            Supported.
 Secure negotiate               Supported.
 Signing Update                 Supported.
 Pre-authentication integrity   Supported.
-SMB3 encryption(CCM, GCM)      Supported. (CCM and GCM128 supported, GCM256 in
-                               progress)
+SMB3 encryption(CCM, GCM)      Supported. (CCM/GCM128 and CCM/GCM256 supported)
 SMB direct(RDMA)               Supported.
 SMB3 Multi-channel             Partially Supported. Planned to implement
                                replay/retry mechanisms for future.
@@ -112,6 +111,10 @@ DCE/RPC support                Partially Supported. a few calls(NetShareEnumAll,
                                for Witness protocol e.g.)
 ksmbd/nfsd interoperability    Planned for future. The features that ksmbd
                                support are Leases, Notify, ACLs and Share modes.
+SMB3.1.1 Compression           Planned for future.
+SMB3.1.1 over QUIC             Planned for future.
+Signing/Encryption over RDMA   Planned for future.
+SMB3.1.1 GMAC signing support  Planned for future.
 ============================== =================================================
 
 
diff --git a/Documentation/process/4.Coding.rst b/Documentation/process/4.Coding.rst
index 1f0d81f44e14..c2046dec0c2f 100644
--- a/Documentation/process/4.Coding.rst
+++ b/Documentation/process/4.Coding.rst
@@ -66,6 +66,10 @@ for aligning variables/macros, for reflowing text and other similar tasks.
 See the file :ref:`Documentation/process/clang-format.rst <clangformat>`
 for more details.
 
+Some basic editor settings, such as indentation and line endings, will be
+set automatically if you are using an editor that is compatible with
+EditorConfig. See the official EditorConfig website for more information:
+https://editorconfig.org/
 
 Abstraction layers
 ******************
diff --git a/Documentation/process/coding-style.rst b/Documentation/process/coding-style.rst
index 6db37a46d305..c48382c6b477 100644
--- a/Documentation/process/coding-style.rst
+++ b/Documentation/process/coding-style.rst
@@ -735,6 +735,10 @@ for aligning variables/macros, for reflowing text and other similar tasks.
 See the file :ref:`Documentation/process/clang-format.rst <clangformat>`
 for more details.
 
+Some basic editor settings, such as indentation and line endings, will be
+set automatically if you are using an editor that is compatible with
+EditorConfig. See the official EditorConfig website for more information:
+https://editorconfig.org/
 
 10) Kconfig configuration files
 -------------------------------
diff --git a/Documentation/rust/arch-support.rst b/Documentation/rust/arch-support.rst
index b91e9ef4d0c2..73203ba1e901 100644
--- a/Documentation/rust/arch-support.rst
+++ b/Documentation/rust/arch-support.rst
@@ -12,10 +12,11 @@ which uses ``libclang``.
 Below is a general summary of architectures that currently work. Level of
 support corresponds to ``S`` values in the ``MAINTAINERS`` file.
 
-============  ================  ==============================================
-Architecture  Level of support  Constraints
-============  ================  ==============================================
-``um``        Maintained        ``x86_64`` only.
-``x86``       Maintained        ``x86_64`` only.
-============  ================  ==============================================
+=============  ================  ==============================================
+Architecture   Level of support  Constraints
+=============  ================  ==============================================
+``loongarch``  Maintained        -
+``um``         Maintained        ``x86_64`` only.
+``x86``        Maintained        ``x86_64`` only.
+=============  ================  ==============================================
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 8709c7cd3656..92152ac346c8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3692,6 +3692,13 @@ L:	bpf@vger.kernel.org
 S:	Supported
 F:	arch/arm64/net/
 
+BPF JIT for LOONGARCH
+M:	Tiezhu Yang <yangtiezhu@loongson.cn>
+R:	Hengqi Chen <hengqi.chen@gmail.com>
+L:	bpf@vger.kernel.org
+S:	Maintained
+F:	arch/loongarch/net/
+
 BPF JIT for MIPS (32-BIT AND 64-BIT)
 M:	Johan Almbladh <johan.almbladh@anyfinetworks.com>
 M:	Paul Burton <paulburton@kernel.org>
@@ -4540,7 +4547,7 @@ F:	drivers/net/ieee802154/ca8210.c
 
 CACHEFILES: FS-CACHE BACKEND FOR CACHING ON MOUNTED FILESYSTEMS
 M:	David Howells <dhowells@redhat.com>
-L:	linux-cachefs@redhat.com (moderated for non-subscribers)
+L:	netfs@lists.linux.dev
 S:	Supported
 F:	Documentation/filesystems/caching/cachefiles.rst
 F:	fs/cachefiles/
@@ -5229,7 +5236,7 @@ X:	drivers/clk/clkdev.c
 COMMON INTERNET FILE SYSTEM CLIENT (CIFS and SMB3)
 M:	Steve French <sfrench@samba.org>
 R:	Paulo Alcantara <pc@manguebit.com> (DFS, global name space)
-R:	Ronnie Sahlberg <lsahlber@redhat.com> (directory leases, sparse files)
+R:	Ronnie Sahlberg <ronniesahlberg@gmail.com> (directory leases, sparse files)
 R:	Shyam Prasad N <sprasad@microsoft.com> (multichannel)
 R:	Tom Talpey <tom@talpey.com> (RDMA, smbdirect)
 L:	linux-cifs@vger.kernel.org
@@ -7948,12 +7955,13 @@ L:	rust-for-linux@vger.kernel.org
 S:	Maintained
 F:	rust/kernel/net/phy.rs
 
-EXEC & BINFMT API
+EXEC & BINFMT API, ELF
 R:	Eric Biederman <ebiederm@xmission.com>
 R:	Kees Cook <keescook@chromium.org>
 L:	linux-mm@kvack.org
 S:	Supported
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/execve
+F:	Documentation/userspace-api/ELF.rst
 F:	fs/*binfmt_*.c
 F:	fs/exec.c
 F:	include/linux/binfmts.h
@@ -8214,6 +8222,20 @@ S:	Supported
 F:	fs/iomap/
 F:	include/linux/iomap.h
 
+FILESYSTEMS [NETFS LIBRARY]
+M:	David Howells <dhowells@redhat.com>
+R:	Jeff Layton <jlayton@kernel.org>
+L:	netfs@lists.linux.dev
+L:	linux-fsdevel@vger.kernel.org
+S:	Supported
+F:	Documentation/filesystems/caching/
+F:	Documentation/filesystems/netfs_library.rst
+F:	fs/netfs/
+F:	include/linux/fscache*.h
+F:	include/linux/netfs.h
+F:	include/trace/events/fscache.h
+F:	include/trace/events/netfs.h
+
 FILESYSTEMS [STACKABLE]
 M:	Miklos Szeredi <miklos@szeredi.hu>
 M:	Amir Goldstein <amir73il@gmail.com>
@@ -8659,14 +8681,6 @@ F:	Documentation/power/freezing-of-tasks.rst
 F:	include/linux/freezer.h
 F:	kernel/freezer.c
 
-FS-CACHE: LOCAL CACHING FOR NETWORK FILESYSTEMS
-M:	David Howells <dhowells@redhat.com>
-L:	linux-cachefs@redhat.com (moderated for non-subscribers)
-S:	Supported
-F:	Documentation/filesystems/caching/
-F:	fs/fscache/
-F:	include/linux/fscache*.h
-
 FSCRYPT: FILE SYSTEM LEVEL ENCRYPTION SUPPORT
 M:	Eric Biggers <ebiggers@kernel.org>
 M:	Theodore Y. Ts'o <tytso@mit.edu>
@@ -12627,6 +12641,13 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/gpio/loongson,ls-gpio.yaml
 F:	drivers/gpio/gpio-loongson-64bit.c
 
+LOONGSON LS2X APB DMA DRIVER
+M:	Binbin Zhou <zhoubinbin@loongson.cn>
+L:	dmaengine@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/dma/loongson,ls2x-apbdma.yaml
+F:	drivers/dma/ls2x-apb-dma.c
+
 LOONGSON LS2X I2C DRIVER
 M:	Binbin Zhou <zhoubinbin@loongson.cn>
 L:	linux-i2c@vger.kernel.org
@@ -17128,10 +17149,10 @@ PERFORMANCE EVENTS SUBSYSTEM
 M:	Peter Zijlstra <peterz@infradead.org>
 M:	Ingo Molnar <mingo@redhat.com>
 M:	Arnaldo Carvalho de Melo <acme@kernel.org>
+M:	Namhyung Kim <namhyung@kernel.org>
 R:	Mark Rutland <mark.rutland@arm.com>
 R:	Alexander Shishkin <alexander.shishkin@linux.intel.com>
 R:	Jiri Olsa <jolsa@kernel.org>
-R:	Namhyung Kim <namhyung@kernel.org>
 R:	Ian Rogers <irogers@google.com>
 R:	Adrian Hunter <adrian.hunter@intel.com>
 L:	linux-perf-users@vger.kernel.org
diff --git a/Makefile b/Makefile
index f1b2fd977275..9f9b76d3a4b7 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 6
-PATCHLEVEL = 7
+PATCHLEVEL = 8
 SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc1
 NAME = Hurr durr I'ma ninja sloth
 
 # *DOCUMENTATION*
@@ -155,6 +155,15 @@ endif
 
 export KBUILD_EXTMOD
 
+# backward compatibility
+KBUILD_EXTRA_WARN ?= $(KBUILD_ENABLE_EXTRA_GCC_CHECKS)
+
+ifeq ("$(origin W)", "command line")
+  KBUILD_EXTRA_WARN := $(W)
+endif
+
+export KBUILD_EXTRA_WARN
+
 # Kbuild will save output files in the current working directory.
 # This does not need to match to the root of the kernel source tree.
 #
@@ -181,14 +190,11 @@ ifeq ("$(origin O)", "command line")
 endif
 
 ifneq ($(KBUILD_OUTPUT),)
-# Make's built-in functions such as $(abspath ...), $(realpath ...) cannot
-# expand a shell special character '~'. We use a somewhat tedious way here.
-abs_objtree := $(shell mkdir -p $(KBUILD_OUTPUT) && cd $(KBUILD_OUTPUT) && pwd)
-$(if $(abs_objtree),, \
-     $(error failed to create output directory "$(KBUILD_OUTPUT)"))
-
+# $(realpath ...) gets empty if the path does not exist. Run 'mkdir -p' first.
+$(shell mkdir -p "$(KBUILD_OUTPUT)")
 # $(realpath ...) resolves symlinks
-abs_objtree := $(realpath $(abs_objtree))
+abs_objtree := $(realpath $(KBUILD_OUTPUT))
+$(if $(abs_objtree),,$(error failed to create output directory "$(KBUILD_OUTPUT)"))
 endif # ifneq ($(KBUILD_OUTPUT),)
 
 ifneq ($(words $(subst :, ,$(abs_srctree))), 1)
@@ -609,8 +615,6 @@ export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL KBUILD_RUSTFLAGS_KERNEL
 export RCS_FIND_IGNORE := \( -name SCCS -o -name BitKeeper -o -name .svn -o    \
 			  -name CVS -o -name .pc -o -name .hg -o -name .git \) \
 			  -prune -o
-export RCS_TAR_IGNORE := --exclude SCCS --exclude BitKeeper --exclude .svn \
-			 --exclude CVS --exclude .pc --exclude .hg --exclude .git
 
 # ===========================================================================
 # Rules shared between *config targets and build targets
@@ -982,6 +986,10 @@ NOSTDINC_FLAGS += -nostdinc
 # perform bounds checking.
 KBUILD_CFLAGS += $(call cc-option, -fstrict-flex-arrays=3)
 
+# Currently, disable -Wstringop-overflow for GCC 11, globally.
+KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-option, -Wno-stringop-overflow)
+KBUILD_CFLAGS-$(CONFIG_CC_STRINGOP_OVERFLOW) += $(call cc-option, -Wstringop-overflow)
+
 # disable invalid "can't wrap" optimizations for signed / pointers
 KBUILD_CFLAGS	+= -fno-strict-overflow
 
@@ -1662,6 +1670,7 @@ help:
 	@echo  '		1: warnings which may be relevant and do not occur too often'
 	@echo  '		2: warnings which occur quite often but may still be relevant'
 	@echo  '		3: more obscure warnings, can most likely be ignored'
+	@echo  '		c: extra checks in the configuration stage (Kconfig)'
 	@echo  '		e: warnings are being treated as errors'
 	@echo  '		Multiple levels can be combined with W=12 or W=123'
 	@$(if $(dtstree), \
diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig
index feb38a94c1a7..43bc1255a5db 100644
--- a/arch/arm/configs/mxs_defconfig
+++ b/arch/arm/configs/mxs_defconfig
@@ -138,7 +138,8 @@ CONFIG_PWM_MXS=y
 CONFIG_NVMEM_MXS_OCOTP=y
 CONFIG_EXT4_FS=y
 # CONFIG_DNOTIFY is not set
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
 CONFIG_FSCACHE_STATS=y
 CONFIG_CACHEFILES=m
 CONFIG_VFAT_FS=y
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index ea01a2c43efa..aa7c1d435139 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1039,8 +1039,12 @@ config ARM64_ERRATUM_2645198
 
 	  If unsure, say Y.
 
+config ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
+	bool
+
 config ARM64_ERRATUM_2966298
 	bool "Cortex-A520: 2966298: workaround for speculatively executed unprivileged load"
+	select ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
 	default y
 	help
 	  This option adds the workaround for ARM Cortex-A520 erratum 2966298.
@@ -1052,6 +1056,20 @@ config ARM64_ERRATUM_2966298
 
 	  If unsure, say Y.
 
+config ARM64_ERRATUM_3117295
+	bool "Cortex-A510: 3117295: workaround for speculatively executed unprivileged load"
+	select ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
+	default y
+	help
+	  This option adds the workaround for ARM Cortex-A510 erratum 3117295.
+
+	  On an affected Cortex-A510 core, a speculatively executed unprivileged
+	  load might leak data from a privileged level via a cache side channel.
+
+	  Work around this problem by executing a TLBI before returning to EL0.
+
+	  If unsure, say Y.
+
 config CAVIUM_ERRATUM_22375
 	bool "Cavium erratum 22375, 24313"
 	default y
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 7b1975bf4b90..513787e43329 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -760,32 +760,25 @@ alternative_endif
 .endm
 
 	/*
-	 * Check whether preempt/bh-disabled asm code should yield as soon as
-	 * it is able. This is the case if we are currently running in task
-	 * context, and either a softirq is pending, or the TIF_NEED_RESCHED
-	 * flag is set and re-enabling preemption a single time would result in
-	 * a preempt count of zero. (Note that the TIF_NEED_RESCHED flag is
-	 * stored negated in the top word of the thread_info::preempt_count
+	 * Check whether asm code should yield as soon as it is able. This is
+	 * the case if we are currently running in task context, and the
+	 * TIF_NEED_RESCHED flag is set. (Note that the TIF_NEED_RESCHED flag
+	 * is stored negated in the top word of the thread_info::preempt_count
 	 * field)
 	 */
-	.macro		cond_yield, lbl:req, tmp:req, tmp2:req
+	.macro		cond_yield, lbl:req, tmp:req, tmp2
+#ifdef CONFIG_PREEMPT_VOLUNTARY
 	get_current_task \tmp
 	ldr		\tmp, [\tmp, #TSK_TI_PREEMPT]
 	/*
 	 * If we are serving a softirq, there is no point in yielding: the
 	 * softirq will not be preempted no matter what we do, so we should
-	 * run to completion as quickly as we can.
+	 * run to completion as quickly as we can. The preempt_count field will
+	 * have BIT(SOFTIRQ_SHIFT) set in this case, so the zero check will
+	 * catch this case too.
 	 */
-	tbnz		\tmp, #SOFTIRQ_SHIFT, .Lnoyield_\@
-#ifdef CONFIG_PREEMPTION
-	sub		\tmp, \tmp, #PREEMPT_DISABLE_OFFSET
 	cbz		\tmp, \lbl
 #endif
-	adr_l		\tmp, irq_stat + IRQ_CPUSTAT_SOFTIRQ_PENDING
-	get_this_cpu_offset	\tmp2
-	ldr		w\tmp, [\tmp, \tmp2]
-	cbnz		w\tmp, \lbl	// yield on pending softirq in task context
-.Lnoyield_\@:
 	.endm
 
 /*
diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index 50ce8b697ff3..e93548914c36 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -4,6 +4,8 @@
 
 #ifndef __ASSEMBLER__
 
+#include <linux/cpumask.h>
+
 #include <asm-generic/irq.h>
 
 void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu);
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index d95b3d6b471a..e5d03a7039b4 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -73,7 +73,13 @@ obj-$(CONFIG_ARM64_MTE)			+= mte.o
 obj-y					+= vdso-wrap.o
 obj-$(CONFIG_COMPAT_VDSO)		+= vdso32-wrap.o
 obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS)	+= patch-scs.o
-CFLAGS_patch-scs.o			+= -mbranch-protection=none
+
+# We need to prevent the SCS patching code from patching itself. Using
+# -mbranch-protection=none here to avoid the patchable PAC opcodes from being
+# generated triggers an issue with full LTO on Clang, which stops emitting PAC
+# instructions altogether. So instead, omit the unwind tables used by the
+# patching code, so it will not be able to locate its own PAC instructions.
+CFLAGS_patch-scs.o			+= -fno-asynchronous-unwind-tables -fno-unwind-tables
 
 # Force dependency (vdso*-wrap.S includes vdso.so through incbin)
 $(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 5ff1942b04fc..5a7dbbe0ce63 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -117,8 +117,6 @@ int main(void)
   DEFINE(DMA_FROM_DEVICE,	DMA_FROM_DEVICE);
   BLANK();
   DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET);
-  DEFINE(SOFTIRQ_SHIFT, SOFTIRQ_SHIFT);
-  DEFINE(IRQ_CPUSTAT_SOFTIRQ_PENDING, offsetof(irq_cpustat_t, __softirq_pending));
   BLANK();
   DEFINE(CPU_BOOT_TASK,		offsetof(struct secondary_data, task));
   BLANK();
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index e29e0fea63fb..967c7c7a4e7d 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -416,6 +416,19 @@ static struct midr_range broken_aarch32_aes[] = {
 };
 #endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */
 
+#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
+static const struct midr_range erratum_spec_unpriv_load_list[] = {
+#ifdef CONFIG_ARM64_ERRATUM_3117295
+	MIDR_ALL_VERSIONS(MIDR_CORTEX_A510),
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_2966298
+	/* Cortex-A520 r0p0 to r0p1 */
+	MIDR_REV_RANGE(MIDR_CORTEX_A520, 0, 0, 1),
+#endif
+	{},
+};
+#endif
+
 const struct arm64_cpu_capabilities arm64_errata[] = {
 #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
 	{
@@ -713,12 +726,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 		MIDR_FIXED(MIDR_CPU_VAR_REV(1,1), BIT(25)),
 	},
 #endif
-#ifdef CONFIG_ARM64_ERRATUM_2966298
+#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
 	{
-		.desc = "ARM erratum 2966298",
-		.capability = ARM64_WORKAROUND_2966298,
+		.desc = "ARM errata 2966298, 3117295",
+		.capability = ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD,
 		/* Cortex-A520 r0p0 - r0p1 */
-		ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A520, 0, 0, 1),
+		ERRATA_MIDR_RANGE_LIST(erratum_spec_unpriv_load_list),
 	},
 #endif
 #ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index a6030913cd58..7ef0e127b149 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -428,16 +428,9 @@ alternative_else_nop_endif
 	ldp	x28, x29, [sp, #16 * 14]
 
 	.if	\el == 0
-alternative_if ARM64_WORKAROUND_2966298
-	tlbi	vale1, xzr
-	dsb	nsh
-alternative_else_nop_endif
-alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
-	ldr	lr, [sp, #S_LR]
-	add	sp, sp, #PT_REGS_SIZE		// restore sp
-	eret
-alternative_else_nop_endif
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+	alternative_insn "b .L_skip_tramp_exit_\@", nop, ARM64_UNMAP_KERNEL_AT_EL0
+
 	msr	far_el1, x29
 
 	ldr_this_cpu	x30, this_cpu_vector, x29
@@ -446,16 +439,26 @@ alternative_else_nop_endif
 	ldr		lr, [sp, #S_LR]		// restore x30
 	add		sp, sp, #PT_REGS_SIZE	// restore sp
 	br		x29
+
+.L_skip_tramp_exit_\@:
 #endif
-	.else
+	.endif
+
 	ldr	lr, [sp, #S_LR]
 	add	sp, sp, #PT_REGS_SIZE		// restore sp
 
+	.if \el == 0
+	/* This must be after the last explicit memory access */
+alternative_if ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
+	tlbi	vale1, xzr
+	dsb	nsh
+alternative_else_nop_endif
+	.else
 	/* Ensure any device/NC reads complete */
 	alternative_insn nop, "dmb sy", ARM64_WORKAROUND_1508412
+	.endif
 
 	eret
-	.endif
 	sb
 	.endm
 
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 505f389be3e0..a5dc6f764195 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -898,10 +898,8 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
 	 * allocate SVE now in case it is needed for use in streaming
 	 * mode.
 	 */
-	if (system_supports_sve()) {
-		sve_free(task);
-		sve_alloc(task, true);
-	}
+	sve_free(task);
+	sve_alloc(task, true);
 
 	if (free_sme)
 		sme_free(task);
@@ -1219,8 +1217,10 @@ void fpsimd_release_task(struct task_struct *dead_task)
  */
 void sme_alloc(struct task_struct *task, bool flush)
 {
-	if (task->thread.sme_state && flush) {
-		memset(task->thread.sme_state, 0, sme_state_size(task));
+	if (task->thread.sme_state) {
+		if (flush)
+			memset(task->thread.sme_state, 0,
+			       sme_state_size(task));
 		return;
 	}
 
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 09bb7fc7d3c2..dc6cf0e37194 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1108,12 +1108,13 @@ static int za_set(struct task_struct *target,
 		}
 	}
 
-	/* Allocate/reinit ZA storage */
-	sme_alloc(target, true);
-	if (!target->thread.sme_state) {
-		ret = -ENOMEM;
-		goto out;
-	}
+	/*
+	 * Only flush the storage if PSTATE.ZA was not already set,
+	 * otherwise preserve any existing data.
+	 */
+	sme_alloc(target, !thread_za_enabled(&target->thread));
+	if (!target->thread.sme_state)
+		return -ENOMEM;
 
 	/* If there is no data then disable ZA */
 	if (!count) {
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 1e07d74d7a6c..b912b1409fc0 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -84,7 +84,6 @@ WORKAROUND_2077057
 WORKAROUND_2457168
 WORKAROUND_2645198
 WORKAROUND_2658417
-WORKAROUND_2966298
 WORKAROUND_AMPERE_AC03_CPU_38
 WORKAROUND_TRBE_OVERWRITE_FILL_MODE
 WORKAROUND_TSB_FLUSH_FAILURE
@@ -100,3 +99,4 @@ WORKAROUND_NVIDIA_CARMEL_CNP
 WORKAROUND_QCOM_FALKOR_E1003
 WORKAROUND_REPEAT_TLBI
 WORKAROUND_SPECULATIVE_AT
+WORKAROUND_SPECULATIVE_UNPRIV_LOAD
diff --git a/arch/csky/configs/defconfig b/arch/csky/configs/defconfig
index af722e4dfb47..ff559e5162aa 100644
--- a/arch/csky/configs/defconfig
+++ b/arch/csky/configs/defconfig
@@ -34,7 +34,8 @@ CONFIG_GENERIC_PHY=y
 CONFIG_EXT4_FS=y
 CONFIG_FANOTIFY=y
 CONFIG_QUOTA=y
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
 CONFIG_FSCACHE_STATS=y
 CONFIG_CACHEFILES=m
 CONFIG_MSDOS_FS=y
diff --git a/arch/loongarch/Kbuild b/arch/loongarch/Kbuild
index beb8499dd8ed..bfa21465d83a 100644
--- a/arch/loongarch/Kbuild
+++ b/arch/loongarch/Kbuild
@@ -4,6 +4,7 @@ obj-y += net/
 obj-y += vdso/
 
 obj-$(CONFIG_KVM) += kvm/
+obj-$(CONFIG_BUILTIN_DTB) += boot/dts/
 
 # for cleaning
 subdir- += boot
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 15d05dd2b7f3..10959e6c3583 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -142,6 +142,7 @@ config LOONGARCH
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RETHOOK
 	select HAVE_RSEQ
+	select HAVE_RUST
 	select HAVE_SAMPLE_FTRACE_DIRECT
 	select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
 	select HAVE_SETUP_PER_CPU_AREA if NUMA
@@ -376,6 +377,24 @@ config CMDLINE_FORCE
 
 endchoice
 
+config BUILTIN_DTB
+	bool "Enable built-in dtb in kernel"
+	depends on OF
+	help
+	  Some existing systems do not provide a canonical device tree to
+	  the kernel at boot time. Let's provide a device tree table in the
+	  kernel, keyed by the dts filename, containing the relevant DTBs.
+
+	  Built-in DTBs are generic enough and can be used as references.
+
+config BUILTIN_DTB_NAME
+	string "Source file for built-in dtb"
+	depends on BUILTIN_DTB
+	help
+	  Base name (without suffix, relative to arch/loongarch/boot/dts/)
+	  for the DTS file that will be used to produce the DTB linked into
+	  the kernel.
+
 config DMI
 	bool "Enable DMI scanning"
 	select DMI_SCAN_MACHINE_NON_EFI_FALLBACK
@@ -577,6 +596,9 @@ config ARCH_SELECTS_CRASH_DUMP
 	depends on CRASH_DUMP
 	select RELOCATABLE
 
+config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
+	def_bool CRASH_CORE
+
 config RELOCATABLE
 	bool "Relocatable kernel"
 	help
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 4ba8d67ddb09..983aa2b1629a 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -6,6 +6,7 @@
 boot	:= arch/loongarch/boot
 
 KBUILD_DEFCONFIG := loongson3_defconfig
+KBUILD_DTBS      := dtbs
 
 image-name-y			:= vmlinux
 image-name-$(CONFIG_EFI_ZBOOT)	:= vmlinuz
@@ -81,8 +82,11 @@ KBUILD_AFLAGS_MODULE		+= -Wa,-mla-global-with-abs
 KBUILD_CFLAGS_MODULE		+= -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
 endif
 
+KBUILD_RUSTFLAGS_MODULE		+= -Crelocation-model=pic
+
 ifeq ($(CONFIG_RELOCATABLE),y)
 KBUILD_CFLAGS_KERNEL		+= -fPIE
+KBUILD_RUSTFLAGS_KERNEL		+= -Crelocation-model=pie
 LDFLAGS_vmlinux			+= -static -pie --no-dynamic-linker -z notext $(call ld-option, --apply-dynamic-relocs)
 endif
 
@@ -141,7 +145,7 @@ endif
 
 vdso-install-y += arch/loongarch/vdso/vdso.so.dbg
 
-all:	$(notdir $(KBUILD_IMAGE))
+all:	$(notdir $(KBUILD_IMAGE)) $(KBUILD_DTBS)
 
 vmlinuz.efi: vmlinux.efi
 
diff --git a/arch/loongarch/boot/dts/Makefile b/arch/loongarch/boot/dts/Makefile
index 5f1f55e911ad..747d0c3f6389 100644
--- a/arch/loongarch/boot/dts/Makefile
+++ b/arch/loongarch/boot/dts/Makefile
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
-dtstree	:= $(srctree)/$(src)
 
-dtb-y := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts))
+dtb-y = loongson-2k0500-ref.dtb loongson-2k1000-ref.dtb loongson-2k2000-ref.dtb
+
+obj-$(CONFIG_BUILTIN_DTB)	+= $(addsuffix .dtb.o, $(CONFIG_BUILTIN_DTB_NAME))
diff --git a/arch/loongarch/boot/dts/loongson-2k0500-ref.dts b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts
new file mode 100644
index 000000000000..b38071a4d0b0
--- /dev/null
+++ b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+/dts-v1/;
+
+#include "loongson-2k0500.dtsi"
+
+/ {
+	compatible = "loongson,ls2k0500-ref", "loongson,ls2k0500";
+	model = "Loongson-2K0500 Reference Board";
+
+	aliases {
+		ethernet0 = &gmac0;
+		ethernet1 = &gmac1;
+		serial0 = &uart0;
+	};
+
+	chosen {
+		stdout-path = "serial0:115200n8";
+	};
+
+	memory@200000 {
+		device_type = "memory";
+		reg = <0x0 0x00200000 0x0 0x0ee00000>,
+		      <0x0 0x90000000 0x0 0x60000000>;
+	};
+
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		linux,cma {
+			compatible = "shared-dma-pool";
+			reusable;
+			size = <0x0 0x2000000>;
+			linux,cma-default;
+		};
+	};
+};
+
+&gmac0 {
+	status = "okay";
+
+	phy-mode = "rgmii";
+	bus_id = <0x0>;
+};
+
+&gmac1 {
+	status = "okay";
+
+	phy-mode = "rgmii";
+	bus_id = <0x1>;
+};
+
+&i2c0 {
+	status = "okay";
+
+	#address-cells = <1>;
+	#size-cells = <0>;
+	eeprom@57{
+		compatible = "atmel,24c16";
+		reg = <0x57>;
+		pagesize = <16>;
+	};
+};
+
+&ehci0 {
+	status = "okay";
+};
+
+&ohci0 {
+	status = "okay";
+};
+
+&sata {
+	status = "okay";
+};
+
+&uart0 {
+	status = "okay";
+};
+
+&rtc0 {
+	status = "okay";
+};
diff --git a/arch/loongarch/boot/dts/loongson-2k0500.dtsi b/arch/loongarch/boot/dts/loongson-2k0500.dtsi
new file mode 100644
index 000000000000..444779c21034
--- /dev/null
+++ b/arch/loongarch/boot/dts/loongson-2k0500.dtsi
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/interrupt-controller/irq.h>
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu0: cpu@0 {
+			compatible = "loongson,la264";
+			device_type = "cpu";
+			reg = <0x0>;
+			clocks = <&cpu_clk>;
+		};
+	};
+
+	cpu_clk: cpu-clk {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <500000000>;
+	};
+
+	cpuintc: interrupt-controller {
+		compatible = "loongson,cpu-interrupt-controller";
+		#interrupt-cells = <1>;
+		interrupt-controller;
+	};
+
+	bus@10000000 {
+		compatible = "simple-bus";
+		ranges = <0x0 0x10000000 0x0 0x10000000 0x0 0x10000000>,
+			 <0x0 0x02000000 0x0 0x02000000 0x0 0x02000000>,
+			 <0x0 0x20000000 0x0 0x20000000 0x0 0x10000000>,
+			 <0x0 0x40000000 0x0 0x40000000 0x0 0x40000000>,
+			 <0xfe 0x0 0xfe 0x0 0x0 0x40000000>;
+		#address-cells = <2>;
+		#size-cells = <2>;
+
+		isa@16400000 {
+			compatible = "isa";
+			#size-cells = <1>;
+			#address-cells = <2>;
+			ranges = <1 0x0 0x0 0x16400000 0x4000>;
+		};
+
+		liointc0: interrupt-controller@1fe11400 {
+			compatible = "loongson,liointc-2.0";
+			reg = <0x0 0x1fe11400 0x0 0x40>,
+			      <0x0 0x1fe11040 0x0 0x8>;
+			reg-names = "main", "isr0";
+
+			interrupt-controller;
+			#interrupt-cells = <2>;
+			interrupt-parent = <&cpuintc>;
+			interrupts = <2>;
+			interrupt-names = "int0";
+
+			loongson,parent_int_map = <0xffffffff>, /* int0 */
+						  <0x00000000>, /* int1 */
+						  <0x00000000>, /* int2 */
+						  <0x00000000>; /* int3 */
+		};
+
+		liointc1: interrupt-controller@1fe11440 {
+			compatible = "loongson,liointc-2.0";
+			reg = <0x0 0x1fe11440 0x0 0x40>,
+			      <0x0 0x1fe11048 0x0 0x8>;
+			reg-names = "main", "isr0";
+
+			interrupt-controller;
+			#interrupt-cells = <2>;
+			interrupt-parent = <&cpuintc>;
+			interrupts = <4>;
+			interrupt-names = "int2";
+
+			loongson,parent_int_map = <0x00000000>, /* int0 */
+						  <0x00000000>, /* int1 */
+						  <0xffffffff>, /* int2 */
+						  <0x00000000>; /* int3 */
+		};
+
+		eiointc: interrupt-controller@1fe11600 {
+			compatible = "loongson,ls2k0500-eiointc";
+			reg = <0x0 0x1fe11600 0x0 0xea00>;
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&cpuintc>;
+			interrupts = <3>;
+		};
+
+		gmac0: ethernet@1f020000 {
+			compatible = "snps,dwmac-3.70a";
+			reg = <0x0 0x1f020000 0x0 0x10000>;
+			interrupt-parent = <&liointc0>;
+			interrupts = <12 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "macirq";
+			status = "disabled";
+		};
+
+		gmac1: ethernet@1f030000 {
+			compatible = "snps,dwmac-3.70a";
+			reg = <0x0 0x1f030000 0x0 0x10000>;
+			interrupt-parent = <&liointc0>;
+			interrupts = <14 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "macirq";
+			status = "disabled";
+		};
+
+		sata: sata@1f040000 {
+			compatible = "snps,spear-ahci";
+			reg = <0x0 0x1f040000 0x0 0x10000>;
+			interrupt-parent = <&eiointc>;
+			interrupts = <75>;
+			status = "disabled";
+		};
+
+		ehci0: usb@1f050000 {
+			compatible = "generic-ehci";
+			reg = <0x0 0x1f050000 0x0 0x8000>;
+			interrupt-parent = <&eiointc>;
+			interrupts = <71>;
+			status = "disabled";
+		};
+
+		ohci0: usb@1f058000 {
+			compatible = "generic-ohci";
+			reg = <0x0 0x1f058000 0x0 0x8000>;
+			interrupt-parent = <&eiointc>;
+			interrupts = <72>;
+			status = "disabled";
+		};
+
+		uart0: serial@1ff40800 {
+			compatible = "ns16550a";
+			reg = <0x0 0x1ff40800 0x0 0x10>;
+			clock-frequency = <100000000>;
+			interrupt-parent = <&eiointc>;
+			interrupts = <2>;
+			no-loopback-test;
+			status = "disabled";
+		};
+
+		i2c0: i2c@1ff48000 {
+			compatible = "loongson,ls2k-i2c";
+			reg = <0x0 0x1ff48000 0x0 0x0800>;
+			interrupt-parent = <&eiointc>;
+			interrupts = <14>;
+			status = "disabled";
+		};
+
+		i2c@1ff48800 {
+			compatible = "loongson,ls2k-i2c";
+			reg = <0x0 0x1ff48800 0x0 0x0800>;
+			interrupt-parent = <&eiointc>;
+			interrupts = <15>;
+			status = "disabled";
+		};
+
+		i2c@1ff49000 {
+			compatible = "loongson,ls2k-i2c";
+			reg = <0x0 0x1ff49000 0x0 0x0800>;
+			interrupt-parent = <&eiointc>;
+			interrupts = <16>;
+			status = "disabled";
+		};
+
+		i2c@1ff49800 {
+			compatible = "loongson,ls2k-i2c";
+			reg = <0x0 0x1ff49800 0x0 0x0800>;
+			interrupt-parent = <&eiointc>;
+			interrupts = <17>;
+			status = "disabled";
+		};
+
+		i2c@1ff4a000 {
+			compatible = "loongson,ls2k-i2c";
+			reg = <0x0 0x1ff4a000 0x0 0x0800>;
+			interrupt-parent = <&eiointc>;
+			interrupts = <18>;
+			status = "disabled";
+		};
+
+		i2c@1ff4a800 {
+			compatible = "loongson,ls2k-i2c";
+			reg = <0x0 0x1ff4a800 0x0 0x0800>;
+			interrupt-parent = <&eiointc>;
+			interrupts = <19>;
+			status = "disabled";
+		};
+
+		pmc: power-management@1ff6c000 {
+			compatible = "loongson,ls2k0500-pmc", "syscon";
+			reg = <0x0 0x1ff6c000 0x0 0x58>;
+			interrupt-parent = <&eiointc>;
+			interrupts = <56>;
+			loongson,suspend-address = <0x0 0x1c000500>;
+
+			syscon-reboot {
+				compatible = "syscon-reboot";
+				offset = <0x30>;
+				mask = <0x1>;
+			};
+
+			syscon-poweroff {
+				compatible = "syscon-poweroff";
+				regmap = <&pmc>;
+				offset = <0x14>;
+				mask = <0x3c00>;
+				value = <0x3c00>;
+			};
+		};
+
+		rtc0: rtc@1ff6c100 {
+			compatible = "loongson,ls2k0500-rtc", "loongson,ls7a-rtc";
+			reg = <0x0 0x1ff6c100 0x0 0x100>;
+			interrupt-parent = <&eiointc>;
+			interrupts = <35>;
+			status = "disabled";
+		};
+
+		pcie@1a000000 {
+			compatible = "loongson,ls2k-pci";
+			reg = <0x0 0x1a000000 0x0 0x02000000>,
+			      <0xfe 0x0 0x0 0x20000000>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			device_type = "pci";
+			bus-range = <0x0 0x5>;
+			ranges = <0x01000000 0x0 0x00004000 0x0 0x16404000 0x0 0x00004000>,
+				 <0x02000000 0x0 0x40000000 0x0 0x40000000 0x0 0x40000000>;
+
+			pcie@0,0 {
+				reg = <0x0000 0x0 0x0 0x0 0x0>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				device_type = "pci";
+				interrupt-parent = <&eiointc>;
+				#interrupt-cells = <1>;
+				interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+				interrupt-map = <0x0 0x0 0x0 0x0 &eiointc 81>;
+				ranges;
+			};
+
+			pcie@1,0 {
+				reg = <0x0800 0x0 0x0 0x0 0x0>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				device_type = "pci";
+				interrupt-parent = <&eiointc>;
+				#interrupt-cells = <1>;
+				interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+				interrupt-map = <0x0 0x0 0x0 0x0 &eiointc 82>;
+				ranges;
+			};
+		};
+	};
+};
diff --git a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts
new file mode 100644
index 000000000000..132a2d1ea8bc
--- /dev/null
+++ b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+/dts-v1/;
+
+#include "loongson-2k1000.dtsi"
+
+/ {
+	compatible = "loongson,ls2k1000-ref", "loongson,ls2k1000";
+	model = "Loongson-2K1000 Reference Board";
+
+	aliases {
+		serial0 = &uart0;
+	};
+
+	chosen {
+		stdout-path = "serial0:115200n8";
+	};
+
+	memory@200000 {
+		device_type = "memory";
+		reg = <0x0 0x00200000 0x0 0x06e00000>,
+		      <0x0 0x08000000 0x0 0x07000000>,
+		      <0x0 0x90000000 0x1 0xe0000000>;
+	};
+
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		linux,cma {
+			compatible = "shared-dma-pool";
+			reusable;
+			size = <0x0 0x2000000>;
+			linux,cma-default;
+		};
+	};
+};
+
+&gmac0 {
+	status = "okay";
+
+	phy-mode = "rgmii";
+	phy-handle = <&phy0>;
+	mdio {
+		compatible = "snps,dwmac-mdio";
+		#address-cells = <1>;
+		#size-cells = <0>;
+		phy0: ethernet-phy@0 {
+			reg = <0>;
+		};
+	};
+};
+
+&gmac1 {
+	status = "okay";
+
+	phy-mode = "rgmii";
+	phy-handle = <&phy1>;
+	mdio {
+		compatible = "snps,dwmac-mdio";
+		#address-cells = <1>;
+		#size-cells = <0>;
+		phy1: ethernet-phy@10 {
+			reg = <16>;	/* MDIO address 16, i.e. unit-address 0x10 */
+		};
+	};
+};
+
+&i2c2 {
+	status = "okay";
+
+	pinctrl-0 = <&i2c0_pins_default>;
+	pinctrl-names = "default";
+
+	#address-cells = <1>;
+	#size-cells = <0>;
+	eeprom@57 {
+		compatible = "atmel,24c16";
+		reg = <0x57>;	/* address on the i2c2 bus, matches the unit-address */
+		pagesize = <16>;
+	};
+};
+
+&spi0 {
+	status = "okay";
+
+	#address-cells = <1>;
+	#size-cells = <0>;
+	spidev@0 {
+		compatible = "rohm,dh2228fv";
+		spi-max-frequency = <100000000>;
+		reg = <0>;
+	};
+};
+
+&ehci0 {
+	status = "okay";
+};
+
+&ohci0 {
+	status = "okay";
+};
+
+&sata {
+	status = "okay";
+};
+
+&uart0 {
+	status = "okay";
+};
+
+&clk {
+	status = "okay";
+};
+
+&rtc0 {
+	status = "okay";
+};
+
+&pctrl {
+	status = "okay";
+
+	sdio_pins_default: sdio-pins {
+		sdio-pinmux {
+			groups = "sdio";
+			function = "sdio";
+		};
+		sdio-det-pinmux {
+			groups = "pwm2";
+			function = "gpio";
+		};
+	};
+
+	pwm1_pins_default: pwm1-pins {
+		pinmux {
+			groups = "pwm1";
+			function = "pwm1";
+		};
+	};
+
+	pwm0_pins_default: pwm0-pins {
+		pinmux {
+			groups = "pwm0";
+			function = "pwm0";
+		};
+	};
+
+	i2c1_pins_default: i2c1-pins {
+		pinmux {
+			groups = "i2c1";
+			function = "i2c1";
+		};
+	};
+
+	i2c0_pins_default: i2c0-pins {
+		pinmux {
+			groups = "i2c0";
+			function = "i2c0";
+		};
+	};
+
+	nand_pins_default: nand-pins {
+		pinmux {
+			groups = "nand";
+			function = "nand";
+		};
+	};
+
+	hda_pins_default: hda-pins {
+		grp0-pinmux {
+			groups = "hda";
+			function = "hda";
+		};
+		grp1-pinmux {
+			groups = "i2s";
+			function = "gpio";
+		};
+	};
+};
diff --git a/arch/loongarch/boot/dts/loongson-2k1000.dtsi b/arch/loongarch/boot/dts/loongson-2k1000.dtsi
new file mode 100644
index 000000000000..49a70f8c3cab
--- /dev/null
+++ b/arch/loongarch/boot/dts/loongson-2k1000.dtsi
@@ -0,0 +1,492 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/clock/loongson,ls2k-clk.h>
+#include <dt-bindings/gpio/gpio.h>
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu0: cpu@0 {
+			compatible = "loongson,la264";
+			device_type = "cpu";
+			reg = <0x0>;
+			clocks = <&clk LOONGSON2_NODE_CLK>;
+		};
+
+		cpu1: cpu@1 {
+			compatible = "loongson,la264";
+			device_type = "cpu";
+			reg = <0x1>;
+			clocks = <&clk LOONGSON2_NODE_CLK>;
+		};
+	};
+
+	ref_100m: clock-ref-100m {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <100000000>;
+		clock-output-names = "ref_100m";
+	};
+
+	cpuintc: interrupt-controller {
+		compatible = "loongson,cpu-interrupt-controller";
+		#interrupt-cells = <1>;
+		interrupt-controller;
+	};
+
+	/* I2C bus (GPIO-driven) for the DVI EDID EEPROM */
+	i2c-gpio-0 {
+		compatible = "i2c-gpio";
+		scl-gpios = <&gpio0 0 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
+		sda-gpios = <&gpio0 1 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
+		i2c-gpio,delay-us = <5>;        /* ~100 kHz */
+		#address-cells = <1>;
+		#size-cells = <0>;
+		status = "disabled";
+	};
+
+	/* I2C bus (GPIO-driven) for the EDID EEPROM */
+	i2c-gpio-1 {
+		compatible = "i2c-gpio";
+		scl-gpios = <&gpio0 33 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
+		sda-gpios = <&gpio0 32 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
+		i2c-gpio,delay-us = <5>;        /* ~100 kHz */
+		#address-cells = <1>;
+		#size-cells = <0>;
+		status = "disabled";
+	};
+
+	thermal-zones {
+		cpu-thermal {
+			polling-delay-passive = <1000>;
+			polling-delay = <5000>;
+			thermal-sensors = <&tsensor 0>;
+
+			trips {
+				cpu_alert: cpu-alert {
+					temperature = <33000>;
+					hysteresis = <2000>;
+					type = "active";
+				};
+
+				cpu_crit: cpu-crit {
+					temperature = <85000>;
+					hysteresis = <5000>;
+					type = "critical";
+				};
+			};
+		};
+	};
+
+	bus@10000000 {
+		compatible = "simple-bus";
+		ranges = <0x0 0x10000000 0x0 0x10000000 0x0 0x10000000>,
+			 <0x0 0x02000000 0x0 0x02000000 0x0 0x02000000>,
+			 <0x0 0x20000000 0x0 0x20000000 0x0 0x10000000>,
+			 <0x0 0x40000000 0x0 0x40000000 0x0 0x40000000>,
+			 <0xfe 0x0 0xfe 0x0 0x0 0x40000000>;
+		#address-cells = <2>;
+		#size-cells = <2>;
+		dma-coherent;
+
+		liointc0: interrupt-controller@1fe01400 {
+			compatible = "loongson,liointc-2.0";
+			reg = <0x0 0x1fe01400 0x0 0x40>,
+			      <0x0 0x1fe01040 0x0 0x8>,
+			      <0x0 0x1fe01140 0x0 0x8>;
+			reg-names = "main", "isr0", "isr1";
+			interrupt-controller;
+			#interrupt-cells = <2>;
+			interrupt-parent = <&cpuintc>;
+			interrupts = <2>;
+			interrupt-names = "int0";
+			loongson,parent_int_map = <0xffffffff>, /* int0 */
+						  <0x00000000>, /* int1 */
+						  <0x00000000>, /* int2 */
+						  <0x00000000>; /* int3 */
+		};
+
+		liointc1: interrupt-controller@1fe01440 {
+			compatible = "loongson,liointc-2.0";
+			reg = <0x0 0x1fe01440 0x0 0x40>,
+			      <0x0 0x1fe01048 0x0 0x8>,
+			      <0x0 0x1fe01148 0x0 0x8>;
+			reg-names = "main", "isr0", "isr1";
+			interrupt-controller;
+			#interrupt-cells = <2>;
+			interrupt-parent = <&cpuintc>;
+			interrupts = <3>;
+			interrupt-names = "int1";
+			loongson,parent_int_map = <0x00000000>, /* int0 */
+						  <0xffffffff>, /* int1 */
+						  <0x00000000>, /* int2 */
+						  <0x00000000>; /* int3 */
+		};
+
+		chipid@1fe00000 {
+			compatible = "loongson,ls2k-chipid";
+			reg = <0x0 0x1fe00000 0x0 0x30>;
+			little-endian;
+		};
+
+		pctrl: pinctrl@1fe00420 {
+			compatible = "loongson,ls2k-pinctrl";
+			reg = <0x0 0x1fe00420 0x0 0x18>;
+			status = "disabled";
+		};
+
+		clk: clock-controller@1fe00480 {
+			compatible = "loongson,ls2k-clk";
+			reg = <0x0 0x1fe00480 0x0 0x58>;
+			#clock-cells = <1>;
+			clocks = <&ref_100m>;
+			clock-names = "ref_100m";
+			status = "disabled";
+		};
+
+		gpio0: gpio@1fe00500 {
+			compatible = "loongson,ls2k-gpio";
+			reg = <0x0 0x1fe00500 0x0 0x38>;
+			ngpios = <64>;
+			#gpio-cells = <2>;
+			gpio-controller;
+			gpio-ranges = <&pctrl 0x0 0x0 15>,
+				      <&pctrl 16 16 15>,
+				      <&pctrl 32 32 10>,
+				      <&pctrl 44 44 20>;
+			interrupt-parent = <&liointc1>;
+			interrupts = <28 IRQ_TYPE_LEVEL_HIGH>,
+				     <29 IRQ_TYPE_LEVEL_HIGH>,
+				     <30 IRQ_TYPE_LEVEL_HIGH>,
+				     <30 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <26 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <>,
+				     <>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>,
+				     <27 IRQ_TYPE_LEVEL_HIGH>;
+		};
+
+		tsensor: thermal-sensor@1fe01500 {
+			compatible = "loongson,ls2k1000-thermal";
+			reg = <0x0 0x1fe01500 0x0 0x30>;
+			interrupt-parent = <&liointc0>;
+			interrupts = <7 IRQ_TYPE_LEVEL_HIGH>;
+			#thermal-sensor-cells = <1>;
+		};
+
+		dma-controller@1fe00c00 {
+			compatible = "loongson,ls2k1000-apbdma";
+			reg = <0x0 0x1fe00c00 0x0 0x8>;
+			interrupt-parent = <&liointc1>;
+			interrupts = <12 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&clk LOONGSON2_APB_CLK>;
+			#dma-cells = <1>;
+			status = "disabled";
+		};
+
+		dma-controller@1fe00c10 {
+			compatible = "loongson,ls2k1000-apbdma";
+			reg = <0x0 0x1fe00c10 0x0 0x8>;
+			interrupt-parent = <&liointc1>;
+			interrupts = <13 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&clk LOONGSON2_APB_CLK>;
+			#dma-cells = <1>;
+			status = "disabled";
+		};
+
+		dma-controller@1fe00c20 {
+			compatible = "loongson,ls2k1000-apbdma";
+			reg = <0x0 0x1fe00c20 0x0 0x8>;
+			interrupt-parent = <&liointc1>;
+			interrupts = <14 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&clk LOONGSON2_APB_CLK>;
+			#dma-cells = <1>;
+			status = "disabled";
+		};
+
+		dma-controller@1fe00c30 {
+			compatible = "loongson,ls2k1000-apbdma";
+			reg = <0x0 0x1fe00c30 0x0 0x8>;
+			interrupt-parent = <&liointc1>;
+			interrupts = <15 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&clk LOONGSON2_APB_CLK>;
+			#dma-cells = <1>;
+			status = "disabled";
+		};
+
+		dma-controller@1fe00c40 {
+			compatible = "loongson,ls2k1000-apbdma";
+			reg = <0x0 0x1fe00c40 0x0 0x8>;
+			interrupt-parent = <&liointc1>;
+			interrupts = <16 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&clk LOONGSON2_APB_CLK>;
+			#dma-cells = <1>;
+			status = "disabled";
+		};
+
+		uart0: serial@1fe20000 {
+			compatible = "ns16550a";
+			reg = <0x0 0x1fe20000 0x0 0x10>;
+			clock-frequency = <125000000>;
+			interrupt-parent = <&liointc0>;
+			interrupts = <0x0 IRQ_TYPE_LEVEL_HIGH>;
+			no-loopback-test;
+			status = "disabled";
+		};
+
+		i2c2: i2c@1fe21000 {
+			compatible = "loongson,ls2k-i2c";
+			reg = <0x0 0x1fe21000 0x0 0x8>;
+			interrupt-parent = <&liointc0>;
+			interrupts = <22 IRQ_TYPE_LEVEL_HIGH>;
+			status = "disabled";
+		};
+
+		i2c3: i2c@1fe21800 {
+			compatible = "loongson,ls2k-i2c";
+			reg = <0x0 0x1fe21800 0x0 0x8>;
+			interrupt-parent = <&liointc0>;
+			interrupts = <23 IRQ_TYPE_LEVEL_HIGH>;
+			status = "disabled";
+		};
+
+		pmc: power-management@1fe27000 {
+			compatible = "loongson,ls2k1000-pmc", "loongson,ls2k0500-pmc", "syscon";
+			reg = <0x0 0x1fe27000 0x0 0x58>;
+			interrupt-parent = <&liointc1>;
+			interrupts = <11 IRQ_TYPE_LEVEL_HIGH>;
+			loongson,suspend-address = <0x0 0x1c000500>;
+
+			syscon-reboot {
+				compatible = "syscon-reboot";
+				offset = <0x30>;
+				mask = <0x1>;
+			};
+
+			syscon-poweroff {
+				compatible = "syscon-poweroff";
+				regmap = <&pmc>;
+				offset = <0x14>;
+				mask = <0x3c00>;
+				value = <0x3c00>;
+			};
+		};
+
+		rtc0: rtc@1fe27800 {
+			compatible = "loongson,ls2k1000-rtc";
+			reg = <0x0 0x1fe27800 0x0 0x100>;
+			interrupt-parent = <&liointc1>;
+			interrupts = <8 IRQ_TYPE_LEVEL_HIGH>;
+			status = "disabled";
+		};
+
+		spi0: spi@1fff0220 {
+			compatible = "loongson,ls2k1000-spi";
+			reg = <0x0 0x1fff0220 0x0 0x10>;
+			clocks = <&clk LOONGSON2_BOOT_CLK>;
+			status = "disabled";
+		};
+
+		pcie@1a000000 {
+			compatible = "loongson,ls2k-pci";
+			reg = <0x0 0x1a000000 0x0 0x02000000>,
+			      <0xfe 0x0 0x0 0x20000000>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			device_type = "pci";
+			bus-range = <0x0 0xff>;
+			ranges = <0x01000000 0x0 0x00008000 0x0 0x18008000 0x0 0x00008000>,
+				 <0x02000000 0x0 0x60000000 0x0 0x60000000 0x0 0x20000000>;
+
+			gmac0: ethernet@3,0 {
+				reg = <0x1800 0x0 0x0 0x0 0x0>;
+				interrupt-parent = <&liointc0>;
+				interrupts = <12 IRQ_TYPE_LEVEL_HIGH>,
+					     <13 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-names = "macirq", "eth_lpi";
+				status = "disabled";
+			};
+
+			gmac1: ethernet@3,1 {
+				reg = <0x1900 0x0 0x0 0x0 0x0>;
+				interrupt-parent = <&liointc0>;
+				interrupts = <14 IRQ_TYPE_LEVEL_HIGH>,
+					     <15 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-names = "macirq", "eth_lpi";
+				status = "disabled";
+			};
+
+			ehci0: usb@4,1 {
+				reg = <0x2100 0x0 0x0 0x0 0x0>;
+				interrupt-parent = <&liointc1>;
+				interrupts = <18 IRQ_TYPE_LEVEL_HIGH>;
+				status = "disabled";
+			};
+
+			ohci0: usb@4,2 {
+				reg = <0x2200 0x0 0x0 0x0 0x0>;
+				interrupt-parent = <&liointc1>;
+				interrupts = <19 IRQ_TYPE_LEVEL_HIGH>;
+				status = "disabled";
+			};
+
+			display@6,0 {
+				reg = <0x3000 0x0 0x0 0x0 0x0>;
+				interrupt-parent = <&liointc0>;
+				interrupts = <28 IRQ_TYPE_LEVEL_HIGH>;
+				status = "disabled";
+			};
+
+			hda@7,0 {
+				reg = <0x3800 0x0 0x0 0x0 0x0>;
+				interrupt-parent = <&liointc0>;
+				interrupts = <4 IRQ_TYPE_LEVEL_HIGH>;
+				status = "disabled";
+			};
+
+			sata: sata@8,0 {
+				reg = <0x4000 0x0 0x0 0x0 0x0>;
+				interrupt-parent = <&liointc0>;
+				interrupts = <19 IRQ_TYPE_LEVEL_HIGH>;
+				status = "disabled";
+			};
+
+			pcie@9,0 {
+				reg = <0x4800 0x0 0x0 0x0 0x0>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				device_type = "pci";
+				#interrupt-cells = <1>;
+				interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+				interrupt-map = <0x0 0x0 0x0 0x0 &liointc1 0x0 IRQ_TYPE_LEVEL_HIGH>;
+				ranges;
+			};
+
+			pcie@a,0 {
+				reg = <0x5000 0x0 0x0 0x0 0x0>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				device_type = "pci";
+				interrupt-parent = <&liointc1>;
+				#interrupt-cells = <1>;
+				interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+				interrupt-map = <0x0 0x0 0x0 0x0 &liointc1 1 IRQ_TYPE_LEVEL_HIGH>;
+				ranges;
+			};
+
+			pcie@b,0 {
+				reg = <0x5800 0x0 0x0 0x0 0x0>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				device_type = "pci";
+				interrupt-parent = <&liointc1>;
+				#interrupt-cells = <1>;
+				interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+				interrupt-map = <0x0 0x0 0x0 0x0 &liointc1 2 IRQ_TYPE_LEVEL_HIGH>;
+				ranges;
+			};
+
+			pcie@c,0 {
+				reg = <0x6000 0x0 0x0 0x0 0x0>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				device_type = "pci";
+				interrupt-parent = <&liointc1>;
+				#interrupt-cells = <1>;
+				interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+				interrupt-map = <0x0 0x0 0x0 0x0 &liointc1 3 IRQ_TYPE_LEVEL_HIGH>;
+				ranges;
+			};
+
+			pcie@d,0 {
+				reg = <0x6800 0x0 0x0 0x0 0x0>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				device_type = "pci";
+				interrupt-parent = <&liointc1>;
+				#interrupt-cells = <1>;
+				interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+				interrupt-map = <0x0 0x0 0x0 0x0 &liointc1 4 IRQ_TYPE_LEVEL_HIGH>;
+				ranges;
+			};
+
+			pcie@e,0 {
+				reg = <0x7000 0x0 0x0 0x0 0x0>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				device_type = "pci";
+				interrupt-parent = <&liointc1>;
+				#interrupt-cells = <1>;
+				interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+				interrupt-map = <0x0 0x0 0x0 0x0 &liointc1 5 IRQ_TYPE_LEVEL_HIGH>;
+				ranges;
+			};
+		};
+	};
+};
diff --git a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts
new file mode 100644
index 000000000000..dca91caf895e
--- /dev/null
+++ b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+/dts-v1/;
+
+#include "loongson-2k2000.dtsi"
+
+/ {
+	compatible = "loongson,ls2k2000-ref", "loongson,ls2k2000";
+	model = "Loongson-2K2000 Reference Board";
+
+	aliases {
+		serial0 = &uart0;
+	};
+
+	chosen {
+		stdout-path = "serial0:115200n8";
+	};
+
+	memory@200000 {
+		device_type = "memory";
+		reg = <0x0 0x00200000 0x0 0x0ee00000>,
+		      <0x0 0x90000000 0x0 0x70000000>;
+	};
+
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		linux,cma {
+			compatible = "shared-dma-pool";
+			reusable;
+			size = <0x0 0x2000000>;
+			linux,cma-default;
+		};
+	};
+};
+
+&sata {
+	status = "okay";
+};
+
+&uart0 {
+	status = "okay";
+};
+
+&rtc0 {
+	status = "okay";
+};
+
+&xhci0 {
+	status = "okay";
+};
+
+&xhci1 {
+	status = "okay";
+};
+
+&gmac0 {
+	status = "okay";
+};
+
+&gmac1 {
+	status = "okay";
+};
+
+&gmac2 {
+	status = "okay";
+};
diff --git a/arch/loongarch/boot/dts/loongson-2k2000.dtsi b/arch/loongarch/boot/dts/loongson-2k2000.dtsi
new file mode 100644
index 000000000000..a231949b5f55
--- /dev/null
+++ b/arch/loongarch/boot/dts/loongson-2k2000.dtsi
@@ -0,0 +1,300 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/interrupt-controller/irq.h>
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu0: cpu@0 {	/* unit-address must equal reg */
+			compatible = "loongson,la364";
+			device_type = "cpu";
+			reg = <0x0>;
+			clocks = <&cpu_clk>;
+		};
+
+		cpu1: cpu@1 {	/* unit-address must equal reg */
+			compatible = "loongson,la364";
+			device_type = "cpu";
+			reg = <0x1>;
+			clocks = <&cpu_clk>;
+		};
+	};
+
+	cpu_clk: cpu-clk {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <1400000000>;
+	};
+
+	cpuintc: interrupt-controller {
+		compatible = "loongson,cpu-interrupt-controller";
+		#interrupt-cells = <1>;
+		interrupt-controller;
+	};
+
+	bus@10000000 {
+		compatible = "simple-bus";
+		ranges = <0x0 0x10000000 0x0 0x10000000 0x0 0x10000000>,
+			 <0x0 0x02000000 0x0 0x02000000 0x0 0x02000000>,
+			 <0x0 0x40000000 0x0 0x40000000 0x0 0x40000000>,
+			 <0xfe 0x0 0xfe 0x0 0x0 0x40000000>;
+		#address-cells = <2>;
+		#size-cells = <2>;
+
+		pmc: power-management@100d0000 {
+			compatible = "loongson,ls2k2000-pmc", "loongson,ls2k0500-pmc", "syscon";
+			reg = <0x0 0x100d0000 0x0 0x58>;
+			interrupt-parent = <&eiointc>;
+			interrupts = <47>;
+			loongson,suspend-address = <0x0 0x1c000500>;
+
+			syscon-reboot {
+				compatible = "syscon-reboot";
+				offset = <0x30>;
+				mask = <0x1>;
+			};
+
+			syscon-poweroff {
+				compatible = "syscon-poweroff";
+				regmap = <&pmc>;
+				offset = <0x14>;
+				mask = <0x3c00>;
+				value = <0x3c00>;
+			};
+		};
+
+		liointc: interrupt-controller@1fe01400 {
+			compatible = "loongson,liointc-1.0";
+			reg = <0x0 0x1fe01400 0x0 0x64>;
+
+			interrupt-controller;
+			#interrupt-cells = <2>;
+			interrupt-parent = <&cpuintc>;
+			interrupts = <2>;
+			interrupt-names = "int0";
+			loongson,parent_int_map = <0xffffffff>, /* int0 */
+						  <0x00000000>, /* int1 */
+						  <0x00000000>, /* int2 */
+						  <0x00000000>; /* int3 */
+		};
+
+		eiointc: interrupt-controller@1fe01600 {
+			compatible = "loongson,ls2k2000-eiointc";
+			reg = <0x0 0x1fe01600 0x0 0xea00>;
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&cpuintc>;
+			interrupts = <3>;
+		};
+
+		pic: interrupt-controller@10000000 {
+			compatible = "loongson,pch-pic-1.0";
+			reg = <0x0 0x10000000 0x0 0x400>;
+			interrupt-controller;
+			#interrupt-cells = <2>;
+			loongson,pic-base-vec = <0>;
+			interrupt-parent = <&eiointc>;
+		};
+
+		msi: msi-controller@1fe01140 {
+			compatible = "loongson,pch-msi-1.0";
+			reg = <0x0 0x1fe01140 0x0 0x8>;
+			msi-controller;
+			loongson,msi-base-vec = <64>;
+			loongson,msi-num-vecs = <192>;
+			interrupt-parent = <&eiointc>;
+		};
+
+		rtc0: rtc@100d0100 {
+			compatible = "loongson,ls2k2000-rtc", "loongson,ls7a-rtc";
+			reg = <0x0 0x100d0100 0x0 0x100>;
+			interrupt-parent = <&pic>;
+			interrupts = <52 IRQ_TYPE_LEVEL_HIGH>;
+			status = "disabled";
+		};
+
+		uart0: serial@1fe001e0 {
+			compatible = "ns16550a";
+			reg = <0x0 0x1fe001e0 0x0 0x10>;
+			clock-frequency = <100000000>;
+			interrupt-parent = <&liointc>;
+			interrupts = <10 IRQ_TYPE_LEVEL_HIGH>;
+			no-loopback-test;
+			status = "disabled";
+		};
+
+		pcie@1a000000 {
+			compatible = "loongson,ls2k-pci";
+			reg = <0x0 0x1a000000 0x0 0x02000000>,
+			      <0xfe 0x0 0x0 0x20000000>;
+			#address-cells = <3>;
+			#size-cells = <2>;
+			device_type = "pci";
+			bus-range = <0x0 0xff>;
+			ranges = <0x01000000 0x0 0x00008000 0x0 0x18400000 0x0 0x00008000>,
+				 <0x02000000 0x0 0x60000000 0x0 0x60000000 0x0 0x20000000>;
+
+			gmac0: ethernet@3,0 {
+				reg = <0x1800 0x0 0x0 0x0 0x0>;
+				interrupts = <12 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-parent = <&pic>;
+				status = "disabled";
+			};
+
+			gmac1: ethernet@3,1 {
+				reg = <0x1900 0x0 0x0 0x0 0x0>;
+				interrupts = <14 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-parent = <&pic>;
+				status = "disabled";
+			};
+
+			gmac2: ethernet@3,2 {
+				reg = <0x1a00 0x0 0x0 0x0 0x0>;
+				interrupts = <17 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-parent = <&pic>;
+				status = "disabled";
+			};
+
+			xhci0: usb@4,0 {
+				reg = <0x2000 0x0 0x0 0x0 0x0>;
+				interrupts = <48 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-parent = <&pic>;
+				status = "disabled";
+			};
+
+			xhci1: usb@19,0 {
+				reg = <0xc800 0x0 0x0 0x0 0x0>;
+				interrupts = <22 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-parent = <&pic>;
+				status = "disabled";
+			};
+
+			display@6,1 {
+				reg = <0x3100 0x0 0x0 0x0 0x0>;
+				interrupts = <28 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-parent = <&pic>;
+				status = "disabled";
+			};
+
+			hda@7,0 {
+				reg = <0x3800 0x0 0x0 0x0 0x0>;
+				interrupts = <58 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-parent = <&pic>;
+				status = "disabled";
+			};
+
+			sata: sata@8,0 {
+				reg = <0x4000 0x0 0x0 0x0 0x0>;
+				interrupts = <16 IRQ_TYPE_LEVEL_HIGH>;
+				interrupt-parent = <&pic>;
+				status = "disabled";
+			};
+
+			pcie@9,0 {
+				reg = <0x4800 0x0 0x0 0x0 0x0>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				device_type = "pci";
+				interrupt-parent = <&pic>;
+				#interrupt-cells = <1>;
+				interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+				interrupt-map = <0x0 0x0 0x0 0x0 &pic 32 IRQ_TYPE_LEVEL_HIGH>;
+				ranges;
+			};
+
+			pcie@a,0 {
+				reg = <0x5000 0x0 0x0 0x0 0x0>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				device_type = "pci";
+				interrupt-parent = <&pic>;
+				#interrupt-cells = <1>;
+				interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+				interrupt-map = <0x0 0x0 0x0 0x0 &pic 33 IRQ_TYPE_LEVEL_HIGH>;
+				ranges;
+			};
+
+			pcie@b,0 {
+				reg = <0x5800 0x0 0x0 0x0 0x0>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				device_type = "pci";
+				interrupt-parent = <&pic>;
+				#interrupt-cells = <1>;
+				interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+				interrupt-map = <0x0 0x0 0x0 0x0 &pic 34 IRQ_TYPE_LEVEL_HIGH>;
+				ranges;
+			};
+
+			pcie@c,0 {
+				reg = <0x6000 0x0 0x0 0x0 0x0>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				device_type = "pci";
+				interrupt-parent = <&pic>;
+				#interrupt-cells = <1>;
+				interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+				interrupt-map = <0x0 0x0 0x0 0x0 &pic 35 IRQ_TYPE_LEVEL_HIGH>;
+				ranges;
+			};
+
+			pcie@d,0 {
+				reg = <0x6800 0x0 0x0 0x0 0x0>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				device_type = "pci";
+				interrupt-parent = <&pic>;
+				#interrupt-cells = <1>;
+				interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+				interrupt-map = <0x0 0x0 0x0 0x0 &pic 36 IRQ_TYPE_LEVEL_HIGH>;
+				ranges;
+			};
+
+			pcie@e,0 {
+				reg = <0x7000 0x0 0x0 0x0 0x0>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				device_type = "pci";
+				interrupt-parent = <&pic>;
+				#interrupt-cells = <1>;
+				interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+				interrupt-map = <0x0 0x0 0x0 0x0 &pic 37 IRQ_TYPE_LEVEL_HIGH>;
+				ranges;
+			};
+
+			pcie@f,0 {
+				reg = <0x7800 0x0 0x0 0x0 0x0>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				device_type = "pci";
+				interrupt-parent = <&pic>;
+				#interrupt-cells = <1>;
+				interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+				interrupt-map = <0x0 0x0 0x0 0x0 &pic 40 IRQ_TYPE_LEVEL_HIGH>;
+				ranges;
+			};
+
+			pcie@10,0 {
+				reg = <0x8000 0x0 0x0 0x0 0x0>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				device_type = "pci";
+				interrupt-parent = <&pic>;
+				#interrupt-cells = <1>;
+				interrupt-map-mask = <0x0 0x0 0x0 0x0>;
+				interrupt-map = <0x0 0x0 0x0 0x0 &pic 30 IRQ_TYPE_LEVEL_HIGH>;
+				ranges;
+			};
+		};
+	};
+};
diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig
index 60e331af9839..f18c2ba871ef 100644
--- a/arch/loongarch/configs/loongson3_defconfig
+++ b/arch/loongarch/configs/loongson3_defconfig
@@ -6,6 +6,8 @@ CONFIG_HIGH_RES_TIMERS=y
 CONFIG_BPF_SYSCALL=y
 CONFIG_BPF_JIT=y
 CONFIG_PREEMPT=y
+CONFIG_PREEMPT_DYNAMIC=y
+CONFIG_SCHED_CORE=y
 CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_BSD_PROCESS_ACCT_V3=y
 CONFIG_TASKSTATS=y
@@ -19,6 +21,7 @@ CONFIG_BLK_CGROUP=y
 CONFIG_CFS_BANDWIDTH=y
 CONFIG_RT_GROUP_SCHED=y
 CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_RDMA=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_HUGETLB=y
 CONFIG_CPUSETS=y
@@ -26,6 +29,7 @@ CONFIG_CGROUP_DEVICE=y
 CONFIG_CGROUP_CPUACCT=y
 CONFIG_CGROUP_PERF=y
 CONFIG_CGROUP_BPF=y
+CONFIG_CGROUP_MISC=y
 CONFIG_NAMESPACES=y
 CONFIG_USER_NS=y
 CONFIG_CHECKPOINT_RESTORE=y
@@ -35,6 +39,8 @@ CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
 CONFIG_KALLSYMS_ALL=y
 CONFIG_PERF_EVENTS=y
+CONFIG_KEXEC=y
+CONFIG_CRASH_DUMP=y
 CONFIG_LOONGARCH=y
 CONFIG_64BIT=y
 CONFIG_MACH_LOONGSON64=y
@@ -44,13 +50,11 @@ CONFIG_DMI=y
 CONFIG_EFI=y
 CONFIG_SMP=y
 CONFIG_HOTPLUG_CPU=y
-CONFIG_NR_CPUS=64
+CONFIG_NR_CPUS=256
 CONFIG_NUMA=y
 CONFIG_CPU_HAS_FPU=y
 CONFIG_CPU_HAS_LSX=y
 CONFIG_CPU_HAS_LASX=y
-CONFIG_KEXEC=y
-CONFIG_CRASH_DUMP=y
 CONFIG_RANDOMIZE_BASE=y
 CONFIG_SUSPEND=y
 CONFIG_HIBERNATION=y
@@ -62,10 +66,6 @@ CONFIG_ACPI_IPMI=m
 CONFIG_ACPI_HOTPLUG_CPU=y
 CONFIG_ACPI_PCI_SLOT=y
 CONFIG_ACPI_HOTPLUG_MEMORY=y
-CONFIG_EFI_ZBOOT=y
-CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y
-CONFIG_EFI_CAPSULE_LOADER=m
-CONFIG_EFI_TEST=m
 CONFIG_VIRTUALIZATION=y
 CONFIG_KVM=m
 CONFIG_JUMP_LABEL=y
@@ -74,10 +74,18 @@ CONFIG_MODULE_FORCE_LOAD=y
 CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_MODVERSIONS=y
+CONFIG_BLK_DEV_ZONED=y
 CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BLK_DEV_THROTTLING_LOW=y
+CONFIG_BLK_WBT=y
+CONFIG_BLK_CGROUP_IOLATENCY=y
+CONFIG_BLK_CGROUP_FC_APPID=y
+CONFIG_BLK_CGROUP_IOCOST=y
+CONFIG_BLK_CGROUP_IOPRIO=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_BSD_DISKLABEL=y
 CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_CMDLINE_PARTITION=y
 CONFIG_IOSCHED_BFQ=y
 CONFIG_BFQ_GROUP_IOSCHED=y
 CONFIG_BINFMT_MISC=m
@@ -93,6 +101,8 @@ CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y
 CONFIG_MEMORY_HOTREMOVE=y
 CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_CMA=y
+CONFIG_CMA_SYSFS=y
 CONFIG_USERFAULTFD=y
 CONFIG_NET=y
 CONFIG_PACKET=y
@@ -128,6 +138,7 @@ CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_IPV6_ROUTE_INFO=y
 CONFIG_INET6_ESP=m
 CONFIG_IPV6_MROUTE=y
+CONFIG_MPTCP=y
 CONFIG_NETWORK_PHY_TIMESTAMPING=y
 CONFIG_NETFILTER=y
 CONFIG_BRIDGE_NETFILTER=m
@@ -352,6 +363,7 @@ CONFIG_PCIEAER=y
 CONFIG_PCI_IOV=y
 CONFIG_HOTPLUG_PCI=y
 CONFIG_HOTPLUG_PCI_SHPC=y
+CONFIG_PCI_HOST_GENERIC=y
 CONFIG_PCCARD=m
 CONFIG_YENTA=m
 CONFIG_RAPIDIO=y
@@ -365,6 +377,10 @@ CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_FW_LOADER_COMPRESS=y
 CONFIG_FW_LOADER_COMPRESS_ZSTD=y
+CONFIG_EFI_ZBOOT=y
+CONFIG_EFI_BOOTLOADER_CONTROL=m
+CONFIG_EFI_CAPSULE_LOADER=m
+CONFIG_EFI_TEST=m
 CONFIG_MTD=m
 CONFIG_MTD_BLOCK=m
 CONFIG_MTD_CFI=m
@@ -586,6 +602,7 @@ CONFIG_RTW89_8852AE=m
 CONFIG_RTW89_8852CE=m
 CONFIG_ZD1211RW=m
 CONFIG_USB_NET_RNDIS_WLAN=m
+CONFIG_USB4_NET=m
 CONFIG_INPUT_MOUSEDEV=y
 CONFIG_INPUT_MOUSEDEV_PSAUX=y
 CONFIG_INPUT_EVDEV=y
@@ -691,6 +708,9 @@ CONFIG_SND_HDA_CODEC_SIGMATEL=y
 CONFIG_SND_HDA_CODEC_HDMI=y
 CONFIG_SND_HDA_CODEC_CONEXANT=y
 CONFIG_SND_USB_AUDIO=m
+CONFIG_SND_SOC=m
+CONFIG_SND_SOC_LOONGSON_CARD=m
+CONFIG_SND_VIRTIO=m
 CONFIG_HIDRAW=y
 CONFIG_UHID=m
 CONFIG_HID_A4TECH=m
@@ -738,6 +758,11 @@ CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_EFI=y
 CONFIG_RTC_DRV_LOONGSON=y
 CONFIG_DMADEVICES=y
+CONFIG_LS2X_APB_DMA=y
+CONFIG_UDMABUF=y
+CONFIG_DMABUF_HEAPS=y
+CONFIG_DMABUF_HEAPS_SYSTEM=y
+CONFIG_DMABUF_HEAPS_CMA=y
 CONFIG_UIO=m
 CONFIG_UIO_PDRV_GENIRQ=m
 CONFIG_UIO_DMEM_GENIRQ=m
@@ -778,7 +803,15 @@ CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y
 CONFIG_DEVFREQ_GOV_PERFORMANCE=y
 CONFIG_DEVFREQ_GOV_POWERSAVE=y
 CONFIG_DEVFREQ_GOV_USERSPACE=y
+CONFIG_NTB=m
+CONFIG_NTB_MSI=y
+CONFIG_NTB_IDT=m
+CONFIG_NTB_EPF=m
+CONFIG_NTB_SWITCHTEC=m
+CONFIG_NTB_PERF=m
+CONFIG_NTB_TRANSPORT=m
 CONFIG_PWM=y
+CONFIG_USB4=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
@@ -797,6 +830,10 @@ CONFIG_GFS2_FS_LOCKING_DLM=y
 CONFIG_OCFS2_FS=m
 CONFIG_BTRFS_FS=y
 CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_F2FS_FS=m
+CONFIG_F2FS_FS_SECURITY=y
+CONFIG_F2FS_CHECK_FS=y
+CONFIG_F2FS_FS_COMPRESSION=y
 CONFIG_FANOTIFY=y
 CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA=y
@@ -883,7 +920,6 @@ CONFIG_KEY_DH_OPERATIONS=y
 CONFIG_SECURITY=y
 CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
-CONFIG_SECURITY_SELINUX_DISABLE=y
 CONFIG_SECURITY_APPARMOR=y
 CONFIG_SECURITY_YAMA=y
 CONFIG_DEFAULT_SECURITY_DAC=y
@@ -914,6 +950,9 @@ CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
 CONFIG_CRYPTO_CRC32_LOONGARCH=m
 CONFIG_CRYPTO_DEV_VIRTIO=m
+CONFIG_DMA_CMA=y
+CONFIG_DMA_NUMA_CMA=y
+CONFIG_CMA_SIZE_MBYTES=0
 CONFIG_PRINTK_TIME=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/loongarch/include/asm/bootinfo.h b/arch/loongarch/include/asm/bootinfo.h
index c60796869b2b..6d5846dd075c 100644
--- a/arch/loongarch/include/asm/bootinfo.h
+++ b/arch/loongarch/include/asm/bootinfo.h
@@ -24,13 +24,15 @@ struct loongson_board_info {
 	const char *board_vendor;
 };
 
+#define NR_WORDS DIV_ROUND_UP(NR_CPUS, BITS_PER_LONG)
+
 struct loongson_system_configuration {
 	int nr_cpus;
 	int nr_nodes;
 	int boot_cpu_id;
 	int cores_per_node;
 	int cores_per_package;
-	unsigned long cores_io_master;
+	unsigned long cores_io_master[NR_WORDS];
 	unsigned long suspend_addr;
 	const char *cpuname;
 };
@@ -42,7 +44,7 @@ extern struct loongson_system_configuration loongson_sysconf;
 
 static inline bool io_master(int cpu)
 {
-	return test_bit(cpu, &loongson_sysconf.cores_io_master);
+	return test_bit(cpu, loongson_sysconf.cores_io_master);
 }
 
 #endif /* _ASM_BOOTINFO_H */
diff --git a/arch/loongarch/include/asm/crash_core.h b/arch/loongarch/include/asm/crash_core.h
new file mode 100644
index 000000000000..218bdbfa527b
--- /dev/null
+++ b/arch/loongarch/include/asm/crash_core.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _LOONGARCH_CRASH_CORE_H
+#define _LOONGARCH_CRASH_CORE_H
+
+#define CRASH_ALIGN			SZ_2M
+
+#define CRASH_ADDR_LOW_MAX		SZ_4G
+#define CRASH_ADDR_HIGH_MAX		memblock_end_of_DRAM()
+
+extern phys_addr_t memblock_end_of_DRAM(void);
+
+#endif
diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h
index 9b16a3b8e706..f16bd42456e4 100644
--- a/arch/loongarch/include/asm/elf.h
+++ b/arch/loongarch/include/asm/elf.h
@@ -241,8 +241,6 @@ void loongarch_dump_regs64(u64 *uregs, const struct pt_regs *regs);
 do {									\
 	current->thread.vdso = &vdso_info;				\
 									\
-	loongarch_set_personality_fcsr(state);				\
-									\
 	if (personality(current->personality) != PER_LINUX)		\
 		set_personality(PER_LINUX);				\
 } while (0)
@@ -259,7 +257,6 @@ do {									\
 	clear_thread_flag(TIF_32BIT_ADDR);				\
 									\
 	current->thread.vdso = &vdso_info;				\
-	loongarch_set_personality_fcsr(state);				\
 									\
 	p = personality(current->personality);				\
 	if (p != PER_LINUX32 && p != PER_LINUX)				\
@@ -340,6 +337,4 @@ extern int arch_elf_pt_proc(void *ehdr, void *phdr, struct file *elf,
 extern int arch_check_elf(void *ehdr, bool has_interpreter, void *interp_ehdr,
 			  struct arch_elf_state *state);
 
-extern void loongarch_set_personality_fcsr(struct arch_elf_state *state);
-
 #endif /* _ASM_ELF_H */
diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h
index a11996eb5892..de891c2c83d4 100644
--- a/arch/loongarch/include/asm/ftrace.h
+++ b/arch/loongarch/include/asm/ftrace.h
@@ -63,7 +63,7 @@ ftrace_regs_get_instruction_pointer(struct ftrace_regs *fregs)
 static __always_inline void
 ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long ip)
 {
-	regs_set_return_value(&fregs->regs, ip);
+	instruction_pointer_set(&fregs->regs, ip);
 }
 
 #define ftrace_regs_get_argument(fregs, n) \
diff --git a/arch/loongarch/include/asm/shmparam.h b/arch/loongarch/include/asm/shmparam.h
deleted file mode 100644
index c9554f48d2df..000000000000
--- a/arch/loongarch/include/asm/shmparam.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
- */
-#ifndef _ASM_SHMPARAM_H
-#define _ASM_SHMPARAM_H
-
-#define __ARCH_FORCE_SHMLBA	1
-
-#define	SHMLBA	SZ_64K		 /* attach addr a multiple of this */
-
-#endif /* _ASM_SHMPARAM_H */
diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c
index 8e00a754e548..b6b097bbf866 100644
--- a/arch/loongarch/kernel/acpi.c
+++ b/arch/loongarch/kernel/acpi.c
@@ -119,7 +119,7 @@ acpi_parse_eio_master(union acpi_subtable_headers *header, const unsigned long e
 		return -EINVAL;
 
 	core = eiointc->node * CORES_PER_EIO_NODE;
-	set_bit(core, &(loongson_sysconf.cores_io_master));
+	set_bit(core, loongson_sysconf.cores_io_master);
 
 	return 0;
 }
diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c
index acb5d3385675..000825406c1f 100644
--- a/arch/loongarch/kernel/efi.c
+++ b/arch/loongarch/kernel/efi.c
@@ -140,4 +140,6 @@ void __init efi_init(void)
 
 		early_memunmap(tbl, sizeof(*tbl));
 	}
+
+	efi_esrt_init();
 }
diff --git a/arch/loongarch/kernel/elf.c b/arch/loongarch/kernel/elf.c
index 183e94fc9c69..0fa81ced28dc 100644
--- a/arch/loongarch/kernel/elf.c
+++ b/arch/loongarch/kernel/elf.c
@@ -23,8 +23,3 @@ int arch_check_elf(void *_ehdr, bool has_interpreter, void *_interp_ehdr,
 {
 	return 0;
 }
-
-void loongarch_set_personality_fcsr(struct arch_elf_state *state)
-{
-	current->thread.fpu.fcsr = boot_cpu_data.fpu_csr0;
-}
diff --git a/arch/loongarch/kernel/env.c b/arch/loongarch/kernel/env.c
index 6b3bfb0092e6..2f1f5b08638f 100644
--- a/arch/loongarch/kernel/env.c
+++ b/arch/loongarch/kernel/env.c
@@ -5,13 +5,16 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 #include <linux/acpi.h>
+#include <linux/clk.h>
 #include <linux/efi.h>
 #include <linux/export.h>
 #include <linux/memblock.h>
+#include <linux/of_clk.h>
 #include <asm/early_ioremap.h>
 #include <asm/bootinfo.h>
 #include <asm/loongson.h>
 #include <asm/setup.h>
+#include <asm/time.h>
 
 u64 efi_system_table;
 struct loongson_system_configuration loongson_sysconf;
@@ -36,7 +39,16 @@ void __init init_environ(void)
 
 static int __init init_cpu_fullname(void)
 {
-	int cpu;
+	struct device_node *root;
+	int cpu, ret;
+	char *model;
+
+	/* Parsing cpuname from DTS model property */
+	root = of_find_node_by_path("/");
+	ret = of_property_read_string(root, "model", (const char **)&model);
+	of_node_put(root);
+	if (ret == 0)
+		loongson_sysconf.cpuname = strsep(&model, " ");
 
 	if (loongson_sysconf.cpuname && !strncmp(loongson_sysconf.cpuname, "Loongson", 8)) {
 		for (cpu = 0; cpu < NR_CPUS; cpu++)
@@ -46,6 +58,26 @@ static int __init init_cpu_fullname(void)
 }
 arch_initcall(init_cpu_fullname);
 
+static int __init fdt_cpu_clk_init(void)
+{
+	struct clk *clk;
+	struct device_node *np;
+
+	np = of_get_cpu_node(0, NULL);
+	if (!np)
+		return -ENODEV;
+
+	clk = of_clk_get(np, 0);
+	if (IS_ERR(clk))
+		return -ENODEV;
+
+	cpu_clock_freq = clk_get_rate(clk);
+	clk_put(clk);
+
+	return 0;
+}
+late_initcall(fdt_cpu_clk_init);
+
 static ssize_t boardinfo_show(struct kobject *kobj,
 			      struct kobj_attribute *attr, char *buf)
 {
diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
index 0ecab4216392..c4f7de2e2805 100644
--- a/arch/loongarch/kernel/head.S
+++ b/arch/loongarch/kernel/head.S
@@ -74,6 +74,11 @@ SYM_CODE_START(kernel_entry)			# kernel entry point
 	la.pcrel	t0, fw_arg2
 	st.d		a2, t0, 0
 
+#ifdef CONFIG_PAGE_SIZE_4KB
+	li.d		t0, 0
+	li.d		t1, CSR_STFILL
+	csrxchg		t0, t1, LOONGARCH_CSR_IMPCTL1
+#endif
 	/* KSave3 used for percpu base, initialized as 0 */
 	csrwr		zero, PERCPU_BASE_KS
 	/* GPR21 used for percpu base (runtime), initialized as 0 */
@@ -126,6 +131,11 @@ SYM_CODE_START(smpboot_entry)
 
 	JUMP_VIRT_ADDR	t0, t1
 
+#ifdef CONFIG_PAGE_SIZE_4KB
+	li.d		t0, 0
+	li.d		t1, CSR_STFILL
+	csrxchg		t0, t1, LOONGARCH_CSR_IMPCTL1
+#endif
 	/* Enable PG */
 	li.w		t0, 0xb0		# PLV=0, IE=0, PG=1
 	csrwr		t0, LOONGARCH_CSR_CRMD
diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
index 767d94cce0de..f2ff8b5d591e 100644
--- a/arch/loongarch/kernel/process.c
+++ b/arch/loongarch/kernel/process.c
@@ -85,6 +85,7 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
 	regs->csr_euen = euen;
 	lose_fpu(0);
 	lose_lbt(0);
+	current->thread.fpu.fcsr = boot_cpu_data.fpu_csr0;
 
 	clear_thread_flag(TIF_LSX_CTX_LIVE);
 	clear_thread_flag(TIF_LASX_CTX_LIVE);
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index d183a745fb85..edf2bba80130 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -252,38 +252,23 @@ static void __init arch_reserve_vmcore(void)
 #endif
 }
 
-/* 2MB alignment for crash kernel regions */
-#define CRASH_ALIGN	SZ_2M
-#define CRASH_ADDR_MAX	SZ_4G
-
-static void __init arch_parse_crashkernel(void)
+static void __init arch_reserve_crashkernel(void)
 {
-#ifdef CONFIG_KEXEC
 	int ret;
-	unsigned long long total_mem;
+	unsigned long long low_size = 0;
 	unsigned long long crash_base, crash_size;
+	char *cmdline = boot_command_line;
+	bool high = false;
 
-	total_mem = memblock_phys_mem_size();
-	ret = parse_crashkernel(boot_command_line, total_mem,
-				&crash_size, &crash_base,
-				NULL, NULL);
-	if (ret < 0 || crash_size <= 0)
+	if (!IS_ENABLED(CONFIG_KEXEC_CORE))
 		return;
 
-	if (crash_base <= 0) {
-		crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, CRASH_ALIGN, CRASH_ADDR_MAX);
-		if (!crash_base) {
-			pr_warn("crashkernel reservation failed - No suitable area found.\n");
-			return;
-		}
-	} else if (!memblock_phys_alloc_range(crash_size, CRASH_ALIGN, crash_base, crash_base + crash_size)) {
-		pr_warn("Invalid memory region reserved for crash kernel\n");
+	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
+				&crash_size, &crash_base, &low_size, &high);
+	if (ret)
 		return;
-	}
 
-	crashk_res.start = crash_base;
-	crashk_res.end	 = crash_base + crash_size - 1;
-#endif
+	reserve_crashkernel_generic(cmdline, crash_size, crash_base, low_size, high);
 }
 
 static void __init fdt_setup(void)
@@ -295,8 +280,12 @@ static void __init fdt_setup(void)
 	if (acpi_os_get_root_pointer())
 		return;
 
-	/* Look for a device tree configuration table entry */
-	fdt_pointer = efi_fdt_pointer();
+	/* Prefer to use built-in dtb, checking its legality first. */
+	if (!fdt_check_header(__dtb_start))
+		fdt_pointer = __dtb_start;
+	else
+		fdt_pointer = efi_fdt_pointer(); /* Fallback to firmware dtb */
+
 	if (!fdt_pointer || fdt_check_header(fdt_pointer))
 		return;
 
@@ -330,7 +319,9 @@ static void __init bootcmdline_init(char **cmdline_p)
 		if (boot_command_line[0])
 			strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
 
-		strlcat(boot_command_line, init_command_line, COMMAND_LINE_SIZE);
+		if (!strstr(boot_command_line, init_command_line))
+			strlcat(boot_command_line, init_command_line, COMMAND_LINE_SIZE);
+
 		goto out;
 	}
 #endif
@@ -357,7 +348,7 @@ out:
 void __init platform_init(void)
 {
 	arch_reserve_vmcore();
-	arch_parse_crashkernel();
+	arch_reserve_crashkernel();
 
 #ifdef CONFIG_ACPI_TABLE_UPGRADE
 	acpi_table_upgrade();
@@ -467,15 +458,6 @@ static void __init resource_init(void)
 		request_resource(res, &data_resource);
 		request_resource(res, &bss_resource);
 	}
-
-#ifdef CONFIG_KEXEC
-	if (crashk_res.start < crashk_res.end) {
-		insert_resource(&iomem_resource, &crashk_res);
-		pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
-			(unsigned long)((crashk_res.end - crashk_res.start + 1) >> 20),
-			(unsigned long)(crashk_res.start  >> 20));
-	}
-#endif
 }
 
 static int __init add_legacy_isa_io(struct fwnode_handle *fwnode,
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 5bca12d16e06..a16e3dbe9f09 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -208,7 +208,7 @@ static void __init fdt_smp_setup(void)
 	}
 
 	loongson_sysconf.nr_cpus = num_processors;
-	set_bit(0, &(loongson_sysconf.cores_io_master));
+	set_bit(0, loongson_sysconf.cores_io_master);
 #endif
 }
 
@@ -216,6 +216,9 @@ void __init loongson_smp_setup(void)
 {
 	fdt_smp_setup();
 
+	if (loongson_sysconf.cores_per_package == 0)
+		loongson_sysconf.cores_per_package = num_processors;
+
 	cpu_data[0].core = cpu_logical_map(0) % loongson_sysconf.cores_per_package;
 	cpu_data[0].package = cpu_logical_map(0) / loongson_sysconf.cores_per_package;
 
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 4fcd6cd6da23..e73323d759d0 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -201,6 +201,11 @@ bool bpf_jit_supports_kfunc_call(void)
 	return true;
 }
 
+bool bpf_jit_supports_far_kfunc_call(void)
+{
+	return true;
+}
+
 /* initialized on the first pass of build_body() */
 static int out_offset = -1;
 static int emit_bpf_tail_call(struct jit_ctx *ctx)
@@ -465,7 +470,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
 	const u8 dst = regmap[insn->dst_reg];
 	const s16 off = insn->off;
 	const s32 imm = insn->imm;
-	const u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm;
 	const bool is32 = BPF_CLASS(insn->code) == BPF_ALU || BPF_CLASS(insn->code) == BPF_JMP32;
 
 	switch (code) {
@@ -923,8 +927,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
 
 	/* dst = imm64 */
 	case BPF_LD | BPF_IMM | BPF_DW:
+	{
+		const u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm;
+
 		move_imm(ctx, dst, imm64, is32);
 		return 1;
+	}
 
 	/* dst = *(size *)(src + off) */
 	case BPF_LDX | BPF_MEM | BPF_B:
diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig
index b51f738a39a0..4714074c8bd7 100644
--- a/arch/mips/configs/ip27_defconfig
+++ b/arch/mips/configs/ip27_defconfig
@@ -287,7 +287,8 @@ CONFIG_BTRFS_FS_POSIX_ACL=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 CONFIG_FUSE_FS=m
 CONFIG_CUSE=m
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
 CONFIG_FSCACHE_STATS=y
 CONFIG_CACHEFILES=m
 CONFIG_PROC_KCORE=y
diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig
index 38f17b658421..3389e6e885d9 100644
--- a/arch/mips/configs/lemote2f_defconfig
+++ b/arch/mips/configs/lemote2f_defconfig
@@ -238,7 +238,8 @@ CONFIG_BTRFS_FS=m
 CONFIG_QUOTA=y
 CONFIG_QFMT_V2=m
 CONFIG_AUTOFS_FS=m
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
 CONFIG_CACHEFILES=m
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig
index 07839a4b397e..78f498752066 100644
--- a/arch/mips/configs/loongson3_defconfig
+++ b/arch/mips/configs/loongson3_defconfig
@@ -356,7 +356,8 @@ CONFIG_QFMT_V2=m
 CONFIG_AUTOFS_FS=y
 CONFIG_FUSE_FS=m
 CONFIG_VIRTIO_FS=m
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
 CONFIG_MSDOS_FS=m
diff --git a/arch/mips/configs/pic32mzda_defconfig b/arch/mips/configs/pic32mzda_defconfig
index 166d2ad372d1..54774f90c23e 100644
--- a/arch/mips/configs/pic32mzda_defconfig
+++ b/arch/mips/configs/pic32mzda_defconfig
@@ -68,7 +68,8 @@ CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
 CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=m
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
 CONFIG_ZISOFS=y
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 414b978b8010..b9fc064d38d2 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -859,6 +859,7 @@ config THREAD_SHIFT
 	int "Thread shift" if EXPERT
 	range 13 15
 	default "15" if PPC_256K_PAGES
+	default "15" if PPC_PSERIES || PPC_POWERNV
 	default "14" if PPC64
 	default "13"
 	help
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index b549499eb363..bffbd869a068 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -53,6 +53,7 @@ config RISCV
 	select ARCH_USE_MEMTEST
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_USES_CFI_TRAPS if CFI_CLANG
+	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT
@@ -66,9 +67,10 @@ config RISCV
 	select CLINT_TIMER if !MMU
 	select CLONE_BACKWARDS
 	select COMMON_CLK
-	select CPU_PM if CPU_IDLE || HIBERNATION
+	select CPU_PM if CPU_IDLE || HIBERNATION || SUSPEND
 	select EDAC_SUPPORT
 	select FRAME_POINTER if PERF_EVENTS || (FUNCTION_TRACER && !DYNAMIC_FTRACE)
+	select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY if DYNAMIC_FTRACE
 	select GENERIC_ARCH_TOPOLOGY
 	select GENERIC_ATOMIC64 if !64BIT
 	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
@@ -115,6 +117,7 @@ config RISCV
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DMA_CONTIGUOUS if MMU
 	select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && (CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE)
+	select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
 	select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
 	select HAVE_FUNCTION_GRAPH_TRACER
@@ -142,6 +145,8 @@ config RISCV
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RETHOOK if !XIP_KERNEL
 	select HAVE_RSEQ
+	select HAVE_SAMPLE_FTRACE_DIRECT
+	select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
 	select HAVE_STACKPROTECTOR
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
@@ -183,6 +188,20 @@ config HAVE_SHADOW_CALL_STACK
 	# https://github.com/riscv-non-isa/riscv-elf-psabi-doc/commit/a484e843e6eeb51f0cb7b8819e50da6d2444d769
 	depends on $(ld-option,--no-relax-gp)
 
+config RISCV_USE_LINKER_RELAXATION
+	def_bool y
+	# https://github.com/llvm/llvm-project/commit/6611d58f5bbcbec77262d392e2923e1d680f6985
+	depends on !LD_IS_LLD || LLD_VERSION >= 150000
+
+# https://github.com/llvm/llvm-project/commit/bbc0f99f3bc96f1db16f649fc21dd18e5b0918f6
+config ARCH_HAS_BROKEN_DWARF5
+	def_bool y
+	depends on RISCV_USE_LINKER_RELAXATION
+	# https://github.com/llvm/llvm-project/commit/1df5ea29b43690b6622db2cad7b745607ca4de6a
+	depends on AS_IS_LLVM && AS_VERSION < 180000
+	# https://github.com/llvm/llvm-project/commit/7ffabb61a5569444b5ac9322e22e5471cc5e4a77
+	depends on LD_IS_LLD && LLD_VERSION < 180000
+
 config ARCH_MMAP_RND_BITS_MIN
 	default 18 if 64BIT
 	default 8
@@ -529,6 +548,28 @@ config RISCV_ISA_V_DEFAULT_ENABLE
 
 	  If you don't know what to do here, say Y.
 
+config RISCV_ISA_V_UCOPY_THRESHOLD
+	int "Threshold size for vectorized user copies"
+	depends on RISCV_ISA_V
+	default 768
+	help
+	  Prefer using vectorized copy_to_user()/copy_from_user() when the
+	  workload size exceeds this value.
+
+config RISCV_ISA_V_PREEMPTIVE
+	bool "Run kernel-mode Vector with kernel preemption"
+	depends on PREEMPTION
+	depends on RISCV_ISA_V
+	default y
+	help
+	  Usually, in-kernel SIMD routines are run with preemption disabled.
+	  Functions which invoke long-running SIMD thus must yield the core's
+	  vector unit to prevent blocking other tasks for too long.
+
+	  This config allows the kernel to run SIMD without explicitly disabling
+	  preemption. Enabling this config will result in higher memory
+	  consumption due to the allocation of a per-task kernel Vector context.
+
 config TOOLCHAIN_HAS_ZBB
 	bool
 	default y
@@ -655,6 +696,20 @@ config RISCV_MISALIGNED
 	  load/store for both kernel and userspace. When disable, misaligned
 	  accesses will generate SIGBUS in userspace and panic in kernel.
 
+config RISCV_EFFICIENT_UNALIGNED_ACCESS
+	bool "Assume the CPU supports fast unaligned memory accesses"
+	depends on NONPORTABLE
+	select DCACHE_WORD_ACCESS if MMU
+	select HAVE_EFFICIENT_UNALIGNED_ACCESS
+	help
+	  Say Y here if you want the kernel to assume that the CPU supports
+	  efficient unaligned memory accesses.  When enabled, this option
+	  improves the performance of the kernel on such CPUs.  However, the
+	  kernel will run much more slowly, or will not be able to run at all,
+	  on CPUs that do not support efficient unaligned memory accesses.
+
+	  If unsure what to do here, say N.
+
 endmenu # "Platform type"
 
 menu "Kernel features"
diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata
index f5c432b005e7..910ba8837add 100644
--- a/arch/riscv/Kconfig.errata
+++ b/arch/riscv/Kconfig.errata
@@ -98,6 +98,7 @@ config ERRATA_THEAD_CMO
 	depends on ERRATA_THEAD && MMU
 	select DMA_DIRECT_REMAP
 	select RISCV_DMA_NONCOHERENT
+	select RISCV_NONSTANDARD_CACHE_OPS
 	default y
 	help
 	  This will apply the cache management errata to handle the
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index a74be78678eb..0b7d109258e7 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -43,8 +43,7 @@ else
 	KBUILD_LDFLAGS += -melf32lriscv
 endif
 
-ifeq ($(CONFIG_LD_IS_LLD),y)
-ifeq ($(call test-lt, $(CONFIG_LLD_VERSION), 150000),y)
+ifndef CONFIG_RISCV_USE_LINKER_RELAXATION
 	KBUILD_CFLAGS += -mno-relax
 	KBUILD_AFLAGS += -mno-relax
 ifndef CONFIG_AS_IS_LLVM
@@ -52,7 +51,6 @@ ifndef CONFIG_AS_IS_LLVM
 	KBUILD_AFLAGS += -Wa,-mno-relax
 endif
 endif
-endif
 
 ifeq ($(CONFIG_SHADOW_CALL_STACK),y)
 	KBUILD_LDFLAGS += --no-relax-gp
@@ -108,7 +106,9 @@ KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax)
 # unaligned accesses.  While unaligned accesses are explicitly allowed in the
 # RISC-V ISA, they're emulated by machine mode traps on all extant
 # architectures.  It's faster to have GCC emit only aligned accesses.
+ifneq ($(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS),y)
 KBUILD_CFLAGS += $(call cc-option,-mstrict-align)
+endif
 
 ifeq ($(CONFIG_STACKPROTECTOR_PER_TASK),y)
 prepare: stack_protector_prepare
@@ -163,6 +163,8 @@ BOOT_TARGETS := Image Image.gz loader loader.bin xipImage vmlinuz.efi
 
 all:	$(notdir $(KBUILD_IMAGE))
 
+loader.bin: loader
+Image.gz loader vmlinuz.efi: Image
 $(BOOT_TARGETS): vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 	@$(kecho) '  Kernel: $(boot)/$@ is ready'
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index 905881282a7c..eaf34e871e30 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -149,6 +149,7 @@ CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_DW=y
 CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_SH_SCI=y
+CONFIG_SERIAL_EARLYCON_RISCV_SBI=y
 CONFIG_VIRTIO_CONSOLE=y
 CONFIG_HW_RANDOM=y
 CONFIG_HW_RANDOM_VIRTIO=y
diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
index 0554ed4bf087..b1c410bbc1ae 100644
--- a/arch/riscv/errata/thead/errata.c
+++ b/arch/riscv/errata/thead/errata.c
@@ -12,8 +12,10 @@
 #include <asm/alternative.h>
 #include <asm/cacheflush.h>
 #include <asm/cpufeature.h>
+#include <asm/dma-noncoherent.h>
 #include <asm/errata_list.h>
 #include <asm/hwprobe.h>
+#include <asm/io.h>
 #include <asm/patch.h>
 #include <asm/vendorid_list.h>
 
@@ -33,6 +35,69 @@ static bool errata_probe_pbmt(unsigned int stage,
 	return false;
 }
 
+/*
+ * th.dcache.ipa rs1 (invalidate, physical address)
+ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
+ *   0000001    01010      rs1       000      00000  0001011
+ * th.dcache.iva rs1 (invalidate, virtual address)
+ *   0000001    00110      rs1       000      00000  0001011
+ *
+ * th.dcache.cpa rs1 (clean, physical address)
+ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
+ *   0000001    01001      rs1       000      00000  0001011
+ * th.dcache.cva rs1 (clean, virtual address)
+ *   0000001    00101      rs1       000      00000  0001011
+ *
+ * th.dcache.cipa rs1 (clean then invalidate, physical address)
+ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
+ *   0000001    01011      rs1       000      00000  0001011
+ * th.dcache.civa rs1 (clean then invalidate, virtual address)
+ *   0000001    00111      rs1       000      00000  0001011
+ *
+ * th.sync.s (make sure all cache operations finished)
+ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
+ *   0000000    11001     00000      000      00000  0001011
+ */
+#define THEAD_INVAL_A0	".long 0x02a5000b"
+#define THEAD_CLEAN_A0	".long 0x0295000b"
+#define THEAD_FLUSH_A0	".long 0x02b5000b"
+#define THEAD_SYNC_S	".long 0x0190000b"
+
+#define THEAD_CMO_OP(_op, _start, _size, _cachesize)			\
+asm volatile("mv a0, %1\n\t"						\
+	     "j 2f\n\t"							\
+	     "3:\n\t"							\
+	     THEAD_##_op##_A0 "\n\t"					\
+	     "add a0, a0, %0\n\t"					\
+	     "2:\n\t"							\
+	     "bltu a0, %2, 3b\n\t"					\
+	     THEAD_SYNC_S						\
+	     : : "r"(_cachesize),					\
+		 "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)),	\
+		 "r"((unsigned long)(_start) + (_size))			\
+	     : "a0")
+
+static void thead_errata_cache_inv(phys_addr_t paddr, size_t size)
+{
+	THEAD_CMO_OP(INVAL, paddr, size, riscv_cbom_block_size);
+}
+
+static void thead_errata_cache_wback(phys_addr_t paddr, size_t size)
+{
+	THEAD_CMO_OP(CLEAN, paddr, size, riscv_cbom_block_size);
+}
+
+static void thead_errata_cache_wback_inv(phys_addr_t paddr, size_t size)
+{
+	THEAD_CMO_OP(FLUSH, paddr, size, riscv_cbom_block_size);
+}
+
+static const struct riscv_nonstd_cache_ops thead_errata_cmo_ops = {
+	.wback = &thead_errata_cache_wback,
+	.inv = &thead_errata_cache_inv,
+	.wback_inv = &thead_errata_cache_wback_inv,
+};
+
 static bool errata_probe_cmo(unsigned int stage,
 			     unsigned long arch_id, unsigned long impid)
 {
@@ -48,6 +113,7 @@ static bool errata_probe_cmo(unsigned int stage,
 	if (stage == RISCV_ALTERNATIVES_BOOT) {
 		riscv_cbom_block_size = L1_CACHE_BYTES;
 		riscv_noncoherent_supported();
+		riscv_noncoherent_register_cache_ops(&thead_errata_cmo_ops);
 	}
 
 	return true;
@@ -77,8 +143,7 @@ static u32 thead_errata_probe(unsigned int stage,
 	if (errata_probe_pbmt(stage, archid, impid))
 		cpu_req_errata |= BIT(ERRATA_THEAD_PBMT);
 
-	if (errata_probe_cmo(stage, archid, impid))
-		cpu_req_errata |= BIT(ERRATA_THEAD_CMO);
+	errata_probe_cmo(stage, archid, impid);
 
 	if (errata_probe_pmu(stage, archid, impid))
 		cpu_req_errata |= BIT(ERRATA_THEAD_PMU);
diff --git a/arch/riscv/include/asm/arch_hweight.h b/arch/riscv/include/asm/arch_hweight.h
new file mode 100644
index 000000000000..c20236a0725b
--- /dev/null
+++ b/arch/riscv/include/asm/arch_hweight.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Based on arch/x86/include/asm/arch_hweight.h
+ */
+
+#ifndef _ASM_RISCV_HWEIGHT_H
+#define _ASM_RISCV_HWEIGHT_H
+
+#include <asm/alternative-macros.h>
+#include <asm/hwcap.h>
+
+#if (BITS_PER_LONG == 64)
+#define CPOPW	"cpopw "
+#elif (BITS_PER_LONG == 32)
+#define CPOPW	"cpop "
+#else
+#error "Unexpected BITS_PER_LONG"
+#endif
+
+static __always_inline unsigned int __arch_hweight32(unsigned int w)
+{
+#ifdef CONFIG_RISCV_ISA_ZBB
+	asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+				      RISCV_ISA_EXT_ZBB, 1)
+			  : : : : legacy);
+
+	asm (".option push\n"
+	     ".option arch,+zbb\n"
+	     CPOPW "%0, %0\n"
+	     ".option pop\n"
+	     : "+r" (w) : :);
+
+	return w;
+
+legacy:
+#endif
+	return __sw_hweight32(w);
+}
+
+static inline unsigned int __arch_hweight16(unsigned int w)
+{
+	return __arch_hweight32(w & 0xffff);
+}
+
+static inline unsigned int __arch_hweight8(unsigned int w)
+{
+	return __arch_hweight32(w & 0xff);
+}
+
+#if BITS_PER_LONG == 64
+static __always_inline unsigned long __arch_hweight64(__u64 w)
+{
+# ifdef CONFIG_RISCV_ISA_ZBB
+	asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+				      RISCV_ISA_EXT_ZBB, 1)
+			  : : : : legacy);
+
+	asm (".option push\n"
+	     ".option arch,+zbb\n"
+	     "cpop %0, %0\n"
+	     ".option pop\n"
+	     : "+r" (w) : :);
+
+	return w;
+
+legacy:
+# endif
+	return __sw_hweight64(w);
+}
+#else /* BITS_PER_LONG == 64 */
+static inline unsigned long __arch_hweight64(__u64 w)
+{
+	return  __arch_hweight32((u32)w) +
+		__arch_hweight32((u32)(w >> 32));
+}
+#endif /* !(BITS_PER_LONG == 64) */
+
+#endif /* _ASM_RISCV_HWEIGHT_H */
diff --git a/arch/riscv/include/asm/archrandom.h b/arch/riscv/include/asm/archrandom.h
new file mode 100644
index 000000000000..5345360adfb9
--- /dev/null
+++ b/arch/riscv/include/asm/archrandom.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Kernel interface for the RISCV arch_random_* functions
+ *
+ * Copyright (c) 2023 Rivos Inc.
+ *
+ */
+
+#ifndef ASM_RISCV_ARCHRANDOM_H
+#define ASM_RISCV_ARCHRANDOM_H
+
+#include <asm/csr.h>
+#include <asm/processor.h>
+
+#define SEED_RETRY_LOOPS 100
+
+static inline bool __must_check csr_seed_long(unsigned long *v)
+{
+	unsigned int retry = SEED_RETRY_LOOPS, valid_seeds = 0;
+	const int needed_seeds = sizeof(long) / sizeof(u16);
+	u16 *entropy = (u16 *)v;
+
+	do {
+		/*
+		 * The SEED CSR must be accessed with a read-write instruction.
+		 */
+		unsigned long csr_seed = csr_swap(CSR_SEED, 0);
+		unsigned long opst = csr_seed & SEED_OPST_MASK;
+
+		switch (opst) {
+		case SEED_OPST_ES16:
+			entropy[valid_seeds++] = csr_seed & SEED_ENTROPY_MASK;
+			if (valid_seeds == needed_seeds)
+				return true;
+			break;
+
+		case SEED_OPST_DEAD:
+			pr_err_once("archrandom: Unrecoverable error\n");
+			return false;
+
+		case SEED_OPST_BIST:
+		case SEED_OPST_WAIT:
+		default:
+			cpu_relax();
+			continue;
+		}
+	} while (--retry);
+
+	return false;
+}
+
+static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs)
+{
+	return 0;
+}
+
+static inline size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs)
+{
+	if (!max_longs)
+		return 0;
+
+	/*
+	 * If Zkr is supported and csr_seed_long succeeds, we return one long
+	 * worth of entropy.
+	 */
+	if (riscv_has_extension_likely(RISCV_ISA_EXT_ZKR) && csr_seed_long(v))
+		return 1;
+
+	return 0;
+}
+
+#endif /* ASM_RISCV_ARCHRANDOM_H */
diff --git a/arch/riscv/include/asm/asm-extable.h b/arch/riscv/include/asm/asm-extable.h
index 00a96e7a9664..0c8bfd54fc4e 100644
--- a/arch/riscv/include/asm/asm-extable.h
+++ b/arch/riscv/include/asm/asm-extable.h
@@ -6,6 +6,7 @@
 #define EX_TYPE_FIXUP			1
 #define EX_TYPE_BPF			2
 #define EX_TYPE_UACCESS_ERR_ZERO	3
+#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD	4
 
 #ifdef CONFIG_MMU
 
@@ -47,6 +48,11 @@
 #define EX_DATA_REG_ZERO_SHIFT	5
 #define EX_DATA_REG_ZERO	GENMASK(9, 5)
 
+#define EX_DATA_REG_DATA_SHIFT	0
+#define EX_DATA_REG_DATA	GENMASK(4, 0)
+#define EX_DATA_REG_ADDR_SHIFT	5
+#define EX_DATA_REG_ADDR	GENMASK(9, 5)
+
 #define EX_DATA_REG(reg, gpr)						\
 	"((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")"
 
@@ -62,6 +68,15 @@
 #define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err)			\
 	_ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero)
 
+#define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr)		\
+	__DEFINE_ASM_GPR_NUMS							\
+	__ASM_EXTABLE_RAW(#insn, #fixup,					\
+			  __stringify(EX_TYPE_LOAD_UNALIGNED_ZEROPAD),		\
+			  "("							\
+			    EX_DATA_REG(DATA, data) " | "			\
+			    EX_DATA_REG(ADDR, addr)				\
+			  ")")
+
 #endif /* __ASSEMBLY__ */
 
 #else /* CONFIG_MMU */
diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h
index 36b955c762ba..cd627ec289f1 100644
--- a/arch/riscv/include/asm/asm-prototypes.h
+++ b/arch/riscv/include/asm/asm-prototypes.h
@@ -9,6 +9,33 @@ long long __lshrti3(long long a, int b);
 long long __ashrti3(long long a, int b);
 long long __ashlti3(long long a, int b);
 
+#ifdef CONFIG_RISCV_ISA_V
+
+#ifdef CONFIG_MMU
+asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n);
+#endif /* CONFIG_MMU  */
+
+void xor_regs_2_(unsigned long bytes, unsigned long *__restrict p1,
+		 const unsigned long *__restrict p2);
+void xor_regs_3_(unsigned long bytes, unsigned long *__restrict p1,
+		 const unsigned long *__restrict p2,
+		 const unsigned long *__restrict p3);
+void xor_regs_4_(unsigned long bytes, unsigned long *__restrict p1,
+		 const unsigned long *__restrict p2,
+		 const unsigned long *__restrict p3,
+		 const unsigned long *__restrict p4);
+void xor_regs_5_(unsigned long bytes, unsigned long *__restrict p1,
+		 const unsigned long *__restrict p2,
+		 const unsigned long *__restrict p3,
+		 const unsigned long *__restrict p4,
+		 const unsigned long *__restrict p5);
+
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs);
+asmlinkage void riscv_v_context_nesting_end(struct pt_regs *regs);
+#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */
+
+#endif /* CONFIG_RISCV_ISA_V */
 
 #define DECLARE_DO_ERROR_INFO(name)	asmlinkage void name(struct pt_regs *regs)
 
diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
index 224b4dc02b50..9ffc35537024 100644
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -271,7 +271,9 @@ legacy:
 #include <asm-generic/bitops/fls64.h>
 #include <asm-generic/bitops/sched.h>
 
-#include <asm-generic/bitops/hweight.h>
+#include <asm/arch_hweight.h>
+
+#include <asm-generic/bitops/const_hweight.h>
 
 #if (BITS_PER_LONG == 64)
 #define __AMO(op)	"amo" #op ".d"
diff --git a/arch/riscv/include/asm/checksum.h b/arch/riscv/include/asm/checksum.h
new file mode 100644
index 000000000000..a5b60b54b101
--- /dev/null
+++ b/arch/riscv/include/asm/checksum.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Checksum routines
+ *
+ * Copyright (C) 2023 Rivos Inc.
+ */
+#ifndef __ASM_RISCV_CHECKSUM_H
+#define __ASM_RISCV_CHECKSUM_H
+
+#include <linux/in6.h>
+#include <linux/uaccess.h>
+
+#define ip_fast_csum ip_fast_csum
+
+extern unsigned int do_csum(const unsigned char *buff, int len);
+#define do_csum do_csum
+
+/* Default version is sufficient for 32 bit */
+#ifndef CONFIG_32BIT
+#define _HAVE_ARCH_IPV6_CSUM
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+			const struct in6_addr *daddr,
+			__u32 len, __u8 proto, __wsum sum);
+#endif
+
+/* Define riscv versions of functions before importing asm-generic/checksum.h */
+#include <asm-generic/checksum.h>
+
+/**
+ * ip_fast_csum - quickly compute the checksum of an IPv4 header
+ * @iph: the IPv4 header; assumed to be word aligned given that NET_IP_ALIGN
+ *  is set to 2 on riscv, defining IP headers to be aligned.
+ * @ihl: the number of 32 bit segments and must be greater than or equal to 5.
+ *
+ * Assumes that IPv4 headers are always a multiple of 32 bits long.
+ */
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+	unsigned long csum = 0;
+	int pos = 0;
+
+	do {
+		csum += ((const unsigned int *)iph)[pos];
+		if (IS_ENABLED(CONFIG_32BIT))
+			csum += csum < ((const unsigned int *)iph)[pos];
+	} while (++pos < ihl);
+
+	/*
+	 * ZBB only saves three instructions on 32-bit and five on 64-bit so not
+	 * worth checking if supported without Alternatives.
+	 */
+	if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
+	    IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
+		unsigned long fold_temp;
+
+		asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
+					      RISCV_ISA_EXT_ZBB, 1)
+		    :
+		    :
+		    :
+		    : no_zbb);
+
+		if (IS_ENABLED(CONFIG_32BIT)) {
+			asm(".option push				\n\
+			.option arch,+zbb				\n\
+				not	%[fold_temp], %[csum]		\n\
+				rori	%[csum], %[csum], 16		\n\
+				sub	%[csum], %[fold_temp], %[csum]	\n\
+			.option pop"
+			: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp));
+		} else {
+			asm(".option push				\n\
+			.option arch,+zbb				\n\
+				rori	%[fold_temp], %[csum], 32	\n\
+				add	%[csum], %[fold_temp], %[csum]	\n\
+				srli	%[csum], %[csum], 32		\n\
+				not	%[fold_temp], %[csum]		\n\
+				roriw	%[csum], %[csum], 16		\n\
+				subw	%[csum], %[fold_temp], %[csum]	\n\
+			.option pop"
+			: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp));
+		}
+		return (__force __sum16)(csum >> 16);
+	}
+no_zbb:
+#ifndef CONFIG_32BIT
+	csum += ror64(csum, 32);
+	csum >>= 32;
+#endif
+	return csum_fold((__force __wsum)csum);
+}
+
+#endif /* __ASM_RISCV_CHECKSUM_H */
diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
index fbdde8b8a47e..5a626ed2c47a 100644
--- a/arch/riscv/include/asm/cpufeature.h
+++ b/arch/riscv/include/asm/cpufeature.h
@@ -135,4 +135,6 @@ static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsi
 	return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
 }
 
+DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
+
 #endif
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 306a19a5509c..510014051f5d 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -411,6 +411,15 @@
 #define CSR_VTYPE		0xc21
 #define CSR_VLENB		0xc22
 
+/* Scalar Crypto Extension - Entropy */
+#define CSR_SEED		0x015
+#define SEED_OPST_MASK		_AC(0xC0000000, UL)
+#define SEED_OPST_BIST		_AC(0x00000000, UL)
+#define SEED_OPST_WAIT		_AC(0x40000000, UL)
+#define SEED_OPST_ES16		_AC(0x80000000, UL)
+#define SEED_OPST_DEAD		_AC(0xC0000000, UL)
+#define SEED_ENTROPY_MASK	_AC(0xFFFF, UL)
+
 #ifdef CONFIG_RISCV_M_MODE
 # define CSR_STATUS	CSR_MSTATUS
 # define CSR_IE		CSR_MIE
diff --git a/arch/riscv/include/asm/entry-common.h b/arch/riscv/include/asm/entry-common.h
index 7ab5e34318c8..2293e535f865 100644
--- a/arch/riscv/include/asm/entry-common.h
+++ b/arch/riscv/include/asm/entry-common.h
@@ -4,6 +4,23 @@
 #define _ASM_RISCV_ENTRY_COMMON_H
 
 #include <asm/stacktrace.h>
+#include <asm/thread_info.h>
+#include <asm/vector.h>
+
+static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
+						  unsigned long ti_work)
+{
+	if (ti_work & _TIF_RISCV_V_DEFER_RESTORE) {
+		clear_thread_flag(TIF_RISCV_V_DEFER_RESTORE);
+		/*
+		 * We are already called with irq disabled, so go without
+		 * keeping track of riscv_v_flags.
+		 */
+		riscv_v_vstate_restore(&current->thread.vstate, regs);
+	}
+}
+
+#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
 
 void handle_page_fault(struct pt_regs *regs);
 void handle_break(struct pt_regs *regs);
diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
index 83ed25e43553..ea33288f8a25 100644
--- a/arch/riscv/include/asm/errata_list.h
+++ b/arch/riscv/include/asm/errata_list.h
@@ -24,9 +24,8 @@
 
 #ifdef CONFIG_ERRATA_THEAD
 #define	ERRATA_THEAD_PBMT 0
-#define	ERRATA_THEAD_CMO 1
-#define	ERRATA_THEAD_PMU 2
-#define	ERRATA_THEAD_NUMBER 3
+#define	ERRATA_THEAD_PMU 1
+#define	ERRATA_THEAD_NUMBER 2
 #endif
 
 #ifdef __ASSEMBLY__
@@ -94,54 +93,17 @@ asm volatile(ALTERNATIVE(						\
 #define ALT_THEAD_PMA(_val)
 #endif
 
-/*
- * th.dcache.ipa rs1 (invalidate, physical address)
- * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
- *   0000001    01010      rs1       000      00000  0001011
- * th.dache.iva rs1 (invalida, virtual address)
- *   0000001    00110      rs1       000      00000  0001011
- *
- * th.dcache.cpa rs1 (clean, physical address)
- * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
- *   0000001    01001      rs1       000      00000  0001011
- * th.dcache.cva rs1 (clean, virtual address)
- *   0000001    00101      rs1       000      00000  0001011
- *
- * th.dcache.cipa rs1 (clean then invalidate, physical address)
- * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
- *   0000001    01011      rs1       000      00000  0001011
- * th.dcache.civa rs1 (... virtual address)
- *   0000001    00111      rs1       000      00000  0001011
- *
- * th.sync.s (make sure all cache operations finished)
- * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
- *   0000000    11001     00000      000      00000  0001011
- */
-#define THEAD_INVAL_A0	".long 0x0265000b"
-#define THEAD_CLEAN_A0	".long 0x0255000b"
-#define THEAD_FLUSH_A0	".long 0x0275000b"
-#define THEAD_SYNC_S	".long 0x0190000b"
-
 #define ALT_CMO_OP(_op, _start, _size, _cachesize)			\
-asm volatile(ALTERNATIVE_2(						\
-	__nops(6),							\
+asm volatile(ALTERNATIVE(						\
+	__nops(5),							\
 	"mv a0, %1\n\t"							\
 	"j 2f\n\t"							\
 	"3:\n\t"							\
 	CBO_##_op(a0)							\
 	"add a0, a0, %0\n\t"						\
 	"2:\n\t"							\
-	"bltu a0, %2, 3b\n\t"						\
-	"nop", 0, RISCV_ISA_EXT_ZICBOM, CONFIG_RISCV_ISA_ZICBOM,	\
-	"mv a0, %1\n\t"							\
-	"j 2f\n\t"							\
-	"3:\n\t"							\
-	THEAD_##_op##_A0 "\n\t"						\
-	"add a0, a0, %0\n\t"						\
-	"2:\n\t"							\
-	"bltu a0, %2, 3b\n\t"						\
-	THEAD_SYNC_S, THEAD_VENDOR_ID,					\
-			ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO)	\
+	"bltu a0, %2, 3b\n\t",						\
+	0, RISCV_ISA_EXT_ZICBOM, CONFIG_RISCV_ISA_ZICBOM)		\
 	: : "r"(_cachesize),						\
 	    "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)),	\
 	    "r"((unsigned long)(_start) + (_size))			\
diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h
index 2b2f5df7ef2c..329172122952 100644
--- a/arch/riscv/include/asm/ftrace.h
+++ b/arch/riscv/include/asm/ftrace.h
@@ -128,7 +128,23 @@ do {									\
 struct dyn_ftrace;
 int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
 #define ftrace_init_nop ftrace_init_nop
-#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+struct ftrace_ops;
+struct ftrace_regs;
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+		       struct ftrace_ops *op, struct ftrace_regs *fregs);
+#define ftrace_graph_func ftrace_graph_func
+
+static inline void __arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr)
+{
+	regs->t1 = addr;
+}
+#define arch_ftrace_set_direct_caller(fregs, addr) \
+	__arch_ftrace_set_direct_caller(&(fregs)->regs, addr)
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+
+#endif /* __ASSEMBLY__ */
 
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index e3ffef1c6119..0c94260b5d0c 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -865,7 +865,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
 #define TASK_SIZE_MIN	(PGDIR_SIZE_L3 * PTRS_PER_PGD / 2)
 
 #ifdef CONFIG_COMPAT
-#define TASK_SIZE_32	(_AC(0x80000000, UL) - PAGE_SIZE)
+#define TASK_SIZE_32	(_AC(0x80000000, UL))
 #define TASK_SIZE	(test_thread_flag(TIF_32BIT) ? \
 			 TASK_SIZE_32 : TASK_SIZE_64)
 #else
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index f19f861cda54..a8509cc31ab2 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -16,7 +16,7 @@
 
 #ifdef CONFIG_64BIT
 #define DEFAULT_MAP_WINDOW	(UL(1) << (MMAP_VA_BITS - 1))
-#define STACK_TOP_MAX		TASK_SIZE_64
+#define STACK_TOP_MAX		TASK_SIZE
 
 #define arch_get_mmap_end(addr, len, flags)			\
 ({								\
@@ -73,6 +73,43 @@
 struct task_struct;
 struct pt_regs;
 
+/*
+ * We use a flag to track in-kernel Vector context. Currently the flag has the
+ * following meaning:
+ *
+ *  - bit 0: indicates whether the in-kernel Vector context is active. The
+ *    activation of this state disables the preemption. On a non-RT kernel, it
+ *    also disables bh.
+ *  - bit 8: is used for tracking preemptible kernel-mode Vector, when
+ *    RISCV_ISA_V_PREEMPTIVE is enabled. Calling kernel_vector_begin() does not
+ *    disable the preemption if the thread's kernel_vstate.datap is allocated.
+ *    Instead, the kernel sets this bit field. Then the trap entry/exit code
+ *    knows if we are entering/exiting the context that owns preempt_v.
+ *     - 0: the task is not using preempt_v
+ *     - 1: the task is actively using preempt_v. But whether the task owns
+ *          the preempt_v context is decided by bits in RISCV_V_CTX_DEPTH_MASK.
+ *  - bits 16-23 are RISCV_V_CTX_DEPTH_MASK, used by context tracking routine
+ *     when preempt_v starts:
+ *     - 0: the task is actively using, and owns preempt_v context.
+ *     - non-zero: the task was using preempt_v, but then took a trap within.
+ *       Thus, the task does not own preempt_v. Any use of Vector will have to
+ *       save preempt_v, if dirty, and fallback to non-preemptible kernel-mode
+ *       Vector.
+ *  - bit 30: The in-kernel preempt_v context is saved, and needs to be
+ *    restored when returning to the context that owns the preempt_v.
+ *  - bit 31: The in-kernel preempt_v context is dirty, as signaled by the
+ *    trap entry code. Any context switches out-of current task need to save
+ *    it to the task's in-kernel V context. Also, any traps nesting on-top-of
+ *    preempt_v requesting to use V needs a save.
+ */
+#define RISCV_V_CTX_DEPTH_MASK		0x00ff0000
+
+#define RISCV_V_CTX_UNIT_DEPTH		0x00010000
+#define RISCV_KERNEL_MODE_V		0x00000001
+#define RISCV_PREEMPT_V			0x00000100
+#define RISCV_PREEMPT_V_DIRTY		0x80000000
+#define RISCV_PREEMPT_V_NEED_RESTORE	0x40000000
+
 /* CPU-specific state of a task */
 struct thread_struct {
 	/* Callee-saved registers */
@@ -81,9 +118,11 @@ struct thread_struct {
 	unsigned long s[12];	/* s[0]: frame pointer */
 	struct __riscv_d_ext_state fstate;
 	unsigned long bad_cause;
-	unsigned long vstate_ctrl;
+	u32 riscv_v_flags;
+	u32 vstate_ctrl;
 	struct __riscv_v_ext_state vstate;
 	unsigned long align_ctl;
+	struct __riscv_v_ext_state kernel_vstate;
 };
 
 /* Whitelist the fstate from the task_struct for hardened usercopy */
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index b6f898c56940..6e68f8dff76b 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -29,6 +29,7 @@ enum sbi_ext_id {
 	SBI_EXT_RFENCE = 0x52464E43,
 	SBI_EXT_HSM = 0x48534D,
 	SBI_EXT_SRST = 0x53525354,
+	SBI_EXT_SUSP = 0x53555350,
 	SBI_EXT_PMU = 0x504D55,
 	SBI_EXT_DBCN = 0x4442434E,
 	SBI_EXT_STA = 0x535441,
@@ -115,6 +116,14 @@ enum sbi_srst_reset_reason {
 	SBI_SRST_RESET_REASON_SYS_FAILURE,
 };
 
+enum sbi_ext_susp_fid {
+	SBI_EXT_SUSP_SYSTEM_SUSPEND = 0,
+};
+
+enum sbi_ext_susp_sleep_type {
+	SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM = 0,
+};
+
 enum sbi_ext_pmu_fid {
 	SBI_EXT_PMU_NUM_COUNTERS = 0,
 	SBI_EXT_PMU_COUNTER_GET_INFO,
@@ -288,8 +297,13 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
 			unsigned long arg3, unsigned long arg4,
 			unsigned long arg5);
 
+#ifdef CONFIG_RISCV_SBI_V01
 void sbi_console_putchar(int ch);
 int sbi_console_getchar(void);
+#else
+static inline void sbi_console_putchar(int ch) { }
+static inline int sbi_console_getchar(void) { return -ENOENT; }
+#endif
 long sbi_get_mvendorid(void);
 long sbi_get_marchid(void);
 long sbi_get_mimpid(void);
@@ -346,6 +360,11 @@ static inline unsigned long sbi_mk_version(unsigned long major,
 }
 
 int sbi_err_map_linux_errno(int err);
+
+extern bool sbi_debug_console_available;
+int sbi_debug_console_write(const char *bytes, unsigned int num_bytes);
+int sbi_debug_console_read(char *bytes, unsigned int num_bytes);
+
 #else /* CONFIG_RISCV_SBI */
 static inline int sbi_remote_fence_i(const struct cpumask *cpu_mask) { return -1; }
 static inline void sbi_init(void) {}
diff --git a/arch/riscv/include/asm/simd.h b/arch/riscv/include/asm/simd.h
new file mode 100644
index 000000000000..54efbf523d49
--- /dev/null
+++ b/arch/riscv/include/asm/simd.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2023 SiFive
+ */
+
+#ifndef __ASM_SIMD_H
+#define __ASM_SIMD_H
+
+#include <linux/compiler.h>
+#include <linux/irqflags.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/types.h>
+#include <linux/thread_info.h>
+
+#include <asm/vector.h>
+
+#ifdef CONFIG_RISCV_ISA_V
+/*
+ * may_use_simd - whether it is allowable at this time to issue vector
+ *                instructions or access the vector register file
+ *
+ * Callers must not assume that the result remains true beyond the next
+ * preempt_enable() or return from softirq context.
+ */
+static __must_check inline bool may_use_simd(void)
+{
+	/*
+	 * RISCV_KERNEL_MODE_V is only set while preemption is disabled,
+	 * and is clear whenever preemption is enabled.
+	 */
+	if (in_hardirq() || in_nmi())
+		return false;
+
+	/*
+	 * Nesting is achieved in preempt_v by spreading the control for
+	 * preemptible and non-preemptible kernel-mode Vector into two fields.
+	 * Always try to match with preempt_v if kernel V-context exists. Then,
+	 * fallback to check non preempt_v if nesting happens, or if the config
+	 * is not set.
+	 */
+	if (IS_ENABLED(CONFIG_RISCV_ISA_V_PREEMPTIVE) && current->thread.kernel_vstate.datap) {
+		if (!riscv_preempt_v_started(current))
+			return true;
+	}
+	/*
+	 * Non-preemptible kernel-mode Vector temporarily disables bh. So we
+	 * must not return true on irqs_disabled(). Otherwise we would fail the
+	 * lockdep check calling local_bh_enable().
+	 */
+	return !irqs_disabled() && !(riscv_v_flags() & RISCV_KERNEL_MODE_V);
+}
+
+#else /* ! CONFIG_RISCV_ISA_V */
+
+static __must_check inline bool may_use_simd(void)
+{
+	return false;
+}
+
+#endif /* ! CONFIG_RISCV_ISA_V */
+
+#endif
diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h
index f90d8e42f3c7..7efdb0584d47 100644
--- a/arch/riscv/include/asm/switch_to.h
+++ b/arch/riscv/include/asm/switch_to.h
@@ -53,8 +53,7 @@ static inline void __switch_to_fpu(struct task_struct *prev,
 	struct pt_regs *regs;
 
 	regs = task_pt_regs(prev);
-	if (unlikely(regs->status & SR_SD))
-		fstate_save(prev, regs);
+	fstate_save(prev, regs);
 	fstate_restore(next, task_pt_regs(next));
 }
 
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index 4856697c5f25..5d473343634b 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -102,12 +102,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
 #define TIF_NOTIFY_SIGNAL	9	/* signal notifications exist */
 #define TIF_UPROBE		10	/* uprobe breakpoint or singlestep */
 #define TIF_32BIT		11	/* compat-mode 32bit process */
+#define TIF_RISCV_V_DEFER_RESTORE	12 /* restore Vector before returning to user */
 
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 #define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
 #define _TIF_UPROBE		(1 << TIF_UPROBE)
+#define _TIF_RISCV_V_DEFER_RESTORE	(1 << TIF_RISCV_V_DEFER_RESTORE)
 
 #define _TIF_WORK_MASK \
 	(_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED | \
diff --git a/arch/riscv/include/asm/tlbbatch.h b/arch/riscv/include/asm/tlbbatch.h
new file mode 100644
index 000000000000..46014f70b9da
--- /dev/null
+++ b/arch/riscv/include/asm/tlbbatch.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 Rivos Inc.
+ */
+
+#ifndef _ASM_RISCV_TLBBATCH_H
+#define _ASM_RISCV_TLBBATCH_H
+
+#include <linux/cpumask.h>
+
+struct arch_tlbflush_unmap_batch {
+	struct cpumask cpumask;
+};
+
+#endif /* _ASM_RISCV_TLBBATCH_H */
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index a60416bbe190..928f096dca21 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -47,6 +47,14 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end);
 void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
 			unsigned long end);
 #endif
+
+bool arch_tlbbatch_should_defer(struct mm_struct *mm);
+void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+			       struct mm_struct *mm,
+			       unsigned long uaddr);
+void arch_flush_tlb_batched_pending(struct mm_struct *mm);
+void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
+
 #else /* CONFIG_SMP && CONFIG_MMU */
 
 #define flush_tlb_all() local_flush_tlb_all()
diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
index 87aaef656257..0cd6f0a027d1 100644
--- a/arch/riscv/include/asm/vector.h
+++ b/arch/riscv/include/asm/vector.h
@@ -22,6 +22,18 @@
 extern unsigned long riscv_v_vsize;
 int riscv_v_setup_vsize(void);
 bool riscv_v_first_use_handler(struct pt_regs *regs);
+void kernel_vector_begin(void);
+void kernel_vector_end(void);
+void get_cpu_vector_context(void);
+void put_cpu_vector_context(void);
+void riscv_v_thread_free(struct task_struct *tsk);
+void __init riscv_v_setup_ctx_cache(void);
+void riscv_v_thread_alloc(struct task_struct *tsk);
+
+static inline u32 riscv_v_flags(void)
+{
+	return READ_ONCE(current->thread.riscv_v_flags);
+}
 
 static __always_inline bool has_vector(void)
 {
@@ -162,36 +174,89 @@ static inline void riscv_v_vstate_discard(struct pt_regs *regs)
 	__riscv_v_vstate_dirty(regs);
 }
 
-static inline void riscv_v_vstate_save(struct task_struct *task,
+static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate,
 				       struct pt_regs *regs)
 {
 	if ((regs->status & SR_VS) == SR_VS_DIRTY) {
-		struct __riscv_v_ext_state *vstate = &task->thread.vstate;
-
 		__riscv_v_vstate_save(vstate, vstate->datap);
 		__riscv_v_vstate_clean(regs);
 	}
 }
 
-static inline void riscv_v_vstate_restore(struct task_struct *task,
+static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate,
 					  struct pt_regs *regs)
 {
 	if ((regs->status & SR_VS) != SR_VS_OFF) {
-		struct __riscv_v_ext_state *vstate = &task->thread.vstate;
-
 		__riscv_v_vstate_restore(vstate, vstate->datap);
 		__riscv_v_vstate_clean(regs);
 	}
 }
 
+static inline void riscv_v_vstate_set_restore(struct task_struct *task,
+					      struct pt_regs *regs)
+{
+	if ((regs->status & SR_VS) != SR_VS_OFF) {
+		set_tsk_thread_flag(task, TIF_RISCV_V_DEFER_RESTORE);
+		riscv_v_vstate_on(regs);
+	}
+}
+
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+static inline bool riscv_preempt_v_dirty(struct task_struct *task)
+{
+	return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V_DIRTY);
+}
+
+static inline bool riscv_preempt_v_restore(struct task_struct *task)
+{
+	return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V_NEED_RESTORE);
+}
+
+static inline void riscv_preempt_v_clear_dirty(struct task_struct *task)
+{
+	barrier();
+	task->thread.riscv_v_flags &= ~RISCV_PREEMPT_V_DIRTY;
+}
+
+static inline void riscv_preempt_v_set_restore(struct task_struct *task)
+{
+	barrier();
+	task->thread.riscv_v_flags |= RISCV_PREEMPT_V_NEED_RESTORE;
+}
+
+static inline bool riscv_preempt_v_started(struct task_struct *task)
+{
+	return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V);
+}
+
+#else /* !CONFIG_RISCV_ISA_V_PREEMPTIVE */
+static inline bool riscv_preempt_v_dirty(struct task_struct *task) { return false; }
+static inline bool riscv_preempt_v_restore(struct task_struct *task) { return false; }
+static inline bool riscv_preempt_v_started(struct task_struct *task) { return false; }
+#define riscv_preempt_v_clear_dirty(tsk)	do {} while (0)
+#define riscv_preempt_v_set_restore(tsk)	do {} while (0)
+#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */
+
 static inline void __switch_to_vector(struct task_struct *prev,
 				      struct task_struct *next)
 {
 	struct pt_regs *regs;
 
-	regs = task_pt_regs(prev);
-	riscv_v_vstate_save(prev, regs);
-	riscv_v_vstate_restore(next, task_pt_regs(next));
+	if (riscv_preempt_v_started(prev)) {
+		if (riscv_preempt_v_dirty(prev)) {
+			__riscv_v_vstate_save(&prev->thread.kernel_vstate,
+					      prev->thread.kernel_vstate.datap);
+			riscv_preempt_v_clear_dirty(prev);
+		}
+	} else {
+		regs = task_pt_regs(prev);
+		riscv_v_vstate_save(&prev->thread.vstate, regs);
+	}
+
+	if (riscv_preempt_v_started(next))
+		riscv_preempt_v_set_restore(next);
+	else
+		riscv_v_vstate_set_restore(next, task_pt_regs(next));
 }
 
 void riscv_v_vstate_ctrl_init(struct task_struct *tsk);
@@ -208,11 +273,14 @@ static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; }
 static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; }
 #define riscv_v_vsize (0)
 #define riscv_v_vstate_discard(regs)		do {} while (0)
-#define riscv_v_vstate_save(task, regs)		do {} while (0)
-#define riscv_v_vstate_restore(task, regs)	do {} while (0)
+#define riscv_v_vstate_save(vstate, regs)	do {} while (0)
+#define riscv_v_vstate_restore(vstate, regs)	do {} while (0)
 #define __switch_to_vector(__prev, __next)	do {} while (0)
 #define riscv_v_vstate_off(regs)		do {} while (0)
 #define riscv_v_vstate_on(regs)			do {} while (0)
+#define riscv_v_thread_free(tsk)		do {} while (0)
+#define  riscv_v_setup_ctx_cache()		do {} while (0)
+#define riscv_v_thread_alloc(tsk)		do {} while (0)
 
 #endif /* CONFIG_RISCV_ISA_V */
 
diff --git a/arch/riscv/include/asm/word-at-a-time.h b/arch/riscv/include/asm/word-at-a-time.h
index 7c086ac6ecd4..f3f031e34191 100644
--- a/arch/riscv/include/asm/word-at-a-time.h
+++ b/arch/riscv/include/asm/word-at-a-time.h
@@ -9,6 +9,7 @@
 #define _ASM_RISCV_WORD_AT_A_TIME_H
 
 
+#include <asm/asm-extable.h>
 #include <linux/kernel.h>
 
 struct word_at_a_time {
@@ -45,4 +46,30 @@ static inline unsigned long find_zero(unsigned long mask)
 /* The mask we created is directly usable as a bytemask */
 #define zero_bytemask(mask) (mask)
 
+#ifdef CONFIG_DCACHE_WORD_ACCESS
+
+/*
+ * Load an unaligned word from kernel space.
+ *
+ * In the (very unlikely) case of the word being a page-crosser
+ * and the next page not being mapped, take the exception and
+ * return zeroes in the non-existing part.
+ */
+static inline unsigned long load_unaligned_zeropad(const void *addr)
+{
+	unsigned long ret;
+
+	/* Load word from unaligned pointer addr */
+	asm(
+	"1:	" REG_L " %0, %2\n"
+	"2:\n"
+	_ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(1b, 2b, %0, %1)
+	: "=&r" (ret)
+	: "r" (addr), "m" (*(unsigned long *)addr));
+
+	return ret;
+}
+
+#endif	/* CONFIG_DCACHE_WORD_ACCESS */
+
 #endif /* _ASM_RISCV_WORD_AT_A_TIME_H */
diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h
new file mode 100644
index 000000000000..96011861e46b
--- /dev/null
+++ b/arch/riscv/include/asm/xor.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2021 SiFive
+ */
+
+#include <linux/hardirq.h>
+#include <asm-generic/xor.h>
+#ifdef CONFIG_RISCV_ISA_V
+#include <asm/vector.h>
+#include <asm/switch_to.h>
+#include <asm/asm-prototypes.h>
+
+static void xor_vector_2(unsigned long bytes, unsigned long *__restrict p1,
+			 const unsigned long *__restrict p2)
+{
+	kernel_vector_begin();
+	xor_regs_2_(bytes, p1, p2);
+	kernel_vector_end();
+}
+
+static void xor_vector_3(unsigned long bytes, unsigned long *__restrict p1,
+			 const unsigned long *__restrict p2,
+			 const unsigned long *__restrict p3)
+{
+	kernel_vector_begin();
+	xor_regs_3_(bytes, p1, p2, p3);
+	kernel_vector_end();
+}
+
+static void xor_vector_4(unsigned long bytes, unsigned long *__restrict p1,
+			 const unsigned long *__restrict p2,
+			 const unsigned long *__restrict p3,
+			 const unsigned long *__restrict p4)
+{
+	kernel_vector_begin();
+	xor_regs_4_(bytes, p1, p2, p3, p4);
+	kernel_vector_end();
+}
+
+static void xor_vector_5(unsigned long bytes, unsigned long *__restrict p1,
+			 const unsigned long *__restrict p2,
+			 const unsigned long *__restrict p3,
+			 const unsigned long *__restrict p4,
+			 const unsigned long *__restrict p5)
+{
+	kernel_vector_begin();
+	xor_regs_5_(bytes, p1, p2, p3, p4, p5);
+	kernel_vector_end();
+}
+
+static struct xor_block_template xor_block_rvv = {
+	.name = "rvv",
+	.do_2 = xor_vector_2,
+	.do_3 = xor_vector_3,
+	.do_4 = xor_vector_4,
+	.do_5 = xor_vector_5
+};
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES           \
+	do {        \
+		xor_speed(&xor_block_8regs);    \
+		xor_speed(&xor_block_32regs);    \
+		if (has_vector()) { \
+			xor_speed(&xor_block_rvv);\
+		} \
+	} while (0)
+#endif
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index c92c623b311e..f71910718053 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -64,6 +64,7 @@ obj-$(CONFIG_MMU) += vdso.o vdso/
 obj-$(CONFIG_RISCV_MISALIGNED)	+= traps_misaligned.o
 obj-$(CONFIG_FPU)		+= fpu.o
 obj-$(CONFIG_RISCV_ISA_V)	+= vector.o
+obj-$(CONFIG_RISCV_ISA_V)	+= kernel_mode_vector.o
 obj-$(CONFIG_SMP)		+= smpboot.o
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_SMP)		+= cpu_ops.o
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index e32591e9da90..89920f84d0a3 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -8,8 +8,10 @@
 
 #include <linux/acpi.h>
 #include <linux/bitmap.h>
+#include <linux/cpu.h>
 #include <linux/cpuhotplug.h>
 #include <linux/ctype.h>
+#include <linux/jump_label.h>
 #include <linux/log2.h>
 #include <linux/memory.h>
 #include <linux/module.h>
@@ -44,6 +46,8 @@ struct riscv_isainfo hart_isa[NR_CPUS];
 /* Performance information */
 DEFINE_PER_CPU(long, misaligned_access_speed);
 
+static cpumask_t fast_misaligned_access;
+
 /**
  * riscv_isa_extension_base() - Get base extension word
  *
@@ -784,6 +788,16 @@ static int check_unaligned_access(void *param)
 		(speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
 
 	per_cpu(misaligned_access_speed, cpu) = speed;
+
+	/*
+	 * Set the value of fast_misaligned_access of a CPU. These operations
+	 * are atomic to avoid race conditions.
+	 */
+	if (speed == RISCV_HWPROBE_MISALIGNED_FAST)
+		cpumask_set_cpu(cpu, &fast_misaligned_access);
+	else
+		cpumask_clear_cpu(cpu, &fast_misaligned_access);
+
 	return 0;
 }
 
@@ -796,13 +810,69 @@ static void check_unaligned_access_nonboot_cpu(void *param)
 		check_unaligned_access(pages[cpu]);
 }
 
+DEFINE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
+
+static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
+{
+	if (cpumask_weight(mask) == weight)
+		static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key);
+	else
+		static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key);
+}
+
+static void set_unaligned_access_static_branches_except_cpu(int cpu)
+{
+	/*
+	 * Same as set_unaligned_access_static_branches, except excludes the
+	 * given CPU from the result. When a CPU is hotplugged into an offline
+	 * state, this function is called before the CPU is set to offline in
+	 * the cpumask, and thus the CPU needs to be explicitly excluded.
+	 */
+
+	cpumask_t fast_except_me;
+
+	cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask);
+	cpumask_clear_cpu(cpu, &fast_except_me);
+
+	modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1);
+}
+
+static void set_unaligned_access_static_branches(void)
+{
+	/*
+	 * This will be called after check_unaligned_access_all_cpus so the
+	 * result of unaligned access speed for all CPUs will be available.
+	 *
+	 * To avoid the number of online cpus changing between reading
+	 * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be
+	 * held before calling this function.
+	 */
+
+	cpumask_t fast_and_online;
+
+	cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask);
+
+	modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
+}
+
+static int lock_and_set_unaligned_access_static_branch(void)
+{
+	cpus_read_lock();
+	set_unaligned_access_static_branches();
+	cpus_read_unlock();
+
+	return 0;
+}
+
+arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
+
 static int riscv_online_cpu(unsigned int cpu)
 {
 	static struct page *buf;
 
 	/* We are already set since the last check */
 	if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
-		return 0;
+		goto exit;
 
 	buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
 	if (!buf) {
@@ -812,6 +882,17 @@ static int riscv_online_cpu(unsigned int cpu)
 
 	check_unaligned_access(buf);
 	__free_pages(buf, MISALIGNED_BUFFER_ORDER);
+
+exit:
+	set_unaligned_access_static_branches();
+
+	return 0;
+}
+
+static int riscv_offline_cpu(unsigned int cpu)
+{
+	set_unaligned_access_static_branches_except_cpu(cpu);
+
 	return 0;
 }
 
@@ -846,9 +927,12 @@ static int check_unaligned_access_all_cpus(void)
 	/* Check core 0. */
 	smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
 
-	/* Setup hotplug callback for any new CPUs that come online. */
+	/*
+	 * Setup hotplug callbacks for any new CPUs that come online or go
+	 * offline.
+	 */
 	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
-				  riscv_online_cpu, NULL);
+				  riscv_online_cpu, riscv_offline_cpu);
 
 out:
 	unaligned_emulation_finish();
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 54ca4564a926..9d1a305d5508 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -83,6 +83,10 @@ SYM_CODE_START(handle_exception)
 	/* Load the kernel shadow call stack pointer if coming from userspace */
 	scs_load_current_if_task_changed s5
 
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+	move a0, sp
+	call riscv_v_context_nesting_start
+#endif
 	move a0, sp /* pt_regs */
 	la ra, ret_from_exception
 
@@ -138,6 +142,10 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
 	 */
 	csrw CSR_SCRATCH, tp
 1:
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+	move a0, sp
+	call riscv_v_context_nesting_end
+#endif
 	REG_L a0, PT_STATUS(sp)
 	/*
 	 * The current load reservation is effectively part of the processor's
diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index 03a6434a8cdd..f5aa24d9e1c1 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -178,32 +178,28 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 }
 
 #ifdef CONFIG_DYNAMIC_FTRACE
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+		       struct ftrace_ops *op, struct ftrace_regs *fregs)
+{
+	struct pt_regs *regs = arch_ftrace_get_regs(fregs);
+
+	/* The saved ra slot in pt_regs is the "parent" location to work on. */
+	prepare_ftrace_return((unsigned long *)&regs->ra, ip, frame_pointer(regs));
+}
+#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
 extern void ftrace_graph_call(void);
-extern void ftrace_graph_regs_call(void);
 int ftrace_enable_ftrace_graph_caller(void)
 {
-	int ret;
-
-	ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
-				    (unsigned long)&prepare_ftrace_return, true, true);
-	if (ret)
-		return ret;
-
-	return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
+	return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
 				    (unsigned long)&prepare_ftrace_return, true, true);
 }
 
 int ftrace_disable_ftrace_graph_caller(void)
 {
-	int ret;
-
-	ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
-				    (unsigned long)&prepare_ftrace_return, false, true);
-	if (ret)
-		return ret;
-
-	return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
+	return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
 				    (unsigned long)&prepare_ftrace_return, false, true);
 }
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
 #endif /* CONFIG_DYNAMIC_FTRACE */
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/riscv/kernel/kernel_mode_vector.c b/arch/riscv/kernel/kernel_mode_vector.c
new file mode 100644
index 000000000000..6afe80c7f03a
--- /dev/null
+++ b/arch/riscv/kernel/kernel_mode_vector.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Catalin Marinas <catalin.marinas@arm.com>
+ * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2021 SiFive
+ */
+#include <linux/compiler.h>
+#include <linux/irqflags.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/types.h>
+
+#include <asm/vector.h>
+#include <asm/switch_to.h>
+#include <asm/simd.h>
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+#include <asm/asm-prototypes.h>
+#endif
+
+static inline void riscv_v_flags_set(u32 flags)
+{
+	WRITE_ONCE(current->thread.riscv_v_flags, flags);
+}
+
+/* Set @flags in the task's riscv_v_flags; BUG if any of them is already set. */
+static inline void riscv_v_start(u32 flags)
+{
+	int cur = riscv_v_flags();
+
+	BUG_ON(cur & flags);
+	riscv_v_flags_set(cur | flags);
+	barrier();
+}
+
+static inline void riscv_v_stop(u32 flags)
+{
+	int cur;
+
+	barrier();
+	cur = riscv_v_flags();
+	BUG_ON(!(cur & flags));	/* at least one of @flags must be set */
+	riscv_v_flags_set(cur & ~flags);
+}
+
+/*
+ * Take ownership of the CPU vector context on behalf of the caller.
+ *
+ * Until put_cpu_vector_context() is called, the caller is free to
+ * manipulate the vector context metadata.
+ */
+void get_cpu_vector_context(void)
+{
+	/*
+	 * With softirqs disabled, a softirq cannot nest another
+	 * get_cpu_vector_context() while the kernel is actively using Vector.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_disable();
+	else
+		local_bh_disable();
+
+	riscv_v_start(RISCV_KERNEL_MODE_V);
+}
+
+/*
+ * Give up the CPU vector context.
+ *
+ * Only valid in a context that previously called get_cpu_vector_context()
+ * and has not called put_cpu_vector_context() since. Re-enables whatever
+ * get_cpu_vector_context() disabled (softirqs, or preemption on PREEMPT_RT).
+ */
+void put_cpu_vector_context(void)
+{
+	riscv_v_stop(RISCV_KERNEL_MODE_V);
+
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_enable();
+	else
+		local_bh_enable();
+}
+
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+static __always_inline u32 *riscv_v_flags_ptr(void)
+{
+	return &current->thread.riscv_v_flags;
+}
+
+static inline void riscv_preempt_v_set_dirty(void)
+{
+	*riscv_v_flags_ptr() |= RISCV_PREEMPT_V_DIRTY;
+}
+
+static inline void riscv_preempt_v_reset_flags(void)
+{
+	*riscv_v_flags_ptr() &= ~(RISCV_PREEMPT_V_DIRTY | RISCV_PREEMPT_V_NEED_RESTORE);
+}
+
+static inline void riscv_v_ctx_depth_inc(void)
+{
+	*riscv_v_flags_ptr() += RISCV_V_CTX_UNIT_DEPTH;
+}
+
+static inline void riscv_v_ctx_depth_dec(void)
+{
+	*riscv_v_flags_ptr() -= RISCV_V_CTX_UNIT_DEPTH;
+}
+
+static inline u32 riscv_v_ctx_get_depth(void)
+{
+	return *riscv_v_flags_ptr() & RISCV_V_CTX_DEPTH_MASK;
+}
+
+static int riscv_v_stop_kernel_context(void)
+{
+	if (riscv_v_ctx_get_depth() == 0 && riscv_preempt_v_started(current)) {
+		riscv_preempt_v_clear_dirty(current);
+		riscv_v_stop(RISCV_PREEMPT_V);
+		return 0;
+	}
+	return 1;	/* depth is non-zero, or RISCV_PREEMPT_V was not started */
+}
+
+static int riscv_v_start_kernel_context(bool *is_nested)
+{
+	struct __riscv_v_ext_state *kvstate, *uvstate;
+
+	kvstate = &current->thread.kernel_vstate;
+	if (!kvstate->datap)
+		return -ENOENT;	/* this task has no kernel_vstate buffer */
+	/* Already started: nested use, reported to the caller via @is_nested */
+	if (riscv_preempt_v_started(current)) {
+		WARN_ON(riscv_v_ctx_get_depth() == 0);
+		*is_nested = true;
+		get_cpu_vector_context();
+		if (riscv_preempt_v_dirty(current)) {
+			__riscv_v_vstate_save(kvstate, kvstate->datap);
+			riscv_preempt_v_clear_dirty(current);
+		}
+		riscv_preempt_v_set_restore(current);
+		return 0;
+	}
+
+	/* Transfer the ownership of V from user to kernel, then save */
+	riscv_v_start(RISCV_PREEMPT_V | RISCV_PREEMPT_V_DIRTY);
+	if ((task_pt_regs(current)->status & SR_VS) == SR_VS_DIRTY) {
+		uvstate = &current->thread.vstate;
+		__riscv_v_vstate_save(uvstate, uvstate->datap);
+	}
+	riscv_preempt_v_clear_dirty(current);
+	return 0;
+}
+
+/* low-level V context handling code, called with irq disabled */
+asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs)
+{
+	/* Nesting is only tracked once riscv_preempt_v_started() is true. */
+	if (!riscv_preempt_v_started(current))
+		return;
+
+	/* At depth 0, note that the trapped context left VS dirty. */
+	if (riscv_v_ctx_get_depth() == 0 &&
+	    (regs->status & SR_VS) == SR_VS_DIRTY)
+		riscv_preempt_v_set_dirty();
+
+	riscv_v_ctx_depth_inc();
+}
+
+/* Drop one nesting level; called from ret_from_exception. */
+asmlinkage void riscv_v_context_nesting_end(struct pt_regs *regs)
+{
+	struct __riscv_v_ext_state *kvstate;
+
+	WARN_ON(!irqs_disabled());
+
+	if (!riscv_preempt_v_started(current))
+		return;
+
+	riscv_v_ctx_depth_dec();
+	if (riscv_v_ctx_get_depth() != 0 || !riscv_preempt_v_restore(current))
+		return;
+
+	/* Back at depth 0 with a restore requested: reload kernel_vstate. */
+	kvstate = &current->thread.kernel_vstate;
+	__riscv_v_vstate_restore(kvstate, kvstate->datap);
+	__riscv_v_vstate_clean(regs);
+	riscv_preempt_v_reset_flags();
+}
+#else
+#define riscv_v_start_kernel_context(nested)	(-ENOENT)
+#define riscv_v_stop_kernel_context()		(-ENOENT)
+#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */
+
+/*
+ * kernel_vector_begin(): obtain the CPU vector registers for use by the calling
+ * context
+ *
+ * Must not be called unless may_use_simd() returns true.
+ * Task context in the vector registers is saved back to memory as necessary.
+ *
+ * A matching call to kernel_vector_end() must be made before returning from the
+ * calling context.
+ *
+ * The caller may freely use the vector registers until kernel_vector_end() is
+ * called.
+ */
+void kernel_vector_begin(void)
+{
+	bool nested = false;
+
+	if (WARN_ON(!has_vector()))
+		return;
+
+	BUG_ON(!may_use_simd());
+	/* Non-zero: preemptible path not taken; claim the context, save user V */
+	if (riscv_v_start_kernel_context(&nested)) {
+		get_cpu_vector_context();
+		riscv_v_vstate_save(&current->thread.vstate, task_pt_regs(current));
+	}
+	/* nested is only set when preemptible kernel-mode V was already started */
+	if (!nested)
+		riscv_v_vstate_set_restore(current, task_pt_regs(current));
+
+	riscv_v_enable();
+}
+EXPORT_SYMBOL_GPL(kernel_vector_begin);
+
+/*
+ * kernel_vector_end(): give the CPU vector registers back to the current task
+ *
+ * Must be called from a context in which kernel_vector_begin() was previously
+ * called, with no call to kernel_vector_end() in the meantime.
+ *
+ * The caller must not use the vector registers after this function is called,
+ * unless kernel_vector_begin() is called again in the meantime.
+ */
+void kernel_vector_end(void)
+{
+	if (WARN_ON(!has_vector()))
+		return;
+
+	riscv_v_disable();
+	/* Non-zero: RISCV_PREEMPT_V was not stopped here; release the context */
+	if (riscv_v_stop_kernel_context())
+		put_cpu_vector_context();
+}
+EXPORT_SYMBOL_GPL(kernel_vector_end);
diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S
index 79dc81223238..b7561288e8da 100644
--- a/arch/riscv/kernel/mcount-dyn.S
+++ b/arch/riscv/kernel/mcount-dyn.S
@@ -57,31 +57,150 @@
 	.endm
 
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
-	.macro SAVE_ALL
+
+/**
+* SAVE_ABI_REGS - save regs against the pt_regs struct
+*
+* @all: tell if saving all the regs
+*
+* If all is set, all the regs will be saved, otherwise only ABI
+* related regs (a0-a7,epc,ra and optional s0) will be saved.
+*
+* After the stack is established,
+*
+* 0(sp) stores the PC of the traced function which can be accessed
+* by &(fregs)->regs->epc in tracing function. Note that the real
+* function entry address should be computed with -FENTRY_RA_OFFSET.
+*
+* 8(sp) stores the function return address (i.e. parent IP) that
+* can be accessed by &(fregs)->regs->ra in tracing function.
+*
+* The other regs are saved at the respective location and accessed
+* by the respective pt_regs member.
+*
+* Here is the layout of stack for your reference.
+*
+* PT_SIZE_ON_STACK  ->  +++++++++
+*                       + ..... +
+*                       + t3-t6 +
+*                       + s2-s11+
+*                       + a0-a7 + --++++-> ftrace_caller saved
+*                       + s1    +   +
+*                       + s0    + --+
+*                       + t0-t2 +   +
+*                       + tp    +   +
+*                       + gp    +   +
+*                       + sp    +   +
+*                       + ra    + --+ // parent IP
+*               sp  ->  + epc   + --+ // PC
+*                       +++++++++
+**/
+	.macro SAVE_ABI_REGS, all=0
 	addi	sp, sp, -PT_SIZE_ON_STACK
 
-	REG_S t0,  PT_EPC(sp)
-	REG_S x1,  PT_RA(sp)
-	REG_S x2,  PT_SP(sp)
-	REG_S x3,  PT_GP(sp)
-	REG_S x4,  PT_TP(sp)
-	REG_S x5,  PT_T0(sp)
-	save_from_x6_to_x31
+	REG_S	t0,  PT_EPC(sp)
+	REG_S	x1,  PT_RA(sp)
+
+	// save the ABI regs
+
+	REG_S	x10, PT_A0(sp)
+	REG_S	x11, PT_A1(sp)
+	REG_S	x12, PT_A2(sp)
+	REG_S	x13, PT_A3(sp)
+	REG_S	x14, PT_A4(sp)
+	REG_S	x15, PT_A5(sp)
+	REG_S	x16, PT_A6(sp)
+	REG_S	x17, PT_A7(sp)
+
+	// save the leftover regs
+
+	.if \all == 1
+	REG_S	x2, PT_SP(sp)
+	REG_S	x3, PT_GP(sp)
+	REG_S	x4, PT_TP(sp)
+	REG_S	x5, PT_T0(sp)
+	REG_S	x6, PT_T1(sp)
+	REG_S	x7, PT_T2(sp)
+	REG_S	x8, PT_S0(sp)
+	REG_S	x9, PT_S1(sp)
+	REG_S	x18, PT_S2(sp)
+	REG_S	x19, PT_S3(sp)
+	REG_S	x20, PT_S4(sp)
+	REG_S	x21, PT_S5(sp)
+	REG_S	x22, PT_S6(sp)
+	REG_S	x23, PT_S7(sp)
+	REG_S	x24, PT_S8(sp)
+	REG_S	x25, PT_S9(sp)
+	REG_S	x26, PT_S10(sp)
+	REG_S	x27, PT_S11(sp)
+	REG_S	x28, PT_T3(sp)
+	REG_S	x29, PT_T4(sp)
+	REG_S	x30, PT_T5(sp)
+	REG_S	x31, PT_T6(sp)
+
+	// save s0 if FP_TEST defined
+
+	.else
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+	REG_S	x8, PT_S0(sp)
+#endif
+	.endif
 	.endm
 
-	.macro RESTORE_ALL
-	REG_L x1,  PT_RA(sp)
-	REG_L x2,  PT_SP(sp)
-	REG_L x3,  PT_GP(sp)
-	REG_L x4,  PT_TP(sp)
-	/* Restore t0 with PT_EPC */
-	REG_L x5,  PT_EPC(sp)
-	restore_from_x6_to_x31
+	.macro RESTORE_ABI_REGS, all=0
+	REG_L	t0, PT_EPC(sp)
+	REG_L	x1, PT_RA(sp)
+	REG_L	x10, PT_A0(sp)
+	REG_L	x11, PT_A1(sp)
+	REG_L	x12, PT_A2(sp)
+	REG_L	x13, PT_A3(sp)
+	REG_L	x14, PT_A4(sp)
+	REG_L	x15, PT_A5(sp)
+	REG_L	x16, PT_A6(sp)
+	REG_L	x17, PT_A7(sp)
 
+	.if \all == 1
+	REG_L	x2, PT_SP(sp)
+	REG_L	x3, PT_GP(sp)
+	REG_L	x4, PT_TP(sp)
+	REG_L	x6, PT_T1(sp)
+	REG_L	x7, PT_T2(sp)
+	REG_L	x8, PT_S0(sp)
+	REG_L	x9, PT_S1(sp)
+	REG_L	x18, PT_S2(sp)
+	REG_L	x19, PT_S3(sp)
+	REG_L	x20, PT_S4(sp)
+	REG_L	x21, PT_S5(sp)
+	REG_L	x22, PT_S6(sp)
+	REG_L	x23, PT_S7(sp)
+	REG_L	x24, PT_S8(sp)
+	REG_L	x25, PT_S9(sp)
+	REG_L	x26, PT_S10(sp)
+	REG_L	x27, PT_S11(sp)
+	REG_L	x28, PT_T3(sp)
+	REG_L	x29, PT_T4(sp)
+	REG_L	x30, PT_T5(sp)
+	REG_L	x31, PT_T6(sp)
+
+	.else
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+	REG_L	x8, PT_S0(sp)
+#endif
+	.endif
 	addi	sp, sp, PT_SIZE_ON_STACK
 	.endm
+
+	.macro PREPARE_ARGS
+	addi	a0, t0, -FENTRY_RA_OFFSET
+	la	a1, function_trace_op
+	REG_L	a2, 0(a1)
+	mv	a1, ra
+	mv	a3, sp
+	.endm
+
 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
 
+#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 SYM_FUNC_START(ftrace_caller)
 	SAVE_ABI
 
@@ -105,34 +224,39 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
 	call	ftrace_stub
 #endif
 	RESTORE_ABI
-	jr t0
+	jr	t0
 SYM_FUNC_END(ftrace_caller)
 
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
 SYM_FUNC_START(ftrace_regs_caller)
-	SAVE_ALL
-
-	addi	a0, t0, -FENTRY_RA_OFFSET
-	la	a1, function_trace_op
-	REG_L	a2, 0(a1)
-	mv	a1, ra
-	mv	a3, sp
+	mv	t1, zero
+	SAVE_ABI_REGS 1
+	PREPARE_ARGS
 
 SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
 	call	ftrace_stub
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	addi	a0, sp, PT_RA
-	REG_L	a1, PT_EPC(sp)
-	addi	a1, a1, -FENTRY_RA_OFFSET
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
-	mv	a2, s0
-#endif
-SYM_INNER_LABEL(ftrace_graph_regs_call, SYM_L_GLOBAL)
+	RESTORE_ABI_REGS 1
+	bnez	t1, .Ldirect
+	jr	t0
+.Ldirect:
+	jr	t1
+SYM_FUNC_END(ftrace_regs_caller)
+
+SYM_FUNC_START(ftrace_caller)
+	SAVE_ABI_REGS 0
+	PREPARE_ARGS
+
+SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
 	call	ftrace_stub
-#endif
 
-	RESTORE_ALL
-	jr t0
-SYM_FUNC_END(ftrace_regs_caller)
+	RESTORE_ABI_REGS 0
+	jr	t0
+SYM_FUNC_END(ftrace_caller)
 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+SYM_CODE_START(ftrace_stub_direct_tramp)
+	jr	t0
+SYM_CODE_END(ftrace_stub_direct_tramp)
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 862834bb1d64..5e5a82644451 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -723,8 +723,8 @@ static int add_relocation_to_accumulate(struct module *me, int type,
 
 			if (!bucket) {
 				kfree(entry);
-				kfree(rel_head);
 				kfree(rel_head->rel_entry);
+				kfree(rel_head);
 				return -ENOMEM;
 			}
 
@@ -747,6 +747,10 @@ initialize_relocation_hashtable(unsigned int num_relocations,
 {
 	/* Can safely assume that bits is not greater than sizeof(long) */
 	unsigned long hashtable_size = roundup_pow_of_two(num_relocations);
+	/*
+	 * When hashtable_size == 1, hashtable_bits == 0.
+	 * This is valid because the hashing algorithm returns 0 in this case.
+	 */
 	unsigned int hashtable_bits = ilog2(hashtable_size);
 
 	/*
@@ -760,10 +764,10 @@ initialize_relocation_hashtable(unsigned int num_relocations,
 	hashtable_size <<= should_double_size;
 
 	*relocation_hashtable = kmalloc_array(hashtable_size,
-					      sizeof(*relocation_hashtable),
+					      sizeof(**relocation_hashtable),
 					      GFP_KERNEL);
 	if (!*relocation_hashtable)
-		return -ENOMEM;
+		return 0;
 
 	__hash_init(*relocation_hashtable, hashtable_size);
 
@@ -779,6 +783,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
 	Elf_Sym *sym;
 	void *location;
 	unsigned int i, type;
+	unsigned int j_idx = 0;
 	Elf_Addr v;
 	int res;
 	unsigned int num_relocations = sechdrs[relsec].sh_size / sizeof(*rel);
@@ -789,8 +794,8 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
 	hashtable_bits = initialize_relocation_hashtable(num_relocations,
 							 &relocation_hashtable);
 
-	if (hashtable_bits < 0)
-		return hashtable_bits;
+	if (!relocation_hashtable)
+		return -ENOMEM;
 
 	INIT_LIST_HEAD(&used_buckets_list);
 
@@ -829,9 +834,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
 		v = sym->st_value + rel[i].r_addend;
 
 		if (type == R_RISCV_PCREL_LO12_I || type == R_RISCV_PCREL_LO12_S) {
-			unsigned int j;
+			unsigned int j = j_idx;
+			bool found = false;
 
-			for (j = 0; j < sechdrs[relsec].sh_size / sizeof(*rel); j++) {
+			do {
 				unsigned long hi20_loc =
 					sechdrs[sechdrs[relsec].sh_info].sh_addr
 					+ rel[j].r_offset;
@@ -860,16 +866,26 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
 					hi20 = (offset + 0x800) & 0xfffff000;
 					lo12 = offset - hi20;
 					v = lo12;
+					found = true;
 
 					break;
 				}
-			}
-			if (j == sechdrs[relsec].sh_size / sizeof(*rel)) {
+
+				/* Wrap before rel[count], which is out of bounds */
+				j++;
+				if (j >= sechdrs[relsec].sh_size / sizeof(*rel))
+					j = 0;
+			} while (j_idx != j);
+
+			if (!found) {
 				pr_err(
 				  "%s: Can not find HI20 relocation information\n",
 				  me->name);
 				return -EINVAL;
 			}
+
+			/* Record the previous j-loop end index */
+			j_idx = j;
 		}
 
 		if (reloc_handlers[type].accumulate_handler)
diff --git a/arch/riscv/kernel/pi/cmdline_early.c b/arch/riscv/kernel/pi/cmdline_early.c
index 68e786c84c94..f6d4dedffb84 100644
--- a/arch/riscv/kernel/pi/cmdline_early.c
+++ b/arch/riscv/kernel/pi/cmdline_early.c
@@ -38,8 +38,7 @@ static char *get_early_cmdline(uintptr_t dtb_pa)
 	if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) ||
 	    IS_ENABLED(CONFIG_CMDLINE_FORCE) ||
 	    fdt_cmdline_size == 0 /* CONFIG_CMDLINE_FALLBACK */) {
-		strncat(early_cmdline, CONFIG_CMDLINE,
-			COMMAND_LINE_SIZE - fdt_cmdline_size);
+		strlcat(early_cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
 	}
 
 	return early_cmdline;
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 4f21d970a129..92922dbd5b5c 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -171,6 +171,7 @@ void flush_thread(void)
 	riscv_v_vstate_off(task_pt_regs(current));
 	kfree(current->thread.vstate.datap);
 	memset(&current->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
+	clear_tsk_thread_flag(current, TIF_RISCV_V_DEFER_RESTORE);
 #endif
 }
 
@@ -178,7 +179,7 @@ void arch_release_task_struct(struct task_struct *tsk)
 {
 	/* Free the vector context of datap. */
 	if (has_vector())
-		kfree(tsk->thread.vstate.datap);
+		riscv_v_thread_free(tsk);
 }
 
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
@@ -187,6 +188,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 	*dst = *src;
 	/* clear entire V context, including datap for a new task */
 	memset(&dst->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
+	memset(&dst->thread.kernel_vstate, 0, sizeof(struct __riscv_v_ext_state));
+	clear_tsk_thread_flag(dst, TIF_RISCV_V_DEFER_RESTORE);
 
 	return 0;
 }
@@ -221,7 +224,15 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 		childregs->a0 = 0; /* Return value of fork() */
 		p->thread.s[0] = 0;
 	}
+	p->thread.riscv_v_flags = 0;
+	if (has_vector())
+		riscv_v_thread_alloc(p);
 	p->thread.ra = (unsigned long)ret_from_fork;
 	p->thread.sp = (unsigned long)childregs; /* kernel sp */
 	return 0;
 }
+
+void __init arch_task_cache_init(void)
+{
+	riscv_v_setup_ctx_cache();
+}
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index 2afe460de16a..e8515aa9d80b 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -99,8 +99,11 @@ static int riscv_vr_get(struct task_struct *target,
 	 * Ensure the vector registers have been saved to the memory before
 	 * copying them to membuf.
 	 */
-	if (target == current)
-		riscv_v_vstate_save(current, task_pt_regs(current));
+	if (target == current) {
+		get_cpu_vector_context();
+		riscv_v_vstate_save(&current->thread.vstate, task_pt_regs(current));
+		put_cpu_vector_context();
+	}
 
 	ptrace_vstate.vstart = vstate->vstart;
 	ptrace_vstate.vl = vstate->vl;
diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
index 5a62ed1da453..e66e0999a800 100644
--- a/arch/riscv/kernel/sbi.c
+++ b/arch/riscv/kernel/sbi.c
@@ -7,6 +7,7 @@
 
 #include <linux/bits.h>
 #include <linux/init.h>
+#include <linux/mm.h>
 #include <linux/pm.h>
 #include <linux/reboot.h>
 #include <asm/sbi.h>
@@ -571,6 +572,66 @@ long sbi_get_mimpid(void)
 }
 EXPORT_SYMBOL_GPL(sbi_get_mimpid);
 
+bool sbi_debug_console_available;
+
+int sbi_debug_console_write(const char *bytes, unsigned int num_bytes)
+{
+	unsigned long off = offset_in_page(bytes);
+	phys_addr_t pa;
+	struct sbiret ret;
+
+	if (!sbi_debug_console_available)
+		return -EOPNOTSUPP;
+
+	if (is_vmalloc_addr(bytes))
+		pa = page_to_phys(vmalloc_to_page(bytes)) + off;
+	else
+		pa = __pa(bytes);
+	/* Clamp the length so the buffer handed over stays within one page. */
+	if (off + num_bytes > PAGE_SIZE)
+		num_bytes = PAGE_SIZE - off;
+
+	if (IS_ENABLED(CONFIG_32BIT))
+		ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_WRITE,
+				num_bytes, lower_32_bits(pa), upper_32_bits(pa), 0, 0, 0);
+	else
+		ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_WRITE,
+				num_bytes, pa, 0, 0, 0, 0);
+
+	if (ret.error == SBI_ERR_FAILURE)
+		return -EIO;
+	return ret.error ? sbi_err_map_linux_errno(ret.error) : ret.value;
+}
+
+int sbi_debug_console_read(char *bytes, unsigned int num_bytes)
+{
+	unsigned long off = offset_in_page(bytes);
+	phys_addr_t pa;
+	struct sbiret ret;
+
+	if (!sbi_debug_console_available)
+		return -EOPNOTSUPP;
+
+	if (is_vmalloc_addr(bytes))
+		pa = page_to_phys(vmalloc_to_page(bytes)) + off;
+	else
+		pa = __pa(bytes);
+	/* Clamp the length so the buffer handed over stays within one page. */
+	if (off + num_bytes > PAGE_SIZE)
+		num_bytes = PAGE_SIZE - off;
+
+	if (IS_ENABLED(CONFIG_32BIT))
+		ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_READ,
+				num_bytes, lower_32_bits(pa), upper_32_bits(pa), 0, 0, 0);
+	else
+		ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_READ,
+				num_bytes, pa, 0, 0, 0, 0);
+
+	if (ret.error == SBI_ERR_FAILURE)
+		return -EIO;
+	return ret.error ? sbi_err_map_linux_errno(ret.error) : ret.value;
+}
+
 void __init sbi_init(void)
 {
 	int ret;
@@ -612,6 +673,11 @@ void __init sbi_init(void)
 			sbi_srst_reboot_nb.priority = 192;
 			register_restart_handler(&sbi_srst_reboot_nb);
 		}
+		if ((sbi_spec_version >= sbi_mk_version(2, 0)) &&
+		    (sbi_probe_extension(SBI_EXT_DBCN) > 0)) {
+			pr_info("SBI DBCN extension detected\n");
+			sbi_debug_console_available = true;
+		}
 	} else {
 		__sbi_set_timer = __sbi_set_timer_v01;
 		__sbi_send_ipi	= __sbi_send_ipi_v01;
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index 33dfb5078301..501e66debf69 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -86,7 +86,10 @@ static long save_v_state(struct pt_regs *regs, void __user **sc_vec)
 	/* datap is designed to be 16 byte aligned for better performance */
 	WARN_ON(unlikely(!IS_ALIGNED((unsigned long)datap, 16)));
 
-	riscv_v_vstate_save(current, regs);
+	get_cpu_vector_context();
+	riscv_v_vstate_save(&current->thread.vstate, regs);
+	put_cpu_vector_context();
+
 	/* Copy everything of vstate but datap. */
 	err = __copy_to_user(&state->v_state, &current->thread.vstate,
 			     offsetof(struct __riscv_v_ext_state, datap));
@@ -134,7 +137,7 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec)
 	if (unlikely(err))
 		return err;
 
-	riscv_v_vstate_restore(current, regs);
+	riscv_v_vstate_set_restore(current, regs);
 
 	return err;
 }
diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c
index 3c89b8ec69c4..239509367e42 100644
--- a/arch/riscv/kernel/suspend.c
+++ b/arch/riscv/kernel/suspend.c
@@ -4,8 +4,12 @@
  * Copyright (c) 2022 Ventana Micro Systems Inc.
  */
 
+#define pr_fmt(fmt) "suspend: " fmt
+
 #include <linux/ftrace.h>
+#include <linux/suspend.h>
 #include <asm/csr.h>
+#include <asm/sbi.h>
 #include <asm/suspend.h>
 
 void suspend_save_csrs(struct suspend_context *context)
@@ -85,3 +89,43 @@ int cpu_suspend(unsigned long arg,
 
 	return rc;
 }
+
+#ifdef CONFIG_RISCV_SBI
+/* Issue the SBI SUSP call; return its value, or a Linux errno on error. */
+static int sbi_system_suspend(unsigned long sleep_type,
+			      unsigned long resume_addr,
+			      unsigned long opaque)
+{
+	struct sbiret rc = sbi_ecall(SBI_EXT_SUSP, SBI_EXT_SUSP_SYSTEM_SUSPEND,
+				     sleep_type, resume_addr, opaque, 0, 0, 0);
+
+	if (!rc.error)
+		return rc.value;
+
+	return sbi_err_map_linux_errno(rc.error);
+}
+
+static int sbi_system_suspend_enter(suspend_state_t state)
+{
+	return cpu_suspend(SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM, sbi_system_suspend);
+}
+
+static const struct platform_suspend_ops sbi_system_suspend_ops = {
+	.valid = suspend_valid_only_mem,
+	.enter = sbi_system_suspend_enter,
+};
+
+static int __init sbi_system_suspend_init(void)
+{
+	if (sbi_spec_version < sbi_mk_version(2, 0) ||
+	    sbi_probe_extension(SBI_EXT_SUSP) <= 0)
+		return 0;	/* SUSP needs SBI v2.0+ and a positive probe */
+
+	pr_info("SBI SUSP extension detected\n");
+	if (IS_ENABLED(CONFIG_SUSPEND))
+		suspend_set_ops(&sbi_system_suspend_ops);
+	return 0;
+}
+
+arch_initcall(sbi_system_suspend_init);
+#endif /* CONFIG_RISCV_SBI */
diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
index 578b6292487e..6727d1d3b8f2 100644
--- a/arch/riscv/kernel/vector.c
+++ b/arch/riscv/kernel/vector.c
@@ -21,6 +21,10 @@
 #include <asm/bug.h>
 
 static bool riscv_v_implicit_uacc = IS_ENABLED(CONFIG_RISCV_ISA_V_DEFAULT_ENABLE);
+static struct kmem_cache *riscv_v_user_cachep;
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+static struct kmem_cache *riscv_v_kernel_cachep;
+#endif
 
 unsigned long riscv_v_vsize __read_mostly;
 EXPORT_SYMBOL_GPL(riscv_v_vsize);
@@ -47,6 +51,21 @@ int riscv_v_setup_vsize(void)
 	return 0;
 }
 
+void __init riscv_v_setup_ctx_cache(void)
+{
+	if (!has_vector())
+		return;
+
+	riscv_v_user_cachep = kmem_cache_create_usercopy("riscv_vector_ctx",
+							 riscv_v_vsize, 16, SLAB_PANIC,
+							 0, riscv_v_vsize, NULL);
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+	riscv_v_kernel_cachep = kmem_cache_create("riscv_vector_kctx",
+						  riscv_v_vsize, 16,
+						  SLAB_PANIC, NULL);
+#endif
+}
+
 static bool insn_is_vector(u32 insn_buf)
 {
 	u32 opcode = insn_buf & __INSN_OPCODE_MASK;
@@ -80,20 +99,37 @@ static bool insn_is_vector(u32 insn_buf)
 	return false;
 }
 
-static int riscv_v_thread_zalloc(void)
+static int riscv_v_thread_zalloc(struct kmem_cache *cache,
+				 struct __riscv_v_ext_state *ctx)
 {
 	void *datap;
 
-	datap = kzalloc(riscv_v_vsize, GFP_KERNEL);
+	datap = kmem_cache_zalloc(cache, GFP_KERNEL);
 	if (!datap)
 		return -ENOMEM;
 
-	current->thread.vstate.datap = datap;
-	memset(&current->thread.vstate, 0, offsetof(struct __riscv_v_ext_state,
-						    datap));
+	ctx->datap = datap;
+	memset(ctx, 0, offsetof(struct __riscv_v_ext_state, datap));
 	return 0;
 }
 
+void riscv_v_thread_alloc(struct task_struct *tsk)
+{
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+	riscv_v_thread_zalloc(riscv_v_kernel_cachep, &tsk->thread.kernel_vstate);
+#endif
+}
+
+void riscv_v_thread_free(struct task_struct *tsk)
+{
+	if (tsk->thread.vstate.datap)
+		kmem_cache_free(riscv_v_user_cachep, tsk->thread.vstate.datap);
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+	if (tsk->thread.kernel_vstate.datap)
+		kmem_cache_free(riscv_v_kernel_cachep, tsk->thread.kernel_vstate.datap);
+#endif
+}
+
 #define VSTATE_CTRL_GET_CUR(x) ((x) & PR_RISCV_V_VSTATE_CTRL_CUR_MASK)
 #define VSTATE_CTRL_GET_NEXT(x) (((x) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) >> 2)
 #define VSTATE_CTRL_MAKE_NEXT(x) (((x) << 2) & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK)
@@ -122,7 +158,8 @@ static inline void riscv_v_ctrl_set(struct task_struct *tsk, int cur, int nxt,
 	ctrl |= VSTATE_CTRL_MAKE_NEXT(nxt);
 	if (inherit)
 		ctrl |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
-	tsk->thread.vstate_ctrl = ctrl;
+	tsk->thread.vstate_ctrl &= ~PR_RISCV_V_VSTATE_CTRL_MASK;
+	tsk->thread.vstate_ctrl |= ctrl;
 }
 
 bool riscv_v_vstate_ctrl_user_allowed(void)
@@ -162,12 +199,12 @@ bool riscv_v_first_use_handler(struct pt_regs *regs)
 	 * context where VS has been off. So, try to allocate the user's V
 	 * context and resume execution.
 	 */
-	if (riscv_v_thread_zalloc()) {
+	if (riscv_v_thread_zalloc(riscv_v_user_cachep, &current->thread.vstate)) {
 		force_sig(SIGBUS);
 		return true;
 	}
 	riscv_v_vstate_on(regs);
-	riscv_v_vstate_restore(current, regs);
+	riscv_v_vstate_set_restore(current, regs);
 	return true;
 }
 
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 26cb2502ecf8..bd6e6c1b0497 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -6,8 +6,14 @@ lib-y			+= memmove.o
 lib-y			+= strcmp.o
 lib-y			+= strlen.o
 lib-y			+= strncmp.o
+lib-y			+= csum.o
+ifeq ($(CONFIG_MMU), y)
+lib-$(CONFIG_RISCV_ISA_V)	+= uaccess_vector.o
+endif
 lib-$(CONFIG_MMU)	+= uaccess.o
 lib-$(CONFIG_64BIT)	+= tishift.o
 lib-$(CONFIG_RISCV_ISA_ZICBOZ)	+= clear_page.o
 
 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+lib-$(CONFIG_RISCV_ISA_V)	+= xor.o
+lib-$(CONFIG_RISCV_ISA_V)	+= riscv_v_helpers.o
diff --git a/arch/riscv/lib/csum.c b/arch/riscv/lib/csum.c
new file mode 100644
index 000000000000..af3df5274ccb
--- /dev/null
+++ b/arch/riscv/lib/csum.c
@@ -0,0 +1,328 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Checksum library
+ *
+ * Influenced by arch/arm64/lib/csum.c
+ * Copyright (C) 2023 Rivos Inc.
+ */
+#include <linux/bitops.h>
+#include <linux/compiler.h>
+#include <linux/jump_label.h>
+#include <linux/kasan-checks.h>
+#include <linux/kernel.h>
+
+#include <asm/cpufeature.h>
+
+#include <net/checksum.h>
+
+/* Default version is sufficient for 32 bit */
+#ifndef CONFIG_32BIT
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+			const struct in6_addr *daddr,
+			__u32 len, __u8 proto, __wsum csum)
+{
+	unsigned int ulen, uproto;
+	unsigned long sum = (__force unsigned long)csum;
+
+	sum += (__force unsigned long)saddr->s6_addr32[0];
+	sum += (__force unsigned long)saddr->s6_addr32[1];
+	sum += (__force unsigned long)saddr->s6_addr32[2];
+	sum += (__force unsigned long)saddr->s6_addr32[3];
+
+	sum += (__force unsigned long)daddr->s6_addr32[0];
+	sum += (__force unsigned long)daddr->s6_addr32[1];
+	sum += (__force unsigned long)daddr->s6_addr32[2];
+	sum += (__force unsigned long)daddr->s6_addr32[3];
+
+	ulen = (__force unsigned int)htonl((unsigned int)len);
+	sum += ulen;
+
+	uproto = (__force unsigned int)htonl(proto);
+	sum += uproto;
+
+	/*
+	 * Zbb support saves 4 instructions, so not worth checking without
+	 * alternatives if supported
+	 */
+	if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
+	    IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
+		unsigned long fold_temp;
+
+		/*
+		 * Zbb is likely available when the kernel is compiled with Zbb
+		 * support, so nop when Zbb is available and jump when Zbb is
+		 * not available.
+		 */
+		asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
+					      RISCV_ISA_EXT_ZBB, 1)
+				  :
+				  :
+				  :
+				  : no_zbb);
+		asm(".option push					\n\
+		.option arch,+zbb					\n\
+			rori	%[fold_temp], %[sum], 32		\n\
+			add	%[sum], %[fold_temp], %[sum]		\n\
+			srli	%[sum], %[sum], 32			\n\
+			not	%[fold_temp], %[sum]			\n\
+			roriw	%[sum], %[sum], 16			\n\
+			subw	%[sum], %[fold_temp], %[sum]		\n\
+		.option pop"
+		: [sum] "+r" (sum), [fold_temp] "=&r" (fold_temp));
+		return (__force __sum16)(sum >> 16);
+	}
+no_zbb:
+	sum += ror64(sum, 32);
+	sum >>= 32;
+	return csum_fold((__force __wsum)sum);
+}
+EXPORT_SYMBOL(csum_ipv6_magic);
+#endif /* !CONFIG_32BIT */
+
+#ifdef CONFIG_32BIT
+#define OFFSET_MASK 3
+#elif CONFIG_64BIT
+#define OFFSET_MASK 7
+#endif
+
+static inline __no_sanitize_address unsigned long
+do_csum_common(const unsigned long *ptr, const unsigned long *end,
+	       unsigned long data)
+{
+	unsigned int shift;
+	unsigned long csum = 0, carry = 0;
+
+	/*
+	 * Do 32-bit reads on RV32 and 64-bit reads otherwise. This should be
+	 * faster than doing 32-bit reads on architectures that support larger
+	 * reads.
+	 */
+	while (ptr < end) {
+		csum += data;
+		carry += csum < data;
+		data = *(ptr++);
+	}
+
+	/*
+	 * Perform alignment (and over-read) bytes on the tail if any bytes
+	 * leftover.
+	 */
+	shift = ((long)ptr - (long)end) * 8;
+#ifdef __LITTLE_ENDIAN
+	data = (data << shift) >> shift;
+#else
+	data = (data >> shift) << shift;
+#endif
+	csum += data;
+	carry += csum < data;
+	csum += carry;
+	csum += csum < carry;
+
+	return csum;
+}
+
+/*
+ * Algorithm accounts for buff being misaligned.
+ * If buff is not aligned, will over-read bytes but not use the bytes that it
+ * shouldn't. The same thing will occur on the tail-end of the read.
+ */
+static inline __no_sanitize_address unsigned int
+do_csum_with_alignment(const unsigned char *buff, int len)
+{
+	unsigned int offset, shift;
+	unsigned long csum, data;
+	const unsigned long *ptr, *end;
+
+	/*
+	 * Align address to closest word (double word on rv64) that comes before
+	 * buff. This should always be in the same page and cache line.
+	 * Directly call KASAN with the alignment we will be using.
+	 */
+	offset = (unsigned long)buff & OFFSET_MASK;
+	kasan_check_read(buff, len);
+	ptr = (const unsigned long *)(buff - offset);
+
+	/*
+	 * Clear the most significant bytes that were over-read if buff was not
+	 * aligned.
+	 */
+	shift = offset * 8;
+	data = *(ptr++);
+#ifdef __LITTLE_ENDIAN
+	data = (data >> shift) << shift;
+#else
+	data = (data << shift) >> shift;
+#endif
+	end = (const unsigned long *)(buff + len);
+	csum = do_csum_common(ptr, end, data);
+
+#ifdef CC_HAS_ASM_GOTO_TIED_OUTPUT
+	/*
+	 * Zbb support saves 6 instructions, so not worth checking without
+	 * alternatives if supported
+	 */
+	if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
+	    IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
+		unsigned long fold_temp;
+
+		/*
+		 * Zbb is likely available when the kernel is compiled with Zbb
+		 * support, so nop when Zbb is available and jump when Zbb is
+		 * not available.
+		 */
+		asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
+					      RISCV_ISA_EXT_ZBB, 1)
+				  :
+				  :
+				  :
+				  : no_zbb);
+
+#ifdef CONFIG_32BIT
+		asm_volatile_goto(".option push			\n\
+		.option arch,+zbb				\n\
+			rori	%[fold_temp], %[csum], 16	\n\
+			andi	%[offset], %[offset], 1		\n\
+			add	%[csum], %[fold_temp], %[csum]	\n\
+			beq	%[offset], zero, %l[end]	\n\
+			rev8	%[csum], %[csum]		\n\
+		.option pop"
+			: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
+			: [offset] "r" (offset)
+			:
+			: end);
+
+		return (unsigned short)csum;
+#else /* !CONFIG_32BIT */
+		asm_volatile_goto(".option push			\n\
+		.option arch,+zbb				\n\
+			rori	%[fold_temp], %[csum], 32	\n\
+			add	%[csum], %[fold_temp], %[csum]	\n\
+			srli	%[csum], %[csum], 32		\n\
+			roriw	%[fold_temp], %[csum], 16	\n\
+			addw	%[csum], %[fold_temp], %[csum]	\n\
+			andi	%[offset], %[offset], 1		\n\
+			beq	%[offset], zero, %l[end]	\n\
+			rev8	%[csum], %[csum]		\n\
+		.option pop"
+			: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
+			: [offset] "r" (offset)
+			:
+			: end);
+
+		return (csum << 16) >> 48;
+#endif /* !CONFIG_32BIT */
+end:
+		return csum >> 16;
+	}
+no_zbb:
+#endif /* CC_HAS_ASM_GOTO_TIED_OUTPUT */
+#ifndef CONFIG_32BIT
+	csum += ror64(csum, 32);
+	csum >>= 32;
+#endif
+	csum = (u32)csum + ror32((u32)csum, 16);
+	if (offset & 1)
+		return (u16)swab32(csum);
+	return csum >> 16;
+}
+
+/*
+ * Does not perform alignment, should only be used if machine has fast
+ * misaligned accesses, or when buff is known to be aligned.
+ */
+static inline __no_sanitize_address unsigned int
+do_csum_no_alignment(const unsigned char *buff, int len)
+{
+	unsigned long csum, data;
+	const unsigned long *ptr, *end;
+
+	ptr = (const unsigned long *)(buff);
+	data = *(ptr++);
+
+	kasan_check_read(buff, len);
+
+	end = (const unsigned long *)(buff + len);
+	csum = do_csum_common(ptr, end, data);
+
+	/*
+	 * Zbb support saves 6 instructions, so not worth checking without
+	 * alternatives if supported
+	 */
+	if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
+	    IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
+		unsigned long fold_temp;
+
+		/*
+		 * Zbb is likely available when the kernel is compiled with Zbb
+		 * support, so nop when Zbb is available and jump when Zbb is
+		 * not available.
+		 */
+		asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
+					      RISCV_ISA_EXT_ZBB, 1)
+				  :
+				  :
+				  :
+				  : no_zbb);
+
+#ifdef CONFIG_32BIT
+		asm (".option push				\n\
+		.option arch,+zbb				\n\
+			rori	%[fold_temp], %[csum], 16	\n\
+			add	%[csum], %[fold_temp], %[csum]	\n\
+		.option pop"
+			: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
+			:
+			: );
+
+#else /* !CONFIG_32BIT */
+		asm (".option push				\n\
+		.option arch,+zbb				\n\
+			rori	%[fold_temp], %[csum], 32	\n\
+			add	%[csum], %[fold_temp], %[csum]	\n\
+			srli	%[csum], %[csum], 32		\n\
+			roriw	%[fold_temp], %[csum], 16	\n\
+			addw	%[csum], %[fold_temp], %[csum]	\n\
+		.option pop"
+			: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
+			:
+			: );
+#endif /* !CONFIG_32BIT */
+		return csum >> 16;
+	}
+no_zbb:
+#ifndef CONFIG_32BIT
+	csum += ror64(csum, 32);
+	csum >>= 32;
+#endif
+	csum = (u32)csum + ror32((u32)csum, 16);
+	return csum >> 16;
+}
+
+/*
+ * Perform a checksum on an arbitrary memory address.
+ * Will do a light-weight address alignment if buff is misaligned, unless
+ * cpu supports fast misaligned accesses.
+ */
+unsigned int do_csum(const unsigned char *buff, int len)
+{
+	if (unlikely(len <= 0))
+		return 0;
+
+	/*
+	 * Significant performance gains can be seen by not doing alignment
+	 * on machines with fast misaligned accesses.
+	 *
+	 * There is some duplicate code between the "with_alignment" and
+	 * "no_alignment" implementations, but the overlap is too awkward to be
+	 * able to fit in one function without introducing multiple static
+	 * branches. The largest chunk of overlap was delegated into the
+	 * do_csum_common function.
+	 */
+	if (static_branch_likely(&fast_misaligned_access_speed_key))
+		return do_csum_no_alignment(buff, len);
+
+	if (((unsigned long)buff & OFFSET_MASK) == 0)
+		return do_csum_no_alignment(buff, len);
+
+	return do_csum_with_alignment(buff, len);
+}
diff --git a/arch/riscv/lib/riscv_v_helpers.c b/arch/riscv/lib/riscv_v_helpers.c
new file mode 100644
index 000000000000..be38a93cedae
--- /dev/null
+++ b/arch/riscv/lib/riscv_v_helpers.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2023 SiFive
+ * Author: Andy Chiu <andy.chiu@sifive.com>
+ */
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+#include <asm/vector.h>
+#include <asm/simd.h>
+
+#ifdef CONFIG_MMU
+#include <asm/asm-prototypes.h>
+#endif
+
+#ifdef CONFIG_MMU
+size_t riscv_v_usercopy_threshold = CONFIG_RISCV_ISA_V_UCOPY_THRESHOLD;
+int __asm_vector_usercopy(void *dst, void *src, size_t n);
+int fallback_scalar_usercopy(void *dst, void *src, size_t n);
+asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n)
+{
+	size_t remain, copied;
+
+	/* skip has_vector() check because it has been done by the asm  */
+	if (!may_use_simd())
+		goto fallback;
+
+	kernel_vector_begin();
+	remain = __asm_vector_usercopy(dst, src, n);
+	kernel_vector_end();
+
+	if (remain) {
+		copied = n - remain;
+		dst += copied;
+		src += copied;
+		n = remain;
+		goto fallback;
+	}
+
+	return remain;
+
+fallback:
+	return fallback_scalar_usercopy(dst, src, n);
+}
+#endif
diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
index a9d356d6c03c..bc22c078aba8 100644
--- a/arch/riscv/lib/uaccess.S
+++ b/arch/riscv/lib/uaccess.S
@@ -3,6 +3,8 @@
 #include <asm/asm.h>
 #include <asm/asm-extable.h>
 #include <asm/csr.h>
+#include <asm/hwcap.h>
+#include <asm/alternative-macros.h>
 
 	.macro fixup op reg addr lbl
 100:
@@ -11,6 +13,13 @@
 	.endm
 
 SYM_FUNC_START(__asm_copy_to_user)
+#ifdef CONFIG_RISCV_ISA_V
+	ALTERNATIVE("j fallback_scalar_usercopy", "nop", 0, RISCV_ISA_EXT_v, CONFIG_RISCV_ISA_V)
+	REG_L	t0, riscv_v_usercopy_threshold
+	bleu	a2, t0, fallback_scalar_usercopy	/* vector only when size > threshold */
+	tail enter_vector_usercopy
+#endif
+SYM_FUNC_START(fallback_scalar_usercopy)
 
 	/* Enable access to user memory */
 	li t6, SR_SUM
@@ -181,6 +190,7 @@ SYM_FUNC_START(__asm_copy_to_user)
 	sub a0, t5, a0
 	ret
 SYM_FUNC_END(__asm_copy_to_user)
+SYM_FUNC_END(fallback_scalar_usercopy)
 EXPORT_SYMBOL(__asm_copy_to_user)
 SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user)
 EXPORT_SYMBOL(__asm_copy_from_user)
diff --git a/arch/riscv/lib/uaccess_vector.S b/arch/riscv/lib/uaccess_vector.S
new file mode 100644
index 000000000000..51ab5588e9ff
--- /dev/null
+++ b/arch/riscv/lib/uaccess_vector.S
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/linkage.h>
+#include <asm-generic/export.h>
+#include <asm/asm.h>
+#include <asm/asm-extable.h>
+#include <asm/csr.h>
+
+#define pDst a0
+#define pSrc a1
+#define iNum a2
+
+#define iVL a3
+
+#define ELEM_LMUL_SETTING m8
+#define vData v0
+
+	.macro fixup op reg addr lbl
+100:
+	\op \reg, \addr
+	_asm_extable	100b, \lbl
+	.endm
+
+SYM_FUNC_START(__asm_vector_usercopy)
+	/* Enable access to user memory */
+	li	t6, SR_SUM
+	csrs	CSR_STATUS, t6
+
+loop:
+	vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
+	fixup vle8.v vData, (pSrc), 10f
+	sub iNum, iNum, iVL
+	add pSrc, pSrc, iVL
+	fixup vse8.v vData, (pDst), 11f
+	add pDst, pDst, iVL
+	bnez iNum, loop
+
+	/* Exception fixup for vector load is shared with normal exit */
+10:
+	/* Disable access to user memory */
+	csrc	CSR_STATUS, t6
+	mv	a0, iNum
+	ret
+
+	/* Exception fixup code for vector store. */
+11:
+	/* Undo the subtraction after vle8.v */
+	add	iNum, iNum, iVL
+	/* Make sure the scalar fallback skip already processed bytes */
+	csrr	t2, CSR_VSTART
+	sub	iNum, iNum, t2
+	j	10b
+SYM_FUNC_END(__asm_vector_usercopy)
diff --git a/arch/riscv/lib/xor.S b/arch/riscv/lib/xor.S
new file mode 100644
index 000000000000..b28f2430e52f
--- /dev/null
+++ b/arch/riscv/lib/xor.S
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2021 SiFive
+ */
+#include <linux/linkage.h>
+#include <linux/export.h>
+#include <asm/asm.h>
+
+SYM_FUNC_START(xor_regs_2_)
+	vsetvli a3, a0, e8, m8, ta, ma
+	vle8.v v0, (a1)
+	vle8.v v8, (a2)
+	sub a0, a0, a3
+	vxor.vv v16, v0, v8
+	add a2, a2, a3
+	vse8.v v16, (a1)
+	add a1, a1, a3
+	bnez a0, xor_regs_2_
+	ret
+SYM_FUNC_END(xor_regs_2_)
+EXPORT_SYMBOL(xor_regs_2_)
+
+SYM_FUNC_START(xor_regs_3_)
+	vsetvli a4, a0, e8, m8, ta, ma
+	vle8.v v0, (a1)
+	vle8.v v8, (a2)
+	sub a0, a0, a4
+	vxor.vv v0, v0, v8
+	vle8.v v16, (a3)
+	add a2, a2, a4
+	vxor.vv v16, v0, v16
+	add a3, a3, a4
+	vse8.v v16, (a1)
+	add a1, a1, a4
+	bnez a0, xor_regs_3_
+	ret
+SYM_FUNC_END(xor_regs_3_)
+EXPORT_SYMBOL(xor_regs_3_)
+
+SYM_FUNC_START(xor_regs_4_)
+	vsetvli a5, a0, e8, m8, ta, ma
+	vle8.v v0, (a1)
+	vle8.v v8, (a2)
+	sub a0, a0, a5
+	vxor.vv v0, v0, v8
+	vle8.v v16, (a3)
+	add a2, a2, a5
+	vxor.vv v0, v0, v16
+	vle8.v v24, (a4)
+	add a3, a3, a5
+	vxor.vv v16, v0, v24
+	add a4, a4, a5
+	vse8.v v16, (a1)
+	add a1, a1, a5
+	bnez a0, xor_regs_4_
+	ret
+SYM_FUNC_END(xor_regs_4_)
+EXPORT_SYMBOL(xor_regs_4_)
+
+SYM_FUNC_START(xor_regs_5_)
+	vsetvli a6, a0, e8, m8, ta, ma
+	vle8.v v0, (a1)
+	vle8.v v8, (a2)
+	sub a0, a0, a6
+	vxor.vv v0, v0, v8
+	vle8.v v16, (a3)
+	add a2, a2, a6
+	vxor.vv v0, v0, v16
+	vle8.v v24, (a4)
+	add a3, a3, a6
+	vxor.vv v0, v0, v24
+	vle8.v v8, (a5)
+	add a4, a4, a6
+	vxor.vv v16, v0, v8
+	add a5, a5, a6
+	vse8.v v16, (a1)
+	add a1, a1, a6
+	bnez a0, xor_regs_5_
+	ret
+SYM_FUNC_END(xor_regs_5_)
+EXPORT_SYMBOL(xor_regs_5_)
diff --git a/arch/riscv/mm/extable.c b/arch/riscv/mm/extable.c
index 35484d830fd6..dd1530af3ef1 100644
--- a/arch/riscv/mm/extable.c
+++ b/arch/riscv/mm/extable.c
@@ -27,6 +27,14 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex,
 	return true;
 }
 
+static inline unsigned long regs_get_gpr(struct pt_regs *regs, unsigned int offset)
+{
+	if (unlikely(!offset || offset > MAX_REG_OFFSET))
+		return 0;
+
+	return *(unsigned long *)((unsigned long)regs + offset);
+}
+
 static inline void regs_set_gpr(struct pt_regs *regs, unsigned int offset,
 				unsigned long val)
 {
@@ -50,6 +58,27 @@ static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex,
 	return true;
 }
 
+static bool
+ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex,
+				  struct pt_regs *regs)
+{
+	int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->data);
+	int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
+	unsigned long data, addr, offset;
+
+	addr = regs_get_gpr(regs, reg_addr * sizeof(unsigned long));
+	/* Split into a word-aligned base and a byte offset (RV32 and RV64) */
+	offset = addr & (sizeof(unsigned long) - 1);
+	addr &= ~(sizeof(unsigned long) - 1);
+	/* Re-read the aligned word; the shift zero-fills the missing bytes */
+	data = *(unsigned long *)addr >> (offset * 8);
+
+	regs_set_gpr(regs, reg_data * sizeof(unsigned long), data);
+
+	regs->epc = get_ex_fixup(ex);
+	return true;
+}
+
 bool fixup_exception(struct pt_regs *regs)
 {
 	const struct exception_table_entry *ex;
@@ -65,6 +94,8 @@ bool fixup_exception(struct pt_regs *regs)
 		return ex_handler_bpf(ex, regs);
 	case EX_TYPE_UACCESS_ERR_ZERO:
 		return ex_handler_uaccess_err_zero(ex, regs);
+	case EX_TYPE_LOAD_UNALIGNED_ZEROPAD:
+		return ex_handler_load_unaligned_zeropad(ex, regs);
 	}
 
 	BUG();
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index a65937336cdc..32cad6a65ccd 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -1060,7 +1060,11 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 	kernel_map.virt_addr = KERNEL_LINK_ADDR + kernel_map.virt_offset;
 
 #ifdef CONFIG_XIP_KERNEL
+#ifdef CONFIG_64BIT
 	kernel_map.page_offset = PAGE_OFFSET_L3;
+#else
+	kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL);
+#endif
 	kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR;
 	kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom);
 
@@ -1387,10 +1391,29 @@ void __init misc_mem_init(void)
 }
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
+void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+			       unsigned long addr, unsigned long next)
+{
+	pmd_set_huge(pmd, virt_to_phys(p), PAGE_KERNEL);
+}
+
+int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node,
+				unsigned long addr, unsigned long next)
+{
+	vmemmap_verify((pte_t *)pmdp, node, addr, next);
+	return 1;
+}
+
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 			       struct vmem_altmap *altmap)
 {
-	return vmemmap_populate_basepages(start, end, node, NULL);
+	/*
+	 * Note that SPARSEMEM_VMEMMAP is only selected for rv64 and that we
+	 * can't use hugepage mappings for 2-level page table because in case of
+	 * memory hotplug, we are not able to update all the page tables with
+	 * the new PMDs.
+	 */
+	return vmemmap_populate_hugepages(start, end, node, NULL);
 }
 #endif
 
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index 8aadc5f71c93..8d12b26f5ac3 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -98,29 +98,23 @@ static void __ipi_flush_tlb_range_asid(void *info)
 	local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid);
 }
 
-static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
-			      unsigned long size, unsigned long stride)
+static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid,
+			      unsigned long start, unsigned long size,
+			      unsigned long stride)
 {
 	struct flush_tlb_range_data ftd;
-	const struct cpumask *cmask;
-	unsigned long asid = FLUSH_TLB_NO_ASID;
 	bool broadcast;
 
-	if (mm) {
-		unsigned int cpuid;
+	if (cpumask_empty(cmask))
+		return;
 
-		cmask = mm_cpumask(mm);
-		if (cpumask_empty(cmask))
-			return;
+	if (cmask != cpu_online_mask) {
+		unsigned int cpuid;
 
 		cpuid = get_cpu();
 		/* check if the tlbflush needs to be sent to other CPUs */
 		broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;
-
-		if (static_branch_unlikely(&use_asid_allocator))
-			asid = atomic_long_read(&mm->context.id) & asid_mask;
 	} else {
-		cmask = cpu_online_mask;
 		broadcast = true;
 	}
 
@@ -140,25 +134,34 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
 		local_flush_tlb_range_asid(start, size, stride, asid);
 	}
 
-	if (mm)
+	if (cmask != cpu_online_mask)
 		put_cpu();
 }
 
+static inline unsigned long get_mm_asid(struct mm_struct *mm)
+{
+	return static_branch_unlikely(&use_asid_allocator) ?
+			atomic_long_read(&mm->context.id) & asid_mask : FLUSH_TLB_NO_ASID;
+}
+
 void flush_tlb_mm(struct mm_struct *mm)
 {
-	__flush_tlb_range(mm, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
+	__flush_tlb_range(mm_cpumask(mm), get_mm_asid(mm),
+			  0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
 }
 
 void flush_tlb_mm_range(struct mm_struct *mm,
 			unsigned long start, unsigned long end,
 			unsigned int page_size)
 {
-	__flush_tlb_range(mm, start, end - start, page_size);
+	__flush_tlb_range(mm_cpumask(mm), get_mm_asid(mm),
+			  start, end - start, page_size);
 }
 
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
 {
-	__flush_tlb_range(vma->vm_mm, addr, PAGE_SIZE, PAGE_SIZE);
+	__flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
+			  addr, PAGE_SIZE, PAGE_SIZE);
 }
 
 void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
@@ -190,18 +193,44 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 		}
 	}
 
-	__flush_tlb_range(vma->vm_mm, start, end - start, stride_size);
+	__flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
+			  start, end - start, stride_size);
 }
 
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
-	__flush_tlb_range(NULL, start, end - start, PAGE_SIZE);
+	__flush_tlb_range((struct cpumask *)cpu_online_mask, FLUSH_TLB_NO_ASID,
+			  start, end - start, PAGE_SIZE);
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
 			unsigned long end)
 {
-	__flush_tlb_range(vma->vm_mm, start, end - start, PMD_SIZE);
+	__flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
+			  start, end - start, PMD_SIZE);
 }
 #endif
+
+bool arch_tlbbatch_should_defer(struct mm_struct *mm)
+{
+	return true;
+}
+
+void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+			       struct mm_struct *mm,
+			       unsigned long uaddr)
+{
+	cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
+}
+
+void arch_flush_tlb_batched_pending(struct mm_struct *mm)
+{
+	flush_tlb_mm(mm);
+}
+
+void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
+{
+	__flush_tlb_range(&batch->cpumask, FLUSH_TLB_NO_ASID, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
+	cpumask_clear(&batch->cpumask);	/* add_pending only ever sets bits */
+}
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 58dc64dd94a8..719a97e7edb2 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -795,6 +795,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
 	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
 	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
 	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
+	bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT;
 	void *orig_call = func_addr;
 	bool save_ret;
 	u32 insn;
@@ -878,7 +879,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
 
 	stack_size = round_up(stack_size, 16);
 
-	if (func_addr) {
+	if (!is_struct_ops) {
 		/* For the trampoline called from function entry,
 		 * the frame of traced function and the frame of
 		 * trampoline need to be considered.
@@ -998,7 +999,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
 
 	emit_ld(RV_REG_S1, -sreg_off, RV_REG_FP, ctx);
 
-	if (func_addr) {
+	if (!is_struct_ops) {
 		/* trampoline called from function entry */
 		emit_ld(RV_REG_T0, stack_size - 8, RV_REG_SP, ctx);
 		emit_ld(RV_REG_FP, stack_size - 16, RV_REG_SP, ctx);
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 67ba0157fbdb..cae2dd34fbb4 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -637,8 +637,9 @@ CONFIG_FUSE_FS=y
 CONFIG_CUSE=m
 CONFIG_VIRTIO_FS=m
 CONFIG_OVERLAY_FS=m
+CONFIG_NETFS_SUPPORT=m
 CONFIG_NETFS_STATS=y
-CONFIG_FSCACHE=m
+CONFIG_FSCACHE=y
 CONFIG_CACHEFILES=m
 CONFIG_ISO9660_FS=y
 CONFIG_JOLIET=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 4c2650c1fbdd..42b988873e54 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -622,8 +622,9 @@ CONFIG_FUSE_FS=y
 CONFIG_CUSE=m
 CONFIG_VIRTIO_FS=m
 CONFIG_OVERLAY_FS=m
+CONFIG_NETFS_SUPPORT=m
 CONFIG_NETFS_STATS=y
-CONFIG_FSCACHE=m
+CONFIG_FSCACHE=y
 CONFIG_CACHEFILES=m
 CONFIG_ISO9660_FS=y
 CONFIG_JOLIET=y
diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c
index 0f279360838a..30d117f9ad7e 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -1220,7 +1220,7 @@ static int __init arch_setup(void)
 		lcdc_info.ch[0].num_modes		= ARRAY_SIZE(ecovec_dvi_modes);
 
 		/* No backlight */
-		gpio_backlight_data.fbdev = NULL;
+		gpio_backlight_data.dev = NULL;
 
 		gpio_set_value(GPIO_PTA2, 1);
 		gpio_set_value(GPIO_PTU1, 1);
diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig
index cf59b98446e4..7b427c17fbfe 100644
--- a/arch/sh/configs/sdk7786_defconfig
+++ b/arch/sh/configs/sdk7786_defconfig
@@ -171,7 +171,8 @@ CONFIG_BTRFS_FS=y
 CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=y
 CONFIG_CUSE=m
-CONFIG_FSCACHE=m
+CONFIG_NETFS_SUPPORT=m
+CONFIG_FSCACHE=y
 CONFIG_CACHEFILES=m
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
diff --git a/arch/sh/kernel/vsyscall/Makefile b/arch/sh/kernel/vsyscall/Makefile
index 6e8664448048..118744d349e2 100644
--- a/arch/sh/kernel/vsyscall/Makefile
+++ b/arch/sh/kernel/vsyscall/Makefile
@@ -1,11 +1,10 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-y += vsyscall.o vsyscall-syscall.o vsyscall-syms.o
 
-$(obj)/vsyscall-syscall.o: \
-	$(foreach F,trapa,$(obj)/vsyscall-$F.so)
+$(obj)/vsyscall-syscall.o: $(obj)/vsyscall-trapa.so
 
 # Teach kbuild about targets
-targets += $(foreach F,trapa,vsyscall-$F.o vsyscall-$F.so)
+targets += vsyscall-trapa.o vsyscall-trapa.so
 targets += vsyscall-note.o vsyscall.lds vsyscall-dummy.o
 
 # The DSO images are built using a special linker script
diff --git a/arch/sparc/kernel/pci_sabre.c b/arch/sparc/kernel/pci_sabre.c
index 3c38ca40a22b..a84598568300 100644
--- a/arch/sparc/kernel/pci_sabre.c
+++ b/arch/sparc/kernel/pci_sabre.c
@@ -13,7 +13,10 @@
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
 
 #include <asm/apb.h>
 #include <asm/iommu.h>
@@ -456,7 +459,6 @@ static void sabre_pbm_init(struct pci_pbm_info *pbm,
 static const struct of_device_id sabre_match[];
 static int sabre_probe(struct platform_device *op)
 {
-	const struct of_device_id *match;
 	const struct linux_prom64_registers *pr_regs;
 	struct device_node *dp = op->dev.of_node;
 	struct pci_pbm_info *pbm;
@@ -466,8 +468,7 @@ static int sabre_probe(struct platform_device *op)
 	const u32 *vdma;
 	u64 clear_irq;
 
-	match = of_match_device(sabre_match, &op->dev);
-	hummingbird_p = match && (match->data != NULL);
+	hummingbird_p = (uintptr_t)device_get_match_data(&op->dev);
 	if (!hummingbird_p) {
 		struct device_node *cpu_dp;
 
diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c
index 23b47f7fdb1d..5d8dd4949586 100644
--- a/arch/sparc/kernel/pci_schizo.c
+++ b/arch/sparc/kernel/pci_schizo.c
@@ -11,7 +11,10 @@
 #include <linux/slab.h>
 #include <linux/export.h>
 #include <linux/interrupt.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
 #include <linux/numa.h>
 
 #include <asm/iommu.h>
@@ -1459,15 +1462,13 @@ out_err:
 	return err;
 }
 
-static const struct of_device_id schizo_match[];
 static int schizo_probe(struct platform_device *op)
 {
-	const struct of_device_id *match;
+	unsigned long chip_type = (unsigned long)device_get_match_data(&op->dev);
 
-	match = of_match_device(schizo_match, &op->dev);
-	if (!match)
+	if (!chip_type)
 		return -EINVAL;
-	return __schizo_init(op, (unsigned long)match->data);
+	return __schizo_init(op, chip_type);
 }
 
 /* The ordering of this table is very important.  Some Tomatillo
diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile
index d08c3a0443f3..7f5eedf1f5e0 100644
--- a/arch/sparc/vdso/Makefile
+++ b/arch/sparc/vdso/Makefile
@@ -3,9 +3,6 @@
 # Building vDSO images for sparc.
 #
 
-VDSO64-$(CONFIG_SPARC64)	:= y
-VDSOCOMPAT-$(CONFIG_COMPAT)	:= y
-
 # files to link into the vdso
 vobjs-y := vdso-note.o vclock_gettime.o
 
@@ -13,22 +10,15 @@ vobjs-y := vdso-note.o vclock_gettime.o
 obj-y				+= vma.o
 
 # vDSO images to build
-vdso_img-$(VDSO64-y)		+= 64
-vdso_img-$(VDSOCOMPAT-y)	+= 32
+obj-$(CONFIG_SPARC64)		+= vdso-image-64.o
+obj-$(CONFIG_COMPAT)		+= vdso-image-32.o
 
-vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
+vobjs := $(addprefix $(obj)/, $(vobjs-y))
 
 $(obj)/vdso.o: $(obj)/vdso.so
 
 targets += vdso.lds $(vobjs-y)
-
-# Build the vDSO image C files and link them in.
-vdso_img_objs := $(vdso_img-y:%=vdso-image-%.o)
-vdso_img_cfiles := $(vdso_img-y:%=vdso-image-%.c)
-vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg)
-obj-y += $(vdso_img_objs)
-targets += $(vdso_img_cfiles)
-targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so)
+targets += $(foreach x, 32 64, vdso-image-$(x).c vdso$(x).so vdso$(x).so.dbg)
 
 CPPFLAGS_vdso.lds += -P -C
 
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index feef615e2c9c..c9a16fba58b9 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -336,7 +336,7 @@ int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes,
 	if (nr_vecs > BIO_MAX_VECS)
 		return -E2BIG;
 	if (nr_vecs > UIO_FASTIOV) {
-		bvec = kcalloc(sizeof(*bvec), nr_vecs, GFP_KERNEL);
+		bvec = kcalloc(nr_vecs, sizeof(*bvec), GFP_KERNEL);
 		if (!bvec)
 			return -ENOMEM;
 		pages = NULL;
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index e303fd317313..ff93c385ba5a 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -300,7 +300,7 @@ static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
  * @disk: gendisk the new blkg is associated with
  * @gfp_mask: allocation mask to use
  *
- * Allocate a new blkg assocating @blkcg and @q.
+ * Allocate a new blkg associating @blkcg and @disk.
  */
 static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
 				   gfp_t gfp_mask)
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 089fcb9cfce3..c8beec6d7df0 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -1261,7 +1261,7 @@ static void weight_updated(struct ioc_gq *iocg, struct ioc_now *now)
 static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now)
 {
 	struct ioc *ioc = iocg->ioc;
-	u64 last_period, cur_period;
+	u64 __maybe_unused last_period, cur_period;
 	u64 vtime, vtarget;
 	int i;
 
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 5cbeb9344f2f..94668e72ab09 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -479,23 +479,6 @@ out:
 	return res;
 }
 
-static int hctx_run_show(void *data, struct seq_file *m)
-{
-	struct blk_mq_hw_ctx *hctx = data;
-
-	seq_printf(m, "%lu\n", hctx->run);
-	return 0;
-}
-
-static ssize_t hctx_run_write(void *data, const char __user *buf, size_t count,
-			      loff_t *ppos)
-{
-	struct blk_mq_hw_ctx *hctx = data;
-
-	hctx->run = 0;
-	return count;
-}
-
 static int hctx_active_show(void *data, struct seq_file *m)
 {
 	struct blk_mq_hw_ctx *hctx = data;
@@ -624,7 +607,6 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
 	{"tags_bitmap", 0400, hctx_tags_bitmap_show},
 	{"sched_tags", 0400, hctx_sched_tags_show},
 	{"sched_tags_bitmap", 0400, hctx_sched_tags_bitmap_show},
-	{"run", 0600, hctx_run_show, hctx_run_write},
 	{"active", 0400, hctx_active_show},
 	{"dispatch_busy", 0400, hctx_dispatch_busy_show},
 	{"type", 0400, hctx_type_show},
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 67c95f31b15b..451a2c1f1f32 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -324,8 +324,6 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
 	if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
 		return;
 
-	hctx->run++;
-
 	/*
 	 * A return of -EAGAIN is an indication that hctx->dispatch is not
 	 * empty and we must run again in order to avoid starving flushes.
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c11c97afa0bc..aa87fcfda1ec 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -772,11 +772,16 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
 		/*
 		 * Partial zone append completions cannot be supported as the
 		 * BIO fragments may end up not being written sequentially.
+		 * For such case, force the completed nbytes to be equal to
+		 * the BIO size so that bio_advance() sets the BIO remaining
+		 * size to 0 and we end up calling bio_endio() before returning.
 		 */
-		if (bio->bi_iter.bi_size != nbytes)
+		if (bio->bi_iter.bi_size != nbytes) {
 			bio->bi_status = BLK_STS_IOERR;
-		else
+			nbytes = bio->bi_iter.bi_size;
+		} else {
 			bio->bi_iter.bi_sector = rq->__sector;
+		}
 	}
 
 	bio_advance(bio, nbytes);
@@ -1860,6 +1865,22 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
 	__add_wait_queue(wq, wait);
 
 	/*
+	 * Add one explicit barrier since blk_mq_get_driver_tag() may
+	 * not imply barrier in case of failure.
+	 *
+	 * Order adding us to wait queue and allocating driver tag.
+	 *
+	 * The pair is the one implied in sbitmap_queue_wake_up() which
+	 * orders clearing sbitmap tag bits and waitqueue_active() in
+	 * __sbitmap_queue_wake_up(), since waitqueue_active() is lockless
+	 *
+	 * Otherwise, re-order of adding wait queue and getting driver tag
+	 * may cause __sbitmap_queue_wake_up() to wake up nothing because
+	 * the waitqueue_active() may not observe us in wait queue.
+	 */
+	smp_mb();
+
+	/*
 	 * It's possible that a tag was freed in the window between the
 	 * allocation failure and adding the hardware queue to the wait
 	 * queue.
@@ -2891,8 +2912,11 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
 	return NULL;
 }
 
-/* return true if this @rq can be used for @bio */
-static bool blk_mq_can_use_cached_rq(struct request *rq, struct blk_plug *plug,
+/*
+ * Check if we can use the passed in request for submitting the passed in bio,
+ * and remove it from the request list if it can be used.
+ */
+static bool blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
 		struct bio *bio)
 {
 	enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf);
@@ -2952,12 +2976,6 @@ void blk_mq_submit_bio(struct bio *bio)
 	blk_status_t ret;
 
 	bio = blk_queue_bounce(bio, q);
-	if (bio_may_exceed_limits(bio, &q->limits)) {
-		bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
-		if (!bio)
-			return;
-	}
-
 	bio_set_ioprio(bio);
 
 	if (plug) {
@@ -2966,16 +2984,26 @@ void blk_mq_submit_bio(struct bio *bio)
 			rq = NULL;
 	}
 	if (rq) {
+		if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
+			bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
+			if (!bio)
+				return;
+		}
 		if (!bio_integrity_prep(bio))
 			return;
 		if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
 			return;
-		if (blk_mq_can_use_cached_rq(rq, plug, bio))
+		if (blk_mq_use_cached_rq(rq, plug, bio))
 			goto done;
 		percpu_ref_get(&q->q_usage_counter);
 	} else {
 		if (unlikely(bio_queue_enter(bio)))
 			return;
+		if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
+			bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
+			if (!bio)
+				goto fail;
+		}
 		if (!bio_integrity_prep(bio))
 			goto fail;
 	}
diff --git a/block/ioprio.c b/block/ioprio.c
index b5a942519a79..73301a261429 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -139,32 +139,6 @@ out:
 	return ret;
 }
 
-/*
- * If the task has set an I/O priority, use that. Otherwise, return
- * the default I/O priority.
- *
- * Expected to be called for current task or with task_lock() held to keep
- * io_context stable.
- */
-int __get_task_ioprio(struct task_struct *p)
-{
-	struct io_context *ioc = p->io_context;
-	int prio;
-
-	if (p != current)
-		lockdep_assert_held(&p->alloc_lock);
-	if (ioc)
-		prio = ioc->ioprio;
-	else
-		prio = IOPRIO_DEFAULT;
-
-	if (IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_NONE)
-		prio = IOPRIO_PRIO_VALUE(task_nice_ioclass(p),
-					 task_nice_ioprio(p));
-	return prio;
-}
-EXPORT_SYMBOL_GPL(__get_task_ioprio);
-
 static int get_task_ioprio(struct task_struct *p)
 {
 	int ret;
diff --git a/block/partitions/core.c b/block/partitions/core.c
index e6ac73617f3e..cab0d76a828e 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -562,8 +562,8 @@ static bool blk_add_partition(struct gendisk *disk,
 	part = add_partition(disk, p, from, size, state->parts[p].flags,
 			     &state->parts[p].info);
 	if (IS_ERR(part) && PTR_ERR(part) != -ENXIO) {
-		printk(KERN_ERR " %s: p%d could not be added: %ld\n",
-		       disk->disk_name, p, -PTR_ERR(part));
+		printk(KERN_ERR " %s: p%d could not be added: %pe\n",
+		       disk->disk_name, p, part);
 		return true;
 	}
 
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 146b32fa7b47..f8145499da38 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -165,39 +165,37 @@ static loff_t get_loop_size(struct loop_device *lo, struct file *file)
 	return get_size(lo->lo_offset, lo->lo_sizelimit, file);
 }
 
+/*
+ * We support direct I/O only if lo_offset is aligned with the logical block
+ * size of the backing device, and the logical block size of loop is not
+ * smaller than that of the backing device.
+ */
+static bool lo_bdev_can_use_dio(struct loop_device *lo,
+		struct block_device *backing_bdev)
+{
+	unsigned short sb_bsize = bdev_logical_block_size(backing_bdev);
+
+	if (queue_logical_block_size(lo->lo_queue) < sb_bsize)
+		return false;
+	if (lo->lo_offset & (sb_bsize - 1))
+		return false;
+	return true;
+}
+
 static void __loop_update_dio(struct loop_device *lo, bool dio)
 {
 	struct file *file = lo->lo_backing_file;
-	struct address_space *mapping = file->f_mapping;
-	struct inode *inode = mapping->host;
-	unsigned short sb_bsize = 0;
-	unsigned dio_align = 0;
+	struct inode *inode = file->f_mapping->host;
+	struct block_device *backing_bdev = NULL;
 	bool use_dio;
 
-	if (inode->i_sb->s_bdev) {
-		sb_bsize = bdev_logical_block_size(inode->i_sb->s_bdev);
-		dio_align = sb_bsize - 1;
-	}
+	if (S_ISBLK(inode->i_mode))
+		backing_bdev = I_BDEV(inode);
+	else if (inode->i_sb->s_bdev)
+		backing_bdev = inode->i_sb->s_bdev;
 
-	/*
-	 * We support direct I/O only if lo_offset is aligned with the
-	 * logical I/O size of backing device, and the logical block
-	 * size of loop is bigger than the backing device's.
-	 *
-	 * TODO: the above condition may be loosed in the future, and
-	 * direct I/O may be switched runtime at that time because most
-	 * of requests in sane applications should be PAGE_SIZE aligned
-	 */
-	if (dio) {
-		if (queue_logical_block_size(lo->lo_queue) >= sb_bsize &&
-		    !(lo->lo_offset & dio_align) &&
-		    (file->f_mode & FMODE_CAN_ODIRECT))
-			use_dio = true;
-		else
-			use_dio = false;
-	} else {
-		use_dio = false;
-	}
+	use_dio = dio && (file->f_mode & FMODE_CAN_ODIRECT) &&
+		(!backing_bdev || lo_bdev_can_use_dio(lo, backing_bdev));
 
 	if (lo->use_dio == use_dio)
 		return;
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 4e72ec4e25ac..33a8f37bb6a1 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -508,7 +508,7 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send,
 		       struct iov_iter *iter, int msg_flags, int *sent)
 {
 	int result;
-	struct msghdr msg;
+	struct msghdr msg = {} ;
 	unsigned int noreclaim_flag;
 
 	if (unlikely(!sock)) {
@@ -524,10 +524,6 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send,
 	do {
 		sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
 		sock->sk->sk_use_task_frag = false;
-		msg.msg_name = NULL;
-		msg.msg_namelen = 0;
-		msg.msg_control = NULL;
-		msg.msg_controllen = 0;
 		msg.msg_flags = msg_flags | MSG_NOSIGNAL;
 
 		if (send)
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 9f7695f00c2d..36755f263e8e 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1840,7 +1840,7 @@ static void null_del_dev(struct nullb *nullb)
 
 	dev = nullb->dev;
 
-	ida_simple_remove(&nullb_indexes, nullb->index);
+	ida_free(&nullb_indexes, nullb->index);
 
 	list_del_init(&nullb->list);
 
@@ -2174,7 +2174,7 @@ static int null_add_dev(struct nullb_device *dev)
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q);
 
 	mutex_lock(&lock);
-	rv = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
+	rv = ida_alloc(&nullb_indexes, GFP_KERNEL);
 	if (rv < 0) {
 		mutex_unlock(&lock);
 		goto out_cleanup_zone;
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index a999b698b131..12b5d53ec856 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3452,14 +3452,15 @@ static bool rbd_lock_add_request(struct rbd_img_request *img_req)
 static void rbd_lock_del_request(struct rbd_img_request *img_req)
 {
 	struct rbd_device *rbd_dev = img_req->rbd_dev;
-	bool need_wakeup;
+	bool need_wakeup = false;
 
 	lockdep_assert_held(&rbd_dev->lock_rwsem);
 	spin_lock(&rbd_dev->lock_lists_lock);
-	rbd_assert(!list_empty(&img_req->lock_item));
-	list_del_init(&img_req->lock_item);
-	need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING &&
-		       list_empty(&rbd_dev->running_list));
+	if (!list_empty(&img_req->lock_item)) {
+		list_del_init(&img_req->lock_item);
+		need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING &&
+			       list_empty(&rbd_dev->running_list));
+	}
 	spin_unlock(&rbd_dev->lock_lists_lock);
 	if (need_wakeup)
 		complete(&rbd_dev->releasing_wait);
@@ -3842,14 +3843,19 @@ static void wake_lock_waiters(struct rbd_device *rbd_dev, int result)
 		return;
 	}
 
-	list_for_each_entry(img_req, &rbd_dev->acquiring_list, lock_item) {
+	while (!list_empty(&rbd_dev->acquiring_list)) {
+		img_req = list_first_entry(&rbd_dev->acquiring_list,
+					   struct rbd_img_request, lock_item);
 		mutex_lock(&img_req->state_mutex);
 		rbd_assert(img_req->state == RBD_IMG_EXCLUSIVE_LOCK);
+		if (!result)
+			list_move_tail(&img_req->lock_item,
+				       &rbd_dev->running_list);
+		else
+			list_del_init(&img_req->lock_item);
 		rbd_img_schedule(img_req, result);
 		mutex_unlock(&img_req->state_mutex);
 	}
-
-	list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list);
 }
 
 static bool locker_equal(const struct ceph_locker *lhs,
@@ -5326,7 +5332,7 @@ static void rbd_dev_release(struct device *dev)
 
 	if (need_put) {
 		destroy_workqueue(rbd_dev->task_wq);
-		ida_simple_remove(&rbd_dev_id_ida, rbd_dev->dev_id);
+		ida_free(&rbd_dev_id_ida, rbd_dev->dev_id);
 	}
 
 	rbd_dev_free(rbd_dev);
@@ -5402,9 +5408,9 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
 		return NULL;
 
 	/* get an id and fill in device name */
-	rbd_dev->dev_id = ida_simple_get(&rbd_dev_id_ida, 0,
-					 minor_to_rbd_dev_id(1 << MINORBITS),
-					 GFP_KERNEL);
+	rbd_dev->dev_id = ida_alloc_max(&rbd_dev_id_ida,
+					minor_to_rbd_dev_id(1 << MINORBITS) - 1,
+					GFP_KERNEL);
 	if (rbd_dev->dev_id < 0)
 		goto fail_rbd_dev;
 
@@ -5425,7 +5431,7 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
 	return rbd_dev;
 
 fail_dev_id:
-	ida_simple_remove(&rbd_dev_id_ida, rbd_dev->dev_id);
+	ida_free(&rbd_dev_id_ida, rbd_dev->dev_id);
 fail_rbd_dev:
 	rbd_dev_free(rbd_dev);
 	return NULL;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 3b6b9abb8ce1..5bf98fd6a651 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -367,8 +367,6 @@ static void virtblk_done(struct virtqueue *vq)
 				blk_mq_complete_request(req);
 			req_done = true;
 		}
-		if (unlikely(virtqueue_is_broken(vq)))
-			break;
 	} while (!virtqueue_enable_cb(vq));
 
 	/* In case queue is stopped waiting for more buffers. */
diff --git a/drivers/clk/qcom/gcc-x1e80100.c b/drivers/clk/qcom/gcc-x1e80100.c
index 74db7fef237b..d7182d6e9783 100644
--- a/drivers/clk/qcom/gcc-x1e80100.c
+++ b/drivers/clk/qcom/gcc-x1e80100.c
@@ -4,8 +4,9 @@
  */
 
 #include <linux/clk-provider.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/platform_device.h>
 #include <linux/regmap.h>
 
 #include <dt-bindings/clock/qcom,x1e80100-gcc.h>
diff --git a/drivers/clocksource/timer-cadence-ttc.c b/drivers/clocksource/timer-cadence-ttc.c
index 32daaac9b132..ca7a06489c40 100644
--- a/drivers/clocksource/timer-cadence-ttc.c
+++ b/drivers/clocksource/timer-cadence-ttc.c
@@ -69,7 +69,7 @@
  * @base_addr:	Base address of timer
  * @freq:	Timer input clock frequency
  * @clk:	Associated clock source
- * @clk_rate_change_nb	Notifier block for clock rate changes
+ * @clk_rate_change_nb:	Notifier block for clock rate changes
  */
 struct ttc_timer {
 	void __iomem *base_addr;
@@ -134,7 +134,7 @@ static void ttc_set_interval(struct ttc_timer *timer,
  * @irq:	IRQ number of the Timer
  * @dev_id:	void pointer to the ttc_timer instance
  *
- * returns: Always IRQ_HANDLED - success
+ * Returns: Always IRQ_HANDLED - success
  **/
 static irqreturn_t ttc_clock_event_interrupt(int irq, void *dev_id)
 {
@@ -151,8 +151,9 @@ static irqreturn_t ttc_clock_event_interrupt(int irq, void *dev_id)
 
 /**
  * __ttc_clocksource_read - Reads the timer counter register
+ * @cs: &clocksource to read from
  *
- * returns: Current timer counter register value
+ * Returns: Current timer counter register value
  **/
 static u64 __ttc_clocksource_read(struct clocksource *cs)
 {
@@ -173,7 +174,7 @@ static u64 notrace ttc_sched_clock_read(void)
  * @cycles:	Timer interval ticks
  * @evt:	Address of clock event instance
  *
- * returns: Always 0 - success
+ * Returns: Always %0 - success
  **/
 static int ttc_set_next_event(unsigned long cycles,
 					struct clock_event_device *evt)
@@ -186,9 +187,12 @@ static int ttc_set_next_event(unsigned long cycles,
 }
 
 /**
- * ttc_set_{shutdown|oneshot|periodic} - Sets the state of timer
- *
+ * ttc_shutdown - Sets the state of timer
  * @evt:	Address of clock event instance
+ *
+ * Used for shutdown or oneshot.
+ *
+ * Returns: Always %0 - success
  **/
 static int ttc_shutdown(struct clock_event_device *evt)
 {
@@ -202,6 +206,12 @@ static int ttc_shutdown(struct clock_event_device *evt)
 	return 0;
 }
 
+/**
+ * ttc_set_periodic - Sets the state of timer
+ * @evt:	Address of clock event instance
+ *
+ * Returns: Always %0 - success
+ */
 static int ttc_set_periodic(struct clock_event_device *evt)
 {
 	struct ttc_timer_clockevent *ttce = to_ttc_timer_clkevent(evt);
diff --git a/drivers/clocksource/timer-ep93xx.c b/drivers/clocksource/timer-ep93xx.c
index bc0ca6e12334..6981ff3ac8a9 100644
--- a/drivers/clocksource/timer-ep93xx.c
+++ b/drivers/clocksource/timer-ep93xx.c
@@ -155,9 +155,8 @@ static int __init ep93xx_timer_of_init(struct device_node *np)
 	ep93xx_tcu = tcu;
 
 	irq = irq_of_parse_and_map(np, 0);
-	if (irq == 0)
-		irq = -EINVAL;
-	if (irq < 0) {
+	if (!irq) {
+		ret = -EINVAL;
 		pr_err("EP93XX Timer Can't parse IRQ %d", irq);
 		goto out_free;
 	}
diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c
index 57857c0dfba9..e66dcbd66566 100644
--- a/drivers/clocksource/timer-riscv.c
+++ b/drivers/clocksource/timer-riscv.c
@@ -61,12 +61,19 @@ static int riscv_clock_next_event(unsigned long delta,
 	return 0;
 }
 
+static int riscv_clock_shutdown(struct clock_event_device *evt)
+{
+	riscv_clock_event_stop();
+	return 0;
+}
+
 static unsigned int riscv_clock_event_irq;
 static DEFINE_PER_CPU(struct clock_event_device, riscv_clock_event) = {
 	.name			= "riscv_timer_clockevent",
 	.features		= CLOCK_EVT_FEAT_ONESHOT,
 	.rating			= 100,
 	.set_next_event		= riscv_clock_next_event,
+	.set_state_shutdown	= riscv_clock_shutdown,
 };
 
 /*
diff --git a/drivers/clocksource/timer-ti-dm.c b/drivers/clocksource/timer-ti-dm.c
index 5f60f6bd3386..56acf2617262 100644
--- a/drivers/clocksource/timer-ti-dm.c
+++ b/drivers/clocksource/timer-ti-dm.c
@@ -183,7 +183,7 @@ static inline u32 dmtimer_read(struct dmtimer *timer, u32 reg)
  * dmtimer_write - write timer registers in posted and non-posted mode
  * @timer:      timer pointer over which write operation is to perform
  * @reg:        lowest byte holds the register offset
- * @value:      data to write into the register
+ * @val:        data to write into the register
  *
  * The posted mode bit is encoded in reg. Note that in posted mode, the write
  * pending bit must be checked. Otherwise a write on a register which has a
@@ -949,7 +949,7 @@ static int omap_dm_timer_set_int_enable(struct omap_dm_timer *cookie,
 
 /**
  * omap_dm_timer_set_int_disable - disable timer interrupts
- * @timer:	pointer to timer handle
+ * @cookie:	pointer to timer cookie
  * @mask:	bit mask of interrupts to be disabled
  *
  * Disables the specified timer interrupts for a timer.
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 70ba506dabab..e928f2ca0f1e 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -378,6 +378,20 @@ config LPC18XX_DMAMUX
 	  Enable support for DMA on NXP LPC18xx/43xx platforms
 	  with PL080 and multiplexed DMA request lines.
 
+config LS2X_APB_DMA
+	tristate "Loongson LS2X APB DMA support"
+	depends on LOONGARCH || COMPILE_TEST
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Support for the Loongson LS2X APB DMA controller driver. The
+	  DMA controller has a single DMA channel which can be
+	  configured for different peripherals on the APB bus, such as
+	  audio, NAND, SDIO, etc.
+
+	  This DMA controller transfers data from memory to peripheral fifo.
+	  It does not support memory to memory data transfer.
+
 config MCF_EDMA
 	tristate "Freescale eDMA engine support, ColdFire mcf5441x SoCs"
 	depends on M5441x || COMPILE_TEST
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 83553a97a010..dfd40d14e408 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_INTEL_IOATDMA) += ioat/
 obj-y += idxd/
 obj-$(CONFIG_K3_DMA) += k3dma.o
 obj-$(CONFIG_LPC18XX_DMAMUX) += lpc18xx-dmamux.o
+obj-$(CONFIG_LS2X_APB_DMA) += ls2x-apb-dma.o
 obj-$(CONFIG_MILBEAUT_HDMAC) += milbeaut-hdmac.o
 obj-$(CONFIG_MILBEAUT_XDMAC) += milbeaut-xdmac.o
 obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o
diff --git a/drivers/dma/apple-admac.c b/drivers/dma/apple-admac.c
index 5b63996640d9..9588773dd2eb 100644
--- a/drivers/dma/apple-admac.c
+++ b/drivers/dma/apple-admac.c
@@ -57,6 +57,8 @@
 
 #define REG_BUS_WIDTH(ch)	(0x8040 + (ch) * 0x200)
 
+#define BUS_WIDTH_WORD_SIZE	GENMASK(3, 0)
+#define BUS_WIDTH_FRAME_SIZE	GENMASK(7, 4)
 #define BUS_WIDTH_8BIT		0x00
 #define BUS_WIDTH_16BIT		0x01
 #define BUS_WIDTH_32BIT		0x02
@@ -740,7 +742,8 @@ static int admac_device_config(struct dma_chan *chan,
 	struct admac_data *ad = adchan->host;
 	bool is_tx = admac_chan_direction(adchan->no) == DMA_MEM_TO_DEV;
 	int wordsize = 0;
-	u32 bus_width = 0;
+	u32 bus_width = readl_relaxed(ad->base + REG_BUS_WIDTH(adchan->no)) &
+		~(BUS_WIDTH_WORD_SIZE | BUS_WIDTH_FRAME_SIZE);
 
 	switch (is_tx ? config->dst_addr_width : config->src_addr_width) {
 	case DMA_SLAVE_BUSWIDTH_1_BYTE:
diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c
index 2457a420c13d..4e339c04fc1e 100644
--- a/drivers/dma/dma-axi-dmac.c
+++ b/drivers/dma/dma-axi-dmac.c
@@ -81,9 +81,13 @@
 #define AXI_DMAC_REG_CURRENT_DEST_ADDR	0x438
 #define AXI_DMAC_REG_PARTIAL_XFER_LEN	0x44c
 #define AXI_DMAC_REG_PARTIAL_XFER_ID	0x450
+#define AXI_DMAC_REG_CURRENT_SG_ID	0x454
+#define AXI_DMAC_REG_SG_ADDRESS		0x47c
+#define AXI_DMAC_REG_SG_ADDRESS_HIGH	0x4bc
 
 #define AXI_DMAC_CTRL_ENABLE		BIT(0)
 #define AXI_DMAC_CTRL_PAUSE		BIT(1)
+#define AXI_DMAC_CTRL_ENABLE_SG		BIT(2)
 
 #define AXI_DMAC_IRQ_SOT		BIT(0)
 #define AXI_DMAC_IRQ_EOT		BIT(1)
@@ -97,20 +101,35 @@
 /* The maximum ID allocated by the hardware is 31 */
 #define AXI_DMAC_SG_UNUSED 32U
 
+/* Flags for axi_dmac_hw_desc.flags */
+#define AXI_DMAC_HW_FLAG_LAST		BIT(0)
+#define AXI_DMAC_HW_FLAG_IRQ		BIT(1)
+
+struct axi_dmac_hw_desc {
+	u32 flags;
+	u32 id;
+	u64 dest_addr;
+	u64 src_addr;
+	u64 next_sg_addr;
+	u32 y_len;
+	u32 x_len;
+	u32 src_stride;
+	u32 dst_stride;
+	u64 __pad[2];
+};
+
 struct axi_dmac_sg {
-	dma_addr_t src_addr;
-	dma_addr_t dest_addr;
-	unsigned int x_len;
-	unsigned int y_len;
-	unsigned int dest_stride;
-	unsigned int src_stride;
-	unsigned int id;
 	unsigned int partial_len;
 	bool schedule_when_free;
+
+	struct axi_dmac_hw_desc *hw;
+	dma_addr_t hw_phys;
 };
 
 struct axi_dmac_desc {
 	struct virt_dma_desc vdesc;
+	struct axi_dmac_chan *chan;
+
 	bool cyclic;
 	bool have_partial_xfer;
 
@@ -139,6 +158,7 @@ struct axi_dmac_chan {
 	bool hw_partial_xfer;
 	bool hw_cyclic;
 	bool hw_2d;
+	bool hw_sg;
 };
 
 struct axi_dmac {
@@ -213,9 +233,11 @@ static void axi_dmac_start_transfer(struct axi_dmac_chan *chan)
 	unsigned int flags = 0;
 	unsigned int val;
 
-	val = axi_dmac_read(dmac, AXI_DMAC_REG_START_TRANSFER);
-	if (val) /* Queue is full, wait for the next SOT IRQ */
-		return;
+	if (!chan->hw_sg) {
+		val = axi_dmac_read(dmac, AXI_DMAC_REG_START_TRANSFER);
+		if (val) /* Queue is full, wait for the next SOT IRQ */
+			return;
+	}
 
 	desc = chan->next_desc;
 
@@ -229,14 +251,15 @@ static void axi_dmac_start_transfer(struct axi_dmac_chan *chan)
 	sg = &desc->sg[desc->num_submitted];
 
 	/* Already queued in cyclic mode. Wait for it to finish */
-	if (sg->id != AXI_DMAC_SG_UNUSED) {
+	if (sg->hw->id != AXI_DMAC_SG_UNUSED) {
 		sg->schedule_when_free = true;
 		return;
 	}
 
-	desc->num_submitted++;
-	if (desc->num_submitted == desc->num_sgs ||
-	    desc->have_partial_xfer) {
+	if (chan->hw_sg) {
+		chan->next_desc = NULL;
+	} else if (++desc->num_submitted == desc->num_sgs ||
+		   desc->have_partial_xfer) {
 		if (desc->cyclic)
 			desc->num_submitted = 0; /* Start again */
 		else
@@ -246,32 +269,42 @@ static void axi_dmac_start_transfer(struct axi_dmac_chan *chan)
 		chan->next_desc = desc;
 	}
 
-	sg->id = axi_dmac_read(dmac, AXI_DMAC_REG_TRANSFER_ID);
+	sg->hw->id = axi_dmac_read(dmac, AXI_DMAC_REG_TRANSFER_ID);
 
-	if (axi_dmac_dest_is_mem(chan)) {
-		axi_dmac_write(dmac, AXI_DMAC_REG_DEST_ADDRESS, sg->dest_addr);
-		axi_dmac_write(dmac, AXI_DMAC_REG_DEST_STRIDE, sg->dest_stride);
-	}
+	if (!chan->hw_sg) {
+		if (axi_dmac_dest_is_mem(chan)) {
+			axi_dmac_write(dmac, AXI_DMAC_REG_DEST_ADDRESS, sg->hw->dest_addr);
+			axi_dmac_write(dmac, AXI_DMAC_REG_DEST_STRIDE, sg->hw->dst_stride);
+		}
 
-	if (axi_dmac_src_is_mem(chan)) {
-		axi_dmac_write(dmac, AXI_DMAC_REG_SRC_ADDRESS, sg->src_addr);
-		axi_dmac_write(dmac, AXI_DMAC_REG_SRC_STRIDE, sg->src_stride);
+		if (axi_dmac_src_is_mem(chan)) {
+			axi_dmac_write(dmac, AXI_DMAC_REG_SRC_ADDRESS, sg->hw->src_addr);
+			axi_dmac_write(dmac, AXI_DMAC_REG_SRC_STRIDE, sg->hw->src_stride);
+		}
 	}
 
 	/*
 	 * If the hardware supports cyclic transfers and there is no callback to
-	 * call and only a single segment, enable hw cyclic mode to avoid
-	 * unnecessary interrupts.
+	 * call, enable hw cyclic mode to avoid unnecessary interrupts.
 	 */
-	if (chan->hw_cyclic && desc->cyclic && !desc->vdesc.tx.callback &&
-		desc->num_sgs == 1)
-		flags |= AXI_DMAC_FLAG_CYCLIC;
+	if (chan->hw_cyclic && desc->cyclic && !desc->vdesc.tx.callback) {
+		if (chan->hw_sg)
+			desc->sg[desc->num_sgs - 1].hw->flags &= ~AXI_DMAC_HW_FLAG_IRQ;
+		else if (desc->num_sgs == 1)
+			flags |= AXI_DMAC_FLAG_CYCLIC;
+	}
 
 	if (chan->hw_partial_xfer)
 		flags |= AXI_DMAC_FLAG_PARTIAL_REPORT;
 
-	axi_dmac_write(dmac, AXI_DMAC_REG_X_LENGTH, sg->x_len - 1);
-	axi_dmac_write(dmac, AXI_DMAC_REG_Y_LENGTH, sg->y_len - 1);
+	if (chan->hw_sg) {
+		axi_dmac_write(dmac, AXI_DMAC_REG_SG_ADDRESS, (u32)sg->hw_phys);
+		axi_dmac_write(dmac, AXI_DMAC_REG_SG_ADDRESS_HIGH,
+			       (u64)sg->hw_phys >> 32);
+	} else {
+		axi_dmac_write(dmac, AXI_DMAC_REG_X_LENGTH, sg->hw->x_len);
+		axi_dmac_write(dmac, AXI_DMAC_REG_Y_LENGTH, sg->hw->y_len);
+	}
 	axi_dmac_write(dmac, AXI_DMAC_REG_FLAGS, flags);
 	axi_dmac_write(dmac, AXI_DMAC_REG_START_TRANSFER, 1);
 }
@@ -286,9 +319,9 @@ static inline unsigned int axi_dmac_total_sg_bytes(struct axi_dmac_chan *chan,
 	struct axi_dmac_sg *sg)
 {
 	if (chan->hw_2d)
-		return sg->x_len * sg->y_len;
+		return (sg->hw->x_len + 1) * (sg->hw->y_len + 1);
 	else
-		return sg->x_len;
+		return (sg->hw->x_len + 1);
 }
 
 static void axi_dmac_dequeue_partial_xfers(struct axi_dmac_chan *chan)
@@ -307,9 +340,9 @@ static void axi_dmac_dequeue_partial_xfers(struct axi_dmac_chan *chan)
 		list_for_each_entry(desc, &chan->active_descs, vdesc.node) {
 			for (i = 0; i < desc->num_sgs; i++) {
 				sg = &desc->sg[i];
-				if (sg->id == AXI_DMAC_SG_UNUSED)
+				if (sg->hw->id == AXI_DMAC_SG_UNUSED)
 					continue;
-				if (sg->id == id) {
+				if (sg->hw->id == id) {
 					desc->have_partial_xfer = true;
 					sg->partial_len = len;
 					found_sg = true;
@@ -348,6 +381,9 @@ static void axi_dmac_compute_residue(struct axi_dmac_chan *chan,
 	rslt->result = DMA_TRANS_NOERROR;
 	rslt->residue = 0;
 
+	if (chan->hw_sg)
+		return;
+
 	/*
 	 * We get here if the last completed segment is partial, which
 	 * means we can compute the residue from that segment onwards
@@ -374,36 +410,47 @@ static bool axi_dmac_transfer_done(struct axi_dmac_chan *chan,
 	    (completed_transfers & AXI_DMAC_FLAG_PARTIAL_XFER_DONE))
 		axi_dmac_dequeue_partial_xfers(chan);
 
-	do {
-		sg = &active->sg[active->num_completed];
-		if (sg->id == AXI_DMAC_SG_UNUSED) /* Not yet submitted */
-			break;
-		if (!(BIT(sg->id) & completed_transfers))
-			break;
-		active->num_completed++;
-		sg->id = AXI_DMAC_SG_UNUSED;
-		if (sg->schedule_when_free) {
-			sg->schedule_when_free = false;
-			start_next = true;
+	if (chan->hw_sg) {
+		if (active->cyclic) {
+			vchan_cyclic_callback(&active->vdesc);
+		} else {
+			list_del(&active->vdesc.node);
+			vchan_cookie_complete(&active->vdesc);
+			active = axi_dmac_active_desc(chan);
+			start_next = !!active;
 		}
+	} else {
+		do {
+			sg = &active->sg[active->num_completed];
+			if (sg->hw->id == AXI_DMAC_SG_UNUSED) /* Not yet submitted */
+				break;
+			if (!(BIT(sg->hw->id) & completed_transfers))
+				break;
+			active->num_completed++;
+			sg->hw->id = AXI_DMAC_SG_UNUSED;
+			if (sg->schedule_when_free) {
+				sg->schedule_when_free = false;
+				start_next = true;
+			}
 
-		if (sg->partial_len)
-			axi_dmac_compute_residue(chan, active);
+			if (sg->partial_len)
+				axi_dmac_compute_residue(chan, active);
 
-		if (active->cyclic)
-			vchan_cyclic_callback(&active->vdesc);
+			if (active->cyclic)
+				vchan_cyclic_callback(&active->vdesc);
 
-		if (active->num_completed == active->num_sgs ||
-		    sg->partial_len) {
-			if (active->cyclic) {
-				active->num_completed = 0; /* wrap around */
-			} else {
-				list_del(&active->vdesc.node);
-				vchan_cookie_complete(&active->vdesc);
-				active = axi_dmac_active_desc(chan);
+			if (active->num_completed == active->num_sgs ||
+			    sg->partial_len) {
+				if (active->cyclic) {
+					active->num_completed = 0; /* wrap around */
+				} else {
+					list_del(&active->vdesc.node);
+					vchan_cookie_complete(&active->vdesc);
+					active = axi_dmac_active_desc(chan);
+				}
 			}
-		}
-	} while (active);
+		} while (active);
+	}
 
 	return start_next;
 }
@@ -467,8 +514,12 @@ static void axi_dmac_issue_pending(struct dma_chan *c)
 	struct axi_dmac_chan *chan = to_axi_dmac_chan(c);
 	struct axi_dmac *dmac = chan_to_axi_dmac(chan);
 	unsigned long flags;
+	u32 ctrl = AXI_DMAC_CTRL_ENABLE;
 
-	axi_dmac_write(dmac, AXI_DMAC_REG_CTRL, AXI_DMAC_CTRL_ENABLE);
+	if (chan->hw_sg)
+		ctrl |= AXI_DMAC_CTRL_ENABLE_SG;
+
+	axi_dmac_write(dmac, AXI_DMAC_REG_CTRL, ctrl);
 
 	spin_lock_irqsave(&chan->vchan.lock, flags);
 	if (vchan_issue_pending(&chan->vchan))
@@ -476,22 +527,58 @@ static void axi_dmac_issue_pending(struct dma_chan *c)
 	spin_unlock_irqrestore(&chan->vchan.lock, flags);
 }
 
-static struct axi_dmac_desc *axi_dmac_alloc_desc(unsigned int num_sgs)
+static struct axi_dmac_desc *
+axi_dmac_alloc_desc(struct axi_dmac_chan *chan, unsigned int num_sgs)
 {
+	struct axi_dmac *dmac = chan_to_axi_dmac(chan);
+	struct device *dev = dmac->dma_dev.dev;
+	struct axi_dmac_hw_desc *hws;
 	struct axi_dmac_desc *desc;
+	dma_addr_t hw_phys;
 	unsigned int i;
 
 	desc = kzalloc(struct_size(desc, sg, num_sgs), GFP_NOWAIT);
 	if (!desc)
 		return NULL;
 	desc->num_sgs = num_sgs;
+	desc->chan = chan;
+
+	hws = dma_alloc_coherent(dev, PAGE_ALIGN(num_sgs * sizeof(*hws)),
+				&hw_phys, GFP_ATOMIC);
+	if (!hws) {
+		kfree(desc);
+		return NULL;
+	}
 
-	for (i = 0; i < num_sgs; i++)
-		desc->sg[i].id = AXI_DMAC_SG_UNUSED;
+	for (i = 0; i < num_sgs; i++) {
+		desc->sg[i].hw = &hws[i];
+		desc->sg[i].hw_phys = hw_phys + i * sizeof(*hws);
+
+		hws[i].id = AXI_DMAC_SG_UNUSED;
+		hws[i].flags = 0;
+
+		/* Link hardware descriptors */
+		hws[i].next_sg_addr = hw_phys + (i + 1) * sizeof(*hws);
+	}
+
+	/* The last hardware descriptor will trigger an interrupt */
+	desc->sg[num_sgs - 1].hw->flags = AXI_DMAC_HW_FLAG_LAST | AXI_DMAC_HW_FLAG_IRQ;
 
 	return desc;
 }
 
+static void axi_dmac_free_desc(struct axi_dmac_desc *desc)
+{
+	struct axi_dmac *dmac = chan_to_axi_dmac(desc->chan);
+	struct device *dev = dmac->dma_dev.dev;
+	struct axi_dmac_hw_desc *hw = desc->sg[0].hw;
+	dma_addr_t hw_phys = desc->sg[0].hw_phys;
+
+	dma_free_coherent(dev, PAGE_ALIGN(desc->num_sgs * sizeof(*hw)),
+			  hw, hw_phys);
+	kfree(desc);
+}
+
 static struct axi_dmac_sg *axi_dmac_fill_linear_sg(struct axi_dmac_chan *chan,
 	enum dma_transfer_direction direction, dma_addr_t addr,
 	unsigned int num_periods, unsigned int period_len,
@@ -508,26 +595,24 @@ static struct axi_dmac_sg *axi_dmac_fill_linear_sg(struct axi_dmac_chan *chan,
 	segment_size = ((segment_size - 1) | chan->length_align_mask) + 1;
 
 	for (i = 0; i < num_periods; i++) {
-		len = period_len;
-
-		while (len > segment_size) {
+		for (len = period_len; len > segment_size; sg++) {
 			if (direction == DMA_DEV_TO_MEM)
-				sg->dest_addr = addr;
+				sg->hw->dest_addr = addr;
 			else
-				sg->src_addr = addr;
-			sg->x_len = segment_size;
-			sg->y_len = 1;
-			sg++;
+				sg->hw->src_addr = addr;
+			sg->hw->x_len = segment_size - 1;
+			sg->hw->y_len = 0;
+			sg->hw->flags = 0;
 			addr += segment_size;
 			len -= segment_size;
 		}
 
 		if (direction == DMA_DEV_TO_MEM)
-			sg->dest_addr = addr;
+			sg->hw->dest_addr = addr;
 		else
-			sg->src_addr = addr;
-		sg->x_len = len;
-		sg->y_len = 1;
+			sg->hw->src_addr = addr;
+		sg->hw->x_len = len - 1;
+		sg->hw->y_len = 0;
 		sg++;
 		addr += len;
 	}
@@ -554,7 +639,7 @@ static struct dma_async_tx_descriptor *axi_dmac_prep_slave_sg(
 	for_each_sg(sgl, sg, sg_len, i)
 		num_sgs += DIV_ROUND_UP(sg_dma_len(sg), chan->max_length);
 
-	desc = axi_dmac_alloc_desc(num_sgs);
+	desc = axi_dmac_alloc_desc(chan, num_sgs);
 	if (!desc)
 		return NULL;
 
@@ -563,7 +648,7 @@ static struct dma_async_tx_descriptor *axi_dmac_prep_slave_sg(
 	for_each_sg(sgl, sg, sg_len, i) {
 		if (!axi_dmac_check_addr(chan, sg_dma_address(sg)) ||
 		    !axi_dmac_check_len(chan, sg_dma_len(sg))) {
-			kfree(desc);
+			axi_dmac_free_desc(desc);
 			return NULL;
 		}
 
@@ -583,7 +668,7 @@ static struct dma_async_tx_descriptor *axi_dmac_prep_dma_cyclic(
 {
 	struct axi_dmac_chan *chan = to_axi_dmac_chan(c);
 	struct axi_dmac_desc *desc;
-	unsigned int num_periods, num_segments;
+	unsigned int num_periods, num_segments, num_sgs;
 
 	if (direction != chan->direction)
 		return NULL;
@@ -597,11 +682,16 @@ static struct dma_async_tx_descriptor *axi_dmac_prep_dma_cyclic(
 
 	num_periods = buf_len / period_len;
 	num_segments = DIV_ROUND_UP(period_len, chan->max_length);
+	num_sgs = num_periods * num_segments;
 
-	desc = axi_dmac_alloc_desc(num_periods * num_segments);
+	desc = axi_dmac_alloc_desc(chan, num_sgs);
 	if (!desc)
 		return NULL;
 
+	/* Chain the last descriptor to the first, and remove its "last" flag */
+	desc->sg[num_sgs - 1].hw->next_sg_addr = desc->sg[0].hw_phys;
+	desc->sg[num_sgs - 1].hw->flags &= ~AXI_DMAC_HW_FLAG_LAST;
+
 	axi_dmac_fill_linear_sg(chan, direction, buf_addr, num_periods,
 		period_len, desc->sg);
 
@@ -653,26 +743,26 @@ static struct dma_async_tx_descriptor *axi_dmac_prep_interleaved(
 			return NULL;
 	}
 
-	desc = axi_dmac_alloc_desc(1);
+	desc = axi_dmac_alloc_desc(chan, 1);
 	if (!desc)
 		return NULL;
 
 	if (axi_dmac_src_is_mem(chan)) {
-		desc->sg[0].src_addr = xt->src_start;
-		desc->sg[0].src_stride = xt->sgl[0].size + src_icg;
+		desc->sg[0].hw->src_addr = xt->src_start;
+		desc->sg[0].hw->src_stride = xt->sgl[0].size + src_icg;
 	}
 
 	if (axi_dmac_dest_is_mem(chan)) {
-		desc->sg[0].dest_addr = xt->dst_start;
-		desc->sg[0].dest_stride = xt->sgl[0].size + dst_icg;
+		desc->sg[0].hw->dest_addr = xt->dst_start;
+		desc->sg[0].hw->dst_stride = xt->sgl[0].size + dst_icg;
 	}
 
 	if (chan->hw_2d) {
-		desc->sg[0].x_len = xt->sgl[0].size;
-		desc->sg[0].y_len = xt->numf;
+		desc->sg[0].hw->x_len = xt->sgl[0].size - 1;
+		desc->sg[0].hw->y_len = xt->numf - 1;
 	} else {
-		desc->sg[0].x_len = xt->sgl[0].size * xt->numf;
-		desc->sg[0].y_len = 1;
+		desc->sg[0].hw->x_len = xt->sgl[0].size * xt->numf - 1;
+		desc->sg[0].hw->y_len = 0;
 	}
 
 	if (flags & DMA_CYCLIC)
@@ -688,7 +778,7 @@ static void axi_dmac_free_chan_resources(struct dma_chan *c)
 
 static void axi_dmac_desc_free(struct virt_dma_desc *vdesc)
 {
-	kfree(container_of(vdesc, struct axi_dmac_desc, vdesc));
+	axi_dmac_free_desc(to_axi_dmac_desc(vdesc));
 }
 
 static bool axi_dmac_regmap_rdwr(struct device *dev, unsigned int reg)
@@ -714,6 +804,9 @@ static bool axi_dmac_regmap_rdwr(struct device *dev, unsigned int reg)
 	case AXI_DMAC_REG_CURRENT_DEST_ADDR:
 	case AXI_DMAC_REG_PARTIAL_XFER_LEN:
 	case AXI_DMAC_REG_PARTIAL_XFER_ID:
+	case AXI_DMAC_REG_CURRENT_SG_ID:
+	case AXI_DMAC_REG_SG_ADDRESS:
+	case AXI_DMAC_REG_SG_ADDRESS_HIGH:
 		return true;
 	default:
 		return false;
@@ -866,6 +959,10 @@ static int axi_dmac_detect_caps(struct axi_dmac *dmac, unsigned int version)
 	if (axi_dmac_read(dmac, AXI_DMAC_REG_FLAGS) == AXI_DMAC_FLAG_CYCLIC)
 		chan->hw_cyclic = true;
 
+	axi_dmac_write(dmac, AXI_DMAC_REG_SG_ADDRESS, 0xffffffff);
+	if (axi_dmac_read(dmac, AXI_DMAC_REG_SG_ADDRESS))
+		chan->hw_sg = true;
+
 	axi_dmac_write(dmac, AXI_DMAC_REG_Y_LENGTH, 1);
 	if (axi_dmac_read(dmac, AXI_DMAC_REG_Y_LENGTH) == 1)
 		chan->hw_2d = true;
@@ -911,6 +1008,7 @@ static int axi_dmac_probe(struct platform_device *pdev)
 	struct axi_dmac *dmac;
 	struct regmap *regmap;
 	unsigned int version;
+	u32 irq_mask = 0;
 	int ret;
 
 	dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
@@ -966,6 +1064,7 @@ static int axi_dmac_probe(struct platform_device *pdev)
 	dma_dev->dst_addr_widths = BIT(dmac->chan.dest_width);
 	dma_dev->directions = BIT(dmac->chan.direction);
 	dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+	dma_dev->max_sg_burst = 31; /* 31 SGs maximum in one burst */
 	INIT_LIST_HEAD(&dma_dev->channels);
 
 	dmac->chan.vchan.desc_free = axi_dmac_desc_free;
@@ -977,7 +1076,10 @@ static int axi_dmac_probe(struct platform_device *pdev)
 
 	dma_dev->copy_align = (dmac->chan.address_align_mask + 1);
 
-	axi_dmac_write(dmac, AXI_DMAC_REG_IRQ_MASK, 0x00);
+	if (dmac->chan.hw_sg)
+		irq_mask |= AXI_DMAC_IRQ_SOT;
+
+	axi_dmac_write(dmac, AXI_DMAC_REG_IRQ_MASK, irq_mask);
 
 	if (of_dma_is_coherent(pdev->dev.of_node)) {
 		ret = axi_dmac_read(dmac, AXI_DMAC_REG_COHERENCY_DESC);
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index b7388ae62d7f..491b22240221 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -1103,6 +1103,9 @@ EXPORT_SYMBOL_GPL(dma_async_device_channel_register);
 static void __dma_async_device_channel_unregister(struct dma_device *device,
 						  struct dma_chan *chan)
 {
+	if (chan->local == NULL)
+		return;
+
 	WARN_ONCE(!device->device_release && chan->client_count,
 		  "%s called while %d clients hold a reference\n",
 		  __func__, chan->client_count);
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index ffe621695e47..a4f608837849 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -21,6 +21,10 @@
 #include <linux/slab.h>
 #include <linux/wait.h>
 
+static bool nobounce;
+module_param(nobounce, bool, 0644);
+MODULE_PARM_DESC(nobounce, "Prevent using swiotlb buffer (default: use swiotlb buffer)");
+
 static unsigned int test_buf_size = 16384;
 module_param(test_buf_size, uint, 0644);
 MODULE_PARM_DESC(test_buf_size, "Size of the memcpy test buffer");
@@ -90,6 +94,7 @@ MODULE_PARM_DESC(polled, "Use polling for completion instead of interrupts");
 
 /**
  * struct dmatest_params - test parameters.
+ * @nobounce:		prevent using swiotlb buffer
  * @buf_size:		size of the memcpy test buffer
  * @channel:		bus ID of the channel to test
  * @device:		bus ID of the DMA Engine to test
@@ -106,6 +111,7 @@ MODULE_PARM_DESC(polled, "Use polling for completion instead of interrupts");
  * @polled:		use polling for completion instead of interrupts
  */
 struct dmatest_params {
+	bool		nobounce;
 	unsigned int	buf_size;
 	char		channel[20];
 	char		device[32];
@@ -215,6 +221,7 @@ struct dmatest_done {
 struct dmatest_data {
 	u8		**raw;
 	u8		**aligned;
+	gfp_t		gfp_flags;
 	unsigned int	cnt;
 	unsigned int	off;
 };
@@ -533,7 +540,7 @@ static int dmatest_alloc_test_data(struct dmatest_data *d,
 		goto err;
 
 	for (i = 0; i < d->cnt; i++) {
-		d->raw[i] = kmalloc(buf_size + align, GFP_KERNEL);
+		d->raw[i] = kmalloc(buf_size + align, d->gfp_flags);
 		if (!d->raw[i])
 			goto err;
 
@@ -655,6 +662,13 @@ static int dmatest_func(void *data)
 		goto err_free_coefs;
 	}
 
+	src->gfp_flags = GFP_KERNEL;
+	dst->gfp_flags = GFP_KERNEL;
+	if (params->nobounce) {
+		src->gfp_flags = GFP_DMA;
+		dst->gfp_flags = GFP_DMA;
+	}
+
 	if (dmatest_alloc_test_data(src, buf_size, align) < 0)
 		goto err_free_coefs;
 
@@ -1093,6 +1107,7 @@ static void add_threaded_test(struct dmatest_info *info)
 	struct dmatest_params *params = &info->params;
 
 	/* Copy test parameters */
+	params->nobounce = nobounce;
 	params->buf_size = test_buf_size;
 	strscpy(params->channel, strim(test_channel), sizeof(params->channel));
 	strscpy(params->device, strim(test_device), sizeof(params->device));
diff --git a/drivers/dma/dw-edma/dw-edma-v0-debugfs.c b/drivers/dma/dw-edma/dw-edma-v0-debugfs.c
index 0745d9e7d259..406f169b09a7 100644
--- a/drivers/dma/dw-edma/dw-edma-v0-debugfs.c
+++ b/drivers/dma/dw-edma/dw-edma-v0-debugfs.c
@@ -176,7 +176,7 @@ dw_edma_debugfs_regs_wr(struct dw_edma *dw, struct dentry *dent)
 	};
 	struct dentry *regs_dent, *ch_dent;
 	int nr_entries, i;
-	char name[16];
+	char name[32];
 
 	regs_dent = debugfs_create_dir(WRITE_STR, dent);
 
@@ -239,7 +239,7 @@ static noinline_for_stack void dw_edma_debugfs_regs_rd(struct dw_edma *dw,
 	};
 	struct dentry *regs_dent, *ch_dent;
 	int nr_entries, i;
-	char name[16];
+	char name[32];
 
 	regs_dent = debugfs_create_dir(READ_STR, dent);
 
diff --git a/drivers/dma/dw-edma/dw-hdma-v0-debugfs.c b/drivers/dma/dw-edma/dw-hdma-v0-debugfs.c
index 520c81978b08..dcdc57fe976c 100644
--- a/drivers/dma/dw-edma/dw-hdma-v0-debugfs.c
+++ b/drivers/dma/dw-edma/dw-hdma-v0-debugfs.c
@@ -116,7 +116,7 @@ static void dw_hdma_debugfs_regs_ch(struct dw_edma *dw, enum dw_edma_dir dir,
 static void dw_hdma_debugfs_regs_wr(struct dw_edma *dw, struct dentry *dent)
 {
 	struct dentry *regs_dent, *ch_dent;
-	char name[16];
+	char name[32];
 	int i;
 
 	regs_dent = debugfs_create_dir(WRITE_STR, dent);
@@ -133,7 +133,7 @@ static void dw_hdma_debugfs_regs_wr(struct dw_edma *dw, struct dentry *dent)
 static void dw_hdma_debugfs_regs_rd(struct dw_edma *dw, struct dentry *dent)
 {
 	struct dentry *regs_dent, *ch_dent;
-	char name[16];
+	char name[32];
 	int i;
 
 	regs_dent = debugfs_create_dir(READ_STR, dent);
diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c
index 238a69bd0d6f..45cc419b1b4a 100644
--- a/drivers/dma/fsl-edma-main.c
+++ b/drivers/dma/fsl-edma-main.c
@@ -9,6 +9,7 @@
  * Vybrid and Layerscape SoCs.
  */
 
+#include <dt-bindings/dma/fsl-edma.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/clk.h>
@@ -21,10 +22,6 @@
 
 #include "fsl-edma-common.h"
 
-#define ARGS_RX                         BIT(0)
-#define ARGS_REMOTE                     BIT(1)
-#define ARGS_MULTI_FIFO                 BIT(2)
-
 static void fsl_edma_synchronize(struct dma_chan *chan)
 {
 	struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
@@ -153,9 +150,15 @@ static struct dma_chan *fsl_edma3_xlate(struct of_phandle_args *dma_spec,
 		i = fsl_chan - fsl_edma->chans;
 
 		fsl_chan->priority = dma_spec->args[1];
-		fsl_chan->is_rxchan = dma_spec->args[2] & ARGS_RX;
-		fsl_chan->is_remote = dma_spec->args[2] & ARGS_REMOTE;
-		fsl_chan->is_multi_fifo = dma_spec->args[2] & ARGS_MULTI_FIFO;
+		fsl_chan->is_rxchan = dma_spec->args[2] & FSL_EDMA_RX;
+		fsl_chan->is_remote = dma_spec->args[2] & FSL_EDMA_REMOTE;
+		fsl_chan->is_multi_fifo = dma_spec->args[2] & FSL_EDMA_MULTI_FIFO;
+
+		if ((dma_spec->args[2] & FSL_EDMA_EVEN_CH) && (i & 0x1))
+			continue;
+
+		if ((dma_spec->args[2] & FSL_EDMA_ODD_CH) && !(i & 0x1))
+			continue;
 
 		if (!b_chmux && i == dma_spec->args[0]) {
 			chan = dma_get_slave_channel(chan);
diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c
index 47cb28468049..a1d0aa63142a 100644
--- a/drivers/dma/fsl-qdma.c
+++ b/drivers/dma/fsl-qdma.c
@@ -805,7 +805,7 @@ fsl_qdma_irq_init(struct platform_device *pdev,
 	int i;
 	int cpu;
 	int ret;
-	char irq_name[20];
+	char irq_name[32];
 
 	fsl_qdma->error_irq =
 		platform_get_irq_byname(pdev, "qdma-error");
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index 1d918d45d9f6..77f8885cf407 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -165,7 +165,7 @@ static void idxd_cdev_dev_release(struct device *dev)
 	struct idxd_wq *wq = idxd_cdev->wq;
 
 	cdev_ctx = &ictx[wq->idxd->data->type];
-	ida_simple_remove(&cdev_ctx->minor_ida, idxd_cdev->minor);
+	ida_free(&cdev_ctx->minor_ida, idxd_cdev->minor);
 	kfree(idxd_cdev);
 }
 
@@ -463,7 +463,7 @@ int idxd_wq_add_cdev(struct idxd_wq *wq)
 	cdev = &idxd_cdev->cdev;
 	dev = cdev_dev(idxd_cdev);
 	cdev_ctx = &ictx[wq->idxd->data->type];
-	minor = ida_simple_get(&cdev_ctx->minor_ida, 0, MINORMASK, GFP_KERNEL);
+	minor = ida_alloc_max(&cdev_ctx->minor_ida, MINORMASK, GFP_KERNEL);
 	if (minor < 0) {
 		kfree(idxd_cdev);
 		return minor;
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index f43d81128b96..ecfdf4a8f1f8 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -807,6 +807,9 @@ err_bmap:
 
 static void idxd_device_evl_free(struct idxd_device *idxd)
 {
+	void *evl_log;
+	unsigned int evl_log_size;
+	dma_addr_t evl_dma;
 	union gencfg_reg gencfg;
 	union genctrl_reg genctrl;
 	struct device *dev = &idxd->pdev->dev;
@@ -827,11 +830,15 @@ static void idxd_device_evl_free(struct idxd_device *idxd)
 	iowrite64(0, idxd->reg_base + IDXD_EVLCFG_OFFSET);
 	iowrite64(0, idxd->reg_base + IDXD_EVLCFG_OFFSET + 8);
 
-	dma_free_coherent(dev, evl->log_size, evl->log, evl->dma);
 	bitmap_free(evl->bmap);
+	evl_log = evl->log;
+	evl_log_size = evl->log_size;
+	evl_dma = evl->dma;
 	evl->log = NULL;
 	evl->size = IDXD_EVL_SIZE_MIN;
 	spin_unlock(&evl->lock);
+
+	dma_free_coherent(dev, evl_log_size, evl_log, evl_dma);
 }
 
 static void idxd_group_config_write(struct idxd_group *group)
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index f81ecf5863e8..9b42f5e96b1e 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -421,9 +421,7 @@ struct sdma_desc {
  * @shp_addr:		value for gReg[6]
  * @per_addr:		value for gReg[2]
  * @status:		status of dma channel
- * @context_loaded:	ensure context is only loaded once
  * @data:		specific sdma interface structure
- * @bd_pool:		dma_pool for bd
  * @terminate_worker:	used to call back into terminate work function
  * @terminated:		terminated list
  * @is_ram_script:	flag for script in ram
@@ -486,8 +484,6 @@ struct sdma_channel {
  * @num_script_addrs:	Number of script addresses in this image
  * @ram_code_start:	offset of SDMA ram image in this firmware image
  * @ram_code_size:	size of SDMA ram image
- * @script_addrs:	Stores the start address of the SDMA scripts
- *			(in SDMA memory space)
  */
 struct sdma_firmware_header {
 	u32	magic;
diff --git a/drivers/dma/ls2x-apb-dma.c b/drivers/dma/ls2x-apb-dma.c
new file mode 100644
index 000000000000..a49913f3ed3f
--- /dev/null
+++ b/drivers/dma/ls2x-apb-dma.c
@@ -0,0 +1,705 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Driver for the Loongson LS2X APB DMA Controller
+ *
+ * Copyright (C) 2017-2023 Loongson Corporation
+ */
+
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+/* Global Configuration Register */
+#define LDMA_ORDER_ERG		0x0
+
+/* Bitfield definitions */
+
+/* Bitfields in Global Configuration Register */
+#define LDMA_64BIT_EN		BIT(0) /* 1: 64 bit support */
+#define LDMA_UNCOHERENT_EN	BIT(1) /* 0: cache, 1: uncache */
+#define LDMA_ASK_VALID		BIT(2)
+#define LDMA_START		BIT(3) /* DMA start operation */
+#define LDMA_STOP		BIT(4) /* DMA stop operation */
+#define LDMA_CONFIG_MASK	GENMASK_ULL(4, 0) /* config bits; ULL so ~mask keeps upper address bits */
+
+/* Bitfields in ndesc_addr field of HW descriptor */
+#define LDMA_DESC_EN		BIT(0) /*1: The next descriptor is valid */
+#define LDMA_DESC_ADDR_LOW	GENMASK(31, 1)
+
+/* Bitfields in cmd field of HW descriptor */
+#define LDMA_INT		BIT(1) /* Enable DMA interrupts */
+#define LDMA_DATA_DIRECTION	BIT(12) /* 1: write to device, 0: read from device */
+
+#define LDMA_SLAVE_BUSWIDTHS	(BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_8_BYTES))
+
+#define LDMA_MAX_TRANS_LEN	U32_MAX
+
+/*--  descriptors  -----------------------------------------------------*/
+
+/*
+ * struct ls2x_dma_hw_desc - DMA HW descriptor
+ * @ndesc_addr: the next descriptor low address.
+ * @mem_addr: memory low address.
+ * @apb_addr: device buffer address.
+ * @len: length of a piece of carried content, in words.
+ * @step_len: length between two moved memory data blocks.
+ * @step_times: number of blocks to be carried in a single DMA operation.
+ * @cmd: descriptor command or state.
+ * @stats: DMA status.
+ * @high_ndesc_addr: the next descriptor high address.
+ * @high_mem_addr: memory high address.
+ * @reserved: reserved
+ */
+struct ls2x_dma_hw_desc {
+	u32 ndesc_addr;
+	u32 mem_addr;
+	u32 apb_addr;
+	u32 len;
+	u32 step_len;
+	u32 step_times;
+	u32 cmd;
+	u32 stats;
+	u32 high_ndesc_addr;
+	u32 high_mem_addr;
+	u32 reserved[2];
+} __packed;
+
+/*
+ * struct ls2x_dma_sg - ls2x dma scatter gather entry
+ * @hw: the pointer to DMA HW descriptor.
+ * @llp: physical address of the DMA HW descriptor.
+ * @phys: destination or source address(mem).
+ * @len: number of Bytes to read.
+ */
+struct ls2x_dma_sg {
+	struct ls2x_dma_hw_desc	*hw;
+	dma_addr_t		llp;
+	dma_addr_t		phys;
+	u32			len;
+};
+
+/*
+ * struct ls2x_dma_desc - software descriptor
+ * @vdesc: embedded virtual dma descriptor.
+ * @cyclic: true if the transfer is cyclic.
+ * @burst_size: burst size of transaction, in words.
+ * @desc_num: number of sg entries.
+ * @direction: transfer direction, to or from device.
+ * @status: dma controller status.
+ * @sg: array of sgs.
+ */
+struct ls2x_dma_desc {
+	struct virt_dma_desc		vdesc;
+	bool				cyclic;
+	size_t				burst_size;
+	u32				desc_num;
+	enum dma_transfer_direction	direction;
+	enum dma_status			status;
+	struct ls2x_dma_sg		sg[] __counted_by(desc_num);
+};
+
+/*--  Channels  --------------------------------------------------------*/
+
+/*
+ * struct ls2x_dma_chan - internal representation of an LS2X APB DMA channel
+ * @vchan: virtual dma channel entry.
+ * @desc: pointer to the ls2x sw dma descriptor.
+ * @pool: hw desc table
+ * @irq: irq line
+ * @sconfig: configuration for slave transfers, passed via .device_config
+ */
+struct ls2x_dma_chan {
+	struct virt_dma_chan	vchan;
+	struct ls2x_dma_desc	*desc;
+	void			*pool;
+	int			irq;
+	struct dma_slave_config	sconfig;
+};
+
+/*--  Controller  ------------------------------------------------------*/
+
+/*
+ * struct ls2x_dma_priv - LS2X APB DMAC specific information
+ * @ddev: dmaengine dma_device object members
+ * @dma_clk: DMAC clock source
+ * @regs: memory mapped register base
+ * @lchan: channel to store ls2x_dma_chan structures
+ */
+struct ls2x_dma_priv {
+	struct dma_device	ddev;
+	struct clk		*dma_clk;
+	void __iomem		*regs;
+	struct ls2x_dma_chan	lchan;
+};
+
+/*--  Helper functions  ------------------------------------------------*/
+
+static inline struct ls2x_dma_desc *to_ldma_desc(struct virt_dma_desc *vdesc)
+{
+	return container_of(vdesc, struct ls2x_dma_desc, vdesc);
+}
+
+static inline struct ls2x_dma_chan *to_ldma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct ls2x_dma_chan, vchan.chan);
+}
+
+static inline struct ls2x_dma_priv *to_ldma_priv(struct dma_device *ddev)
+{
+	return container_of(ddev, struct ls2x_dma_priv, ddev);
+}
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+	return &chan->dev->device;
+}
+
+static void ls2x_dma_desc_free(struct virt_dma_desc *vdesc)
+{
+	struct ls2x_dma_chan *lchan = to_ldma_chan(vdesc->tx.chan);
+	struct ls2x_dma_desc *desc = to_ldma_desc(vdesc);
+	int i;
+
+	for (i = 0; i < desc->desc_num; i++) {
+		if (desc->sg[i].hw)
+			dma_pool_free(lchan->pool, desc->sg[i].hw,
+				      desc->sg[i].llp);
+	}
+
+	kfree(desc);
+}
+
+/* Issue @cmd (LDMA_START/LDMA_STOP bit); u32, since bool would collapse it to 1 */
+static void ls2x_dma_write_cmd(struct ls2x_dma_chan *lchan, u32 cmd)
+{
+	struct ls2x_dma_priv *priv = to_ldma_priv(lchan->vchan.chan.device);
+	u64 val = lo_hi_readq(priv->regs + LDMA_ORDER_ERG) & ~LDMA_CONFIG_MASK;
+
+	val |= LDMA_64BIT_EN | cmd;	/* keep the non-config bits, set 64-bit mode + cmd */
+	lo_hi_writeq(val, priv->regs + LDMA_ORDER_ERG);
+}
+
+static void ls2x_dma_start_transfer(struct ls2x_dma_chan *lchan)
+{
+	struct ls2x_dma_priv *priv = to_ldma_priv(lchan->vchan.chan.device);
+	struct ls2x_dma_sg *ldma_sg;
+	struct virt_dma_desc *vdesc;
+	u64 val;
+
+	/* Get the next descriptor */
+	vdesc = vchan_next_desc(&lchan->vchan);
+	if (!vdesc) {
+		lchan->desc = NULL;
+		return;
+	}
+
+	list_del(&vdesc->node);
+	lchan->desc = to_ldma_desc(vdesc);
+	ldma_sg = &lchan->desc->sg[0];
+
+	/* Start DMA */
+	lo_hi_writeq(0, priv->regs + LDMA_ORDER_ERG);
+	val = (ldma_sg->llp & ~LDMA_CONFIG_MASK) | LDMA_64BIT_EN | LDMA_START;
+	lo_hi_writeq(val, priv->regs + LDMA_ORDER_ERG);
+}
+
+static size_t ls2x_dmac_detect_burst(struct ls2x_dma_chan *lchan)
+{
+	u32 maxburst, buswidth;
+
+	/* Reject definitely invalid configurations */
+	if ((lchan->sconfig.src_addr_width & LDMA_SLAVE_BUSWIDTHS) &&
+	    (lchan->sconfig.dst_addr_width & LDMA_SLAVE_BUSWIDTHS))
+		return 0;
+
+	if (lchan->sconfig.direction == DMA_MEM_TO_DEV) {
+		maxburst = lchan->sconfig.dst_maxburst;
+		buswidth = lchan->sconfig.dst_addr_width;
+	} else {
+		maxburst = lchan->sconfig.src_maxburst;
+		buswidth = lchan->sconfig.src_addr_width;
+	}
+
+	/* If maxburst is zero, fall back to LDMA_MAX_TRANS_LEN */
+	return maxburst ? (maxburst * buswidth) >> 2 : LDMA_MAX_TRANS_LEN;
+}
+
+static void ls2x_dma_fill_desc(struct ls2x_dma_chan *lchan, u32 sg_index,
+			       struct ls2x_dma_desc *desc)
+{
+	struct ls2x_dma_sg *ldma_sg = &desc->sg[sg_index];
+	u32 num_segments, segment_size;
+
+	if (desc->direction == DMA_MEM_TO_DEV) {
+		ldma_sg->hw->cmd = LDMA_INT | LDMA_DATA_DIRECTION;
+		ldma_sg->hw->apb_addr = lchan->sconfig.dst_addr;
+	} else {
+		ldma_sg->hw->cmd = LDMA_INT;
+		ldma_sg->hw->apb_addr = lchan->sconfig.src_addr;
+	}
+
+	ldma_sg->hw->mem_addr = lower_32_bits(ldma_sg->phys);
+	ldma_sg->hw->high_mem_addr = upper_32_bits(ldma_sg->phys);
+
+	/* Split into multiple equally sized segments if necessary */
+	num_segments = DIV_ROUND_UP((ldma_sg->len + 3) >> 2, desc->burst_size);
+	segment_size = DIV_ROUND_UP((ldma_sg->len + 3) >> 2, num_segments);
+
+	/* Word count register takes input in words */
+	ldma_sg->hw->len = segment_size;
+	ldma_sg->hw->step_times = num_segments;
+	ldma_sg->hw->step_len = 0;
+
+	/* Link the previous descriptor to this one */
+	if (sg_index) {
+		desc->sg[sg_index - 1].hw->ndesc_addr = ldma_sg->llp | LDMA_DESC_EN;
+		desc->sg[sg_index - 1].hw->high_ndesc_addr = upper_32_bits(ldma_sg->llp);
+	}
+}
+
+/*--  DMA Engine API  --------------------------------------------------*/
+
+/*
+ * ls2x_dma_alloc_chan_resources - allocate resources for DMA channel
+ * @chan: allocate descriptor resources for this channel
+ *
+ * Return: 1 on success, -ENOMEM if the descriptor pool cannot be created
+ */
+static int ls2x_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct ls2x_dma_chan *lchan = to_ldma_chan(chan);
+
+	/* Create a pool of consistent memory blocks for hardware descriptors */
+	lchan->pool = dma_pool_create(dev_name(chan2dev(chan)),
+				      chan->device->dev, PAGE_SIZE,
+				      __alignof__(struct ls2x_dma_hw_desc), 0);
+	if (!lchan->pool) {
+		dev_err(chan2dev(chan), "No memory for descriptors\n");
+		return -ENOMEM;
+	}
+
+	return 1;
+}
+
+/*
+ * ls2x_dma_free_chan_resources - free all channel resources
+ * @chan: DMA channel
+ */
+static void ls2x_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct ls2x_dma_chan *lchan = to_ldma_chan(chan);
+
+	vchan_free_chan_resources(to_virt_chan(chan));
+	dma_pool_destroy(lchan->pool);
+	lchan->pool = NULL;
+}
+
+/*
+ * ls2x_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
+ * @chan: DMA channel
+ * @sgl: scatterlist to transfer to/from
+ * @sg_len: number of entries in @sgl
+ * @direction: DMA direction
+ * @flags: tx descriptor status flags
+ * @context: transaction context (ignored)
+ *
+ * Return: Async transaction descriptor on success and NULL on failure
+ */
+static struct dma_async_tx_descriptor *
+ls2x_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+		       u32 sg_len, enum dma_transfer_direction direction,
+		       unsigned long flags, void *context)
+{
+	struct ls2x_dma_chan *lchan = to_ldma_chan(chan);
+	struct ls2x_dma_desc *desc;
+	struct scatterlist *sg;
+	size_t burst_size;
+	int i;
+
+	if (unlikely(!sg_len || !is_slave_direction(direction)))
+		return NULL;
+
+	burst_size = ls2x_dmac_detect_burst(lchan);
+	if (!burst_size)
+		return NULL;
+
+	desc = kzalloc(struct_size(desc, sg, sg_len), GFP_NOWAIT);
+	if (!desc)
+		return NULL;
+
+	desc->desc_num = sg_len;
+	desc->direction = direction;
+	desc->burst_size = burst_size;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		struct ls2x_dma_sg *ldma_sg = &desc->sg[i];
+
+		/* Allocate DMA capable memory for hardware descriptor */
+		ldma_sg->hw = dma_pool_alloc(lchan->pool, GFP_NOWAIT, &ldma_sg->llp);
+		if (!ldma_sg->hw) {
+			desc->desc_num = i;
+			ls2x_dma_desc_free(&desc->vdesc);
+			return NULL;
+		}
+
+		ldma_sg->phys = sg_dma_address(sg);
+		ldma_sg->len = sg_dma_len(sg);
+
+		ls2x_dma_fill_desc(lchan, i, desc);
+	}
+
+	/* Clear the enable bit in the last descriptor so the chain ends there */
+	desc->sg[sg_len - 1].hw->ndesc_addr &= ~LDMA_DESC_EN;
+	desc->status = DMA_IN_PROGRESS;
+
+	return vchan_tx_prep(&lchan->vchan, &desc->vdesc, flags);
+}
+
+/*
+ * ls2x_dma_prep_dma_cyclic - prepare the cyclic DMA transfer
+ * @chan: the DMA channel to prepare
+ * @buf_addr: physical DMA address where the buffer starts
+ * @buf_len: total number of bytes for the entire buffer
+ * @period_len: number of bytes for each period
+ * @direction: transfer direction, to or from device
+ * @flags: tx descriptor status flags
+ *
+ * Return: Async transaction descriptor on success and NULL on failure
+ */
+static struct dma_async_tx_descriptor *
+ls2x_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+			 size_t period_len, enum dma_transfer_direction direction,
+			 unsigned long flags)
+{
+	struct ls2x_dma_chan *lchan = to_ldma_chan(chan);
+	struct ls2x_dma_desc *desc;
+	size_t burst_size;
+	u32 num_periods;
+	int i;
+
+	if (unlikely(!buf_len || !period_len))
+		return NULL;
+
+	if (unlikely(!is_slave_direction(direction)))
+		return NULL;
+
+	burst_size = ls2x_dmac_detect_burst(lchan);
+	if (!burst_size)
+		return NULL;
+
+	num_periods = buf_len / period_len;
+	desc = kzalloc(struct_size(desc, sg, num_periods), GFP_NOWAIT);
+	if (!desc)
+		return NULL;
+
+	desc->desc_num = num_periods;
+	desc->direction = direction;
+	desc->burst_size = burst_size;
+
+	/* Build cyclic linked list */
+	for (i = 0; i < num_periods; i++) {
+		struct ls2x_dma_sg *ldma_sg = &desc->sg[i];
+
+		/* Allocate DMA capable memory for hardware descriptor */
+		ldma_sg->hw = dma_pool_alloc(lchan->pool, GFP_NOWAIT, &ldma_sg->llp);
+		if (!ldma_sg->hw) {
+			desc->desc_num = i;
+			ls2x_dma_desc_free(&desc->vdesc);
+			return NULL;
+		}
+
+		ldma_sg->phys = buf_addr + period_len * i;
+		ldma_sg->len = period_len;
+
+		ls2x_dma_fill_desc(lchan, i, desc);
+	}
+
+	/* Point the last descriptor back at the first to form a ring */
+	desc->sg[num_periods - 1].hw->ndesc_addr = desc->sg[0].llp | LDMA_DESC_EN;
+	desc->sg[num_periods - 1].hw->high_ndesc_addr = upper_32_bits(desc->sg[0].llp);
+	desc->cyclic = true;
+	desc->status = DMA_IN_PROGRESS;
+
+	return vchan_tx_prep(&lchan->vchan, &desc->vdesc, flags);
+}
+
+/*
+ * ls2x_dma_slave_config - set slave configuration for channel
+ * @chan: dma channel
+ * @config: slave configuration
+ *
+ * Sets slave configuration for channel
+ */
+static int ls2x_dma_slave_config(struct dma_chan *chan,
+				 struct dma_slave_config *config)
+{
+	struct ls2x_dma_chan *lchan = to_ldma_chan(chan);
+
+	memcpy(&lchan->sconfig, config, sizeof(*config));
+	return 0;
+}
+
+/*
+ * ls2x_dma_issue_pending - push pending transactions to the hardware
+ * @chan: channel
+ *
+ * When this function is called, all pending transactions are pushed to the
+ * hardware and executed.
+ */
+static void ls2x_dma_issue_pending(struct dma_chan *chan)
+{
+	struct ls2x_dma_chan *lchan = to_ldma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&lchan->vchan.lock, flags);
+	if (vchan_issue_pending(&lchan->vchan) && !lchan->desc)
+		ls2x_dma_start_transfer(lchan);
+	spin_unlock_irqrestore(&lchan->vchan.lock, flags);
+}
+
+/*
+ * ls2x_dma_terminate_all - terminate all transactions
+ * @chan: channel
+ *
+ * Stops all DMA transactions.
+ */
+static int ls2x_dma_terminate_all(struct dma_chan *chan)
+{
+	struct ls2x_dma_chan *lchan = to_ldma_chan(chan);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&lchan->vchan.lock, flags);
+	/* Setting stop cmd */
+	ls2x_dma_write_cmd(lchan, LDMA_STOP);
+	if (lchan->desc) {
+		vchan_terminate_vdesc(&lchan->desc->vdesc);
+		lchan->desc = NULL;
+	}
+
+	vchan_get_all_descriptors(&lchan->vchan, &head);
+	spin_unlock_irqrestore(&lchan->vchan.lock, flags);
+
+	vchan_dma_desc_free_list(&lchan->vchan, &head);
+	return 0;
+}
+
+/*
+ * ls2x_dma_synchronize - Synchronizes the termination of transfers to the
+ * current context.
+ * @chan: channel
+ */
+static void ls2x_dma_synchronize(struct dma_chan *chan)
+{
+	struct ls2x_dma_chan *lchan = to_ldma_chan(chan);
+
+	vchan_synchronize(&lchan->vchan);
+}
+
+static int ls2x_dma_pause(struct dma_chan *chan)
+{
+	struct ls2x_dma_chan *lchan = to_ldma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&lchan->vchan.lock, flags);
+	if (lchan->desc && lchan->desc->status == DMA_IN_PROGRESS) {
+		ls2x_dma_write_cmd(lchan, LDMA_STOP);
+		lchan->desc->status = DMA_PAUSED;
+	}
+	spin_unlock_irqrestore(&lchan->vchan.lock, flags);
+
+	return 0;
+}
+
+static int ls2x_dma_resume(struct dma_chan *chan)
+{
+	struct ls2x_dma_chan *lchan = to_ldma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&lchan->vchan.lock, flags);
+	if (lchan->desc && lchan->desc->status == DMA_PAUSED) {
+		lchan->desc->status = DMA_IN_PROGRESS;
+		ls2x_dma_write_cmd(lchan, LDMA_START);
+	}
+	spin_unlock_irqrestore(&lchan->vchan.lock, flags);
+
+	return 0;
+}
+
+/*
+ * ls2x_dma_isr - LS2X DMA Interrupt handler
+ * @irq: IRQ number
+ * @dev_id: Pointer to ls2x_dma_chan
+ *
+ * Return: IRQ_HANDLED
+ */
+static irqreturn_t ls2x_dma_isr(int irq, void *dev_id)
+{
+	struct ls2x_dma_chan *lchan = dev_id;
+	struct ls2x_dma_desc *desc;
+
+	spin_lock(&lchan->vchan.lock);
+	desc = lchan->desc;
+	if (desc) {
+		if (desc->cyclic) {
+			vchan_cyclic_callback(&desc->vdesc);
+		} else {
+			desc->status = DMA_COMPLETE;
+			vchan_cookie_complete(&desc->vdesc);
+			ls2x_dma_start_transfer(lchan);
+		}
+
+		/* ls2x_dma_start_transfer() updates lchan->desc */
+		if (!lchan->desc)
+			ls2x_dma_write_cmd(lchan, LDMA_STOP);
+	}
+	spin_unlock(&lchan->vchan.lock);
+
+	return IRQ_HANDLED;
+}
+
+static int ls2x_dma_chan_init(struct platform_device *pdev,
+			      struct ls2x_dma_priv *priv)
+{
+	struct ls2x_dma_chan *lchan = &priv->lchan;
+	struct device *dev = &pdev->dev;
+	int ret;
+
+	lchan->irq = platform_get_irq(pdev, 0);
+	if (lchan->irq < 0)
+		return lchan->irq;
+
+	ret = devm_request_irq(dev, lchan->irq, ls2x_dma_isr, IRQF_TRIGGER_RISING,
+			       dev_name(&pdev->dev), lchan);
+	if (ret)
+		return ret;
+
+	/* Initialize channels related values */
+	INIT_LIST_HEAD(&priv->ddev.channels);
+	lchan->vchan.desc_free = ls2x_dma_desc_free;
+	vchan_init(&lchan->vchan, &priv->ddev);
+
+	return 0;
+}
+
+/*
+ * ls2x_dma_probe - Driver probe function
+ * @pdev: Pointer to the platform_device structure
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int ls2x_dma_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct ls2x_dma_priv *priv;
+	struct dma_device *ddev;
+	int ret;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->regs = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(priv->regs))
+		return dev_err_probe(dev, PTR_ERR(priv->regs),
+				     "devm_platform_ioremap_resource failed.\n");
+
+	priv->dma_clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(priv->dma_clk))
+		return dev_err_probe(dev, PTR_ERR(priv->dma_clk), "devm_clk_get failed.\n");
+
+	ret = clk_prepare_enable(priv->dma_clk);
+	if (ret)
+		return dev_err_probe(dev, ret, "clk_prepare_enable failed.\n");
+
+	ret = ls2x_dma_chan_init(pdev, priv);
+	if (ret)
+		goto disable_clk;
+
+	ddev = &priv->ddev;
+	ddev->dev = dev;
+	dma_cap_zero(ddev->cap_mask);
+	dma_cap_set(DMA_SLAVE, ddev->cap_mask);
+	dma_cap_set(DMA_CYCLIC, ddev->cap_mask);
+
+	ddev->device_alloc_chan_resources = ls2x_dma_alloc_chan_resources;
+	ddev->device_free_chan_resources = ls2x_dma_free_chan_resources;
+	ddev->device_tx_status = dma_cookie_status;
+	ddev->device_issue_pending = ls2x_dma_issue_pending;
+	ddev->device_prep_slave_sg = ls2x_dma_prep_slave_sg;
+	ddev->device_prep_dma_cyclic = ls2x_dma_prep_dma_cyclic;
+	ddev->device_config = ls2x_dma_slave_config;
+	ddev->device_terminate_all = ls2x_dma_terminate_all;
+	ddev->device_synchronize = ls2x_dma_synchronize;
+	ddev->device_pause = ls2x_dma_pause;
+	ddev->device_resume = ls2x_dma_resume;
+
+	ddev->src_addr_widths = LDMA_SLAVE_BUSWIDTHS;
+	ddev->dst_addr_widths = LDMA_SLAVE_BUSWIDTHS;
+	ddev->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+
+	ret = dma_async_device_register(&priv->ddev);
+	if (ret < 0)
+		goto disable_clk;
+
+	ret = of_dma_controller_register(dev->of_node, of_dma_xlate_by_chan_id, priv);
+	if (ret < 0)
+		goto unregister_dmac;
+
+	platform_set_drvdata(pdev, priv);
+
+	dev_info(dev, "Loongson LS2X APB DMA driver registered successfully.\n");
+	return 0;
+
+unregister_dmac:
+	dma_async_device_unregister(&priv->ddev);
+disable_clk:
+	clk_disable_unprepare(priv->dma_clk);
+
+	return ret;
+}
+
+/*
+ * ls2x_dma_remove - Driver remove function
+ * @pdev: Pointer to the platform_device structure
+ */
+static void ls2x_dma_remove(struct platform_device *pdev)
+{
+	struct ls2x_dma_priv *priv = platform_get_drvdata(pdev);
+
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&priv->ddev);
+	clk_disable_unprepare(priv->dma_clk);
+}
+
+static const struct of_device_id ls2x_dma_of_match_table[] = {
+	{ .compatible = "loongson,ls2k1000-apbdma" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, ls2x_dma_of_match_table);
+
+static struct platform_driver ls2x_dmac_driver = {
+	.probe		= ls2x_dma_probe,
+	.remove_new	= ls2x_dma_remove,
+	.driver = {
+		.name	= "ls2x-apbdma",
+		.of_match_table	= ls2x_dma_of_match_table,
+	},
+};
+module_platform_driver(ls2x_dmac_driver);
+
+MODULE_DESCRIPTION("Loongson LS2X APB DMA Controller driver");
+MODULE_AUTHOR("Loongson Technology Corporation Limited");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/milbeaut-hdmac.c b/drivers/dma/milbeaut-hdmac.c
index 1b0a95892627..7b41c670970a 100644
--- a/drivers/dma/milbeaut-hdmac.c
+++ b/drivers/dma/milbeaut-hdmac.c
@@ -531,7 +531,7 @@ disable_clk:
 	return ret;
 }
 
-static int milbeaut_hdmac_remove(struct platform_device *pdev)
+static void milbeaut_hdmac_remove(struct platform_device *pdev)
 {
 	struct milbeaut_hdmac_device *mdev = platform_get_drvdata(pdev);
 	struct dma_chan *chan;
@@ -546,16 +546,21 @@ static int milbeaut_hdmac_remove(struct platform_device *pdev)
 	 */
 	list_for_each_entry(chan, &mdev->ddev.channels, device_node) {
 		ret = dmaengine_terminate_sync(chan);
-		if (ret)
-			return ret;
+		if (ret) {
+			/*
+			 * Bailing out here leaks resources and may lead to
+			 * use-after-free errors, as e.g. *mdev is kfreed.
+			 */
+			dev_alert(&pdev->dev, "Failed to terminate channel %d (%pe)\n",
+				  chan->chan_id, ERR_PTR(ret));
+			return;
+		}
 		milbeaut_hdmac_free_chan_resources(chan);
 	}
 
 	of_dma_controller_free(pdev->dev.of_node);
 	dma_async_device_unregister(&mdev->ddev);
 	clk_disable_unprepare(mdev->clk);
-
-	return 0;
 }
 
 static const struct of_device_id milbeaut_hdmac_match[] = {
@@ -566,7 +571,7 @@ MODULE_DEVICE_TABLE(of, milbeaut_hdmac_match);
 
 static struct platform_driver milbeaut_hdmac_driver = {
 	.probe = milbeaut_hdmac_probe,
-	.remove = milbeaut_hdmac_remove,
+	.remove_new = milbeaut_hdmac_remove,
 	.driver = {
 		.name = "milbeaut-m10v-hdmac",
 		.of_match_table = milbeaut_hdmac_match,
diff --git a/drivers/dma/milbeaut-xdmac.c b/drivers/dma/milbeaut-xdmac.c
index d29d01e730aa..2cce529b448e 100644
--- a/drivers/dma/milbeaut-xdmac.c
+++ b/drivers/dma/milbeaut-xdmac.c
@@ -368,7 +368,7 @@ disable_xdmac:
 	return ret;
 }
 
-static int milbeaut_xdmac_remove(struct platform_device *pdev)
+static void milbeaut_xdmac_remove(struct platform_device *pdev)
 {
 	struct milbeaut_xdmac_device *mdev = platform_get_drvdata(pdev);
 	struct dma_chan *chan;
@@ -383,8 +383,15 @@ static int milbeaut_xdmac_remove(struct platform_device *pdev)
 	 */
 	list_for_each_entry(chan, &mdev->ddev.channels, device_node) {
 		ret = dmaengine_terminate_sync(chan);
-		if (ret)
-			return ret;
+		if (ret) {
+			/*
+			 * Bailing out here leaks resources and may lead to
+			 * use-after-free errors, as e.g. *mdev is kfreed.
+			 */
+			dev_alert(&pdev->dev, "Failed to terminate channel %d (%pe)\n",
+				  chan->chan_id, ERR_PTR(ret));
+			return;
+		}
 		milbeaut_xdmac_free_chan_resources(chan);
 	}
 
@@ -392,8 +399,6 @@ static int milbeaut_xdmac_remove(struct platform_device *pdev)
 	dma_async_device_unregister(&mdev->ddev);
 
 	disable_xdmac(mdev);
-
-	return 0;
 }
 
 static const struct of_device_id milbeaut_xdmac_match[] = {
@@ -404,7 +409,7 @@ MODULE_DEVICE_TABLE(of, milbeaut_xdmac_match);
 
 static struct platform_driver milbeaut_xdmac_driver = {
 	.probe = milbeaut_xdmac_probe,
-	.remove = milbeaut_xdmac_remove,
+	.remove_new = milbeaut_xdmac_remove,
 	.driver = {
 		.name = "milbeaut-m10v-xdmac",
 		.of_match_table = milbeaut_xdmac_match,
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 3cf0b38387ae..c29744bfdf2c 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -1053,6 +1053,9 @@ static bool _trigger(struct pl330_thread *thrd)
 
 	thrd->req_running = idx;
 
+	if (desc->rqtype == DMA_MEM_TO_DEV || desc->rqtype == DMA_DEV_TO_MEM)
+		UNTIL(thrd, PL330_STATE_WFP);
+
 	return true;
 }
 
diff --git a/drivers/dma/sf-pdma/sf-pdma.c b/drivers/dma/sf-pdma/sf-pdma.c
index 3125a2f162b4..428473611115 100644
--- a/drivers/dma/sf-pdma/sf-pdma.c
+++ b/drivers/dma/sf-pdma/sf-pdma.c
@@ -20,10 +20,13 @@
 #include <linux/mod_devicetable.h>
 #include <linux/dma-mapping.h>
 #include <linux/of.h>
+#include <linux/of_dma.h>
 #include <linux/slab.h>
 
 #include "sf-pdma.h"
 
+#define PDMA_QUIRK_NO_STRICT_ORDERING   BIT(0)
+
 #ifndef readq
 static inline unsigned long long readq(void __iomem *addr)
 {
@@ -65,7 +68,7 @@ static struct sf_pdma_desc *sf_pdma_alloc_desc(struct sf_pdma_chan *chan)
 static void sf_pdma_fill_desc(struct sf_pdma_desc *desc,
 			      u64 dst, u64 src, u64 size)
 {
-	desc->xfer_type = PDMA_FULL_SPEED;
+	desc->xfer_type =  desc->chan->pdma->transfer_type;
 	desc->xfer_size = size;
 	desc->dst_addr = dst;
 	desc->src_addr = src;
@@ -492,6 +495,7 @@ static void sf_pdma_setup_chans(struct sf_pdma *pdma)
 
 static int sf_pdma_probe(struct platform_device *pdev)
 {
+	const struct sf_pdma_driver_platdata *ddata;
 	struct sf_pdma *pdma;
 	int ret, n_chans;
 	const enum dma_slave_buswidth widths =
@@ -517,6 +521,14 @@ static int sf_pdma_probe(struct platform_device *pdev)
 
 	pdma->n_chans = n_chans;
 
+	pdma->transfer_type = PDMA_FULL_SPEED | PDMA_STRICT_ORDERING;
+
+	ddata  = device_get_match_data(&pdev->dev);
+	if (ddata) {
+		if (ddata->quirks & PDMA_QUIRK_NO_STRICT_ORDERING)
+			pdma->transfer_type &= ~PDMA_STRICT_ORDERING;
+	}
+
 	pdma->membase = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(pdma->membase))
 		return PTR_ERR(pdma->membase);
@@ -563,7 +575,20 @@ static int sf_pdma_probe(struct platform_device *pdev)
 		return ret;
 	}
 
+	ret = of_dma_controller_register(pdev->dev.of_node,
+					 of_dma_xlate_by_chan_id, pdma);
+	if (ret < 0) {
+		dev_err(&pdev->dev,
+			"Can't register SiFive Platform OF_DMA. (%d)\n", ret);
+		goto err_unregister;
+	}
+
 	return 0;
+
+err_unregister:
+	dma_async_device_unregister(&pdma->dma_dev);
+
+	return ret;
 }
 
 static void sf_pdma_remove(struct platform_device *pdev)
@@ -583,12 +608,25 @@ static void sf_pdma_remove(struct platform_device *pdev)
 		tasklet_kill(&ch->err_tasklet);
 	}
 
+	if (pdev->dev.of_node)
+		of_dma_controller_free(pdev->dev.of_node);
+
 	dma_async_device_unregister(&pdma->dma_dev);
 }
 
+static const struct sf_pdma_driver_platdata mpfs_pdma = {
+	.quirks = PDMA_QUIRK_NO_STRICT_ORDERING,
+};
+
 static const struct of_device_id sf_pdma_dt_ids[] = {
-	{ .compatible = "sifive,fu540-c000-pdma" },
-	{ .compatible = "sifive,pdma0" },
+	{
+		.compatible = "sifive,fu540-c000-pdma",
+	}, {
+		.compatible = "sifive,pdma0",
+	}, {
+		.compatible = "microchip,mpfs-pdma",
+		.data	    = &mpfs_pdma,
+	},
 	{},
 };
 MODULE_DEVICE_TABLE(of, sf_pdma_dt_ids);
diff --git a/drivers/dma/sf-pdma/sf-pdma.h b/drivers/dma/sf-pdma/sf-pdma.h
index d05772b5d8d3..215e07183d7e 100644
--- a/drivers/dma/sf-pdma/sf-pdma.h
+++ b/drivers/dma/sf-pdma/sf-pdma.h
@@ -48,7 +48,8 @@
 #define PDMA_ERR_STATUS_MASK				GENMASK(31, 31)
 
 /* Transfer Type */
-#define PDMA_FULL_SPEED					0xFF000008
+#define PDMA_FULL_SPEED					0xFF000000
+#define PDMA_STRICT_ORDERING				BIT(3)
 
 /* Error Recovery */
 #define MAX_RETRY					1
@@ -112,8 +113,13 @@ struct sf_pdma {
 	struct dma_device       dma_dev;
 	void __iomem            *membase;
 	void __iomem            *mappedbase;
+	u32			transfer_type;
 	u32			n_chans;
 	struct sf_pdma_chan	chans[] __counted_by(n_chans);
 };
 
+struct sf_pdma_driver_platdata {
+	u32 quirks;
+};
+
 #endif /* _SF_PDMA_H */
diff --git a/drivers/dma/sh/rz-dmac.c b/drivers/dma/sh/rz-dmac.c
index fea5bda34bc2..1f1e86ba5c66 100644
--- a/drivers/dma/sh/rz-dmac.c
+++ b/drivers/dma/sh/rz-dmac.c
@@ -755,11 +755,11 @@ static struct dma_chan *rz_dmac_of_xlate(struct of_phandle_args *dma_spec,
 
 static int rz_dmac_chan_probe(struct rz_dmac *dmac,
 			      struct rz_dmac_chan *channel,
-			      unsigned int index)
+			      u8 index)
 {
 	struct platform_device *pdev = to_platform_device(dmac->dev);
 	struct rz_lmdesc *lmdesc;
-	char pdev_irqname[5];
+	char pdev_irqname[6];
 	char *irqname;
 	int ret;
 
@@ -767,7 +767,7 @@ static int rz_dmac_chan_probe(struct rz_dmac *dmac,
 	channel->mid_rid = -EINVAL;
 
 	/* Request the channel interrupt. */
-	sprintf(pdev_irqname, "ch%u", index);
+	scnprintf(pdev_irqname, sizeof(pdev_irqname), "ch%u", index);
 	channel->irq = platform_get_irq_byname(pdev, pdev_irqname);
 	if (channel->irq < 0)
 		return channel->irq;
@@ -845,9 +845,9 @@ static int rz_dmac_probe(struct platform_device *pdev)
 	struct dma_device *engine;
 	struct rz_dmac *dmac;
 	int channel_num;
-	unsigned int i;
 	int ret;
 	int irq;
+	u8 i;
 
 	dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
 	if (!dmac)
diff --git a/drivers/dma/sh/shdma.h b/drivers/dma/sh/shdma.h
index 9c121a4b33ad..f97d80343aea 100644
--- a/drivers/dma/sh/shdma.h
+++ b/drivers/dma/sh/shdma.h
@@ -25,7 +25,7 @@ struct sh_dmae_chan {
 	const struct sh_dmae_slave_config *config; /* Slave DMA configuration */
 	int xmit_shift;			/* log_2(bytes_per_xfer) */
 	void __iomem *base;
-	char dev_id[16];		/* unique name per DMAC of channel */
+	char dev_id[32];		/* unique name per DMAC of channel */
 	int pm_error;
 	dma_addr_t slave_addr;
 };
diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c
index a9b4302f6050..f7cd0cad056c 100644
--- a/drivers/dma/sh/usb-dmac.c
+++ b/drivers/dma/sh/usb-dmac.c
@@ -706,10 +706,10 @@ static const struct dev_pm_ops usb_dmac_pm = {
 
 static int usb_dmac_chan_probe(struct usb_dmac *dmac,
 			       struct usb_dmac_chan *uchan,
-			       unsigned int index)
+			       u8 index)
 {
 	struct platform_device *pdev = to_platform_device(dmac->dev);
-	char pdev_irqname[5];
+	char pdev_irqname[6];
 	char *irqname;
 	int ret;
 
@@ -717,7 +717,7 @@ static int usb_dmac_chan_probe(struct usb_dmac *dmac,
 	uchan->iomem = dmac->iomem + USB_DMAC_CHAN_OFFSET(index);
 
 	/* Request the channel interrupt. */
-	sprintf(pdev_irqname, "ch%u", index);
+	scnprintf(pdev_irqname, sizeof(pdev_irqname), "ch%u", index);
 	uchan->irq = platform_get_irq_byname(pdev, pdev_irqname);
 	if (uchan->irq < 0)
 		return -ENODEV;
@@ -768,8 +768,8 @@ static int usb_dmac_probe(struct platform_device *pdev)
 	const enum dma_slave_buswidth widths = USB_DMAC_SLAVE_BUSWIDTH;
 	struct dma_device *engine;
 	struct usb_dmac *dmac;
-	unsigned int i;
 	int ret;
+	u8 i;
 
 	dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
 	if (!dmac)
@@ -869,7 +869,7 @@ static void usb_dmac_chan_remove(struct usb_dmac *dmac,
 static void usb_dmac_remove(struct platform_device *pdev)
 {
 	struct usb_dmac *dmac = platform_get_drvdata(pdev);
-	int i;
+	u8 i;
 
 	for (i = 0; i < dmac->n_channels; ++i)
 		usb_dmac_chan_remove(dmac, &dmac->channels[i]);
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 002833fb1fa0..2c489299148e 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -31,13 +31,11 @@
 /**
  * struct stedma40_platform_data - Configuration struct for the dma device.
  *
- * @dev_tx: mapping between destination event line and io address
- * @dev_rx: mapping between source event line and io address
  * @disabled_channels: A vector, ending with -1, that marks physical channels
  * that are for different reasons not available for the driver.
  * @soft_lli_chans: A vector, that marks physical channels will use LLI by SW
  * which avoids HW bug that exists in some versions of the controller.
- * SoftLLI introduces relink overhead that could impact performace for
+ * SoftLLI introduces relink overhead that could impact performance for
  * certain use cases.
  * @num_of_soft_lli_chans: The number of channels that needs to be configured
  * to use SoftLLI.
@@ -184,7 +182,7 @@ static __maybe_unused u32 d40_backup_regs[] = {
 
 /*
  * since 9540 and 8540 has the same HW revision
- * use v4a for 9540 or ealier
+ * use v4a for 9540 or earlier
  * use v4b for 8540 or later
  * HW revision:
  * DB8500ed has revision 0
@@ -411,7 +409,7 @@ struct d40_desc {
  *
  * @base: The virtual address of LCLA. 18 bit aligned.
  * @dma_addr: DMA address, if mapped
- * @base_unaligned: The orignal kmalloc pointer, if kmalloc is used.
+ * @base_unaligned: The original kmalloc pointer, if kmalloc is used.
  * This pointer is only there for clean-up on error.
  * @pages: The number of pages needed for all physical channels.
  * Only used later for clean-up on error
@@ -1655,7 +1653,7 @@ static void dma_tasklet(struct tasklet_struct *t)
 
 	return;
  check_pending_tx:
-	/* Rescue manouver if receiving double interrupts */
+	/* Rescue maneuver if receiving double interrupts */
 	if (d40c->pending_tx > 0)
 		d40c->pending_tx--;
 	spin_unlock_irqrestore(&d40c->lock, flags);
@@ -3412,7 +3410,7 @@ static int __init d40_lcla_allocate(struct d40_base *base)
 		base->lcla_pool.base = (void *)page_list[i];
 	} else {
 		/*
-		 * After many attempts and no succees with finding the correct
+		 * After many attempts and no success with finding the correct
 		 * alignment, try with allocating a big buffer.
 		 */
 		dev_warn(base->dev,
diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c
index 7a0586633bf3..24ad7077c53b 100644
--- a/drivers/dma/tegra210-adma.c
+++ b/drivers/dma/tegra210-adma.c
@@ -153,6 +153,7 @@ struct tegra_adma {
 	void __iomem			*base_addr;
 	struct clk			*ahub_clk;
 	unsigned int			nr_channels;
+	unsigned long			*dma_chan_mask;
 	unsigned long			rx_requests_reserved;
 	unsigned long			tx_requests_reserved;
 
@@ -741,6 +742,10 @@ static int __maybe_unused tegra_adma_runtime_suspend(struct device *dev)
 
 	for (i = 0; i < tdma->nr_channels; i++) {
 		tdc = &tdma->channels[i];
+		/* skip for reserved channels */
+		if (!tdc->tdma)
+			continue;
+
 		ch_reg = &tdc->ch_regs;
 		ch_reg->cmd = tdma_ch_read(tdc, ADMA_CH_CMD);
 		/* skip if channel is not active */
@@ -779,6 +784,9 @@ static int __maybe_unused tegra_adma_runtime_resume(struct device *dev)
 
 	for (i = 0; i < tdma->nr_channels; i++) {
 		tdc = &tdma->channels[i];
+		/* skip for reserved channels */
+		if (!tdc->tdma)
+			continue;
 		ch_reg = &tdc->ch_regs;
 		/* skip if channel was not active earlier */
 		if (!ch_reg->cmd)
@@ -867,10 +875,31 @@ static int tegra_adma_probe(struct platform_device *pdev)
 		return PTR_ERR(tdma->ahub_clk);
 	}
 
+	tdma->dma_chan_mask = devm_kzalloc(&pdev->dev,
+					   BITS_TO_LONGS(tdma->nr_channels) * sizeof(unsigned long),
+					   GFP_KERNEL);
+	if (!tdma->dma_chan_mask)
+		return -ENOMEM;
+
+	/* Enable all channels by default */
+	bitmap_fill(tdma->dma_chan_mask, tdma->nr_channels);
+
+	ret = of_property_read_u32_array(pdev->dev.of_node, "dma-channel-mask",
+					 (u32 *)tdma->dma_chan_mask,
+					 BITS_TO_U32(tdma->nr_channels));
+	if (ret < 0 && (ret != -EINVAL)) {
+		dev_err(&pdev->dev, "dma-channel-mask is not complete.\n");
+		return ret;
+	}
+
 	INIT_LIST_HEAD(&tdma->dma_dev.channels);
 	for (i = 0; i < tdma->nr_channels; i++) {
 		struct tegra_adma_chan *tdc = &tdma->channels[i];
 
+		/* skip for reserved channels */
+		if (!test_bit(i, tdma->dma_chan_mask))
+			continue;
+
 		tdc->chan_addr = tdma->base_addr + cdata->ch_base_offset
 				 + (cdata->ch_reg_size * i);
 
@@ -957,8 +986,10 @@ static void tegra_adma_remove(struct platform_device *pdev)
 	of_dma_controller_free(pdev->dev.of_node);
 	dma_async_device_unregister(&tdma->dma_dev);
 
-	for (i = 0; i < tdma->nr_channels; ++i)
-		irq_dispose_mapping(tdma->channels[i].irq);
+	for (i = 0; i < tdma->nr_channels; ++i) {
+		if (tdma->channels[i].irq)
+			irq_dispose_mapping(tdma->channels[i].irq);
+	}
 
 	pm_runtime_disable(&pdev->dev);
 }
diff --git a/drivers/dma/ti/Makefile b/drivers/dma/ti/Makefile
index acc950bf609c..d376c117cecf 100644
--- a/drivers/dma/ti/Makefile
+++ b/drivers/dma/ti/Makefile
@@ -12,6 +12,7 @@ k3-psil-lib-objs := k3-psil.o \
 		    k3-psil-j721s2.o \
 		    k3-psil-am62.o \
 		    k3-psil-am62a.o \
-		    k3-psil-j784s4.o
+		    k3-psil-j784s4.o \
+		    k3-psil-am62p.o
 obj-$(CONFIG_TI_K3_PSIL) += k3-psil-lib.o
 obj-$(CONFIG_TI_DMA_CROSSBAR) += dma-crossbar.o
diff --git a/drivers/dma/ti/k3-psil-am62p.c b/drivers/dma/ti/k3-psil-am62p.c
new file mode 100644
index 000000000000..0f338e16d971
--- /dev/null
+++ b/drivers/dma/ti/k3-psil-am62p.c
@@ -0,0 +1,293 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright (C) 2023 Texas Instruments Incorporated - https://www.ti.com
+ */
+
+#include <linux/kernel.h>
+
+#include "k3-psil-priv.h"
+
+#define PSIL_PDMA_XY_TR(x)					\
+	{							\
+		.thread_id = x,					\
+		.ep_config = {					\
+			.ep_type = PSIL_EP_PDMA_XY,		\
+			.mapped_channel_id = -1,		\
+			.default_flow_id = -1,			\
+		},						\
+	}
+
+#define PSIL_PDMA_XY_PKT(x)					\
+	{							\
+		.thread_id = x,					\
+		.ep_config = {					\
+			.ep_type = PSIL_EP_PDMA_XY,		\
+			.mapped_channel_id = -1,		\
+			.default_flow_id = -1,			\
+			.pkt_mode = 1,				\
+		},						\
+	}
+
+#define PSIL_ETHERNET(x, ch, flow_base, flow_cnt)		\
+	{							\
+		.thread_id = x,					\
+		.ep_config = {					\
+			.ep_type = PSIL_EP_NATIVE,		\
+			.pkt_mode = 1,				\
+			.needs_epib = 1,			\
+			.psd_size = 16,				\
+			.mapped_channel_id = ch,		\
+			.flow_start = flow_base,		\
+			.flow_num = flow_cnt,			\
+			.default_flow_id = flow_base,		\
+		},						\
+	}
+
+#define PSIL_SAUL(x, ch, flow_base, flow_cnt, default_flow, tx)	\
+	{							\
+		.thread_id = x,					\
+		.ep_config = {					\
+			.ep_type = PSIL_EP_NATIVE,		\
+			.pkt_mode = 1,				\
+			.needs_epib = 1,			\
+			.psd_size = 64,				\
+			.mapped_channel_id = ch,		\
+			.flow_start = flow_base,		\
+			.flow_num = flow_cnt,			\
+			.default_flow_id = default_flow,	\
+			.notdpkt = tx,				\
+		},						\
+	}
+
+#define PSIL_PDMA_MCASP(x)				\
+	{						\
+		.thread_id = x,				\
+		.ep_config = {				\
+			.ep_type = PSIL_EP_PDMA_XY,	\
+			.pdma_acc32 = 1,		\
+			.pdma_burst = 1,		\
+		},					\
+	}
+
+#define PSIL_CSI2RX(x)					\
+	{						\
+		.thread_id = x,				\
+		.ep_config = {				\
+			.ep_type = PSIL_EP_NATIVE,	\
+		},					\
+	}
+
+/* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */
+static struct psil_ep am62p_src_ep_map[] = {
+	/* SAUL */
+	PSIL_SAUL(0x7504, 20, 35, 8, 35, 0),
+	PSIL_SAUL(0x7505, 21, 35, 8, 36, 0),
+	PSIL_SAUL(0x7506, 22, 43, 8, 43, 0),
+	PSIL_SAUL(0x7507, 23, 43, 8, 44, 0),
+	/* PDMA_MAIN0 - SPI0-2 */
+	PSIL_PDMA_XY_PKT(0x4300),
+	PSIL_PDMA_XY_PKT(0x4301),
+	PSIL_PDMA_XY_PKT(0x4302),
+	PSIL_PDMA_XY_PKT(0x4303),
+	PSIL_PDMA_XY_PKT(0x4304),
+	PSIL_PDMA_XY_PKT(0x4305),
+	PSIL_PDMA_XY_PKT(0x4306),
+	PSIL_PDMA_XY_PKT(0x4307),
+	PSIL_PDMA_XY_PKT(0x4308),
+	PSIL_PDMA_XY_PKT(0x4309),
+	PSIL_PDMA_XY_PKT(0x430a),
+	PSIL_PDMA_XY_PKT(0x430b),
+	/* PDMA_MAIN1 - UART0-6 */
+	PSIL_PDMA_XY_PKT(0x4400),
+	PSIL_PDMA_XY_PKT(0x4401),
+	PSIL_PDMA_XY_PKT(0x4402),
+	PSIL_PDMA_XY_PKT(0x4403),
+	PSIL_PDMA_XY_PKT(0x4404),
+	PSIL_PDMA_XY_PKT(0x4405),
+	PSIL_PDMA_XY_PKT(0x4406),
+	/* PDMA_MAIN2 - MCASP0-2 */
+	PSIL_PDMA_MCASP(0x4500),
+	PSIL_PDMA_MCASP(0x4501),
+	PSIL_PDMA_MCASP(0x4502),
+	/* CPSW3G */
+	PSIL_ETHERNET(0x4600, 19, 19, 16),
+	/* CSI2RX */
+	PSIL_CSI2RX(0x5000),
+	PSIL_CSI2RX(0x5001),
+	PSIL_CSI2RX(0x5002),
+	PSIL_CSI2RX(0x5003),
+	PSIL_CSI2RX(0x5004),
+	PSIL_CSI2RX(0x5005),
+	PSIL_CSI2RX(0x5006),
+	PSIL_CSI2RX(0x5007),
+	PSIL_CSI2RX(0x5008),
+	PSIL_CSI2RX(0x5009),
+	PSIL_CSI2RX(0x500a),
+	PSIL_CSI2RX(0x500b),
+	PSIL_CSI2RX(0x500c),
+	PSIL_CSI2RX(0x500d),
+	PSIL_CSI2RX(0x500e),
+	PSIL_CSI2RX(0x500f),
+	PSIL_CSI2RX(0x5010),
+	PSIL_CSI2RX(0x5011),
+	PSIL_CSI2RX(0x5012),
+	PSIL_CSI2RX(0x5013),
+	PSIL_CSI2RX(0x5014),
+	PSIL_CSI2RX(0x5015),
+	PSIL_CSI2RX(0x5016),
+	PSIL_CSI2RX(0x5017),
+	PSIL_CSI2RX(0x5018),
+	PSIL_CSI2RX(0x5019),
+	PSIL_CSI2RX(0x501a),
+	PSIL_CSI2RX(0x501b),
+	PSIL_CSI2RX(0x501c),
+	PSIL_CSI2RX(0x501d),
+	PSIL_CSI2RX(0x501e),
+	PSIL_CSI2RX(0x501f),
+	/* CSI2RX 1-3 (only for J722S) */
+	PSIL_CSI2RX(0x5100),
+	PSIL_CSI2RX(0x5101),
+	PSIL_CSI2RX(0x5102),
+	PSIL_CSI2RX(0x5103),
+	PSIL_CSI2RX(0x5104),
+	PSIL_CSI2RX(0x5105),
+	PSIL_CSI2RX(0x5106),
+	PSIL_CSI2RX(0x5107),
+	PSIL_CSI2RX(0x5108),
+	PSIL_CSI2RX(0x5109),
+	PSIL_CSI2RX(0x510a),
+	PSIL_CSI2RX(0x510b),
+	PSIL_CSI2RX(0x510c),
+	PSIL_CSI2RX(0x510d),
+	PSIL_CSI2RX(0x510e),
+	PSIL_CSI2RX(0x510f),
+	PSIL_CSI2RX(0x5110),
+	PSIL_CSI2RX(0x5111),
+	PSIL_CSI2RX(0x5112),
+	PSIL_CSI2RX(0x5113),
+	PSIL_CSI2RX(0x5114),
+	PSIL_CSI2RX(0x5115),
+	PSIL_CSI2RX(0x5116),
+	PSIL_CSI2RX(0x5117),
+	PSIL_CSI2RX(0x5118),
+	PSIL_CSI2RX(0x5119),
+	PSIL_CSI2RX(0x511a),
+	PSIL_CSI2RX(0x511b),
+	PSIL_CSI2RX(0x511c),
+	PSIL_CSI2RX(0x511d),
+	PSIL_CSI2RX(0x511e),
+	PSIL_CSI2RX(0x511f),
+	PSIL_CSI2RX(0x5200),
+	PSIL_CSI2RX(0x5201),
+	PSIL_CSI2RX(0x5202),
+	PSIL_CSI2RX(0x5203),
+	PSIL_CSI2RX(0x5204),
+	PSIL_CSI2RX(0x5205),
+	PSIL_CSI2RX(0x5206),
+	PSIL_CSI2RX(0x5207),
+	PSIL_CSI2RX(0x5208),
+	PSIL_CSI2RX(0x5209),
+	PSIL_CSI2RX(0x520a),
+	PSIL_CSI2RX(0x520b),
+	PSIL_CSI2RX(0x520c),
+	PSIL_CSI2RX(0x520d),
+	PSIL_CSI2RX(0x520e),
+	PSIL_CSI2RX(0x520f),
+	PSIL_CSI2RX(0x5210),
+	PSIL_CSI2RX(0x5211),
+	PSIL_CSI2RX(0x5212),
+	PSIL_CSI2RX(0x5213),
+	PSIL_CSI2RX(0x5214),
+	PSIL_CSI2RX(0x5215),
+	PSIL_CSI2RX(0x5216),
+	PSIL_CSI2RX(0x5217),
+	PSIL_CSI2RX(0x5218),
+	PSIL_CSI2RX(0x5219),
+	PSIL_CSI2RX(0x521a),
+	PSIL_CSI2RX(0x521b),
+	PSIL_CSI2RX(0x521c),
+	PSIL_CSI2RX(0x521d),
+	PSIL_CSI2RX(0x521e),
+	PSIL_CSI2RX(0x521f),
+	PSIL_CSI2RX(0x5300),
+	PSIL_CSI2RX(0x5301),
+	PSIL_CSI2RX(0x5302),
+	PSIL_CSI2RX(0x5303),
+	PSIL_CSI2RX(0x5304),
+	PSIL_CSI2RX(0x5305),
+	PSIL_CSI2RX(0x5306),
+	PSIL_CSI2RX(0x5307),
+	PSIL_CSI2RX(0x5308),
+	PSIL_CSI2RX(0x5309),
+	PSIL_CSI2RX(0x530a),
+	PSIL_CSI2RX(0x530b),
+	PSIL_CSI2RX(0x530c),
+	PSIL_CSI2RX(0x530d),
+	PSIL_CSI2RX(0x530e),
+	PSIL_CSI2RX(0x530f),
+	PSIL_CSI2RX(0x5310),
+	PSIL_CSI2RX(0x5311),
+	PSIL_CSI2RX(0x5312),
+	PSIL_CSI2RX(0x5313),
+	PSIL_CSI2RX(0x5314),
+	PSIL_CSI2RX(0x5315),
+	PSIL_CSI2RX(0x5316),
+	PSIL_CSI2RX(0x5317),
+	PSIL_CSI2RX(0x5318),
+	PSIL_CSI2RX(0x5319),
+	PSIL_CSI2RX(0x531a),
+	PSIL_CSI2RX(0x531b),
+	PSIL_CSI2RX(0x531c),
+	PSIL_CSI2RX(0x531d),
+	PSIL_CSI2RX(0x531e),
+	PSIL_CSI2RX(0x531f),
+};
+
+/* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */
+static struct psil_ep am62p_dst_ep_map[] = {
+	/* SAUL */
+	PSIL_SAUL(0xf500, 27, 83, 8, 83, 1),
+	PSIL_SAUL(0xf501, 28, 91, 8, 91, 1),
+	/* PDMA_MAIN0 - SPI0-2 */
+	PSIL_PDMA_XY_PKT(0xc300),
+	PSIL_PDMA_XY_PKT(0xc301),
+	PSIL_PDMA_XY_PKT(0xc302),
+	PSIL_PDMA_XY_PKT(0xc303),
+	PSIL_PDMA_XY_PKT(0xc304),
+	PSIL_PDMA_XY_PKT(0xc305),
+	PSIL_PDMA_XY_PKT(0xc306),
+	PSIL_PDMA_XY_PKT(0xc307),
+	PSIL_PDMA_XY_PKT(0xc308),
+	PSIL_PDMA_XY_PKT(0xc309),
+	PSIL_PDMA_XY_PKT(0xc30a),
+	PSIL_PDMA_XY_PKT(0xc30b),
+	/* PDMA_MAIN1 - UART0-6 */
+	PSIL_PDMA_XY_PKT(0xc400),
+	PSIL_PDMA_XY_PKT(0xc401),
+	PSIL_PDMA_XY_PKT(0xc402),
+	PSIL_PDMA_XY_PKT(0xc403),
+	PSIL_PDMA_XY_PKT(0xc404),
+	PSIL_PDMA_XY_PKT(0xc405),
+	PSIL_PDMA_XY_PKT(0xc406),
+	/* PDMA_MAIN2 - MCASP0-2 */
+	PSIL_PDMA_MCASP(0xc500),
+	PSIL_PDMA_MCASP(0xc501),
+	PSIL_PDMA_MCASP(0xc502),
+	/* CPSW3G */
+	PSIL_ETHERNET(0xc600, 19, 19, 8),
+	PSIL_ETHERNET(0xc601, 20, 27, 8),
+	PSIL_ETHERNET(0xc602, 21, 35, 8),
+	PSIL_ETHERNET(0xc603, 22, 43, 8),
+	PSIL_ETHERNET(0xc604, 23, 51, 8),
+	PSIL_ETHERNET(0xc605, 24, 59, 8),
+	PSIL_ETHERNET(0xc606, 25, 67, 8),
+	PSIL_ETHERNET(0xc607, 26, 75, 8),
+};
+
+struct psil_ep_map am62p_ep_map = {
+	.name = "am62p",
+	.src = am62p_src_ep_map,
+	.src_count = ARRAY_SIZE(am62p_src_ep_map),
+	.dst = am62p_dst_ep_map,
+	.dst_count = ARRAY_SIZE(am62p_dst_ep_map),
+};
diff --git a/drivers/dma/ti/k3-psil-priv.h b/drivers/dma/ti/k3-psil-priv.h
index c383723d1c8f..a577be97e344 100644
--- a/drivers/dma/ti/k3-psil-priv.h
+++ b/drivers/dma/ti/k3-psil-priv.h
@@ -45,5 +45,6 @@ extern struct psil_ep_map j721s2_ep_map;
 extern struct psil_ep_map am62_ep_map;
 extern struct psil_ep_map am62a_ep_map;
 extern struct psil_ep_map j784s4_ep_map;
+extern struct psil_ep_map am62p_ep_map;
 
 #endif /* K3_PSIL_PRIV_H_ */
diff --git a/drivers/dma/ti/k3-psil.c b/drivers/dma/ti/k3-psil.c
index c11389d67a3f..25148d952472 100644
--- a/drivers/dma/ti/k3-psil.c
+++ b/drivers/dma/ti/k3-psil.c
@@ -26,6 +26,8 @@ static const struct soc_device_attribute k3_soc_devices[] = {
 	{ .family = "AM62X", .data = &am62_ep_map },
 	{ .family = "AM62AX", .data = &am62a_ep_map },
 	{ .family = "J784S4", .data = &j784s4_ep_map },
+	{ .family = "AM62PX", .data = &am62p_ep_map },
+	{ .family = "J722S", .data = &am62p_ep_map },
 	{ /* sentinel */ }
 };
 
diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c
index 30fd2f386f36..2841a539c264 100644
--- a/drivers/dma/ti/k3-udma.c
+++ b/drivers/dma/ti/k3-udma.c
@@ -4441,6 +4441,8 @@ static const struct soc_device_attribute k3_soc_devices[] = {
 	{ .family = "AM62X", .data = &am64_soc_data },
 	{ .family = "AM62AX", .data = &am64_soc_data },
 	{ .family = "J784S4", .data = &j721e_soc_data },
+	{ .family = "AM62PX", .data = &am64_soc_data },
+	{ .family = "J722S", .data = &am64_soc_data },
 	{ /* sentinel */ }
 };
 
diff --git a/drivers/dma/uniphier-mdmac.c b/drivers/dma/uniphier-mdmac.c
index 618839df0748..ad7125f6e2ca 100644
--- a/drivers/dma/uniphier-mdmac.c
+++ b/drivers/dma/uniphier-mdmac.c
@@ -453,7 +453,7 @@ disable_clk:
 	return ret;
 }
 
-static int uniphier_mdmac_remove(struct platform_device *pdev)
+static void uniphier_mdmac_remove(struct platform_device *pdev)
 {
 	struct uniphier_mdmac_device *mdev = platform_get_drvdata(pdev);
 	struct dma_chan *chan;
@@ -468,16 +468,21 @@ static int uniphier_mdmac_remove(struct platform_device *pdev)
 	 */
 	list_for_each_entry(chan, &mdev->ddev.channels, device_node) {
 		ret = dmaengine_terminate_sync(chan);
-		if (ret)
-			return ret;
+		if (ret) {
+			/*
+			 * Bailing out here leaks resources and may lead to
+			 * use-after-free errors, as e.g. *mdev is kfreed.
+			 */
+			dev_alert(&pdev->dev, "Failed to terminate channel %d (%pe)\n",
+				  chan->chan_id, ERR_PTR(ret));
+			return;
+		}
 		uniphier_mdmac_free_chan_resources(chan);
 	}
 
 	of_dma_controller_free(pdev->dev.of_node);
 	dma_async_device_unregister(&mdev->ddev);
 	clk_disable_unprepare(mdev->clk);
-
-	return 0;
 }
 
 static const struct of_device_id uniphier_mdmac_match[] = {
@@ -488,7 +493,7 @@ MODULE_DEVICE_TABLE(of, uniphier_mdmac_match);
 
 static struct platform_driver uniphier_mdmac_driver = {
 	.probe = uniphier_mdmac_probe,
-	.remove = uniphier_mdmac_remove,
+	.remove_new = uniphier_mdmac_remove,
 	.driver = {
 		.name = "uniphier-mio-dmac",
 		.of_match_table = uniphier_mdmac_match,
diff --git a/drivers/dma/uniphier-xdmac.c b/drivers/dma/uniphier-xdmac.c
index 3a8ee2b173b5..3ce2dc2ad9de 100644
--- a/drivers/dma/uniphier-xdmac.c
+++ b/drivers/dma/uniphier-xdmac.c
@@ -563,7 +563,7 @@ out_unregister_dmac:
 	return ret;
 }
 
-static int uniphier_xdmac_remove(struct platform_device *pdev)
+static void uniphier_xdmac_remove(struct platform_device *pdev)
 {
 	struct uniphier_xdmac_device *xdev = platform_get_drvdata(pdev);
 	struct dma_device *ddev = &xdev->ddev;
@@ -579,15 +579,20 @@ static int uniphier_xdmac_remove(struct platform_device *pdev)
 	 */
 	list_for_each_entry(chan, &ddev->channels, device_node) {
 		ret = dmaengine_terminate_sync(chan);
-		if (ret)
-			return ret;
+		if (ret) {
+			/*
+			 * Bailing out here leaks resources and may lead to
+			 * use-after-free errors, as e.g. *xdev is kfreed.
+			 */
+			dev_alert(&pdev->dev, "Failed to terminate channel %d (%pe)\n",
+				  chan->chan_id, ERR_PTR(ret));
+			return;
+		}
 		uniphier_xdmac_free_chan_resources(chan);
 	}
 
 	of_dma_controller_free(pdev->dev.of_node);
 	dma_async_device_unregister(ddev);
-
-	return 0;
 }
 
 static const struct of_device_id uniphier_xdmac_match[] = {
@@ -598,7 +603,7 @@ MODULE_DEVICE_TABLE(of, uniphier_xdmac_match);
 
 static struct platform_driver uniphier_xdmac_driver = {
 	.probe = uniphier_xdmac_probe,
-	.remove = uniphier_xdmac_remove,
+	.remove_new = uniphier_xdmac_remove,
 	.driver = {
 		.name = "uniphier-xdmac",
 		.of_match_table = uniphier_xdmac_match,
diff --git a/drivers/dma/xilinx/xdma-regs.h b/drivers/dma/xilinx/xdma-regs.h
index e641a5083e14..98f5f6fb9ff9 100644
--- a/drivers/dma/xilinx/xdma-regs.h
+++ b/drivers/dma/xilinx/xdma-regs.h
@@ -64,9 +64,10 @@ struct xdma_hw_desc {
 	__le64		next_desc;
 };
 
-#define XDMA_DESC_SIZE		sizeof(struct xdma_hw_desc)
-#define XDMA_DESC_BLOCK_SIZE	(XDMA_DESC_SIZE * XDMA_DESC_ADJACENT)
-#define XDMA_DESC_BLOCK_ALIGN	4096
+#define XDMA_DESC_SIZE			sizeof(struct xdma_hw_desc)
+#define XDMA_DESC_BLOCK_SIZE		(XDMA_DESC_SIZE * XDMA_DESC_ADJACENT)
+#define XDMA_DESC_BLOCK_ALIGN		32
+#define XDMA_DESC_BLOCK_BOUNDARY	4096
 
 /*
  * Channel registers
@@ -76,6 +77,7 @@ struct xdma_hw_desc {
 #define XDMA_CHAN_CONTROL_W1S		0x8
 #define XDMA_CHAN_CONTROL_W1C		0xc
 #define XDMA_CHAN_STATUS		0x40
+#define XDMA_CHAN_STATUS_RC		0x44
 #define XDMA_CHAN_COMPLETED_DESC	0x48
 #define XDMA_CHAN_ALIGNMENTS		0x4c
 #define XDMA_CHAN_INTR_ENABLE		0x90
@@ -101,6 +103,7 @@ struct xdma_hw_desc {
 #define CHAN_CTRL_IE_MAGIC_STOPPED		BIT(4)
 #define CHAN_CTRL_IE_IDLE_STOPPED		BIT(6)
 #define CHAN_CTRL_IE_READ_ERROR			GENMASK(13, 9)
+#define CHAN_CTRL_IE_WRITE_ERROR		GENMASK(18, 14)
 #define CHAN_CTRL_IE_DESC_ERROR			GENMASK(23, 19)
 #define CHAN_CTRL_NON_INCR_ADDR			BIT(25)
 #define CHAN_CTRL_POLL_MODE_WB			BIT(26)
@@ -111,8 +114,17 @@ struct xdma_hw_desc {
 			 CHAN_CTRL_IE_DESC_ALIGN_MISMATCH |		\
 			 CHAN_CTRL_IE_MAGIC_STOPPED |			\
 			 CHAN_CTRL_IE_READ_ERROR |			\
+			 CHAN_CTRL_IE_WRITE_ERROR |			\
 			 CHAN_CTRL_IE_DESC_ERROR)
 
+#define XDMA_CHAN_STATUS_MASK CHAN_CTRL_START
+
+#define XDMA_CHAN_ERROR_MASK (CHAN_CTRL_IE_DESC_ALIGN_MISMATCH |	\
+			      CHAN_CTRL_IE_MAGIC_STOPPED |		\
+			      CHAN_CTRL_IE_READ_ERROR |			\
+			      CHAN_CTRL_IE_WRITE_ERROR |		\
+			      CHAN_CTRL_IE_DESC_ERROR)
+
 /* bits of the channel interrupt enable mask */
 #define CHAN_IM_DESC_ERROR			BIT(19)
 #define CHAN_IM_READ_ERROR			BIT(9)
@@ -134,18 +146,6 @@ struct xdma_hw_desc {
 #define XDMA_SGDMA_DESC_ADJ	0x4088
 #define XDMA_SGDMA_DESC_CREDIT	0x408c
 
-/* bits of the SG DMA control register */
-#define XDMA_CTRL_RUN_STOP			BIT(0)
-#define XDMA_CTRL_IE_DESC_STOPPED		BIT(1)
-#define XDMA_CTRL_IE_DESC_COMPLETED		BIT(2)
-#define XDMA_CTRL_IE_DESC_ALIGN_MISMATCH	BIT(3)
-#define XDMA_CTRL_IE_MAGIC_STOPPED		BIT(4)
-#define XDMA_CTRL_IE_IDLE_STOPPED		BIT(6)
-#define XDMA_CTRL_IE_READ_ERROR			GENMASK(13, 9)
-#define XDMA_CTRL_IE_DESC_ERROR			GENMASK(23, 19)
-#define XDMA_CTRL_NON_INCR_ADDR			BIT(25)
-#define XDMA_CTRL_POLL_MODE_WB			BIT(26)
-
 /*
  * interrupt registers
  */
diff --git a/drivers/dma/xilinx/xdma.c b/drivers/dma/xilinx/xdma.c
index 84a88029226f..170017ff2aad 100644
--- a/drivers/dma/xilinx/xdma.c
+++ b/drivers/dma/xilinx/xdma.c
@@ -78,27 +78,31 @@ struct xdma_chan {
  * @vdesc: Virtual DMA descriptor
  * @chan: DMA channel pointer
  * @dir: Transferring direction of the request
- * @dev_addr: Physical address on DMA device side
  * @desc_blocks: Hardware descriptor blocks
  * @dblk_num: Number of hardware descriptor blocks
  * @desc_num: Number of hardware descriptors
  * @completed_desc_num: Completed hardware descriptors
  * @cyclic: Cyclic transfer vs. scatter-gather
+ * @interleaved_dma: Interleaved DMA transfer
  * @periods: Number of periods in the cyclic transfer
  * @period_size: Size of a period in bytes in cyclic transfers
+ * @frames_left: Number of frames left in interleaved DMA transfer
+ * @error: tx error flag
  */
 struct xdma_desc {
 	struct virt_dma_desc		vdesc;
 	struct xdma_chan		*chan;
 	enum dma_transfer_direction	dir;
-	u64				dev_addr;
 	struct xdma_desc_block		*desc_blocks;
 	u32				dblk_num;
 	u32				desc_num;
 	u32				completed_desc_num;
 	bool				cyclic;
+	bool				interleaved_dma;
 	u32				periods;
 	u32				period_size;
+	u32				frames_left;
+	bool				error;
 };
 
 #define XDMA_DEV_STATUS_REG_DMA		BIT(0)
@@ -276,6 +280,7 @@ xdma_alloc_desc(struct xdma_chan *chan, u32 desc_num, bool cyclic)
 	sw_desc->chan = chan;
 	sw_desc->desc_num = desc_num;
 	sw_desc->cyclic = cyclic;
+	sw_desc->error = false;
 	dblk_num = DIV_ROUND_UP(desc_num, XDMA_DESC_ADJACENT);
 	sw_desc->desc_blocks = kcalloc(dblk_num, sizeof(*sw_desc->desc_blocks),
 				       GFP_NOWAIT);
@@ -371,6 +376,31 @@ static int xdma_xfer_start(struct xdma_chan *xchan)
 		return ret;
 
 	xchan->busy = true;
+
+	return 0;
+}
+
+/**
+ * xdma_xfer_stop - Stop DMA transfer; returns 0 or the regmap error code
+ * @xchan: DMA channel pointer
+ */
+static int xdma_xfer_stop(struct xdma_chan *xchan)
+{
+	int ret;
+	u32 val;
+	struct xdma_device *xdev = xchan->xdev_hdl;
+
+	/* clear run stop bit to prevent any further auto-triggering */
+	ret = regmap_write(xdev->rmap, xchan->base + XDMA_CHAN_CONTROL_W1C,
+			   CHAN_CTRL_RUN_STOP);
+	if (ret)
+		return ret;
+
+	/* Read XDMA_CHAN_STATUS_RC to clear the channel status; the value is discarded */
+	ret = regmap_read(xdev->rmap, xchan->base + XDMA_CHAN_STATUS_RC, &val);
+	if (ret)
+		return ret;
+
 	return 0;
 }
 
@@ -476,6 +506,84 @@ static void xdma_issue_pending(struct dma_chan *chan)
 }
 
 /**
+ * xdma_terminate_all - Stop the channel and terminate all transactions
+ * @chan: DMA channel pointer
+ */
+static int xdma_terminate_all(struct dma_chan *chan)
+{
+	struct xdma_chan *xdma_chan = to_xdma_chan(chan);
+	struct virt_dma_desc *vd;
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	xdma_xfer_stop(xdma_chan);	/* NOTE(review): error return is ignored */
+
+	spin_lock_irqsave(&xdma_chan->vchan.lock, flags);
+
+	xdma_chan->busy = false;
+	vd = vchan_next_desc(&xdma_chan->vchan);
+	if (vd) {
+		list_del(&vd->node);
+		dma_cookie_complete(&vd->tx);
+		vchan_terminate_vdesc(vd);
+	}
+	vchan_get_all_descriptors(&xdma_chan->vchan, &head);
+	list_splice_tail(&head, &xdma_chan->vchan.desc_terminated);
+
+	spin_unlock_irqrestore(&xdma_chan->vchan.lock, flags);
+
+	return 0;
+}
+
+/**
+ * xdma_synchronize - Synchronize terminated transactions
+ * @chan: DMA channel pointer
+ */
+static void xdma_synchronize(struct dma_chan *chan)
+{
+	struct xdma_chan *xdma_chan = to_xdma_chan(chan);
+
+	vchan_synchronize(&xdma_chan->vchan);
+}
+
+/**
+ * xdma_fill_descs - Fill hardware descriptors with contiguous memory block addresses
+ * @sw_desc: tx descriptor state container
+ * @src_addr: Value for a ->src_addr field of a first descriptor
+ * @dst_addr: Value for a ->dst_addr field of a first descriptor
+ * @size: Total size of a contiguous memory block
+ * @filled_descs_num: Number of hardware descriptors already filled in @sw_desc
+ * Return: Number of hardware descriptors filled by this call
+ */
+static inline u32 xdma_fill_descs(struct xdma_desc *sw_desc, u64 src_addr,
+				  u64 dst_addr, u32 size, u32 filled_descs_num)
+{
+	u32 left = size, len, desc_num = filled_descs_num;
+	struct xdma_desc_block *dblk;
+	struct xdma_hw_desc *desc;
+
+	dblk = sw_desc->desc_blocks + (desc_num / XDMA_DESC_ADJACENT);
+	desc = dblk->virt_addr;
+	desc += desc_num & XDMA_DESC_ADJACENT_MASK;
+	for (;;) {
+		len = min_t(u32, left, XDMA_DESC_BLEN_MAX);
+		/* set hardware descriptor */
+		desc->bytes = cpu_to_le32(len);
+		desc->src_addr = cpu_to_le64(src_addr);
+		desc->dst_addr = cpu_to_le64(dst_addr);
+		desc_num++;
+		src_addr += len;
+		dst_addr += len;
+		left -= len;
+		if (!left)
+			break;
+		/* more to fill: step to the next descriptor, or the next block when full */
+		desc = (desc_num & XDMA_DESC_ADJACENT_MASK) ? desc + 1 : (++dblk)->virt_addr;
+	}
+	return desc_num - filled_descs_num;
+}
+
+/**
  * xdma_prep_device_sg - prepare a descriptor for a DMA transaction
  * @chan: DMA channel pointer
  * @sgl: Transfer scatter gather list
@@ -491,13 +599,10 @@ xdma_prep_device_sg(struct dma_chan *chan, struct scatterlist *sgl,
 {
 	struct xdma_chan *xdma_chan = to_xdma_chan(chan);
 	struct dma_async_tx_descriptor *tx_desc;
-	u32 desc_num = 0, i, len, rest;
-	struct xdma_desc_block *dblk;
-	struct xdma_hw_desc *desc;
 	struct xdma_desc *sw_desc;
-	u64 dev_addr, *src, *dst;
+	u32 desc_num = 0, i;
+	u64 addr, dev_addr, *src, *dst;
 	struct scatterlist *sg;
-	u64 addr;
 
 	for_each_sg(sgl, sg, sg_len, i)
 		desc_num += DIV_ROUND_UP(sg_dma_len(sg), XDMA_DESC_BLEN_MAX);
@@ -506,6 +611,8 @@ xdma_prep_device_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	if (!sw_desc)
 		return NULL;
 	sw_desc->dir = dir;
+	sw_desc->cyclic = false;
+	sw_desc->interleaved_dma = false;
 
 	if (dir == DMA_MEM_TO_DEV) {
 		dev_addr = xdma_chan->cfg.dst_addr;
@@ -517,32 +624,11 @@ xdma_prep_device_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		dst = &addr;
 	}
 
-	dblk = sw_desc->desc_blocks;
-	desc = dblk->virt_addr;
-	desc_num = 1;
+	desc_num = 0;
 	for_each_sg(sgl, sg, sg_len, i) {
 		addr = sg_dma_address(sg);
-		rest = sg_dma_len(sg);
-
-		do {
-			len = min_t(u32, rest, XDMA_DESC_BLEN_MAX);
-			/* set hardware descriptor */
-			desc->bytes = cpu_to_le32(len);
-			desc->src_addr = cpu_to_le64(*src);
-			desc->dst_addr = cpu_to_le64(*dst);
-
-			if (!(desc_num & XDMA_DESC_ADJACENT_MASK)) {
-				dblk++;
-				desc = dblk->virt_addr;
-			} else {
-				desc++;
-			}
-
-			desc_num++;
-			dev_addr += len;
-			addr += len;
-			rest -= len;
-		} while (rest);
+		desc_num += xdma_fill_descs(sw_desc, *src, *dst, sg_dma_len(sg), desc_num);
+		dev_addr += sg_dma_len(sg);
 	}
 
 	tx_desc = vchan_tx_prep(&xdma_chan->vchan, &sw_desc->vdesc, flags);
@@ -576,9 +662,9 @@ xdma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t address,
 	struct xdma_device *xdev = xdma_chan->xdev_hdl;
 	unsigned int periods = size / period_size;
 	struct dma_async_tx_descriptor *tx_desc;
-	struct xdma_desc_block *dblk;
-	struct xdma_hw_desc *desc;
 	struct xdma_desc *sw_desc;
+	u64 addr, dev_addr, *src, *dst;
+	u32 desc_num;
 	unsigned int i;
 
 	/*
@@ -602,22 +688,23 @@ xdma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t address,
 	sw_desc->periods = periods;
 	sw_desc->period_size = period_size;
 	sw_desc->dir = dir;
+	sw_desc->interleaved_dma = false;
 
-	dblk = sw_desc->desc_blocks;
-	desc = dblk->virt_addr;
+	addr = address;
+	if (dir == DMA_MEM_TO_DEV) {
+		dev_addr = xdma_chan->cfg.dst_addr;
+		src = &addr;
+		dst = &dev_addr;
+	} else {
+		dev_addr = xdma_chan->cfg.src_addr;
+		src = &dev_addr;
+		dst = &addr;
+	}
 
-	/* fill hardware descriptor */
+	desc_num = 0;
 	for (i = 0; i < periods; i++) {
-		desc->bytes = cpu_to_le32(period_size);
-		if (dir == DMA_MEM_TO_DEV) {
-			desc->src_addr = cpu_to_le64(address + i * period_size);
-			desc->dst_addr = cpu_to_le64(xdma_chan->cfg.dst_addr);
-		} else {
-			desc->src_addr = cpu_to_le64(xdma_chan->cfg.src_addr);
-			desc->dst_addr = cpu_to_le64(address + i * period_size);
-		}
-
-		desc++;
+		desc_num += xdma_fill_descs(sw_desc, *src, *dst, period_size, desc_num);
+		addr += period_size;	/* running address: advance one period */
 	}
 
 	tx_desc = vchan_tx_prep(&xdma_chan->vchan, &sw_desc->vdesc, flags);
@@ -633,6 +720,57 @@ failed:
 }
 
 /**
+ * xdma_prep_interleaved_dma - Prepare virtual descriptor for interleaved DMA transfers
+ * @chan: DMA channel
+ * @xt: DMA transfer template
+ * @flags: tx flags; DMA_PREP_REPEAT marks the descriptor as cyclic
+ */
+static struct dma_async_tx_descriptor *
+xdma_prep_interleaved_dma(struct dma_chan *chan,
+			  struct dma_interleaved_template *xt,
+			  unsigned long flags)
+{
+	int i;
+	u32 desc_num = 0, period_size = 0;
+	struct dma_async_tx_descriptor *tx_desc;
+	struct xdma_chan *xchan = to_xdma_chan(chan);
+	struct xdma_desc *sw_desc;
+	u64 src_addr, dst_addr;
+
+	for (i = 0; i < xt->frame_size; ++i)
+		desc_num += DIV_ROUND_UP(xt->sgl[i].size, XDMA_DESC_BLEN_MAX);
+
+	sw_desc = xdma_alloc_desc(xchan, desc_num, false);
+	if (!sw_desc)
+		return NULL;
+	sw_desc->dir = xt->dir;
+	sw_desc->interleaved_dma = true;
+	sw_desc->cyclic = flags & DMA_PREP_REPEAT;
+	sw_desc->frames_left = xt->numf;
+	sw_desc->periods = xt->numf;
+
+	desc_num = 0;
+	src_addr = xt->src_start;
+	dst_addr = xt->dst_start;
+	for (i = 0; i < xt->frame_size; ++i) {
+		desc_num += xdma_fill_descs(sw_desc, src_addr, dst_addr, xt->sgl[i].size, desc_num);
+		src_addr += dmaengine_get_src_icg(xt, &xt->sgl[i]) + (xt->src_inc ?
+							      xt->sgl[i].size : 0);
+		dst_addr += dmaengine_get_dst_icg(xt, &xt->sgl[i]) + (xt->dst_inc ?
+							      xt->sgl[i].size : 0);
+		period_size += xt->sgl[i].size;
+	}
+	sw_desc->period_size = period_size;	/* sum of the chunk sizes of one frame */
+
+	tx_desc = vchan_tx_prep(&xchan->vchan, &sw_desc->vdesc, flags);
+	if (tx_desc)
+		return tx_desc;
+
+	xdma_free_desc(&sw_desc->vdesc);
+	return NULL;
+}
+
+/**
  * xdma_device_config - Configure the DMA channel
  * @chan: DMA channel
  * @cfg: channel configuration
@@ -677,9 +815,8 @@ static int xdma_alloc_chan_resources(struct dma_chan *chan)
 		return -EINVAL;
 	}
 
-	xdma_chan->desc_pool = dma_pool_create(dma_chan_name(chan),
-					       dev, XDMA_DESC_BLOCK_SIZE,
-					       XDMA_DESC_BLOCK_ALIGN, 0);
+	xdma_chan->desc_pool = dma_pool_create(dma_chan_name(chan), dev, XDMA_DESC_BLOCK_SIZE,
+					       XDMA_DESC_BLOCK_ALIGN, XDMA_DESC_BLOCK_BOUNDARY);
 	if (!xdma_chan->desc_pool) {
 		xdma_err(xdev, "unable to allocate descriptor pool");
 		return -ENOMEM;
@@ -706,20 +843,20 @@ static enum dma_status xdma_tx_status(struct dma_chan *chan, dma_cookie_t cookie
 	spin_lock_irqsave(&xdma_chan->vchan.lock, flags);
 
 	vd = vchan_find_desc(&xdma_chan->vchan, cookie);
-	if (vd)
-		desc = to_xdma_desc(vd);
-	if (!desc || !desc->cyclic) {
-		spin_unlock_irqrestore(&xdma_chan->vchan.lock, flags);
-		return ret;
-	}
-
-	period_idx = desc->completed_desc_num % desc->periods;
-	residue = (desc->periods - period_idx) * desc->period_size;
+	if (!vd)
+		goto out;
 
+	desc = to_xdma_desc(vd);
+	if (desc->error) {
+		ret = DMA_ERROR;
+	} else if (desc->cyclic) {
+		period_idx = desc->completed_desc_num % desc->periods;
+		residue = (desc->periods - period_idx) * desc->period_size;
+		dma_set_residue(state, residue);
+	}
+out:
 	spin_unlock_irqrestore(&xdma_chan->vchan.lock, flags);
 
-	dma_set_residue(state, residue);
-
 	return ret;
 }
 
@@ -732,11 +869,12 @@ static irqreturn_t xdma_channel_isr(int irq, void *dev_id)
 {
 	struct xdma_chan *xchan = dev_id;
 	u32 complete_desc_num = 0;
-	struct xdma_device *xdev;
-	struct virt_dma_desc *vd;
+	struct xdma_device *xdev = xchan->xdev_hdl;
+	struct virt_dma_desc *vd, *next_vd;
 	struct xdma_desc *desc;
 	int ret;
 	u32 st;
+	bool repeat_tx;
 
 	spin_lock(&xchan->vchan.lock);
 
@@ -745,45 +883,76 @@ static irqreturn_t xdma_channel_isr(int irq, void *dev_id)
 	if (!vd)
 		goto out;
 
-	xchan->busy = false;
+	/* Clear-on-read the status register */
+	ret = regmap_read(xdev->rmap, xchan->base + XDMA_CHAN_STATUS_RC, &st);
+	if (ret)
+		goto out;
+
 	desc = to_xdma_desc(vd);
-	xdev = xchan->xdev_hdl;
+
+	st &= XDMA_CHAN_STATUS_MASK;
+	if ((st & XDMA_CHAN_ERROR_MASK) ||
+	    !(st & (CHAN_CTRL_IE_DESC_COMPLETED | CHAN_CTRL_IE_DESC_STOPPED))) {
+		desc->error = true;
+		xdma_err(xdev, "channel error, status register value: 0x%x", st);
+		goto out;
+	}
 
 	ret = regmap_read(xdev->rmap, xchan->base + XDMA_CHAN_COMPLETED_DESC,
 			  &complete_desc_num);
 	if (ret)
 		goto out;
 
-	desc->completed_desc_num += complete_desc_num;
+	if (desc->interleaved_dma) {
+		xchan->busy = false;
+		desc->completed_desc_num += complete_desc_num;
+		if (complete_desc_num == XDMA_DESC_BLOCK_NUM * XDMA_DESC_ADJACENT) {
+			xdma_xfer_start(xchan);
+			goto out;
+		}
 
-	if (desc->cyclic) {
-		ret = regmap_read(xdev->rmap, xchan->base + XDMA_CHAN_STATUS,
-				  &st);
-		if (ret)
+		/* last desc of any frame */
+		desc->frames_left--;
+		if (desc->frames_left)
+			goto out;
+
+		/* last desc of the last frame  */
+		repeat_tx = vd->tx.flags & DMA_PREP_REPEAT;
+		next_vd = list_first_entry_or_null(&vd->node, struct virt_dma_desc, node);
+		if (next_vd)
+			repeat_tx = repeat_tx && !(next_vd->tx.flags & DMA_PREP_LOAD_EOT);
+		if (repeat_tx) {
+			desc->frames_left = desc->periods;
+			desc->completed_desc_num = 0;
+			vchan_cyclic_callback(vd);
+		} else {
+			list_del(&vd->node);
+			vchan_cookie_complete(vd);
+		}
+		/* start (or continue) the tx of a first desc on the vc.desc_issued list, if any */
+		xdma_xfer_start(xchan);
+	} else if (!desc->cyclic) {
+		xchan->busy = false;
+		desc->completed_desc_num += complete_desc_num;
+
+		/* if all data blocks are transferred, remove and complete the request */
+		if (desc->completed_desc_num == desc->desc_num) {
+			list_del(&vd->node);
+			vchan_cookie_complete(vd);
 			goto out;
+		}
 
-		regmap_write(xdev->rmap, xchan->base + XDMA_CHAN_STATUS, st);
+		if (desc->completed_desc_num > desc->desc_num ||
+		    complete_desc_num != XDMA_DESC_BLOCK_NUM * XDMA_DESC_ADJACENT)
+			goto out;
 
+		/* transfer the rest of data */
+		xdma_xfer_start(xchan);
+	} else {
+		desc->completed_desc_num = complete_desc_num;
 		vchan_cyclic_callback(vd);
-		goto out;
-	}
-
-	/*
-	 * if all data blocks are transferred, remove and complete the request
-	 */
-	if (desc->completed_desc_num == desc->desc_num) {
-		list_del(&vd->node);
-		vchan_cookie_complete(vd);
-		goto out;
 	}
 
-	if (desc->completed_desc_num > desc->desc_num ||
-	    complete_desc_num != XDMA_DESC_BLOCK_NUM * XDMA_DESC_ADJACENT)
-		goto out;
-
-	/* transfer the rest of data (SG only) */
-	xdma_xfer_start(xchan);
-
 out:
 	spin_unlock(&xchan->vchan.lock);
 	return IRQ_HANDLED;
@@ -1080,6 +1249,9 @@ static int xdma_probe(struct platform_device *pdev)
 	dma_cap_set(DMA_SLAVE, xdev->dma_dev.cap_mask);
 	dma_cap_set(DMA_PRIVATE, xdev->dma_dev.cap_mask);
 	dma_cap_set(DMA_CYCLIC, xdev->dma_dev.cap_mask);
+	dma_cap_set(DMA_INTERLEAVE, xdev->dma_dev.cap_mask);
+	dma_cap_set(DMA_REPEAT, xdev->dma_dev.cap_mask);
+	dma_cap_set(DMA_LOAD_EOT, xdev->dma_dev.cap_mask);
 
 	xdev->dma_dev.dev = &pdev->dev;
 	xdev->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
@@ -1089,10 +1261,13 @@ static int xdma_probe(struct platform_device *pdev)
 	xdev->dma_dev.device_prep_slave_sg = xdma_prep_device_sg;
 	xdev->dma_dev.device_config = xdma_device_config;
 	xdev->dma_dev.device_issue_pending = xdma_issue_pending;
+	xdev->dma_dev.device_terminate_all = xdma_terminate_all;
+	xdev->dma_dev.device_synchronize = xdma_synchronize;
 	xdev->dma_dev.filter.map = pdata->device_map;
 	xdev->dma_dev.filter.mapcnt = pdata->device_map_cnt;
 	xdev->dma_dev.filter.fn = xdma_filter_fn;
 	xdev->dma_dev.device_prep_dma_cyclic = xdma_prep_dma_cyclic;
+	xdev->dma_dev.device_prep_interleaved_dma = xdma_prep_interleaved_dma;
 
 	ret = dma_async_device_register(&xdev->dma_dev);
 	if (ret) {
diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c
index 69587d85a7cd..b82815e64d24 100644
--- a/drivers/dma/xilinx/xilinx_dpdma.c
+++ b/drivers/dma/xilinx/xilinx_dpdma.c
@@ -309,7 +309,7 @@ static ssize_t xilinx_dpdma_debugfs_desc_done_irq_read(char *buf)
 
 	out_str_len = strlen(XILINX_DPDMA_DEBUGFS_UINT16_MAX_STR);
 	out_str_len = min_t(size_t, XILINX_DPDMA_DEBUGFS_READ_MAX_SIZE,
-			    out_str_len);
+			    out_str_len + 1);
 	snprintf(buf, out_str_len, "%d",
 		 dpdma_debugfs.xilinx_dpdma_irq_done_count);
 
diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c
index 1eca8cc271f8..5152bd1b0daf 100644
--- a/drivers/dpll/dpll_core.c
+++ b/drivers/dpll/dpll_core.c
@@ -29,8 +29,6 @@ static u32 dpll_pin_xa_id;
 	WARN_ON_ONCE(!xa_get_mark(&dpll_device_xa, (d)->id, DPLL_REGISTERED))
 #define ASSERT_DPLL_NOT_REGISTERED(d)	\
 	WARN_ON_ONCE(xa_get_mark(&dpll_device_xa, (d)->id, DPLL_REGISTERED))
-#define ASSERT_PIN_REGISTERED(p)	\
-	WARN_ON_ONCE(!xa_get_mark(&dpll_pin_xa, (p)->id, DPLL_REGISTERED))
 
 struct dpll_device_registration {
 	struct list_head list;
@@ -425,6 +423,53 @@ void dpll_device_unregister(struct dpll_device *dpll,
 }
 EXPORT_SYMBOL_GPL(dpll_device_unregister);
 
+static void dpll_pin_prop_free(struct dpll_pin_properties *prop)
+{
+	kfree(prop->package_label);	/* expects the dpll_pin_prop_dup() copies */
+	kfree(prop->panel_label);
+	kfree(prop->board_label);
+	kfree(prop->freq_supported);
+}
+
+/* Deep-copy @src into @dst (frequency table and labels); returns 0 or -ENOMEM. */
+static int dpll_pin_prop_dup(const struct dpll_pin_properties *src,
+			     struct dpll_pin_properties *dst)
+{
+	memcpy(dst, src, sizeof(*dst));
+	dst->freq_supported = NULL;	/* never alias (and later kfree) the caller's table */
+	if (src->freq_supported && src->freq_supported_num) {
+		size_t freq_size = src->freq_supported_num * sizeof(*src->freq_supported);
+		dst->freq_supported = kmemdup(src->freq_supported, freq_size, GFP_KERNEL);
+		if (!dst->freq_supported)
+			return -ENOMEM;
+	}
+	if (src->board_label) {
+		dst->board_label = kstrdup(src->board_label, GFP_KERNEL);
+		if (!dst->board_label)
+			goto err_board_label;
+	}
+	if (src->panel_label) {
+		dst->panel_label = kstrdup(src->panel_label, GFP_KERNEL);
+		if (!dst->panel_label)
+			goto err_panel_label;
+	}
+	if (src->package_label) {
+		dst->package_label = kstrdup(src->package_label, GFP_KERNEL);
+		if (!dst->package_label)
+			goto err_package_label;
+	}
+
+	return 0;
+
+err_package_label:
+	kfree(dst->panel_label);
+err_panel_label:
+	kfree(dst->board_label);
+err_board_label:
+	kfree(dst->freq_supported);
+	return -ENOMEM;
+}
+
 static struct dpll_pin *
 dpll_pin_alloc(u64 clock_id, u32 pin_idx, struct module *module,
 	       const struct dpll_pin_properties *prop)
@@ -441,20 +486,24 @@ dpll_pin_alloc(u64 clock_id, u32 pin_idx, struct module *module,
 	if (WARN_ON(prop->type < DPLL_PIN_TYPE_MUX ||
 		    prop->type > DPLL_PIN_TYPE_MAX)) {
 		ret = -EINVAL;
-		goto err;
+		goto err_pin_prop;
 	}
-	pin->prop = prop;
+	ret = dpll_pin_prop_dup(prop, &pin->prop);
+	if (ret)
+		goto err_pin_prop;
 	refcount_set(&pin->refcount, 1);
 	xa_init_flags(&pin->dpll_refs, XA_FLAGS_ALLOC);
 	xa_init_flags(&pin->parent_refs, XA_FLAGS_ALLOC);
 	ret = xa_alloc_cyclic(&dpll_pin_xa, &pin->id, pin, xa_limit_32b,
 			      &dpll_pin_xa_id, GFP_KERNEL);
 	if (ret)
-		goto err;
+		goto err_xa_alloc;
 	return pin;
-err:
+err_xa_alloc:
 	xa_destroy(&pin->dpll_refs);
 	xa_destroy(&pin->parent_refs);
+	dpll_pin_prop_free(&pin->prop);
+err_pin_prop:
 	kfree(pin);
 	return ERR_PTR(ret);
 }
@@ -514,6 +563,7 @@ void dpll_pin_put(struct dpll_pin *pin)
 		xa_destroy(&pin->dpll_refs);
 		xa_destroy(&pin->parent_refs);
 		xa_erase(&dpll_pin_xa, pin->id);
+		dpll_pin_prop_free(&pin->prop);
 		kfree(pin);
 	}
 	mutex_unlock(&dpll_lock);
@@ -564,8 +614,6 @@ dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin,
 	    WARN_ON(!ops->state_on_dpll_get) ||
 	    WARN_ON(!ops->direction_get))
 		return -EINVAL;
-	if (ASSERT_DPLL_REGISTERED(dpll))
-		return -EINVAL;
 
 	mutex_lock(&dpll_lock);
 	if (WARN_ON(!(dpll->module == pin->module &&
@@ -636,15 +684,13 @@ int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin,
 	unsigned long i, stop;
 	int ret;
 
-	if (WARN_ON(parent->prop->type != DPLL_PIN_TYPE_MUX))
+	if (WARN_ON(parent->prop.type != DPLL_PIN_TYPE_MUX))
 		return -EINVAL;
 
 	if (WARN_ON(!ops) ||
 	    WARN_ON(!ops->state_on_pin_get) ||
 	    WARN_ON(!ops->direction_get))
 		return -EINVAL;
-	if (ASSERT_PIN_REGISTERED(parent))
-		return -EINVAL;
 
 	mutex_lock(&dpll_lock);
 	ret = dpll_xa_ref_pin_add(&pin->parent_refs, parent, ops, priv);
diff --git a/drivers/dpll/dpll_core.h b/drivers/dpll/dpll_core.h
index 5585873c5c1b..717f715015c7 100644
--- a/drivers/dpll/dpll_core.h
+++ b/drivers/dpll/dpll_core.h
@@ -44,7 +44,7 @@ struct dpll_device {
  * @module:		module of creator
  * @dpll_refs:		hold referencees to dplls pin was registered with
  * @parent_refs:	hold references to parent pins pin was registered with
- * @prop:		pointer to pin properties given by registerer
+ * @prop:		pin properties copied from the registerer
  * @rclk_dev_name:	holds name of device when pin can recover clock from it
  * @refcount:		refcount
  **/
@@ -55,7 +55,7 @@ struct dpll_pin {
 	struct module *module;
 	struct xarray dpll_refs;
 	struct xarray parent_refs;
-	const struct dpll_pin_properties *prop;
+	struct dpll_pin_properties prop;
 	refcount_t refcount;
 };
 
diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c
index 3370dbddb86b..314bb3775465 100644
--- a/drivers/dpll/dpll_netlink.c
+++ b/drivers/dpll/dpll_netlink.c
@@ -303,17 +303,17 @@ dpll_msg_add_pin_freq(struct sk_buff *msg, struct dpll_pin *pin,
 	if (nla_put_64bit(msg, DPLL_A_PIN_FREQUENCY, sizeof(freq), &freq,
 			  DPLL_A_PIN_PAD))
 		return -EMSGSIZE;
-	for (fs = 0; fs < pin->prop->freq_supported_num; fs++) {
+	for (fs = 0; fs < pin->prop.freq_supported_num; fs++) {
 		nest = nla_nest_start(msg, DPLL_A_PIN_FREQUENCY_SUPPORTED);
 		if (!nest)
 			return -EMSGSIZE;
-		freq = pin->prop->freq_supported[fs].min;
+		freq = pin->prop.freq_supported[fs].min;
 		if (nla_put_64bit(msg, DPLL_A_PIN_FREQUENCY_MIN, sizeof(freq),
 				  &freq, DPLL_A_PIN_PAD)) {
 			nla_nest_cancel(msg, nest);
 			return -EMSGSIZE;
 		}
-		freq = pin->prop->freq_supported[fs].max;
+		freq = pin->prop.freq_supported[fs].max;
 		if (nla_put_64bit(msg, DPLL_A_PIN_FREQUENCY_MAX, sizeof(freq),
 				  &freq, DPLL_A_PIN_PAD)) {
 			nla_nest_cancel(msg, nest);
@@ -329,9 +329,9 @@ static bool dpll_pin_is_freq_supported(struct dpll_pin *pin, u32 freq)
 {
 	int fs;
 
-	for (fs = 0; fs < pin->prop->freq_supported_num; fs++)
-		if (freq >= pin->prop->freq_supported[fs].min &&
-		    freq <= pin->prop->freq_supported[fs].max)
+	for (fs = 0; fs < pin->prop.freq_supported_num; fs++)
+		if (freq >= pin->prop.freq_supported[fs].min &&
+		    freq <= pin->prop.freq_supported[fs].max)
 			return true;
 	return false;
 }
@@ -421,7 +421,7 @@ static int
 dpll_cmd_pin_get_one(struct sk_buff *msg, struct dpll_pin *pin,
 		     struct netlink_ext_ack *extack)
 {
-	const struct dpll_pin_properties *prop = pin->prop;
+	const struct dpll_pin_properties *prop = &pin->prop;
 	struct dpll_pin_ref *ref;
 	int ret;
 
@@ -553,6 +553,24 @@ __dpll_device_change_ntf(struct dpll_device *dpll)
 	return dpll_device_event_send(DPLL_CMD_DEVICE_CHANGE_NTF, dpll);
 }
 
+static bool dpll_pin_available(struct dpll_pin *pin)
+{
+	struct dpll_pin_ref *par_ref;
+	unsigned long i;
+
+	if (!xa_get_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED))
+		return false;	/* the pin itself is not marked registered */
+	xa_for_each(&pin->parent_refs, i, par_ref)
+		if (xa_get_mark(&dpll_pin_xa, par_ref->pin->id,
+				DPLL_REGISTERED))
+			return true;	/* at least one parent pin is registered */
+	xa_for_each(&pin->dpll_refs, i, par_ref)
+		if (xa_get_mark(&dpll_device_xa, par_ref->dpll->id,
+				DPLL_REGISTERED))
+			return true;	/* at least one dpll device is registered */
+	return false;	/* none of its parent pins or dpll devices is registered */
+}
+
 /**
  * dpll_device_change_ntf - notify that the dpll device has been changed
  * @dpll: registered dpll pointer
@@ -579,7 +597,7 @@ dpll_pin_event_send(enum dpll_cmd event, struct dpll_pin *pin)
 	int ret = -ENOMEM;
 	void *hdr;
 
-	if (WARN_ON(!xa_get_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED)))
+	if (!dpll_pin_available(pin))
 		return -ENODEV;
 
 	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
@@ -717,7 +735,7 @@ dpll_pin_on_pin_state_set(struct dpll_pin *pin, u32 parent_idx,
 	int ret;
 
 	if (!(DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE &
-	      pin->prop->capabilities)) {
+	      pin->prop.capabilities)) {
 		NL_SET_ERR_MSG(extack, "state changing is not allowed");
 		return -EOPNOTSUPP;
 	}
@@ -753,7 +771,7 @@ dpll_pin_state_set(struct dpll_device *dpll, struct dpll_pin *pin,
 	int ret;
 
 	if (!(DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE &
-	      pin->prop->capabilities)) {
+	      pin->prop.capabilities)) {
 		NL_SET_ERR_MSG(extack, "state changing is not allowed");
 		return -EOPNOTSUPP;
 	}
@@ -780,7 +798,7 @@ dpll_pin_prio_set(struct dpll_device *dpll, struct dpll_pin *pin,
 	int ret;
 
 	if (!(DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE &
-	      pin->prop->capabilities)) {
+	      pin->prop.capabilities)) {
 		NL_SET_ERR_MSG(extack, "prio changing is not allowed");
 		return -EOPNOTSUPP;
 	}
@@ -808,7 +826,7 @@ dpll_pin_direction_set(struct dpll_pin *pin, struct dpll_device *dpll,
 	int ret;
 
 	if (!(DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE &
-	      pin->prop->capabilities)) {
+	      pin->prop.capabilities)) {
 		NL_SET_ERR_MSG(extack, "direction changing is not allowed");
 		return -EOPNOTSUPP;
 	}
@@ -838,8 +856,8 @@ dpll_pin_phase_adj_set(struct dpll_pin *pin, struct nlattr *phase_adj_attr,
 	int ret;
 
 	phase_adj = nla_get_s32(phase_adj_attr);
-	if (phase_adj > pin->prop->phase_range.max ||
-	    phase_adj < pin->prop->phase_range.min) {
+	if (phase_adj > pin->prop.phase_range.max ||
+	    phase_adj < pin->prop.phase_range.min) {
 		NL_SET_ERR_MSG_ATTR(extack, phase_adj_attr,
 				    "phase adjust value not supported");
 		return -EINVAL;
@@ -1023,7 +1041,7 @@ dpll_pin_find(u64 clock_id, struct nlattr *mod_name_attr,
 	unsigned long i;
 
 	xa_for_each_marked(&dpll_pin_xa, i, pin, DPLL_REGISTERED) {
-		prop = pin->prop;
+		prop = &pin->prop;
 		cid_match = clock_id ? pin->clock_id == clock_id : true;
 		mod_match = mod_name_attr && module_name(pin->module) ?
 			!nla_strcmp(mod_name_attr,
@@ -1130,6 +1148,10 @@ int dpll_nl_pin_id_get_doit(struct sk_buff *skb, struct genl_info *info)
 	}
 	pin = dpll_pin_find_from_nlattr(info);
 	if (!IS_ERR(pin)) {
+		if (!dpll_pin_available(pin)) {
+			nlmsg_free(msg);
+			return -ENODEV;
+		}
 		ret = dpll_msg_add_pin_handle(msg, pin);
 		if (ret) {
 			nlmsg_free(msg);
@@ -1179,6 +1201,8 @@ int dpll_nl_pin_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
 
 	xa_for_each_marked_start(&dpll_pin_xa, i, pin, DPLL_REGISTERED,
 				 ctx->idx) {
+		if (!dpll_pin_available(pin))
+			continue;
 		hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
 				  cb->nlh->nlmsg_seq,
 				  &dpll_nl_family, NLM_F_MULTI,
@@ -1441,7 +1465,8 @@ int dpll_pin_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
 	}
 	info->user_ptr[0] = xa_load(&dpll_pin_xa,
 				    nla_get_u32(info->attrs[DPLL_A_PIN_ID]));
-	if (!info->user_ptr[0]) {
+	if (!info->user_ptr[0] ||
+	    !dpll_pin_available(info->user_ptr[0])) {
 		NL_SET_ERR_MSG(info->extack, "pin not found");
 		ret = -ENODEV;
 		goto unlock_dev;
diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c
index 19706bd2642a..82fcfd29bc4d 100644
--- a/drivers/firmware/sysfb.c
+++ b/drivers/firmware/sysfb.c
@@ -71,7 +71,7 @@ EXPORT_SYMBOL_GPL(sysfb_disable);
 
 static __init int sysfb_init(void)
 {
-	const struct screen_info *si = &screen_info;
+	struct screen_info *si = &screen_info;
 	struct simplefb_platform_data mode;
 	const char *name;
 	bool compatible;
@@ -119,18 +119,6 @@ static __init int sysfb_init(void)
 	if (ret)
 		goto err;
 
-	/*
-	 * The firmware framebuffer is now maintained by the created
-	 * device. Disable screen_info after we've consumed it. Prevents
-	 * invalid access during kexec reboots.
-	 *
-	 * TODO: Vgacon still relies on the global screen_info. Make
-	 *       vgacon work with the platform device, so we can clear
-	 *       the screen_info unconditionally.
-	 */
-	if (strcmp(name, "platform-framebuffer"))
-		screen_info.orig_video_isVGA = 0;
-
 	goto unlock_mutex;
 err:
 	platform_device_put(pd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9da14436a373..3d8a48f46b01 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -254,8 +254,6 @@ extern int amdgpu_agp;
 
 extern int amdgpu_wbrf;
 
-extern int fw_bo_location;
-
 #define AMDGPU_VM_MAX_NUM_CTX			4096
 #define AMDGPU_SG_THRESHOLD			(256*1024*1024)
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS	        3000
@@ -1146,6 +1144,7 @@ struct amdgpu_device {
 	bool                            debug_vm;
 	bool                            debug_largebar;
 	bool                            debug_disable_soft_recovery;
+	bool                            debug_use_vram_fw_buf;
 };
 
 static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 067690ba7bff..77e263660288 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -138,6 +138,9 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)
 	amdgpu_device_gpu_recover(adev, NULL, &reset_context);
 }
 
+static const struct drm_client_funcs kfd_client_funcs = {
+	.unregister	= drm_client_release,
+};
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
 	int i;
@@ -161,7 +164,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 			.enable_mes = adev->enable_mes,
 		};
 
-		ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", NULL);
+		ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", &kfd_client_funcs);
 		if (ret) {
 			dev_err(adev->dev, "Failed to init DRM client: %d\n", ret);
 			return;
@@ -695,10 +698,8 @@ err:
 void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
 {
 	enum amd_powergating_state state = idle ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE;
-	/* Temporary workaround to fix issues observed in some
-	 * compute applications when GFXOFF is enabled on GFX11.
-	 */
-	if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11) {
+	if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 &&
+	    ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) {
 		pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled");
 		amdgpu_gfx_off_ctrl(adev, idle);
 	} else if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 9) &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index cf6ed5fce291..f262b9d89541 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -311,7 +311,7 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem);
 int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo);
 
 int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
-					    struct dma_fence **ef);
+					    struct dma_fence __rcu **ef);
 int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
 					      struct kfd_vm_fault_info *info);
 int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index d17b2452cb1f..f183d7faeeec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2802,7 +2802,7 @@ unlock_out:
 	put_task_struct(usertask);
 }
 
-static void replace_eviction_fence(struct dma_fence **ef,
+static void replace_eviction_fence(struct dma_fence __rcu **ef,
 				   struct dma_fence *new_ef)
 {
 	struct dma_fence *old_ef = rcu_replace_pointer(*ef, new_ef, true
@@ -2837,7 +2837,7 @@ static void replace_eviction_fence(struct dma_fence **ef,
  * 7.  Add fence to all PD and PT BOs.
  * 8.  Unreserve all BOs
  */
-int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
+int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu **ef)
 {
 	struct amdkfd_process_info *process_info = info;
 	struct amdgpu_vm *peer_vm;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5bb444bb36ce..b158d27d0a71 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1544,6 +1544,7 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
 				return true;
 
 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
+			release_firmware(adev->pm.fw);
 			if (fw_ver < 0x00160e00)
 				return true;
 		}
@@ -5245,7 +5246,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
 	struct amdgpu_device *tmp_adev = NULL;
 	bool need_full_reset, skip_hw_reset, vram_lost = false;
 	int r = 0;
-	bool gpu_reset_for_dev_remove = 0;
 
 	/* Try reset handler method first */
 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
@@ -5265,10 +5265,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
 		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
 	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
 
-	gpu_reset_for_dev_remove =
-		test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
-			test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
-
 	/*
 	 * ASIC reset has to be done on all XGMI hive nodes ASAP
 	 * to allow proper links negotiation in FW (within 1 sec)
@@ -5311,18 +5307,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
 		amdgpu_ras_intr_cleared();
 	}
 
-	/* Since the mode1 reset affects base ip blocks, the
-	 * phase1 ip blocks need to be resumed. Otherwise there
-	 * will be a BIOS signature error and the psp bootloader
-	 * can't load kdb on the next amdgpu install.
-	 */
-	if (gpu_reset_for_dev_remove) {
-		list_for_each_entry(tmp_adev, device_list_handle, reset_list)
-			amdgpu_device_ip_resume_phase1(tmp_adev);
-
-		goto end;
-	}
-
 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
 		if (need_full_reset) {
 			/* post card */
@@ -5559,11 +5543,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	int i, r = 0;
 	bool need_emergency_restart = false;
 	bool audio_suspended = false;
-	bool gpu_reset_for_dev_remove = false;
-
-	gpu_reset_for_dev_remove =
-			test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
-				test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
 
 	/*
 	 * Special case: RAS triggered and full reset isn't supported
@@ -5601,7 +5580,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
 			list_add_tail(&tmp_adev->reset_list, &device_list);
-			if (gpu_reset_for_dev_remove && adev->shutdown)
+			if (adev->shutdown)
 				tmp_adev->shutdown = true;
 		}
 		if (!list_is_first(&adev->reset_list, &device_list))
@@ -5686,10 +5665,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 
 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
-		if (gpu_reset_for_dev_remove) {
-			/* Workaroud for ASICs need to disable SMC first */
-			amdgpu_device_smu_fini_early(tmp_adev);
-		}
 		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
 		/*TODO Should we stop ?*/
 		if (r) {
@@ -5721,9 +5696,6 @@ retry:	/* Rest of adevs pre asic reset from XGMI hive. */
 		r = amdgpu_do_asic_reset(device_list_handle, reset_context);
 		if (r && r == -EAGAIN)
 			goto retry;
-
-		if (!r && gpu_reset_for_dev_remove)
-			goto recover_end;
 	}
 
 skip_hw_reset:
@@ -5779,7 +5751,6 @@ skip_sched_resume:
 		amdgpu_ras_set_error_query_ready(tmp_adev, true);
 	}
 
-recover_end:
 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
 					    reset_list);
 	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 0431eafa86b5..c7d60dd0fb97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -1963,8 +1963,6 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 		break;
 	case IP_VERSION(9, 4, 3):
-		if (!amdgpu_exp_hw_support)
-			return -EINVAL;
 		amdgpu_device_ip_block_add(adev, &gfx_v9_4_3_ip_block);
 		break;
 	case IP_VERSION(10, 1, 10):
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 852cec98ff26..cc69005f5b46 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -128,6 +128,7 @@ enum AMDGPU_DEBUG_MASK {
 	AMDGPU_DEBUG_VM = BIT(0),
 	AMDGPU_DEBUG_LARGEBAR = BIT(1),
 	AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2),
+	AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3),
 };
 
 unsigned int amdgpu_vram_limit = UINT_MAX;
@@ -210,7 +211,6 @@ int amdgpu_seamless = -1; /* auto */
 uint amdgpu_debug_mask;
 int amdgpu_agp = -1; /* auto */
 int amdgpu_wbrf = -1;
-int fw_bo_location = -1;
 
 static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
 
@@ -990,10 +990,6 @@ MODULE_PARM_DESC(wbrf,
 	"Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)");
 module_param_named(wbrf, amdgpu_wbrf, int, 0444);
 
-MODULE_PARM_DESC(fw_bo_location,
-	"location to put firmware bo for frontdoor loading (-1 = auto (default), 0 = on ram, 1 = on vram");
-module_param(fw_bo_location, int, 0644);
-
 /* These devices are not supported by amdgpu.
  * They are supported by the mach64, r128, radeon drivers
  */
@@ -2122,6 +2118,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
 		pr_info("debug: soft reset for GPU recovery disabled\n");
 		adev->debug_disable_soft_recovery = true;
 	}
+
+	if (amdgpu_debug_mask & AMDGPU_DEBUG_USE_VRAM_FW_BUF) {
+		pr_info("debug: place fw in vram for frontdoor loading\n");
+		adev->debug_use_vram_fw_buf = true;
+	}
 }
 
 static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)
@@ -2233,6 +2234,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
 
 	pci_set_drvdata(pdev, ddev);
 
+	amdgpu_init_debug_options(adev);
+
 	ret = amdgpu_driver_load_kms(adev, flags);
 	if (ret)
 		goto err_pci;
@@ -2313,8 +2316,6 @@ retry_init:
 			amdgpu_get_secondary_funcs(adev);
 	}
 
-	amdgpu_init_debug_options(adev);
-
 	return 0;
 
 err_pci:
@@ -2336,38 +2337,6 @@ amdgpu_pci_remove(struct pci_dev *pdev)
 		pm_runtime_forbid(dev->dev);
 	}
 
-	if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 2) &&
-	    !amdgpu_sriov_vf(adev)) {
-		bool need_to_reset_gpu = false;
-
-		if (adev->gmc.xgmi.num_physical_nodes > 1) {
-			struct amdgpu_hive_info *hive;
-
-			hive = amdgpu_get_xgmi_hive(adev);
-			if (hive->device_remove_count == 0)
-				need_to_reset_gpu = true;
-			hive->device_remove_count++;
-			amdgpu_put_xgmi_hive(hive);
-		} else {
-			need_to_reset_gpu = true;
-		}
-
-		/* Workaround for ASICs need to reset SMU.
-		 * Called only when the first device is removed.
-		 */
-		if (need_to_reset_gpu) {
-			struct amdgpu_reset_context reset_context;
-
-			adev->shutdown = true;
-			memset(&reset_context, 0, sizeof(reset_context));
-			reset_context.method = AMD_RESET_METHOD_NONE;
-			reset_context.reset_req_dev = adev;
-			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-			set_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context.flags);
-			amdgpu_device_gpu_recover(adev, NULL, &reset_context);
-		}
-	}
-
 	amdgpu_driver_unload_kms(dev);
 
 	/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index d2f273d77e59..55784a9f26c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -1045,21 +1045,28 @@ int amdgpu_gmc_vram_checking(struct amdgpu_device *adev)
 	 * seconds, so here, we just pick up three parts for emulation.
 	 */
 	ret = memcmp(vram_ptr, cptr, 10);
-	if (ret)
-		return ret;
+	if (ret) {
+		ret = -EIO;
+		goto release_buffer;
+	}
 
 	ret = memcmp(vram_ptr + (size / 2), cptr, 10);
-	if (ret)
-		return ret;
+	if (ret) {
+		ret = -EIO;
+		goto release_buffer;
+	}
 
 	ret = memcmp(vram_ptr + size - 10, cptr, 10);
-	if (ret)
-		return ret;
+	if (ret) {
+		ret = -EIO;
+		goto release_buffer;
+	}
 
+release_buffer:
 	amdgpu_bo_free_kernel(&vram_bo, &vram_gpu,
 			&vram_ptr);
 
-	return 0;
+	return ret;
 }
 
 static ssize_t current_memory_partition_show(
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index b5ebafd4a3ad..bf4f48fe438d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1105,7 +1105,12 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 			if (amdgpu_dpm_read_sensor(adev,
 						   AMDGPU_PP_SENSOR_GPU_AVG_POWER,
 						   (void *)&ui32, &ui32_size)) {
-				return -EINVAL;
+				/* fall back to input power for backwards compat */
+				if (amdgpu_dpm_read_sensor(adev,
+							   AMDGPU_PP_SENSOR_GPU_INPUT_POWER,
+							   (void *)&ui32, &ui32_size)) {
+					return -EINVAL;
+				}
 			}
 			ui32 >>= 8;
 			break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 2addbdf88394..0328616473f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -466,7 +466,7 @@ static int psp_sw_init(void *handle)
 	}
 
 	ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
-				      (amdgpu_sriov_vf(adev) || fw_bo_location == 1) ?
+				      (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ?
 				      AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
 				      &psp->fw_pri_bo,
 				      &psp->fw_pri_mc_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index fc42fb6ee191..31823a30dea2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -305,11 +305,13 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
 			return -EINVAL;
 
 		data->head.block = block_id;
-		/* only ue and ce errors are supported */
+		/* only ue, ce and poison errors are supported */
 		if (!memcmp("ue", err, 2))
 			data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
 		else if (!memcmp("ce", err, 2))
 			data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
+		else if (!memcmp("poison", err, 6))
+			data->head.type = AMDGPU_RAS_ERROR__POISON;
 		else
 			return -EINVAL;
 
@@ -431,9 +433,10 @@ static void amdgpu_ras_instance_mask_check(struct amdgpu_device *adev,
  * The block is one of: umc, sdma, gfx, etc.
  *	see ras_block_string[] for details
  *
- * The error type is one of: ue, ce, where,
+ * The error type is one of: ue, ce or poison, where,
  *	ue is multi-uncorrectable
  *	ce is single-correctable
+ *	poison is poison
  *
  * The sub-block is a the sub-block index, pass 0 if there is no sub-block.
  * The address and value are hexadecimal numbers, leading 0x is optional.
@@ -1067,8 +1070,7 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev,
 			mcm_info = &err_info->mcm_info;
 			if (err_info->ce_count) {
 				dev_info(adev->dev, "socket: %d, die: %d, "
-					 "%lld new correctable hardware errors detected in %s block, "
-					 "no user action is needed\n",
+					 "%lld new correctable hardware errors detected in %s block\n",
 					 mcm_info->socket_id,
 					 mcm_info->die_id,
 					 err_info->ce_count,
@@ -1080,8 +1082,7 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev,
 			err_info = &err_node->err_info;
 			mcm_info = &err_info->mcm_info;
 			dev_info(adev->dev, "socket: %d, die: %d, "
-				 "%lld correctable hardware errors detected in total in %s block, "
-				 "no user action is needed\n",
+				 "%lld correctable hardware errors detected in total in %s block\n",
 				 mcm_info->socket_id, mcm_info->die_id, err_info->ce_count, blk_name);
 		}
 	}
@@ -1108,16 +1109,14 @@ static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev,
 			   adev->smuio.funcs->get_die_id) {
 			dev_info(adev->dev, "socket: %d, die: %d "
 				 "%ld correctable hardware errors "
-				 "detected in %s block, no user "
-				 "action is needed.\n",
+				 "detected in %s block\n",
 				 adev->smuio.funcs->get_socket_id(adev),
 				 adev->smuio.funcs->get_die_id(adev),
 				 ras_mgr->err_data.ce_count,
 				 blk_name);
 		} else {
 			dev_info(adev->dev, "%ld correctable hardware errors "
-				 "detected in %s block, no user "
-				 "action is needed.\n",
+				 "detected in %s block\n",
 				 ras_mgr->err_data.ce_count,
 				 blk_name);
 		}
@@ -1920,7 +1919,7 @@ static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj
 				struct amdgpu_iv_entry *entry)
 {
 	dev_info(obj->adev->dev,
-		"Poison is created, no user action is needed.\n");
+		"Poison is created\n");
 }
 
 static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj,
@@ -2920,6 +2919,11 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 
 	amdgpu_ras_query_poison_mode(adev);
 
+	/* Pack socket_id into ras feature mask bits[31:29] */
+	if (adev->smuio.funcs &&
+	    adev->smuio.funcs->get_socket_id)
+		con->features |= ((adev->smuio.funcs->get_socket_id(adev)) << 29);
+
 	/* Get RAS schema for particular SOC */
 	con->schema = amdgpu_get_ras_schema(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index b0335a1c5e90..19899f6b9b2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -32,7 +32,6 @@ enum AMDGPU_RESET_FLAGS {
 
 	AMDGPU_NEED_FULL_RESET = 0,
 	AMDGPU_SKIP_HW_RESET = 1,
-	AMDGPU_RESET_FOR_DEVICE_REMOVE = 2,
 };
 
 struct amdgpu_reset_context {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index d334e42fe0eb..3e12763e477a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -1062,7 +1062,7 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev)
 {
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) {
 		amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE,
-			(amdgpu_sriov_vf(adev) || fw_bo_location == 1) ?
+			(amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ?
 			AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
 			&adev->firmware.fw_buf,
 			&adev->firmware.fw_buf_mc,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index b6cd565562ad..4740dd65b99d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -116,7 +116,7 @@ struct amdgpu_mem_stats;
 #define AMDGPU_VM_FAULT_STOP_FIRST	1
 #define AMDGPU_VM_FAULT_STOP_ALWAYS	2
 
-/* Reserve 4MB VRAM for page tables */
+/* How much VRAM is reserved for page tables */
 #define AMDGPU_VM_RESERVED_VRAM		(8ULL << 20)
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
index 6f149b54d4d3..b9a15d51eb5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
@@ -59,11 +59,8 @@ static inline uint16_t complete_integer_division_u16(
 
 static uint16_t vpe_u1_8_from_fraction(uint16_t numerator, uint16_t denominator)
 {
-	bool arg1_negative = numerator < 0;
-	bool arg2_negative = denominator < 0;
-
-	uint16_t arg1_value = (uint16_t)(arg1_negative ? -numerator : numerator);
-	uint16_t arg2_value = (uint16_t)(arg2_negative ? -denominator : denominator);
+	u16 arg1_value = numerator;
+	u16 arg2_value = denominator;
 
 	uint16_t remainder;
 
@@ -100,9 +97,6 @@ static uint16_t vpe_u1_8_from_fraction(uint16_t numerator, uint16_t denominator)
 		res_value += summand;
 	}
 
-	if (arg1_negative ^ arg2_negative)
-		res_value = -res_value;
-
 	return res_value;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index 6cab882e8061..1592c63b3099 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -43,7 +43,6 @@ struct amdgpu_hive_info {
 	} pstate;
 
 	struct amdgpu_reset_domain *reset_domain;
-	uint32_t device_remove_count;
 	atomic_t ras_recovery;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
index f0737fb3a999..d1bba9c64e16 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
@@ -30,6 +30,8 @@
 
 #define regATHUB_MISC_CNTL_V3_0_1			0x00d7
 #define regATHUB_MISC_CNTL_V3_0_1_BASE_IDX		0
+#define regATHUB_MISC_CNTL_V3_3_0			0x00d8
+#define regATHUB_MISC_CNTL_V3_3_0_BASE_IDX		0
 
 
 static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev)
@@ -40,6 +42,9 @@ static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev)
 	case IP_VERSION(3, 0, 1):
 		data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1);
 		break;
+	case IP_VERSION(3, 3, 0):
+		data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_3_0);
+		break;
 	default:
 		data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
 		break;
@@ -53,6 +58,9 @@ static void athub_v3_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data)
 	case IP_VERSION(3, 0, 1):
 		WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1, data);
 		break;
+	case IP_VERSION(3, 3, 0):
+		WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_3_0, data);
+		break;
 	default:
 		WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
 		break;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 73f6d7e72c73..d63cab294883 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -3996,16 +3996,13 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
 
 	if (!amdgpu_sriov_vf(adev)) {
 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
-		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
-		/* don't check this.  There are apparently firmwares in the wild with
-		 * incorrect size in the header
-		 */
-		if (err == -ENODEV)
-			goto out;
+		err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
 		if (err)
-			dev_dbg(adev->dev,
-				"gfx10: amdgpu_ucode_request() failed \"%s\"\n",
-				fw_name);
+			goto out;
+
+		/* don't validate this firmware. There are apparently firmwares
+		 * in the wild with incorrect size in the header
+		 */
 		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
 		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
 		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 2fbcd9765980..0ea0866c261f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -115,7 +115,7 @@ static const struct soc15_reg_golden golden_settings_gc_11_5_0[] = {
 	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL, 0xffffffff, 0xf37fff3f),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xfffffffb, 0x00f40188),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL4, 0xf0ffffff, 0x8000b007),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL4, 0xf0ffffff, 0x80009007),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf1ffffff, 0x00880007),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, regPC_CONFIG_CNTL_1, 0xffffffff, 0x00010000),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
@@ -6383,6 +6383,9 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+			bitmap = i * adev->gfx.config.max_sh_per_se + j;
+			if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
+				continue;
 			mask = 1;
 			counter = 0;
 			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
index 95d06da544e2..49aecdcee006 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
@@ -456,10 +456,12 @@ static void gfxhub_v1_2_xcc_gart_disable(struct amdgpu_device *adev,
 		WREG32_SOC15_RLC(GC, GET_INST(GC, j), regMC_VM_MX_L1_TLB_CNTL, tmp);
 
 		/* Setup L2 cache */
-		tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL);
-		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
-		WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp);
-		WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0);
+		if (!amdgpu_sriov_vf(adev)) {
+			tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL);
+			tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
+			WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp);
+			WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0);
+		}
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 6d24c84924cb..19986ff6a48d 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -401,8 +401,7 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device
 
 			if (err_data.ce_count)
 				dev_info(adev->dev, "%ld correctable hardware "
-						"errors detected in %s block, "
-						"no user action is needed.\n",
+						"errors detected in %s block\n",
 						obj->err_data.ce_count,
 						get_ras_block_str(adev->nbio.ras_if));
 
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
index 25a3da83e0fb..e90f33780803 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -597,8 +597,7 @@ static void nbio_v7_9_handle_ras_controller_intr_no_bifring(struct amdgpu_device
 
 			if (err_data.ce_count)
 				dev_info(adev->dev, "%ld correctable hardware "
-						"errors detected in %s block, "
-						"no user action is needed.\n",
+						"errors detected in %s block\n",
 						obj->err_data.ce_count,
 						get_ras_block_str(adev->nbio.ras_if));
 
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
index 530549314ce4..a3ee3c4c650f 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
@@ -64,7 +64,7 @@ static void umc_v6_7_query_error_status_helper(struct amdgpu_device *adev,
 	uint64_t reg_value;
 
 	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)
-		dev_info(adev->dev, "Deferred error, no user action is needed.\n");
+		dev_info(adev->dev, "Deferred error\n");
 
 	if (mc_umc_status)
 		dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index d630100b9e91..f856901055d3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -1026,7 +1026,7 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
 	} else {
 		res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
 		if (IS_ERR(res))
-			return -ENOMEM;
+			return PTR_ERR(res);
 		pgmap->range.start = res->start;
 		pgmap->range.end = res->end;
 		pgmap->type = MEMORY_DEVICE_PRIVATE;
@@ -1042,10 +1042,10 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
 	r = devm_memremap_pages(adev->dev, pgmap);
 	if (IS_ERR(r)) {
 		pr_err("failed to register HMM device memory\n");
-		/* Disable SVM support capability */
-		pgmap->type = 0;
 		if (pgmap->type == MEMORY_DEVICE_PRIVATE)
 			devm_release_mem_region(adev->dev, res->start, resource_size(res));
+		/* Disable SVM support capability */
+		pgmap->type = 0;
 		return PTR_ERR(r);
 	}
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 745024b31340..17fbedbf3651 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -917,7 +917,7 @@ struct kfd_process {
 	 * fence will be triggered during eviction and new one will be created
 	 * during restore
 	 */
-	struct dma_fence *ef;
+	struct dma_fence __rcu *ef;
 
 	/* Work items for evicting and restoring BOs */
 	struct delayed_work eviction_work;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 71df51fcc1b0..717a60d7a4ea 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1110,6 +1110,7 @@ static void kfd_process_wq_release(struct work_struct *work)
 {
 	struct kfd_process *p = container_of(work, struct kfd_process,
 					     release_work);
+	struct dma_fence *ef;
 
 	kfd_process_dequeue_from_all_devices(p);
 	pqm_uninit(&p->pqm);
@@ -1118,7 +1119,9 @@ static void kfd_process_wq_release(struct work_struct *work)
 	 * destroyed. This allows any BOs to be freed without
 	 * triggering pointless evictions or waiting for fences.
 	 */
-	dma_fence_signal(p->ef);
+	synchronize_rcu();
+	ef = rcu_access_pointer(p->ef);
+	dma_fence_signal(ef);
 
 	kfd_process_remove_sysfs(p);
 
@@ -1127,7 +1130,7 @@ static void kfd_process_wq_release(struct work_struct *work)
 	svm_range_list_fini(p);
 
 	kfd_process_destroy_pdds(p);
-	dma_fence_put(p->ef);
+	dma_fence_put(ef);
 
 	kfd_event_free_process(p);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index ac84c4a2ca07..c50a0dc9c9c0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -404,14 +404,9 @@ static void svm_range_bo_release(struct kref *kref)
 		spin_lock(&svm_bo->list_lock);
 	}
 	spin_unlock(&svm_bo->list_lock);
-	if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) {
-		/* We're not in the eviction worker.
-		 * Signal the fence and synchronize with any
-		 * pending eviction work.
-		 */
+	if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
+		/* We're not in the eviction worker. Signal the fence. */
 		dma_fence_signal(&svm_bo->eviction_fence->base);
-		cancel_work_sync(&svm_bo->eviction_work);
-	}
 	dma_fence_put(&svm_bo->eviction_fence->base);
 	amdgpu_bo_unref(&svm_bo->bo);
 	kfree(svm_bo);
@@ -2345,8 +2340,10 @@ retry:
 		mutex_unlock(&svms->lock);
 		mmap_write_unlock(mm);
 
-		/* Pairs with mmget in svm_range_add_list_work */
-		mmput(mm);
+		/* Pairs with mmget in svm_range_add_list_work. If dropping the
+		 * last mm refcount, schedule release work to avoid circular locking
+		 */
+		mmput_async(mm);
 
 		spin_lock(&svms->deferred_list_lock);
 	}
@@ -2657,6 +2654,7 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
 {
 	struct vm_area_struct *vma;
 	struct interval_tree_node *node;
+	struct rb_node *rb_node;
 	unsigned long start_limit, end_limit;
 
 	vma = vma_lookup(p->mm, addr << PAGE_SHIFT);
@@ -2676,16 +2674,15 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
 	if (node) {
 		end_limit = min(end_limit, node->start);
 		/* Last range that ends before the fault address */
-		node = container_of(rb_prev(&node->rb),
-				    struct interval_tree_node, rb);
+		rb_node = rb_prev(&node->rb);
 	} else {
 		/* Last range must end before addr because
 		 * there was no range after addr
 		 */
-		node = container_of(rb_last(&p->svms.objects.rb_root),
-				    struct interval_tree_node, rb);
+		rb_node = rb_last(&p->svms.objects.rb_root);
 	}
-	if (node) {
+	if (rb_node) {
+		node = container_of(rb_node, struct interval_tree_node, rb);
 		if (node->last >= addr) {
 			WARN(1, "Overlap with prev node and page fault addr\n");
 			return -EFAULT;
@@ -3432,13 +3429,14 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
 
 int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
 {
-	if (!fence)
-		return -EINVAL;
-
-	if (dma_fence_is_signaled(&fence->base))
-		return 0;
-
-	if (fence->svm_bo) {
+	/* Dereferencing fence->svm_bo is safe here because the fence hasn't
+	 * signaled yet and we're under the protection of the fence->lock.
+	 * After the fence is signaled in svm_range_bo_release, we cannot get
+	 * here any more.
+	 *
+	 * Reference is dropped in svm_range_evict_svm_bo_worker.
+	 */
+	if (svm_bo_ref_unless_zero(fence->svm_bo)) {
 		WRITE_ONCE(fence->svm_bo->evicting, 1);
 		schedule_work(&fence->svm_bo->eviction_work);
 	}
@@ -3453,8 +3451,6 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 	int r = 0;
 
 	svm_bo = container_of(work, struct svm_range_bo, eviction_work);
-	if (!svm_bo_ref_unless_zero(svm_bo))
-		return; /* svm_bo was freed while eviction was pending */
 
 	if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
 		mm = svm_bo->eviction_fence->mm;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index b569b6eda4e3..d4f525b66a09 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -9292,10 +9292,10 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 		if (!new_con_state->writeback_job)
 			continue;
 
-		new_crtc_state = NULL;
+		new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base);
 
-		if (acrtc)
-			new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base);
+		if (!new_crtc_state)
+			continue;
 
 		if (acrtc->wb_enabled)
 			continue;
@@ -10752,7 +10752,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
 			DRM_DEBUG_DRIVER("drm_dp_mst_atomic_check() failed\n");
 			goto fail;
 		}
-		status = dc_validate_global_state(dc, dm_state->context, false);
+		status = dc_validate_global_state(dc, dm_state->context, true);
 		if (status != DC_OK) {
 			DRM_DEBUG_DRIVER("DC global validation failure: %s (%d)",
 				       dc_status_to_str(status), status);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 9b527bffe11a..c87b64e464ed 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -1239,7 +1239,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc,
 	if (has_crtc_cm_degamma && ret != -EINVAL) {
 		drm_dbg_kms(crtc->base.crtc->dev,
 			    "doesn't support plane and CRTC degamma at the same time\n");
-			return -EINVAL;
+		return -EINVAL;
 	}
 
 	/* If we are here, it means we don't have plane degamma settings, check
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index eaf8d9f48244..85b7f58a7f35 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -979,6 +979,11 @@ int dm_helper_dmub_aux_transfer_sync(
 		struct aux_payload *payload,
 		enum aux_return_code_type *operation_result)
 {
+	if (!link->hpd_status) {
+		*operation_result = AUX_RET_ERROR_HPD_DISCON;
+		return -1;
+	}
+
 	return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload,
 			operation_result);
 }
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
index 757528256326..a84f1e376dee 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
@@ -87,6 +87,20 @@ static const struct IP_BASE CLK_BASE = { { { { 0x00016C00, 0x02401800, 0, 0, 0,
 #define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK	0x0000F000L
 #define CLK1_CLK_PLL_REQ__FbMult_frac_MASK	0xFFFF0000L
 
+#define regCLK1_CLK2_BYPASS_CNTL			0x029c
+#define regCLK1_CLK2_BYPASS_CNTL_BASE_IDX	0
+
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL__SHIFT	0x0
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV__SHIFT	0x10
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK		0x00000007L
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK		0x000F0000L
+
+#define regCLK6_0_CLK6_spll_field_8				0x464b
+#define regCLK6_0_CLK6_spll_field_8_BASE_IDX	0
+
+#define CLK6_0_CLK6_spll_field_8__spll_ssc_en__SHIFT	0xd
+#define CLK6_0_CLK6_spll_field_8__spll_ssc_en_MASK		0x00002000L
+
 #define REG(reg_name) \
 	(CLK_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## reg_name)
 
@@ -131,35 +145,63 @@ static int dcn314_get_active_display_cnt_wa(
 	return display_count;
 }
 
-static void dcn314_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool disable)
+static void dcn314_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context,
+				  bool safe_to_lower, bool disable)
 {
 	struct dc *dc = clk_mgr_base->ctx->dc;
 	int i;
 
 	for (i = 0; i < dc->res_pool->pipe_count; ++i) {
-		struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+		struct pipe_ctx *pipe = safe_to_lower
+			? &context->res_ctx.pipe_ctx[i]
+			: &dc->current_state->res_ctx.pipe_ctx[i];
 
 		if (pipe->top_pipe || pipe->prev_odm_pipe)
 			continue;
 		if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) {
-			struct stream_encoder *stream_enc = pipe->stream_res.stream_enc;
-
 			if (disable) {
-				if (stream_enc && stream_enc->funcs->disable_fifo)
-					pipe->stream_res.stream_enc->funcs->disable_fifo(stream_enc);
+				if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc)
+					pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
 
-				pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
 				reset_sync_context_for_pipe(dc, context, i);
 			} else {
 				pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg);
-
-				if (stream_enc && stream_enc->funcs->enable_fifo)
-					pipe->stream_res.stream_enc->funcs->enable_fifo(stream_enc);
 			}
 		}
 	}
 }
 
+/* Return true when the spll_ssc_en field of CLK6_0_CLK6_spll_field_8 reads 1. */
+bool dcn314_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	uint32_t ssc_en_field;
+
+	REG_GET(CLK6_0_CLK6_spll_field_8, spll_ssc_en, &ssc_en_field);
+	return ssc_en_field == 1;
+}
+
+/* Reset clk_mgr->clks to its boot-state defaults, then pick the DP DTO source clock. */
+void dcn314_init_clocks(struct clk_mgr *clk_mgr)
+{
+	struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
+	const uint32_t saved_ref_dtbclk_khz = clk_mgr->clks.ref_dtbclk_khz;
+
+	/* Wipe the whole clock state; ref_dtbclk_khz is put back from the saved copy. */
+	memset(&clk_mgr->clks, 0, sizeof(struct dc_clocks));
+	clk_mgr->clks.ref_dtbclk_khz = saved_ref_dtbclk_khz;
+	/* Assumption is that boot state always supports pstate. */
+	clk_mgr->clks.p_state_change_support = true;
+	clk_mgr->clks.prev_p_state_change_support = true;
+	clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN;
+	clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN;
+
+	/* With SPLL SSC on, run dprefclk through dce_adjust_dp_ref_freq_for_ss(); else use it as-is. */
+	clk_mgr->dp_dto_source_clock_in_khz = dcn314_is_spll_ssc_enabled(clk_mgr)
+		? dce_adjust_dp_ref_freq_for_ss(clk_mgr_int, clk_mgr->dprefclk_khz)
+		: clk_mgr->dprefclk_khz;
+}
+
 void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
 			struct dc_state *context,
 			bool safe_to_lower)
@@ -252,11 +294,11 @@ void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
 	}
 
 	if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) {
-		dcn314_disable_otg_wa(clk_mgr_base, context, true);
+		dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true);
 
 		clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
 		dcn314_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz);
-		dcn314_disable_otg_wa(clk_mgr_base, context, false);
+		dcn314_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false);
 
 		update_dispclk = true;
 	}
@@ -436,6 +478,11 @@ static DpmClocks314_t dummy_clocks;
 
 static struct dcn314_watermarks dummy_wms = { 0 };
 
+static const struct dcn314_ss_info_table ss_info_table = {
+	.ss_divider = 1000,	/* copied to dprefclk_ss_divider when an entry is non-zero */
+	.ss_percentage = {0, 0, 375, 375, 375},	/* indexed by the CLK2_BYPASS_SEL value */
+};
+
 static void dcn314_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn314_watermarks *table)
 {
 	int i, num_valid_sets;
@@ -708,13 +755,31 @@ static struct clk_mgr_funcs dcn314_funcs = {
 	.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
 	.get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz,
 	.update_clocks = dcn314_update_clocks,
-	.init_clocks = dcn31_init_clocks,
+	.init_clocks = dcn314_init_clocks,
 	.enable_pme_wa = dcn314_enable_pme_wa,
 	.are_clock_states_equal = dcn314_are_clock_states_equal,
 	.notify_wm_ranges = dcn314_notify_wm_ranges
 };
 extern struct clk_mgr_funcs dcn3_fpga_funcs;
 
+/* Set the dprefclk SS fields from ss_info_table, indexed by the CLK2_BYPASS_SEL value. */
+static void dcn314_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr)
+{
+	uint32_t bypass_sel;
+
+	REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &bypass_sel);
+	/* Nothing to do when SSC is off or the selector is outside the table. */
+	if (!dcn314_is_spll_ssc_enabled(&clk_mgr->base) ||
+	    bypass_sel >= ARRAY_SIZE(ss_info_table.ss_percentage))
+		return;
+
+	clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[bypass_sel];
+	if (clk_mgr->dprefclk_ss_percentage != 0) {
+		clk_mgr->ss_on_dprefclk = true;
+		clk_mgr->dprefclk_ss_divider = ss_info_table.ss_divider;
+	}
+}
+
 void dcn314_clk_mgr_construct(
 		struct dc_context *ctx,
 		struct clk_mgr_dcn314 *clk_mgr,
@@ -782,6 +847,7 @@ void dcn314_clk_mgr_construct(
 	clk_mgr->base.base.dprefclk_khz = 600000;
 	clk_mgr->base.base.clks.ref_dtbclk_khz = 600000;
 	dce_clock_read_ss_info(&clk_mgr->base);
+	dcn314_read_ss_info_from_lut(&clk_mgr->base);
 	/*if bios enabled SS, driver needs to adjust dtb clock, only enable with correct bios*/
 
 	clk_mgr->base.base.bw_params = &dcn314_bw_params;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
index 171f84340eb2..002c28e80720 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h
@@ -28,6 +28,8 @@
 #define __DCN314_CLK_MGR_H__
 #include "clk_mgr_internal.h"
 
+#define DCN314_NUM_CLOCK_SOURCES   5
+
 struct dcn314_watermarks;
 
 struct dcn314_smu_watermark_set {
@@ -40,9 +42,18 @@ struct clk_mgr_dcn314 {
 	struct dcn314_smu_watermark_set smu_wm_set;
 };
 
+struct dcn314_ss_info_table {
+	uint32_t ss_divider;	/* divider that accompanies the ss_percentage values */
+	uint32_t ss_percentage[DCN314_NUM_CLOCK_SOURCES];	/* one entry per clock source */
+};
+
 bool dcn314_are_clock_states_equal(struct dc_clocks *a,
 		struct dc_clocks *b);
 
+bool dcn314_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base);
+
+void dcn314_init_clocks(struct clk_mgr *clk_mgr);
+
 void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
 			struct dc_state *context,
 			bool safe_to_lower);
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 2d7205058c64..aa7c02ba948e 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -411,12 +411,9 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc,
 	 * avoid conflicting with firmware updates.
 	 */
 	if (dc->ctx->dce_version > DCE_VERSION_MAX)
-		if (dc->optimized_required)
+		if (dc->optimized_required || dc->wm_optimized_required)
 			return false;
 
-	if (!memcmp(&stream->adjust, adjust, sizeof(*adjust)))
-		return true;
-
 	stream->adjust.v_total_max = adjust->v_total_max;
 	stream->adjust.v_total_mid = adjust->v_total_mid;
 	stream->adjust.v_total_mid_frame_num = adjust->v_total_mid_frame_num;
@@ -2230,6 +2227,7 @@ void dc_post_update_surfaces_to_stream(struct dc *dc)
 	}
 
 	dc->optimized_required = false;
+	dc->wm_optimized_required = false;
 }
 
 bool dc_set_generic_gpio_for_stereo(bool enable,
@@ -2652,6 +2650,8 @@ enum surface_update_type dc_check_update_surfaces_for_stream(
 		} else if (memcmp(&dc->current_state->bw_ctx.bw.dcn.clk, &dc->clk_mgr->clks, offsetof(struct dc_clocks, prev_p_state_change_support)) != 0) {
 			dc->optimized_required = true;
 		}
+
+		dc->optimized_required |= dc->wm_optimized_required;
 	}
 
 	return type;
@@ -2859,6 +2859,9 @@ static void copy_stream_update_to_stream(struct dc *dc,
 	if (update->vrr_active_fixed)
 		stream->vrr_active_fixed = *update->vrr_active_fixed;
 
+	if (update->crtc_timing_adjust)
+		stream->adjust = *update->crtc_timing_adjust;
+
 	if (update->dpms_off)
 		stream->dpms_off = *update->dpms_off;
 
@@ -3519,7 +3522,7 @@ static void commit_planes_for_stream(struct dc *dc,
 	top_pipe_to_program = resource_get_otg_master_for_stream(
 				&context->res_ctx,
 				stream);
-
+	ASSERT(top_pipe_to_program != NULL);
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
 
@@ -4288,7 +4291,8 @@ static bool full_update_required(struct dc *dc,
 			stream_update->mst_bw_update ||
 			stream_update->func_shaper ||
 			stream_update->lut3d_func ||
-			stream_update->pending_test_pattern))
+			stream_update->pending_test_pattern ||
+			stream_update->crtc_timing_adjust))
 		return true;
 
 	if (stream) {
@@ -4341,6 +4345,8 @@ static bool should_commit_minimal_transition_for_windowed_mpo_odm(struct dc *dc,
 
 	cur_pipe = resource_get_otg_master_for_stream(&dc->current_state->res_ctx, stream);
 	new_pipe = resource_get_otg_master_for_stream(&context->res_ctx, stream);
+	if (!cur_pipe || !new_pipe)
+		return false;
 	cur_is_odm_in_use = resource_get_odm_slice_count(cur_pipe) > 1;
 	new_is_odm_in_use = resource_get_odm_slice_count(new_pipe) > 1;
 	if (cur_is_odm_in_use == new_is_odm_in_use)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 57f0ddd15923..9fbdb09697fd 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -2194,6 +2194,10 @@ void resource_log_pipe_topology_update(struct dc *dc, struct dc_state *state)
 	for (stream_idx = 0; stream_idx < state->stream_count; stream_idx++) {
 		otg_master = resource_get_otg_master_for_stream(
 				&state->res_ctx, state->streams[stream_idx]);
+		if (!otg_master	|| otg_master->stream_res.tg == NULL) {
+			DC_LOG_DC("topology update: otg_master NULL stream_idx %d!\n", stream_idx);
+			return;
+		}
 		slice_count = resource_get_opp_heads_for_otg_master(otg_master,
 				&state->res_ctx, opp_heads);
 		for (slice_idx = 0; slice_idx < slice_count; slice_idx++) {
@@ -4986,20 +4990,6 @@ enum dc_status update_dp_encoder_resources_for_test_harness(const struct dc *dc,
 	return DC_OK;
 }
 
-bool resource_subvp_in_use(struct dc *dc,
-		struct dc_state *context)
-{
-	uint32_t i;
-
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
-		if (dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_NONE)
-			return true;
-	}
-	return false;
-}
-
 bool check_subvp_sw_cursor_fallback_req(const struct dc *dc, struct dc_stream_state *stream)
 {
 	if (!dc->debug.disable_subvp_high_refresh && is_subvp_high_refresh_candidate(stream))
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
index 460a8010c79f..88c6436b28b6 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
@@ -267,7 +267,8 @@ void dc_state_construct(struct dc *dc, struct dc_state *state)
 	state->clk_mgr = dc->clk_mgr;
 
 	/* Initialise DIG link encoder resource tracking variables. */
-	link_enc_cfg_init(dc, state);
+	if (dc->res_pool)
+		link_enc_cfg_init(dc, state);
 }
 
 void dc_state_destruct(struct dc_state *state)
@@ -433,8 +434,9 @@ bool dc_state_add_plane(
 
 	otg_master_pipe = resource_get_otg_master_for_stream(
 			&state->res_ctx, stream);
-	added = resource_append_dpp_pipes_for_plane_composition(state,
-			dc->current_state, pool, otg_master_pipe, plane_state);
+	if (otg_master_pipe)
+		added = resource_append_dpp_pipes_for_plane_composition(state,
+				dc->current_state, pool, otg_master_pipe, plane_state);
 
 	if (added) {
 		stream_status->plane_states[stream_status->plane_count] =
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index f30a341bc090..5d7aa882416b 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -51,7 +51,7 @@ struct aux_payload;
 struct set_config_cmd_payload;
 struct dmub_notification;
 
-#define DC_VER "3.2.265"
+#define DC_VER "3.2.266"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
@@ -1036,6 +1036,7 @@ struct dc {
 
 	/* Require to optimize clocks and bandwidth for added/removed planes */
 	bool optimized_required;
+	bool wm_optimized_required;
 	bool idle_optimizations_allowed;
 	bool enable_c20_dtm_b0;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index a23eebd9933b..ee10941caa59 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -139,6 +139,7 @@ union stream_update_flags {
 		uint32_t wb_update:1;
 		uint32_t dsc_changed : 1;
 		uint32_t mst_bw : 1;
+		uint32_t crtc_timing_adjust : 1;
 		uint32_t fams_changed : 1;
 	} bits;
 
@@ -325,6 +326,7 @@ struct dc_stream_update {
 	struct dc_3dlut *lut3d_func;
 
 	struct test_pattern *pending_test_pattern;
+	struct dc_crtc_timing_adjust *crtc_timing_adjust;
 };
 
 bool dc_is_stream_unchanged(
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index 4f276169e05a..b08ccb8c68bc 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -1140,23 +1140,25 @@ struct dc_panel_config {
 	} ilr;
 };
 
+#define MAX_SINKS_PER_LINK 4
+
 /*
  *  USB4 DPIA BW ALLOCATION STRUCTS
  */
 struct dc_dpia_bw_alloc {
-	int sink_verified_bw;  // The Verified BW that sink can allocated and use that has been verified already
-	int sink_allocated_bw; // The Actual Allocated BW that sink currently allocated
-	int sink_max_bw;       // The Max BW that sink can require/support
+	int remote_sink_req_bw[MAX_SINKS_PER_LINK]; // BW requested by remote sinks
+	int link_verified_bw;  // The already-verified BW that the link can allocate and use
+	int link_max_bw;       // The Max BW that link can require/support
+	int allocated_bw;      // The Actual Allocated BW for this DPIA
 	int estimated_bw;      // The estimated available BW for this DPIA
 	int bw_granularity;    // BW Granularity
+	int dp_overhead;       // DP overhead in DP tunneling
 	bool bw_alloc_enabled; // The BW Alloc Mode Support is turned ON for all 3:  DP-Tx & Dpia & CM
 	bool response_ready;   // Response ready from the CM side
 	uint8_t nrd_max_lane_count; // Non-reduced max lane count
 	uint8_t nrd_max_link_rate; // Non-reduced max link rate
 };
 
-#define MAX_SINKS_PER_LINK 4
-
 enum dc_hpd_enable_select {
 	HPD_EN_FOR_ALL_EDP = 0,
 	HPD_EN_FOR_PRIMARY_EDP_ONLY,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
index 140598f18bbd..f0458b8f00af 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
@@ -782,7 +782,7 @@ static void get_azalia_clock_info_dp(
 	/*audio_dto_module = dpDtoSourceClockInkhz * 10,000;
 	 *  [khz] ->[100Hz] */
 	azalia_clock_info->audio_dto_module =
-		pll_info->dp_dto_source_clock_in_khz * 10;
+		pll_info->audio_dto_source_clock_in_khz * 10;
 }
 
 void dce_aud_wall_dto_setup(
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
index 5d3f6fa1011e..970644b695cd 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
@@ -975,6 +975,9 @@ static bool dcn31_program_pix_clk(
 			look_up_in_video_optimized_rate_tlb(pix_clk_params->requested_pix_clk_100hz / 10);
 	struct bp_pixel_clock_parameters bp_pc_params = {0};
 	enum transmitter_color_depth bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24;
+
+	if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0)
+		dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz;
 	// For these signal types Driver to program DP_DTO without calling VBIOS Command table
 	if (dc_is_dp_signal(pix_clk_params->signal_type) || dc_is_virtual_signal(pix_clk_params->signal_type)) {
 		if (e) {
@@ -1088,6 +1091,10 @@ static bool get_pixel_clk_frequency_100hz(
 	struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source);
 	unsigned int clock_hz = 0;
 	unsigned int modulo_hz = 0;
+	unsigned int dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dprefclk_khz;
+
+	if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0)
+		dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz;
 
 	if (clock_source->id == CLOCK_SOURCE_ID_DP_DTO) {
 		clock_hz = REG_READ(PHASE[inst]);
@@ -1100,7 +1107,7 @@ static bool get_pixel_clk_frequency_100hz(
 			modulo_hz = REG_READ(MODULO[inst]);
 			if (modulo_hz)
 				*pixel_clk_khz = div_u64((uint64_t)clock_hz*
-					clock_source->ctx->dc->clk_mgr->dprefclk_khz*10,
+					dp_dto_ref_khz*10,
 					modulo_hz);
 			else
 				*pixel_clk_khz = 0;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index e4a328b45c8a..87760600e154 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -183,6 +183,20 @@ bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc,
 	return true;
 }
 
+/* Return true if any pipe in @context reports a SubVP type other than SUBVP_NONE. */
+bool dcn32_subvp_in_use(struct dc *dc, struct dc_state *context)
+{
+	struct pipe_ctx *pipes = context->res_ctx.pipe_ctx;
+	uint32_t idx;
+
+	for (idx = 0; idx < dc->res_pool->pipe_count; idx++) {
+		if (dc_state_get_pipe_subvp_type(context, &pipes[idx]) != SUBVP_NONE)
+			return true;
+	}
+
+	return false;
+}
+
 bool dcn32_mpo_in_use(struct dc_state *context)
 {
 	uint32_t i;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index aa68d010cbfd..9f37f717a1f8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -33,7 +33,6 @@
 #include "dcn30/dcn30_resource.h"
 #include "link.h"
 #include "dc_state_priv.h"
-#include "resource.h"
 
 #define DC_LOGGER_INIT(logger)
 
@@ -292,7 +291,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
 
 		/* for subvp + DRR case, if subvp pipes are still present we support pstate */
 		if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported &&
-				resource_subvp_in_use(dc, context))
+				dcn32_subvp_in_use(dc, context))
 			vba->DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = temp_clock_change_support;
 
 		if (vlevel < context->bw_ctx.dml.vba.soc.num_states &&
@@ -2273,7 +2272,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
 	unsigned int dummy_latency_index = 0;
 	int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
 	unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
-	bool subvp_active = resource_subvp_in_use(dc, context);
+	bool subvp_in_use = dcn32_subvp_in_use(dc, context);
 	unsigned int min_dram_speed_mts_margin;
 	bool need_fclk_lat_as_dummy = false;
 	bool is_subvp_p_drr = false;
@@ -2282,7 +2281,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
 	dc_assert_fp_enabled();
 
 	/* need to find dummy latency index for subvp */
-	if (subvp_active) {
+	if (subvp_in_use) {
 		/* Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching it's VBLANK */
 		if (!pstate_en) {
 			context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp;
@@ -2468,7 +2467,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
 				dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16;
 		}
 
-		if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && !subvp_active) {
+		if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && !subvp_in_use) {
 			/* find largest table entry that is lower than dram speed,
 			 * but lower than DPM0 still uses DPM0
 			 */
@@ -3528,7 +3527,7 @@ void dcn32_set_clock_limits(const struct _vcs_dpi_soc_bounding_box_st *soc_bb)
 void dcn32_override_min_req_memclk(struct dc *dc, struct dc_state *context)
 {
 	// WA: restrict FPO and SubVP to use first non-strobe mode (DCN32 BW issue)
-	if ((context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || resource_subvp_in_use(dc, context)) &&
+	if ((context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dcn32_subvp_in_use(dc, context)) &&
 			dc->dml.soc.num_chans <= 8) {
 		int num_mclk_levels = dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
index 3d12dabd39e4..475c4ec43c01 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
@@ -166,9 +166,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
 	.num_states = 5,
 	.sr_exit_time_us = 14.0,
 	.sr_enter_plus_exit_time_us = 16.0,
-	.sr_exit_z8_time_us = 525.0,
-	.sr_enter_plus_exit_z8_time_us = 715.0,
-	.fclk_change_latency_us = 20.0,
+	.sr_exit_z8_time_us = 210.0,
+	.sr_enter_plus_exit_z8_time_us = 320.0,
+	.fclk_change_latency_us = 24.0,
 	.usr_retraining_latency_us = 2,
 	.writeback_latency_us = 12.0,
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
index b95bf27f2fe2..9be5ebf3a8c0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
@@ -6229,7 +6229,7 @@ static void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *m
 				CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
 				CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
 				CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
-				CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes;
+				CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
 				CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k];
 				CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
 				CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k];
@@ -6329,7 +6329,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
 				mode_lib->ms.NoOfDPPThisState,
 				mode_lib->ms.dpte_group_bytes,
 				s->HostVMInefficiencyFactor,
-				mode_lib->ms.soc.hostvm_min_page_size_kbytes,
+				mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
 				mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels);
 
 		s->NextMaxVStartup = s->MaxVStartupAllPlanes[j];
@@ -6542,7 +6542,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
 						mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
 						mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels,
 						mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
-						mode_lib->ms.soc.hostvm_min_page_size_kbytes,
+						mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
 						mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k],
 						mode_lib->ms.MetaRowBytes[j][k],
 						mode_lib->ms.DPTEBytesPerRow[j][k],
@@ -7687,7 +7687,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib)
 		CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
 		CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
 		CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes;
-		CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes;
+		CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
 		CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn;
 		CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode;
 		CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceededPerState;
@@ -7957,7 +7957,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib)
 		UseMinimumDCFCLK_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
 		UseMinimumDCFCLK_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
 		UseMinimumDCFCLK_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
-		UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes;
+		UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
 		UseMinimumDCFCLK_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
 		UseMinimumDCFCLK_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
 		UseMinimumDCFCLK_params->ImmediateFlipRequirement = s->ImmediateFlipRequiredFinal;
@@ -8699,7 +8699,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
 	CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
 	CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
 	CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes;
-	CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes;
+	CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
 	CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn;
 	CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode;
 	CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0];
@@ -8805,7 +8805,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
 			mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
 			locals->dpte_group_bytes,
 			s->HostVMInefficiencyFactor,
-			mode_lib->ms.soc.hostvm_min_page_size_kbytes,
+			mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
 			mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels);
 
 	locals->TCalc = 24.0 / locals->DCFCLKDeepSleep;
@@ -8995,7 +8995,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
 			CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
 			CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
 			CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
-			CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes;
+			CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
 			CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k];
 			CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
 			CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k];
@@ -9240,7 +9240,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
 						mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
 						mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels,
 						mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
-						mode_lib->ms.soc.hostvm_min_page_size_kbytes,
+						mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
 						locals->PDEAndMetaPTEBytesFrame[k],
 						locals->MetaRowByte[k],
 						locals->PixelPTEBytesPerRow[k],
@@ -9446,13 +9446,13 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
 		CalculateWatermarks_params->CompressedBufferSizeInkByte = locals->CompressedBufferSizeInkByte;
 
 		// Output
-		CalculateWatermarks_params->Watermark = &s->dummy_watermark; // Watermarks *Watermark
-		CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[0];
-		CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0][0]; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[]
-		CalculateWatermarks_params->SubViewportLinesNeededInMALL = &mode_lib->ms.SubViewportLinesNeededInMALL[j]; // dml_uint_t SubViewportLinesNeededInMALL[]
-		CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[0];
-		CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // dml_float_t *MaxActiveFCLKChangeLatencySupported
-		CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[0];
+		CalculateWatermarks_params->Watermark = &locals->Watermark; // Watermarks *Watermark
+		CalculateWatermarks_params->DRAMClockChangeSupport = &locals->DRAMClockChangeSupport;
+		CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = locals->MaxActiveDRAMClockChangeLatencySupported; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[]
+		CalculateWatermarks_params->SubViewportLinesNeededInMALL = locals->SubViewportLinesNeededInMALL; // dml_uint_t SubViewportLinesNeededInMALL[]
+		CalculateWatermarks_params->FCLKChangeSupport = &locals->FCLKChangeSupport;
+		CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &locals->MaxActiveFCLKChangeLatencySupported; // dml_float_t *MaxActiveFCLKChangeLatencySupported
+		CalculateWatermarks_params->USRRetrainingSupport = &locals->USRRetrainingSupport;
 
 		CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
 			&mode_lib->scratch,
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
index fa6a93dd9629..64d01a9cd68c 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
@@ -626,8 +626,8 @@ static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st *
 		if (is_dp2p0_output_encoder(pipe))
 			out->OutputEncoder[location] = dml_dp2p0;
 		break;
-		out->OutputEncoder[location] = dml_edp;
 	case SIGNAL_TYPE_EDP:
+		out->OutputEncoder[location] = dml_edp;
 		break;
 	case SIGNAL_TYPE_HDMI_TYPE_A:
 	case SIGNAL_TYPE_DVI_SINGLE_LINK:
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
index fb328cd06cea..5660f15da291 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
@@ -1354,7 +1354,7 @@ static void build_audio_output(
 	if (state->clk_mgr &&
 		(pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT ||
 			pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)) {
-		audio_output->pll_info.dp_dto_source_clock_in_khz =
+		audio_output->pll_info.audio_dto_source_clock_in_khz =
 				state->clk_mgr->funcs->get_dp_ref_clk_frequency(
 						state->clk_mgr);
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
index 51dd2ae09b2a..6dd479e8a348 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
@@ -3076,7 +3076,7 @@ void dcn10_prepare_bandwidth(
 			context,
 			false);
 
-	dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub,
+	dc->wm_optimized_required = hubbub->funcs->program_watermarks(hubbub,
 			&context->bw_ctx.bw.dcn.watermarks,
 			dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000,
 			true);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
index bc71a9b058fe..e931342fcf4c 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
@@ -1882,42 +1882,6 @@ static void dcn20_program_pipe(
 	}
 }
 
-static void update_vmin_vmax_fams(struct dc *dc,
-		struct dc_state *context)
-{
-	uint32_t i;
-	struct drr_params params = {0};
-	bool subvp_in_use = resource_subvp_in_use(dc, context);
-
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-
-		if (resource_is_pipe_type(pipe, OTG_MASTER) &&
-				((subvp_in_use && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM &&
-				pipe->stream->allow_freesync) || (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && pipe->stream->fpo_in_use))) {
-			if (!pipe->stream->vrr_active_variable && !pipe->stream->vrr_active_fixed) {
-				struct timing_generator *tg = context->res_ctx.pipe_ctx[i].stream_res.tg;
-
-				/* DRR should be configured already if we're in active variable
-				 * or active fixed, so only program if we're not in this state
-				 */
-				params.vertical_total_min = pipe->stream->timing.v_total;
-				params.vertical_total_max = pipe->stream->timing.v_total;
-				tg->funcs->set_drr(tg, &params);
-			}
-		} else {
-			if (resource_is_pipe_type(pipe, OTG_MASTER) &&
-					!pipe->stream->vrr_active_variable &&
-					!pipe->stream->vrr_active_fixed) {
-				struct timing_generator *tg = context->res_ctx.pipe_ctx[i].stream_res.tg;
-				params.vertical_total_min = 0;
-				params.vertical_total_max = 0;
-				tg->funcs->set_drr(tg, &params);
-			}
-		}
-	}
-}
-
 void dcn20_program_front_end_for_ctx(
 		struct dc *dc,
 		struct dc_state *context)
@@ -1994,7 +1958,6 @@ void dcn20_program_front_end_for_ctx(
 				&& context->res_ctx.pipe_ctx[i].stream)
 			hws->funcs.blank_pixel_data(dc, &context->res_ctx.pipe_ctx[i], true);
 
-	update_vmin_vmax_fams(dc, context);
 
 	/* Disconnect mpcc */
 	for (i = 0; i < dc->res_pool->pipe_count; i++)
@@ -2196,10 +2159,10 @@ void dcn20_prepare_bandwidth(
 	}
 
 	/* program dchubbub watermarks:
-	 * For assigning optimized_required, use |= operator since we don't want
+	 * For assigning wm_optimized_required, use |= operator since we don't want
 	 * to clear the value if the optimize has not happened yet
 	 */
-	dc->optimized_required |= hubbub->funcs->program_watermarks(hubbub,
+	dc->wm_optimized_required |= hubbub->funcs->program_watermarks(hubbub,
 					&context->bw_ctx.bw.dcn.watermarks,
 					dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000,
 					false);
@@ -2212,10 +2175,10 @@ void dcn20_prepare_bandwidth(
 	if (hubbub->funcs->program_compbuf_size) {
 		if (context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes) {
 			compbuf_size_kb = context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes;
-			dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes);
+			dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes);
 		} else {
 			compbuf_size_kb = context->bw_ctx.bw.dcn.compbuf_size_kb;
-			dc->optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb);
+			dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb);
 		}
 
 		hubbub->funcs->program_compbuf_size(hubbub, compbuf_size_kb, false);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
index cbba39d251e5..17e014d3bdc8 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
@@ -333,6 +333,7 @@ struct clk_mgr {
 	bool force_smu_not_present;
 	bool dc_mode_softmax_enabled;
 	int dprefclk_khz; // Used by program pixel clock in clock source funcs, need to figureout where this goes
+	int dp_dto_source_clock_in_khz; // Used to program DP DTO with ss adjustment on DCN314
 	int dentist_vco_freq_khz;
 	struct clk_state_registers_and_bypass boot_snapshot;
 	struct clk_bw_params *bw_params;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h
index 1d51fed12e20..c958ef37b78a 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/resource.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h
@@ -609,9 +609,6 @@ bool dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy(
 		struct pipe_ctx *sec_pipe,
 		bool odm);
 
-bool resource_subvp_in_use(struct dc *dc,
-		struct dc_state *context);
-
 /* A test harness interface that modifies dp encoder resources in the given dc
  * state and bypasses the need to revalidate. The interface assumes that the
  * test harness interface is called with pre-validated link config stored in the
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
index 5fe8b4871c77..3cbfbf8d107e 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
@@ -900,11 +900,15 @@ bool link_set_dsc_pps_packet(struct pipe_ctx *pipe_ctx, bool enable, bool immedi
 {
 	struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
 	struct dc_stream_state *stream = pipe_ctx->stream;
-	DC_LOGGER_INIT(dsc->ctx->logger);
 
-	if (!pipe_ctx->stream->timing.flags.DSC || !dsc)
+	if (!pipe_ctx->stream->timing.flags.DSC)
 		return false;
 
+	if (!dsc)
+		return false;
+
+	DC_LOGGER_INIT(dsc->ctx->logger);
+
 	if (enable) {
 		struct dsc_config dsc_cfg;
 		uint8_t dsc_packed_pps[128];
@@ -2005,17 +2009,11 @@ static enum dc_status enable_link_dp(struct dc_state *state,
 		}
 	}
 
-	/*
-	 * If the link is DP-over-USB4 do the following:
-	 * - Train with fallback when enabling DPIA link. Conventional links are
+	/* Train with fallback when enabling DPIA link. Conventional links are
 	 * trained with fallback during sink detection.
-	 * - Allocate only what the stream needs for bw in Gbps. Inform the CM
-	 * in case stream needs more or less bw from what has been allocated
-	 * earlier at plug time.
 	 */
-	if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
+	if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
 		do_fallback = true;
-	}
 
 	/*
 	 * Temporary w/a to get DP2.0 link rates to work with SST.
@@ -2197,6 +2195,32 @@ static enum dc_status enable_link(
 	return status;
 }
 
+static bool allocate_usb4_bandwidth_for_stream(struct dc_stream_state *stream, int bw)
+{
+	return true;
+}
+
+static bool allocate_usb4_bandwidth(struct dc_stream_state *stream)
+{
+	bool ret;
+
+	int bw = dc_bandwidth_in_kbps_from_timing(&stream->timing,
+			dc_link_get_highest_encoding_format(stream->sink->link));
+
+	ret = allocate_usb4_bandwidth_for_stream(stream, bw);
+
+	return ret;
+}
+
+static bool deallocate_usb4_bandwidth(struct dc_stream_state *stream)
+{
+	bool ret;
+
+	ret = allocate_usb4_bandwidth_for_stream(stream, 0);
+
+	return ret;
+}
+
 void link_set_dpms_off(struct pipe_ctx *pipe_ctx)
 {
 	struct dc  *dc = pipe_ctx->stream->ctx->dc;
@@ -2232,6 +2256,9 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx)
 	update_psp_stream_config(pipe_ctx, true);
 	dc->hwss.blank_stream(pipe_ctx);
 
+	if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
+		deallocate_usb4_bandwidth(pipe_ctx->stream);
+
 	if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
 		deallocate_mst_payload(pipe_ctx);
 	else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT &&
@@ -2474,6 +2501,9 @@ void link_set_dpms_on(
 		}
 	}
 
+	if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
+		allocate_usb4_bandwidth(pipe_ctx->stream);
+
 	if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
 		allocate_mst_payload(pipe_ctx);
 	else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT &&
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c
index b45fda96eaf6..8fe66c367850 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c
@@ -346,23 +346,61 @@ enum dc_status link_validate_mode_timing(
 	return DC_OK;
 }
 
+/*
+ * This function calculates the bandwidth required for the stream timing
+ * and aggregates the stream bandwidth for the respective dpia link
+ *
+ * @stream: pointer to the dc_stream_state struct instance
+ * @num_streams: number of streams to be validated
+ *
+ * return: true if validation is succeeded
+ */
 bool link_validate_dpia_bandwidth(const struct dc_stream_state *stream, const unsigned int num_streams)
 {
-	bool ret = true;
-	int bw_needed[MAX_DPIA_NUM];
-	struct dc_link *link[MAX_DPIA_NUM];
-
-	if (!num_streams || num_streams > MAX_DPIA_NUM)
-		return ret;
+	int bw_needed[MAX_DPIA_NUM] = {0};
+	struct dc_link *dpia_link[MAX_DPIA_NUM] = {0};
+	int num_dpias = 0;
 
 	for (uint8_t i = 0; i < num_streams; ++i) {
+		if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT) {
+			/* new dpia sst stream, check whether it exceeds max dpia */
+			if (num_dpias >= MAX_DPIA_NUM)
+				return false;
 
-		link[i] = stream[i].link;
-		bw_needed[i] = dc_bandwidth_in_kbps_from_timing(&stream[i].timing,
-				dc_link_get_highest_encoding_format(link[i]));
+			dpia_link[num_dpias] = stream[i].link;
+			bw_needed[num_dpias] = dc_bandwidth_in_kbps_from_timing(&stream[i].timing,
+					dc_link_get_highest_encoding_format(dpia_link[num_dpias]));
+			num_dpias++;
+		} else if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
+			uint8_t j = 0;
+			/* check whether its a known dpia link */
+			for (; j < num_dpias; ++j) {
+				if (dpia_link[j] == stream[i].link)
+					break;
+			}
+
+			if (j == num_dpias) {
+				/* new dpia mst stream, check whether it exceeds max dpia */
+				if (num_dpias >= MAX_DPIA_NUM)
+					return false;
+				else {
+					dpia_link[j] = stream[i].link;
+					num_dpias++;
+				}
+			}
+
+			bw_needed[j] += dc_bandwidth_in_kbps_from_timing(&stream[i].timing,
+				dc_link_get_highest_encoding_format(dpia_link[j]));
+		}
 	}
 
-	ret = dpia_validate_usb4_bw(link, bw_needed, num_streams);
+	/* Include dp overheads */
+	for (uint8_t i = 0; i < num_dpias; ++i) {
+		int dp_overhead = 0;
+
+		dp_overhead = link_dp_dpia_get_dp_overhead_in_dp_tunneling(dpia_link[i]);
+		bw_needed[i] += dp_overhead;
+	}
 
-	return ret;
+	return dpia_validate_usb4_bw(dpia_link, bw_needed, num_dpias);
 }
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c
index 982eda3c46f5..6af42ba9885c 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c
@@ -82,25 +82,33 @@ bool dpia_query_hpd_status(struct dc_link *link)
 {
 	union dmub_rb_cmd cmd = {0};
 	struct dc_dmub_srv *dmub_srv = link->ctx->dmub_srv;
-	bool is_hpd_high = false;
 
 	/* prepare QUERY_HPD command */
 	cmd.query_hpd.header.type = DMUB_CMD__QUERY_HPD_STATE;
 	cmd.query_hpd.data.instance = link->link_id.enum_id - ENUM_ID_1;
 	cmd.query_hpd.data.ch_type = AUX_CHANNEL_DPIA;
 
-	/* Return HPD status reported by DMUB if query successfully executed. */
-	if (dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
-	    cmd.query_hpd.data.status == AUX_RET_SUCCESS)
-		is_hpd_high = cmd.query_hpd.data.result;
-
-	DC_LOG_DEBUG("%s: link(%d) dpia(%d) cmd_status(%d) result(%d)\n",
-		__func__,
-		link->link_index,
-		link->link_id.enum_id - ENUM_ID_1,
-		cmd.query_hpd.data.status,
-		cmd.query_hpd.data.result);
-
-	return is_hpd_high;
+	/* Query dpia hpd status from dmub */
+	if (dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd,
+		DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) &&
+	    cmd.query_hpd.data.status == AUX_RET_SUCCESS) {
+		DC_LOG_DEBUG("%s: for link(%d) dpia(%d) success, current_hpd_status(%d) new_hpd_status(%d)\n",
+			__func__,
+			link->link_index,
+			link->link_id.enum_id - ENUM_ID_1,
+			link->hpd_status,
+			cmd.query_hpd.data.result);
+		link->hpd_status = cmd.query_hpd.data.result;
+	} else {
+		DC_LOG_ERROR("%s: for link(%d) dpia(%d) failed with status(%d), current_hpd_status(%d) new_hpd_status(0)\n",
+			__func__,
+			link->link_index,
+			link->link_id.enum_id - ENUM_ID_1,
+			cmd.query_hpd.data.status,
+			link->hpd_status);
+		link->hpd_status = false;
+	}
+
+	return link->hpd_status;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
index a7aa8c9da868..dd0d2b206462 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c
@@ -54,12 +54,18 @@ static bool get_bw_alloc_proceed_flag(struct dc_link *tmp)
 static void reset_bw_alloc_struct(struct dc_link *link)
 {
 	link->dpia_bw_alloc_config.bw_alloc_enabled = false;
-	link->dpia_bw_alloc_config.sink_verified_bw = 0;
-	link->dpia_bw_alloc_config.sink_max_bw = 0;
+	link->dpia_bw_alloc_config.link_verified_bw = 0;
+	link->dpia_bw_alloc_config.link_max_bw = 0;
+	link->dpia_bw_alloc_config.allocated_bw = 0;
 	link->dpia_bw_alloc_config.estimated_bw = 0;
 	link->dpia_bw_alloc_config.bw_granularity = 0;
+	link->dpia_bw_alloc_config.dp_overhead = 0;
 	link->dpia_bw_alloc_config.response_ready = false;
-	link->dpia_bw_alloc_config.sink_allocated_bw = 0;
+	link->dpia_bw_alloc_config.nrd_max_lane_count = 0;
+	link->dpia_bw_alloc_config.nrd_max_link_rate = 0;
+	for (int i = 0; i < MAX_SINKS_PER_LINK; i++)
+		link->dpia_bw_alloc_config.remote_sink_req_bw[i] = 0;
+	DC_LOG_DEBUG("reset usb4 bw alloc of link(%d)\n", link->link_index);
 }
 
 #define BW_GRANULARITY_0 4 // 0.25 Gbps
@@ -210,8 +216,8 @@ static int get_host_router_total_dp_tunnel_bw(const struct dc *dc, uint8_t hr_in
 				link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) &&
 				(link_dpia_secondary->hpd_status &&
 				link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled)) {
-				total_bw += link_dpia_primary->dpia_bw_alloc_config.estimated_bw +
-					link_dpia_secondary->dpia_bw_alloc_config.sink_allocated_bw;
+					total_bw += link_dpia_primary->dpia_bw_alloc_config.estimated_bw +
+						link_dpia_secondary->dpia_bw_alloc_config.allocated_bw;
 			} else if (link_dpia_primary->hpd_status &&
 					link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) {
 				total_bw = link_dpia_primary->dpia_bw_alloc_config.estimated_bw;
@@ -264,7 +270,7 @@ static void set_usb4_req_bw_req(struct dc_link *link, int req_bw)
 
 	/* Error check whether requested and allocated are equal */
 	req_bw = requested_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity);
-	if (req_bw == link->dpia_bw_alloc_config.sink_allocated_bw) {
+	if (req_bw == link->dpia_bw_alloc_config.allocated_bw) {
 		DC_LOG_ERROR("%s: Request bw equals to allocated bw for link(%d)\n",
 			__func__, link->link_index);
 	}
@@ -387,9 +393,9 @@ void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t res
 		DC_LOG_DEBUG("%s: BW REQ SUCCESS for DP-TX Request for link(%d)\n",
 			__func__, link->link_index);
 		DC_LOG_DEBUG("%s: current allocated_bw(%d), new allocated_bw(%d)\n",
-			__func__, link->dpia_bw_alloc_config.sink_allocated_bw, bw_needed);
+			__func__, link->dpia_bw_alloc_config.allocated_bw, bw_needed);
 
-		link->dpia_bw_alloc_config.sink_allocated_bw = bw_needed;
+		link->dpia_bw_alloc_config.allocated_bw = bw_needed;
 
 		link->dpia_bw_alloc_config.response_ready = true;
 		break;
@@ -427,8 +433,8 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea
 	if (link->hpd_status && peak_bw > 0) {
 
 		// If DP over USB4 then we need to check BW allocation
-		link->dpia_bw_alloc_config.sink_max_bw = peak_bw;
-		set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.sink_max_bw);
+		link->dpia_bw_alloc_config.link_max_bw = peak_bw;
+		set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.link_max_bw);
 
 		do {
 			if (timeout > 0)
@@ -440,8 +446,8 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea
 
 		if (!timeout)
 			ret = 0;// ERROR TIMEOUT waiting for response for allocating bw
-		else if (link->dpia_bw_alloc_config.sink_allocated_bw > 0)
-			ret = link->dpia_bw_alloc_config.sink_allocated_bw;
+		else if (link->dpia_bw_alloc_config.allocated_bw > 0)
+			ret = link->dpia_bw_alloc_config.allocated_bw;
 	}
 	//2. Cold Unplug
 	else if (!link->hpd_status)
@@ -450,7 +456,6 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea
 out:
 	return ret;
 }
-
 bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw)
 {
 	bool ret = false;
@@ -458,7 +463,7 @@ bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int r
 
 	DC_LOG_DEBUG("%s: ENTER: link(%d), hpd_status(%d), current allocated_bw(%d), req_bw(%d)\n",
 		__func__, link->link_index, link->hpd_status,
-		link->dpia_bw_alloc_config.sink_allocated_bw, req_bw);
+		link->dpia_bw_alloc_config.allocated_bw, req_bw);
 
 	if (!get_bw_alloc_proceed_flag(link))
 		goto out;
@@ -523,3 +528,30 @@ bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed_per_dpia, const
 
 	return ret;
 }
+
+int link_dp_dpia_get_dp_overhead_in_dp_tunneling(struct dc_link *link)
+{
+	int dp_overhead = 0, link_mst_overhead = 0;
+
+	if (!get_bw_alloc_proceed_flag((link)))
+		return dp_overhead;
+
+	/* if its mst link, add MTPH overhead */
+	if ((link->type == dc_connection_mst_branch) &&
+		!link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED) {
+		/* For 8b/10b encoding: MTP is 64 time slots long, slot 0 is used for MTPH
+		 * MST overhead is 1/64 of link bandwidth (excluding any overhead)
+		 */
+		const struct dc_link_settings *link_cap =
+			dc_link_get_link_cap(link);
+		uint32_t link_bw_in_kbps = (uint32_t)link_cap->link_rate *
+					   (uint32_t)link_cap->lane_count *
+					   LINK_RATE_REF_FREQ_IN_KHZ * 8;
+		link_mst_overhead = (link_bw_in_kbps / 64) + ((link_bw_in_kbps % 64) ? 1 : 0);
+	}
+
+	/* add all the overheads */
+	dp_overhead = link_mst_overhead;
+
+	return dp_overhead;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
index 981bc4eb6120..3b6d8494f9d5 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h
@@ -99,4 +99,13 @@ void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t res
  */
 bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed, const unsigned int num_dpias);
 
+/*
+ * Obtain all the DP overheads in dp tunneling for the dpia link
+ *
+ * @link: pointer to the dc_link struct instance
+ *
+ * return: DP overheads in DP tunneling
+ */
+int link_dp_dpia_get_dp_overhead_in_dp_tunneling(struct dc_link *link);
+
 #endif /* DC_INC_LINK_DP_DPIA_BW_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
index 7f1196528218..046d3e205415 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c
@@ -930,8 +930,8 @@ bool edp_get_replay_state(const struct dc_link *link, uint64_t *state)
 bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream)
 {
 	/* To-do: Setup Replay */
-	struct dc *dc = link->ctx->dc;
-	struct dmub_replay *replay = dc->res_pool->replay;
+	struct dc *dc;
+	struct dmub_replay *replay;
 	int i;
 	unsigned int panel_inst;
 	struct replay_context replay_context = { 0 };
@@ -947,6 +947,10 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream
 	if (!link)
 		return false;
 
+	dc = link->ctx->dc;
+
+	replay = dc->res_pool->replay;
+
 	if (!replay)
 		return false;
 
@@ -975,8 +979,7 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream
 
 	replay_context.line_time_in_ns = lineTimeInNs;
 
-	if (replay)
-		link->replay_settings.replay_feature_enabled =
+	link->replay_settings.replay_feature_enabled =
 			replay->funcs->replay_copy_settings(replay, link, &replay_context, panel_inst);
 	if (link->replay_settings.replay_feature_enabled) {
 
diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
index 91ea0d4da06a..823493543325 100644
--- a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
@@ -166,12 +166,6 @@ static bool optc32_disable_crtc(struct timing_generator *optc)
 {
 	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
-	/* disable otg request until end of the first line
-	 * in the vertical blank region
-	 */
-	REG_UPDATE(OTG_CONTROL,
-			OTG_MASTER_EN, 0);
-
 	REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT,
 			OPTC_SEG0_SRC_SEL, 0xf,
 			OPTC_SEG1_SRC_SEL, 0xf,
@@ -179,6 +173,15 @@ static bool optc32_disable_crtc(struct timing_generator *optc)
 			OPTC_SEG3_SRC_SEL, 0xf,
 			OPTC_NUM_OF_INPUT_SEGMENT, 0);
 
+	REG_UPDATE(OPTC_MEMORY_CONFIG,
+			OPTC_MEM_SEL, 0);
+
+	/* disable otg request until end of the first line
+	 * in the vertical blank region
+	 */
+	REG_UPDATE(OTG_CONTROL,
+			OTG_MASTER_EN, 0);
+
 	REG_UPDATE(CONTROL,
 			VTG0_ENABLE, 0);
 
@@ -205,6 +208,13 @@ static void optc32_disable_phantom_otg(struct timing_generator *optc)
 {
 	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
+	REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT,
+			OPTC_SEG0_SRC_SEL, 0xf,
+			OPTC_SEG1_SRC_SEL, 0xf,
+			OPTC_SEG2_SRC_SEL, 0xf,
+			OPTC_SEG3_SRC_SEL, 0xf,
+			OPTC_NUM_OF_INPUT_SEGMENT, 0);
+
 	REG_UPDATE(OTG_CONTROL, OTG_MASTER_EN, 0);
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c
index 08a59cf449ca..5b1547508850 100644
--- a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c
@@ -138,12 +138,6 @@ static bool optc35_disable_crtc(struct timing_generator *optc)
 {
 	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
-	/* disable otg request until end of the first line
-	 * in the vertical blank region
-	 */
-	REG_UPDATE(OTG_CONTROL,
-			OTG_MASTER_EN, 0);
-
 	REG_UPDATE_5(OPTC_DATA_SOURCE_SELECT,
 			OPTC_SEG0_SRC_SEL, 0xf,
 			OPTC_SEG1_SRC_SEL, 0xf,
@@ -151,6 +145,15 @@ static bool optc35_disable_crtc(struct timing_generator *optc)
 			OPTC_SEG3_SRC_SEL, 0xf,
 			OPTC_NUM_OF_INPUT_SEGMENT, 0);
 
+	REG_UPDATE(OPTC_MEMORY_CONFIG,
+			OPTC_MEM_SEL, 0);
+
+	/* disable otg request until end of the first line
+	 * in the vertical blank region
+	 */
+	REG_UPDATE(OTG_CONTROL,
+			OTG_MASTER_EN, 0);
+
 	REG_UPDATE(CONTROL,
 			VTG0_ENABLE, 0);
 
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
index ac04a9c9a3d8..c4d71e7f18af 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
@@ -1899,7 +1899,7 @@ int dcn32_populate_dml_pipes_from_context(
 
 static struct dc_cap_funcs cap_funcs = {
 	.get_dcc_compression_cap = dcn20_get_dcc_compression_cap,
-	.get_subvp_en = resource_subvp_in_use,
+	.get_subvp_en = dcn32_subvp_in_use,
 };
 
 void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context,
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
index 62611acd4bcb..0c87b0fabba7 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h
@@ -131,6 +131,9 @@ void dcn32_merge_pipes_for_subvp(struct dc *dc,
 bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc,
 		struct dc_state *context);
 
+bool dcn32_subvp_in_use(struct dc *dc,
+		struct dc_state *context);
+
 bool dcn32_mpo_in_use(struct dc_state *context);
 
 bool dcn32_any_surfaces_rotated(struct dc *dc, struct dc_state *context);
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
index e1ab207c46f1..74412e5f03fe 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
@@ -1574,7 +1574,7 @@ static void dcn321_destroy_resource_pool(struct resource_pool **pool)
 
 static struct dc_cap_funcs cap_funcs = {
 	.get_dcc_compression_cap = dcn20_get_dcc_compression_cap,
-	.get_subvp_en = resource_subvp_in_use,
+	.get_subvp_en = dcn32_subvp_in_use,
 };
 
 static void dcn321_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
diff --git a/drivers/gpu/drm/amd/display/include/audio_types.h b/drivers/gpu/drm/amd/display/include/audio_types.h
index 66a54da0641c..915a031a43cb 100644
--- a/drivers/gpu/drm/amd/display/include/audio_types.h
+++ b/drivers/gpu/drm/amd/display/include/audio_types.h
@@ -64,7 +64,7 @@ enum audio_dto_source {
 /* PLL information required for AZALIA DTO calculation */
 
 struct audio_pll_info {
-	uint32_t dp_dto_source_clock_in_khz;
+	uint32_t audio_dto_source_clock_in_khz;
 	uint32_t feed_back_divider;
 	enum audio_dto_source dto_source;
 	bool ss_enabled;
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h
index 7ee3d291120d..6f80bfa7e41a 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h
@@ -8707,10 +8707,10 @@
 #define regBIF_BX1_MM_CFGREGS_CNTL_BASE_IDX                                                             2
 #define regBIF_BX1_BX_RESET_CNTL                                                                        0x00f0
 #define regBIF_BX1_BX_RESET_CNTL_BASE_IDX                                                               2
-#define regBIF_BX1_INTERRUPT_CNTL                                                                       0x8e11
-#define regBIF_BX1_INTERRUPT_CNTL_BASE_IDX                                                              5
-#define regBIF_BX1_INTERRUPT_CNTL2                                                                      0x8e12
-#define regBIF_BX1_INTERRUPT_CNTL2_BASE_IDX                                                             5
+#define regBIF_BX1_INTERRUPT_CNTL                                                                       0x00f1
+#define regBIF_BX1_INTERRUPT_CNTL_BASE_IDX                                                              2
+#define regBIF_BX1_INTERRUPT_CNTL2                                                                      0x00f2
+#define regBIF_BX1_INTERRUPT_CNTL2_BASE_IDX                                                             2
 #define regBIF_BX1_CLKREQB_PAD_CNTL                                                                     0x00f8
 #define regBIF_BX1_CLKREQB_PAD_CNTL_BASE_IDX                                                            2
 #define regBIF_BX1_BIF_FEATURES_CONTROL_MISC                                                            0x00fb
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index f3cb490fe79b..087d57850304 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -4349,11 +4349,19 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
 	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size))
 		seq_printf(m, "\t%u mV (VDDNB)\n", value);
 	size = sizeof(uint32_t);
-	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER, (void *)&query, &size))
-		seq_printf(m, "\t%u.%02u W (average GPU)\n", query >> 8, query & 0xff);
+	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER, (void *)&query, &size)) {
+		if (adev->flags & AMD_IS_APU)
+			seq_printf(m, "\t%u.%02u W (average SoC including CPU)\n", query >> 8, query & 0xff);
+		else
+			seq_printf(m, "\t%u.%02u W (average SoC)\n", query >> 8, query & 0xff);
+	}
 	size = sizeof(uint32_t);
-	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_INPUT_POWER, (void *)&query, &size))
-		seq_printf(m, "\t%u.%02u W (current GPU)\n", query >> 8, query & 0xff);
+	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_INPUT_POWER, (void *)&query, &size)) {
+		if (adev->flags & AMD_IS_APU)
+			seq_printf(m, "\t%u.%02u W (current SoC including CPU)\n", query >> 8, query & 0xff);
+		else
+			seq_printf(m, "\t%u.%02u W (current SoC)\n", query >> 8, query & 0xff);
+	}
 	size = sizeof(value);
 	seq_printf(m, "\n");
 
@@ -4379,9 +4387,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
 		/* VCN clocks */
 		if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCN_POWER_STATE, (void *)&value, &size)) {
 			if (!value) {
-				seq_printf(m, "VCN: Disabled\n");
+				seq_printf(m, "VCN: Powered down\n");
 			} else {
-				seq_printf(m, "VCN: Enabled\n");
+				seq_printf(m, "VCN: Powered up\n");
 				if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, (void *)&value, &size))
 					seq_printf(m, "\t%u MHz (DCLK)\n", value/100);
 				if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, (void *)&value, &size))
@@ -4393,9 +4401,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
 		/* UVD clocks */
 		if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_POWER, (void *)&value, &size)) {
 			if (!value) {
-				seq_printf(m, "UVD: Disabled\n");
+				seq_printf(m, "UVD: Powered down\n");
 			} else {
-				seq_printf(m, "UVD: Enabled\n");
+				seq_printf(m, "UVD: Powered up\n");
 				if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, (void *)&value, &size))
 					seq_printf(m, "\t%u MHz (DCLK)\n", value/100);
 				if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, (void *)&value, &size))
@@ -4407,9 +4415,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
 		/* VCE clocks */
 		if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_POWER, (void *)&value, &size)) {
 			if (!value) {
-				seq_printf(m, "VCE: Disabled\n");
+				seq_printf(m, "VCE: Powered down\n");
 			} else {
-				seq_printf(m, "VCE: Enabled\n");
+				seq_printf(m, "VCE: Powered up\n");
 				if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_ECCLK, (void *)&value, &size))
 					seq_printf(m, "\t%u MHz (ECCLK)\n", value/100);
 			}
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c
index f2a55c1413f5..17882f8dfdd3 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c
@@ -200,7 +200,7 @@ static int get_platform_power_management_table(
 		struct pp_hwmgr *hwmgr,
 		ATOM_Tonga_PPM_Table *atom_ppm_table)
 {
-	struct phm_ppm_table *ptr = kzalloc(sizeof(ATOM_Tonga_PPM_Table), GFP_KERNEL);
+	struct phm_ppm_table *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
 	struct phm_ppt_v1_information *pp_table_information =
 		(struct phm_ppt_v1_information *)(hwmgr->pptable);
 
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
index b1a8799e2dee..aa91730e4eaf 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
@@ -3999,6 +3999,7 @@ static int smu7_read_sensor(struct pp_hwmgr *hwmgr, int idx,
 	uint32_t sclk, mclk, activity_percent;
 	uint32_t offset, val_vid;
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	struct amdgpu_device *adev = hwmgr->adev;
 
 	/* size must be at least 4 bytes for all sensors */
 	if (*size < 4)
@@ -4042,7 +4043,21 @@ static int smu7_read_sensor(struct pp_hwmgr *hwmgr, int idx,
 		*size = 4;
 		return 0;
 	case AMDGPU_PP_SENSOR_GPU_INPUT_POWER:
-		return smu7_get_gpu_power(hwmgr, (uint32_t *)value);
+		if ((adev->asic_type != CHIP_HAWAII) &&
+		    (adev->asic_type != CHIP_BONAIRE) &&
+		    (adev->asic_type != CHIP_FIJI) &&
+		    (adev->asic_type != CHIP_TONGA))
+			return smu7_get_gpu_power(hwmgr, (uint32_t *)value);
+		else
+			return -EOPNOTSUPP;
+	case AMDGPU_PP_SENSOR_GPU_AVG_POWER:
+		if ((adev->asic_type != CHIP_HAWAII) &&
+		    (adev->asic_type != CHIP_BONAIRE) &&
+		    (adev->asic_type != CHIP_FIJI) &&
+		    (adev->asic_type != CHIP_TONGA))
+			return -EOPNOTSUPP;
+		else
+			return smu7_get_gpu_power(hwmgr, (uint32_t *)value);
 	case AMDGPU_PP_SENSOR_VDDGFX:
 		if ((data->vr_config & VRCONF_VDDGFX_MASK) ==
 		    (VR_SVI2_PLANE_2 << VRCONF_VDDGFX_SHIFT))
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index cc2a02ceea85..3c98a8a0386a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -970,7 +970,9 @@ static int smu_v13_0_6_print_clks(struct smu_context *smu, char *buf, int size,
 			if (i < (clocks.num_levels - 1))
 				clk2 = clocks.data[i + 1].clocks_in_khz / 1000;
 
-			if (curr_clk >= clk1 && curr_clk < clk2) {
+			if (curr_clk == clk1) {
+				level = i;
+			} else if (curr_clk >= clk1 && curr_clk < clk2) {
 				level = (curr_clk - clk1) <= (clk2 - curr_clk) ?
 						i :
 						i + 1;
@@ -2234,17 +2236,18 @@ static int smu_v13_0_6_mode2_reset(struct smu_context *smu)
 			continue;
 		}
 
-		if (ret) {
-			dev_err(adev->dev,
-				"failed to send mode2 message \tparam: 0x%08x error code %d\n",
-				SMU_RESET_MODE_2, ret);
+		if (ret)
 			goto out;
-		}
+
 	} while (ret == -ETIME && timeout);
 
 out:
 	mutex_unlock(&smu->message_lock);
 
+	if (ret)
+		dev_err(adev->dev, "failed to send mode2 reset, error code %d\n",
+			ret);
+
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c
index a6602c012671..3dda885df5b2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c
@@ -108,6 +108,9 @@ nouveau_vma_new(struct nouveau_bo *nvbo, struct nouveau_vmm *vmm,
 	} else {
 		ret = nvif_vmm_get(&vmm->vmm, PTES, false, mem->mem.page, 0,
 				   mem->mem.size, &tmp);
+		if (ret)
+			goto done;
+
 		vma->addr = tmp.addr;
 	}
 
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index f5187b384ae9..4130945052ed 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -195,7 +195,7 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func
 		    bool use_dma_alloc, bool use_dma32)
 {
 	struct ttm_global *glob = &ttm_glob;
-	int ret;
+	int ret, nid;
 
 	if (WARN_ON(vma_manager == NULL))
 		return -EINVAL;
@@ -215,7 +215,12 @@ int ttm_device_init(struct ttm_device *bdev, const struct ttm_device_funcs *func
 
 	ttm_sys_man_init(bdev);
 
-	ttm_pool_init(&bdev->pool, dev, dev_to_node(dev), use_dma_alloc, use_dma32);
+	if (dev)
+		nid = dev_to_node(dev);
+	else
+		nid = NUMA_NO_NODE;
+
+	ttm_pool_init(&bdev->pool, dev, nid, use_dma_alloc, use_dma32);
 
 	bdev->vma_manager = vma_manager;
 	spin_lock_init(&bdev->lru_lock);
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index 1cced50d8d8c..e36ae1f0d885 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -47,7 +47,7 @@ config DRM_XE
 
 config DRM_XE_DISPLAY
 	bool "Enable display support"
-	depends on DRM_XE && EXPERT && DRM_XE=m
+	depends on DRM_XE && DRM_XE=m
 	select FB_IOMEM_HELPERS
 	select I2C
 	select I2C_ALGOBIT
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 53bd2a8ba1ae..efcf0ab7a1a6 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -17,7 +17,6 @@ subdir-ccflags-y += $(call cc-option, -Wunused-const-variable)
 subdir-ccflags-y += $(call cc-option, -Wpacked-not-aligned)
 subdir-ccflags-y += $(call cc-option, -Wformat-overflow)
 subdir-ccflags-y += $(call cc-option, -Wformat-truncation)
-subdir-ccflags-y += $(call cc-option, -Wstringop-overflow)
 subdir-ccflags-y += $(call cc-option, -Wstringop-truncation)
 # The following turn off the warnings enabled by -Wextra
 ifeq ($(findstring 2, $(KBUILD_EXTRA_WARN)),)
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 412b2e7ce40c..3436fd9cf2b2 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -125,14 +125,13 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
 
 	bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
 			       ttm_bo_type_device, bo_flags);
-
-	xe_bo_lock(bo, false);
-
 	if (IS_ERR(bo)) {
 		KUNIT_FAIL(test, "Failed to create bo.\n");
 		return;
 	}
 
+	xe_bo_lock(bo, false);
+
 	kunit_info(test, "Verifying that CCS data is cleared on creation.\n");
 	ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL,
 			       test);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 7a32faa2f688..a6523df0f1d3 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -331,7 +331,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 		xe_res_first_sg(xe_bo_sg(pt), 0, pt->size, &src_it);
 
 	emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt), false,
-		 &src_it, XE_PAGE_SIZE, pt);
+		 &src_it, XE_PAGE_SIZE, pt->ttm.resource);
 
 	run_sanity_job(m, xe, bb, bb->len, "Writing PTE for our fake PT", test);
 
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 8e4a3b1f6b93..0b0e262e2166 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -125,9 +125,9 @@ static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res)
 static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
 			   u32 bo_flags, u32 *c)
 {
-	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
-
 	if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) {
+		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
+
 		bo->placements[*c] = (struct ttm_place) {
 			.mem_type = XE_PL_TT,
 		};
@@ -145,6 +145,8 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo,
 	struct xe_mem_region *vram;
 	u64 io_size;
 
+	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
+
 	vram = to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, mem_type))->vram;
 	xe_assert(xe, vram && vram->usable_size);
 	io_size = vram->io_size;
@@ -175,8 +177,6 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo,
 static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
 			 u32 bo_flags, u32 *c)
 {
-	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
-
 	if (bo->props.preferred_gt == XE_GT1) {
 		if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
 			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
@@ -193,9 +193,9 @@ static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
 static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
 			   u32 bo_flags, u32 *c)
 {
-	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
-
 	if (bo_flags & XE_BO_CREATE_STOLEN_BIT) {
+		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
+
 		bo->placements[*c] = (struct ttm_place) {
 			.mem_type = XE_PL_STOLEN,
 			.flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
@@ -442,7 +442,7 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
 
 		if (vram->mapping &&
 		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
-			mem->bus.addr = (u8 *)vram->mapping +
+			mem->bus.addr = (u8 __force *)vram->mapping +
 				mem->bus.offset;
 
 		mem->bus.offset += vram->io_start;
@@ -734,7 +734,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 			/* Create a new VMAP once kernel BO back in VRAM */
 			if (!ret && resource_is_vram(new_mem)) {
 				struct xe_mem_region *vram = res_to_mem_region(new_mem);
-				void *new_addr = vram->mapping +
+				void __iomem *new_addr = vram->mapping +
 					(new_mem->start << PAGE_SHIFT);
 
 				if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index d9ae77fe7382..b8d8da546670 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -484,7 +484,7 @@ int xe_device_probe(struct xe_device *xe)
 
 	err = xe_device_set_has_flat_ccs(xe);
 	if (err)
-		return err;
+		goto err_irq_shutdown;
 
 	err = xe_mmio_probe_vram(xe);
 	if (err)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index c45ef17b3473..5dc9127a2029 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -97,7 +97,7 @@ struct xe_mem_region {
 	 */
 	resource_size_t actual_physical_size;
 	/** @mapping: pointer to VRAM mappable space */
-	void *__iomem mapping;
+	void __iomem *mapping;
 };
 
 /**
@@ -146,7 +146,7 @@ struct xe_tile {
 		size_t size;
 
 		/** @regs: pointer to tile's MMIO space (starting with registers) */
-		void *regs;
+		void __iomem *regs;
 	} mmio;
 
 	/**
@@ -159,7 +159,7 @@ struct xe_tile {
 		size_t size;
 
 		/** @regs: pointer to tile's additional MMIO-extension space */
-		void *regs;
+		void __iomem *regs;
 	} mmio_ext;
 
 	/** @mem: memory management info for tile */
@@ -301,7 +301,7 @@ struct xe_device {
 		/** @size: size of MMIO space for device */
 		size_t size;
 		/** @regs: pointer to MMIO space for device */
-		void *regs;
+		void __iomem *regs;
 	} mmio;
 
 	/** @mem: memory info for device */
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index d30c0d0689bc..b853feed9ccc 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -115,7 +115,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	struct xe_sched_job *job;
 	struct dma_fence *rebind_fence;
 	struct xe_vm *vm;
-	bool write_locked;
+	bool write_locked, skip_retry = false;
 	ktime_t end = 0;
 	int err = 0;
 
@@ -227,7 +227,8 @@ retry:
 	}
 
 	if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) {
-		err = -EWOULDBLOCK;
+		err = -EWOULDBLOCK;	/* Aliased to -EAGAIN */
+		skip_retry = true;
 		goto err_exec;
 	}
 
@@ -337,7 +338,7 @@ err_unlock_list:
 		up_write(&vm->lock);
 	else
 		up_read(&vm->lock);
-	if (err == -EAGAIN)
+	if (err == -EAGAIN && !skip_retry)
 		goto retry;
 err_syncs:
 	for (i = 0; i < num_syncs; i++)
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 44fe8097b7cd..bcfc4127c7c5 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -67,6 +67,11 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
 	q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us;
 	q->sched_props.preempt_timeout_us =
 				hwe->eclass->sched_props.preempt_timeout_us;
+	if (q->flags & EXEC_QUEUE_FLAG_KERNEL &&
+	    q->flags & EXEC_QUEUE_FLAG_HIGH_PRIORITY)
+		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_KERNEL;
+	else
+		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;
 
 	if (xe_exec_queue_is_parallel(q)) {
 		q->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 3d7e704ec3d9..8d4b7feb8c30 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -52,8 +52,6 @@ struct xe_exec_queue {
 	struct xe_vm *vm;
 	/** @class: class of this exec queue */
 	enum xe_engine_class class;
-	/** @priority: priority of this exec queue */
-	enum xe_exec_queue_priority priority;
 	/**
 	 * @logical_mask: logical mask of where job submitted to exec queue can run
 	 */
@@ -84,6 +82,8 @@ struct xe_exec_queue {
 #define EXEC_QUEUE_FLAG_VM			BIT(4)
 /* child of VM queue for multi-tile VM jobs */
 #define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD	BIT(5)
+/* kernel exec_queue only, set priority to highest level */
+#define EXEC_QUEUE_FLAG_HIGH_PRIORITY		BIT(6)
 
 	/**
 	 * @flags: flags for this exec queue, should statically setup aside from ban
@@ -142,6 +142,8 @@ struct xe_exec_queue {
 		u32 timeslice_us;
 		/** @preempt_timeout_us: preemption timeout in micro-seconds */
 		u32 preempt_timeout_us;
+		/** @priority: priority of this exec queue */
+		enum xe_exec_queue_priority priority;
 	} sched_props;
 
 	/** @compute: compute exec queue state */
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c
index 3adfa6686e7c..e5b0f4ecdbe8 100644
--- a/drivers/gpu/drm/xe/xe_gt_freq.c
+++ b/drivers/gpu/drm/xe/xe_gt_freq.c
@@ -196,6 +196,9 @@ void xe_gt_freq_init(struct xe_gt *gt)
 	struct xe_device *xe = gt_to_xe(gt);
 	int err;
 
+	if (xe->info.skip_guc_pc)
+		return;
+
 	gt->freq = kobject_create_and_add("freq0", gt->sysfs);
 	if (!gt->freq) {
 		drm_warn(&xe->drm, "failed to add freq0 directory to %s\n",
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 482cb0df9f15..0a61390c64a7 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -60,7 +60,12 @@ static u32 guc_ctl_debug_flags(struct xe_guc *guc)
 
 static u32 guc_ctl_feature_flags(struct xe_guc *guc)
 {
-	return GUC_CTL_ENABLE_SLPC;
+	u32 flags = 0;
+
+	if (!guc_to_xe(guc)->info.skip_guc_pc)
+		flags |= GUC_CTL_ENABLE_SLPC;
+
+	return flags;
 }
 
 static u32 guc_ctl_log_params_flags(struct xe_guc *guc)
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 21ac68e3246f..54ffcfcdd41f 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -421,7 +421,7 @@ static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
 {
 	struct exec_queue_policy policy;
 	struct xe_device *xe = guc_to_xe(guc);
-	enum xe_exec_queue_priority prio = q->priority;
+	enum xe_exec_queue_priority prio = q->sched_props.priority;
 	u32 timeslice_us = q->sched_props.timeslice_us;
 	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;
 
@@ -1231,7 +1231,6 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
 	err = xe_sched_entity_init(&ge->entity, sched);
 	if (err)
 		goto err_sched;
-	q->priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;
 
 	if (xe_exec_queue_is_lr(q))
 		INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup);
@@ -1301,15 +1300,15 @@ static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
 {
 	struct xe_sched_msg *msg;
 
-	if (q->priority == priority || exec_queue_killed_or_banned(q))
+	if (q->sched_props.priority == priority || exec_queue_killed_or_banned(q))
 		return 0;
 
 	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
 	if (!msg)
 		return -ENOMEM;
 
+	q->sched_props.priority = priority;
 	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
-	q->priority = priority;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index adf1dab5eba2..e05e9e7282b6 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -62,6 +62,8 @@ struct xe_migrate {
 	 * out of the pt_bo.
 	 */
 	struct drm_suballoc_manager vm_update_sa;
+	/** @min_chunk_size: For dgfx, Minimum chunk size */
+	u64 min_chunk_size;
 };
 
 #define MAX_PREEMPTDISABLE_TRANSFER SZ_8M /* Around 1ms. */
@@ -344,7 +346,8 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 
 		m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe,
 					    EXEC_QUEUE_FLAG_KERNEL |
-					    EXEC_QUEUE_FLAG_PERMANENT);
+					    EXEC_QUEUE_FLAG_PERMANENT |
+					    EXEC_QUEUE_FLAG_HIGH_PRIORITY);
 	} else {
 		m->q = xe_exec_queue_create_class(xe, primary_gt, vm,
 						  XE_ENGINE_CLASS_COPY,
@@ -355,8 +358,6 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 		xe_vm_close_and_put(vm);
 		return ERR_CAST(m->q);
 	}
-	if (xe->info.has_usm)
-		m->q->priority = XE_EXEC_QUEUE_PRIORITY_KERNEL;
 
 	mutex_init(&m->job_mutex);
 
@@ -364,6 +365,19 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 	if (err)
 		return ERR_PTR(err);
 
+	if (IS_DGFX(xe)) {
+		if (xe_device_has_flat_ccs(xe))
+			/* min chunk size corresponds to 4K of CCS Metadata */
+			m->min_chunk_size = SZ_4K * SZ_64K /
+				xe_device_ccs_bytes(xe, SZ_64K);
+		else
+			/* Somewhat arbitrary to avoid a huge amount of blits */
+			m->min_chunk_size = SZ_64K;
+		m->min_chunk_size = roundup_pow_of_two(m->min_chunk_size);
+		drm_dbg(&xe->drm, "Migrate min chunk size is 0x%08llx\n",
+			(unsigned long long)m->min_chunk_size);
+	}
+
 	return m;
 }
 
@@ -375,16 +389,35 @@ static u64 max_mem_transfer_per_pass(struct xe_device *xe)
 	return MAX_PREEMPTDISABLE_TRANSFER;
 }
 
-static u64 xe_migrate_res_sizes(struct xe_device *xe, struct xe_res_cursor *cur)
+static u64 xe_migrate_res_sizes(struct xe_migrate *m, struct xe_res_cursor *cur)
 {
-	/*
-	 * For VRAM we use identity mapped pages so we are limited to current
-	 * cursor size. For system we program the pages ourselves so we have no
-	 * such limitation.
-	 */
-	return min_t(u64, max_mem_transfer_per_pass(xe),
-		     mem_type_is_vram(cur->mem_type) ? cur->size :
-		     cur->remaining);
+	struct xe_device *xe = tile_to_xe(m->tile);
+	u64 size = min_t(u64, max_mem_transfer_per_pass(xe), cur->remaining);
+
+	if (mem_type_is_vram(cur->mem_type)) {
+		/*
+		 * For VRAM we want to blit in chunks with sizes aligned to
+		 * min_chunk_size in order for the offset to CCS metadata to be
+		 * page-aligned. If it's the last chunk it may be smaller.
+		 *
+		 * Another constraint is that we need to limit the blit to
+		 * the VRAM block size, unless size is smaller than
+		 * min_chunk_size.
+		 */
+		u64 chunk = max_t(u64, cur->size, m->min_chunk_size);
+
+		size = min_t(u64, size, chunk);
+		if (size > m->min_chunk_size)
+			size = round_down(size, m->min_chunk_size);
+	}
+
+	return size;
+}
+
+static bool xe_migrate_allow_identity(u64 size, const struct xe_res_cursor *cur)
+{
+	/* If the chunk is not fragmented, allow identity map. */
+	return cur->size >= size;
 }
 
 static u32 pte_update_size(struct xe_migrate *m,
@@ -397,7 +430,12 @@ static u32 pte_update_size(struct xe_migrate *m,
 	u32 cmds = 0;
 
 	*L0_pt = pt_ofs;
-	if (!is_vram) {
+	if (is_vram && xe_migrate_allow_identity(*L0, cur)) {
+		/* Offset into identity map. */
+		*L0_ofs = xe_migrate_vram_ofs(tile_to_xe(m->tile),
+					      cur->start + vram_region_gpu_offset(res));
+		cmds += cmd_size;
+	} else {
 		/* Clip L0 to available size */
 		u64 size = min(*L0, (u64)avail_pts * SZ_2M);
 		u64 num_4k_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE);
@@ -413,11 +451,6 @@ static u32 pte_update_size(struct xe_migrate *m,
 
 		/* Each chunk has a single blit command */
 		cmds += cmd_size;
-	} else {
-		/* Offset into identity map. */
-		*L0_ofs = xe_migrate_vram_ofs(tile_to_xe(m->tile),
-					      cur->start + vram_region_gpu_offset(res));
-		cmds += cmd_size;
 	}
 
 	return cmds;
@@ -427,10 +460,10 @@ static void emit_pte(struct xe_migrate *m,
 		     struct xe_bb *bb, u32 at_pt,
 		     bool is_vram, bool is_comp_pte,
 		     struct xe_res_cursor *cur,
-		     u32 size, struct xe_bo *bo)
+		     u32 size, struct ttm_resource *res)
 {
 	struct xe_device *xe = tile_to_xe(m->tile);
-
+	struct xe_vm *vm = m->q->vm;
 	u16 pat_index;
 	u32 ptes;
 	u64 ofs = at_pt * XE_PAGE_SIZE;
@@ -443,13 +476,6 @@ static void emit_pte(struct xe_migrate *m,
 	else
 		pat_index = xe->pat.idx[XE_CACHE_WB];
 
-	/*
-	 * FIXME: Emitting VRAM PTEs to L0 PTs is forbidden. Currently
-	 * we're only emitting VRAM PTEs during sanity tests, so when
-	 * that's moved to a Kunit test, we should condition VRAM PTEs
-	 * on running tests.
-	 */
-
 	ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE);
 
 	while (ptes) {
@@ -469,20 +495,22 @@ static void emit_pte(struct xe_migrate *m,
 
 			addr = xe_res_dma(cur) & PAGE_MASK;
 			if (is_vram) {
-				/* Is this a 64K PTE entry? */
-				if ((m->q->vm->flags & XE_VM_FLAG_64K) &&
-				    !(cur_ofs & (16 * 8 - 1))) {
-					xe_tile_assert(m->tile, IS_ALIGNED(addr, SZ_64K));
+				if (vm->flags & XE_VM_FLAG_64K) {
+					u64 va = cur_ofs * XE_PAGE_SIZE / 8;
+
+					xe_assert(xe, (va & (SZ_64K - 1)) ==
+						  (addr & (SZ_64K - 1)));
+
 					flags |= XE_PTE_PS64;
 				}
 
-				addr += vram_region_gpu_offset(bo->ttm.resource);
+				addr += vram_region_gpu_offset(res);
 				devmem = true;
 			}
 
-			addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe,
-								 addr, pat_index,
-								 0, devmem, flags);
+			addr = vm->pt_ops->pte_encode_addr(m->tile->xe,
+							   addr, pat_index,
+							   0, devmem, flags);
 			bb->cs[bb->len++] = lower_32_bits(addr);
 			bb->cs[bb->len++] = upper_32_bits(addr);
 
@@ -694,8 +722,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 		bool usm = xe->info.has_usm;
 		u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
 
-		src_L0 = xe_migrate_res_sizes(xe, &src_it);
-		dst_L0 = xe_migrate_res_sizes(xe, &dst_it);
+		src_L0 = xe_migrate_res_sizes(m, &src_it);
+		dst_L0 = xe_migrate_res_sizes(m, &dst_it);
 
 		drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n",
 			pass++, src_L0, dst_L0);
@@ -716,6 +744,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 						      &ccs_ofs, &ccs_pt, 0,
 						      2 * avail_pts,
 						      avail_pts);
+			xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE));
 		}
 
 		/* Add copy commands size here */
@@ -728,20 +757,20 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 			goto err_sync;
 		}
 
-		if (!src_is_vram)
-			emit_pte(m, bb, src_L0_pt, src_is_vram, true, &src_it, src_L0,
-				 src_bo);
-		else
+		if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it))
 			xe_res_next(&src_it, src_L0);
-
-		if (!dst_is_vram)
-			emit_pte(m, bb, dst_L0_pt, dst_is_vram, true, &dst_it, src_L0,
-				 dst_bo);
 		else
+			emit_pte(m, bb, src_L0_pt, src_is_vram, true, &src_it, src_L0,
+				 src);
+
+		if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it))
 			xe_res_next(&dst_it, src_L0);
+		else
+			emit_pte(m, bb, dst_L0_pt, dst_is_vram, true, &dst_it, src_L0,
+				 dst);
 
 		if (copy_system_ccs)
-			emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src_bo);
+			emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src);
 
 		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
 		update_idx = bb->len;
@@ -950,7 +979,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 		bool usm = xe->info.has_usm;
 		u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
 
-		clear_L0 = xe_migrate_res_sizes(xe, &src_it);
+		clear_L0 = xe_migrate_res_sizes(m, &src_it);
 
 		drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, clear_L0);
 
@@ -977,12 +1006,12 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 
 		size -= clear_L0;
 		/* Preemption is enabled again by the ring ops. */
-		if (!clear_vram) {
-			emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0,
-				 bo);
-		} else {
+		if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it))
 			xe_res_next(&src_it, clear_L0);
-		}
+		else
+			emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0,
+				 dst);
+
 		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
 		update_idx = bb->len;
 
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index f660cfb79f50..c8c5d74b6e90 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -303,7 +303,7 @@ void xe_mmio_probe_tiles(struct xe_device *xe)
 	u8 id, tile_count = xe->info.tile_count;
 	struct xe_gt *gt = xe_root_mmio_gt(xe);
 	struct xe_tile *tile;
-	void *regs;
+	void __iomem *regs;
 	u32 mtcfg;
 
 	if (tile_count == 1)
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index d2b00d0bf1e2..e5d7d5e2bec1 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -31,7 +31,7 @@ struct xe_ttm_stolen_mgr {
 	/* GPU base offset */
 	resource_size_t stolen_base;
 
-	void *__iomem mapping;
+	void __iomem *mapping;
 };
 
 static inline struct xe_ttm_stolen_mgr *
@@ -275,7 +275,7 @@ static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe,
 	drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS));
 
 	if (mem->placement & TTM_PL_FLAG_CONTIGUOUS && mgr->mapping)
-		mem->bus.addr = (u8 *)mgr->mapping + mem->bus.offset;
+		mem->bus.addr = (u8 __force *)mgr->mapping + mem->bus.offset;
 
 	mem->bus.offset += mgr->io_base;
 	mem->bus.is_iomem = true;
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 0cfe7289b97e..10b6995fbf29 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -335,13 +335,13 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
 	down_write(&vm->lock);
 	err = drm_gpuvm_exec_lock(&vm_exec);
 	if (err)
-		return err;
+		goto out_up_write;
 
 	pfence = xe_preempt_fence_create(q, q->compute.context,
 					 ++q->compute.seqno);
 	if (!pfence) {
 		err = -ENOMEM;
-		goto out_unlock;
+		goto out_fini;
 	}
 
 	list_add(&q->compute.link, &vm->preempt.exec_queues);
@@ -364,8 +364,9 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
 
 	up_read(&vm->userptr.notifier_lock);
 
-out_unlock:
+out_fini:
 	drm_exec_fini(exec);
+out_up_write:
 	up_write(&vm->lock);
 
 	return err;
@@ -2063,9 +2064,11 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 		if (err)
 			return ERR_PTR(err);
 
-		vm_bo = drm_gpuvm_bo_find(&vm->gpuvm, obj);
-		if (!vm_bo)
-			break;
+		vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
+		if (IS_ERR(vm_bo)) {
+			xe_bo_unlock(bo);
+			return ERR_CAST(vm_bo);
+		}
 
 		ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
 		drm_gpuvm_bo_put(vm_bo);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 0a2bd72a6d76..2266358d8074 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -8132,6 +8132,19 @@ static void status_unused(struct seq_file *seq)
 	seq_printf(seq, "\n");
 }
 
+static void status_personalities(struct seq_file *seq)
+{
+	struct md_personality *pers;
+
+	seq_puts(seq, "Personalities : ");
+	spin_lock(&pers_lock);
+	list_for_each_entry(pers, &pers_list, list)
+		seq_printf(seq, "[%s] ", pers->name);
+
+	spin_unlock(&pers_lock);
+	seq_puts(seq, "\n");
+}
+
 static int status_resync(struct seq_file *seq, struct mddev *mddev)
 {
 	sector_t max_sectors, resync, res;
@@ -8273,20 +8286,10 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev)
 static void *md_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(&all_mddevs_lock)
 {
-	struct md_personality *pers;
-
-	seq_puts(seq, "Personalities : ");
-	spin_lock(&pers_lock);
-	list_for_each_entry(pers, &pers_list, list)
-		seq_printf(seq, "[%s] ", pers->name);
-
-	spin_unlock(&pers_lock);
-	seq_puts(seq, "\n");
 	seq->poll_event = atomic_read(&md_event_count);
-
 	spin_lock(&all_mddevs_lock);
 
-	return seq_list_start(&all_mddevs, *pos);
+	return seq_list_start_head(&all_mddevs, *pos);
 }
 
 static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
@@ -8297,16 +8300,23 @@ static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 static void md_seq_stop(struct seq_file *seq, void *v)
 	__releases(&all_mddevs_lock)
 {
-	status_unused(seq);
 	spin_unlock(&all_mddevs_lock);
 }
 
 static int md_seq_show(struct seq_file *seq, void *v)
 {
-	struct mddev *mddev = list_entry(v, struct mddev, all_mddevs);
+	struct mddev *mddev;
 	sector_t sectors;
 	struct md_rdev *rdev;
 
+	if (v == &all_mddevs) {
+		status_personalities(seq);
+		if (list_empty(&all_mddevs))
+			status_unused(seq);
+		return 0;
+	}
+
+	mddev = list_entry(v, struct mddev, all_mddevs);
 	if (!mddev_get(mddev))
 		return 0;
 
@@ -8382,6 +8392,10 @@ static int md_seq_show(struct seq_file *seq, void *v)
 	}
 	spin_unlock(&mddev->lock);
 	spin_lock(&all_mddevs_lock);
+
+	if (mddev == list_last_entry(&all_mddevs, struct mddev, all_mddevs))
+		status_unused(seq);
+
 	if (atomic_dec_and_test(&mddev->active))
 		__mddev_put(mddev);
 
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index aaa434f0c175..24f0d799fd98 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1968,12 +1968,12 @@ static void end_sync_write(struct bio *bio)
 }
 
 static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector,
-			   int sectors, struct page *page, int rw)
+			   int sectors, struct page *page, blk_opf_t rw)
 {
 	if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
 		/* success */
 		return 1;
-	if (rw == WRITE) {
+	if (rw == REQ_OP_WRITE) {
 		set_bit(WriteErrorSeen, &rdev->flags);
 		if (!test_and_set_bit(WantReplacement,
 				      &rdev->flags))
@@ -2090,7 +2090,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
 			rdev = conf->mirrors[d].rdev;
 			if (r1_sync_page_io(rdev, sect, s,
 					    pages[idx],
-					    WRITE) == 0) {
+					    REQ_OP_WRITE) == 0) {
 				r1_bio->bios[d]->bi_end_io = NULL;
 				rdev_dec_pending(rdev, mddev);
 			}
@@ -2105,7 +2105,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
 			rdev = conf->mirrors[d].rdev;
 			if (r1_sync_page_io(rdev, sect, s,
 					    pages[idx],
-					    READ) != 0)
+					    REQ_OP_READ) != 0)
 				atomic_add(s, &rdev->corrected_errors);
 		}
 		sectors -= s;
@@ -2321,7 +2321,7 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio)
 			    !test_bit(Faulty, &rdev->flags)) {
 				atomic_inc(&rdev->nr_pending);
 				r1_sync_page_io(rdev, sect, s,
-						conf->tmppage, WRITE);
+						conf->tmppage, REQ_OP_WRITE);
 				rdev_dec_pending(rdev, mddev);
 			}
 		}
@@ -2335,7 +2335,7 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio)
 			    !test_bit(Faulty, &rdev->flags)) {
 				atomic_inc(&rdev->nr_pending);
 				if (r1_sync_page_io(rdev, sect, s,
-						    conf->tmppage, READ)) {
+						conf->tmppage, REQ_OP_READ)) {
 					atomic_add(s, &rdev->corrected_errors);
 					pr_info("md/raid1:%s: read error corrected (%d sectors at %llu on %pg)\n",
 						mdname(mddev), s,
diff --git a/drivers/media/pci/solo6x10/solo6x10-offsets.h b/drivers/media/pci/solo6x10/solo6x10-offsets.h
index f414ee1316f2..fdbb817e6360 100644
--- a/drivers/media/pci/solo6x10/solo6x10-offsets.h
+++ b/drivers/media/pci/solo6x10/solo6x10-offsets.h
@@ -57,16 +57,16 @@
 #define SOLO_MP4E_EXT_ADDR(__solo) \
 	(SOLO_EREF_EXT_ADDR(__solo) + SOLO_EREF_EXT_AREA(__solo))
 #define SOLO_MP4E_EXT_SIZE(__solo) \
-	max((__solo->nr_chans * 0x00080000),				\
-	    min(((__solo->sdram_size - SOLO_MP4E_EXT_ADDR(__solo)) -	\
-		 __SOLO_JPEG_MIN_SIZE(__solo)), 0x00ff0000))
+	clamp(__solo->sdram_size - SOLO_MP4E_EXT_ADDR(__solo) -	\
+	      __SOLO_JPEG_MIN_SIZE(__solo),			\
+	      __solo->nr_chans * 0x00080000, 0x00ff0000)
 
 #define __SOLO_JPEG_MIN_SIZE(__solo)		(__solo->nr_chans * 0x00080000)
 #define SOLO_JPEG_EXT_ADDR(__solo) \
 		(SOLO_MP4E_EXT_ADDR(__solo) + SOLO_MP4E_EXT_SIZE(__solo))
 #define SOLO_JPEG_EXT_SIZE(__solo) \
-	max(__SOLO_JPEG_MIN_SIZE(__solo),				\
-	    min((__solo->sdram_size - SOLO_JPEG_EXT_ADDR(__solo)), 0x00ff0000))
+	clamp(__solo->sdram_size - SOLO_JPEG_EXT_ADDR(__solo),	\
+	      __SOLO_JPEG_MIN_SIZE(__solo), 0x00ff0000)
 
 #define SOLO_SDRAM_END(__solo) \
 	(SOLO_JPEG_EXT_ADDR(__solo) + SOLO_JPEG_EXT_SIZE(__solo))
diff --git a/drivers/net/can/c_can/c_can_platform.c b/drivers/net/can/c_can/c_can_platform.c
index f44ba2600415..e2ec69aa46e5 100644
--- a/drivers/net/can/c_can/c_can_platform.c
+++ b/drivers/net/can/c_can/c_can_platform.c
@@ -30,9 +30,9 @@
 #include <linux/io.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
+#include <linux/property.h>
 #include <linux/clk.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/mfd/syscon.h>
 #include <linux/regmap.h>
 
@@ -259,22 +259,21 @@ static int c_can_plat_probe(struct platform_device *pdev)
 	void __iomem *addr;
 	struct net_device *dev;
 	struct c_can_priv *priv;
-	const struct of_device_id *match;
 	struct resource *mem;
 	int irq;
 	struct clk *clk;
 	const struct c_can_driver_data *drvdata;
 	struct device_node *np = pdev->dev.of_node;
 
-	match = of_match_device(c_can_of_table, &pdev->dev);
-	if (match) {
-		drvdata = match->data;
-	} else if (pdev->id_entry->driver_data) {
-		drvdata = (struct c_can_driver_data *)
-			platform_get_device_id(pdev)->driver_data;
-	} else {
-		return -ENODEV;
-	}
+	/* device_get_match_data() only consults the firmware node, so devices
+	 * bound through the platform ID table still need the ID lookup.
+	 */
+	drvdata = device_get_match_data(&pdev->dev);
+	if (!drvdata && platform_get_device_id(pdev))
+		drvdata = (const struct c_can_driver_data *)
+			platform_get_device_id(pdev)->driver_data;
+	if (!drvdata)
+		return -ENODEV;
 
 	/* get the appropriate clk */
 	clk = devm_clk_get(&pdev->dev, NULL);
diff --git a/drivers/net/can/flexcan/flexcan-core.c b/drivers/net/can/flexcan/flexcan-core.c
index d15f85a40c1e..8ea7f2795551 100644
--- a/drivers/net/can/flexcan/flexcan-core.c
+++ b/drivers/net/can/flexcan/flexcan-core.c
@@ -23,11 +23,11 @@
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
 #include <linux/can/platform/flexcan.h>
 #include <linux/pm_runtime.h>
+#include <linux/property.h>
 #include <linux/regmap.h>
 #include <linux/regulator/consumer.h>
 
@@ -2034,7 +2034,6 @@ MODULE_DEVICE_TABLE(platform, flexcan_id_table);
 
 static int flexcan_probe(struct platform_device *pdev)
 {
-	const struct of_device_id *of_id;
 	const struct flexcan_devtype_data *devtype_data;
 	struct net_device *dev;
 	struct flexcan_priv *priv;
@@ -2090,14 +2089,15 @@ static int flexcan_probe(struct platform_device *pdev)
 	if (IS_ERR(regs))
 		return PTR_ERR(regs);
 
-	of_id = of_match_device(flexcan_of_match, &pdev->dev);
-	if (of_id)
-		devtype_data = of_id->data;
-	else if (platform_get_device_id(pdev)->driver_data)
-		devtype_data = (struct flexcan_devtype_data *)
-			platform_get_device_id(pdev)->driver_data;
-	else
-		return -ENODEV;
+	/* device_get_match_data() only consults the firmware node, so devices
+	 * bound through flexcan_id_table still need the platform ID lookup.
+	 */
+	devtype_data = device_get_match_data(&pdev->dev);
+	if (!devtype_data && platform_get_device_id(pdev))
+		devtype_data = (const struct flexcan_devtype_data *)
+			platform_get_device_id(pdev)->driver_data;
+	if (!devtype_data)
+		return -ENODEV;
 
 	if ((devtype_data->quirks & FLEXCAN_QUIRK_SUPPORT_FD) &&
 	    !((devtype_data->quirks &
diff --git a/drivers/net/can/mscan/mpc5xxx_can.c b/drivers/net/can/mscan/mpc5xxx_can.c
index 4837df6efa92..5b3d69c3b6b6 100644
--- a/drivers/net/can/mscan/mpc5xxx_can.c
+++ b/drivers/net/can/mscan/mpc5xxx_can.c
@@ -12,8 +12,10 @@
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
+#include <linux/property.h>
 #include <linux/netdevice.h>
 #include <linux/can/dev.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
@@ -290,7 +292,7 @@ static int mpc5xxx_can_probe(struct platform_device *ofdev)
 	int irq, mscan_clksrc = 0;
 	int err = -ENOMEM;
 
-	data = of_device_get_match_data(&ofdev->dev);
+	data = device_get_match_data(&ofdev->dev);
 	if (!data)
 		return -EINVAL;
 
@@ -351,13 +353,11 @@ exit_unmap_mem:
 
 static void mpc5xxx_can_remove(struct platform_device *ofdev)
 {
-	const struct of_device_id *match;
 	const struct mpc5xxx_can_data *data;
 	struct net_device *dev = platform_get_drvdata(ofdev);
 	struct mscan_priv *priv = netdev_priv(dev);
 
-	match = of_match_device(mpc5xxx_can_table, &ofdev->dev);
-	data = match ? match->data : NULL;
+	data = device_get_match_data(&ofdev->dev);
 
 	unregister_mscandev(dev);
 	if (data && data->put_clock)
diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
index abe58f103043..3722eaa84234 100644
--- a/drivers/net/can/xilinx_can.c
+++ b/drivers/net/can/xilinx_can.c
@@ -20,8 +20,8 @@
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
+#include <linux/property.h>
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
 #include <linux/string.h>
@@ -1726,8 +1726,7 @@ static int xcan_probe(struct platform_device *pdev)
 	struct net_device *ndev;
 	struct xcan_priv *priv;
 	struct phy *transceiver;
-	const struct of_device_id *of_id;
-	const struct xcan_devtype_data *devtype = &xcan_axi_data;
+	const struct xcan_devtype_data *devtype;
 	void __iomem *addr;
 	int ret;
 	int rx_max, tx_max;
@@ -1741,9 +1740,7 @@ static int xcan_probe(struct platform_device *pdev)
 		goto err;
 	}
 
-	of_id = of_match_device(xcan_of_match, &pdev->dev);
-	if (of_id && of_id->data)
-		devtype = of_id->data;
+	devtype = device_get_match_data(&pdev->dev);
 
 	hw_tx_max_property = devtype->flags & XCAN_FLAG_TX_MAILBOXES ?
 			     "tx-mailbox-count" : "tx-fifo-depth";
diff --git a/drivers/net/ethernet/8390/8390.c b/drivers/net/ethernet/8390/8390.c
index 0e0aa4016858..c5636245f1ca 100644
--- a/drivers/net/ethernet/8390/8390.c
+++ b/drivers/net/ethernet/8390/8390.c
@@ -100,4 +100,5 @@ static void __exit ns8390_module_exit(void)
 module_init(ns8390_module_init);
 module_exit(ns8390_module_exit);
 #endif /* MODULE */
+MODULE_DESCRIPTION("National Semiconductor 8390 core driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/8390/8390p.c b/drivers/net/ethernet/8390/8390p.c
index 6834742057b3..6d429b11e9c6 100644
--- a/drivers/net/ethernet/8390/8390p.c
+++ b/drivers/net/ethernet/8390/8390p.c
@@ -102,4 +102,5 @@ static void __exit NS8390p_cleanup_module(void)
 
 module_init(NS8390p_init_module);
 module_exit(NS8390p_cleanup_module);
+MODULE_DESCRIPTION("National Semiconductor 8390 core for ISA driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/8390/apne.c b/drivers/net/ethernet/8390/apne.c
index a09f383dd249..828edca8d30c 100644
--- a/drivers/net/ethernet/8390/apne.c
+++ b/drivers/net/ethernet/8390/apne.c
@@ -610,4 +610,5 @@ static int init_pcmcia(void)
 	return 1;
 }
 
+MODULE_DESCRIPTION("National Semiconductor 8390 Amiga PCMCIA ethernet driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/8390/hydra.c b/drivers/net/ethernet/8390/hydra.c
index 24f49a8ff903..fd9dcdc356e6 100644
--- a/drivers/net/ethernet/8390/hydra.c
+++ b/drivers/net/ethernet/8390/hydra.c
@@ -270,4 +270,5 @@ static void __exit hydra_cleanup_module(void)
 module_init(hydra_init_module);
 module_exit(hydra_cleanup_module);
 
+MODULE_DESCRIPTION("Zorro-II Hydra 8390 ethernet driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/8390/stnic.c b/drivers/net/ethernet/8390/stnic.c
index 265976e3b64a..6cc0e190aa79 100644
--- a/drivers/net/ethernet/8390/stnic.c
+++ b/drivers/net/ethernet/8390/stnic.c
@@ -296,4 +296,5 @@ static void __exit stnic_cleanup(void)
 
 module_init(stnic_probe);
 module_exit(stnic_cleanup);
+MODULE_DESCRIPTION("National Semiconductor DP83902AV ethernet driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/8390/zorro8390.c b/drivers/net/ethernet/8390/zorro8390.c
index d70390e9d03d..c24dd4fe7a10 100644
--- a/drivers/net/ethernet/8390/zorro8390.c
+++ b/drivers/net/ethernet/8390/zorro8390.c
@@ -443,4 +443,5 @@ static void __exit zorro8390_cleanup_module(void)
 module_init(zorro8390_init_module);
 module_exit(zorro8390_cleanup_module);
 
+MODULE_DESCRIPTION("Zorro NS8390-based ethernet driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/broadcom/bcm4908_enet.c b/drivers/net/ethernet/broadcom/bcm4908_enet.c
index 3e7c8671cd11..72df1bb10172 100644
--- a/drivers/net/ethernet/broadcom/bcm4908_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c
@@ -793,5 +793,6 @@ static struct platform_driver bcm4908_enet_driver = {
 };
 module_platform_driver(bcm4908_enet_driver);
 
+MODULE_DESCRIPTION("Broadcom BCM4908 Gigabit Ethernet driver");
 MODULE_LICENSE("GPL v2");
 MODULE_DEVICE_TABLE(of, bcm4908_enet_of_match);
diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c b/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c
index 9b83d5361699..50b8e97a811d 100644
--- a/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c
@@ -260,4 +260,5 @@ void bcma_mdio_mii_unregister(struct mii_bus *mii_bus)
 EXPORT_SYMBOL_GPL(bcma_mdio_mii_unregister);
 
 MODULE_AUTHOR("Rafał Miłecki");
+MODULE_DESCRIPTION("Broadcom iProc GBit BCMA MDIO helpers");
 MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c
index 6e4f36aaf5db..36f9bad28e6a 100644
--- a/drivers/net/ethernet/broadcom/bgmac-bcma.c
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c
@@ -362,4 +362,5 @@ module_init(bgmac_init)
 module_exit(bgmac_exit)
 
 MODULE_AUTHOR("Rafał Miłecki");
+MODULE_DESCRIPTION("Broadcom iProc GBit BCMA interface driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c
index 0b21fd5bd457..77425c7a32db 100644
--- a/drivers/net/ethernet/broadcom/bgmac-platform.c
+++ b/drivers/net/ethernet/broadcom/bgmac-platform.c
@@ -298,4 +298,5 @@ static struct platform_driver bgmac_enet_driver = {
 };
 
 module_platform_driver(bgmac_enet_driver);
+MODULE_DESCRIPTION("Broadcom iProc GBit platform interface driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 448a1b90de5e..6ffdc4229407 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -1626,4 +1626,5 @@ int bgmac_enet_resume(struct bgmac *bgmac)
 EXPORT_SYMBOL_GPL(bgmac_enet_resume);
 
 MODULE_AUTHOR("Rafał Miłecki");
+MODULE_DESCRIPTION("Broadcom iProc GBit driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 0aacd3c6ed5c..39845d556baf 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -3817,7 +3817,7 @@ static int bnxt_alloc_cp_rings(struct bnxt *bp)
 {
 	bool sh = !!(bp->flags & BNXT_FLAG_SHARED_RINGS);
 	int i, j, rc, ulp_base_vec, ulp_msix;
-	int tcs = netdev_get_num_tc(bp->dev);
+	int tcs = bp->num_tc;
 
 	if (!tcs)
 		tcs = 1;
@@ -5935,8 +5935,12 @@ static u16 bnxt_get_max_rss_ring(struct bnxt *bp)
 
 int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings)
 {
-	if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
-		return DIV_ROUND_UP(rx_rings, BNXT_RSS_TABLE_ENTRIES_P5);
+	if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
+		if (!rx_rings)
+			return 0;
+		return bnxt_calc_nr_ring_pages(rx_rings - 1,
+					       BNXT_RSS_TABLE_ENTRIES_P5);
+	}
 	if (BNXT_CHIP_TYPE_NITRO_A0(bp))
 		return 2;
 	return 1;
@@ -6926,7 +6930,7 @@ static int bnxt_hwrm_get_rings(struct bnxt *bp)
 			if (cp < (rx + tx)) {
 				rc = __bnxt_trim_rings(bp, &rx, &tx, cp, false);
 				if (rc)
-					return rc;
+					goto get_rings_exit;
 				if (bp->flags & BNXT_FLAG_AGG_RINGS)
 					rx <<= 1;
 				hw_resc->resv_rx_rings = rx;
@@ -6938,8 +6942,9 @@ static int bnxt_hwrm_get_rings(struct bnxt *bp)
 		hw_resc->resv_cp_rings = cp;
 		hw_resc->resv_stat_ctxs = stats;
 	}
+get_rings_exit:
 	hwrm_req_drop(bp, req);
-	return 0;
+	return rc;
 }
 
 int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings)
@@ -7000,10 +7005,11 @@ __bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
 
 		req->num_rx_rings = cpu_to_le16(rx_rings);
 		if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
+			u16 rss_ctx = bnxt_get_nr_rss_ctxs(bp, ring_grps);
+
 			req->num_cmpl_rings = cpu_to_le16(tx_rings + ring_grps);
 			req->num_msix = cpu_to_le16(cp_rings);
-			req->num_rsscos_ctxs =
-				cpu_to_le16(DIV_ROUND_UP(ring_grps, 64));
+			req->num_rsscos_ctxs = cpu_to_le16(rss_ctx);
 		} else {
 			req->num_cmpl_rings = cpu_to_le16(cp_rings);
 			req->num_hw_ring_grps = cpu_to_le16(ring_grps);
@@ -7050,8 +7056,10 @@ __bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
 	req->num_tx_rings = cpu_to_le16(tx_rings);
 	req->num_rx_rings = cpu_to_le16(rx_rings);
 	if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) {
+		u16 rss_ctx = bnxt_get_nr_rss_ctxs(bp, ring_grps);
+
 		req->num_cmpl_rings = cpu_to_le16(tx_rings + ring_grps);
-		req->num_rsscos_ctxs = cpu_to_le16(DIV_ROUND_UP(ring_grps, 64));
+		req->num_rsscos_ctxs = cpu_to_le16(rss_ctx);
 	} else {
 		req->num_cmpl_rings = cpu_to_le16(cp_rings);
 		req->num_hw_ring_grps = cpu_to_le16(ring_grps);
@@ -9938,7 +9946,7 @@ static int __bnxt_num_tx_to_cp(struct bnxt *bp, int tx, int tx_sets, int tx_xdp)
 
 int bnxt_num_tx_to_cp(struct bnxt *bp, int tx)
 {
-	int tcs = netdev_get_num_tc(bp->dev);
+	int tcs = bp->num_tc;
 
 	if (!tcs)
 		tcs = 1;
@@ -9947,7 +9955,7 @@ int bnxt_num_tx_to_cp(struct bnxt *bp, int tx)
 
 static int bnxt_num_cp_to_tx(struct bnxt *bp, int tx_cp)
 {
-	int tcs = netdev_get_num_tc(bp->dev);
+	int tcs = bp->num_tc;
 
 	return (tx_cp - bp->tx_nr_rings_xdp) * tcs +
 	       bp->tx_nr_rings_xdp;
@@ -9977,7 +9985,7 @@ static void bnxt_setup_msix(struct bnxt *bp)
 	struct net_device *dev = bp->dev;
 	int tcs, i;
 
-	tcs = netdev_get_num_tc(dev);
+	tcs = bp->num_tc;
 	if (tcs) {
 		int i, off, count;
 
@@ -10009,8 +10017,10 @@ static void bnxt_setup_inta(struct bnxt *bp)
 {
 	const int len = sizeof(bp->irq_tbl[0].name);
 
-	if (netdev_get_num_tc(bp->dev))
+	if (bp->num_tc) {
 		netdev_reset_tc(bp->dev);
+		bp->num_tc = 0;
+	}
 
 	snprintf(bp->irq_tbl[0].name, len, "%s-%s-%d", bp->dev->name, "TxRx",
 		 0);
@@ -10236,8 +10246,8 @@ static void bnxt_clear_int_mode(struct bnxt *bp)
 
 int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init)
 {
-	int tcs = netdev_get_num_tc(bp->dev);
 	bool irq_cleared = false;
+	int tcs = bp->num_tc;
 	int rc;
 
 	if (!bnxt_need_reserve_rings(bp))
@@ -10263,6 +10273,7 @@ int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init)
 		    bp->tx_nr_rings - bp->tx_nr_rings_xdp)) {
 		netdev_err(bp->dev, "tx ring reservation failure\n");
 		netdev_reset_tc(bp->dev);
+		bp->num_tc = 0;
 		if (bp->tx_nr_rings_xdp)
 			bp->tx_nr_rings_per_tc = bp->tx_nr_rings_xdp;
 		else
@@ -11564,10 +11575,12 @@ int bnxt_half_open_nic(struct bnxt *bp)
 		netdev_err(bp->dev, "bnxt_alloc_mem err: %x\n", rc);
 		goto half_open_err;
 	}
+	bnxt_init_napi(bp);
 	set_bit(BNXT_STATE_HALF_OPEN, &bp->state);
 	rc = bnxt_init_nic(bp, true);
 	if (rc) {
 		clear_bit(BNXT_STATE_HALF_OPEN, &bp->state);
+		bnxt_del_napi(bp);
 		netdev_err(bp->dev, "bnxt_init_nic err: %x\n", rc);
 		goto half_open_err;
 	}
@@ -11586,6 +11599,7 @@ half_open_err:
 void bnxt_half_close_nic(struct bnxt *bp)
 {
 	bnxt_hwrm_resource_free(bp, false, true);
+	bnxt_del_napi(bp);
 	bnxt_free_skbs(bp);
 	bnxt_free_mem(bp, true);
 	clear_bit(BNXT_STATE_HALF_OPEN, &bp->state);
@@ -13232,6 +13246,11 @@ static int bnxt_fw_init_one_p1(struct bnxt *bp)
 
 	bp->fw_cap = 0;
 	rc = bnxt_hwrm_ver_get(bp);
+	/* FW may be unresponsive after FLR. FLR must complete within 100 msec
+	 * so wait before continuing with recovery.
+	 */
+	if (rc)
+		msleep(100);
 	bnxt_try_map_fw_health_reg(bp);
 	if (rc) {
 		rc = bnxt_try_recover_fw(bp);
@@ -13784,7 +13803,7 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc)
 		return -EINVAL;
 	}
 
-	if (netdev_get_num_tc(dev) == tc)
+	if (bp->num_tc == tc)
 		return 0;
 
 	if (bp->flags & BNXT_FLAG_SHARED_RINGS)
@@ -13802,9 +13821,11 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc)
 	if (tc) {
 		bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tc;
 		netdev_set_num_tc(dev, tc);
+		bp->num_tc = tc;
 	} else {
 		bp->tx_nr_rings = bp->tx_nr_rings_per_tc;
 		netdev_reset_tc(dev);
+		bp->num_tc = 0;
 	}
 	bp->tx_nr_rings += bp->tx_nr_rings_xdp;
 	tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index b8ef1717cb65..47338b48ca20 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -2225,6 +2225,7 @@ struct bnxt {
 	u8			tc_to_qidx[BNXT_MAX_QUEUE];
 	u8			q_ids[BNXT_MAX_QUEUE];
 	u8			max_q;
+	u8			num_tc;
 
 	unsigned int		current_interval;
 #define BNXT_TIMER_INTERVAL	HZ
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
index 63e067038385..0dbb880a7aa0 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
@@ -228,7 +228,7 @@ static int bnxt_queue_remap(struct bnxt *bp, unsigned int lltc_mask)
 		}
 	}
 	if (bp->ieee_ets) {
-		int tc = netdev_get_num_tc(bp->dev);
+		int tc = bp->num_tc;
 
 		if (!tc)
 			tc = 1;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 27b983c0a8a9..dc4ca706b0e2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -884,7 +884,7 @@ static void bnxt_get_channels(struct net_device *dev,
 	if (max_tx_sch_inputs)
 		max_tx_rings = min_t(int, max_tx_rings, max_tx_sch_inputs);
 
-	tcs = netdev_get_num_tc(dev);
+	tcs = bp->num_tc;
 	tx_grps = max(tcs, 1);
 	if (bp->tx_nr_rings_xdp)
 		tx_grps++;
@@ -944,7 +944,7 @@ static int bnxt_set_channels(struct net_device *dev,
 	if (channel->combined_count)
 		sh = true;
 
-	tcs = netdev_get_num_tc(dev);
+	tcs = bp->num_tc;
 
 	req_tx_rings = sh ? channel->combined_count : channel->tx_count;
 	req_rx_rings = sh ? channel->combined_count : channel->rx_count;
@@ -1574,7 +1574,8 @@ u32 bnxt_get_rxfh_indir_size(struct net_device *dev)
 	struct bnxt *bp = netdev_priv(dev);
 
 	if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)
-		return ALIGN(bp->rx_nr_rings, BNXT_RSS_TABLE_ENTRIES_P5);
+		return bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) *
+		       BNXT_RSS_TABLE_ENTRIES_P5;
 	return HW_HASH_INDEX_SIZE;
 }
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index c2b25fc623ec..4079538bc310 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -407,7 +407,7 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog)
 	if (prog)
 		tx_xdp = bp->rx_nr_rings;
 
-	tc = netdev_get_num_tc(dev);
+	tc = bp->num_tc;
 	if (!tc)
 		tc = 1;
 	rc = bnxt_check_rings(bp, bp->tx_nr_rings_per_tc, bp->rx_nr_rings,
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
index 9cc6303c82ff..f38d31bfab1b 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_core.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
@@ -27,6 +27,7 @@
 #include "octeon_network.h"
 
 MODULE_AUTHOR("Cavium Networks, <support@cavium.com>");
+MODULE_DESCRIPTION("Cavium LiquidIO Intelligent Server Adapter Core");
 MODULE_LICENSE("GPL");
 
 /* OOM task polling interval */
diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c
index 1c2a540db13d..1f495cfd7959 100644
--- a/drivers/net/ethernet/cirrus/ep93xx_eth.c
+++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c
@@ -868,5 +868,6 @@ static struct platform_driver ep93xx_eth_driver = {
 
 module_platform_driver(ep93xx_eth_driver);
 
+MODULE_DESCRIPTION("Cirrus EP93xx Ethernet driver");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:ep93xx-eth");
diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
index df40c720e7b2..9aeff2b37a61 100644
--- a/drivers/net/ethernet/engleder/tsnep_main.c
+++ b/drivers/net/ethernet/engleder/tsnep_main.c
@@ -1485,7 +1485,7 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
 
 			xdp_prepare_buff(&xdp, page_address(entry->page),
 					 XDP_PACKET_HEADROOM + TSNEP_RX_INLINE_METADATA_SIZE,
-					 length, false);
+					 length - ETH_FCS_LEN, false);
 
 			consume = tsnep_xdp_run_prog(rx, prog, &xdp,
 						     &xdp_status, tx_nq, tx);
@@ -1568,7 +1568,7 @@ static int tsnep_rx_poll_zc(struct tsnep_rx *rx, struct napi_struct *napi,
 		prefetch(entry->xdp->data);
 		length = __le32_to_cpu(entry->desc_wb->properties) &
 			 TSNEP_DESC_LENGTH_MASK;
-		xsk_buff_set_size(entry->xdp, length);
+		xsk_buff_set_size(entry->xdp, length - ETH_FCS_LEN);
 		xsk_buff_dma_sync_for_cpu(entry->xdp, rx->xsk_pool);
 
 		/* RX metadata with timestamps is in front of actual data,
@@ -1762,6 +1762,19 @@ static void tsnep_rx_reopen_xsk(struct tsnep_rx *rx)
 			allocated--;
 		}
 	}
+
+	/* Set the need wakeup flag immediately if the ring is not completely
+	 * filled. Waiting for the first poll would be too late, because the
+	 * need wakeup signalling would be delayed for an indefinite time.
+	 */
+	if (xsk_uses_need_wakeup(rx->xsk_pool)) {
+		int desc_available = tsnep_rx_desc_available(rx);
+
+		if (desc_available)
+			xsk_set_rx_need_wakeup(rx->xsk_pool);
+		else
+			xsk_clear_rx_need_wakeup(rx->xsk_pool);
+	}
 }
 
 static bool tsnep_pending(struct tsnep_queue *queue)
diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c
index 07c2b701b5fa..9ebe751c1df0 100644
--- a/drivers/net/ethernet/ezchip/nps_enet.c
+++ b/drivers/net/ethernet/ezchip/nps_enet.c
@@ -661,4 +661,5 @@ static struct platform_driver nps_enet_driver = {
 module_platform_driver(nps_enet_driver);
 
 MODULE_AUTHOR("EZchip Semiconductor");
+MODULE_DESCRIPTION("EZchip NPS Ethernet driver");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index cffbf27c4656..bfdbdab443ae 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -3216,4 +3216,5 @@ void enetc_pci_remove(struct pci_dev *pdev)
 }
 EXPORT_SYMBOL_GPL(enetc_pci_remove);
 
+MODULE_DESCRIPTION("NXP ENETC Ethernet driver");
 MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index d42594f32275..432523b2c789 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -2036,6 +2036,7 @@ static void fec_enet_adjust_link(struct net_device *ndev)
 
 		/* if any of the above changed restart the FEC */
 		if (status_change) {
+			netif_stop_queue(ndev);
 			napi_disable(&fep->napi);
 			netif_tx_lock_bh(ndev);
 			fec_restart(ndev);
@@ -2045,6 +2046,7 @@ static void fec_enet_adjust_link(struct net_device *ndev)
 		}
 	} else {
 		if (fep->link) {
+			netif_stop_queue(ndev);
 			napi_disable(&fep->napi);
 			netif_tx_lock_bh(ndev);
 			fec_stop(ndev);
@@ -4769,4 +4771,5 @@ static struct platform_driver fec_driver = {
 
 module_platform_driver(fec_driver);
 
+MODULE_DESCRIPTION("NXP Fast Ethernet Controller (FEC) driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
index 70dd982a5edc..026f7270a54d 100644
--- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c
+++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
@@ -531,4 +531,5 @@ static struct platform_driver fsl_pq_mdio_driver = {
 
 module_platform_driver(fsl_pq_mdio_driver);
 
+MODULE_DESCRIPTION("Freescale PQ MDIO helpers");
 MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index ae8f9f135725..6e7fd473abfd 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -3588,40 +3588,55 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
 	struct i40e_hmc_obj_rxq rx_ctx;
 	int err = 0;
 	bool ok;
-	int ret;
 
 	bitmap_zero(ring->state, __I40E_RING_STATE_NBITS);
 
 	/* clear the context structure first */
 	memset(&rx_ctx, 0, sizeof(rx_ctx));
 
-	if (ring->vsi->type == I40E_VSI_MAIN)
-		xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+	ring->rx_buf_len = vsi->rx_buf_len;
+
+	/* XDP RX-queue info only needed for RX rings exposed to XDP */
+	if (ring->vsi->type != I40E_VSI_MAIN)
+		goto skip;
+
+	if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) {
+		err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+					 ring->queue_index,
+					 ring->q_vector->napi.napi_id,
+					 ring->rx_buf_len);
+		if (err)
+			return err;
+	}
 
 	ring->xsk_pool = i40e_xsk_pool(ring);
 	if (ring->xsk_pool) {
-		ring->rx_buf_len =
-		  xsk_pool_get_rx_frame_size(ring->xsk_pool);
-		ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+		xdp_rxq_info_unreg(&ring->xdp_rxq);
+		ring->rx_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool);
+		err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+					 ring->queue_index,
+					 ring->q_vector->napi.napi_id,
+					 ring->rx_buf_len);
+		if (err)
+			return err;
+		err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
 						 MEM_TYPE_XSK_BUFF_POOL,
 						 NULL);
-		if (ret)
-			return ret;
+		if (err)
+			return err;
 		dev_info(&vsi->back->pdev->dev,
 			 "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
 			 ring->queue_index);
 
 	} else {
-		ring->rx_buf_len = vsi->rx_buf_len;
-		if (ring->vsi->type == I40E_VSI_MAIN) {
-			ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
-							 MEM_TYPE_PAGE_SHARED,
-							 NULL);
-			if (ret)
-				return ret;
-		}
+		err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+						 MEM_TYPE_PAGE_SHARED,
+						 NULL);
+		if (err)
+			return err;
 	}
 
+skip:
 	xdp_init_buff(&ring->xdp, i40e_rx_pg_size(ring) / 2, &ring->xdp_rxq);
 
 	rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 971ba3322038..0d7177083708 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1548,7 +1548,6 @@ void i40e_free_rx_resources(struct i40e_ring *rx_ring)
 int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
 {
 	struct device *dev = rx_ring->dev;
-	int err;
 
 	u64_stats_init(&rx_ring->syncp);
 
@@ -1569,14 +1568,6 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
 	rx_ring->next_to_process = 0;
 	rx_ring->next_to_use = 0;
 
-	/* XDP RX-queue info only needed for RX rings exposed to XDP */
-	if (rx_ring->vsi->type == I40E_VSI_MAIN) {
-		err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
-				       rx_ring->queue_index, rx_ring->q_vector->napi.napi_id);
-		if (err < 0)
-			return err;
-	}
-
 	rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;
 
 	rx_ring->rx_bi =
@@ -2087,7 +2078,8 @@ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
 static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res,
 				  struct xdp_buff *xdp)
 {
-	u32 next = rx_ring->next_to_clean;
+	u32 nr_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags;
+	u32 next = rx_ring->next_to_clean, i = 0;
 	struct i40e_rx_buffer *rx_buffer;
 
 	xdp->flags = 0;
@@ -2100,10 +2092,10 @@ static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res,
 		if (!rx_buffer->page)
 			continue;
 
-		if (xdp_res == I40E_XDP_CONSUMED)
-			rx_buffer->pagecnt_bias++;
-		else
+		if (xdp_res != I40E_XDP_CONSUMED)
 			i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz);
+		else if (i++ <= nr_frags)
+			rx_buffer->pagecnt_bias++;
 
 		/* EOP buffer will be put in i40e_clean_rx_irq() */
 		if (next == rx_ring->next_to_process)
@@ -2117,20 +2109,20 @@ static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res,
  * i40e_construct_skb - Allocate skb and populate it
  * @rx_ring: rx descriptor ring to transact packets on
  * @xdp: xdp_buff pointing to the data
- * @nr_frags: number of buffers for the packet
  *
  * This function allocates an skb.  It then populates it with the page
  * data from the current receive descriptor, taking care to set up the
  * skb correctly.
  */
 static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
-					  struct xdp_buff *xdp,
-					  u32 nr_frags)
+					  struct xdp_buff *xdp)
 {
 	unsigned int size = xdp->data_end - xdp->data;
 	struct i40e_rx_buffer *rx_buffer;
+	struct skb_shared_info *sinfo;
 	unsigned int headlen;
 	struct sk_buff *skb;
+	u32 nr_frags = 0;
 
 	/* prefetch first cache line of first page */
 	net_prefetch(xdp->data);
@@ -2168,6 +2160,10 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
 	memcpy(__skb_put(skb, headlen), xdp->data,
 	       ALIGN(headlen, sizeof(long)));
 
+	if (unlikely(xdp_buff_has_frags(xdp))) {
+		sinfo = xdp_get_shared_info_from_buff(xdp);
+		nr_frags = sinfo->nr_frags;
+	}
 	rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
 	/* update all of the pointers */
 	size -= headlen;
@@ -2187,9 +2183,8 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
 	}
 
 	if (unlikely(xdp_buff_has_frags(xdp))) {
-		struct skb_shared_info *sinfo, *skinfo = skb_shinfo(skb);
+		struct skb_shared_info *skinfo = skb_shinfo(skb);
 
-		sinfo = xdp_get_shared_info_from_buff(xdp);
 		memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0],
 		       sizeof(skb_frag_t) * nr_frags);
 
@@ -2212,17 +2207,17 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
  * i40e_build_skb - Build skb around an existing buffer
  * @rx_ring: Rx descriptor ring to transact packets on
  * @xdp: xdp_buff pointing to the data
- * @nr_frags: number of buffers for the packet
  *
  * This function builds an skb around an existing Rx buffer, taking care
  * to set up the skb correctly and avoid any memcpy overhead.
  */
 static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
-				      struct xdp_buff *xdp,
-				      u32 nr_frags)
+				      struct xdp_buff *xdp)
 {
 	unsigned int metasize = xdp->data - xdp->data_meta;
+	struct skb_shared_info *sinfo;
 	struct sk_buff *skb;
+	u32 nr_frags;
 
 	/* Prefetch first cache line of first page. If xdp->data_meta
 	 * is unused, this points exactly as xdp->data, otherwise we
@@ -2231,6 +2226,11 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
 	 */
 	net_prefetch(xdp->data_meta);
 
+	if (unlikely(xdp_buff_has_frags(xdp))) {
+		sinfo = xdp_get_shared_info_from_buff(xdp);
+		nr_frags = sinfo->nr_frags;
+	}
+
 	/* build an skb around the page buffer */
 	skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz);
 	if (unlikely(!skb))
@@ -2243,9 +2243,6 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
 		skb_metadata_set(skb, metasize);
 
 	if (unlikely(xdp_buff_has_frags(xdp))) {
-		struct skb_shared_info *sinfo;
-
-		sinfo = xdp_get_shared_info_from_buff(xdp);
 		xdp_update_skb_shared_info(skb, nr_frags,
 					   sinfo->xdp_frags_size,
 					   nr_frags * xdp->frame_sz,
@@ -2589,9 +2586,9 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
 			total_rx_bytes += size;
 		} else {
 			if (ring_uses_build_skb(rx_ring))
-				skb = i40e_build_skb(rx_ring, xdp, nfrags);
+				skb = i40e_build_skb(rx_ring, xdp);
 			else
-				skb = i40e_construct_skb(rx_ring, xdp, nfrags);
+				skb = i40e_construct_skb(rx_ring, xdp);
 
 			/* drop if we failed to retrieve a buffer */
 			if (!skb) {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index af7d5fa6cdc1..11500003af0d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -414,7 +414,8 @@ i40e_add_xsk_frag(struct i40e_ring *rx_ring, struct xdp_buff *first,
 	}
 
 	__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++,
-				   virt_to_page(xdp->data_hard_start), 0, size);
+				   virt_to_page(xdp->data_hard_start),
+				   XDP_PACKET_HEADROOM, size);
 	sinfo->xdp_frags_size += size;
 	xsk_buff_add_frag(xdp);
 
@@ -498,7 +499,6 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
 		xdp_res = i40e_run_xdp_zc(rx_ring, first, xdp_prog);
 		i40e_handle_xdp_result_zc(rx_ring, first, rx_desc, &rx_packets,
 					  &rx_bytes, xdp_res, &failure);
-		first->flags = 0;
 		next_to_clean = next_to_process;
 		if (failure)
 			break;
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index 533b923cae2d..7ac847718882 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -547,19 +547,27 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
 	ring->rx_buf_len = ring->vsi->rx_buf_len;
 
 	if (ring->vsi->type == ICE_VSI_PF) {
-		if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
-			/* coverity[check_return] */
-			__xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
-					   ring->q_index,
-					   ring->q_vector->napi.napi_id,
-					   ring->vsi->rx_buf_len);
+		if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) {
+			err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+						 ring->q_index,
+						 ring->q_vector->napi.napi_id,
+						 ring->rx_buf_len);
+			if (err)
+				return err;
+		}
 
 		ring->xsk_pool = ice_xsk_pool(ring);
 		if (ring->xsk_pool) {
-			xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+			xdp_rxq_info_unreg(&ring->xdp_rxq);
 
 			ring->rx_buf_len =
 				xsk_pool_get_rx_frame_size(ring->xsk_pool);
+			err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+						 ring->q_index,
+						 ring->q_vector->napi.napi_id,
+						 ring->rx_buf_len);
+			if (err)
+				return err;
 			err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
 							 MEM_TYPE_XSK_BUFF_POOL,
 							 NULL);
@@ -571,13 +579,14 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
 			dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
 				 ring->q_index);
 		} else {
-			if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
-				/* coverity[check_return] */
-				__xdp_rxq_info_reg(&ring->xdp_rxq,
-						   ring->netdev,
-						   ring->q_index,
-						   ring->q_vector->napi.napi_id,
-						   ring->vsi->rx_buf_len);
+			if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) {
+				err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+							 ring->q_index,
+							 ring->q_vector->napi.napi_id,
+							 ring->rx_buf_len);
+				if (err)
+					return err;
+			}
 
 			err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
 							 MEM_TYPE_PAGE_SHARED,
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 74d13cc5a3a7..97d41d6ebf1f 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -513,11 +513,6 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring)
 	if (ice_is_xdp_ena_vsi(rx_ring->vsi))
 		WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog);
 
-	if (rx_ring->vsi->type == ICE_VSI_PF &&
-	    !xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
-		if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
-				     rx_ring->q_index, rx_ring->q_vector->napi.napi_id))
-			goto err;
 	return 0;
 
 err:
@@ -603,9 +598,7 @@ out_failure:
 		ret = ICE_XDP_CONSUMED;
 	}
 exit:
-	rx_buf->act = ret;
-	if (unlikely(xdp_buff_has_frags(xdp)))
-		ice_set_rx_bufs_act(xdp, rx_ring, ret);
+	ice_set_rx_bufs_act(xdp, rx_ring, ret);
 }
 
 /**
@@ -893,14 +886,17 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
 	}
 
 	if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) {
-		if (unlikely(xdp_buff_has_frags(xdp)))
-			ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED);
+		ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED);
 		return -ENOMEM;
 	}
 
 	__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page,
 				   rx_buf->page_offset, size);
 	sinfo->xdp_frags_size += size;
+	/* remember frag count before XDP prog execution; bpf_xdp_adjust_tail()
+	 * can pop off frags but driver has to handle it on its own
+	 */
+	rx_ring->nr_frags = sinfo->nr_frags;
 
 	if (page_is_pfmemalloc(rx_buf->page))
 		xdp_buff_set_frag_pfmemalloc(xdp);
@@ -1251,6 +1247,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
 
 		xdp->data = NULL;
 		rx_ring->first_desc = ntc;
+		rx_ring->nr_frags = 0;
 		continue;
 construct_skb:
 		if (likely(ice_ring_uses_build_skb(rx_ring)))
@@ -1266,10 +1263,12 @@ construct_skb:
 						    ICE_XDP_CONSUMED);
 			xdp->data = NULL;
 			rx_ring->first_desc = ntc;
+			rx_ring->nr_frags = 0;
 			break;
 		}
 		xdp->data = NULL;
 		rx_ring->first_desc = ntc;
+		rx_ring->nr_frags = 0;
 
 		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
 		if (unlikely(ice_test_staterr(rx_desc->wb.status_error0,
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index b3379ff73674..af955b0e5dc5 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -358,6 +358,7 @@ struct ice_rx_ring {
 	struct ice_tx_ring *xdp_ring;
 	struct ice_rx_ring *next;	/* pointer to next ring in q_vector */
 	struct xsk_buff_pool *xsk_pool;
+	u32 nr_frags;
 	dma_addr_t dma;			/* physical address of ring */
 	u16 rx_buf_len;
 	u8 dcb_tc;			/* Traffic class of ring */
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
index 762047508619..afcead4baef4 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
@@ -12,26 +12,39 @@
  * act: action to store onto Rx buffers related to XDP buffer parts
  *
  * Set action that should be taken before putting Rx buffer from first frag
- * to one before last. Last one is handled by caller of this function as it
- * is the EOP frag that is currently being processed. This function is
- * supposed to be called only when XDP buffer contains frags.
+ * to the last.
  */
 static inline void
 ice_set_rx_bufs_act(struct xdp_buff *xdp, const struct ice_rx_ring *rx_ring,
 		    const unsigned int act)
 {
-	const struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
-	u32 first = rx_ring->first_desc;
-	u32 nr_frags = sinfo->nr_frags;
+	u32 sinfo_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags;
+	u32 nr_frags = rx_ring->nr_frags + 1;	/* + 1: presumably the head buffer - confirm */
+	u32 idx = rx_ring->first_desc;
 	u32 cnt = rx_ring->count;
 	struct ice_rx_buf *buf;
 
 	for (int i = 0; i < nr_frags; i++) {
-		buf = &rx_ring->rx_buf[first];
+		buf = &rx_ring->rx_buf[idx];
 		buf->act = act;
 
-		if (++first == cnt)
-			first = 0;
+		if (++idx == cnt)	/* wrap around the end of the ring */
+			idx = 0;
+	}
+
+	/* adjust pagecnt_bias on frags freed by XDP prog; walks idx back from the loop's end */
+	if (sinfo_frags < rx_ring->nr_frags && act == ICE_XDP_CONSUMED) {
+		u32 delta = rx_ring->nr_frags - sinfo_frags;
+
+		while (delta) {
+			if (idx == 0)
+				idx = cnt - 1;
+			else
+				idx--;
+			buf = &rx_ring->rx_buf[idx];
+			buf->pagecnt_bias--;
+			delta--;
+		}
 	}
 }
 
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 5d1ae8e4058a..8b81a1677045 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -825,7 +825,8 @@ ice_add_xsk_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *first,
 	}
 
 	__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++,
-				   virt_to_page(xdp->data_hard_start), 0, size);
+				   virt_to_page(xdp->data_hard_start),
+				   XDP_PACKET_HEADROOM, size);
 	sinfo->xdp_frags_size += size;
 	xsk_buff_add_frag(xdp);
 
@@ -895,7 +896,6 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
 
 		if (!first) {
 			first = xdp;
-			xdp_buff_clear_frags_flag(first);
 		} else if (ice_add_xsk_frag(rx_ring, first, xdp, size)) {
 			break;
 		}
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
index 5fea2fd957eb..58179bd733ff 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -783,6 +783,8 @@ static int idpf_cfg_netdev(struct idpf_vport *vport)
 	/* setup watchdog timeout value to be 5 second */
 	netdev->watchdog_timeo = 5 * HZ;
 
+	netdev->dev_port = idx;
+
 	/* configure default MTU size */
 	netdev->min_mtu = ETH_MIN_MTU;
 	netdev->max_mtu = vport->max_mtu;
diff --git a/drivers/net/ethernet/litex/litex_liteeth.c b/drivers/net/ethernet/litex/litex_liteeth.c
index 5182fe737c37..ff54fbe41bcc 100644
--- a/drivers/net/ethernet/litex/litex_liteeth.c
+++ b/drivers/net/ethernet/litex/litex_liteeth.c
@@ -318,4 +318,5 @@ static struct platform_driver liteeth_driver = {
 module_platform_driver(liteeth_driver);
 
 MODULE_AUTHOR("Joel Stanley <joel@jms.id.au>");
+MODULE_DESCRIPTION("LiteX Liteeth Ethernet driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 820b1fabe297..23adf53c2aa1 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -614,12 +614,38 @@ static void mvpp23_bm_set_8pool_mode(struct mvpp2 *priv)
 	mvpp2_write(priv, MVPP22_BM_POOL_BASE_ADDR_HIGH_REG, val);
 }
 
+/* Cleanup BM pool @pool_id before the OS initializes it: drain it, then stop it */
+static void mvpp2_bm_pool_cleanup(struct mvpp2 *priv, int pool_id)
+{
+	unsigned int thread = mvpp2_cpu_to_thread(priv, get_cpu());
+	u32 val;
+	int i;
+
+	/* Drain the BM from all possible residues left by firmware; read values are discarded */
+	for (i = 0; i < MVPP2_BM_POOL_SIZE_MAX; i++)
+		mvpp2_thread_read(priv, thread, MVPP2_BM_PHY_ALLOC_REG(pool_id));
+
+	put_cpu();	/* pairs with get_cpu() in the initializer of 'thread' */
+
+	/* Stop the BM pool by OR-ing MVPP2_BM_STOP_MASK into its control register */
+	val = mvpp2_read(priv, MVPP2_BM_POOL_CTRL_REG(pool_id));
+	val |= MVPP2_BM_STOP_MASK;
+	mvpp2_write(priv, MVPP2_BM_POOL_CTRL_REG(pool_id), val);
+}
+
 static int mvpp2_bm_init(struct device *dev, struct mvpp2 *priv)
 {
 	enum dma_data_direction dma_dir = DMA_FROM_DEVICE;
 	int i, err, poolnum = MVPP2_BM_POOLS_NUM;
 	struct mvpp2_port *port;
 
+	if (priv->percpu_pools)
+		poolnum = mvpp2_get_nrxqs(priv) * 2;
+
+	/* Clean up the pool state in case it contains stale state */
+	for (i = 0; i < poolnum; i++)
+		mvpp2_bm_pool_cleanup(priv, i);
+
 	if (priv->percpu_pools) {
 		for (i = 0; i < priv->port_count; i++) {
 			port = priv->port_list[i];
@@ -629,7 +655,6 @@ static int mvpp2_bm_init(struct device *dev, struct mvpp2 *priv)
 			}
 		}
 
-		poolnum = mvpp2_get_nrxqs(priv) * 2;
 		for (i = 0; i < poolnum; i++) {
 			/* the pool in use */
 			int pn = i / (poolnum / 2);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c
index 9690ac01f02c..b92264d0a77e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c
@@ -413,4 +413,5 @@ const char *otx2_mbox_id2name(u16 id)
 EXPORT_SYMBOL(otx2_mbox_id2name);
 
 MODULE_AUTHOR("Marvell.");
+MODULE_DESCRIPTION("Marvell RVU NIC Mbox helpers");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index a7b1f9686c09..4957412ff1f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1923,6 +1923,7 @@ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status,
 {
 	const char *namep = mlx5_command_str(opcode);
 	struct mlx5_cmd_stats *stats;
+	unsigned long flags;
 
 	if (!err || !(strcmp(namep, "unknown command opcode")))
 		return;
@@ -1930,7 +1931,7 @@ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status,
 	stats = xa_load(&dev->cmd.stats, opcode);
 	if (!stats)
 		return;
-	spin_lock_irq(&stats->lock);
+	spin_lock_irqsave(&stats->lock, flags);
 	stats->failed++;
 	if (err < 0)
 		stats->last_failed_errno = -err;
@@ -1939,7 +1940,7 @@ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status,
 		stats->last_failed_mbox_status = status;
 		stats->last_failed_syndrome = syndrome;
 	}
-	spin_unlock_irq(&stats->lock);
+	spin_unlock_irqrestore(&stats->lock, flags);
 }
 
 /* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 0bfe1ca8a364..55c6ace0acd5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -1124,7 +1124,7 @@ static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev)
 extern const struct ethtool_ops mlx5e_ethtool_ops;
 
 int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey);
-int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev);
+int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev, bool create_tises);
 void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
 int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb,
 		       bool enable_mc_lb);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
index e1283531e0b8..671adbad0a40 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
@@ -436,6 +436,7 @@ static int fs_any_create_groups(struct mlx5e_flow_table *ft)
 	in = kvzalloc(inlen, GFP_KERNEL);
 	if  (!in || !ft->g) {
 		kfree(ft->g);
+		ft->g = NULL;
 		kvfree(in);
 		return -ENOMEM;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 284253b79266..5d213a9886f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -1064,8 +1064,8 @@ void mlx5e_build_sq_param(struct mlx5_core_dev *mdev,
 	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
 	bool allow_swp;
 
-	allow_swp =
-		mlx5_geneve_tx_allowed(mdev) || !!mlx5_ipsec_device_caps(mdev);
+	allow_swp = mlx5_geneve_tx_allowed(mdev) ||
+		    (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO);
 	mlx5e_build_sq_param_common(mdev, param);
 	MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
 	MLX5_SET(sqc, sqc, allow_swp, allow_swp);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index c206cc0a8483..078f56a3cbb2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -213,7 +213,7 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
 	mlx5e_ptpsq_mark_ts_cqes_undelivered(ptpsq, hwtstamp);
 out:
 	napi_consume_skb(skb, budget);
-	md_buff[*md_buff_sz++] = metadata_id;
+	md_buff[(*md_buff_sz)++] = metadata_id;
 	if (unlikely(mlx5e_ptp_metadata_map_unhealthy(&ptpsq->metadata_map)) &&
 	    !test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
 		queue_work(ptpsq->txqsq.priv->wq, &ptpsq->report_unhealthy_work);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index 161c5190c236..05612d9c6080 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -336,12 +336,17 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
 	/* iv len */
 	aes_gcm->icv_len = x->aead->alg_icv_len;
 
+	attrs->dir = x->xso.dir;
+
 	/* esn */
 	if (x->props.flags & XFRM_STATE_ESN) {
 		attrs->replay_esn.trigger = true;
 		attrs->replay_esn.esn = sa_entry->esn_state.esn;
 		attrs->replay_esn.esn_msb = sa_entry->esn_state.esn_msb;
 		attrs->replay_esn.overlap = sa_entry->esn_state.overlap;
+		if (attrs->dir == XFRM_DEV_OFFLOAD_OUT)
+			goto skip_replay_window;
+
 		switch (x->replay_esn->replay_window) {
 		case 32:
 			attrs->replay_esn.replay_window =
@@ -365,7 +370,7 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
 		}
 	}
 
-	attrs->dir = x->xso.dir;
+skip_replay_window:
 	/* spi */
 	attrs->spi = be32_to_cpu(x->id.spi);
 
@@ -501,7 +506,8 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
 			return -EINVAL;
 		}
 
-		if (x->replay_esn && x->replay_esn->replay_window != 32 &&
+		if (x->replay_esn && x->xso.dir == XFRM_DEV_OFFLOAD_IN &&
+		    x->replay_esn->replay_window != 32 &&
 		    x->replay_esn->replay_window != 64 &&
 		    x->replay_esn->replay_window != 128 &&
 		    x->replay_esn->replay_window != 256) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index bb7f86c993e5..e66f486faafe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -254,11 +254,13 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft,
 
 	ft->g = kcalloc(MLX5E_ARFS_NUM_GROUPS,
 			sizeof(*ft->g), GFP_KERNEL);
-	in = kvzalloc(inlen, GFP_KERNEL);
-	if  (!in || !ft->g) {
-		kfree(ft->g);
-		kvfree(in);
+	if (!ft->g)
 		return -ENOMEM;
+
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if (!in) {
+		err = -ENOMEM;
+		goto err_free_g;
 	}
 
 	mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
@@ -278,7 +280,7 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft,
 		break;
 	default:
 		err = -EINVAL;
-		goto out;
+		goto err_free_in;
 	}
 
 	switch (type) {
@@ -300,7 +302,7 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft,
 		break;
 	default:
 		err = -EINVAL;
-		goto out;
+		goto err_free_in;
 	}
 
 	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
@@ -309,7 +311,7 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft,
 	MLX5_SET_CFG(in, end_flow_index, ix - 1);
 	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
 	if (IS_ERR(ft->g[ft->num_groups]))
-		goto err;
+		goto err_clean_group;
 	ft->num_groups++;
 
 	memset(in, 0, inlen);
@@ -318,18 +320,20 @@ static int arfs_create_groups(struct mlx5e_flow_table *ft,
 	MLX5_SET_CFG(in, end_flow_index, ix - 1);
 	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
 	if (IS_ERR(ft->g[ft->num_groups]))
-		goto err;
+		goto err_clean_group;
 	ft->num_groups++;
 
 	kvfree(in);
 	return 0;
 
-err:
+err_clean_group:
 	err = PTR_ERR(ft->g[ft->num_groups]);
 	ft->g[ft->num_groups] = NULL;
-out:
+err_free_in:
 	kvfree(in);
-
+err_free_g:
+	kfree(ft->g);
+	ft->g = NULL;
 	return err;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index 67f546683e85..6ed3a32b7e22 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -95,7 +95,7 @@ static void mlx5e_destroy_tises(struct mlx5_core_dev *mdev, u32 tisn[MLX5_MAX_PO
 {
 	int tc, i;
 
-	for (i = 0; i < MLX5_MAX_PORTS; i++)
+	for (i = 0; i < mlx5e_get_num_lag_ports(mdev); i++)
 		for (tc = 0; tc < MLX5_MAX_NUM_TC; tc++)
 			mlx5e_destroy_tis(mdev, tisn[i][tc]);
 }
@@ -110,7 +110,7 @@ static int mlx5e_create_tises(struct mlx5_core_dev *mdev, u32 tisn[MLX5_MAX_PORT
 	int tc, i;
 	int err;
 
-	for (i = 0; i < MLX5_MAX_PORTS; i++) {
+	for (i = 0; i < mlx5e_get_num_lag_ports(mdev); i++) {
 		for (tc = 0; tc < MLX5_MAX_NUM_TC; tc++) {
 			u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
 			void *tisc;
@@ -140,7 +140,7 @@ err_close_tises:
 	return err;
 }
 
-int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev)
+int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev, bool create_tises)
 {
 	struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs;
 	int err;
@@ -169,11 +169,15 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev)
 		goto err_destroy_mkey;
 	}
 
-	err = mlx5e_create_tises(mdev, res->tisn);
-	if (err) {
-		mlx5_core_err(mdev, "alloc tises failed, %d\n", err);
-		goto err_destroy_bfreg;
+	if (create_tises) {
+		err = mlx5e_create_tises(mdev, res->tisn);
+		if (err) {
+			mlx5_core_err(mdev, "alloc tises failed, %d\n", err);
+			goto err_destroy_bfreg;
+		}
+		res->tisn_valid = true;
 	}
+
 	INIT_LIST_HEAD(&res->td.tirs_list);
 	mutex_init(&res->td.list_lock);
 
@@ -203,7 +207,8 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev)
 
 	mlx5_crypto_dek_cleanup(mdev->mlx5e_res.dek_priv);
 	mdev->mlx5e_res.dek_priv = NULL;
-	mlx5e_destroy_tises(mdev, res->tisn);
+	if (res->tisn_valid)
+		mlx5e_destroy_tises(mdev, res->tisn);
 	mlx5_free_bfreg(mdev, &res->bfreg);
 	mlx5_core_destroy_mkey(mdev, res->mkey);
 	mlx5_core_dealloc_transport_domain(mdev, res->td.tdn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b5f1c4ca38ba..c8e8f512803e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -5992,7 +5992,7 @@ static int mlx5e_resume(struct auxiliary_device *adev)
 	if (netif_device_present(netdev))
 		return 0;
 
-	err = mlx5e_create_mdev_resources(mdev);
+	err = mlx5e_create_mdev_resources(mdev, true);
 	if (err)
 		return err;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 30932c9c9a8f..9fb2c057bd78 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -761,7 +761,7 @@ static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
 
 	err = mlx5e_rss_params_indir_init(&indir, mdev,
 					  mlx5e_rqt_size(mdev, hp->num_channels),
-					  mlx5e_rqt_size(mdev, priv->max_nch));
+					  mlx5e_rqt_size(mdev, hp->num_channels));
 	if (err)
 		return err;
 
@@ -2014,9 +2014,10 @@ static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow,
 	list_for_each_entry_safe(peer_flow, tmp, &flow->peer_flows, peer_flows) {
 		if (peer_index != mlx5_get_dev_index(peer_flow->priv->mdev))
 			continue;
+
+		list_del(&peer_flow->peer_flows);
 		if (refcount_dec_and_test(&peer_flow->refcnt)) {
 			mlx5e_tc_del_fdb_flow(peer_flow->priv, peer_flow);
-			list_del(&peer_flow->peer_flows);
 			kfree(peer_flow);
 		}
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c
index a7ed87e9d842..22dd30cf8033 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c
@@ -83,6 +83,7 @@ mlx5_esw_bridge_mdb_flow_create(u16 esw_owner_vhca_id, struct mlx5_esw_bridge_md
 		i++;
 	}
 
+	rule_spec->flow_context.flags |= FLOW_CONTEXT_UPLINK_HAIRPIN_EN;
 	rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
 	dmac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value, outer_headers.dmac_47_16);
 	ether_addr_copy(dmac_v, entry->key.addr);
@@ -587,6 +588,7 @@ mlx5_esw_bridge_mcast_vlan_flow_create(u16 vlan_proto, struct mlx5_esw_bridge_po
 	if (!rule_spec)
 		return ERR_PTR(-ENOMEM);
 
+	rule_spec->flow_context.flags |= FLOW_CONTEXT_UPLINK_HAIRPIN_EN;
 	rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
 
 	flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
@@ -662,6 +664,7 @@ mlx5_esw_bridge_mcast_fwd_flow_create(struct mlx5_esw_bridge_port *port)
 		dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
 		dest.vport.vhca_id = port->esw_owner_vhca_id;
 	}
+	rule_spec->flow_context.flags |= FLOW_CONTEXT_UPLINK_HAIRPIN_EN;
 	handle = mlx5_add_flow_rules(port->mcast.ft, rule_spec, &flow_act, &dest, 1);
 
 	kvfree(rule_spec);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 1616a6144f7b..9b8599c200e2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -566,6 +566,8 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 		 fte->flow_context.flow_tag);
 	MLX5_SET(flow_context, in_flow_context, flow_source,
 		 fte->flow_context.flow_source);
+	MLX5_SET(flow_context, in_flow_context, uplink_hairpin_en,
+		 !!(fte->flow_context.flags & FLOW_CONTEXT_UPLINK_HAIRPIN_EN));
 
 	MLX5_SET(flow_context, in_flow_context, extended_destination,
 		 extended_dest);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 58845121954c..d77be1b4dd9c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -783,7 +783,7 @@ static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u32 port_num,
 		}
 
 		/* This should only be called once per mdev */
-		err = mlx5e_create_mdev_resources(mdev);
+		err = mlx5e_create_mdev_resources(mdev, false);
 		if (err)
 			goto destroy_ht;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c
index 40c7be124041..58bd749b5e4d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c
@@ -98,7 +98,7 @@ static int create_aso_cq(struct mlx5_aso_cq *cq, void *cqc_data)
 	mlx5_fill_page_frag_array(&cq->wq_ctrl.buf,
 				  (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
 
-	MLX5_SET(cqc,   cqc, cq_period_mode, DIM_CQ_PERIOD_MODE_START_FROM_EQE);
+	MLX5_SET(cqc,   cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
 	MLX5_SET(cqc,   cqc, c_eqn_or_apu_element, eqn);
 	MLX5_SET(cqc,   cqc, uar_page,      mdev->priv.uar->index);
 	MLX5_SET(cqc,   cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
index 6f9790e97fed..2ebb61ef3ea9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -788,6 +788,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
 		switch (action_type) {
 		case DR_ACTION_TYP_DROP:
 			attr.final_icm_addr = nic_dmn->drop_icm_addr;
+			attr.hit_gvmi = nic_dmn->drop_icm_addr >> 48;
 			break;
 		case DR_ACTION_TYP_FT:
 			dest_action = action;
@@ -873,11 +874,17 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
 							action->sampler->tx_icm_addr;
 			break;
 		case DR_ACTION_TYP_VPORT:
-			attr.hit_gvmi = action->vport->caps->vhca_gvmi;
-			dest_action = action;
-			attr.final_icm_addr = rx_rule ?
-				action->vport->caps->icm_address_rx :
-				action->vport->caps->icm_address_tx;
+			if (unlikely(rx_rule && action->vport->caps->num == MLX5_VPORT_UPLINK)) {
+				/* can't go to uplink on RX rule - dropping instead */
+				attr.final_icm_addr = nic_dmn->drop_icm_addr;
+				attr.hit_gvmi = nic_dmn->drop_icm_addr >> 48;
+			} else {
+				attr.hit_gvmi = action->vport->caps->vhca_gvmi;
+				dest_action = action;
+				attr.final_icm_addr = rx_rule ?
+						      action->vport->caps->icm_address_rx :
+						      action->vport->caps->icm_address_tx;
+			}
 			break;
 		case DR_ACTION_TYP_POP_VLAN:
 			if (!rx_rule && !(dmn->ste_ctx->actions_caps &
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 21753f327868..1005bb6935b6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -440,6 +440,27 @@ out:
 }
 EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_system_image_guid);
 
+int mlx5_query_nic_vport_sd_group(struct mlx5_core_dev *mdev, u8 *sd_group)
+{	/* Query nic_vport_context.sd_group into *sd_group; 0 on success, else error code */
+	int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+	u32 *out;
+	int err;
+
+	out = kvzalloc(outlen, GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	err = mlx5_query_nic_vport_context(mdev, 0, out);
+	if (err)
+		goto out;	/* *sd_group is left untouched on failure */
+
+	*sd_group = MLX5_GET(query_nic_vport_context_out, out,
+			     nic_vport_context.sd_group);
+out:	/* label and buffer are both named 'out'; C keeps labels in their own namespace */
+	kvfree(out);
+	return err;
+}
+
 int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid)
 {
 	u32 *out;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index a0e46369ae15..b334eb16da23 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -7542,6 +7542,9 @@ int stmmac_dvr_probe(struct device *device,
 		dev_err(priv->device, "unable to bring out of ahb reset: %pe\n",
 			ERR_PTR(ret));
 
+	/* Wait a bit for the reset to take effect */
+	udelay(10);
+
 	/* Init MAC and get the capabilities */
 	ret = stmmac_hw_init(priv);
 	if (ret)
diff --git a/drivers/net/fjes/fjes_hw.c b/drivers/net/fjes/fjes_hw.c
index 704e949484d0..b9b5554ea862 100644
--- a/drivers/net/fjes/fjes_hw.c
+++ b/drivers/net/fjes/fjes_hw.c
@@ -221,21 +221,25 @@ static int fjes_hw_setup(struct fjes_hw *hw)
 
 	mem_size = FJES_DEV_REQ_BUF_SIZE(hw->max_epid);
 	hw->hw_info.req_buf = kzalloc(mem_size, GFP_KERNEL);
-	if (!(hw->hw_info.req_buf))
-		return -ENOMEM;
+	if (!(hw->hw_info.req_buf)) {
+		result = -ENOMEM;
+		goto free_ep_info;
+	}
 
 	hw->hw_info.req_buf_size = mem_size;
 
 	mem_size = FJES_DEV_RES_BUF_SIZE(hw->max_epid);
 	hw->hw_info.res_buf = kzalloc(mem_size, GFP_KERNEL);
-	if (!(hw->hw_info.res_buf))
-		return -ENOMEM;
+	if (!(hw->hw_info.res_buf)) {
+		result = -ENOMEM;
+		goto free_req_buf;
+	}
 
 	hw->hw_info.res_buf_size = mem_size;
 
 	result = fjes_hw_alloc_shared_status_region(hw);
 	if (result)
-		return result;
+		goto free_res_buf;
 
 	hw->hw_info.buffer_share_bit = 0;
 	hw->hw_info.buffer_unshare_reserve_bit = 0;
@@ -246,11 +250,11 @@ static int fjes_hw_setup(struct fjes_hw *hw)
 
 			result = fjes_hw_alloc_epbuf(&buf_pair->tx);
 			if (result)
-				return result;
+				goto free_epbuf;
 
 			result = fjes_hw_alloc_epbuf(&buf_pair->rx);
 			if (result)
-				return result;
+				goto free_epbuf;
 
 			spin_lock_irqsave(&hw->rx_status_lock, flags);
 			fjes_hw_setup_epbuf(&buf_pair->tx, mac,
@@ -273,6 +277,25 @@ static int fjes_hw_setup(struct fjes_hw *hw)
 	fjes_hw_init_command_registers(hw, &param);
 
 	return 0;
+
+free_epbuf:
+	for (epidx = 0; epidx < hw->max_epid ; epidx++) {
+		if (epidx == hw->my_epid)
+			continue;
+		fjes_hw_free_epbuf(&hw->ep_shm_info[epidx].tx);
+		fjes_hw_free_epbuf(&hw->ep_shm_info[epidx].rx);
+	}
+	fjes_hw_free_shared_status_region(hw);
+free_res_buf:
+	kfree(hw->hw_info.res_buf);
+	hw->hw_info.res_buf = NULL;
+free_req_buf:
+	kfree(hw->hw_info.req_buf);
+	hw->hw_info.req_buf = NULL;
+free_ep_info:
+	kfree(hw->ep_shm_info);
+	hw->ep_shm_info = NULL;
+	return result;
 }
 
 static void fjes_hw_cleanup(struct fjes_hw *hw)
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 4406427d4617..273bd8a20122 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -44,7 +44,7 @@
 
 static unsigned int ring_size __ro_after_init = 128;
 module_param(ring_size, uint, 0444);
-MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
+MODULE_PARM_DESC(ring_size, "Ring buffer size (# of 4K pages)");
 unsigned int netvsc_ring_bytes __ro_after_init;
 
 static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE |
@@ -2807,7 +2807,7 @@ static int __init netvsc_drv_init(void)
 		pr_info("Increased ring_size to %u (min allowed)\n",
 			ring_size);
 	}
-	netvsc_ring_bytes = ring_size * PAGE_SIZE;
+	netvsc_ring_bytes = VMBUS_RING_SIZE(ring_size * 4096);
 
 	register_netdevice_notifier(&netvsc_netdev_notifier);
 
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index e34816638569..7f5426285c61 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -607,11 +607,26 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
 		return ERR_PTR(-EINVAL);
 	}
 
-	ret = skb_ensure_writable_head_tail(skb, dev);
-	if (unlikely(ret < 0)) {
-		macsec_txsa_put(tx_sa);
-		kfree_skb(skb);
-		return ERR_PTR(ret);
+	if (unlikely(skb_headroom(skb) < MACSEC_NEEDED_HEADROOM ||
+		     skb_tailroom(skb) < MACSEC_NEEDED_TAILROOM)) {
+		struct sk_buff *nskb = skb_copy_expand(skb,
+						       MACSEC_NEEDED_HEADROOM,
+						       MACSEC_NEEDED_TAILROOM,
+						       GFP_ATOMIC);
+		if (likely(nskb)) {
+			consume_skb(skb);
+			skb = nskb;
+		} else {
+			macsec_txsa_put(tx_sa);
+			kfree_skb(skb);
+			return ERR_PTR(-ENOMEM);
+		}
+	} else {
+		skb = skb_unshare(skb, GFP_ATOMIC);
+		if (!skb) {
+			macsec_txsa_put(tx_sa);
+			return ERR_PTR(-ENOMEM);
+		}
 	}
 
 	unprotected_len = skb->len;
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 81c20eb4b54b..dad720138baa 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -120,6 +120,11 @@
  */
 #define LAN8814_1PPM_FORMAT			17179
 
+#define PTP_RX_VERSION				0x0248
+#define PTP_TX_VERSION				0x0288
+#define PTP_MAX_VERSION(x)			(((x) & GENMASK(7, 0)) << 8)
+#define PTP_MIN_VERSION(x)			((x) & GENMASK(7, 0))
+
 #define PTP_RX_MOD				0x024F
 #define PTP_RX_MOD_BAD_UDPV4_CHKSUM_FORCE_FCS_DIS_ BIT(3)
 #define PTP_RX_TIMESTAMP_EN			0x024D
@@ -3150,6 +3155,12 @@ static void lan8814_ptp_init(struct phy_device *phydev)
 	lanphy_write_page_reg(phydev, 5, PTP_TX_PARSE_IP_ADDR_EN, 0);
 	lanphy_write_page_reg(phydev, 5, PTP_RX_PARSE_IP_ADDR_EN, 0);
 
+	/* Disable checking for minorVersionPTP field */
+	lanphy_write_page_reg(phydev, 5, PTP_RX_VERSION,
+			      PTP_MAX_VERSION(0xff) | PTP_MIN_VERSION(0x0));
+	lanphy_write_page_reg(phydev, 5, PTP_TX_VERSION,
+			      PTP_MAX_VERSION(0xff) | PTP_MIN_VERSION(0x0));
+
 	skb_queue_head_init(&ptp_priv->tx_queue);
 	skb_queue_head_init(&ptp_priv->rx_queue);
 	INIT_LIST_HEAD(&ptp_priv->rx_ts_list);
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index afa5497f7c35..4a4f8c8e79fa 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1630,13 +1630,19 @@ static int tun_xdp_act(struct tun_struct *tun, struct bpf_prog *xdp_prog,
 	switch (act) {
 	case XDP_REDIRECT:
 		err = xdp_do_redirect(tun->dev, xdp, xdp_prog);
-		if (err)
+		if (err) {
+			dev_core_stats_rx_dropped_inc(tun->dev);
 			return err;
+		}
+		dev_sw_netstats_rx_add(tun->dev, xdp->data_end - xdp->data);
 		break;
 	case XDP_TX:
 		err = tun_xdp_tx(tun->dev, xdp);
-		if (err < 0)
+		if (err < 0) {
+			dev_core_stats_rx_dropped_inc(tun->dev);
 			return err;
+		}
+		dev_sw_netstats_rx_add(tun->dev, xdp->data_end - xdp->data);
 		break;
 	case XDP_PASS:
 		break;
diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h
index 7e3b6779f4e9..02e160d831be 100644
--- a/drivers/net/wireless/ath/ath11k/core.h
+++ b/drivers/net/wireless/ath/ath11k/core.h
@@ -368,10 +368,6 @@ struct ath11k_vif {
 	struct ieee80211_chanctx_conf chanctx;
 	struct ath11k_arp_ns_offload arp_ns_offload;
 	struct ath11k_rekey_data rekey_data;
-
-#ifdef CONFIG_ATH11K_DEBUGFS
-	struct dentry *debugfs_twt;
-#endif /* CONFIG_ATH11K_DEBUGFS */
 };
 
 struct ath11k_vif_iter {
diff --git a/drivers/net/wireless/ath/ath11k/debugfs.c b/drivers/net/wireless/ath/ath11k/debugfs.c
index a847bc0d50c0..a48e737ef35d 100644
--- a/drivers/net/wireless/ath/ath11k/debugfs.c
+++ b/drivers/net/wireless/ath/ath11k/debugfs.c
@@ -1894,35 +1894,30 @@ static const struct file_operations ath11k_fops_twt_resume_dialog = {
 	.open = simple_open
 };
 
-void ath11k_debugfs_add_interface(struct ath11k_vif *arvif)
+void ath11k_debugfs_op_vif_add(struct ieee80211_hw *hw,
+			       struct ieee80211_vif *vif)
 {
+	struct ath11k_vif *arvif = ath11k_vif_to_arvif(vif);
 	struct ath11k_base *ab = arvif->ar->ab;
+	struct dentry *debugfs_twt;
 
 	if (arvif->vif->type != NL80211_IFTYPE_AP &&
 	    !(arvif->vif->type == NL80211_IFTYPE_STATION &&
 	      test_bit(WMI_TLV_SERVICE_STA_TWT, ab->wmi_ab.svc_map)))
 		return;
 
-	arvif->debugfs_twt = debugfs_create_dir("twt",
-						arvif->vif->debugfs_dir);
-	debugfs_create_file("add_dialog", 0200, arvif->debugfs_twt,
+	debugfs_twt = debugfs_create_dir("twt",
+					 arvif->vif->debugfs_dir);
+	debugfs_create_file("add_dialog", 0200, debugfs_twt,
 			    arvif, &ath11k_fops_twt_add_dialog);
 
-	debugfs_create_file("del_dialog", 0200, arvif->debugfs_twt,
+	debugfs_create_file("del_dialog", 0200, debugfs_twt,
 			    arvif, &ath11k_fops_twt_del_dialog);
 
-	debugfs_create_file("pause_dialog", 0200, arvif->debugfs_twt,
+	debugfs_create_file("pause_dialog", 0200, debugfs_twt,
 			    arvif, &ath11k_fops_twt_pause_dialog);
 
-	debugfs_create_file("resume_dialog", 0200, arvif->debugfs_twt,
+	debugfs_create_file("resume_dialog", 0200, debugfs_twt,
 			    arvif, &ath11k_fops_twt_resume_dialog);
 }
 
-void ath11k_debugfs_remove_interface(struct ath11k_vif *arvif)
-{
-	if (!arvif->debugfs_twt)
-		return;
-
-	debugfs_remove_recursive(arvif->debugfs_twt);
-	arvif->debugfs_twt = NULL;
-}
diff --git a/drivers/net/wireless/ath/ath11k/debugfs.h b/drivers/net/wireless/ath/ath11k/debugfs.h
index 44d15845f39a..a39e458637b0 100644
--- a/drivers/net/wireless/ath/ath11k/debugfs.h
+++ b/drivers/net/wireless/ath/ath11k/debugfs.h
@@ -307,8 +307,8 @@ static inline int ath11k_debugfs_rx_filter(struct ath11k *ar)
 	return ar->debug.rx_filter;
 }
 
-void ath11k_debugfs_add_interface(struct ath11k_vif *arvif);
-void ath11k_debugfs_remove_interface(struct ath11k_vif *arvif);
+void ath11k_debugfs_op_vif_add(struct ieee80211_hw *hw,
+			       struct ieee80211_vif *vif);
 void ath11k_debugfs_add_dbring_entry(struct ath11k *ar,
 				     enum wmi_direct_buffer_module id,
 				     enum ath11k_dbg_dbr_event event,
@@ -387,14 +387,6 @@ static inline int ath11k_debugfs_get_fw_stats(struct ath11k *ar,
 	return 0;
 }
 
-static inline void ath11k_debugfs_add_interface(struct ath11k_vif *arvif)
-{
-}
-
-static inline void ath11k_debugfs_remove_interface(struct ath11k_vif *arvif)
-{
-}
-
 static inline void
 ath11k_debugfs_add_dbring_entry(struct ath11k *ar,
 				enum wmi_direct_buffer_module id,
diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index db241589424d..b13525bbbb80 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -6756,13 +6756,6 @@ static int ath11k_mac_op_add_interface(struct ieee80211_hw *hw,
 		goto err;
 	}
 
-	/* In the case of hardware recovery, debugfs files are
-	 * not deleted since ieee80211_ops.remove_interface() is
-	 * not invoked. In such cases, try to delete the files.
-	 * These will be re-created later.
-	 */
-	ath11k_debugfs_remove_interface(arvif);
-
 	memset(arvif, 0, sizeof(*arvif));
 
 	arvif->ar = ar;
@@ -6939,8 +6932,6 @@ static int ath11k_mac_op_add_interface(struct ieee80211_hw *hw,
 
 	ath11k_dp_vdev_tx_attach(ar, arvif);
 
-	ath11k_debugfs_add_interface(arvif);
-
 	if (vif->type != NL80211_IFTYPE_MONITOR &&
 	    test_bit(ATH11K_FLAG_MONITOR_CONF_ENABLED, &ar->monitor_flags)) {
 		ret = ath11k_mac_monitor_vdev_create(ar);
@@ -7056,8 +7047,6 @@ err_vdev_del:
 	/* Recalc txpower for remaining vdev */
 	ath11k_mac_txpower_recalc(ar);
 
-	ath11k_debugfs_remove_interface(arvif);
-
 	/* TODO: recal traffic pause state based on the available vdevs */
 
 	mutex_unlock(&ar->conf_mutex);
@@ -9153,6 +9142,7 @@ static const struct ieee80211_ops ath11k_ops = {
 #endif
 
 #ifdef CONFIG_ATH11K_DEBUGFS
+	.vif_add_debugfs		= ath11k_debugfs_op_vif_add,
 	.sta_add_debugfs		= ath11k_debugfs_sta_op_add,
 #endif
 
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
index 3b14f6476743..72075720969c 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
  */
 #include <linux/firmware.h>
 #include "iwl-drv.h"
@@ -1096,7 +1096,7 @@ static int iwl_dbg_tlv_override_trig_node(struct iwl_fw_runtime *fwrt,
 		node_trig = (void *)node_tlv->data;
 	}
 
-	memcpy(node_trig->data + offset, trig->data, trig_data_len);
+	memcpy((u8 *)node_trig->data + offset, trig->data, trig_data_len);
 	node_tlv->length = cpu_to_le32(size);
 
 	if (policy & IWL_FW_INI_APPLY_POLICY_OVERRIDE_CFG) {
diff --git a/drivers/net/wireless/intersil/p54/fwio.c b/drivers/net/wireless/intersil/p54/fwio.c
index b52cce38115d..c4fe70e05b9b 100644
--- a/drivers/net/wireless/intersil/p54/fwio.c
+++ b/drivers/net/wireless/intersil/p54/fwio.c
@@ -125,7 +125,7 @@ int p54_parse_firmware(struct ieee80211_hw *dev, const struct firmware *fw)
 			   "FW rev %s - Softmac protocol %x.%x\n",
 			   fw_version, priv->fw_var >> 8, priv->fw_var & 0xff);
 		snprintf(dev->wiphy->fw_version, sizeof(dev->wiphy->fw_version),
-				"%s - %x.%x", fw_version,
+				"%.19s - %x.%x", fw_version,
 				priv->fw_var >> 8, priv->fw_var & 0xff);
 	}
 
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 88f760a7cbc3..d7503aef599f 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -463,12 +463,25 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
 	}
 
 	for (shinfo->nr_frags = 0; nr_slots > 0 && shinfo->nr_frags < MAX_SKB_FRAGS;
-	     shinfo->nr_frags++, gop++, nr_slots--) {
+	     nr_slots--) {
+		if (unlikely(!txp->size)) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&queue->response_lock, flags);
+			make_tx_response(queue, txp, 0, XEN_NETIF_RSP_OKAY);
+			push_tx_responses(queue);
+			spin_unlock_irqrestore(&queue->response_lock, flags);
+			++txp;
+			continue;
+		}
+
 		index = pending_index(queue->pending_cons++);
 		pending_idx = queue->pending_ring[index];
 		xenvif_tx_create_map_op(queue, pending_idx, txp,
 				        txp == first ? extra_count : 0, gop);
 		frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
+		++shinfo->nr_frags;
+		++gop;
 
 		if (txp == first)
 			txp = txfrags;
@@ -481,20 +494,39 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
 		shinfo = skb_shinfo(nskb);
 		frags = shinfo->frags;
 
-		for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots;
-		     shinfo->nr_frags++, txp++, gop++) {
+		for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; ++txp) {
+			if (unlikely(!txp->size)) {
+				unsigned long flags;
+
+				spin_lock_irqsave(&queue->response_lock, flags);
+				make_tx_response(queue, txp, 0,
+						 XEN_NETIF_RSP_OKAY);
+				push_tx_responses(queue);
+				spin_unlock_irqrestore(&queue->response_lock,
+						       flags);
+				continue;
+			}
+
 			index = pending_index(queue->pending_cons++);
 			pending_idx = queue->pending_ring[index];
 			xenvif_tx_create_map_op(queue, pending_idx, txp, 0,
 						gop);
 			frag_set_pending_idx(&frags[shinfo->nr_frags],
 					     pending_idx);
+			++shinfo->nr_frags;
+			++gop;
 		}
 
-		skb_shinfo(skb)->frag_list = nskb;
-	} else if (nskb) {
+		if (shinfo->nr_frags) {
+			skb_shinfo(skb)->frag_list = nskb;
+			nskb = NULL;
+		}
+	}
+
+	if (nskb) {
 		/* A frag_list skb was allocated but it is no longer needed
-		 * because enough slots were converted to copy ops above.
+		 * because enough slots were converted to copy ops above or some
+		 * were empty.
 		 */
 		kfree_skb(nskb);
 	}
diff --git a/drivers/nvme/common/keyring.c b/drivers/nvme/common/keyring.c
index ee341b83eeba..a5c0431c101c 100644
--- a/drivers/nvme/common/keyring.c
+++ b/drivers/nvme/common/keyring.c
@@ -111,7 +111,7 @@ static struct key *nvme_tls_psk_lookup(struct key *keyring,
  * should be preferred to 'generated' PSKs,
  * and SHA-384 should be preferred to SHA-256.
  */
-struct nvme_tls_psk_priority_list {
+static struct nvme_tls_psk_priority_list {
 	bool generated;
 	enum nvme_tcp_tls_cipher cipher;
 } nvme_tls_psk_prio[] = {
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 0af612387083..85ab0fcf9e88 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1740,13 +1740,13 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk,
 		struct nvme_ns_head *head)
 {
 	struct request_queue *queue = disk->queue;
-	u32 size = queue_logical_block_size(queue);
+	u32 max_discard_sectors;
 
-	if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(head, UINT_MAX))
-		ctrl->max_discard_sectors =
-			nvme_lba_to_sect(head, ctrl->dmrsl);
-
-	if (ctrl->max_discard_sectors == 0) {
+	if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(head, UINT_MAX)) {
+		max_discard_sectors = nvme_lba_to_sect(head, ctrl->dmrsl);
+	} else if (ctrl->oncs & NVME_CTRL_ONCS_DSM) {
+		max_discard_sectors = UINT_MAX;
+	} else {
 		blk_queue_max_discard_sectors(queue, 0);
 		return;
 	}
@@ -1754,14 +1754,22 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk,
 	BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
 			NVME_DSM_MAX_RANGES);
 
-	queue->limits.discard_granularity = size;
-
-	/* If discard is already enabled, don't reset queue limits */
+	/*
+	 * If discard is already enabled, don't reset queue limits.
+	 *
+	 * This works around the fact that the block layer can't cope well with
+	 * updating the hardware limits when overridden through sysfs.  This is
+	 * harmless because discard limits in NVMe are purely advisory.
+	 */
 	if (queue->limits.max_discard_sectors)
 		return;
 
-	blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors);
-	blk_queue_max_discard_segments(queue, ctrl->max_discard_segments);
+	blk_queue_max_discard_sectors(queue, max_discard_sectors);
+	if (ctrl->dmrl)
+		blk_queue_max_discard_segments(queue, ctrl->dmrl);
+	else
+		blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES);
+	queue->limits.discard_granularity = queue_logical_block_size(queue);
 
 	if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
 		blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
@@ -2930,14 +2938,6 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
 	struct nvme_id_ctrl_nvm *id;
 	int ret;
 
-	if (ctrl->oncs & NVME_CTRL_ONCS_DSM) {
-		ctrl->max_discard_sectors = UINT_MAX;
-		ctrl->max_discard_segments = NVME_DSM_MAX_RANGES;
-	} else {
-		ctrl->max_discard_sectors = 0;
-		ctrl->max_discard_segments = 0;
-	}
-
 	/*
 	 * Even though NVMe spec explicitly states that MDTS is not applicable
 	 * to the write-zeroes, we are cautious and limit the size to the
@@ -2967,8 +2967,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
 	if (ret)
 		goto free_data;
 
-	if (id->dmrl)
-		ctrl->max_discard_segments = id->dmrl;
+	ctrl->dmrl = id->dmrl;
 	ctrl->dmrsl = le32_to_cpu(id->dmrsl);
 	if (id->wzsl)
 		ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 4be7f6822966..030c80818240 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -303,14 +303,13 @@ struct nvme_ctrl {
 	u32 max_hw_sectors;
 	u32 max_segments;
 	u32 max_integrity_segments;
-	u32 max_discard_sectors;
-	u32 max_discard_segments;
 	u32 max_zeroes_sectors;
 #ifdef CONFIG_BLK_DEV_ZONED
 	u32 max_zone_append;
 #endif
 	u16 crdt[3];
 	u16 oncs;
+	u8 dmrl;
 	u32 dmrsl;
 	u16 oacs;
 	u16 sqsize;
@@ -932,6 +931,10 @@ extern struct device_attribute dev_attr_ana_grpid;
 extern struct device_attribute dev_attr_ana_state;
 extern struct device_attribute subsys_attr_iopolicy;
 
+static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
+{
+	return disk->fops == &nvme_ns_head_ops;
+}
 #else
 #define multipath false
 static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
@@ -1009,6 +1012,10 @@ static inline void nvme_mpath_start_request(struct request *rq)
 static inline void nvme_mpath_end_request(struct request *rq)
 {
 }
+static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
+{
+	return false;
+}
 #endif /* CONFIG_NVME_MULTIPATH */
 
 int nvme_revalidate_zones(struct nvme_ns *ns);
@@ -1037,7 +1044,10 @@ static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
 
 static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
 {
-	return dev_to_disk(dev)->private_data;
+	struct gendisk *disk = dev_to_disk(dev);
+
+	WARN_ON(nvme_disk_is_ns_head(disk));
+	return disk->private_data;
 }
 
 #ifdef CONFIG_NVME_HWMON
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 61af7ff1a9d6..c1d6357ec98a 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1284,6 +1284,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
 	struct request *abort_req;
 	struct nvme_command cmd = { };
 	u32 csts = readl(dev->bar + NVME_REG_CSTS);
+	u8 opcode;
 
 	/* If PCI error recovery process is happening, we cannot reset or
 	 * the recovery mechanism will surely fail.
@@ -1310,8 +1311,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
 
 	if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT) {
 		dev_warn(dev->ctrl.device,
-			 "I/O %d QID %d timeout, completion polled\n",
-			 req->tag, nvmeq->qid);
+			 "I/O tag %d (%04x) QID %d timeout, completion polled\n",
+			 req->tag, nvme_cid(req), nvmeq->qid);
 		return BLK_EH_DONE;
 	}
 
@@ -1327,8 +1328,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
 		fallthrough;
 	case NVME_CTRL_DELETING:
 		dev_warn_ratelimited(dev->ctrl.device,
-			 "I/O %d QID %d timeout, disable controller\n",
-			 req->tag, nvmeq->qid);
+			 "I/O tag %d (%04x) QID %d timeout, disable controller\n",
+			 req->tag, nvme_cid(req), nvmeq->qid);
 		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
 		nvme_dev_disable(dev, true);
 		return BLK_EH_DONE;
@@ -1343,10 +1344,12 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
 	 * command was already aborted once before and still hasn't been
 	 * returned to the driver, or if this is the admin queue.
 	 */
+	opcode = nvme_req(req)->cmd->common.opcode;
 	if (!nvmeq->qid || iod->aborted) {
 		dev_warn(dev->ctrl.device,
-			 "I/O %d QID %d timeout, reset controller\n",
-			 req->tag, nvmeq->qid);
+			 "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, reset controller\n",
+			 req->tag, nvme_cid(req), opcode,
+			 nvme_opcode_str(nvmeq->qid, opcode, 0), nvmeq->qid);
 		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
 		goto disable;
 	}
@@ -1362,10 +1365,10 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
 	cmd.abort.sqid = cpu_to_le16(nvmeq->qid);
 
 	dev_warn(nvmeq->dev->ctrl.device,
-		"I/O %d (%s) QID %d timeout, aborting\n",
-		 req->tag,
-		 nvme_get_opcode_str(nvme_req(req)->cmd->common.opcode),
-		 nvmeq->qid);
+		 "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, aborting req_op:%s(%u) size:%u\n",
+		 req->tag, nvme_cid(req), opcode, nvme_get_opcode_str(opcode),
+		 nvmeq->qid, blk_op_str(req_op(req)), req_op(req),
+		 blk_rq_bytes(req));
 
 	abort_req = blk_mq_alloc_request(dev->ctrl.admin_q, nvme_req_op(&cmd),
 					 BLK_MQ_REQ_NOWAIT);
@@ -2743,10 +2746,10 @@ static void nvme_reset_work(struct work_struct *work)
 	 * controller around but remove all namespaces.
 	 */
 	if (dev->online_queues > 1) {
+		nvme_dbbuf_set(dev);
 		nvme_unquiesce_io_queues(&dev->ctrl);
 		nvme_wait_freeze(&dev->ctrl);
 		nvme_pci_update_nr_queues(dev);
-		nvme_dbbuf_set(dev);
 		nvme_unfreeze(&dev->ctrl);
 	} else {
 		dev_warn(dev->ctrl.device, "IO queues lost\n");
@@ -3408,6 +3411,8 @@ static const struct pci_device_id nvme_id_table[] = {
 		.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
 	{ PCI_DEVICE(0x1c5c, 0x174a),   /* SK Hynix P31 SSD */
 		.driver_data = NVME_QUIRK_BOGUS_NID, },
+	{ PCI_DEVICE(0x1c5c, 0x1D59),   /* SK Hynix BC901 */
+		.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
 	{ PCI_DEVICE(0x15b7, 0x2001),   /*  Sandisk Skyhawk */
 		.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
 	{ PCI_DEVICE(0x1d97, 0x2263),   /* SPCC */
diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c
index 391b1465ebfd..fc3eed00f9ff 100644
--- a/drivers/nvme/host/pr.c
+++ b/drivers/nvme/host/pr.c
@@ -98,7 +98,7 @@ static int nvme_send_pr_command(struct block_device *bdev,
 		struct nvme_command *c, void *data, unsigned int data_len)
 {
 	if (IS_ENABLED(CONFIG_NVME_MULTIPATH) &&
-	    bdev->bd_disk->fops == &nvme_ns_head_ops)
+	    nvme_disk_is_ns_head(bdev->bd_disk))
 		return nvme_send_ns_head_pr_command(bdev, c, data, data_len);
 
 	return nvme_send_ns_pr_command(bdev->bd_disk->private_data, c, data,
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index c89503da24d7..11dde0d83044 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1946,9 +1946,14 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq)
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
 	struct nvme_rdma_queue *queue = req->queue;
 	struct nvme_rdma_ctrl *ctrl = queue->ctrl;
-
-	dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n",
-		 rq->tag, nvme_rdma_queue_idx(queue));
+	u8 opcode = req->req.cmd->common.opcode;
+	u8 fctype = req->req.cmd->fabrics.fctype;
+	int qid = nvme_rdma_queue_idx(queue);
+
+	dev_warn(ctrl->ctrl.device,
+		 "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout\n",
+		 rq->tag, nvme_cid(rq), opcode,
+		 nvme_opcode_str(qid, opcode, fctype), qid);
 
 	if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_LIVE) {
 		/*
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index ac24ad102380..754e91111042 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -39,10 +39,9 @@ static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev)
 {
 	struct gendisk *disk = dev_to_disk(dev);
 
-	if (disk->fops == &nvme_bdev_ops)
-		return nvme_get_ns_from_dev(dev)->head;
-	else
+	if (nvme_disk_is_ns_head(disk))
 		return disk->private_data;
+	return nvme_get_ns_from_dev(dev)->head;
 }
 
 static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
@@ -233,7 +232,8 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj,
 	}
 #ifdef CONFIG_NVME_MULTIPATH
 	if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) {
-		if (dev_to_disk(dev)->fops != &nvme_bdev_ops) /* per-path attr */
+		/* per-path attr */
+		if (nvme_disk_is_ns_head(dev_to_disk(dev)))
 			return 0;
 		if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl))
 			return 0;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 08805f027810..d058d990532b 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1922,14 +1922,13 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl)
 						      ctrl->opts->subsysnqn);
 		if (!pskid) {
 			dev_err(ctrl->device, "no valid PSK found\n");
-			ret = -ENOKEY;
-			goto out_free_queue;
+			return -ENOKEY;
 		}
 	}
 
 	ret = nvme_tcp_alloc_queue(ctrl, 0, pskid);
 	if (ret)
-		goto out_free_queue;
+		return ret;
 
 	ret = nvme_tcp_alloc_async_req(to_tcp_ctrl(ctrl));
 	if (ret)
@@ -2433,9 +2432,9 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq)
 	int qid = nvme_tcp_queue_id(req->queue);
 
 	dev_warn(ctrl->device,
-		"queue %d: timeout cid %#x type %d opcode %#x (%s)\n",
-		nvme_tcp_queue_id(req->queue), nvme_cid(rq), pdu->hdr.type,
-		opc, nvme_opcode_str(qid, opc, fctype));
+		 "I/O tag %d (%04x) type %d opcode %#x (%s) QID %d timeout\n",
+		 rq->tag, nvme_cid(rq), pdu->hdr.type, opc,
+		 nvme_opcode_str(qid, opc, fctype), qid);
 
 	if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) {
 		/*
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index bd59990b5250..bda7a3009e85 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -1031,7 +1031,7 @@ nvmet_fc_match_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
 	list_for_each_entry(host, &tgtport->host_list, host_list) {
 		if (host->hosthandle == hosthandle && !host->invalid) {
 			if (nvmet_fc_hostport_get(host))
-				return (host);
+				return host;
 		}
 	}
 
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index c65a73433c05..ead349af30f1 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -995,11 +995,6 @@ fcloop_nport_free(struct kref *ref)
 {
 	struct fcloop_nport *nport =
 		container_of(ref, struct fcloop_nport, ref);
-	unsigned long flags;
-
-	spin_lock_irqsave(&fcloop_lock, flags);
-	list_del(&nport->nport_list);
-	spin_unlock_irqrestore(&fcloop_lock, flags);
 
 	kfree(nport);
 }
@@ -1357,6 +1352,8 @@ __unlink_remote_port(struct fcloop_nport *nport)
 		nport->tport->remoteport = NULL;
 	nport->rport = NULL;
 
+	list_del(&nport->nport_list);
+
 	return rport;
 }
 
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 4597bca43a6d..667f9c04f35d 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -37,6 +37,8 @@
 #define NVMET_RDMA_MAX_MDTS			8
 #define NVMET_RDMA_MAX_METADATA_MDTS		5
 
+#define NVMET_RDMA_BACKLOG 128
+
 struct nvmet_rdma_srq;
 
 struct nvmet_rdma_cmd {
@@ -1583,8 +1585,19 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
 	}
 
 	if (queue->host_qid == 0) {
-		/* Let inflight controller teardown complete */
-		flush_workqueue(nvmet_wq);
+		struct nvmet_rdma_queue *q;
+		int pending = 0;
+
+		/* Check for pending controller teardown */
+		mutex_lock(&nvmet_rdma_queue_mutex);
+		list_for_each_entry(q, &nvmet_rdma_queue_list, queue_list) {
+			if (q->nvme_sq.ctrl == queue->nvme_sq.ctrl &&
+			    q->state == NVMET_RDMA_Q_DISCONNECTING)
+				pending++;
+		}
+		mutex_unlock(&nvmet_rdma_queue_mutex);
+		if (pending > NVMET_RDMA_BACKLOG)
+			return NVME_SC_CONNECT_CTRL_BUSY;
 	}
 
 	ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
@@ -1880,7 +1893,7 @@ static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port)
 		goto out_destroy_id;
 	}
 
-	ret = rdma_listen(cm_id, 128);
+	ret = rdma_listen(cm_id, NVMET_RDMA_BACKLOG);
 	if (ret) {
 		pr_err("listening to %pISpcs failed (%d)\n", addr, ret);
 		goto out_destroy_id;
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 4cc27856aa8f..6a1e6bb80062 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -24,6 +24,8 @@
 #include "nvmet.h"
 
 #define NVMET_TCP_DEF_INLINE_DATA_SIZE	(4 * PAGE_SIZE)
+#define NVMET_TCP_MAXH2CDATA		0x400000 /* 4M arbitrary limit */
+#define NVMET_TCP_BACKLOG 128
 
 static int param_store_val(const char *str, int *val, int min, int max)
 {
@@ -923,7 +925,7 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue)
 	icresp->hdr.pdo = 0;
 	icresp->hdr.plen = cpu_to_le32(icresp->hdr.hlen);
 	icresp->pfv = cpu_to_le16(NVME_TCP_PFV_1_0);
-	icresp->maxdata = cpu_to_le32(0x400000); /* 16M arbitrary limit */
+	icresp->maxdata = cpu_to_le32(NVMET_TCP_MAXH2CDATA);
 	icresp->cpda = 0;
 	if (queue->hdr_digest)
 		icresp->digest |= NVME_TCP_HDR_DIGEST_ENABLE;
@@ -978,13 +980,13 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
 {
 	struct nvme_tcp_data_pdu *data = &queue->pdu.data;
 	struct nvmet_tcp_cmd *cmd;
+	unsigned int exp_data_len;
 
 	if (likely(queue->nr_cmds)) {
 		if (unlikely(data->ttag >= queue->nr_cmds)) {
 			pr_err("queue %d: received out of bound ttag %u, nr_cmds %u\n",
 				queue->idx, data->ttag, queue->nr_cmds);
-			nvmet_tcp_fatal_error(queue);
-			return -EPROTO;
+			goto err_proto;
 		}
 		cmd = &queue->cmds[data->ttag];
 	} else {
@@ -995,19 +997,32 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
 		pr_err("ttag %u unexpected data offset %u (expected %u)\n",
 			data->ttag, le32_to_cpu(data->data_offset),
 			cmd->rbytes_done);
-		/* FIXME: use path and transport errors */
-		nvmet_req_complete(&cmd->req,
-			NVME_SC_INVALID_FIELD | NVME_SC_DNR);
-		return -EPROTO;
+		goto err_proto;
 	}
 
+	exp_data_len = le32_to_cpu(data->hdr.plen) -
+			nvmet_tcp_hdgst_len(queue) -
+			nvmet_tcp_ddgst_len(queue) -
+			sizeof(*data);
+
 	cmd->pdu_len = le32_to_cpu(data->data_length);
+	if (unlikely(cmd->pdu_len != exp_data_len ||
+		     cmd->pdu_len == 0 ||
+		     cmd->pdu_len > NVMET_TCP_MAXH2CDATA)) {
+		pr_err("H2CData PDU len %u is invalid\n", cmd->pdu_len);
+		goto err_proto;
+	}
 	cmd->pdu_recv = 0;
 	nvmet_tcp_build_pdu_iovec(cmd);
 	queue->cmd = cmd;
 	queue->rcv_state = NVMET_TCP_RECV_DATA;
 
 	return 0;
+
+err_proto:
+	/* FIXME: use proper transport errors */
+	nvmet_tcp_fatal_error(queue);
+	return -EPROTO;
 }
 
 static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
@@ -1768,7 +1783,7 @@ static int nvmet_tcp_try_peek_pdu(struct nvmet_tcp_queue *queue)
 		 (int)sizeof(struct nvme_tcp_icreq_pdu));
 	if (hdr->type == nvme_tcp_icreq &&
 	    hdr->hlen == sizeof(struct nvme_tcp_icreq_pdu) &&
-	    hdr->plen == (__le32)sizeof(struct nvme_tcp_icreq_pdu)) {
+	    hdr->plen == cpu_to_le32(sizeof(struct nvme_tcp_icreq_pdu))) {
 		pr_debug("queue %d: icreq detected\n",
 			 queue->idx);
 		return len;
@@ -2053,7 +2068,7 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport)
 		goto err_sock;
 	}
 
-	ret = kernel_listen(port->sock, 128);
+	ret = kernel_listen(port->sock, NVMET_TCP_BACKLOG);
 	if (ret) {
 		pr_err("failed to listen %d on port sock\n", ret);
 		goto err_sock;
@@ -2119,8 +2134,19 @@ static u16 nvmet_tcp_install_queue(struct nvmet_sq *sq)
 		container_of(sq, struct nvmet_tcp_queue, nvme_sq);
 
 	if (sq->qid == 0) {
-		/* Let inflight controller teardown complete */
-		flush_workqueue(nvmet_wq);
+		struct nvmet_tcp_queue *q;
+		int pending = 0;
+
+		/* Check for pending controller teardown */
+		mutex_lock(&nvmet_tcp_queue_mutex);
+		list_for_each_entry(q, &nvmet_tcp_queue_list, queue_list) {
+			if (q->nvme_sq.ctrl == sq->ctrl &&
+			    q->state == NVMET_TCP_Q_DISCONNECTING)
+				pending++;
+		}
+		mutex_unlock(&nvmet_tcp_queue_mutex);
+		if (pending > NVMET_TCP_BACKLOG)
+			return NVME_SC_CONNECT_CTRL_BUSY;
 	}
 
 	queue->nr_cmds = sq->size * 2;
diff --git a/drivers/nvme/target/trace.c b/drivers/nvme/target/trace.c
index bff454d46255..6ee1f3db81d0 100644
--- a/drivers/nvme/target/trace.c
+++ b/drivers/nvme/target/trace.c
@@ -211,7 +211,7 @@ const char *nvmet_trace_disk_name(struct trace_seq *p, char *name)
 	return ret;
 }
 
-const char *nvmet_trace_ctrl_name(struct trace_seq *p, struct nvmet_ctrl *ctrl)
+const char *nvmet_trace_ctrl_id(struct trace_seq *p, u16 ctrl_id)
 {
 	const char *ret = trace_seq_buffer_ptr(p);
 
@@ -224,8 +224,8 @@ const char *nvmet_trace_ctrl_name(struct trace_seq *p, struct nvmet_ctrl *ctrl)
 	 * If we can know the extra data of the connect command in this stage,
 	 * we can update this print statement later.
 	 */
-	if (ctrl)
-		trace_seq_printf(p, "%d", ctrl->cntlid);
+	if (ctrl_id)
+		trace_seq_printf(p, "%d", ctrl_id);
 	else
 		trace_seq_printf(p, "_");
 	trace_seq_putc(p, 0);
diff --git a/drivers/nvme/target/trace.h b/drivers/nvme/target/trace.h
index 6109b3806b12..7f7ebf9558e5 100644
--- a/drivers/nvme/target/trace.h
+++ b/drivers/nvme/target/trace.h
@@ -32,18 +32,24 @@ const char *nvmet_trace_parse_fabrics_cmd(struct trace_seq *p, u8 fctype,
 	 nvmet_trace_parse_nvm_cmd(p, opcode, cdw10) :			\
 	 nvmet_trace_parse_admin_cmd(p, opcode, cdw10)))
 
-const char *nvmet_trace_ctrl_name(struct trace_seq *p, struct nvmet_ctrl *ctrl);
-#define __print_ctrl_name(ctrl)				\
-	nvmet_trace_ctrl_name(p, ctrl)
+const char *nvmet_trace_ctrl_id(struct trace_seq *p, u16 ctrl_id);
+#define __print_ctrl_id(ctrl_id)			\
+	nvmet_trace_ctrl_id(p, ctrl_id)
 
 const char *nvmet_trace_disk_name(struct trace_seq *p, char *name);
 #define __print_disk_name(name)				\
 	nvmet_trace_disk_name(p, name)
 
 #ifndef TRACE_HEADER_MULTI_READ
-static inline struct nvmet_ctrl *nvmet_req_to_ctrl(struct nvmet_req *req)
+static inline u16 nvmet_req_to_ctrl_id(struct nvmet_req *req)
 {
-	return req->sq->ctrl;
+	/*
+	 * The queue and controller pointers are not valid until an association
+	 * has been established.
+	 */
+	if (!req->sq || !req->sq->ctrl)
+		return 0;
+	return req->sq->ctrl->cntlid;
 }
 
 static inline void __assign_req_name(char *name, struct nvmet_req *req)
@@ -53,8 +59,7 @@ static inline void __assign_req_name(char *name, struct nvmet_req *req)
 		return;
 	}
 
-	strncpy(name, req->ns->device_path,
-		min_t(size_t, DISK_NAME_LEN, strlen(req->ns->device_path)));
+	strscpy_pad(name, req->ns->device_path, DISK_NAME_LEN);
 }
 #endif
 
@@ -63,7 +68,7 @@ TRACE_EVENT(nvmet_req_init,
 	TP_ARGS(req, cmd),
 	TP_STRUCT__entry(
 		__field(struct nvme_command *, cmd)
-		__field(struct nvmet_ctrl *, ctrl)
+		__field(u16, ctrl_id)
 		__array(char, disk, DISK_NAME_LEN)
 		__field(int, qid)
 		__field(u16, cid)
@@ -76,7 +81,7 @@ TRACE_EVENT(nvmet_req_init,
 	),
 	TP_fast_assign(
 		__entry->cmd = cmd;
-		__entry->ctrl = nvmet_req_to_ctrl(req);
+		__entry->ctrl_id = nvmet_req_to_ctrl_id(req);
 		__assign_req_name(__entry->disk, req);
 		__entry->qid = req->sq->qid;
 		__entry->cid = cmd->common.command_id;
@@ -85,12 +90,12 @@ TRACE_EVENT(nvmet_req_init,
 		__entry->flags = cmd->common.flags;
 		__entry->nsid = le32_to_cpu(cmd->common.nsid);
 		__entry->metadata = le64_to_cpu(cmd->common.metadata);
-		memcpy(__entry->cdw10, &cmd->common.cdw10,
+		memcpy(__entry->cdw10, &cmd->common.cdws,
 			sizeof(__entry->cdw10));
 	),
 	TP_printk("nvmet%s: %sqid=%d, cmdid=%u, nsid=%u, flags=%#x, "
 		  "meta=%#llx, cmd=(%s, %s)",
-		__print_ctrl_name(__entry->ctrl),
+		__print_ctrl_id(__entry->ctrl_id),
 		__print_disk_name(__entry->disk),
 		__entry->qid, __entry->cid, __entry->nsid,
 		__entry->flags, __entry->metadata,
@@ -104,7 +109,7 @@ TRACE_EVENT(nvmet_req_complete,
 	TP_PROTO(struct nvmet_req *req),
 	TP_ARGS(req),
 	TP_STRUCT__entry(
-		__field(struct nvmet_ctrl *, ctrl)
+		__field(u16, ctrl_id)
 		__array(char, disk, DISK_NAME_LEN)
 		__field(int, qid)
 		__field(int, cid)
@@ -112,7 +117,7 @@ TRACE_EVENT(nvmet_req_complete,
 		__field(u16, status)
 	),
 	TP_fast_assign(
-		__entry->ctrl = nvmet_req_to_ctrl(req);
+		__entry->ctrl_id = nvmet_req_to_ctrl_id(req);
 		__entry->qid = req->cq->qid;
 		__entry->cid = req->cqe->command_id;
 		__entry->result = le64_to_cpu(req->cqe->result.u64);
@@ -120,7 +125,7 @@ TRACE_EVENT(nvmet_req_complete,
 		__assign_req_name(__entry->disk, req);
 	),
 	TP_printk("nvmet%s: %sqid=%d, cmdid=%u, res=%#llx, status=%#x",
-		__print_ctrl_name(__entry->ctrl),
+		__print_ctrl_id(__entry->ctrl_id),
 		__print_disk_name(__entry->disk),
 		__entry->qid, __entry->cid, __entry->result, __entry->status)
 
diff --git a/drivers/power/reset/as3722-poweroff.c b/drivers/power/reset/as3722-poweroff.c
index 829e0dba2fda..ab3350ce2d62 100644
--- a/drivers/power/reset/as3722-poweroff.c
+++ b/drivers/power/reset/as3722-poweroff.c
@@ -61,13 +61,11 @@ static int as3722_poweroff_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int as3722_poweroff_remove(struct platform_device *pdev)
+static void as3722_poweroff_remove(struct platform_device *pdev)
 {
 	if (pm_power_off == as3722_pm_power_off)
 		pm_power_off = NULL;
 	as3722_pm_poweroff = NULL;
-
-	return 0;
 }
 
 static struct platform_driver as3722_poweroff_driver = {
@@ -75,7 +73,7 @@ static struct platform_driver as3722_poweroff_driver = {
 		.name = "as3722-power-off",
 	},
 	.probe = as3722_poweroff_probe,
-	.remove = as3722_poweroff_remove,
+	.remove_new = as3722_poweroff_remove,
 };
 
 module_platform_driver(as3722_poweroff_driver);
diff --git a/drivers/power/reset/at91-poweroff.c b/drivers/power/reset/at91-poweroff.c
index dd5399785b69..93eece027865 100644
--- a/drivers/power/reset/at91-poweroff.c
+++ b/drivers/power/reset/at91-poweroff.c
@@ -57,7 +57,7 @@ static struct shdwc {
 	void __iomem *mpddrc_base;
 } at91_shdwc;
 
-static void __init at91_wakeup_status(struct platform_device *pdev)
+static void at91_wakeup_status(struct platform_device *pdev)
 {
 	const char *reason;
 	u32 reg = readl(at91_shdwc.shdwc_base + AT91_SHDW_SR);
@@ -149,7 +149,7 @@ static void at91_poweroff_dt_set_wakeup_mode(struct platform_device *pdev)
 	writel(wakeup_mode | mode, at91_shdwc.shdwc_base + AT91_SHDW_MR);
 }
 
-static int __init at91_poweroff_probe(struct platform_device *pdev)
+static int at91_poweroff_probe(struct platform_device *pdev)
 {
 	struct device_node *np;
 	u32 ddr_type;
@@ -202,7 +202,7 @@ clk_disable:
 	return ret;
 }
 
-static int __exit at91_poweroff_remove(struct platform_device *pdev)
+static void at91_poweroff_remove(struct platform_device *pdev)
 {
 	if (pm_power_off == at91_poweroff)
 		pm_power_off = NULL;
@@ -211,8 +211,6 @@ static int __exit at91_poweroff_remove(struct platform_device *pdev)
 		iounmap(at91_shdwc.mpddrc_base);
 
 	clk_disable_unprepare(at91_shdwc.sclk);
-
-	return 0;
 }
 
 static const struct of_device_id at91_poweroff_of_match[] = {
@@ -224,13 +222,14 @@ static const struct of_device_id at91_poweroff_of_match[] = {
 MODULE_DEVICE_TABLE(of, at91_poweroff_of_match);
 
 static struct platform_driver at91_poweroff_driver = {
-	.remove = __exit_p(at91_poweroff_remove),
+	.probe = at91_poweroff_probe,
+	.remove_new = at91_poweroff_remove,
 	.driver = {
 		.name = "at91-poweroff",
 		.of_match_table = at91_poweroff_of_match,
 	},
 };
-module_platform_driver_probe(at91_poweroff_driver, at91_poweroff_probe);
+module_platform_driver(at91_poweroff_driver);
 
 MODULE_AUTHOR("Atmel Corporation");
 MODULE_DESCRIPTION("Shutdown driver for Atmel SoCs");
diff --git a/drivers/power/reset/at91-reset.c b/drivers/power/reset/at91-reset.c
index aa9b012d3d00..16512654295f 100644
--- a/drivers/power/reset/at91-reset.c
+++ b/drivers/power/reset/at91-reset.c
@@ -337,7 +337,7 @@ static int at91_rcdev_init(struct at91_reset *reset,
 	return devm_reset_controller_register(&pdev->dev, &reset->rcdev);
 }
 
-static int __init at91_reset_probe(struct platform_device *pdev)
+static int at91_reset_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *match;
 	struct at91_reset *reset;
@@ -417,24 +417,23 @@ disable_clk:
 	return ret;
 }
 
-static int __exit at91_reset_remove(struct platform_device *pdev)
+static void at91_reset_remove(struct platform_device *pdev)
 {
 	struct at91_reset *reset = platform_get_drvdata(pdev);
 
 	unregister_restart_handler(&reset->nb);
 	clk_disable_unprepare(reset->sclk);
-
-	return 0;
 }
 
 static struct platform_driver at91_reset_driver = {
-	.remove = __exit_p(at91_reset_remove),
+	.probe = at91_reset_probe,
+	.remove_new = at91_reset_remove,
 	.driver = {
 		.name = "at91-reset",
 		.of_match_table = at91_reset_of_match,
 	},
 };
-module_platform_driver_probe(at91_reset_driver, at91_reset_probe);
+module_platform_driver(at91_reset_driver);
 
 MODULE_AUTHOR("Atmel Corporation");
 MODULE_DESCRIPTION("Reset driver for Atmel SoCs");
diff --git a/drivers/power/reset/at91-sama5d2_shdwc.c b/drivers/power/reset/at91-sama5d2_shdwc.c
index e76b102b57b1..959ce0dbe91d 100644
--- a/drivers/power/reset/at91-sama5d2_shdwc.c
+++ b/drivers/power/reset/at91-sama5d2_shdwc.c
@@ -107,7 +107,7 @@ static const unsigned long long sdwc_dbc_period[] = {
 	0, 3, 32, 512, 4096, 32768,
 };
 
-static void __init at91_wakeup_status(struct platform_device *pdev)
+static void at91_wakeup_status(struct platform_device *pdev)
 {
 	struct shdwc *shdw = platform_get_drvdata(pdev);
 	const struct reg_config *rcfg = shdw->rcfg;
@@ -329,7 +329,7 @@ static const struct of_device_id at91_pmc_ids[] = {
 	{ /* Sentinel. */ }
 };
 
-static int __init at91_shdwc_probe(struct platform_device *pdev)
+static int at91_shdwc_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *match;
 	struct device_node *np;
@@ -421,7 +421,7 @@ clk_disable:
 	return ret;
 }
 
-static int __exit at91_shdwc_remove(struct platform_device *pdev)
+static void at91_shdwc_remove(struct platform_device *pdev)
 {
 	struct shdwc *shdw = platform_get_drvdata(pdev);
 
@@ -437,18 +437,17 @@ static int __exit at91_shdwc_remove(struct platform_device *pdev)
 	iounmap(shdw->pmc_base);
 
 	clk_disable_unprepare(shdw->sclk);
-
-	return 0;
 }
 
 static struct platform_driver at91_shdwc_driver = {
-	.remove = __exit_p(at91_shdwc_remove),
+	.probe = at91_shdwc_probe,
+	.remove_new = at91_shdwc_remove,
 	.driver = {
 		.name = "at91-shdwc",
 		.of_match_table = at91_shdwc_of_match,
 	},
 };
-module_platform_driver_probe(at91_shdwc_driver, at91_shdwc_probe);
+module_platform_driver(at91_shdwc_driver);
 
 MODULE_AUTHOR("Nicolas Ferre <nicolas.ferre@atmel.com>");
 MODULE_DESCRIPTION("Atmel shutdown controller driver");
diff --git a/drivers/power/reset/atc260x-poweroff.c b/drivers/power/reset/atc260x-poweroff.c
index 98f20251a6d1..b4aa50e9685e 100644
--- a/drivers/power/reset/atc260x-poweroff.c
+++ b/drivers/power/reset/atc260x-poweroff.c
@@ -233,7 +233,7 @@ static int atc260x_pwrc_probe(struct platform_device *pdev)
 	return ret;
 }
 
-static int atc260x_pwrc_remove(struct platform_device *pdev)
+static void atc260x_pwrc_remove(struct platform_device *pdev)
 {
 	struct atc260x_pwrc *priv = platform_get_drvdata(pdev);
 
@@ -243,13 +243,11 @@ static int atc260x_pwrc_remove(struct platform_device *pdev)
 	}
 
 	unregister_restart_handler(&priv->restart_nb);
-
-	return 0;
 }
 
 static struct platform_driver atc260x_pwrc_driver = {
 	.probe = atc260x_pwrc_probe,
-	.remove = atc260x_pwrc_remove,
+	.remove_new = atc260x_pwrc_remove,
 	.driver = {
 		.name = "atc260x-pwrc",
 	},
diff --git a/drivers/power/reset/gpio-restart.c b/drivers/power/reset/gpio-restart.c
index 3aa19765772d..d1e177176fa1 100644
--- a/drivers/power/reset/gpio-restart.c
+++ b/drivers/power/reset/gpio-restart.c
@@ -17,17 +17,14 @@
 
 struct gpio_restart {
 	struct gpio_desc *reset_gpio;
-	struct notifier_block restart_handler;
 	u32 active_delay_ms;
 	u32 inactive_delay_ms;
 	u32 wait_delay_ms;
 };
 
-static int gpio_restart_notify(struct notifier_block *this,
-				unsigned long mode, void *cmd)
+static int gpio_restart_notify(struct sys_off_data *data)
 {
-	struct gpio_restart *gpio_restart =
-		container_of(this, struct gpio_restart, restart_handler);
+	struct gpio_restart *gpio_restart = data->cb_data;
 
 	/* drive it active, also inactive->active edge */
 	gpiod_direction_output(gpio_restart->reset_gpio, 1);
@@ -52,6 +49,7 @@ static int gpio_restart_probe(struct platform_device *pdev)
 {
 	struct gpio_restart *gpio_restart;
 	bool open_source = false;
+	int priority = 129;
 	u32 property;
 	int ret;
 
@@ -71,8 +69,6 @@ static int gpio_restart_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	gpio_restart->restart_handler.notifier_call = gpio_restart_notify;
-	gpio_restart->restart_handler.priority = 129;
 	gpio_restart->active_delay_ms = 100;
 	gpio_restart->inactive_delay_ms = 100;
 	gpio_restart->wait_delay_ms = 3000;
@@ -83,7 +79,7 @@ static int gpio_restart_probe(struct platform_device *pdev)
 			dev_err(&pdev->dev, "Invalid priority property: %u\n",
 					property);
 		else
-			gpio_restart->restart_handler.priority = property;
+			priority = property;
 	}
 
 	of_property_read_u32(pdev->dev.of_node, "active-delay",
@@ -93,9 +89,11 @@ static int gpio_restart_probe(struct platform_device *pdev)
 	of_property_read_u32(pdev->dev.of_node, "wait-delay",
 			&gpio_restart->wait_delay_ms);
 
-	platform_set_drvdata(pdev, gpio_restart);
-
-	ret = register_restart_handler(&gpio_restart->restart_handler);
+	ret = devm_register_sys_off_handler(&pdev->dev,
+					    SYS_OFF_MODE_RESTART,
+					    priority,
+					    gpio_restart_notify,
+					    gpio_restart);
 	if (ret) {
 		dev_err(&pdev->dev, "%s: cannot register restart handler, %d\n",
 				__func__, ret);
@@ -105,19 +103,6 @@ static int gpio_restart_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static void gpio_restart_remove(struct platform_device *pdev)
-{
-	struct gpio_restart *gpio_restart = platform_get_drvdata(pdev);
-	int ret;
-
-	ret = unregister_restart_handler(&gpio_restart->restart_handler);
-	if (ret) {
-		dev_err(&pdev->dev,
-				"%s: cannot unregister restart handler, %d\n",
-				__func__, ret);
-	}
-}
-
 static const struct of_device_id of_gpio_restart_match[] = {
 	{ .compatible = "gpio-restart", },
 	{},
@@ -125,7 +110,6 @@ static const struct of_device_id of_gpio_restart_match[] = {
 
 static struct platform_driver gpio_restart_driver = {
 	.probe = gpio_restart_probe,
-	.remove_new = gpio_restart_remove,
 	.driver = {
 		.name = "restart-gpio",
 		.of_match_table = of_gpio_restart_match,
diff --git a/drivers/power/reset/ltc2952-poweroff.c b/drivers/power/reset/ltc2952-poweroff.c
index eea05921a054..fa25fbd53934 100644
--- a/drivers/power/reset/ltc2952-poweroff.c
+++ b/drivers/power/reset/ltc2952-poweroff.c
@@ -286,7 +286,7 @@ static int ltc2952_poweroff_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int ltc2952_poweroff_remove(struct platform_device *pdev)
+static void ltc2952_poweroff_remove(struct platform_device *pdev)
 {
 	struct ltc2952_poweroff *data = platform_get_drvdata(pdev);
 
@@ -295,7 +295,6 @@ static int ltc2952_poweroff_remove(struct platform_device *pdev)
 	hrtimer_cancel(&data->timer_wde);
 	atomic_notifier_chain_unregister(&panic_notifier_list,
 					 &data->panic_notifier);
-	return 0;
 }
 
 static const struct of_device_id of_ltc2952_poweroff_match[] = {
@@ -306,7 +305,7 @@ MODULE_DEVICE_TABLE(of, of_ltc2952_poweroff_match);
 
 static struct platform_driver ltc2952_poweroff_driver = {
 	.probe = ltc2952_poweroff_probe,
-	.remove = ltc2952_poweroff_remove,
+	.remove_new = ltc2952_poweroff_remove,
 	.driver = {
 		.name = "ltc2952-poweroff",
 		.of_match_table = of_ltc2952_poweroff_match,
diff --git a/drivers/power/reset/mt6323-poweroff.c b/drivers/power/reset/mt6323-poweroff.c
index 108167f7738b..57a63c0ab7fb 100644
--- a/drivers/power/reset/mt6323-poweroff.c
+++ b/drivers/power/reset/mt6323-poweroff.c
@@ -70,12 +70,10 @@ static int mt6323_pwrc_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int mt6323_pwrc_remove(struct platform_device *pdev)
+static void mt6323_pwrc_remove(struct platform_device *pdev)
 {
 	if (pm_power_off == &mt6323_do_pwroff)
 		pm_power_off = NULL;
-
-	return 0;
 }
 
 static const struct of_device_id mt6323_pwrc_dt_match[] = {
@@ -86,7 +84,7 @@ MODULE_DEVICE_TABLE(of, mt6323_pwrc_dt_match);
 
 static struct platform_driver mt6323_pwrc_driver = {
 	.probe          = mt6323_pwrc_probe,
-	.remove         = mt6323_pwrc_remove,
+	.remove_new     = mt6323_pwrc_remove,
 	.driver         = {
 		.name   = "mt6323-pwrc",
 		.of_match_table = mt6323_pwrc_dt_match,
diff --git a/drivers/power/reset/pwr-mlxbf.c b/drivers/power/reset/pwr-mlxbf.c
index de35d24bb7ef..1775b318d0ef 100644
--- a/drivers/power/reset/pwr-mlxbf.c
+++ b/drivers/power/reset/pwr-mlxbf.c
@@ -17,11 +17,17 @@
 #include <linux/types.h>
 
 struct pwr_mlxbf {
-	struct work_struct send_work;
+	struct work_struct reboot_work;
+	struct work_struct shutdown_work;
 	const char *hid;
 };
 
-static void pwr_mlxbf_send_work(struct work_struct *work)
+static void pwr_mlxbf_reboot_work(struct work_struct *work)
+{
+	acpi_bus_generate_netlink_event("button/reboot.*", "Reboot Button", 0x80, 1);
+}
+
+static void pwr_mlxbf_shutdown_work(struct work_struct *work)
 {
 	acpi_bus_generate_netlink_event("button/power.*", "Power Button", 0x80, 1);
 }
@@ -33,10 +39,10 @@ static irqreturn_t pwr_mlxbf_irq(int irq, void *ptr)
 	struct pwr_mlxbf *priv = ptr;
 
 	if (!strncmp(priv->hid, rst_pwr_hid, 8))
-		emergency_restart();
+		schedule_work(&priv->reboot_work);
 
 	if (!strncmp(priv->hid, low_pwr_hid, 8))
-		schedule_work(&priv->send_work);
+		schedule_work(&priv->shutdown_work);
 
 	return IRQ_HANDLED;
 }
@@ -64,7 +70,11 @@ static int pwr_mlxbf_probe(struct platform_device *pdev)
 	if (irq < 0)
 		return dev_err_probe(dev, irq, "Error getting %s irq.\n", priv->hid);
 
-	err = devm_work_autocancel(dev, &priv->send_work, pwr_mlxbf_send_work);
+	err = devm_work_autocancel(dev, &priv->shutdown_work, pwr_mlxbf_shutdown_work);
+	if (err)
+		return err;
+
+	err = devm_work_autocancel(dev, &priv->reboot_work, pwr_mlxbf_reboot_work);
 	if (err)
 		return err;
 
diff --git a/drivers/power/reset/qnap-poweroff.c b/drivers/power/reset/qnap-poweroff.c
index 0ddf7f25f7b8..e0f2ff6b147c 100644
--- a/drivers/power/reset/qnap-poweroff.c
+++ b/drivers/power/reset/qnap-poweroff.c
@@ -111,15 +111,14 @@ static int qnap_power_off_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int qnap_power_off_remove(struct platform_device *pdev)
+static void qnap_power_off_remove(struct platform_device *pdev)
 {
 	pm_power_off = NULL;
-	return 0;
 }
 
 static struct platform_driver qnap_power_off_driver = {
 	.probe	= qnap_power_off_probe,
-	.remove	= qnap_power_off_remove,
+	.remove_new = qnap_power_off_remove,
 	.driver	= {
 		.name	= "qnap_power_off",
 		.of_match_table = of_match_ptr(qnap_power_off_of_match_table),
diff --git a/drivers/power/reset/regulator-poweroff.c b/drivers/power/reset/regulator-poweroff.c
index 7f87fbb8b051..15160809c423 100644
--- a/drivers/power/reset/regulator-poweroff.c
+++ b/drivers/power/reset/regulator-poweroff.c
@@ -52,12 +52,10 @@ static int regulator_poweroff_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int regulator_poweroff_remove(__maybe_unused struct platform_device *pdev)
+static void regulator_poweroff_remove(struct platform_device *pdev)
 {
 	if (pm_power_off == &regulator_poweroff_do_poweroff)
 		pm_power_off = NULL;
-
-	return 0;
 }
 
 static const struct of_device_id of_regulator_poweroff_match[] = {
@@ -68,7 +66,7 @@ MODULE_DEVICE_TABLE(of, of_regulator_poweroff_match);
 
 static struct platform_driver regulator_poweroff_driver = {
 	.probe = regulator_poweroff_probe,
-	.remove = regulator_poweroff_remove,
+	.remove_new = regulator_poweroff_remove,
 	.driver = {
 		.name = "poweroff-regulator",
 		.of_match_table = of_regulator_poweroff_match,
diff --git a/drivers/power/reset/restart-poweroff.c b/drivers/power/reset/restart-poweroff.c
index 28f1822db162..f4d6004793d3 100644
--- a/drivers/power/reset/restart-poweroff.c
+++ b/drivers/power/reset/restart-poweroff.c
@@ -33,12 +33,10 @@ static int restart_poweroff_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int restart_poweroff_remove(struct platform_device *pdev)
+static void restart_poweroff_remove(struct platform_device *pdev)
 {
 	if (pm_power_off == &restart_poweroff_do_poweroff)
 		pm_power_off = NULL;
-
-	return 0;
 }
 
 static const struct of_device_id of_restart_poweroff_match[] = {
@@ -49,7 +47,7 @@ MODULE_DEVICE_TABLE(of, of_restart_poweroff_match);
 
 static struct platform_driver restart_poweroff_driver = {
 	.probe = restart_poweroff_probe,
-	.remove = restart_poweroff_remove,
+	.remove_new = restart_poweroff_remove,
 	.driver = {
 		.name = "poweroff-restart",
 		.of_match_table = of_restart_poweroff_match,
diff --git a/drivers/power/reset/rmobile-reset.c b/drivers/power/reset/rmobile-reset.c
index bd3b396558e0..5df9b41c68c7 100644
--- a/drivers/power/reset/rmobile-reset.c
+++ b/drivers/power/reset/rmobile-reset.c
@@ -59,11 +59,10 @@ fail_unmap:
 	return error;
 }
 
-static int rmobile_reset_remove(struct platform_device *pdev)
+static void rmobile_reset_remove(struct platform_device *pdev)
 {
 	unregister_restart_handler(&rmobile_reset_nb);
 	iounmap(sysc_base2);
-	return 0;
 }
 
 static const struct of_device_id rmobile_reset_of_match[] = {
@@ -74,7 +73,7 @@ MODULE_DEVICE_TABLE(of, rmobile_reset_of_match);
 
 static struct platform_driver rmobile_reset_driver = {
 	.probe = rmobile_reset_probe,
-	.remove = rmobile_reset_remove,
+	.remove_new = rmobile_reset_remove,
 	.driver = {
 		.name = "rmobile_reset",
 		.of_match_table = rmobile_reset_of_match,
diff --git a/drivers/power/reset/syscon-poweroff.c b/drivers/power/reset/syscon-poweroff.c
index c3aab7f59345..1b2ce7734260 100644
--- a/drivers/power/reset/syscon-poweroff.c
+++ b/drivers/power/reset/syscon-poweroff.c
@@ -76,12 +76,10 @@ static int syscon_poweroff_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int syscon_poweroff_remove(struct platform_device *pdev)
+static void syscon_poweroff_remove(struct platform_device *pdev)
 {
 	if (pm_power_off == syscon_poweroff)
 		pm_power_off = NULL;
-
-	return 0;
 }
 
 static const struct of_device_id syscon_poweroff_of_match[] = {
@@ -91,7 +89,7 @@ static const struct of_device_id syscon_poweroff_of_match[] = {
 
 static struct platform_driver syscon_poweroff_driver = {
 	.probe = syscon_poweroff_probe,
-	.remove = syscon_poweroff_remove,
+	.remove_new = syscon_poweroff_remove,
 	.driver = {
 		.name = "syscon-poweroff",
 		.of_match_table = syscon_poweroff_of_match,
diff --git a/drivers/power/reset/tps65086-restart.c b/drivers/power/reset/tps65086-restart.c
index 5ec819eac7da..ee8e9f4b837e 100644
--- a/drivers/power/reset/tps65086-restart.c
+++ b/drivers/power/reset/tps65086-restart.c
@@ -62,19 +62,21 @@ static int tps65086_restart_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int tps65086_restart_remove(struct platform_device *pdev)
+static void tps65086_restart_remove(struct platform_device *pdev)
 {
 	struct tps65086_restart *tps65086_restart = platform_get_drvdata(pdev);
 	int ret;
 
 	ret = unregister_restart_handler(&tps65086_restart->handler);
 	if (ret) {
+		/*
+		 * tps65086_restart_probe() registered the restart handler. So
+		 * unregistering should work fine. Checking the error code
+		 * shouldn't be needed, still doing it for completeness.
+		 */
 		dev_err(&pdev->dev, "%s: cannot unregister restart handler: %d\n",
 			__func__, ret);
-		return -ENODEV;
 	}
-
-	return 0;
 }
 
 static const struct platform_device_id tps65086_restart_id_table[] = {
@@ -88,7 +90,7 @@ static struct platform_driver tps65086_restart_driver = {
 		.name = "tps65086-restart",
 	},
 	.probe = tps65086_restart_probe,
-	.remove = tps65086_restart_remove,
+	.remove_new = tps65086_restart_remove,
 	.id_table = tps65086_restart_id_table,
 };
 module_platform_driver(tps65086_restart_driver);
diff --git a/drivers/power/supply/bq24190_charger.c b/drivers/power/supply/bq24190_charger.c
index 1db290ee2591..2b393eb5c282 100644
--- a/drivers/power/supply/bq24190_charger.c
+++ b/drivers/power/supply/bq24190_charger.c
@@ -36,10 +36,16 @@
 #define BQ24190_REG_POC_WDT_RESET_SHIFT		6
 #define BQ24190_REG_POC_CHG_CONFIG_MASK		(BIT(5) | BIT(4))
 #define BQ24190_REG_POC_CHG_CONFIG_SHIFT	4
-#define BQ24190_REG_POC_CHG_CONFIG_DISABLE		0x0
-#define BQ24190_REG_POC_CHG_CONFIG_CHARGE		0x1
-#define BQ24190_REG_POC_CHG_CONFIG_OTG			0x2
-#define BQ24190_REG_POC_CHG_CONFIG_OTG_ALT		0x3
+#define BQ24190_REG_POC_CHG_CONFIG_DISABLE	0x0
+#define BQ24190_REG_POC_CHG_CONFIG_CHARGE	0x1
+#define BQ24190_REG_POC_CHG_CONFIG_OTG		0x2
+#define BQ24190_REG_POC_CHG_CONFIG_OTG_ALT	0x3
+#define BQ24296_REG_POC_OTG_CONFIG_MASK		BIT(5)
+#define BQ24296_REG_POC_OTG_CONFIG_SHIFT	5
+#define BQ24296_REG_POC_CHG_CONFIG_MASK		BIT(4)
+#define BQ24296_REG_POC_CHG_CONFIG_SHIFT	4
+#define BQ24296_REG_POC_OTG_CONFIG_DISABLE	0x0
+#define BQ24296_REG_POC_OTG_CONFIG_OTG		0x1
 #define BQ24190_REG_POC_SYS_MIN_MASK		(BIT(3) | BIT(2) | BIT(1))
 #define BQ24190_REG_POC_SYS_MIN_SHIFT		1
 #define BQ24190_REG_POC_SYS_MIN_MIN			3000
@@ -134,59 +140,24 @@
 #define BQ24190_REG_F_BAT_FAULT_SHIFT		3
 #define BQ24190_REG_F_NTC_FAULT_MASK		(BIT(2) | BIT(1) | BIT(0))
 #define BQ24190_REG_F_NTC_FAULT_SHIFT		0
+#define BQ24296_REG_F_NTC_FAULT_MASK		(BIT(1) | BIT(0))
+#define BQ24296_REG_F_NTC_FAULT_SHIFT		0
 
 #define BQ24190_REG_VPRS	0x0A /* Vendor/Part/Revision Status */
 #define BQ24190_REG_VPRS_PN_MASK		(BIT(5) | BIT(4) | BIT(3))
 #define BQ24190_REG_VPRS_PN_SHIFT		3
-#define BQ24190_REG_VPRS_PN_24190			0x4
-#define BQ24190_REG_VPRS_PN_24192			0x5 /* Also 24193, 24196 */
-#define BQ24190_REG_VPRS_PN_24192I			0x3
+#define BQ24190_REG_VPRS_PN_24190		0x4
+#define BQ24190_REG_VPRS_PN_24192		0x5 /* Also 24193, 24196 */
+#define BQ24190_REG_VPRS_PN_24192I		0x3
+#define BQ24296_REG_VPRS_PN_MASK		(BIT(7) | BIT(6) | BIT(5))
+#define BQ24296_REG_VPRS_PN_SHIFT		5
+#define BQ24296_REG_VPRS_PN_24296		0x1
 #define BQ24190_REG_VPRS_TS_PROFILE_MASK	BIT(2)
 #define BQ24190_REG_VPRS_TS_PROFILE_SHIFT	2
 #define BQ24190_REG_VPRS_DEV_REG_MASK		(BIT(1) | BIT(0))
 #define BQ24190_REG_VPRS_DEV_REG_SHIFT		0
 
 /*
- * The FAULT register is latched by the bq24190 (except for NTC_FAULT)
- * so the first read after a fault returns the latched value and subsequent
- * reads return the current value.  In order to return the fault status
- * to the user, have the interrupt handler save the reg's value and retrieve
- * it in the appropriate health/status routine.
- */
-struct bq24190_dev_info {
-	struct i2c_client		*client;
-	struct device			*dev;
-	struct extcon_dev		*edev;
-	struct power_supply		*charger;
-	struct power_supply		*battery;
-	struct delayed_work		input_current_limit_work;
-	char				model_name[I2C_NAME_SIZE];
-	bool				initialized;
-	bool				irq_event;
-	bool				otg_vbus_enabled;
-	int				charge_type;
-	u16				sys_min;
-	u16				iprechg;
-	u16				iterm;
-	u32				ichg;
-	u32				ichg_max;
-	u32				vreg;
-	u32				vreg_max;
-	struct mutex			f_reg_lock;
-	u8				f_reg;
-	u8				ss_reg;
-	u8				watchdog;
-};
-
-static int bq24190_charger_set_charge_type(struct bq24190_dev_info *bdi,
-					   const union power_supply_propval *val);
-
-static const unsigned int bq24190_usb_extcon_cable[] = {
-	EXTCON_USB,
-	EXTCON_NONE,
-};
-
-/*
  * The tables below provide a 2-way mapping for the value that goes in
  * the register field and the real-world value that it represents.
  * The index of the array is the value that goes in the register; the
@@ -211,6 +182,9 @@ static const int bq24190_ccc_ichg_values[] = {
 	4096000, 4160000, 4224000, 4288000, 4352000, 4416000, 4480000, 4544000
 };
 
+/* ICHG higher than 3008mA is not supported in BQ24296 */
+#define BQ24296_CCC_ICHG_VALUES_LEN	40
+
 /* REG04[7:2] (VREG) in uV */
 static const int bq24190_cvc_vreg_values[] = {
 	3504000, 3520000, 3536000, 3552000, 3568000, 3584000, 3600000, 3616000,
@@ -228,6 +202,68 @@ static const int bq24190_ictrc_treg_values[] = {
 	600, 800, 1000, 1200
 };
 
+enum bq24190_chip {
+	BQ24190,
+	BQ24192,
+	BQ24192i,
+	BQ24196,
+	BQ24296,
+};
+
+/*
+ * The FAULT register is latched by the bq24190 (except for NTC_FAULT)
+ * so the first read after a fault returns the latched value and subsequent
+ * reads return the current value.  In order to return the fault status
+ * to the user, have the interrupt handler save the reg's value and retrieve
+ * it in the appropriate health/status routine.
+ */
+struct bq24190_dev_info {
+	struct i2c_client		*client;
+	struct device			*dev;
+	struct extcon_dev		*edev;
+	struct power_supply		*charger;
+	struct power_supply		*battery;
+	struct delayed_work		input_current_limit_work;
+	char				model_name[I2C_NAME_SIZE];
+	bool				initialized;
+	bool				irq_event;
+	bool				otg_vbus_enabled;
+	int				charge_type;
+	u16				sys_min;
+	u16				iprechg;
+	u16				iterm;
+	u32				ichg;
+	u32				ichg_max;
+	u32				vreg;
+	u32				vreg_max;
+	struct mutex			f_reg_lock;
+	u8				f_reg;
+	u8				ss_reg;
+	u8				watchdog;
+	const struct bq24190_chip_info	*info;
+};
+
+struct bq24190_chip_info {
+	int ichg_array_size;
+#ifdef CONFIG_REGULATOR
+	const struct regulator_desc *vbus_desc;
+#endif
+	int (*check_chip)(struct bq24190_dev_info *bdi);
+	int (*set_chg_config)(struct bq24190_dev_info *bdi, const u8 chg_config);
+	int (*set_otg_vbus)(struct bq24190_dev_info *bdi, bool enable);
+	u8 ntc_fault_mask;
+	int (*get_ntc_status)(const u8 value);
+};
+
+static int bq24190_charger_set_charge_type(struct bq24190_dev_info *bdi,
+					   const union power_supply_propval *val);
+
+static const unsigned int bq24190_usb_extcon_cable[] = {
+	EXTCON_USB,
+	EXTCON_NONE,
+};
+
+
 /*
  * Return the index in 'tbl' of greatest value that is less than or equal to
  * 'val'.  The index range returned is 0 to 'tbl_size' - 1.  Assumes that
@@ -529,6 +565,43 @@ static int bq24190_set_otg_vbus(struct bq24190_dev_info *bdi, bool enable)
 	return ret;
 }
 
+/* Toggle BQ24296 OTG VBUS; charging is disabled before OTG is enabled. */
+static int bq24296_set_otg_vbus(struct bq24190_dev_info *bdi, bool enable)
+{
+	u8 otg_config = BQ24296_REG_POC_OTG_CONFIG_DISABLE;
+	int ret;
+
+	ret = pm_runtime_resume_and_get(bdi->dev);
+	if (ret < 0) {
+		dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", ret);
+		return ret;
+	}
+
+	bdi->otg_vbus_enabled = enable;
+	if (enable) {
+		ret = bq24190_write_mask(bdi, BQ24190_REG_POC,
+					 BQ24296_REG_POC_CHG_CONFIG_MASK,
+					 BQ24296_REG_POC_CHG_CONFIG_SHIFT,
+					 BQ24190_REG_POC_CHG_CONFIG_DISABLE);
+		if (ret < 0)
+			goto out;
+
+		otg_config = BQ24296_REG_POC_OTG_CONFIG_OTG;
+	}
+
+	/* OTG_CONFIG is BIT(5): use its own shift, not CHG_CONFIG's. */
+	ret = bq24190_write_mask(bdi, BQ24190_REG_POC,
+				 BQ24296_REG_POC_OTG_CONFIG_MASK,
+				 BQ24296_REG_POC_OTG_CONFIG_SHIFT,
+				 otg_config);
+
+out:
+	pm_runtime_mark_last_busy(bdi->dev);
+	pm_runtime_put_autosuspend(bdi->dev);
+
+	return ret;
+}
+
 #ifdef CONFIG_REGULATOR
 static int bq24190_vbus_enable(struct regulator_dev *dev)
 {
@@ -567,6 +640,43 @@ static int bq24190_vbus_is_enabled(struct regulator_dev *dev)
 	return bdi->otg_vbus_enabled;
 }
 
+static int bq24296_vbus_enable(struct regulator_dev *dev)
+{
+	return bq24296_set_otg_vbus(rdev_get_drvdata(dev), true);
+}
+
+static int bq24296_vbus_disable(struct regulator_dev *dev)
+{
+	return bq24296_set_otg_vbus(rdev_get_drvdata(dev), false);
+}
+
+static int bq24296_vbus_is_enabled(struct regulator_dev *dev)
+{
+	struct bq24190_dev_info *bdi = rdev_get_drvdata(dev);
+	int ret;
+	u8 val;
+
+	ret = pm_runtime_resume_and_get(bdi->dev);
+	if (ret < 0) {
+		dev_warn(bdi->dev, "pm_runtime_get failed: %i\n", ret);
+		return ret;
+	}
+
+	ret = bq24190_read_mask(bdi, BQ24190_REG_POC,
+				BQ24296_REG_POC_OTG_CONFIG_MASK,
+				BQ24296_REG_POC_OTG_CONFIG_SHIFT, &val);
+
+	pm_runtime_mark_last_busy(bdi->dev);
+	pm_runtime_put_autosuspend(bdi->dev);
+
+	if (ret)
+		return ret;
+
+	bdi->otg_vbus_enabled = (val == BQ24296_REG_POC_OTG_CONFIG_OTG);
+
+	return bdi->otg_vbus_enabled;
+}
+
 static const struct regulator_ops bq24190_vbus_ops = {
 	.enable = bq24190_vbus_enable,
 	.disable = bq24190_vbus_disable,
@@ -583,6 +693,22 @@ static const struct regulator_desc bq24190_vbus_desc = {
 	.n_voltages = 1,
 };
 
+static const struct regulator_ops bq24296_vbus_ops = {
+	.enable = bq24296_vbus_enable,
+	.disable = bq24296_vbus_disable,
+	.is_enabled = bq24296_vbus_is_enabled,
+};
+
+static const struct regulator_desc bq24296_vbus_desc = {
+	.name = "usb_otg_vbus",
+	.of_match = "usb-otg-vbus",
+	.type = REGULATOR_VOLTAGE,
+	.owner = THIS_MODULE,
+	.ops = &bq24296_vbus_ops,
+	.fixed_uV = 5000000,
+	.n_voltages = 1,
+};
+
 static const struct regulator_init_data bq24190_vbus_init_data = {
 	.constraints = {
 		.valid_ops_mask = REGULATOR_CHANGE_STATUS,
@@ -602,7 +728,7 @@ static int bq24190_register_vbus_regulator(struct bq24190_dev_info *bdi)
 	else
 		cfg.init_data = &bq24190_vbus_init_data;
 	cfg.driver_data = bdi;
-	reg = devm_regulator_register(bdi->dev, &bq24190_vbus_desc, &cfg);
+	reg = devm_regulator_register(bdi->dev, bdi->info->vbus_desc, &cfg);
 	if (IS_ERR(reg)) {
 		ret = PTR_ERR(reg);
 		dev_err(bdi->dev, "Can't register regulator: %d\n", ret);
@@ -678,7 +804,7 @@ static int bq24190_set_config(struct bq24190_dev_info *bdi)
 					    BQ24190_REG_CCC_ICHG_MASK,
 					    BQ24190_REG_CCC_ICHG_SHIFT,
 					    bq24190_ccc_ichg_values,
-					    ARRAY_SIZE(bq24190_ccc_ichg_values),
+					    bdi->info->ichg_array_size,
 					    bdi->ichg);
 		if (ret < 0)
 			return ret;
@@ -777,6 +903,24 @@ static int bq24190_charger_get_charge_type(struct bq24190_dev_info *bdi,
 	return 0;
 }
 
+static int bq24190_battery_set_chg_config(struct bq24190_dev_info *bdi,
+		const u8 chg_config)
+{
+	return bq24190_write_mask(bdi, BQ24190_REG_POC,
+			BQ24190_REG_POC_CHG_CONFIG_MASK,
+			BQ24190_REG_POC_CHG_CONFIG_SHIFT,
+			chg_config);
+}
+
+static int bq24296_battery_set_chg_config(struct bq24190_dev_info *bdi,
+		const u8 chg_config)
+{
+	return bq24190_write_mask(bdi, BQ24190_REG_POC,
+			BQ24296_REG_POC_CHG_CONFIG_MASK,
+			BQ24296_REG_POC_CHG_CONFIG_SHIFT,
+			chg_config);
+}
+
 static int bq24190_charger_set_charge_type(struct bq24190_dev_info *bdi,
 		const union power_supply_propval *val)
 {
@@ -835,9 +979,50 @@ static int bq24190_charger_set_charge_type(struct bq24190_dev_info *bdi,
 			return ret;
 	}
 
-	return bq24190_write_mask(bdi, BQ24190_REG_POC,
-			BQ24190_REG_POC_CHG_CONFIG_MASK,
-			BQ24190_REG_POC_CHG_CONFIG_SHIFT, chg_config);
+	return bdi->info->set_chg_config(bdi, chg_config);
+}
+
+static int bq24190_charger_get_ntc_status(u8 value)
+{
+	int health;
+
+	switch (value >> BQ24190_REG_F_NTC_FAULT_SHIFT & 0x7) {
+	case 0x1: /* TS1  Cold */
+	case 0x3: /* TS2  Cold */
+	case 0x5: /* Both Cold */
+		health = POWER_SUPPLY_HEALTH_COLD;
+		break;
+	case 0x2: /* TS1  Hot */
+	case 0x4: /* TS2  Hot */
+	case 0x6: /* Both Hot */
+		health = POWER_SUPPLY_HEALTH_OVERHEAT;
+		break;
+	default:
+		health = POWER_SUPPLY_HEALTH_UNKNOWN;
+	}
+
+	return health;
+}
+
+static int bq24296_charger_get_ntc_status(u8 value)
+{
+	int health;
+
+	switch (value >> BQ24296_REG_F_NTC_FAULT_SHIFT & 0x3) {
+	case 0x0: /* Normal */
+		health = POWER_SUPPLY_HEALTH_GOOD;
+		break;
+	case 0x1: /* Hot */
+		health = POWER_SUPPLY_HEALTH_OVERHEAT;
+		break;
+	case 0x2: /* Cold */
+		health = POWER_SUPPLY_HEALTH_COLD;
+		break;
+	default:
+		health = POWER_SUPPLY_HEALTH_UNKNOWN;
+	}
+
+	return health;
 }
 
 static int bq24190_charger_get_health(struct bq24190_dev_info *bdi,
@@ -850,21 +1035,8 @@ static int bq24190_charger_get_health(struct bq24190_dev_info *bdi,
 	v = bdi->f_reg;
 	mutex_unlock(&bdi->f_reg_lock);
 
-	if (v & BQ24190_REG_F_NTC_FAULT_MASK) {
-		switch (v >> BQ24190_REG_F_NTC_FAULT_SHIFT & 0x7) {
-		case 0x1: /* TS1  Cold */
-		case 0x3: /* TS2  Cold */
-		case 0x5: /* Both Cold */
-			health = POWER_SUPPLY_HEALTH_COLD;
-			break;
-		case 0x2: /* TS1  Hot */
-		case 0x4: /* TS2  Hot */
-		case 0x6: /* Both Hot */
-			health = POWER_SUPPLY_HEALTH_OVERHEAT;
-			break;
-		default:
-			health = POWER_SUPPLY_HEALTH_UNKNOWN;
-		}
+	if (v & bdi->info->ntc_fault_mask) {
+		health = bdi->info->get_ntc_status(v);
 	} else if (v & BQ24190_REG_F_BAT_FAULT_MASK) {
 		health = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
 	} else if (v & BQ24190_REG_F_CHRG_FAULT_MASK) {
@@ -1015,7 +1187,7 @@ static int bq24190_charger_get_current(struct bq24190_dev_info *bdi,
 	ret = bq24190_get_field_val(bdi, BQ24190_REG_CCC,
 			BQ24190_REG_CCC_ICHG_MASK, BQ24190_REG_CCC_ICHG_SHIFT,
 			bq24190_ccc_ichg_values,
-			ARRAY_SIZE(bq24190_ccc_ichg_values), &curr);
+			bdi->info->ichg_array_size, &curr);
 	if (ret < 0)
 		return ret;
 
@@ -1055,7 +1227,7 @@ static int bq24190_charger_set_current(struct bq24190_dev_info *bdi,
 	ret = bq24190_set_field_val(bdi, BQ24190_REG_CCC,
 			BQ24190_REG_CCC_ICHG_MASK, BQ24190_REG_CCC_ICHG_SHIFT,
 			bq24190_ccc_ichg_values,
-			ARRAY_SIZE(bq24190_ccc_ichg_values), curr);
+			bdi->info->ichg_array_size, curr);
 	if (ret < 0)
 		return ret;
 
@@ -1395,26 +1567,9 @@ static int bq24190_battery_get_health(struct bq24190_dev_info *bdi,
 	if (v & BQ24190_REG_F_BAT_FAULT_MASK) {
 		health = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
 	} else {
-		v &= BQ24190_REG_F_NTC_FAULT_MASK;
-		v >>= BQ24190_REG_F_NTC_FAULT_SHIFT;
+		v &= bdi->info->ntc_fault_mask;
 
-		switch (v) {
-		case 0x0: /* Normal */
-			health = POWER_SUPPLY_HEALTH_GOOD;
-			break;
-		case 0x1: /* TS1 Cold */
-		case 0x3: /* TS2 Cold */
-		case 0x5: /* Both Cold */
-			health = POWER_SUPPLY_HEALTH_COLD;
-			break;
-		case 0x2: /* TS1 Hot */
-		case 0x4: /* TS2 Hot */
-		case 0x6: /* Both Hot */
-			health = POWER_SUPPLY_HEALTH_OVERHEAT;
-			break;
-		default:
-			health = POWER_SUPPLY_HEALTH_UNKNOWN;
-		}
+		health = v ? bdi->info->get_ntc_status(v) : POWER_SUPPLY_HEALTH_GOOD;
 	}
 
 	val->intval = health;
@@ -1601,12 +1756,13 @@ static int bq24190_configure_usb_otg(struct bq24190_dev_info *bdi, u8 ss_reg)
 static void bq24190_check_status(struct bq24190_dev_info *bdi)
 {
 	const u8 battery_mask_ss = BQ24190_REG_SS_CHRG_STAT_MASK;
-	const u8 battery_mask_f = BQ24190_REG_F_BAT_FAULT_MASK
-				| BQ24190_REG_F_NTC_FAULT_MASK;
+	u8 battery_mask_f = BQ24190_REG_F_BAT_FAULT_MASK;
 	bool alert_charger = false, alert_battery = false;
 	u8 ss_reg = 0, f_reg = 0;
 	int i, ret;
 
+	battery_mask_f |= bdi->info->ntc_fault_mask;
+
 	ret = bq24190_read(bdi, BQ24190_REG_SS, &ss_reg);
 	if (ret < 0) {
 		dev_err(bdi->dev, "Can't read SS reg: %d\n", ret);
@@ -1633,7 +1789,7 @@ static void bq24190_check_status(struct bq24190_dev_info *bdi)
 			!!(f_reg & BQ24190_REG_F_BOOST_FAULT_MASK),
 			!!(f_reg & BQ24190_REG_F_CHRG_FAULT_MASK),
 			!!(f_reg & BQ24190_REG_F_BAT_FAULT_MASK),
-			!!(f_reg & BQ24190_REG_F_NTC_FAULT_MASK));
+			!!(f_reg & bdi->info->ntc_fault_mask));
 
 		mutex_lock(&bdi->f_reg_lock);
 		if ((bdi->f_reg & battery_mask_f) != (f_reg & battery_mask_f))
@@ -1696,12 +1852,11 @@ static irqreturn_t bq24190_irq_handler_thread(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static int bq24190_hw_init(struct bq24190_dev_info *bdi)
+static int bq24190_check_chip(struct bq24190_dev_info *bdi)
 {
 	u8 v;
 	int ret;
 
-	/* First check that the device really is what its supposed to be */
 	ret = bq24190_read_mask(bdi, BQ24190_REG_VPRS,
 			BQ24190_REG_VPRS_PN_MASK,
 			BQ24190_REG_VPRS_PN_SHIFT,
@@ -1719,6 +1874,40 @@ static int bq24190_hw_init(struct bq24190_dev_info *bdi)
 		return -ENODEV;
 	}
 
+	return 0;
+}
+
+/*
+ * Read the part-number field of the VPRS register and check that it is the
+ * BQ24296 value.  Returns 0 on a match, the read error if the read fails,
+ * or -ENODEV (after logging the value) for any other part number.
+ */
+static int bq24296_check_chip(struct bq24190_dev_info *bdi)
+{
+	u8 part;
+	int err;
+
+	err = bq24190_read_mask(bdi, BQ24190_REG_VPRS,
+			BQ24296_REG_VPRS_PN_MASK,
+			BQ24296_REG_VPRS_PN_SHIFT, &part);
+	if (err < 0)
+		return err;
+
+	if (part == BQ24296_REG_VPRS_PN_24296)
+		return 0;
+
+	dev_err(bdi->dev, "Error unknown model: 0x%02x\n", part);
+	return -ENODEV;
+}
+
+static int bq24190_hw_init(struct bq24190_dev_info *bdi)
+{
+	int ret;
+
+	ret = bdi->info->check_chip(bdi);
+	if (ret < 0)
+		return ret;
+
 	ret = bq24190_register_reset(bdi);
 	if (ret < 0)
 		return ret;
@@ -1736,7 +1925,8 @@ static int bq24190_get_config(struct bq24190_dev_info *bdi)
 	struct power_supply_battery_info *info;
 	int v, idx;
 
-	idx = ARRAY_SIZE(bq24190_ccc_ichg_values) - 1;
+	idx = bdi->info->ichg_array_size - 1;
+
 	bdi->ichg_max = bq24190_ccc_ichg_values[idx];
 
 	idx = ARRAY_SIZE(bq24190_cvc_vreg_values) - 1;
@@ -1781,6 +1971,64 @@ static int bq24190_get_config(struct bq24190_dev_info *bdi)
 	return 0;
 }
 
+static const struct bq24190_chip_info bq24190_chip_info_tbl[] = {
+	[BQ24190] = {
+		.ichg_array_size = ARRAY_SIZE(bq24190_ccc_ichg_values),
+#ifdef CONFIG_REGULATOR
+		.vbus_desc = &bq24190_vbus_desc,
+#endif
+		.check_chip = bq24190_check_chip,
+		.set_chg_config = bq24190_battery_set_chg_config,
+		.ntc_fault_mask = BQ24190_REG_F_NTC_FAULT_MASK,
+		.get_ntc_status = bq24190_charger_get_ntc_status,
+		.set_otg_vbus = bq24190_set_otg_vbus,
+	},
+	[BQ24192] = {
+		.ichg_array_size = ARRAY_SIZE(bq24190_ccc_ichg_values),
+#ifdef CONFIG_REGULATOR
+		.vbus_desc = &bq24190_vbus_desc,
+#endif
+		.check_chip = bq24190_check_chip,
+		.set_chg_config = bq24190_battery_set_chg_config,
+		.ntc_fault_mask = BQ24190_REG_F_NTC_FAULT_MASK,
+		.get_ntc_status = bq24190_charger_get_ntc_status,
+		.set_otg_vbus = bq24190_set_otg_vbus,
+	},
+	[BQ24192i] = {
+		.ichg_array_size = ARRAY_SIZE(bq24190_ccc_ichg_values),
+#ifdef CONFIG_REGULATOR
+		.vbus_desc = &bq24190_vbus_desc,
+#endif
+		.check_chip = bq24190_check_chip,
+		.set_chg_config = bq24190_battery_set_chg_config,
+		.ntc_fault_mask = BQ24190_REG_F_NTC_FAULT_MASK,
+		.get_ntc_status = bq24190_charger_get_ntc_status,
+		.set_otg_vbus = bq24190_set_otg_vbus,
+	},
+	[BQ24196] = {
+		.ichg_array_size = ARRAY_SIZE(bq24190_ccc_ichg_values),
+#ifdef CONFIG_REGULATOR
+		.vbus_desc = &bq24190_vbus_desc,
+#endif
+		.check_chip = bq24190_check_chip,
+		.set_chg_config = bq24190_battery_set_chg_config,
+		.ntc_fault_mask = BQ24190_REG_F_NTC_FAULT_MASK,
+		.get_ntc_status = bq24190_charger_get_ntc_status,
+		.set_otg_vbus = bq24190_set_otg_vbus,
+	},
+	[BQ24296] = {
+		.ichg_array_size = BQ24296_CCC_ICHG_VALUES_LEN,
+#ifdef CONFIG_REGULATOR
+		.vbus_desc = &bq24296_vbus_desc,
+#endif
+		.check_chip = bq24296_check_chip,
+		.set_chg_config = bq24296_battery_set_chg_config,
+		.ntc_fault_mask = BQ24296_REG_F_NTC_FAULT_MASK,
+		.get_ntc_status = bq24296_charger_get_ntc_status,
+		.set_otg_vbus = bq24296_set_otg_vbus,
+	},
+};
+
 static int bq24190_probe(struct i2c_client *client)
 {
 	const struct i2c_device_id *id = i2c_client_get_device_id(client);
@@ -1804,6 +2052,7 @@ static int bq24190_probe(struct i2c_client *client)
 	bdi->client = client;
 	bdi->dev = dev;
 	strscpy(bdi->model_name, id->name, sizeof(bdi->model_name));
+	bdi->info = i2c_get_match_data(client);
 	mutex_init(&bdi->f_reg_lock);
 	bdi->charge_type = POWER_SUPPLY_CHARGE_TYPE_FAST;
 	bdi->f_reg = 0;
@@ -1940,7 +2189,7 @@ static void bq24190_shutdown(struct i2c_client *client)
 	struct bq24190_dev_info *bdi = i2c_get_clientdata(client);
 
 	/* Turn off 5V boost regulator on shutdown */
-	bq24190_set_otg_vbus(bdi, false);
+	bdi->info->set_otg_vbus(bdi, false);
 }
 
 static __maybe_unused int bq24190_runtime_suspend(struct device *dev)
@@ -2029,19 +2278,21 @@ static const struct dev_pm_ops bq24190_pm_ops = {
 };
 
 static const struct i2c_device_id bq24190_i2c_ids[] = {
-	{ "bq24190" },
-	{ "bq24192" },
-	{ "bq24192i" },
-	{ "bq24196" },
+	{ "bq24190", (kernel_ulong_t)&bq24190_chip_info_tbl[BQ24190] },
+	{ "bq24192", (kernel_ulong_t)&bq24190_chip_info_tbl[BQ24192] },
+	{ "bq24192i", (kernel_ulong_t)&bq24190_chip_info_tbl[BQ24192i] },
+	{ "bq24196", (kernel_ulong_t)&bq24190_chip_info_tbl[BQ24196] },
+	{ "bq24296", (kernel_ulong_t)&bq24190_chip_info_tbl[BQ24296] },
 	{ },
 };
 MODULE_DEVICE_TABLE(i2c, bq24190_i2c_ids);
 
 static const struct of_device_id bq24190_of_match[] = {
-	{ .compatible = "ti,bq24190", },
-	{ .compatible = "ti,bq24192", },
-	{ .compatible = "ti,bq24192i", },
-	{ .compatible = "ti,bq24196", },
+	{ .compatible = "ti,bq24190", .data = &bq24190_chip_info_tbl[BQ24190] },
+	{ .compatible = "ti,bq24192", .data = &bq24190_chip_info_tbl[BQ24192] },
+	{ .compatible = "ti,bq24192i", .data = &bq24190_chip_info_tbl[BQ24192i] },
+	{ .compatible = "ti,bq24196", .data = &bq24190_chip_info_tbl[BQ24196] },
+	{ .compatible = "ti,bq24296", .data = &bq24190_chip_info_tbl[BQ24296] },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, bq24190_of_match);
diff --git a/drivers/power/supply/bq256xx_charger.c b/drivers/power/supply/bq256xx_charger.c
index 789a31bd70c3..1a935bc88510 100644
--- a/drivers/power/supply/bq256xx_charger.c
+++ b/drivers/power/supply/bq256xx_charger.c
@@ -1574,13 +1574,16 @@ static int bq256xx_hw_init(struct bq256xx_device *bq)
 			wd_reg_val = i;
 			break;
 		}
-		if (bq->watchdog_timer > bq256xx_watchdog_time[i] &&
+		if (i + 1 < BQ256XX_NUM_WD_VAL &&
+		    bq->watchdog_timer > bq256xx_watchdog_time[i] &&
 		    bq->watchdog_timer < bq256xx_watchdog_time[i + 1])
 			wd_reg_val = i;
 	}
 	ret = regmap_update_bits(bq->regmap, BQ256XX_CHARGER_CONTROL_1,
 				 BQ256XX_WATCHDOG_MASK, wd_reg_val <<
 						BQ256XX_WDT_BIT_SHIFT);
+	if (ret)
+		return ret;
 
 	ret = power_supply_get_battery_info(bq->charger, &bat_info);
 	if (ret == -ENOMEM)
diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c
index 4296600e8912..1c4a9d137744 100644
--- a/drivers/power/supply/bq27xxx_battery.c
+++ b/drivers/power/supply/bq27xxx_battery.c
@@ -2162,6 +2162,36 @@ void bq27xxx_battery_teardown(struct bq27xxx_device_info *di)
 }
 EXPORT_SYMBOL_GPL(bq27xxx_battery_teardown);
 
+#ifdef CONFIG_PM_SLEEP
+/*
+ * Stop the delayed work before the system goes to sleep.
+ *
+ * cancel_delayed_work_sync() also waits for an instance that is already
+ * running, so the work is guaranteed to be idle when this callback returns.
+ */
+static int bq27xxx_battery_suspend(struct device *dev)
+{
+	struct bq27xxx_device_info *di = dev_get_drvdata(dev);
+
+	cancel_delayed_work_sync(&di->work);
+	return 0;
+}
+
+/* Queue the delayed work again, with no delay, once the system is awake. */
+static int bq27xxx_battery_resume(struct device *dev)
+{
+	struct bq27xxx_device_info *di = dev_get_drvdata(dev);
+
+	schedule_delayed_work(&di->work, 0);
+	return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+/* System-sleep callbacks built from the two handlers above; exported (GPL). */
+SIMPLE_DEV_PM_OPS(bq27xxx_battery_battery_pm_ops,
+		  bq27xxx_battery_suspend, bq27xxx_battery_resume);
+EXPORT_SYMBOL_GPL(bq27xxx_battery_battery_pm_ops);
+
 MODULE_AUTHOR("Rodolfo Giometti <giometti@linux.it>");
 MODULE_DESCRIPTION("BQ27xxx battery monitor driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c
index 9b5475590518..3a1798b0c1a7 100644
--- a/drivers/power/supply/bq27xxx_battery_i2c.c
+++ b/drivers/power/supply/bq27xxx_battery_i2c.c
@@ -295,6 +295,7 @@ static struct i2c_driver bq27xxx_battery_i2c_driver = {
 	.driver = {
 		.name = "bq27xxx-battery",
 		.of_match_table = of_match_ptr(bq27xxx_battery_i2c_of_match_table),
+		.pm = &bq27xxx_battery_battery_pm_ops,
 	},
 	.probe = bq27xxx_battery_i2c_probe,
 	.remove = bq27xxx_battery_i2c_remove,
diff --git a/drivers/power/supply/cw2015_battery.c b/drivers/power/supply/cw2015_battery.c
index bb29e9ebd24a..99f3ccdc30a6 100644
--- a/drivers/power/supply/cw2015_battery.c
+++ b/drivers/power/supply/cw2015_battery.c
@@ -491,7 +491,7 @@ static int cw_battery_get_property(struct power_supply *psy,
 
 	case POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW:
 		if (cw_battery_valid_time_to_empty(cw_bat))
-			val->intval = cw_bat->time_to_empty;
+			val->intval = cw_bat->time_to_empty * 60;
 		else
 			val->intval = 0;
 		break;
diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c
index 73265001dd4b..ecef35ac3b7e 100644
--- a/drivers/power/supply/power_supply_core.c
+++ b/drivers/power/supply/power_supply_core.c
@@ -861,44 +861,44 @@ const size_t power_supply_battery_info_properties_size = ARRAY_SIZE(power_supply
 EXPORT_SYMBOL_GPL(power_supply_battery_info_properties_size);
 
 bool power_supply_battery_info_has_prop(struct power_supply_battery_info *info,
-				        enum power_supply_property psp)
+					enum power_supply_property psp)
 {
 	if (!info)
 		return false;
 
 	switch (psp) {
-		case POWER_SUPPLY_PROP_TECHNOLOGY:
-			return info->technology != POWER_SUPPLY_TECHNOLOGY_UNKNOWN;
-		case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
-			return info->energy_full_design_uwh >= 0;
-		case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
-			return info->charge_full_design_uah >= 0;
-		case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
-			return info->voltage_min_design_uv >= 0;
-		case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
-			return info->voltage_max_design_uv >= 0;
-		case POWER_SUPPLY_PROP_PRECHARGE_CURRENT:
-			return info->precharge_current_ua >= 0;
-		case POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT:
-			return info->charge_term_current_ua >= 0;
-		case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX:
-			return info->constant_charge_current_max_ua >= 0;
-		case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX:
-			return info->constant_charge_voltage_max_uv >= 0;
-		case POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MIN:
-			return info->temp_ambient_alert_min > INT_MIN;
-		case POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MAX:
-			return info->temp_ambient_alert_max < INT_MAX;
-		case POWER_SUPPLY_PROP_TEMP_ALERT_MIN:
-			return info->temp_alert_min > INT_MIN;
-		case POWER_SUPPLY_PROP_TEMP_ALERT_MAX:
-			return info->temp_alert_max < INT_MAX;
-		case POWER_SUPPLY_PROP_TEMP_MIN:
-			return info->temp_min > INT_MIN;
-		case POWER_SUPPLY_PROP_TEMP_MAX:
-			return info->temp_max < INT_MAX;
-		default:
-			return false;
+	case POWER_SUPPLY_PROP_TECHNOLOGY:
+		return info->technology != POWER_SUPPLY_TECHNOLOGY_UNKNOWN;
+	case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
+		return info->energy_full_design_uwh >= 0;
+	case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
+		return info->charge_full_design_uah >= 0;
+	case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
+		return info->voltage_min_design_uv >= 0;
+	case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
+		return info->voltage_max_design_uv >= 0;
+	case POWER_SUPPLY_PROP_PRECHARGE_CURRENT:
+		return info->precharge_current_ua >= 0;
+	case POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT:
+		return info->charge_term_current_ua >= 0;
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX:
+		return info->constant_charge_current_max_ua >= 0;
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX:
+		return info->constant_charge_voltage_max_uv >= 0;
+	case POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MIN:
+		return info->temp_ambient_alert_min > INT_MIN;
+	case POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MAX:
+		return info->temp_ambient_alert_max < INT_MAX;
+	case POWER_SUPPLY_PROP_TEMP_ALERT_MIN:
+		return info->temp_alert_min > INT_MIN;
+	case POWER_SUPPLY_PROP_TEMP_ALERT_MAX:
+		return info->temp_alert_max < INT_MAX;
+	case POWER_SUPPLY_PROP_TEMP_MIN:
+		return info->temp_min > INT_MIN;
+	case POWER_SUPPLY_PROP_TEMP_MAX:
+		return info->temp_max < INT_MAX;
+	default:
+		return false;
 	}
 }
 EXPORT_SYMBOL_GPL(power_supply_battery_info_has_prop);
@@ -914,53 +914,53 @@ int power_supply_battery_info_get_prop(struct power_supply_battery_info *info,
 		return -EINVAL;
 
 	switch (psp) {
-		case POWER_SUPPLY_PROP_TECHNOLOGY:
-			val->intval = info->technology;
-			return 0;
-		case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
-			val->intval = info->energy_full_design_uwh;
-			return 0;
-		case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
-			val->intval = info->charge_full_design_uah;
-			return 0;
-		case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
-			val->intval = info->voltage_min_design_uv;
-			return 0;
-		case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
-			val->intval = info->voltage_max_design_uv;
-			return 0;
-		case POWER_SUPPLY_PROP_PRECHARGE_CURRENT:
-			val->intval = info->precharge_current_ua;
-			return 0;
-		case POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT:
-			val->intval = info->charge_term_current_ua;
-			return 0;
-		case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX:
-			val->intval = info->constant_charge_current_max_ua;
-			return 0;
-		case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX:
-			val->intval = info->constant_charge_voltage_max_uv;
-			return 0;
-		case POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MIN:
-			val->intval = info->temp_ambient_alert_min;
-			return 0;
-		case POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MAX:
-			val->intval = info->temp_ambient_alert_max;
-			return 0;
-		case POWER_SUPPLY_PROP_TEMP_ALERT_MIN:
-			val->intval = info->temp_alert_min;
-			return 0;
-		case POWER_SUPPLY_PROP_TEMP_ALERT_MAX:
-			val->intval = info->temp_alert_max;
-			return 0;
-		case POWER_SUPPLY_PROP_TEMP_MIN:
-			val->intval = info->temp_min;
-			return 0;
-		case POWER_SUPPLY_PROP_TEMP_MAX:
-			val->intval = info->temp_max;
-			return 0;
-		default:
-			return -EINVAL;
+	case POWER_SUPPLY_PROP_TECHNOLOGY:
+		val->intval = info->technology;
+		return 0;
+	case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
+		val->intval = info->energy_full_design_uwh;
+		return 0;
+	case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
+		val->intval = info->charge_full_design_uah;
+		return 0;
+	case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
+		val->intval = info->voltage_min_design_uv;
+		return 0;
+	case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
+		val->intval = info->voltage_max_design_uv;
+		return 0;
+	case POWER_SUPPLY_PROP_PRECHARGE_CURRENT:
+		val->intval = info->precharge_current_ua;
+		return 0;
+	case POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT:
+		val->intval = info->charge_term_current_ua;
+		return 0;
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX:
+		val->intval = info->constant_charge_current_max_ua;
+		return 0;
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX:
+		val->intval = info->constant_charge_voltage_max_uv;
+		return 0;
+	case POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MIN:
+		val->intval = info->temp_ambient_alert_min;
+		return 0;
+	case POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MAX:
+		val->intval = info->temp_ambient_alert_max;
+		return 0;
+	case POWER_SUPPLY_PROP_TEMP_ALERT_MIN:
+		val->intval = info->temp_alert_min;
+		return 0;
+	case POWER_SUPPLY_PROP_TEMP_ALERT_MAX:
+		val->intval = info->temp_alert_max;
+		return 0;
+	case POWER_SUPPLY_PROP_TEMP_MIN:
+		val->intval = info->temp_min;
+		return 0;
+	case POWER_SUPPLY_PROP_TEMP_MAX:
+		val->intval = info->temp_max;
+		return 0;
+	default:
+		return -EINVAL;
 	}
 }
 EXPORT_SYMBOL_GPL(power_supply_battery_info_get_prop);
@@ -1255,6 +1255,7 @@ EXPORT_SYMBOL_GPL(power_supply_powers);
 static void power_supply_dev_release(struct device *dev)
 {
 	struct power_supply *psy = to_power_supply(dev);
+
 	dev_dbg(dev, "%s\n", __func__);
 	kfree(psy);
 }
@@ -1636,6 +1637,6 @@ subsys_initcall(power_supply_class_init);
 module_exit(power_supply_class_exit);
 
 MODULE_DESCRIPTION("Universal power supply monitor class");
-MODULE_AUTHOR("Ian Molton <spyro@f2s.com>, "
-	      "Szabolcs Gyurko, "
-	      "Anton Vorontsov <cbou@mail.ru>");
+MODULE_AUTHOR("Ian Molton <spyro@f2s.com>");
+MODULE_AUTHOR("Szabolcs Gyurko");
+MODULE_AUTHOR("Anton Vorontsov <cbou@mail.ru>");
diff --git a/drivers/power/supply/qcom_battmgr.c b/drivers/power/supply/qcom_battmgr.c
index ec163d1bcd18..a12e2a66d516 100644
--- a/drivers/power/supply/qcom_battmgr.c
+++ b/drivers/power/supply/qcom_battmgr.c
@@ -282,6 +282,7 @@ struct qcom_battmgr_wireless {
 
 struct qcom_battmgr {
 	struct device *dev;
+	struct auxiliary_device *adev;
 	struct pmic_glink_client *client;
 
 	enum qcom_battmgr_variant variant;
@@ -1293,11 +1294,69 @@ static void qcom_battmgr_enable_worker(struct work_struct *work)
 		dev_err(battmgr->dev, "failed to request power notifications\n");
 }
 
+static char *qcom_battmgr_battery[] = { "battery" };
+
+/*
+ * Register every power supply of the detected variant.  A failed registration
+ * is only logged; the ERR_PTR stays in the matching battmgr psy pointer.
+ */
+static void qcom_battmgr_register_psy(struct qcom_battmgr *battmgr)
+{
+	struct device *dev = &battmgr->adev->dev;
+	struct power_supply_config psy_cfg = { .drv_data = battmgr, .of_node = dev->of_node };
+	struct power_supply_config psy_cfg_supply = {
+		.drv_data = battmgr,
+		.of_node = dev->of_node,
+		.supplied_to = qcom_battmgr_battery,
+		.num_supplicants = 1,
+	};
+
+	if (battmgr->variant == QCOM_BATTMGR_SC8280XP) {
+		battmgr->bat_psy = devm_power_supply_register(dev, &sc8280xp_bat_psy_desc, &psy_cfg);
+		if (IS_ERR(battmgr->bat_psy))
+			dev_err(dev, "failed to register battery power supply (%ld)\n",
+				PTR_ERR(battmgr->bat_psy));
+
+		battmgr->ac_psy = devm_power_supply_register(dev, &sc8280xp_ac_psy_desc, &psy_cfg_supply);
+		if (IS_ERR(battmgr->ac_psy))
+			dev_err(dev, "failed to register AC power supply (%ld)\n",
+				PTR_ERR(battmgr->ac_psy));
+
+		battmgr->usb_psy = devm_power_supply_register(dev, &sc8280xp_usb_psy_desc, &psy_cfg_supply);
+		if (IS_ERR(battmgr->usb_psy))
+			dev_err(dev, "failed to register USB power supply (%ld)\n",
+				PTR_ERR(battmgr->usb_psy));
+
+		battmgr->wls_psy = devm_power_supply_register(dev, &sc8280xp_wls_psy_desc, &psy_cfg_supply);
+		if (IS_ERR(battmgr->wls_psy))
+			dev_err(dev, "failed to register wireless charging power supply (%ld)\n",
+				PTR_ERR(battmgr->wls_psy));
+	} else {
+		battmgr->bat_psy = devm_power_supply_register(dev, &sm8350_bat_psy_desc, &psy_cfg);
+		if (IS_ERR(battmgr->bat_psy))
+			dev_err(dev, "failed to register battery power supply (%ld)\n",
+				PTR_ERR(battmgr->bat_psy));
+
+		battmgr->usb_psy = devm_power_supply_register(dev, &sm8350_usb_psy_desc, &psy_cfg_supply);
+		if (IS_ERR(battmgr->usb_psy))
+			dev_err(dev, "failed to register USB power supply (%ld)\n",
+				PTR_ERR(battmgr->usb_psy));
+
+		battmgr->wls_psy = devm_power_supply_register(dev, &sm8350_wls_psy_desc, &psy_cfg_supply);
+		if (IS_ERR(battmgr->wls_psy))
+			dev_err(dev, "failed to register wireless charging power supply (%ld)\n",
+				PTR_ERR(battmgr->wls_psy));
+	}
+}
+
 static void qcom_battmgr_pdr_notify(void *priv, int state)
 {
 	struct qcom_battmgr *battmgr = priv;
 
 	if (state == SERVREG_SERVICE_STATE_UP) {
+		if (!battmgr->bat_psy)
+			qcom_battmgr_register_psy(battmgr);
+
 		battmgr->service_up = true;
 		schedule_work(&battmgr->enable_work);
 	} else {
@@ -1312,13 +1371,9 @@ static const struct of_device_id qcom_battmgr_of_variants[] = {
 	{}
 };
 
-static char *qcom_battmgr_battery[] = { "battery" };
-
 static int qcom_battmgr_probe(struct auxiliary_device *adev,
 			      const struct auxiliary_device_id *id)
 {
-	struct power_supply_config psy_cfg_supply = {};
-	struct power_supply_config psy_cfg = {};
 	const struct of_device_id *match;
 	struct qcom_battmgr *battmgr;
 	struct device *dev = &adev->dev;
@@ -1328,14 +1383,7 @@ static int qcom_battmgr_probe(struct auxiliary_device *adev,
 		return -ENOMEM;
 
 	battmgr->dev = dev;
-
-	psy_cfg.drv_data = battmgr;
-	psy_cfg.of_node = adev->dev.of_node;
-
-	psy_cfg_supply.drv_data = battmgr;
-	psy_cfg_supply.of_node = adev->dev.of_node;
-	psy_cfg_supply.supplied_to = qcom_battmgr_battery;
-	psy_cfg_supply.num_supplicants = 1;
+	battmgr->adev = adev;
 
 	INIT_WORK(&battmgr->enable_work, qcom_battmgr_enable_worker);
 	mutex_init(&battmgr->lock);
@@ -1347,43 +1395,6 @@ static int qcom_battmgr_probe(struct auxiliary_device *adev,
 	else
 		battmgr->variant = QCOM_BATTMGR_SM8350;
 
-	if (battmgr->variant == QCOM_BATTMGR_SC8280XP) {
-		battmgr->bat_psy = devm_power_supply_register(dev, &sc8280xp_bat_psy_desc, &psy_cfg);
-		if (IS_ERR(battmgr->bat_psy))
-			return dev_err_probe(dev, PTR_ERR(battmgr->bat_psy),
-					     "failed to register battery power supply\n");
-
-		battmgr->ac_psy = devm_power_supply_register(dev, &sc8280xp_ac_psy_desc, &psy_cfg_supply);
-		if (IS_ERR(battmgr->ac_psy))
-			return dev_err_probe(dev, PTR_ERR(battmgr->ac_psy),
-					     "failed to register AC power supply\n");
-
-		battmgr->usb_psy = devm_power_supply_register(dev, &sc8280xp_usb_psy_desc, &psy_cfg_supply);
-		if (IS_ERR(battmgr->usb_psy))
-			return dev_err_probe(dev, PTR_ERR(battmgr->usb_psy),
-					     "failed to register USB power supply\n");
-
-		battmgr->wls_psy = devm_power_supply_register(dev, &sc8280xp_wls_psy_desc, &psy_cfg_supply);
-		if (IS_ERR(battmgr->wls_psy))
-			return dev_err_probe(dev, PTR_ERR(battmgr->wls_psy),
-					     "failed to register wireless charing power supply\n");
-	} else {
-		battmgr->bat_psy = devm_power_supply_register(dev, &sm8350_bat_psy_desc, &psy_cfg);
-		if (IS_ERR(battmgr->bat_psy))
-			return dev_err_probe(dev, PTR_ERR(battmgr->bat_psy),
-					     "failed to register battery power supply\n");
-
-		battmgr->usb_psy = devm_power_supply_register(dev, &sm8350_usb_psy_desc, &psy_cfg_supply);
-		if (IS_ERR(battmgr->usb_psy))
-			return dev_err_probe(dev, PTR_ERR(battmgr->usb_psy),
-					     "failed to register USB power supply\n");
-
-		battmgr->wls_psy = devm_power_supply_register(dev, &sm8350_wls_psy_desc, &psy_cfg_supply);
-		if (IS_ERR(battmgr->wls_psy))
-			return dev_err_probe(dev, PTR_ERR(battmgr->wls_psy),
-					     "failed to register wireless charing power supply\n");
-	}
-
 	battmgr->client = devm_pmic_glink_register_client(dev,
 							  PMIC_GLINK_OWNER_BATTMGR,
 							  qcom_battmgr_callback,
diff --git a/drivers/power/supply/qcom_pmi8998_charger.c b/drivers/power/supply/qcom_pmi8998_charger.c
index 8acf63ee6897..9bb777406013 100644
--- a/drivers/power/supply/qcom_pmi8998_charger.c
+++ b/drivers/power/supply/qcom_pmi8998_charger.c
@@ -972,10 +972,14 @@ static int smb2_probe(struct platform_device *pdev)
 	supply_config.of_node = pdev->dev.of_node;
 
 	desc = devm_kzalloc(chip->dev, sizeof(smb2_psy_desc), GFP_KERNEL);
+	if (!desc)
+		return -ENOMEM;
 	memcpy(desc, &smb2_psy_desc, sizeof(smb2_psy_desc));
 	desc->name =
 		devm_kasprintf(chip->dev, GFP_KERNEL, "%s-charger",
 			       (const char *)device_get_match_data(chip->dev));
+	if (!desc->name)
+		return -ENOMEM;
 
 	chip->chg_psy =
 		devm_power_supply_register(chip->dev, desc, &supply_config);
diff --git a/drivers/scsi/fcoe/fcoe_sysfs.c b/drivers/scsi/fcoe/fcoe_sysfs.c
index 408a806bf4c2..c64a085a7ee2 100644
--- a/drivers/scsi/fcoe/fcoe_sysfs.c
+++ b/drivers/scsi/fcoe/fcoe_sysfs.c
@@ -263,6 +263,7 @@ static ssize_t store_ctlr_mode(struct device *dev,
 			       const char *buf, size_t count)
 {
 	struct fcoe_ctlr_device *ctlr = dev_to_ctlr(dev);
+	int res;
 
 	if (count > FCOE_MAX_MODENAME_LEN)
 		return -EINVAL;
@@ -279,12 +280,13 @@ static ssize_t store_ctlr_mode(struct device *dev,
 			return -ENOTSUPP;
 		}
 
-		ctlr->mode = sysfs_match_string(fip_conn_type_names, buf);
-		if (ctlr->mode < 0 || ctlr->mode == FIP_CONN_TYPE_UNKNOWN) {
+		res = sysfs_match_string(fip_conn_type_names, buf);
+		if (res < 0 || res == FIP_CONN_TYPE_UNKNOWN) {
 			LIBFCOE_SYSFS_DBG(ctlr, "Unknown mode %s provided.\n",
 					  buf);
 			return -EINVAL;
 		}
+		ctlr->mode = res;
 
 		ctlr->f->set_fcoe_ctlr_mode(ctlr);
 		LIBFCOE_SYSFS_DBG(ctlr, "Mode changed to %s.\n", buf);
diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index 4d6db4509e75..8d7fc5284293 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -546,6 +546,7 @@ int fnic_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *sc)
 	if (fnic->sw_copy_wq[hwq].io_req_table[blk_mq_unique_tag_to_tag(mqtag)] != NULL) {
 		WARN(1, "fnic<%d>: %s: hwq: %d tag 0x%x already exists\n",
 				fnic->fnic_num, __func__, hwq, blk_mq_unique_tag_to_tag(mqtag));
+		spin_unlock_irqrestore(&fnic->wq_copy_lock[hwq], flags);
 		return SCSI_MLQUEUE_HOST_BUSY;
 	}
 
diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c
index d8c57a0a518f..528f19f782f2 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_fw.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c
@@ -475,7 +475,7 @@ int mpi3mr_process_admin_reply_q(struct mpi3mr_ioc *mrioc)
  * @op_reply_q: op_reply_qinfo object
  * @reply_ci: operational reply descriptor's queue consumer index
  *
- * Returns reply descriptor frame address
+ * Returns: reply descriptor frame address
  */
 static inline struct mpi3_default_reply_descriptor *
 mpi3mr_get_reply_desc(struct op_reply_qinfo *op_reply_q, u32 reply_ci)
@@ -1063,7 +1063,6 @@ enum mpi3mr_iocstate mpi3mr_get_iocstate(struct mpi3mr_ioc *mrioc)
  * @mrioc: Adapter instance reference
  *
  * Free the DMA memory allocated for IOCTL handling purpose.
-
  *
  * Return: None
  */
@@ -1106,7 +1105,6 @@ static void mpi3mr_free_ioctl_dma_memory(struct mpi3mr_ioc *mrioc)
 /**
  * mpi3mr_alloc_ioctl_dma_memory - Alloc memory for ioctl dma
  * @mrioc: Adapter instance reference
-
  *
  * This function allocates dmaable memory required to handle the
  * application issued MPI3 IOCTL requests.
@@ -1241,7 +1239,7 @@ static int mpi3mr_issue_and_process_mur(struct mpi3mr_ioc *mrioc,
  * during reset/resume
  * @mrioc: Adapter instance reference
  *
- * Return zero if the new IOCFacts parameters value is compatible with
+ * Return: zero if the new IOCFacts parameters value is compatible with
  * older values else return -EPERM
  */
 static int
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 885a7d5df3b9..79da4b1c1df0 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -2197,15 +2197,18 @@ void scsi_eh_flush_done_q(struct list_head *done_q)
 	struct scsi_cmnd *scmd, *next;
 
 	list_for_each_entry_safe(scmd, next, done_q, eh_entry) {
+		struct scsi_device *sdev = scmd->device;
+
 		list_del_init(&scmd->eh_entry);
-		if (scsi_device_online(scmd->device) &&
-		    !scsi_noretry_cmd(scmd) && scsi_cmd_retry_allowed(scmd) &&
-			scsi_eh_should_retry_cmd(scmd)) {
+		if (scsi_device_online(sdev) && !scsi_noretry_cmd(scmd) &&
+		    scsi_cmd_retry_allowed(scmd) &&
+		    scsi_eh_should_retry_cmd(scmd)) {
 			SCSI_LOG_ERROR_RECOVERY(3,
 				scmd_printk(KERN_INFO, scmd,
 					     "%s: flush retry cmd\n",
 					     current->comm));
 				scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY);
+				blk_mq_kick_requeue_list(sdev->request_queue);
 		} else {
 			/*
 			 * If just we got sense for the device (called
diff --git a/drivers/scsi/smartpqi/smartpqi.h b/drivers/scsi/smartpqi/smartpqi.h
index 041940183516..cdedc271857a 100644
--- a/drivers/scsi/smartpqi/smartpqi.h
+++ b/drivers/scsi/smartpqi/smartpqi.h
@@ -1347,7 +1347,6 @@ struct pqi_ctrl_info {
 	bool		controller_online;
 	bool		block_requests;
 	bool		scan_blocked;
-	u8		logical_volume_rescan_needed : 1;
 	u8		inbound_spanning_supported : 1;
 	u8		outbound_spanning_supported : 1;
 	u8		pqi_mode_enabled : 1;
diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index 9a58df9312fa..ceff1ec13f9e 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -33,11 +33,11 @@
 #define BUILD_TIMESTAMP
 #endif
 
-#define DRIVER_VERSION		"2.1.24-046"
+#define DRIVER_VERSION		"2.1.26-030"
 #define DRIVER_MAJOR		2
 #define DRIVER_MINOR		1
-#define DRIVER_RELEASE		24
-#define DRIVER_REVISION		46
+#define DRIVER_RELEASE		26
+#define DRIVER_REVISION		30
 
 #define DRIVER_NAME		"Microchip SmartPQI Driver (v" \
 				DRIVER_VERSION BUILD_TIMESTAMP ")"
@@ -2093,8 +2093,6 @@ static void pqi_scsi_update_device(struct pqi_ctrl_info *ctrl_info,
 		if (existing_device->devtype == TYPE_DISK) {
 			existing_device->raid_level = new_device->raid_level;
 			existing_device->volume_status = new_device->volume_status;
-			if (ctrl_info->logical_volume_rescan_needed)
-				existing_device->rescan = true;
 			memset(existing_device->next_bypass_group, 0, sizeof(existing_device->next_bypass_group));
 			if (!pqi_raid_maps_equal(existing_device->raid_map, new_device->raid_map)) {
 				kfree(existing_device->raid_map);
@@ -2164,6 +2162,20 @@ static inline void pqi_init_device_tmf_work(struct pqi_scsi_dev *device)
 		INIT_WORK(&tmf_work->work_struct, pqi_tmf_worker);
 }
 
+static inline bool pqi_volume_rescan_needed(struct pqi_scsi_dev *device)
+{
+	if (pqi_device_in_remove(device))
+		return false;
+
+	if (device->sdev == NULL)
+		return false;
+
+	if (!scsi_device_online(device->sdev))
+		return false;
+
+	return device->rescan;
+}
+
 static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info,
 	struct pqi_scsi_dev *new_device_list[], unsigned int num_new_devices)
 {
@@ -2284,9 +2296,13 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info,
 		if (device->sdev && device->queue_depth != device->advertised_queue_depth) {
 			device->advertised_queue_depth = device->queue_depth;
 			scsi_change_queue_depth(device->sdev, device->advertised_queue_depth);
-			if (device->rescan) {
-				scsi_rescan_device(device->sdev);
+			spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags);
+			if (pqi_volume_rescan_needed(device)) {
 				device->rescan = false;
+				spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags);
+				scsi_rescan_device(device->sdev);
+			} else {
+				spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags);
 			}
 		}
 	}
@@ -2308,8 +2324,6 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info,
 		}
 	}
 
-	ctrl_info->logical_volume_rescan_needed = false;
-
 }
 
 static inline bool pqi_is_supported_device(struct pqi_scsi_dev *device)
@@ -3702,6 +3716,21 @@ static bool pqi_ofa_process_event(struct pqi_ctrl_info *ctrl_info,
 	return ack_event;
 }
 
+static void pqi_mark_volumes_for_rescan(struct pqi_ctrl_info *ctrl_info)
+{
+	unsigned long flags;
+	struct pqi_scsi_dev *device;
+
+	spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags);
+
+	list_for_each_entry(device, &ctrl_info->scsi_device_list, scsi_device_list_entry) {
+		if (pqi_is_logical_device(device) && device->devtype == TYPE_DISK)
+			device->rescan = true;
+	}
+
+	spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags);
+}
+
 static void pqi_disable_raid_bypass(struct pqi_ctrl_info *ctrl_info)
 {
 	unsigned long flags;
@@ -3742,7 +3771,7 @@ static void pqi_event_worker(struct work_struct *work)
 				ack_event = true;
 				rescan_needed = true;
 				if (event->event_type == PQI_EVENT_TYPE_LOGICAL_DEVICE)
-					ctrl_info->logical_volume_rescan_needed = true;
+					pqi_mark_volumes_for_rescan(ctrl_info);
 				else if (event->event_type == PQI_EVENT_TYPE_AIO_STATE_CHANGE)
 					pqi_disable_raid_bypass(ctrl_info);
 			}
@@ -10144,6 +10173,18 @@ static const struct pci_device_id pqi_pci_id_table[] = {
 	},
 	{
 		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       0x1137, 0x02f8)
+	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       0x1137, 0x02f9)
+	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       0x1137, 0x02fa)
+	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
 				0x1e93, 0x1000)
 	},
 	{
@@ -10200,6 +10241,34 @@ static const struct pci_device_id pqi_pci_id_table[] = {
 	},
 	{
 		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       0x1f51, 0x100e)
+	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       0x1f51, 0x100f)
+	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       0x1f51, 0x1010)
+	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       0x1f51, 0x1011)
+	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       0x1f51, 0x1043)
+	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       0x1f51, 0x1044)
+	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       0x1f51, 0x1045)
+	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
 			       PCI_ANY_ID, PCI_ANY_ID)
 	},
 	{ 0 }
diff --git a/drivers/spi/spi-coldfire-qspi.c b/drivers/spi/spi-coldfire-qspi.c
index f0b630fe16c3..b341b6908df0 100644
--- a/drivers/spi/spi-coldfire-qspi.c
+++ b/drivers/spi/spi-coldfire-qspi.c
@@ -441,7 +441,6 @@ static void mcfqspi_remove(struct platform_device *pdev)
 	mcfqspi_wr_qmr(mcfqspi, MCFQSPI_QMR_MSTR);
 
 	mcfqspi_cs_teardown(mcfqspi);
-	clk_disable_unprepare(mcfqspi->clk);
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 506193e870c4..7a85e6477e46 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -147,7 +147,6 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd)
 	struct se_session *se_sess = se_cmd->se_sess;
 	struct se_node_acl *nacl = se_sess->se_node_acl;
 	struct se_tmr_req *se_tmr = se_cmd->se_tmr_req;
-	unsigned long flags;
 
 	rcu_read_lock();
 	deve = target_nacl_find_deve(nacl, se_cmd->orig_fe_lun);
@@ -178,10 +177,6 @@ out_unlock:
 	se_cmd->se_dev = rcu_dereference_raw(se_lun->lun_se_dev);
 	se_tmr->tmr_dev = rcu_dereference_raw(se_lun->lun_se_dev);
 
-	spin_lock_irqsave(&se_tmr->tmr_dev->se_tmr_lock, flags);
-	list_add_tail(&se_tmr->tmr_list, &se_tmr->tmr_dev->dev_tmr_list);
-	spin_unlock_irqrestore(&se_tmr->tmr_dev->se_tmr_lock, flags);
-
 	return 0;
 }
 EXPORT_SYMBOL(transport_lookup_tmr_lun);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 670cfb7bd426..73d0d6133ac8 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -3629,6 +3629,10 @@ int transport_generic_handle_tmr(
 	unsigned long flags;
 	bool aborted = false;
 
+	spin_lock_irqsave(&cmd->se_dev->se_tmr_lock, flags);
+	list_add_tail(&cmd->se_tmr_req->tmr_list, &cmd->se_dev->dev_tmr_list);
+	spin_unlock_irqrestore(&cmd->se_dev->se_tmr_lock, flags);
+
 	spin_lock_irqsave(&cmd->t_state_lock, flags);
 	if (cmd->transport_state & CMD_T_ABORTED) {
 		aborted = true;
diff --git a/drivers/thermal/loongson2_thermal.c b/drivers/thermal/loongson2_thermal.c
index 99ca0c7bc41c..0f475fe46bc9 100644
--- a/drivers/thermal/loongson2_thermal.c
+++ b/drivers/thermal/loongson2_thermal.c
@@ -8,9 +8,10 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/minmax.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
+#include <linux/property.h>
 #include <linux/thermal.h>
 #include <linux/units.h>
 #include "thermal_hwmon.h"
diff --git a/drivers/tty/hvc/Kconfig b/drivers/tty/hvc/Kconfig
index 4f9264d005c0..6e05c5c7bca1 100644
--- a/drivers/tty/hvc/Kconfig
+++ b/drivers/tty/hvc/Kconfig
@@ -108,7 +108,7 @@ config HVC_DCC_SERIALIZE_SMP
 
 config HVC_RISCV_SBI
 	bool "RISC-V SBI console support"
-	depends on RISCV_SBI_V01
+	depends on RISCV_SBI
 	select HVC_DRIVER
 	help
 	  This enables support for console output via RISC-V SBI calls, which
diff --git a/drivers/tty/hvc/hvc_riscv_sbi.c b/drivers/tty/hvc/hvc_riscv_sbi.c
index a72591279f86..cede8a572594 100644
--- a/drivers/tty/hvc/hvc_riscv_sbi.c
+++ b/drivers/tty/hvc/hvc_riscv_sbi.c
@@ -40,21 +40,44 @@ static ssize_t hvc_sbi_tty_get(uint32_t vtermno, u8 *buf, size_t count)
 	return i;
 }
 
-static const struct hv_ops hvc_sbi_ops = {
+static const struct hv_ops hvc_sbi_v01_ops = {
 	.get_chars = hvc_sbi_tty_get,
 	.put_chars = hvc_sbi_tty_put,
 };
 
-static int __init hvc_sbi_init(void)
+static ssize_t hvc_sbi_dbcn_tty_put(uint32_t vtermno, const u8 *buf, size_t count)
 {
-	return PTR_ERR_OR_ZERO(hvc_alloc(0, 0, &hvc_sbi_ops, 16));
+	return sbi_debug_console_write(buf, count);
 }
-device_initcall(hvc_sbi_init);
 
-static int __init hvc_sbi_console_init(void)
+static ssize_t hvc_sbi_dbcn_tty_get(uint32_t vtermno, u8 *buf, size_t count)
 {
-	hvc_instantiate(0, 0, &hvc_sbi_ops);
+	return sbi_debug_console_read(buf, count);
+}
+
+static const struct hv_ops hvc_sbi_dbcn_ops = {
+	.put_chars = hvc_sbi_dbcn_tty_put,
+	.get_chars = hvc_sbi_dbcn_tty_get,
+};
+
+static int __init hvc_sbi_init(void)
+{
+	int err;
+
+	if (sbi_debug_console_available) {
+		err = PTR_ERR_OR_ZERO(hvc_alloc(0, 0, &hvc_sbi_dbcn_ops, 256));
+		if (err)
+			return err;
+		hvc_instantiate(0, 0, &hvc_sbi_dbcn_ops);
+	} else if (IS_ENABLED(CONFIG_RISCV_SBI_V01)) {
+		err = PTR_ERR_OR_ZERO(hvc_alloc(0, 0, &hvc_sbi_v01_ops, 256));
+		if (err)
+			return err;
+		hvc_instantiate(0, 0, &hvc_sbi_v01_ops);
+	} else {
+		return -ENODEV;
+	}
 
 	return 0;
 }
-console_initcall(hvc_sbi_console_init);
+device_initcall(hvc_sbi_init);
diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 8b1f5756002f..ffcf4882b25f 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -87,7 +87,7 @@ config SERIAL_EARLYCON_SEMIHOST
 
 config SERIAL_EARLYCON_RISCV_SBI
 	bool "Early console using RISC-V SBI"
-	depends on RISCV_SBI_V01
+	depends on RISCV_SBI
 	select SERIAL_CORE
 	select SERIAL_CORE_CONSOLE
 	select SERIAL_EARLYCON
diff --git a/drivers/tty/serial/earlycon-riscv-sbi.c b/drivers/tty/serial/earlycon-riscv-sbi.c
index 27afb0b74ea7..0162155f0c83 100644
--- a/drivers/tty/serial/earlycon-riscv-sbi.c
+++ b/drivers/tty/serial/earlycon-riscv-sbi.c
@@ -15,17 +15,38 @@ static void sbi_putc(struct uart_port *port, unsigned char c)
 	sbi_console_putchar(c);
 }
 
-static void sbi_console_write(struct console *con,
-			      const char *s, unsigned n)
+static void sbi_0_1_console_write(struct console *con,
+				  const char *s, unsigned int n)
 {
 	struct earlycon_device *dev = con->data;
 	uart_console_write(&dev->port, s, n, sbi_putc);
 }
 
+static void sbi_dbcn_console_write(struct console *con,
+				   const char *s, unsigned int n)
+{
+	int ret;
+
+	while (n) {
+		ret = sbi_debug_console_write(s, n);
+		if (ret < 0)
+			break;
+
+		s += ret;
+		n -= ret;
+	}
+}
+
 static int __init early_sbi_setup(struct earlycon_device *device,
 				  const char *opt)
 {
-	device->con->write = sbi_console_write;
+	if (sbi_debug_console_available)
+		device->con->write = sbi_dbcn_console_write;
+	else if (IS_ENABLED(CONFIG_RISCV_SBI_V01))
+		device->con->write = sbi_0_1_console_write;
+	else
+		return -ENODEV;
+
 	return 0;
 }
 EARLYCON_DECLARE(sbi, early_sbi_setup);
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index d1e33328ff3f..029d017fc1b6 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -8725,7 +8725,6 @@ static int ufshcd_add_lus(struct ufs_hba *hba)
 
 	ufs_bsg_probe(hba);
 	scsi_scan_host(hba->host);
-	pm_runtime_put_sync(hba->dev);
 
 out:
 	return ret;
@@ -8994,15 +8993,12 @@ static void ufshcd_async_scan(void *data, async_cookie_t cookie)
 
 	/* Probe and add UFS logical units  */
 	ret = ufshcd_add_lus(hba);
+
 out:
-	/*
-	 * If we failed to initialize the device or the device is not
-	 * present, turn off the power/clocks etc.
-	 */
-	if (ret) {
-		pm_runtime_put_sync(hba->dev);
-		ufshcd_hba_exit(hba);
-	}
+	pm_runtime_put_sync(hba->dev);
+
+	if (ret)
+		dev_err(hba->dev, "%s failed: %d\n", __func__, ret);
 }
 
 static enum scsi_timeout_action ufshcd_eh_timed_out(struct scsi_cmnd *scmd)
diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c
index 480787048e75..39eef470f8fa 100644
--- a/drivers/ufs/host/ufs-qcom.c
+++ b/drivers/ufs/host/ufs-qcom.c
@@ -1716,7 +1716,7 @@ static int ufs_qcom_config_esi(struct ufs_hba *hba)
 					     ufs_qcom_write_msi_msg);
 	if (ret) {
 		dev_err(hba->dev, "Failed to request Platform MSI %d\n", ret);
-		goto out;
+		return ret;
 	}
 
 	msi_lock_descs(hba->dev);
@@ -1750,11 +1750,8 @@ static int ufs_qcom_config_esi(struct ufs_hba *hba)
 				    FIELD_PREP(ESI_VEC_MASK, MAX_ESI_VEC - 1),
 				    REG_UFS_CFG3);
 		ufshcd_mcq_enable_esi(hba);
-	}
-
-out:
-	if (!ret)
 		host->esi_enabled = true;
+	}
 
 	return ret;
 }
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 63af6ab034b5..1183e7a871f8 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -631,8 +631,7 @@ static void fbcon_prepare_logo(struct vc_data *vc, struct fb_info *info,
 
 	if (logo_lines > vc->vc_bottom) {
 		logo_shown = FBCON_LOGO_CANSHOW;
-		printk(KERN_INFO
-		       "fbcon_init: disable boot-logo (boot-logo bigger than screen).\n");
+		pr_info("fbcon: disable boot-logo (boot-logo bigger than screen).\n");
 	} else {
 		logo_shown = FBCON_LOGO_DRAW;
 		vc->vc_top = logo_lines;
diff --git a/drivers/video/fbdev/savage/savagefb_driver.c b/drivers/video/fbdev/savage/savagefb_driver.c
index dddd6afcb972..ebc9aeffdde7 100644
--- a/drivers/video/fbdev/savage/savagefb_driver.c
+++ b/drivers/video/fbdev/savage/savagefb_driver.c
@@ -869,6 +869,9 @@ static int savagefb_check_var(struct fb_var_screeninfo   *var,
 
 	DBG("savagefb_check_var");
 
+	if (!var->pixclock)
+		return -EINVAL;
+
 	var->transp.offset = 0;
 	var->transp.length = 0;
 	switch (var->bits_per_pixel) {
diff --git a/drivers/video/fbdev/sis/sis_main.c b/drivers/video/fbdev/sis/sis_main.c
index 803ccb6aa479..009bf1d92644 100644
--- a/drivers/video/fbdev/sis/sis_main.c
+++ b/drivers/video/fbdev/sis/sis_main.c
@@ -1444,6 +1444,8 @@ sisfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 
 	vtotal = var->upper_margin + var->lower_margin + var->vsync_len;
 
+	if (!var->pixclock)
+		return -EINVAL;
 	pixclock = var->pixclock;
 
 	if((var->vmode & FB_VMODE_MASK) == FB_VMODE_NONINTERLACED) {
diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c
index 2de0e675fd15..8e5bac27542d 100644
--- a/drivers/video/fbdev/stifb.c
+++ b/drivers/video/fbdev/stifb.c
@@ -1158,7 +1158,7 @@ stifb_init_display(struct stifb_info *fb)
 	    }
 	    break;
 	}
-	stifb_blank(0, (struct fb_info *)fb);	/* 0=enable screen */
+	stifb_blank(0, fb->info);	/* 0=enable screen */
 
 	SETUP_FB(fb);
 }
diff --git a/drivers/video/fbdev/vt8500lcdfb.c b/drivers/video/fbdev/vt8500lcdfb.c
index 42c25dc85197..ac73937073a7 100644
--- a/drivers/video/fbdev/vt8500lcdfb.c
+++ b/drivers/video/fbdev/vt8500lcdfb.c
@@ -374,7 +374,6 @@ static int vt8500lcd_probe(struct platform_device *pdev)
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
-		dev_err(&pdev->dev, "no IRQ defined\n");
 		ret = -ENODEV;
 		goto failed_free_palette;
 	}
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 731e3d14b67d..0e8418066a48 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -42,6 +42,7 @@ struct inode *v9fs_alloc_inode(struct super_block *sb);
 void v9fs_free_inode(struct inode *inode);
 struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode,
 			     dev_t rdev);
+void v9fs_set_netfs_context(struct inode *inode);
 int v9fs_init_inode(struct v9fs_session_info *v9ses,
 		    struct inode *inode, umode_t mode, dev_t rdev);
 void v9fs_evict_inode(struct inode *inode);
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 8a635999a7d6..047855033d32 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -19,12 +19,45 @@
 #include <linux/netfs.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
+#include <trace/events/netfs.h>
 
 #include "v9fs.h"
 #include "v9fs_vfs.h"
 #include "cache.h"
 #include "fid.h"
 
+static void v9fs_upload_to_server(struct netfs_io_subrequest *subreq)
+{
+	struct p9_fid *fid = subreq->rreq->netfs_priv;
+	int err, len;
+
+	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
+	len = p9_client_write(fid, subreq->start, &subreq->io_iter, &err);
+	netfs_write_subrequest_terminated(subreq, len ?: err, false);
+}
+
+static void v9fs_upload_to_server_worker(struct work_struct *work)
+{
+	struct netfs_io_subrequest *subreq =
+		container_of(work, struct netfs_io_subrequest, work);
+
+	v9fs_upload_to_server(subreq);
+}
+
+/*
+ * Set up write requests for a writeback slice.  We need to add a write request
+ * for each write we want to make.
+ */
+static void v9fs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len)
+{
+	struct netfs_io_subrequest *subreq;
+
+	subreq = netfs_create_write_request(wreq, NETFS_UPLOAD_TO_SERVER,
+					    start, len, v9fs_upload_to_server_worker);
+	if (subreq)
+		netfs_queue_write_request(subreq);
+}
+
 /**
  * v9fs_issue_read - Issue a read from 9P
  * @subreq: The read to make
@@ -33,14 +66,10 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
 {
 	struct netfs_io_request *rreq = subreq->rreq;
 	struct p9_fid *fid = rreq->netfs_priv;
-	struct iov_iter to;
-	loff_t pos = subreq->start + subreq->transferred;
-	size_t len = subreq->len   - subreq->transferred;
 	int total, err;
 
-	iov_iter_xarray(&to, ITER_DEST, &rreq->mapping->i_pages, pos, len);
-
-	total = p9_client_read(fid, pos, &to, &err);
+	total = p9_client_read(fid, subreq->start + subreq->transferred,
+			       &subreq->io_iter, &err);
 
 	/* if we just extended the file size, any portion not in
 	 * cache won't be on server and is zeroes */
@@ -50,25 +79,42 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
 }
 
 /**
- * v9fs_init_request - Initialise a read request
+ * v9fs_init_request - Initialise a request
  * @rreq: The read request
  * @file: The file being read from
  */
 static int v9fs_init_request(struct netfs_io_request *rreq, struct file *file)
 {
-	struct p9_fid *fid = file->private_data;
-
-	BUG_ON(!fid);
+	struct p9_fid *fid;
+	bool writing = (rreq->origin == NETFS_READ_FOR_WRITE ||
+			rreq->origin == NETFS_WRITEBACK ||
+			rreq->origin == NETFS_WRITETHROUGH ||
+			rreq->origin == NETFS_LAUNDER_WRITE ||
+			rreq->origin == NETFS_UNBUFFERED_WRITE ||
+			rreq->origin == NETFS_DIO_WRITE);
+
+	if (file) {
+		fid = file->private_data;
+		if (!fid)
+			goto no_fid;
+		p9_fid_get(fid);
+	} else {
+		fid = v9fs_fid_find_inode(rreq->inode, writing, INVALID_UID, true);
+		if (!fid)
+			goto no_fid;
+	}
 
 	/* we might need to read from a fid that was opened write-only
 	 * for read-modify-write of page cache, use the writeback fid
 	 * for that */
-	WARN_ON(rreq->origin == NETFS_READ_FOR_WRITE &&
-			!(fid->mode & P9_ORDWR));
-
-	p9_fid_get(fid);
+	WARN_ON(rreq->origin == NETFS_READ_FOR_WRITE && !(fid->mode & P9_ORDWR));
 	rreq->netfs_priv = fid;
 	return 0;
+
+no_fid:
+	WARN_ONCE(1, "folio expected an open fid inode->i_ino=%lx\n",
+		  rreq->inode->i_ino);
+	return -EINVAL;
 }
 
 /**
@@ -82,281 +128,20 @@ static void v9fs_free_request(struct netfs_io_request *rreq)
 	p9_fid_put(fid);
 }
 
-/**
- * v9fs_begin_cache_operation - Begin a cache operation for a read
- * @rreq: The read request
- */
-static int v9fs_begin_cache_operation(struct netfs_io_request *rreq)
-{
-#ifdef CONFIG_9P_FSCACHE
-	struct fscache_cookie *cookie = v9fs_inode_cookie(V9FS_I(rreq->inode));
-
-	return fscache_begin_read_operation(&rreq->cache_resources, cookie);
-#else
-	return -ENOBUFS;
-#endif
-}
-
 const struct netfs_request_ops v9fs_req_ops = {
 	.init_request		= v9fs_init_request,
 	.free_request		= v9fs_free_request,
-	.begin_cache_operation	= v9fs_begin_cache_operation,
 	.issue_read		= v9fs_issue_read,
+	.create_write_requests	= v9fs_create_write_requests,
 };
 
-/**
- * v9fs_release_folio - release the private state associated with a folio
- * @folio: The folio to be released
- * @gfp: The caller's allocation restrictions
- *
- * Returns true if the page can be released, false otherwise.
- */
-
-static bool v9fs_release_folio(struct folio *folio, gfp_t gfp)
-{
-	if (folio_test_private(folio))
-		return false;
-#ifdef CONFIG_9P_FSCACHE
-	if (folio_test_fscache(folio)) {
-		if (current_is_kswapd() || !(gfp & __GFP_FS))
-			return false;
-		folio_wait_fscache(folio);
-	}
-	fscache_note_page_release(v9fs_inode_cookie(V9FS_I(folio_inode(folio))));
-#endif
-	return true;
-}
-
-static void v9fs_invalidate_folio(struct folio *folio, size_t offset,
-				 size_t length)
-{
-	folio_wait_fscache(folio);
-}
-
-#ifdef CONFIG_9P_FSCACHE
-static void v9fs_write_to_cache_done(void *priv, ssize_t transferred_or_error,
-				     bool was_async)
-{
-	struct v9fs_inode *v9inode = priv;
-	__le32 version;
-
-	if (IS_ERR_VALUE(transferred_or_error) &&
-	    transferred_or_error != -ENOBUFS) {
-		version = cpu_to_le32(v9inode->qid.version);
-		fscache_invalidate(v9fs_inode_cookie(v9inode), &version,
-				   i_size_read(&v9inode->netfs.inode), 0);
-	}
-}
-#endif
-
-static int v9fs_vfs_write_folio_locked(struct folio *folio)
-{
-	struct inode *inode = folio_inode(folio);
-	loff_t start = folio_pos(folio);
-	loff_t i_size = i_size_read(inode);
-	struct iov_iter from;
-	size_t len = folio_size(folio);
-	struct p9_fid *writeback_fid;
-	int err;
-	struct v9fs_inode __maybe_unused *v9inode = V9FS_I(inode);
-	struct fscache_cookie __maybe_unused *cookie = v9fs_inode_cookie(v9inode);
-
-	if (start >= i_size)
-		return 0; /* Simultaneous truncation occurred */
-
-	len = min_t(loff_t, i_size - start, len);
-
-	iov_iter_xarray(&from, ITER_SOURCE, &folio_mapping(folio)->i_pages, start, len);
-
-	writeback_fid = v9fs_fid_find_inode(inode, true, INVALID_UID, true);
-	if (!writeback_fid) {
-		WARN_ONCE(1, "folio expected an open fid inode->i_private=%p\n",
-			inode->i_private);
-		return -EINVAL;
-	}
-
-	folio_wait_fscache(folio);
-	folio_start_writeback(folio);
-
-	p9_client_write(writeback_fid, start, &from, &err);
-
-#ifdef CONFIG_9P_FSCACHE
-	if (err == 0 &&
-		fscache_cookie_enabled(cookie) &&
-		test_bit(FSCACHE_COOKIE_IS_CACHING, &cookie->flags)) {
-		folio_start_fscache(folio);
-		fscache_write_to_cache(v9fs_inode_cookie(v9inode),
-					folio_mapping(folio), start, len, i_size,
-					v9fs_write_to_cache_done, v9inode,
-					true);
-	}
-#endif
-
-	folio_end_writeback(folio);
-	p9_fid_put(writeback_fid);
-
-	return err;
-}
-
-static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc)
-{
-	struct folio *folio = page_folio(page);
-	int retval;
-
-	p9_debug(P9_DEBUG_VFS, "folio %p\n", folio);
-
-	retval = v9fs_vfs_write_folio_locked(folio);
-	if (retval < 0) {
-		if (retval == -EAGAIN) {
-			folio_redirty_for_writepage(wbc, folio);
-			retval = 0;
-		} else {
-			mapping_set_error(folio_mapping(folio), retval);
-		}
-	} else
-		retval = 0;
-
-	folio_unlock(folio);
-	return retval;
-}
-
-static int v9fs_launder_folio(struct folio *folio)
-{
-	int retval;
-
-	if (folio_clear_dirty_for_io(folio)) {
-		retval = v9fs_vfs_write_folio_locked(folio);
-		if (retval)
-			return retval;
-	}
-	folio_wait_fscache(folio);
-	return 0;
-}
-
-/**
- * v9fs_direct_IO - 9P address space operation for direct I/O
- * @iocb: target I/O control block
- * @iter: The data/buffer to use
- *
- * The presence of v9fs_direct_IO() in the address space ops vector
- * allowes open() O_DIRECT flags which would have failed otherwise.
- *
- * In the non-cached mode, we shunt off direct read and write requests before
- * the VFS gets them, so this method should never be called.
- *
- * Direct IO is not 'yet' supported in the cached mode. Hence when
- * this routine is called through generic_file_aio_read(), the read/write fails
- * with an error.
- *
- */
-static ssize_t
-v9fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
-{
-	struct file *file = iocb->ki_filp;
-	loff_t pos = iocb->ki_pos;
-	ssize_t n;
-	int err = 0;
-
-	if (iov_iter_rw(iter) == WRITE) {
-		n = p9_client_write(file->private_data, pos, iter, &err);
-		if (n) {
-			struct inode *inode = file_inode(file);
-			loff_t i_size = i_size_read(inode);
-
-			if (pos + n > i_size)
-				inode_add_bytes(inode, pos + n - i_size);
-		}
-	} else {
-		n = p9_client_read(file->private_data, pos, iter, &err);
-	}
-	return n ? n : err;
-}
-
-static int v9fs_write_begin(struct file *filp, struct address_space *mapping,
-			    loff_t pos, unsigned int len,
-			    struct page **subpagep, void **fsdata)
-{
-	int retval;
-	struct folio *folio;
-	struct v9fs_inode *v9inode = V9FS_I(mapping->host);
-
-	p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping);
-
-	/* Prefetch area to be written into the cache if we're caching this
-	 * file.  We need to do this before we get a lock on the page in case
-	 * there's more than one writer competing for the same cache block.
-	 */
-	retval = netfs_write_begin(&v9inode->netfs, filp, mapping, pos, len, &folio, fsdata);
-	if (retval < 0)
-		return retval;
-
-	*subpagep = &folio->page;
-	return retval;
-}
-
-static int v9fs_write_end(struct file *filp, struct address_space *mapping,
-			  loff_t pos, unsigned int len, unsigned int copied,
-			  struct page *subpage, void *fsdata)
-{
-	loff_t last_pos = pos + copied;
-	struct folio *folio = page_folio(subpage);
-	struct inode *inode = mapping->host;
-
-	p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping);
-
-	if (!folio_test_uptodate(folio)) {
-		if (unlikely(copied < len)) {
-			copied = 0;
-			goto out;
-		}
-
-		folio_mark_uptodate(folio);
-	}
-
-	/*
-	 * No need to use i_size_read() here, the i_size
-	 * cannot change under us because we hold the i_mutex.
-	 */
-	if (last_pos > inode->i_size) {
-		inode_add_bytes(inode, last_pos - inode->i_size);
-		i_size_write(inode, last_pos);
-#ifdef CONFIG_9P_FSCACHE
-		fscache_update_cookie(v9fs_inode_cookie(V9FS_I(inode)), NULL,
-			&last_pos);
-#endif
-	}
-	folio_mark_dirty(folio);
-out:
-	folio_unlock(folio);
-	folio_put(folio);
-
-	return copied;
-}
-
-#ifdef CONFIG_9P_FSCACHE
-/*
- * Mark a page as having been made dirty and thus needing writeback.  We also
- * need to pin the cache object to write back to.
- */
-static bool v9fs_dirty_folio(struct address_space *mapping, struct folio *folio)
-{
-	struct v9fs_inode *v9inode = V9FS_I(mapping->host);
-
-	return fscache_dirty_folio(mapping, folio, v9fs_inode_cookie(v9inode));
-}
-#else
-#define v9fs_dirty_folio filemap_dirty_folio
-#endif
-
 const struct address_space_operations v9fs_addr_operations = {
-	.read_folio = netfs_read_folio,
-	.readahead = netfs_readahead,
-	.dirty_folio = v9fs_dirty_folio,
-	.writepage = v9fs_vfs_writepage,
-	.write_begin = v9fs_write_begin,
-	.write_end = v9fs_write_end,
-	.release_folio = v9fs_release_folio,
-	.invalidate_folio = v9fs_invalidate_folio,
-	.launder_folio = v9fs_launder_folio,
-	.direct_IO = v9fs_direct_IO,
+	.read_folio		= netfs_read_folio,
+	.readahead		= netfs_readahead,
+	.dirty_folio		= netfs_dirty_folio,
+	.release_folio		= netfs_release_folio,
+	.invalidate_folio	= netfs_invalidate_folio,
+	.launder_folio		= netfs_launder_folio,
+	.direct_IO		= noop_direct_IO,
+	.writepages		= netfs_writepages,
 };
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 11cd8d23f6f2..bae330c2f0cf 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -353,25 +353,15 @@ static ssize_t
 v9fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct p9_fid *fid = iocb->ki_filp->private_data;
-	int ret, err = 0;
 
 	p9_debug(P9_DEBUG_VFS, "fid %d count %zu offset %lld\n",
 		 fid->fid, iov_iter_count(to), iocb->ki_pos);
 
-	if (!(fid->mode & P9L_DIRECT)) {
-		p9_debug(P9_DEBUG_VFS, "(cached)\n");
-		return generic_file_read_iter(iocb, to);
-	}
-
-	if (iocb->ki_filp->f_flags & O_NONBLOCK)
-		ret = p9_client_read_once(fid, iocb->ki_pos, to, &err);
-	else
-		ret = p9_client_read(fid, iocb->ki_pos, to, &err);
-	if (!ret)
-		return err;
+	if (fid->mode & P9L_DIRECT)
+		return netfs_unbuffered_read_iter(iocb, to);
 
-	iocb->ki_pos += ret;
-	return ret;
+	p9_debug(P9_DEBUG_VFS, "(cached)\n");
+	return netfs_file_read_iter(iocb, to);
 }
 
 /*
@@ -407,46 +397,14 @@ v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct p9_fid *fid = file->private_data;
-	ssize_t retval;
-	loff_t origin;
-	int err = 0;
 
 	p9_debug(P9_DEBUG_VFS, "fid %d\n", fid->fid);
 
-	if (!(fid->mode & (P9L_DIRECT | P9L_NOWRITECACHE))) {
-		p9_debug(P9_DEBUG_CACHE, "(cached)\n");
-		return generic_file_write_iter(iocb, from);
-	}
+	if (fid->mode & (P9L_DIRECT | P9L_NOWRITECACHE))
+		return netfs_unbuffered_write_iter(iocb, from);
 
-	retval = generic_write_checks(iocb, from);
-	if (retval <= 0)
-		return retval;
-
-	origin = iocb->ki_pos;
-	retval = p9_client_write(file->private_data, iocb->ki_pos, from, &err);
-	if (retval > 0) {
-		struct inode *inode = file_inode(file);
-		loff_t i_size;
-		unsigned long pg_start, pg_end;
-
-		pg_start = origin >> PAGE_SHIFT;
-		pg_end = (origin + retval - 1) >> PAGE_SHIFT;
-		if (inode->i_mapping && inode->i_mapping->nrpages)
-			invalidate_inode_pages2_range(inode->i_mapping,
-						      pg_start, pg_end);
-		iocb->ki_pos += retval;
-		i_size = i_size_read(inode);
-		if (iocb->ki_pos > i_size) {
-			inode_add_bytes(inode, iocb->ki_pos - i_size);
-			/*
-			 * Need to serialize against i_size_write() in
-			 * v9fs_stat2inode()
-			 */
-			v9fs_i_size_write(inode, iocb->ki_pos);
-		}
-		return retval;
-	}
-	return err;
+	p9_debug(P9_DEBUG_CACHE, "(cached)\n");
+	return netfs_file_write_iter(iocb, from);
 }
 
 static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end,
@@ -519,36 +477,7 @@ v9fs_file_mmap(struct file *filp, struct vm_area_struct *vma)
 static vm_fault_t
 v9fs_vm_page_mkwrite(struct vm_fault *vmf)
 {
-	struct folio *folio = page_folio(vmf->page);
-	struct file *filp = vmf->vma->vm_file;
-	struct inode *inode = file_inode(filp);
-
-
-	p9_debug(P9_DEBUG_VFS, "folio %p fid %lx\n",
-		 folio, (unsigned long)filp->private_data);
-
-	/* Wait for the page to be written to the cache before we allow it to
-	 * be modified.  We then assume the entire page will need writing back.
-	 */
-#ifdef CONFIG_9P_FSCACHE
-	if (folio_test_fscache(folio) &&
-	    folio_wait_fscache_killable(folio) < 0)
-		return VM_FAULT_NOPAGE;
-#endif
-
-	/* Update file times before taking page lock */
-	file_update_time(filp);
-
-	if (folio_lock_killable(folio) < 0)
-		return VM_FAULT_RETRY;
-	if (folio_mapping(folio) != inode->i_mapping)
-		goto out_unlock;
-	folio_wait_stable(folio);
-
-	return VM_FAULT_LOCKED;
-out_unlock:
-	folio_unlock(folio);
-	return VM_FAULT_NOPAGE;
+	return netfs_page_mkwrite(vmf, NULL);
 }
 
 static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index b845ee18a80b..32572982f72e 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -246,10 +246,10 @@ void v9fs_free_inode(struct inode *inode)
 /*
  * Set parameters for the netfs library
  */
-static void v9fs_set_netfs_context(struct inode *inode)
+void v9fs_set_netfs_context(struct inode *inode)
 {
 	struct v9fs_inode *v9inode = V9FS_I(inode);
-	netfs_inode_init(&v9inode->netfs, &v9fs_req_ops);
+	netfs_inode_init(&v9inode->netfs, &v9fs_req_ops, true);
 }
 
 int v9fs_init_inode(struct v9fs_session_info *v9ses,
@@ -326,8 +326,6 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
 		err = -EINVAL;
 		goto error;
 	}
-
-	v9fs_set_netfs_context(inode);
 error:
 	return err;
 
@@ -359,6 +357,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode, dev_t rdev)
 		iput(inode);
 		return ERR_PTR(err);
 	}
+	v9fs_set_netfs_context(inode);
 	return inode;
 }
 
@@ -374,11 +373,8 @@ void v9fs_evict_inode(struct inode *inode)
 
 	truncate_inode_pages_final(&inode->i_data);
 
-#ifdef CONFIG_9P_FSCACHE
 	version = cpu_to_le32(v9inode->qid.version);
-	fscache_clear_inode_writeback(v9fs_inode_cookie(v9inode), inode,
-				      &version);
-#endif
+	netfs_clear_inode_writeback(inode, &version);
 
 	clear_inode(inode);
 	filemap_fdatawrite(&inode->i_data);
@@ -464,6 +460,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
 		goto error;
 
 	v9fs_stat2inode(st, inode, sb, 0);
+	v9fs_set_netfs_context(inode);
 	v9fs_cache_inode_get_cookie(inode);
 	unlock_new_inode(inode);
 	return inode;
@@ -1113,7 +1110,7 @@ static int v9fs_vfs_setattr(struct mnt_idmap *idmap,
 	if ((iattr->ia_valid & ATTR_SIZE) &&
 		 iattr->ia_size != i_size_read(inode)) {
 		truncate_setsize(inode, iattr->ia_size);
-		truncate_pagecache(inode, iattr->ia_size);
+		netfs_resize_file(netfs_inode(inode), iattr->ia_size, true);
 
 #ifdef CONFIG_9P_FSCACHE
 		if (v9ses->cache & CACHE_FSCACHE) {
@@ -1181,6 +1178,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
 	mode |= inode->i_mode & ~S_IALLUGO;
 	inode->i_mode = mode;
 
+	v9inode->netfs.remote_i_size = stat->length;
 	if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE))
 		v9fs_i_size_write(inode, stat->length);
 	/* not real number of blocks, but 512 byte ones ... */
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index c7319af2f471..3505227e1704 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -128,6 +128,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
 		goto error;
 
 	v9fs_stat2inode_dotl(st, inode, 0);
+	v9fs_set_netfs_context(inode);
 	v9fs_cache_inode_get_cookie(inode);
 	retval = v9fs_get_acl(inode, fid);
 	if (retval)
@@ -598,7 +599,7 @@ int v9fs_vfs_setattr_dotl(struct mnt_idmap *idmap,
 	if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size !=
 		 i_size_read(inode)) {
 		truncate_setsize(inode, iattr->ia_size);
-		truncate_pagecache(inode, iattr->ia_size);
+		netfs_resize_file(netfs_inode(inode), iattr->ia_size, true);
 
 #ifdef CONFIG_9P_FSCACHE
 		if (v9ses->cache & CACHE_FSCACHE)
@@ -655,6 +656,7 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
 		mode |= inode->i_mode & ~S_IALLUGO;
 		inode->i_mode = mode;
 
+		v9inode->netfs.remote_i_size = stat->st_size;
 		if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE))
 			v9fs_i_size_write(inode, stat->st_size);
 		inode->i_blocks = stat->st_blocks;
@@ -683,8 +685,10 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
 			inode->i_mode = mode;
 		}
 		if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE) &&
-		    stat->st_result_mask & P9_STATS_SIZE)
+		    stat->st_result_mask & P9_STATS_SIZE) {
+			v9inode->netfs.remote_i_size = stat->st_size;
 			v9fs_i_size_write(inode, stat->st_size);
+		}
 		if (stat->st_result_mask & P9_STATS_BLOCKS)
 			inode->i_blocks = stat->st_blocks;
 	}
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 73db55c050bf..941f7d0e0bfa 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -289,31 +289,21 @@ static int v9fs_drop_inode(struct inode *inode)
 static int v9fs_write_inode(struct inode *inode,
 			    struct writeback_control *wbc)
 {
-	struct v9fs_inode *v9inode;
-
 	/*
 	 * send an fsync request to server irrespective of
 	 * wbc->sync_mode.
 	 */
 	p9_debug(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
-
-	v9inode = V9FS_I(inode);
-	fscache_unpin_writeback(wbc, v9fs_inode_cookie(v9inode));
-
-	return 0;
+	return netfs_unpin_writeback(inode, wbc);
 }
 
 static int v9fs_write_inode_dotl(struct inode *inode,
 				 struct writeback_control *wbc)
 {
-	struct v9fs_inode *v9inode;
 
-	v9inode = V9FS_I(inode);
 	p9_debug(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
 
-	fscache_unpin_writeback(wbc, v9fs_inode_cookie(v9inode));
-
-	return 0;
+	return netfs_unpin_writeback(inode, wbc);
 }
 
 static const struct super_operations v9fs_super_ops = {
diff --git a/fs/Kconfig b/fs/Kconfig
index a3159831ba98..89fdbefd1075 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -144,7 +144,6 @@ source "fs/overlayfs/Kconfig"
 menu "Caches"
 
 source "fs/netfs/Kconfig"
-source "fs/fscache/Kconfig"
 source "fs/cachefiles/Kconfig"
 
 endmenu
diff --git a/fs/Makefile b/fs/Makefile
index a6962c588962..c09016257f05 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -61,7 +61,6 @@ obj-$(CONFIG_DLM)		+= dlm/
  
 # Do not add any filesystems before this line
 obj-$(CONFIG_NETFS_SUPPORT)	+= netfs/
-obj-$(CONFIG_FSCACHE)		+= fscache/
 obj-$(CONFIG_REISERFS_FS)	+= reiserfs/
 obj-$(CONFIG_EXT4_FS)		+= ext4/
 # We place ext4 before ext2 so that clean ext3 root fs's do NOT mount using the
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index c14533ef108f..b5b8de521f99 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -124,7 +124,7 @@ static void afs_dir_read_cleanup(struct afs_read *req)
 		if (xas_retry(&xas, folio))
 			continue;
 		BUG_ON(xa_is_value(folio));
-		ASSERTCMP(folio_file_mapping(folio), ==, mapping);
+		ASSERTCMP(folio->mapping, ==, mapping);
 
 		folio_put(folio);
 	}
@@ -202,12 +202,12 @@ static void afs_dir_dump(struct afs_vnode *dvnode, struct afs_read *req)
 		if (xas_retry(&xas, folio))
 			continue;
 
-		BUG_ON(folio_file_mapping(folio) != mapping);
+		BUG_ON(folio->mapping != mapping);
 
 		size = min_t(loff_t, folio_size(folio), req->actual_len - folio_pos(folio));
 		for (offset = 0; offset < size; offset += sizeof(*block)) {
 			block = kmap_local_folio(folio, offset);
-			pr_warn("[%02lx] %32phN\n", folio_index(folio) + offset, block);
+			pr_warn("[%02lx] %32phN\n", folio->index + offset, block);
 			kunmap_local(block);
 		}
 	}
@@ -233,7 +233,7 @@ static int afs_dir_check(struct afs_vnode *dvnode, struct afs_read *req)
 		if (xas_retry(&xas, folio))
 			continue;
 
-		BUG_ON(folio_file_mapping(folio) != mapping);
+		BUG_ON(folio->mapping != mapping);
 
 		if (!afs_dir_check_folio(dvnode, folio, req->actual_len)) {
 			afs_dir_dump(dvnode, req);
@@ -474,6 +474,14 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode,
 			continue;
 		}
 
+		/* Don't expose silly rename entries to userspace. */
+		if (nlen > 6 &&
+		    dire->u.name[0] == '.' &&
+		    ctx->actor != afs_lookup_filldir &&
+		    ctx->actor != afs_lookup_one_filldir &&
+		    memcmp(dire->u.name, ".__afs", 6) == 0)
+			continue;
+
 		/* found the next entry */
 		if (!dir_emit(ctx, dire->u.name, nlen,
 			      ntohl(dire->u.vnode),
@@ -708,6 +716,8 @@ static void afs_do_lookup_success(struct afs_operation *op)
 			break;
 		}
 
+		if (vp->scb.status.abort_code)
+			trace_afs_bulkstat_error(op, &vp->fid, i, vp->scb.status.abort_code);
 		if (!vp->scb.have_status && !vp->scb.have_error)
 			continue;
 
@@ -897,12 +907,16 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
 		afs_begin_vnode_operation(op);
 		afs_wait_for_operation(op);
 	}
-	inode = ERR_PTR(afs_op_error(op));
 
 out_op:
 	if (!afs_op_error(op)) {
-		inode = &op->file[1].vnode->netfs.inode;
-		op->file[1].vnode = NULL;
+		if (op->file[1].scb.status.abort_code) {
+			afs_op_accumulate_error(op, -ECONNABORTED,
+						op->file[1].scb.status.abort_code);
+		} else {
+			inode = &op->file[1].vnode->netfs.inode;
+			op->file[1].vnode = NULL;
+		}
 	}
 
 	if (op->file[0].scb.have_status)
@@ -2022,7 +2036,7 @@ static bool afs_dir_release_folio(struct folio *folio, gfp_t gfp_flags)
 {
 	struct afs_vnode *dvnode = AFS_FS_I(folio_inode(folio));
 
-	_enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, folio_index(folio));
+	_enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, folio->index);
 
 	folio_detach_private(folio);
 
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index 2cd40ba601f1..c4d2711e20ad 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -76,7 +76,7 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
 	/* there shouldn't be an existing inode */
 	BUG_ON(!(inode->i_state & I_NEW));
 
-	netfs_inode_init(&vnode->netfs, NULL);
+	netfs_inode_init(&vnode->netfs, NULL, false);
 	inode->i_size		= 0;
 	inode->i_mode		= S_IFDIR | S_IRUGO | S_IXUGO;
 	if (root) {
@@ -258,16 +258,7 @@ const struct inode_operations afs_dynroot_inode_operations = {
 	.lookup		= afs_dynroot_lookup,
 };
 
-/*
- * Dirs in the dynamic root don't need revalidation.
- */
-static int afs_dynroot_d_revalidate(struct dentry *dentry, unsigned int flags)
-{
-	return 1;
-}
-
 const struct dentry_operations afs_dynroot_dentry_operations = {
-	.d_revalidate	= afs_dynroot_d_revalidate,
 	.d_delete	= always_delete_dentry,
 	.d_release	= afs_d_release,
 	.d_automount	= afs_d_automount,
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 30914e0d9cb2..3d33b221d9ca 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -20,9 +20,6 @@
 
 static int afs_file_mmap(struct file *file, struct vm_area_struct *vma);
 static int afs_symlink_read_folio(struct file *file, struct folio *folio);
-static void afs_invalidate_folio(struct folio *folio, size_t offset,
-			       size_t length);
-static bool afs_release_folio(struct folio *folio, gfp_t gfp_flags);
 
 static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter);
 static ssize_t afs_file_splice_read(struct file *in, loff_t *ppos,
@@ -37,7 +34,7 @@ const struct file_operations afs_file_operations = {
 	.release	= afs_release,
 	.llseek		= generic_file_llseek,
 	.read_iter	= afs_file_read_iter,
-	.write_iter	= afs_file_write,
+	.write_iter	= netfs_file_write_iter,
 	.mmap		= afs_file_mmap,
 	.splice_read	= afs_file_splice_read,
 	.splice_write	= iter_file_splice_write,
@@ -53,22 +50,21 @@ const struct inode_operations afs_file_inode_operations = {
 };
 
 const struct address_space_operations afs_file_aops = {
+	.direct_IO	= noop_direct_IO,
 	.read_folio	= netfs_read_folio,
 	.readahead	= netfs_readahead,
-	.dirty_folio	= afs_dirty_folio,
-	.launder_folio	= afs_launder_folio,
-	.release_folio	= afs_release_folio,
-	.invalidate_folio = afs_invalidate_folio,
-	.write_begin	= afs_write_begin,
-	.write_end	= afs_write_end,
-	.writepages	= afs_writepages,
+	.dirty_folio	= netfs_dirty_folio,
+	.launder_folio	= netfs_launder_folio,
+	.release_folio	= netfs_release_folio,
+	.invalidate_folio = netfs_invalidate_folio,
 	.migrate_folio	= filemap_migrate_folio,
+	.writepages	= afs_writepages,
 };
 
 const struct address_space_operations afs_symlink_aops = {
 	.read_folio	= afs_symlink_read_folio,
-	.release_folio	= afs_release_folio,
-	.invalidate_folio = afs_invalidate_folio,
+	.release_folio	= netfs_release_folio,
+	.invalidate_folio = netfs_invalidate_folio,
 	.migrate_folio	= filemap_migrate_folio,
 };
 
@@ -323,11 +319,7 @@ static void afs_issue_read(struct netfs_io_subrequest *subreq)
 	fsreq->len	= subreq->len   - subreq->transferred;
 	fsreq->key	= key_get(subreq->rreq->netfs_priv);
 	fsreq->vnode	= vnode;
-	fsreq->iter	= &fsreq->def_iter;
-
-	iov_iter_xarray(&fsreq->def_iter, ITER_DEST,
-			&fsreq->vnode->netfs.inode.i_mapping->i_pages,
-			fsreq->pos, fsreq->len);
+	fsreq->iter	= &subreq->io_iter;
 
 	afs_fetch_data(fsreq->vnode, fsreq);
 	afs_put_read(fsreq);
@@ -359,22 +351,13 @@ static int afs_symlink_read_folio(struct file *file, struct folio *folio)
 
 static int afs_init_request(struct netfs_io_request *rreq, struct file *file)
 {
-	rreq->netfs_priv = key_get(afs_file_key(file));
+	if (file)
+		rreq->netfs_priv = key_get(afs_file_key(file));
+	rreq->rsize = 256 * 1024;
+	rreq->wsize = 256 * 1024;
 	return 0;
 }
 
-static int afs_begin_cache_operation(struct netfs_io_request *rreq)
-{
-#ifdef CONFIG_AFS_FSCACHE
-	struct afs_vnode *vnode = AFS_FS_I(rreq->inode);
-
-	return fscache_begin_read_operation(&rreq->cache_resources,
-					    afs_vnode_cache(vnode));
-#else
-	return -ENOBUFS;
-#endif
-}
-
 static int afs_check_write_begin(struct file *file, loff_t pos, unsigned len,
 				 struct folio **foliop, void **_fsdata)
 {
@@ -388,128 +371,37 @@ static void afs_free_request(struct netfs_io_request *rreq)
 	key_put(rreq->netfs_priv);
 }
 
-const struct netfs_request_ops afs_req_ops = {
-	.init_request		= afs_init_request,
-	.free_request		= afs_free_request,
-	.begin_cache_operation	= afs_begin_cache_operation,
-	.check_write_begin	= afs_check_write_begin,
-	.issue_read		= afs_issue_read,
-};
-
-int afs_write_inode(struct inode *inode, struct writeback_control *wbc)
+static void afs_update_i_size(struct inode *inode, loff_t new_i_size)
 {
-	fscache_unpin_writeback(wbc, afs_vnode_cache(AFS_FS_I(inode)));
-	return 0;
-}
-
-/*
- * Adjust the dirty region of the page on truncation or full invalidation,
- * getting rid of the markers altogether if the region is entirely invalidated.
- */
-static void afs_invalidate_dirty(struct folio *folio, size_t offset,
-				 size_t length)
-{
-	struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
-	unsigned long priv;
-	unsigned int f, t, end = offset + length;
-
-	priv = (unsigned long)folio_get_private(folio);
-
-	/* we clean up only if the entire page is being invalidated */
-	if (offset == 0 && length == folio_size(folio))
-		goto full_invalidate;
-
-	 /* If the page was dirtied by page_mkwrite(), the PTE stays writable
-	  * and we don't get another notification to tell us to expand it
-	  * again.
-	  */
-	if (afs_is_folio_dirty_mmapped(priv))
-		return;
-
-	/* We may need to shorten the dirty region */
-	f = afs_folio_dirty_from(folio, priv);
-	t = afs_folio_dirty_to(folio, priv);
-
-	if (t <= offset || f >= end)
-		return; /* Doesn't overlap */
-
-	if (f < offset && t > end)
-		return; /* Splits the dirty region - just absorb it */
-
-	if (f >= offset && t <= end)
-		goto undirty;
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	loff_t i_size;
 
-	if (f < offset)
-		t = offset;
-	else
-		f = end;
-	if (f == t)
-		goto undirty;
-
-	priv = afs_folio_dirty(folio, f, t);
-	folio_change_private(folio, (void *)priv);
-	trace_afs_folio_dirty(vnode, tracepoint_string("trunc"), folio);
-	return;
-
-undirty:
-	trace_afs_folio_dirty(vnode, tracepoint_string("undirty"), folio);
-	folio_clear_dirty_for_io(folio);
-full_invalidate:
-	trace_afs_folio_dirty(vnode, tracepoint_string("inval"), folio);
-	folio_detach_private(folio);
+	write_seqlock(&vnode->cb_lock);
+	i_size = i_size_read(&vnode->netfs.inode);
+	if (new_i_size > i_size) {
+		i_size_write(&vnode->netfs.inode, new_i_size);
+		inode_set_bytes(&vnode->netfs.inode, new_i_size);
+	}
+	write_sequnlock(&vnode->cb_lock);
+	fscache_update_cookie(afs_vnode_cache(vnode), NULL, &new_i_size);
 }
 
-/*
- * invalidate part or all of a page
- * - release a page and clean up its private data if offset is 0 (indicating
- *   the entire page)
- */
-static void afs_invalidate_folio(struct folio *folio, size_t offset,
-			       size_t length)
+static void afs_netfs_invalidate_cache(struct netfs_io_request *wreq)
 {
-	_enter("{%lu},%zu,%zu", folio->index, offset, length);
-
-	BUG_ON(!folio_test_locked(folio));
+	struct afs_vnode *vnode = AFS_FS_I(wreq->inode);
 
-	if (folio_get_private(folio))
-		afs_invalidate_dirty(folio, offset, length);
-
-	folio_wait_fscache(folio);
-	_leave("");
+	afs_invalidate_cache(vnode, 0);
 }
 
-/*
- * release a page and clean up its private state if it's not busy
- * - return true if the page can now be released, false if not
- */
-static bool afs_release_folio(struct folio *folio, gfp_t gfp)
-{
-	struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
-
-	_enter("{{%llx:%llu}[%lu],%lx},%x",
-	       vnode->fid.vid, vnode->fid.vnode, folio_index(folio), folio->flags,
-	       gfp);
-
-	/* deny if folio is being written to the cache and the caller hasn't
-	 * elected to wait */
-#ifdef CONFIG_AFS_FSCACHE
-	if (folio_test_fscache(folio)) {
-		if (current_is_kswapd() || !(gfp & __GFP_FS))
-			return false;
-		folio_wait_fscache(folio);
-	}
-	fscache_note_page_release(afs_vnode_cache(vnode));
-#endif
-
-	if (folio_test_private(folio)) {
-		trace_afs_folio_dirty(vnode, tracepoint_string("rel"), folio);
-		folio_detach_private(folio);
-	}
-
-	/* Indicate that the folio can be released */
-	_leave(" = T");
-	return true;
-}
+const struct netfs_request_ops afs_req_ops = {
+	.init_request		= afs_init_request,
+	.free_request		= afs_free_request,
+	.check_write_begin	= afs_check_write_begin,
+	.issue_read		= afs_issue_read,
+	.update_i_size		= afs_update_i_size,
+	.invalidate_cache	= afs_netfs_invalidate_cache,
+	.create_write_requests	= afs_create_write_requests,
+};
 
 static void afs_add_open_mmap(struct afs_vnode *vnode)
 {
@@ -576,28 +468,39 @@ static vm_fault_t afs_vm_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pg
 
 static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
-	struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp));
+	struct inode *inode = file_inode(iocb->ki_filp);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
 	struct afs_file *af = iocb->ki_filp->private_data;
-	int ret;
+	ssize_t ret;
 
-	ret = afs_validate(vnode, af->key);
+	if (iocb->ki_flags & IOCB_DIRECT)
+		return netfs_unbuffered_read_iter(iocb, iter);
+
+	ret = netfs_start_io_read(inode);
 	if (ret < 0)
 		return ret;
-
-	return generic_file_read_iter(iocb, iter);
+	ret = afs_validate(vnode, af->key);
+	if (ret == 0)
+		ret = filemap_read(iocb, iter, 0);
+	netfs_end_io_read(inode);
+	return ret;
 }
 
 static ssize_t afs_file_splice_read(struct file *in, loff_t *ppos,
 				    struct pipe_inode_info *pipe,
 				    size_t len, unsigned int flags)
 {
-	struct afs_vnode *vnode = AFS_FS_I(file_inode(in));
+	struct inode *inode = file_inode(in);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
 	struct afs_file *af = in->private_data;
-	int ret;
+	ssize_t ret;
 
-	ret = afs_validate(vnode, af->key);
+	ret = netfs_start_io_read(inode);
 	if (ret < 0)
 		return ret;
-
-	return filemap_splice_read(in, ppos, pipe, len, flags);
+	ret = afs_validate(vnode, af->key);
+	if (ret == 0)
+		ret = filemap_splice_read(in, ppos, pipe, len, flags);
+	netfs_end_io_read(inode);
+	return ret;
 }
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 4f04f6f33f46..94fc049aff58 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -58,7 +58,7 @@ static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *paren
  */
 static void afs_set_netfs_context(struct afs_vnode *vnode)
 {
-	netfs_inode_init(&vnode->netfs, &afs_req_ops);
+	netfs_inode_init(&vnode->netfs, &afs_req_ops, true);
 }
 
 /*
@@ -166,6 +166,7 @@ static void afs_apply_status(struct afs_operation *op,
 	struct inode *inode = &vnode->netfs.inode;
 	struct timespec64 t;
 	umode_t mode;
+	bool unexpected_jump = false;
 	bool data_changed = false;
 	bool change_size = vp->set_size;
 
@@ -230,6 +231,7 @@ static void afs_apply_status(struct afs_operation *op,
 		}
 		change_size = true;
 		data_changed = true;
+		unexpected_jump = true;
 	} else if (vnode->status.type == AFS_FTYPE_DIR) {
 		/* Expected directory change is handled elsewhere so
 		 * that we can locally edit the directory and save on a
@@ -249,8 +251,10 @@ static void afs_apply_status(struct afs_operation *op,
 		 * what's on the server.
 		 */
 		vnode->netfs.remote_i_size = status->size;
-		if (change_size) {
+		if (change_size || status->size > i_size_read(inode)) {
 			afs_set_i_size(vnode, status->size);
+			if (unexpected_jump)
+				vnode->netfs.zero_point = status->size;
 			inode_set_ctime_to_ts(inode, t);
 			inode_set_atime_to_ts(inode, t);
 		}
@@ -647,7 +651,7 @@ void afs_evict_inode(struct inode *inode)
 	truncate_inode_pages_final(&inode->i_data);
 
 	afs_set_cache_aux(vnode, &aux);
-	fscache_clear_inode_writeback(afs_vnode_cache(vnode), inode, &aux);
+	netfs_clear_inode_writeback(inode, &aux);
 	clear_inode(inode);
 
 	while (!list_empty(&vnode->wb_keys)) {
@@ -689,17 +693,17 @@ static void afs_setattr_success(struct afs_operation *op)
 static void afs_setattr_edit_file(struct afs_operation *op)
 {
 	struct afs_vnode_param *vp = &op->file[0];
-	struct inode *inode = &vp->vnode->netfs.inode;
+	struct afs_vnode *vnode = vp->vnode;
 
 	if (op->setattr.attr->ia_valid & ATTR_SIZE) {
 		loff_t size = op->setattr.attr->ia_size;
 		loff_t i_size = op->setattr.old_i_size;
 
-		if (size < i_size)
-			truncate_pagecache(inode, size);
-		if (size != i_size)
-			fscache_resize_cookie(afs_vnode_cache(vp->vnode),
-					      vp->scb.status.size);
+		if (size != i_size) {
+			truncate_setsize(&vnode->netfs.inode, size);
+			netfs_resize_file(&vnode->netfs, size, true);
+			fscache_resize_cookie(afs_vnode_cache(vnode), size);
+		}
 	}
 }
 
@@ -767,11 +771,11 @@ int afs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 		 */
 		if (!(attr->ia_valid & (supported & ~ATTR_SIZE & ~ATTR_MTIME)) &&
 		    attr->ia_size < i_size &&
-		    attr->ia_size > vnode->status.size) {
-			truncate_pagecache(inode, attr->ia_size);
+		    attr->ia_size > vnode->netfs.remote_i_size) {
+			truncate_setsize(inode, attr->ia_size);
+			netfs_resize_file(&vnode->netfs, size, false);
 			fscache_resize_cookie(afs_vnode_cache(vnode),
 					      attr->ia_size);
-			i_size_write(inode, attr->ia_size);
 			ret = 0;
 			goto out_unlock;
 		}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 55aa0679d8ce..9c03fcf7ffaa 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -985,62 +985,6 @@ static inline void afs_invalidate_cache(struct afs_vnode *vnode, unsigned int fl
 			   i_size_read(&vnode->netfs.inode), flags);
 }
 
-/*
- * We use folio->private to hold the amount of the folio that we've written to,
- * splitting the field into two parts.  However, we need to represent a range
- * 0...FOLIO_SIZE, so we reduce the resolution if the size of the folio
- * exceeds what we can encode.
- */
-#ifdef CONFIG_64BIT
-#define __AFS_FOLIO_PRIV_MASK		0x7fffffffUL
-#define __AFS_FOLIO_PRIV_SHIFT		32
-#define __AFS_FOLIO_PRIV_MMAPPED	0x80000000UL
-#else
-#define __AFS_FOLIO_PRIV_MASK		0x7fffUL
-#define __AFS_FOLIO_PRIV_SHIFT		16
-#define __AFS_FOLIO_PRIV_MMAPPED	0x8000UL
-#endif
-
-static inline unsigned int afs_folio_dirty_resolution(struct folio *folio)
-{
-	int shift = folio_shift(folio) - (__AFS_FOLIO_PRIV_SHIFT - 1);
-	return (shift > 0) ? shift : 0;
-}
-
-static inline size_t afs_folio_dirty_from(struct folio *folio, unsigned long priv)
-{
-	unsigned long x = priv & __AFS_FOLIO_PRIV_MASK;
-
-	/* The lower bound is inclusive */
-	return x << afs_folio_dirty_resolution(folio);
-}
-
-static inline size_t afs_folio_dirty_to(struct folio *folio, unsigned long priv)
-{
-	unsigned long x = (priv >> __AFS_FOLIO_PRIV_SHIFT) & __AFS_FOLIO_PRIV_MASK;
-
-	/* The upper bound is immediately beyond the region */
-	return (x + 1) << afs_folio_dirty_resolution(folio);
-}
-
-static inline unsigned long afs_folio_dirty(struct folio *folio, size_t from, size_t to)
-{
-	unsigned int res = afs_folio_dirty_resolution(folio);
-	from >>= res;
-	to = (to - 1) >> res;
-	return (to << __AFS_FOLIO_PRIV_SHIFT) | from;
-}
-
-static inline unsigned long afs_folio_dirty_mmapped(unsigned long priv)
-{
-	return priv | __AFS_FOLIO_PRIV_MMAPPED;
-}
-
-static inline bool afs_is_folio_dirty_mmapped(unsigned long priv)
-{
-	return priv & __AFS_FOLIO_PRIV_MMAPPED;
-}
-
 #include <trace/events/afs.h>
 
 /*****************************************************************************/
@@ -1167,7 +1111,6 @@ extern int afs_release(struct inode *, struct file *);
 extern int afs_fetch_data(struct afs_vnode *, struct afs_read *);
 extern struct afs_read *afs_alloc_read(gfp_t);
 extern void afs_put_read(struct afs_read *);
-extern int afs_write_inode(struct inode *, struct writeback_control *);
 
 static inline struct afs_read *afs_get_read(struct afs_read *req)
 {
@@ -1658,24 +1601,11 @@ extern int afs_check_volume_status(struct afs_volume *, struct afs_operation *);
 /*
  * write.c
  */
-#ifdef CONFIG_AFS_FSCACHE
-bool afs_dirty_folio(struct address_space *, struct folio *);
-#else
-#define afs_dirty_folio filemap_dirty_folio
-#endif
-extern int afs_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len,
-			struct page **pagep, void **fsdata);
-extern int afs_write_end(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned copied,
-			struct page *page, void *fsdata);
-extern int afs_writepage(struct page *, struct writeback_control *);
 extern int afs_writepages(struct address_space *, struct writeback_control *);
-extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
 extern int afs_fsync(struct file *, loff_t, loff_t, int);
 extern vm_fault_t afs_page_mkwrite(struct vm_fault *vmf);
 extern void afs_prune_wb_keys(struct afs_vnode *);
-int afs_launder_folio(struct folio *);
+void afs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len);
 
 /*
  * xattr.c
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 3bd02571f30d..15eab053af6d 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -166,7 +166,7 @@ static int afs_proc_addr_prefs_show(struct seq_file *m, void *v)
 
 	if (!preflist) {
 		seq_puts(m, "NO PREFS\n");
-		return 0;
+		goto out;
 	}
 
 	seq_printf(m, "PROT SUBNET                                      PRIOR (v=%u n=%u/%u/%u)\n",
@@ -191,7 +191,8 @@ static int afs_proc_addr_prefs_show(struct seq_file *m, void *v)
 		}
 	}
 
-	rcu_read_lock();
+out:
+	rcu_read_unlock();
 	return 0;
 }
 
diff --git a/fs/afs/super.c b/fs/afs/super.c
index ae2d66a52add..f3ba1c3e72f5 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -55,7 +55,7 @@ int afs_net_id;
 static const struct super_operations afs_super_ops = {
 	.statfs		= afs_statfs,
 	.alloc_inode	= afs_alloc_inode,
-	.write_inode	= afs_write_inode,
+	.write_inode	= netfs_unpin_writeback,
 	.drop_inode	= afs_drop_inode,
 	.destroy_inode	= afs_destroy_inode,
 	.free_inode	= afs_free_inode,
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 61d34ad2ca7d..74402d95a884 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -12,309 +12,17 @@
 #include <linux/writeback.h>
 #include <linux/pagevec.h>
 #include <linux/netfs.h>
+#include <trace/events/netfs.h>
 #include "internal.h"
 
-static int afs_writepages_region(struct address_space *mapping,
-				 struct writeback_control *wbc,
-				 loff_t start, loff_t end, loff_t *_next,
-				 bool max_one_loop);
-
-static void afs_write_to_cache(struct afs_vnode *vnode, loff_t start, size_t len,
-			       loff_t i_size, bool caching);
-
-#ifdef CONFIG_AFS_FSCACHE
-/*
- * Mark a page as having been made dirty and thus needing writeback.  We also
- * need to pin the cache object to write back to.
- */
-bool afs_dirty_folio(struct address_space *mapping, struct folio *folio)
-{
-	return fscache_dirty_folio(mapping, folio,
-				afs_vnode_cache(AFS_FS_I(mapping->host)));
-}
-static void afs_folio_start_fscache(bool caching, struct folio *folio)
-{
-	if (caching)
-		folio_start_fscache(folio);
-}
-#else
-static void afs_folio_start_fscache(bool caching, struct folio *folio)
-{
-}
-#endif
-
-/*
- * Flush out a conflicting write.  This may extend the write to the surrounding
- * pages if also dirty and contiguous to the conflicting region..
- */
-static int afs_flush_conflicting_write(struct address_space *mapping,
-				       struct folio *folio)
-{
-	struct writeback_control wbc = {
-		.sync_mode	= WB_SYNC_ALL,
-		.nr_to_write	= LONG_MAX,
-		.range_start	= folio_pos(folio),
-		.range_end	= LLONG_MAX,
-	};
-	loff_t next;
-
-	return afs_writepages_region(mapping, &wbc, folio_pos(folio), LLONG_MAX,
-				     &next, true);
-}
-
-/*
- * prepare to perform part of a write to a page
- */
-int afs_write_begin(struct file *file, struct address_space *mapping,
-		    loff_t pos, unsigned len,
-		    struct page **_page, void **fsdata)
-{
-	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
-	struct folio *folio;
-	unsigned long priv;
-	unsigned f, from;
-	unsigned t, to;
-	pgoff_t index;
-	int ret;
-
-	_enter("{%llx:%llu},%llx,%x",
-	       vnode->fid.vid, vnode->fid.vnode, pos, len);
-
-	/* Prefetch area to be written into the cache if we're caching this
-	 * file.  We need to do this before we get a lock on the page in case
-	 * there's more than one writer competing for the same cache block.
-	 */
-	ret = netfs_write_begin(&vnode->netfs, file, mapping, pos, len, &folio, fsdata);
-	if (ret < 0)
-		return ret;
-
-	index = folio_index(folio);
-	from = pos - index * PAGE_SIZE;
-	to = from + len;
-
-try_again:
-	/* See if this page is already partially written in a way that we can
-	 * merge the new write with.
-	 */
-	if (folio_test_private(folio)) {
-		priv = (unsigned long)folio_get_private(folio);
-		f = afs_folio_dirty_from(folio, priv);
-		t = afs_folio_dirty_to(folio, priv);
-		ASSERTCMP(f, <=, t);
-
-		if (folio_test_writeback(folio)) {
-			trace_afs_folio_dirty(vnode, tracepoint_string("alrdy"), folio);
-			folio_unlock(folio);
-			goto wait_for_writeback;
-		}
-		/* If the file is being filled locally, allow inter-write
-		 * spaces to be merged into writes.  If it's not, only write
-		 * back what the user gives us.
-		 */
-		if (!test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags) &&
-		    (to < f || from > t))
-			goto flush_conflicting_write;
-	}
-
-	*_page = folio_file_page(folio, pos / PAGE_SIZE);
-	_leave(" = 0");
-	return 0;
-
-	/* The previous write and this write aren't adjacent or overlapping, so
-	 * flush the page out.
-	 */
-flush_conflicting_write:
-	trace_afs_folio_dirty(vnode, tracepoint_string("confl"), folio);
-	folio_unlock(folio);
-
-	ret = afs_flush_conflicting_write(mapping, folio);
-	if (ret < 0)
-		goto error;
-
-wait_for_writeback:
-	ret = folio_wait_writeback_killable(folio);
-	if (ret < 0)
-		goto error;
-
-	ret = folio_lock_killable(folio);
-	if (ret < 0)
-		goto error;
-	goto try_again;
-
-error:
-	folio_put(folio);
-	_leave(" = %d", ret);
-	return ret;
-}
-
-/*
- * finalise part of a write to a page
- */
-int afs_write_end(struct file *file, struct address_space *mapping,
-		  loff_t pos, unsigned len, unsigned copied,
-		  struct page *subpage, void *fsdata)
-{
-	struct folio *folio = page_folio(subpage);
-	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
-	unsigned long priv;
-	unsigned int f, from = offset_in_folio(folio, pos);
-	unsigned int t, to = from + copied;
-	loff_t i_size, write_end_pos;
-
-	_enter("{%llx:%llu},{%lx}",
-	       vnode->fid.vid, vnode->fid.vnode, folio_index(folio));
-
-	if (!folio_test_uptodate(folio)) {
-		if (copied < len) {
-			copied = 0;
-			goto out;
-		}
-
-		folio_mark_uptodate(folio);
-	}
-
-	if (copied == 0)
-		goto out;
-
-	write_end_pos = pos + copied;
-
-	i_size = i_size_read(&vnode->netfs.inode);
-	if (write_end_pos > i_size) {
-		write_seqlock(&vnode->cb_lock);
-		i_size = i_size_read(&vnode->netfs.inode);
-		if (write_end_pos > i_size)
-			afs_set_i_size(vnode, write_end_pos);
-		write_sequnlock(&vnode->cb_lock);
-		fscache_update_cookie(afs_vnode_cache(vnode), NULL, &write_end_pos);
-	}
-
-	if (folio_test_private(folio)) {
-		priv = (unsigned long)folio_get_private(folio);
-		f = afs_folio_dirty_from(folio, priv);
-		t = afs_folio_dirty_to(folio, priv);
-		if (from < f)
-			f = from;
-		if (to > t)
-			t = to;
-		priv = afs_folio_dirty(folio, f, t);
-		folio_change_private(folio, (void *)priv);
-		trace_afs_folio_dirty(vnode, tracepoint_string("dirty+"), folio);
-	} else {
-		priv = afs_folio_dirty(folio, from, to);
-		folio_attach_private(folio, (void *)priv);
-		trace_afs_folio_dirty(vnode, tracepoint_string("dirty"), folio);
-	}
-
-	if (folio_mark_dirty(folio))
-		_debug("dirtied %lx", folio_index(folio));
-
-out:
-	folio_unlock(folio);
-	folio_put(folio);
-	return copied;
-}
-
-/*
- * kill all the pages in the given range
- */
-static void afs_kill_pages(struct address_space *mapping,
-			   loff_t start, loff_t len)
-{
-	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
-	struct folio *folio;
-	pgoff_t index = start / PAGE_SIZE;
-	pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
-
-	_enter("{%llx:%llu},%llx @%llx",
-	       vnode->fid.vid, vnode->fid.vnode, len, start);
-
-	do {
-		_debug("kill %lx (to %lx)", index, last);
-
-		folio = filemap_get_folio(mapping, index);
-		if (IS_ERR(folio)) {
-			next = index + 1;
-			continue;
-		}
-
-		next = folio_next_index(folio);
-
-		folio_clear_uptodate(folio);
-		folio_end_writeback(folio);
-		folio_lock(folio);
-		generic_error_remove_folio(mapping, folio);
-		folio_unlock(folio);
-		folio_put(folio);
-
-	} while (index = next, index <= last);
-
-	_leave("");
-}
-
-/*
- * Redirty all the pages in a given range.
- */
-static void afs_redirty_pages(struct writeback_control *wbc,
-			      struct address_space *mapping,
-			      loff_t start, loff_t len)
-{
-	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
-	struct folio *folio;
-	pgoff_t index = start / PAGE_SIZE;
-	pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
-
-	_enter("{%llx:%llu},%llx @%llx",
-	       vnode->fid.vid, vnode->fid.vnode, len, start);
-
-	do {
-		_debug("redirty %llx @%llx", len, start);
-
-		folio = filemap_get_folio(mapping, index);
-		if (IS_ERR(folio)) {
-			next = index + 1;
-			continue;
-		}
-
-		next = index + folio_nr_pages(folio);
-		folio_redirty_for_writepage(wbc, folio);
-		folio_end_writeback(folio);
-		folio_put(folio);
-	} while (index = next, index <= last);
-
-	_leave("");
-}
-
 /*
  * completion of write to server
  */
 static void afs_pages_written_back(struct afs_vnode *vnode, loff_t start, unsigned int len)
 {
-	struct address_space *mapping = vnode->netfs.inode.i_mapping;
-	struct folio *folio;
-	pgoff_t end;
-
-	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
-
 	_enter("{%llx:%llu},{%x @%llx}",
 	       vnode->fid.vid, vnode->fid.vnode, len, start);
 
-	rcu_read_lock();
-
-	end = (start + len - 1) / PAGE_SIZE;
-	xas_for_each(&xas, folio, end) {
-		if (!folio_test_writeback(folio)) {
-			kdebug("bad %x @%llx page %lx %lx",
-			       len, start, folio_index(folio), end);
-			ASSERT(folio_test_writeback(folio));
-		}
-
-		trace_afs_folio_dirty(vnode, tracepoint_string("clear"), folio);
-		folio_detach_private(folio);
-		folio_end_writeback(folio);
-	}
-
-	rcu_read_unlock();
-
 	afs_prune_wb_keys(vnode);
 	_leave("");
 }
@@ -451,363 +159,53 @@ try_next_key:
 	return afs_put_operation(op);
 }
 
-/*
- * Extend the region to be written back to include subsequent contiguously
- * dirty pages if possible, but don't sleep while doing so.
- *
- * If this page holds new content, then we can include filler zeros in the
- * writeback.
- */
-static void afs_extend_writeback(struct address_space *mapping,
-				 struct afs_vnode *vnode,
-				 long *_count,
-				 loff_t start,
-				 loff_t max_len,
-				 bool new_content,
-				 bool caching,
-				 unsigned int *_len)
+static void afs_upload_to_server(struct netfs_io_subrequest *subreq)
 {
-	struct folio_batch fbatch;
-	struct folio *folio;
-	unsigned long priv;
-	unsigned int psize, filler = 0;
-	unsigned int f, t;
-	loff_t len = *_len;
-	pgoff_t index = (start + len) / PAGE_SIZE;
-	bool stop = true;
-	unsigned int i;
-
-	XA_STATE(xas, &mapping->i_pages, index);
-	folio_batch_init(&fbatch);
-
-	do {
-		/* Firstly, we gather up a batch of contiguous dirty pages
-		 * under the RCU read lock - but we can't clear the dirty flags
-		 * there if any of those pages are mapped.
-		 */
-		rcu_read_lock();
-
-		xas_for_each(&xas, folio, ULONG_MAX) {
-			stop = true;
-			if (xas_retry(&xas, folio))
-				continue;
-			if (xa_is_value(folio))
-				break;
-			if (folio_index(folio) != index)
-				break;
-
-			if (!folio_try_get_rcu(folio)) {
-				xas_reset(&xas);
-				continue;
-			}
-
-			/* Has the page moved or been split? */
-			if (unlikely(folio != xas_reload(&xas))) {
-				folio_put(folio);
-				break;
-			}
-
-			if (!folio_trylock(folio)) {
-				folio_put(folio);
-				break;
-			}
-			if (!folio_test_dirty(folio) ||
-			    folio_test_writeback(folio) ||
-			    folio_test_fscache(folio)) {
-				folio_unlock(folio);
-				folio_put(folio);
-				break;
-			}
-
-			psize = folio_size(folio);
-			priv = (unsigned long)folio_get_private(folio);
-			f = afs_folio_dirty_from(folio, priv);
-			t = afs_folio_dirty_to(folio, priv);
-			if (f != 0 && !new_content) {
-				folio_unlock(folio);
-				folio_put(folio);
-				break;
-			}
-
-			len += filler + t;
-			filler = psize - t;
-			if (len >= max_len || *_count <= 0)
-				stop = true;
-			else if (t == psize || new_content)
-				stop = false;
-
-			index += folio_nr_pages(folio);
-			if (!folio_batch_add(&fbatch, folio))
-				break;
-			if (stop)
-				break;
-		}
-
-		if (!stop)
-			xas_pause(&xas);
-		rcu_read_unlock();
-
-		/* Now, if we obtained any folios, we can shift them to being
-		 * writable and mark them for caching.
-		 */
-		if (!folio_batch_count(&fbatch))
-			break;
-
-		for (i = 0; i < folio_batch_count(&fbatch); i++) {
-			folio = fbatch.folios[i];
-			trace_afs_folio_dirty(vnode, tracepoint_string("store+"), folio);
-
-			if (!folio_clear_dirty_for_io(folio))
-				BUG();
-			folio_start_writeback(folio);
-			afs_folio_start_fscache(caching, folio);
-
-			*_count -= folio_nr_pages(folio);
-			folio_unlock(folio);
-		}
+	struct afs_vnode *vnode = AFS_FS_I(subreq->rreq->inode);
+	ssize_t ret;
 
-		folio_batch_release(&fbatch);
-		cond_resched();
-	} while (!stop);
+	_enter("%x[%x],%zx",
+	       subreq->rreq->debug_id, subreq->debug_index, subreq->io_iter.count);
 
-	*_len = len;
+	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
+	ret = afs_store_data(vnode, &subreq->io_iter, subreq->start,
+			     subreq->rreq->origin == NETFS_LAUNDER_WRITE);
+	netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len,
+					  false);
 }
 
-/*
- * Synchronously write back the locked page and any subsequent non-locked dirty
- * pages.
- */
-static ssize_t afs_write_back_from_locked_folio(struct address_space *mapping,
-						struct writeback_control *wbc,
-						struct folio *folio,
-						loff_t start, loff_t end)
+static void afs_upload_to_server_worker(struct work_struct *work)
 {
-	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
-	struct iov_iter iter;
-	unsigned long priv;
-	unsigned int offset, to, len, max_len;
-	loff_t i_size = i_size_read(&vnode->netfs.inode);
-	bool new_content = test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
-	bool caching = fscache_cookie_enabled(afs_vnode_cache(vnode));
-	long count = wbc->nr_to_write;
-	int ret;
-
-	_enter(",%lx,%llx-%llx", folio_index(folio), start, end);
-
-	folio_start_writeback(folio);
-	afs_folio_start_fscache(caching, folio);
-
-	count -= folio_nr_pages(folio);
-
-	/* Find all consecutive lockable dirty pages that have contiguous
-	 * written regions, stopping when we find a page that is not
-	 * immediately lockable, is not dirty or is missing, or we reach the
-	 * end of the range.
-	 */
-	priv = (unsigned long)folio_get_private(folio);
-	offset = afs_folio_dirty_from(folio, priv);
-	to = afs_folio_dirty_to(folio, priv);
-	trace_afs_folio_dirty(vnode, tracepoint_string("store"), folio);
-
-	len = to - offset;
-	start += offset;
-	if (start < i_size) {
-		/* Trim the write to the EOF; the extra data is ignored.  Also
-		 * put an upper limit on the size of a single storedata op.
-		 */
-		max_len = 65536 * 4096;
-		max_len = min_t(unsigned long long, max_len, end - start + 1);
-		max_len = min_t(unsigned long long, max_len, i_size - start);
-
-		if (len < max_len &&
-		    (to == folio_size(folio) || new_content))
-			afs_extend_writeback(mapping, vnode, &count,
-					     start, max_len, new_content,
-					     caching, &len);
-		len = min_t(loff_t, len, max_len);
-	}
-
-	/* We now have a contiguous set of dirty pages, each with writeback
-	 * set; the first page is still locked at this point, but all the rest
-	 * have been unlocked.
-	 */
-	folio_unlock(folio);
-
-	if (start < i_size) {
-		_debug("write back %x @%llx [%llx]", len, start, i_size);
-
-		/* Speculatively write to the cache.  We have to fix this up
-		 * later if the store fails.
-		 */
-		afs_write_to_cache(vnode, start, len, i_size, caching);
-
-		iov_iter_xarray(&iter, ITER_SOURCE, &mapping->i_pages, start, len);
-		ret = afs_store_data(vnode, &iter, start, false);
-	} else {
-		_debug("write discard %x @%llx [%llx]", len, start, i_size);
-
-		/* The dirty region was entirely beyond the EOF. */
-		fscache_clear_page_bits(mapping, start, len, caching);
-		afs_pages_written_back(vnode, start, len);
-		ret = 0;
-	}
-
-	switch (ret) {
-	case 0:
-		wbc->nr_to_write = count;
-		ret = len;
-		break;
+	struct netfs_io_subrequest *subreq =
+		container_of(work, struct netfs_io_subrequest, work);
 
-	default:
-		pr_notice("kAFS: Unexpected error from FS.StoreData %d\n", ret);
-		fallthrough;
-	case -EACCES:
-	case -EPERM:
-	case -ENOKEY:
-	case -EKEYEXPIRED:
-	case -EKEYREJECTED:
-	case -EKEYREVOKED:
-	case -ENETRESET:
-		afs_redirty_pages(wbc, mapping, start, len);
-		mapping_set_error(mapping, ret);
-		break;
-
-	case -EDQUOT:
-	case -ENOSPC:
-		afs_redirty_pages(wbc, mapping, start, len);
-		mapping_set_error(mapping, -ENOSPC);
-		break;
-
-	case -EROFS:
-	case -EIO:
-	case -EREMOTEIO:
-	case -EFBIG:
-	case -ENOENT:
-	case -ENOMEDIUM:
-	case -ENXIO:
-		trace_afs_file_error(vnode, ret, afs_file_error_writeback_fail);
-		afs_kill_pages(mapping, start, len);
-		mapping_set_error(mapping, ret);
-		break;
-	}
-
-	_leave(" = %d", ret);
-	return ret;
+	afs_upload_to_server(subreq);
 }
 
 /*
- * write a region of pages back to the server
+ * Set up write requests for a writeback slice.  We need to add a write request
+ * for each write we want to make.
  */
-static int afs_writepages_region(struct address_space *mapping,
-				 struct writeback_control *wbc,
-				 loff_t start, loff_t end, loff_t *_next,
-				 bool max_one_loop)
+void afs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len)
 {
-	struct folio *folio;
-	struct folio_batch fbatch;
-	ssize_t ret;
-	unsigned int i;
-	int n, skips = 0;
-
-	_enter("%llx,%llx,", start, end);
-	folio_batch_init(&fbatch);
-
-	do {
-		pgoff_t index = start / PAGE_SIZE;
-
-		n = filemap_get_folios_tag(mapping, &index, end / PAGE_SIZE,
-					PAGECACHE_TAG_DIRTY, &fbatch);
-
-		if (!n)
-			break;
-		for (i = 0; i < n; i++) {
-			folio = fbatch.folios[i];
-			start = folio_pos(folio); /* May regress with THPs */
-
-			_debug("wback %lx", folio_index(folio));
-
-			/* At this point we hold neither the i_pages lock nor the
-			 * page lock: the page may be truncated or invalidated
-			 * (changing page->mapping to NULL), or even swizzled
-			 * back from swapper_space to tmpfs file mapping
-			 */
-try_again:
-			if (wbc->sync_mode != WB_SYNC_NONE) {
-				ret = folio_lock_killable(folio);
-				if (ret < 0) {
-					folio_batch_release(&fbatch);
-					return ret;
-				}
-			} else {
-				if (!folio_trylock(folio))
-					continue;
-			}
-
-			if (folio->mapping != mapping ||
-			    !folio_test_dirty(folio)) {
-				start += folio_size(folio);
-				folio_unlock(folio);
-				continue;
-			}
-
-			if (folio_test_writeback(folio) ||
-			    folio_test_fscache(folio)) {
-				folio_unlock(folio);
-				if (wbc->sync_mode != WB_SYNC_NONE) {
-					folio_wait_writeback(folio);
-#ifdef CONFIG_AFS_FSCACHE
-					folio_wait_fscache(folio);
-#endif
-					goto try_again;
-				}
-
-				start += folio_size(folio);
-				if (wbc->sync_mode == WB_SYNC_NONE) {
-					if (skips >= 5 || need_resched()) {
-						*_next = start;
-						folio_batch_release(&fbatch);
-						_leave(" = 0 [%llx]", *_next);
-						return 0;
-					}
-					skips++;
-				}
-				continue;
-			}
-
-			if (!folio_clear_dirty_for_io(folio))
-				BUG();
-			ret = afs_write_back_from_locked_folio(mapping, wbc,
-					folio, start, end);
-			if (ret < 0) {
-				_leave(" = %zd", ret);
-				folio_batch_release(&fbatch);
-				return ret;
-			}
-
-			start += ret;
-		}
+	struct netfs_io_subrequest *subreq;
 
-		folio_batch_release(&fbatch);
-		cond_resched();
-	} while (wbc->nr_to_write > 0);
+	_enter("%x,%llx-%llx", wreq->debug_id, start, start + len);
 
-	*_next = start;
-	_leave(" = 0 [%llx]", *_next);
-	return 0;
+	subreq = netfs_create_write_request(wreq, NETFS_UPLOAD_TO_SERVER,
+					    start, len, afs_upload_to_server_worker);
+	if (subreq)
+		netfs_queue_write_request(subreq);
 }
 
 /*
  * write some of the pending data back to the server
  */
-int afs_writepages(struct address_space *mapping,
-		   struct writeback_control *wbc)
+int afs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
 	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
-	loff_t start, next;
 	int ret;
 
-	_enter("");
-
 	/* We have to be careful as we can end up racing with setattr()
 	 * truncating the pagecache since the caller doesn't take a lock here
 	 * to prevent it.
@@ -817,69 +215,12 @@ int afs_writepages(struct address_space *mapping,
 	else if (!down_read_trylock(&vnode->validate_lock))
 		return 0;
 
-	if (wbc->range_cyclic) {
-		start = mapping->writeback_index * PAGE_SIZE;
-		ret = afs_writepages_region(mapping, wbc, start, LLONG_MAX,
-					    &next, false);
-		if (ret == 0) {
-			mapping->writeback_index = next / PAGE_SIZE;
-			if (start > 0 && wbc->nr_to_write > 0) {
-				ret = afs_writepages_region(mapping, wbc, 0,
-							    start, &next, false);
-				if (ret == 0)
-					mapping->writeback_index =
-						next / PAGE_SIZE;
-			}
-		}
-	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
-		ret = afs_writepages_region(mapping, wbc, 0, LLONG_MAX,
-					    &next, false);
-		if (wbc->nr_to_write > 0 && ret == 0)
-			mapping->writeback_index = next / PAGE_SIZE;
-	} else {
-		ret = afs_writepages_region(mapping, wbc,
-					    wbc->range_start, wbc->range_end,
-					    &next, false);
-	}
-
+	ret = netfs_writepages(mapping, wbc);
 	up_read(&vnode->validate_lock);
-	_leave(" = %d", ret);
 	return ret;
 }
 
 /*
- * write to an AFS file
- */
-ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from)
-{
-	struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp));
-	struct afs_file *af = iocb->ki_filp->private_data;
-	ssize_t result;
-	size_t count = iov_iter_count(from);
-
-	_enter("{%llx:%llu},{%zu},",
-	       vnode->fid.vid, vnode->fid.vnode, count);
-
-	if (IS_SWAPFILE(&vnode->netfs.inode)) {
-		printk(KERN_INFO
-		       "AFS: Attempt to write to active swap file!\n");
-		return -EBUSY;
-	}
-
-	if (!count)
-		return 0;
-
-	result = afs_validate(vnode, af->key);
-	if (result < 0)
-		return result;
-
-	result = generic_file_write_iter(iocb, from);
-
-	_leave(" = %zd", result);
-	return result;
-}
-
-/*
  * flush any dirty pages for this process, and check for write errors.
  * - the return status from this call provides a reliable indication of
  *   whether any write errors occurred for this process.
@@ -907,59 +248,11 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
  */
 vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
 {
-	struct folio *folio = page_folio(vmf->page);
 	struct file *file = vmf->vma->vm_file;
-	struct inode *inode = file_inode(file);
-	struct afs_vnode *vnode = AFS_FS_I(inode);
-	struct afs_file *af = file->private_data;
-	unsigned long priv;
-	vm_fault_t ret = VM_FAULT_RETRY;
-
-	_enter("{{%llx:%llu}},{%lx}", vnode->fid.vid, vnode->fid.vnode, folio_index(folio));
-
-	afs_validate(vnode, af->key);
 
-	sb_start_pagefault(inode->i_sb);
-
-	/* Wait for the page to be written to the cache before we allow it to
-	 * be modified.  We then assume the entire page will need writing back.
-	 */
-#ifdef CONFIG_AFS_FSCACHE
-	if (folio_test_fscache(folio) &&
-	    folio_wait_fscache_killable(folio) < 0)
-		goto out;
-#endif
-
-	if (folio_wait_writeback_killable(folio))
-		goto out;
-
-	if (folio_lock_killable(folio) < 0)
-		goto out;
-
-	/* We mustn't change folio->private until writeback is complete as that
-	 * details the portion of the page we need to write back and we might
-	 * need to redirty the page if there's a problem.
-	 */
-	if (folio_wait_writeback_killable(folio) < 0) {
-		folio_unlock(folio);
-		goto out;
-	}
-
-	priv = afs_folio_dirty(folio, 0, folio_size(folio));
-	priv = afs_folio_dirty_mmapped(priv);
-	if (folio_test_private(folio)) {
-		folio_change_private(folio, (void *)priv);
-		trace_afs_folio_dirty(vnode, tracepoint_string("mkwrite+"), folio);
-	} else {
-		folio_attach_private(folio, (void *)priv);
-		trace_afs_folio_dirty(vnode, tracepoint_string("mkwrite"), folio);
-	}
-	file_update_time(file);
-
-	ret = VM_FAULT_LOCKED;
-out:
-	sb_end_pagefault(inode->i_sb);
-	return ret;
+	if (afs_validate(AFS_FS_I(file_inode(file)), afs_file_key(file)) < 0)
+		return VM_FAULT_SIGBUS;
+	return netfs_page_mkwrite(vmf, NULL);
 }
 
 /*
@@ -989,64 +282,3 @@ void afs_prune_wb_keys(struct afs_vnode *vnode)
 		afs_put_wb_key(wbk);
 	}
 }
-
-/*
- * Clean up a page during invalidation.
- */
-int afs_launder_folio(struct folio *folio)
-{
-	struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
-	struct iov_iter iter;
-	struct bio_vec bv;
-	unsigned long priv;
-	unsigned int f, t;
-	int ret = 0;
-
-	_enter("{%lx}", folio->index);
-
-	priv = (unsigned long)folio_get_private(folio);
-	if (folio_clear_dirty_for_io(folio)) {
-		f = 0;
-		t = folio_size(folio);
-		if (folio_test_private(folio)) {
-			f = afs_folio_dirty_from(folio, priv);
-			t = afs_folio_dirty_to(folio, priv);
-		}
-
-		bvec_set_folio(&bv, folio, t - f, f);
-		iov_iter_bvec(&iter, ITER_SOURCE, &bv, 1, bv.bv_len);
-
-		trace_afs_folio_dirty(vnode, tracepoint_string("launder"), folio);
-		ret = afs_store_data(vnode, &iter, folio_pos(folio) + f, true);
-	}
-
-	trace_afs_folio_dirty(vnode, tracepoint_string("laundered"), folio);
-	folio_detach_private(folio);
-	folio_wait_fscache(folio);
-	return ret;
-}
-
-/*
- * Deal with the completion of writing the data to the cache.
- */
-static void afs_write_to_cache_done(void *priv, ssize_t transferred_or_error,
-				    bool was_async)
-{
-	struct afs_vnode *vnode = priv;
-
-	if (IS_ERR_VALUE(transferred_or_error) &&
-	    transferred_or_error != -ENOBUFS)
-		afs_invalidate_cache(vnode, 0);
-}
-
-/*
- * Save the write to the cache also.
- */
-static void afs_write_to_cache(struct afs_vnode *vnode,
-			       loff_t start, size_t len, loff_t i_size,
-			       bool caching)
-{
-	fscache_write_to_cache(afs_vnode_cache(vnode),
-			       vnode->netfs.inode.i_mapping, start, len, i_size,
-			       afs_write_to_cache_done, vnode, caching);
-}
diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile
index 7423a3557c68..1a05cecda7cc 100644
--- a/fs/bcachefs/Makefile
+++ b/fs/bcachefs/Makefile
@@ -27,7 +27,6 @@ bcachefs-y		:=	\
 	checksum.o		\
 	clock.o			\
 	compress.o		\
-	counters.o		\
 	darray.o		\
 	debug.o			\
 	dirent.o		\
@@ -71,6 +70,7 @@ bcachefs-y		:=	\
 	reflink.o		\
 	replicas.o		\
 	sb-clean.o		\
+	sb-counters.o		\
 	sb-downgrade.o		\
 	sb-errors.o		\
 	sb-members.o		\
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index a09b9d00226a..10704f2d3af5 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -273,7 +273,7 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
 		bkey_fsck_err_on(!bch2_bucket_sectors_dirty(*a.v),
 				 c, err, alloc_key_dirty_sectors_0,
 				 "data_type %s but dirty_sectors==0",
-				 bch2_data_types[a.v->data_type]);
+				 bch2_data_type_str(a.v->data_type));
 		break;
 	case BCH_DATA_cached:
 		bkey_fsck_err_on(!a.v->cached_sectors ||
@@ -321,16 +321,12 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
 {
 	struct bch_alloc_v4 _a;
 	const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a);
-	unsigned i;
 
 	prt_newline(out);
 	printbuf_indent_add(out, 2);
 
-	prt_printf(out, "gen %u oldest_gen %u data_type %s",
-	       a->gen, a->oldest_gen,
-	       a->data_type < BCH_DATA_NR
-	       ? bch2_data_types[a->data_type]
-	       : "(invalid data type)");
+	prt_printf(out, "gen %u oldest_gen %u data_type ", a->gen, a->oldest_gen);
+	bch2_prt_data_type(out, a->data_type);
 	prt_newline(out);
 	prt_printf(out, "journal_seq       %llu",	a->journal_seq);
 	prt_newline(out);
@@ -353,23 +349,6 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
 	prt_printf(out, "fragmentation     %llu",	a->fragmentation_lru);
 	prt_newline(out);
 	prt_printf(out, "bp_start          %llu", BCH_ALLOC_V4_BACKPOINTERS_START(a));
-	prt_newline(out);
-
-	if (BCH_ALLOC_V4_NR_BACKPOINTERS(a)) {
-		struct bkey_s_c_alloc_v4 a_raw = bkey_s_c_to_alloc_v4(k);
-		const struct bch_backpointer *bps = alloc_v4_backpointers_c(a_raw.v);
-
-		prt_printf(out, "backpointers:     %llu", BCH_ALLOC_V4_NR_BACKPOINTERS(a_raw.v));
-		printbuf_indent_add(out, 2);
-
-		for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a_raw.v); i++) {
-			prt_newline(out);
-			bch2_backpointer_to_text(out, &bps[i]);
-		}
-
-		printbuf_indent_sub(out, 2);
-	}
-
 	printbuf_indent_sub(out, 2);
 }
 
@@ -839,7 +818,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
 		}
 	}
 
-	if (!(flags & BTREE_TRIGGER_TRANSACTIONAL) && (flags & BTREE_TRIGGER_INSERT)) {
+	if ((flags & BTREE_TRIGGER_ATOMIC) && (flags & BTREE_TRIGGER_INSERT)) {
 		struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v;
 		u64 journal_seq = trans->journal_res.seq;
 		u64 bucket_journal_seq = new_a->journal_seq;
@@ -1625,13 +1604,36 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
 	return ret;
 }
 
+struct discard_buckets_state {
+	u64		seen;
+	u64		open;
+	u64		need_journal_commit;
+	u64		discarded;
+	struct bch_dev	*ca;
+	u64		need_journal_commit_this_dev;
+};
+
+static void discard_buckets_next_dev(struct bch_fs *c, struct discard_buckets_state *s, struct bch_dev *ca)
+{
+	if (s->ca == ca)
+		return;
+
+	if (s->ca && s->need_journal_commit_this_dev >
+	    bch2_dev_usage_read(s->ca).d[BCH_DATA_free].buckets)
+		bch2_journal_flush_async(&c->journal, NULL);
+
+	if (s->ca)
+		percpu_ref_put(&s->ca->ref);
+	if (ca)
+		percpu_ref_get(&ca->ref);
+	s->ca = ca;
+	s->need_journal_commit_this_dev = 0;
+}
+
 static int bch2_discard_one_bucket(struct btree_trans *trans,
 				   struct btree_iter *need_discard_iter,
 				   struct bpos *discard_pos_done,
-				   u64 *seen,
-				   u64 *open,
-				   u64 *need_journal_commit,
-				   u64 *discarded)
+				   struct discard_buckets_state *s)
 {
 	struct bch_fs *c = trans->c;
 	struct bpos pos = need_discard_iter->pos;
@@ -1643,20 +1645,24 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
 	int ret = 0;
 
 	ca = bch_dev_bkey_exists(c, pos.inode);
+
 	if (!percpu_ref_tryget(&ca->io_ref)) {
 		bch2_btree_iter_set_pos(need_discard_iter, POS(pos.inode + 1, 0));
 		return 0;
 	}
 
+	discard_buckets_next_dev(c, s, ca);
+
 	if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) {
-		(*open)++;
+		s->open++;
 		goto out;
 	}
 
 	if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
 			c->journal.flushed_seq_ondisk,
 			pos.inode, pos.offset)) {
-		(*need_journal_commit)++;
+		s->need_journal_commit++;
+		s->need_journal_commit_this_dev++;
 		goto out;
 	}
 
@@ -1732,9 +1738,9 @@ write:
 		goto out;
 
 	count_event(c, bucket_discard);
-	(*discarded)++;
+	s->discarded++;
 out:
-	(*seen)++;
+	s->seen++;
 	bch2_trans_iter_exit(trans, &iter);
 	percpu_ref_put(&ca->io_ref);
 	printbuf_exit(&buf);
@@ -1744,7 +1750,7 @@ out:
 static void bch2_do_discards_work(struct work_struct *work)
 {
 	struct bch_fs *c = container_of(work, struct bch_fs, discard_work);
-	u64 seen = 0, open = 0, need_journal_commit = 0, discarded = 0;
+	struct discard_buckets_state s = {};
 	struct bpos discard_pos_done = POS_MAX;
 	int ret;
 
@@ -1756,19 +1762,14 @@ static void bch2_do_discards_work(struct work_struct *work)
 	ret = bch2_trans_run(c,
 		for_each_btree_key(trans, iter,
 				   BTREE_ID_need_discard, POS_MIN, 0, k,
-			bch2_discard_one_bucket(trans, &iter, &discard_pos_done,
-						&seen,
-						&open,
-						&need_journal_commit,
-						&discarded)));
-
-	if (need_journal_commit * 2 > seen)
-		bch2_journal_flush_async(&c->journal, NULL);
+			bch2_discard_one_bucket(trans, &iter, &discard_pos_done, &s)));
 
-	bch2_write_ref_put(c, BCH_WRITE_REF_discard);
+	discard_buckets_next_dev(c, &s, NULL);
 
-	trace_discard_buckets(c, seen, open, need_journal_commit, discarded,
+	trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
 			      bch2_err_str(ret));
+
+	bch2_write_ref_put(c, BCH_WRITE_REF_discard);
 }
 
 void bch2_do_discards(struct bch_fs *c)
diff --git a/fs/bcachefs/alloc_background_format.h b/fs/bcachefs/alloc_background_format.h
new file mode 100644
index 000000000000..b4ec20be93b8
--- /dev/null
+++ b/fs/bcachefs/alloc_background_format.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_ALLOC_BACKGROUND_FORMAT_H
+#define _BCACHEFS_ALLOC_BACKGROUND_FORMAT_H
+
+struct bch_alloc {
+	struct bch_val		v;
+	__u8			fields;
+	__u8			gen;
+	__u8			data[];
+} __packed __aligned(8);
+
+#define BCH_ALLOC_FIELDS_V1()			\
+	x(read_time,		16)		\
+	x(write_time,		16)		\
+	x(data_type,		8)		\
+	x(dirty_sectors,	16)		\
+	x(cached_sectors,	16)		\
+	x(oldest_gen,		8)		\
+	x(stripe,		32)		\
+	x(stripe_redundancy,	8)
+
+enum {
+#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name,
+	BCH_ALLOC_FIELDS_V1()
+#undef x
+};
+
+struct bch_alloc_v2 {
+	struct bch_val		v;
+	__u8			nr_fields;
+	__u8			gen;
+	__u8			oldest_gen;
+	__u8			data_type;
+	__u8			data[];
+} __packed __aligned(8);
+
+#define BCH_ALLOC_FIELDS_V2()			\
+	x(read_time,		64)		\
+	x(write_time,		64)		\
+	x(dirty_sectors,	32)		\
+	x(cached_sectors,	32)		\
+	x(stripe,		32)		\
+	x(stripe_redundancy,	8)
+
+struct bch_alloc_v3 {
+	struct bch_val		v;
+	__le64			journal_seq;
+	__le32			flags;
+	__u8			nr_fields;
+	__u8			gen;
+	__u8			oldest_gen;
+	__u8			data_type;
+	__u8			data[];
+} __packed __aligned(8);
+
+LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags,  0,  1)
+LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags,  1,  2)
+
+struct bch_alloc_v4 {
+	struct bch_val		v;
+	__u64			journal_seq;
+	__u32			flags;
+	__u8			gen;
+	__u8			oldest_gen;
+	__u8			data_type;
+	__u8			stripe_redundancy;
+	__u32			dirty_sectors;
+	__u32			cached_sectors;
+	__u64			io_time[2];
+	__u32			stripe;
+	__u32			nr_external_backpointers;
+	__u64			fragmentation_lru;
+} __packed __aligned(8);
+
+#define BCH_ALLOC_V4_U64s_V0	6
+#define BCH_ALLOC_V4_U64s	(sizeof(struct bch_alloc_v4) / sizeof(__u64))
+
+BITMASK(BCH_ALLOC_V4_NEED_DISCARD,	struct bch_alloc_v4, flags,  0,  1)
+BITMASK(BCH_ALLOC_V4_NEED_INC_GEN,	struct bch_alloc_v4, flags,  1,  2)
+BITMASK(BCH_ALLOC_V4_BACKPOINTERS_START,struct bch_alloc_v4, flags,  2,  8)
+BITMASK(BCH_ALLOC_V4_NR_BACKPOINTERS,	struct bch_alloc_v4, flags,  8,  14)
+
+#define KEY_TYPE_BUCKET_GENS_BITS	8
+#define KEY_TYPE_BUCKET_GENS_NR		(1U << KEY_TYPE_BUCKET_GENS_BITS)
+#define KEY_TYPE_BUCKET_GENS_MASK	(KEY_TYPE_BUCKET_GENS_NR - 1)
+
+struct bch_bucket_gens {
+	struct bch_val		v;
+	u8			gens[KEY_TYPE_BUCKET_GENS_NR];
+} __packed __aligned(8);
+
+#endif /* _BCACHEFS_ALLOC_BACKGROUND_FORMAT_H */
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index b0ff47998a94..633d3223b353 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -1525,10 +1525,11 @@ static void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, str
 	unsigned data_type = ob->data_type;
 	barrier(); /* READ_ONCE() doesn't work on bitfields */
 
-	prt_printf(out, "%zu ref %u %s %u:%llu gen %u allocated %u/%u",
+	prt_printf(out, "%zu ref %u ",
 		   ob - c->open_buckets,
-		   atomic_read(&ob->pin),
-		   data_type < BCH_DATA_NR ? bch2_data_types[data_type] : "invalid data type",
+		   atomic_read(&ob->pin));
+	bch2_prt_data_type(out, data_type);
+	prt_printf(out, " %u:%llu gen %u allocated %u/%u",
 		   ob->dev, ob->bucket, ob->gen,
 		   ca->mi.bucket_size - ob->sectors_free, ca->mi.bucket_size);
 	if (ob->ec)
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index e358a2ffffde..b4dc319bcb2b 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -400,13 +400,24 @@ int bch2_check_btree_backpointers(struct bch_fs *c)
 	return ret;
 }
 
+static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
+{
+	return bpos_eq(l.k->p, r.k->p) &&
+		bkey_bytes(l.k) == bkey_bytes(r.k) &&
+		!memcmp(l.v, r.v, bkey_val_bytes(l.k));
+}
+
+struct extents_to_bp_state {
+	struct bpos	bucket_start;
+	struct bpos	bucket_end;
+	struct bkey_buf last_flushed;
+};
+
 static int check_bp_exists(struct btree_trans *trans,
+			   struct extents_to_bp_state *s,
 			   struct bpos bucket,
 			   struct bch_backpointer bp,
-			   struct bkey_s_c orig_k,
-			   struct bpos bucket_start,
-			   struct bpos bucket_end,
-			   struct bkey_buf *last_flushed)
+			   struct bkey_s_c orig_k)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_iter bp_iter = { NULL };
@@ -417,8 +428,8 @@ static int check_bp_exists(struct btree_trans *trans,
 
 	bch2_bkey_buf_init(&tmp);
 
-	if (bpos_lt(bucket, bucket_start) ||
-	    bpos_gt(bucket, bucket_end))
+	if (bpos_lt(bucket, s->bucket_start) ||
+	    bpos_gt(bucket, s->bucket_end))
 		return 0;
 
 	if (!bch2_dev_bucket_exists(c, bucket))
@@ -433,11 +444,9 @@ static int check_bp_exists(struct btree_trans *trans,
 
 	if (bp_k.k->type != KEY_TYPE_backpointer ||
 	    memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
-		if (!bpos_eq(orig_k.k->p, last_flushed->k->k.p) ||
-		    bkey_bytes(orig_k.k) != bkey_bytes(&last_flushed->k->k) ||
-		    memcmp(orig_k.v, &last_flushed->k->v, bkey_val_bytes(orig_k.k))) {
-			bch2_bkey_buf_reassemble(&tmp, c, orig_k);
+		bch2_bkey_buf_reassemble(&tmp, c, orig_k);
 
+		if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) {
 			if (bp.level) {
 				bch2_trans_unlock(trans);
 				bch2_btree_interior_updates_flush(c);
@@ -447,7 +456,7 @@ static int check_bp_exists(struct btree_trans *trans,
 			if (ret)
 				goto err;
 
-			bch2_bkey_buf_copy(last_flushed, c, tmp.k);
+			bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k);
 			ret = -BCH_ERR_transaction_restart_write_buffer_flush;
 			goto out;
 		}
@@ -475,10 +484,8 @@ missing:
 }
 
 static int check_extent_to_backpointers(struct btree_trans *trans,
+					struct extents_to_bp_state *s,
 					enum btree_id btree, unsigned level,
-					struct bpos bucket_start,
-					struct bpos bucket_end,
-					struct bkey_buf *last_flushed,
 					struct bkey_s_c k)
 {
 	struct bch_fs *c = trans->c;
@@ -498,9 +505,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
 		bch2_extent_ptr_to_bp(c, btree, level,
 				      k, p, &bucket_pos, &bp);
 
-		ret = check_bp_exists(trans, bucket_pos, bp, k,
-				      bucket_start, bucket_end,
-				      last_flushed);
+		ret = check_bp_exists(trans, s, bucket_pos, bp, k);
 		if (ret)
 			return ret;
 	}
@@ -509,10 +514,8 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
 }
 
 static int check_btree_root_to_backpointers(struct btree_trans *trans,
+					    struct extents_to_bp_state *s,
 					    enum btree_id btree_id,
-					    struct bpos bucket_start,
-					    struct bpos bucket_end,
-					    struct bkey_buf *last_flushed,
 					    int *level)
 {
 	struct bch_fs *c = trans->c;
@@ -536,9 +539,7 @@ retry:
 	*level = b->c.level;
 
 	k = bkey_i_to_s_c(&b->key);
-	ret = check_extent_to_backpointers(trans, btree_id, b->c.level + 1,
-				      bucket_start, bucket_end,
-				      last_flushed, k);
+	ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k);
 err:
 	bch2_trans_iter_exit(trans, &iter);
 	return ret;
@@ -559,7 +560,7 @@ static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
 
 	si_meminfo(&i);
 	mem_bytes = i.totalram * i.mem_unit;
-	return div_u64(mem_bytes >> 1, btree_bytes(c));
+	return div_u64(mem_bytes >> 1, c->opts.btree_node_size);
 }
 
 static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
@@ -610,43 +611,35 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
 }
 
 static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
-						   struct bpos bucket_start,
-						   struct bpos bucket_end)
+						   struct extents_to_bp_state *s)
 {
 	struct bch_fs *c = trans->c;
-	struct btree_iter iter;
-	enum btree_id btree_id;
-	struct bkey_s_c k;
-	struct bkey_buf last_flushed;
 	int ret = 0;
 
-	bch2_bkey_buf_init(&last_flushed);
-	bkey_init(&last_flushed.k->k);
-
-	for (btree_id = 0; btree_id < btree_id_nr_alive(c); btree_id++) {
+	for (enum btree_id btree_id = 0;
+	     btree_id < btree_id_nr_alive(c);
+	     btree_id++) {
 		int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1;
 
 		ret = commit_do(trans, NULL, NULL,
 				BCH_TRANS_COMMIT_no_enospc,
-				check_btree_root_to_backpointers(trans, btree_id,
-							bucket_start, bucket_end,
-							&last_flushed, &level));
+				check_btree_root_to_backpointers(trans, s, btree_id, &level));
 		if (ret)
 			return ret;
 
 		while (level >= depth) {
+			struct btree_iter iter;
 			bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0,
 						  level,
 						  BTREE_ITER_PREFETCH);
 			while (1) {
 				bch2_trans_begin(trans);
-				k = bch2_btree_iter_peek(&iter);
+
+				struct bkey_s_c k = bch2_btree_iter_peek(&iter);
 				if (!k.k)
 					break;
 				ret = bkey_err(k) ?:
-					check_extent_to_backpointers(trans, btree_id, level,
-								     bucket_start, bucket_end,
-								     &last_flushed, k) ?:
+					check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
 					bch2_trans_commit(trans, NULL, NULL,
 							  BCH_TRANS_COMMIT_no_enospc);
 				if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
@@ -668,7 +661,6 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
 		}
 	}
 
-	bch2_bkey_buf_exit(&last_flushed, c);
 	return 0;
 }
 
@@ -731,37 +723,43 @@ static int bch2_get_alloc_in_memory_pos(struct btree_trans *trans,
 int bch2_check_extents_to_backpointers(struct bch_fs *c)
 {
 	struct btree_trans *trans = bch2_trans_get(c);
-	struct bpos start = POS_MIN, end;
+	struct extents_to_bp_state s = { .bucket_start = POS_MIN };
 	int ret;
 
+	bch2_bkey_buf_init(&s.last_flushed);
+	bkey_init(&s.last_flushed.k->k);
+
 	while (1) {
-		ret = bch2_get_alloc_in_memory_pos(trans, start, &end);
+		ret = bch2_get_alloc_in_memory_pos(trans, s.bucket_start, &s.bucket_end);
 		if (ret)
 			break;
 
-		if (bpos_eq(start, POS_MIN) && !bpos_eq(end, SPOS_MAX))
+		if ( bpos_eq(s.bucket_start, POS_MIN) &&
+		    !bpos_eq(s.bucket_end, SPOS_MAX))
 			bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
 				    __func__, btree_nodes_fit_in_ram(c));
 
-		if (!bpos_eq(start, POS_MIN) || !bpos_eq(end, SPOS_MAX)) {
+		if (!bpos_eq(s.bucket_start, POS_MIN) ||
+		    !bpos_eq(s.bucket_end, SPOS_MAX)) {
 			struct printbuf buf = PRINTBUF;
 
 			prt_str(&buf, "check_extents_to_backpointers(): ");
-			bch2_bpos_to_text(&buf, start);
+			bch2_bpos_to_text(&buf, s.bucket_start);
 			prt_str(&buf, "-");
-			bch2_bpos_to_text(&buf, end);
+			bch2_bpos_to_text(&buf, s.bucket_end);
 
 			bch_verbose(c, "%s", buf.buf);
 			printbuf_exit(&buf);
 		}
 
-		ret = bch2_check_extents_to_backpointers_pass(trans, start, end);
-		if (ret || bpos_eq(end, SPOS_MAX))
+		ret = bch2_check_extents_to_backpointers_pass(trans, &s);
+		if (ret || bpos_eq(s.bucket_end, SPOS_MAX))
 			break;
 
-		start = bpos_successor(end);
+		s.bucket_start = bpos_successor(s.bucket_end);
 	}
 	bch2_trans_put(trans);
+	bch2_bkey_buf_exit(&s.last_flushed, c);
 
 	bch_err_fn(c, ret);
 	return ret;
diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h
index 737e2396ade7..327365a9feac 100644
--- a/fs/bcachefs/backpointers.h
+++ b/fs/bcachefs/backpointers.h
@@ -2,6 +2,7 @@
 #ifndef _BCACHEFS_BACKPOINTERS_BACKGROUND_H
 #define _BCACHEFS_BACKPOINTERS_BACKGROUND_H
 
+#include "btree_cache.h"
 #include "btree_iter.h"
 #include "btree_update.h"
 #include "buckets.h"
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index dac383e37181..b80c6c9efd8c 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -1204,11 +1204,6 @@ static inline unsigned block_sectors(const struct bch_fs *c)
 	return c->opts.block_size >> 9;
 }
 
-static inline size_t btree_sectors(const struct bch_fs *c)
-{
-	return c->opts.btree_node_size >> 9;
-}
-
 static inline bool btree_id_cached(const struct bch_fs *c, enum btree_id btree)
 {
 	return c->btree_key_cache_btrees & (1U << btree);
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 0d5ac4184fbc..0668b682a21c 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -417,600 +417,12 @@ struct bch_set {
 	struct bch_val		v;
 };
 
-/* Extents */
-
-/*
- * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally
- * preceded by checksum/compression information (bch_extent_crc32 or
- * bch_extent_crc64).
- *
- * One major determining factor in the format of extents is how we handle and
- * represent extents that have been partially overwritten and thus trimmed:
- *
- * If an extent is not checksummed or compressed, when the extent is trimmed we
- * don't have to remember the extent we originally allocated and wrote: we can
- * merely adjust ptr->offset to point to the start of the data that is currently
- * live. The size field in struct bkey records the current (live) size of the
- * extent, and is also used to mean "size of region on disk that we point to" in
- * this case.
- *
- * Thus an extent that is not checksummed or compressed will consist only of a
- * list of bch_extent_ptrs, with none of the fields in
- * bch_extent_crc32/bch_extent_crc64.
- *
- * When an extent is checksummed or compressed, it's not possible to read only
- * the data that is currently live: we have to read the entire extent that was
- * originally written, and then return only the part of the extent that is
- * currently live.
- *
- * Thus, in addition to the current size of the extent in struct bkey, we need
- * to store the size of the originally allocated space - this is the
- * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also,
- * when the extent is trimmed, instead of modifying the offset field of the
- * pointer, we keep a second smaller offset field - "offset into the original
- * extent of the currently live region".
- *
- * The other major determining factor is replication and data migration:
- *
- * Each pointer may have its own bch_extent_crc32/64. When doing a replicated
- * write, we will initially write all the replicas in the same format, with the
- * same checksum type and compression format - however, when copygc runs later (or
- * tiering/cache promotion, anything that moves data), it is not in general
- * going to rewrite all the pointers at once - one of the replicas may be in a
- * bucket on one device that has very little fragmentation while another lives
- * in a bucket that has become heavily fragmented, and thus is being rewritten
- * sooner than the rest.
- *
- * Thus it will only move a subset of the pointers (or in the case of
- * tiering/cache promotion perhaps add a single pointer without dropping any
- * current pointers), and if the extent has been partially overwritten it must
- * write only the currently live portion (or copygc would not be able to reduce
- * fragmentation!) - which necessitates a different bch_extent_crc format for
- * the new pointer.
- *
- * But in the interests of space efficiency, we don't want to store one
- * bch_extent_crc for each pointer if we don't have to.
- *
- * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and
- * bch_extent_ptrs appended arbitrarily one after the other. We determine the
- * type of a given entry with a scheme similar to utf8 (except we're encoding a
- * type, not a size), encoding the type in the position of the first set bit:
- *
- * bch_extent_crc32	- 0b1
- * bch_extent_ptr	- 0b10
- * bch_extent_crc64	- 0b100
- *
- * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and
- * bch_extent_crc64 is the least constrained).
- *
- * Then, each bch_extent_crc32/64 applies to the pointers that follow after it,
- * until the next bch_extent_crc32/64.
- *
- * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer
- * is neither checksummed nor compressed.
- */
-
 /* 128 bits, sufficient for cryptographic MACs: */
 struct bch_csum {
 	__le64			lo;
 	__le64			hi;
 } __packed __aligned(8);
 
-#define BCH_EXTENT_ENTRY_TYPES()		\
-	x(ptr,			0)		\
-	x(crc32,		1)		\
-	x(crc64,		2)		\
-	x(crc128,		3)		\
-	x(stripe_ptr,		4)		\
-	x(rebalance,		5)
-#define BCH_EXTENT_ENTRY_MAX	6
-
-enum bch_extent_entry_type {
-#define x(f, n) BCH_EXTENT_ENTRY_##f = n,
-	BCH_EXTENT_ENTRY_TYPES()
-#undef x
-};
-
-/* Compressed/uncompressed size are stored biased by 1: */
-struct bch_extent_crc32 {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	__u32			type:2,
-				_compressed_size:7,
-				_uncompressed_size:7,
-				offset:7,
-				_unused:1,
-				csum_type:4,
-				compression_type:4;
-	__u32			csum;
-#elif defined (__BIG_ENDIAN_BITFIELD)
-	__u32			csum;
-	__u32			compression_type:4,
-				csum_type:4,
-				_unused:1,
-				offset:7,
-				_uncompressed_size:7,
-				_compressed_size:7,
-				type:2;
-#endif
-} __packed __aligned(8);
-
-#define CRC32_SIZE_MAX		(1U << 7)
-#define CRC32_NONCE_MAX		0
-
-struct bch_extent_crc64 {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	__u64			type:3,
-				_compressed_size:9,
-				_uncompressed_size:9,
-				offset:9,
-				nonce:10,
-				csum_type:4,
-				compression_type:4,
-				csum_hi:16;
-#elif defined (__BIG_ENDIAN_BITFIELD)
-	__u64			csum_hi:16,
-				compression_type:4,
-				csum_type:4,
-				nonce:10,
-				offset:9,
-				_uncompressed_size:9,
-				_compressed_size:9,
-				type:3;
-#endif
-	__u64			csum_lo;
-} __packed __aligned(8);
-
-#define CRC64_SIZE_MAX		(1U << 9)
-#define CRC64_NONCE_MAX		((1U << 10) - 1)
-
-struct bch_extent_crc128 {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	__u64			type:4,
-				_compressed_size:13,
-				_uncompressed_size:13,
-				offset:13,
-				nonce:13,
-				csum_type:4,
-				compression_type:4;
-#elif defined (__BIG_ENDIAN_BITFIELD)
-	__u64			compression_type:4,
-				csum_type:4,
-				nonce:13,
-				offset:13,
-				_uncompressed_size:13,
-				_compressed_size:13,
-				type:4;
-#endif
-	struct bch_csum		csum;
-} __packed __aligned(8);
-
-#define CRC128_SIZE_MAX		(1U << 13)
-#define CRC128_NONCE_MAX	((1U << 13) - 1)
-
-/*
- * @reservation - pointer hasn't been written to, just reserved
- */
-struct bch_extent_ptr {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	__u64			type:1,
-				cached:1,
-				unused:1,
-				unwritten:1,
-				offset:44, /* 8 petabytes */
-				dev:8,
-				gen:8;
-#elif defined (__BIG_ENDIAN_BITFIELD)
-	__u64			gen:8,
-				dev:8,
-				offset:44,
-				unwritten:1,
-				unused:1,
-				cached:1,
-				type:1;
-#endif
-} __packed __aligned(8);
-
-struct bch_extent_stripe_ptr {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	__u64			type:5,
-				block:8,
-				redundancy:4,
-				idx:47;
-#elif defined (__BIG_ENDIAN_BITFIELD)
-	__u64			idx:47,
-				redundancy:4,
-				block:8,
-				type:5;
-#endif
-};
-
-struct bch_extent_rebalance {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	__u64			type:6,
-				unused:34,
-				compression:8, /* enum bch_compression_opt */
-				target:16;
-#elif defined (__BIG_ENDIAN_BITFIELD)
-	__u64			target:16,
-				compression:8,
-				unused:34,
-				type:6;
-#endif
-};
-
-union bch_extent_entry {
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ||  __BITS_PER_LONG == 64
-	unsigned long			type;
-#elif __BITS_PER_LONG == 32
-	struct {
-		unsigned long		pad;
-		unsigned long		type;
-	};
-#else
-#error edit for your odd byteorder.
-#endif
-
-#define x(f, n) struct bch_extent_##f	f;
-	BCH_EXTENT_ENTRY_TYPES()
-#undef x
-};
-
-struct bch_btree_ptr {
-	struct bch_val		v;
-
-	__u64			_data[0];
-	struct bch_extent_ptr	start[];
-} __packed __aligned(8);
-
-struct bch_btree_ptr_v2 {
-	struct bch_val		v;
-
-	__u64			mem_ptr;
-	__le64			seq;
-	__le16			sectors_written;
-	__le16			flags;
-	struct bpos		min_key;
-	__u64			_data[0];
-	struct bch_extent_ptr	start[];
-} __packed __aligned(8);
-
-LE16_BITMASK(BTREE_PTR_RANGE_UPDATED,	struct bch_btree_ptr_v2, flags, 0, 1);
-
-struct bch_extent {
-	struct bch_val		v;
-
-	__u64			_data[0];
-	union bch_extent_entry	start[];
-} __packed __aligned(8);
-
-struct bch_reservation {
-	struct bch_val		v;
-
-	__le32			generation;
-	__u8			nr_replicas;
-	__u8			pad[3];
-} __packed __aligned(8);
-
-/* Maximum size (in u64s) a single pointer could be: */
-#define BKEY_EXTENT_PTR_U64s_MAX\
-	((sizeof(struct bch_extent_crc128) +			\
-	  sizeof(struct bch_extent_ptr)) / sizeof(__u64))
-
-/* Maximum possible size of an entire extent value: */
-#define BKEY_EXTENT_VAL_U64s_MAX				\
-	(1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))
-
-/* * Maximum possible size of an entire extent, key + value: */
-#define BKEY_EXTENT_U64s_MAX		(BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX)
-
-/* Btree pointers don't carry around checksums: */
-#define BKEY_BTREE_PTR_VAL_U64s_MAX				\
-	((sizeof(struct bch_btree_ptr_v2) +			\
-	  sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64))
-#define BKEY_BTREE_PTR_U64s_MAX					\
-	(BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX)
-
-/* Inodes */
-
-#define BLOCKDEV_INODE_MAX	4096
-
-#define BCACHEFS_ROOT_INO	4096
-
-struct bch_inode {
-	struct bch_val		v;
-
-	__le64			bi_hash_seed;
-	__le32			bi_flags;
-	__le16			bi_mode;
-	__u8			fields[];
-} __packed __aligned(8);
-
-struct bch_inode_v2 {
-	struct bch_val		v;
-
-	__le64			bi_journal_seq;
-	__le64			bi_hash_seed;
-	__le64			bi_flags;
-	__le16			bi_mode;
-	__u8			fields[];
-} __packed __aligned(8);
-
-struct bch_inode_v3 {
-	struct bch_val		v;
-
-	__le64			bi_journal_seq;
-	__le64			bi_hash_seed;
-	__le64			bi_flags;
-	__le64			bi_sectors;
-	__le64			bi_size;
-	__le64			bi_version;
-	__u8			fields[];
-} __packed __aligned(8);
-
-#define INODEv3_FIELDS_START_INITIAL	6
-#define INODEv3_FIELDS_START_CUR	(offsetof(struct bch_inode_v3, fields) / sizeof(__u64))
-
-struct bch_inode_generation {
-	struct bch_val		v;
-
-	__le32			bi_generation;
-	__le32			pad;
-} __packed __aligned(8);
-
-/*
- * bi_subvol and bi_parent_subvol are only set for subvolume roots:
- */
-
-#define BCH_INODE_FIELDS_v2()			\
-	x(bi_atime,			96)	\
-	x(bi_ctime,			96)	\
-	x(bi_mtime,			96)	\
-	x(bi_otime,			96)	\
-	x(bi_size,			64)	\
-	x(bi_sectors,			64)	\
-	x(bi_uid,			32)	\
-	x(bi_gid,			32)	\
-	x(bi_nlink,			32)	\
-	x(bi_generation,		32)	\
-	x(bi_dev,			32)	\
-	x(bi_data_checksum,		8)	\
-	x(bi_compression,		8)	\
-	x(bi_project,			32)	\
-	x(bi_background_compression,	8)	\
-	x(bi_data_replicas,		8)	\
-	x(bi_promote_target,		16)	\
-	x(bi_foreground_target,		16)	\
-	x(bi_background_target,		16)	\
-	x(bi_erasure_code,		16)	\
-	x(bi_fields_set,		16)	\
-	x(bi_dir,			64)	\
-	x(bi_dir_offset,		64)	\
-	x(bi_subvol,			32)	\
-	x(bi_parent_subvol,		32)
-
-#define BCH_INODE_FIELDS_v3()			\
-	x(bi_atime,			96)	\
-	x(bi_ctime,			96)	\
-	x(bi_mtime,			96)	\
-	x(bi_otime,			96)	\
-	x(bi_uid,			32)	\
-	x(bi_gid,			32)	\
-	x(bi_nlink,			32)	\
-	x(bi_generation,		32)	\
-	x(bi_dev,			32)	\
-	x(bi_data_checksum,		8)	\
-	x(bi_compression,		8)	\
-	x(bi_project,			32)	\
-	x(bi_background_compression,	8)	\
-	x(bi_data_replicas,		8)	\
-	x(bi_promote_target,		16)	\
-	x(bi_foreground_target,		16)	\
-	x(bi_background_target,		16)	\
-	x(bi_erasure_code,		16)	\
-	x(bi_fields_set,		16)	\
-	x(bi_dir,			64)	\
-	x(bi_dir_offset,		64)	\
-	x(bi_subvol,			32)	\
-	x(bi_parent_subvol,		32)	\
-	x(bi_nocow,			8)
-
-/* subset of BCH_INODE_FIELDS */
-#define BCH_INODE_OPTS()			\
-	x(data_checksum,		8)	\
-	x(compression,			8)	\
-	x(project,			32)	\
-	x(background_compression,	8)	\
-	x(data_replicas,		8)	\
-	x(promote_target,		16)	\
-	x(foreground_target,		16)	\
-	x(background_target,		16)	\
-	x(erasure_code,			16)	\
-	x(nocow,			8)
-
-enum inode_opt_id {
-#define x(name, ...)				\
-	Inode_opt_##name,
-	BCH_INODE_OPTS()
-#undef  x
-	Inode_opt_nr,
-};
-
-#define BCH_INODE_FLAGS()			\
-	x(sync,				0)	\
-	x(immutable,			1)	\
-	x(append,			2)	\
-	x(nodump,			3)	\
-	x(noatime,			4)	\
-	x(i_size_dirty,			5)	\
-	x(i_sectors_dirty,		6)	\
-	x(unlinked,			7)	\
-	x(backptr_untrusted,		8)
-
-/* bits 20+ reserved for packed fields below: */
-
-enum bch_inode_flags {
-#define x(t, n)	BCH_INODE_##t = 1U << n,
-	BCH_INODE_FLAGS()
-#undef x
-};
-
-enum __bch_inode_flags {
-#define x(t, n)	__BCH_INODE_##t = n,
-	BCH_INODE_FLAGS()
-#undef x
-};
-
-LE32_BITMASK(INODE_STR_HASH,	struct bch_inode, bi_flags, 20, 24);
-LE32_BITMASK(INODE_NR_FIELDS,	struct bch_inode, bi_flags, 24, 31);
-LE32_BITMASK(INODE_NEW_VARINT,	struct bch_inode, bi_flags, 31, 32);
-
-LE64_BITMASK(INODEv2_STR_HASH,	struct bch_inode_v2, bi_flags, 20, 24);
-LE64_BITMASK(INODEv2_NR_FIELDS,	struct bch_inode_v2, bi_flags, 24, 31);
-
-LE64_BITMASK(INODEv3_STR_HASH,	struct bch_inode_v3, bi_flags, 20, 24);
-LE64_BITMASK(INODEv3_NR_FIELDS,	struct bch_inode_v3, bi_flags, 24, 31);
-
-LE64_BITMASK(INODEv3_FIELDS_START,
-				struct bch_inode_v3, bi_flags, 31, 36);
-LE64_BITMASK(INODEv3_MODE,	struct bch_inode_v3, bi_flags, 36, 52);
-
-/* Dirents */
-
-/*
- * Dirents (and xattrs) have to implement string lookups; since our b-tree
- * doesn't support arbitrary length strings for the key, we instead index by a
- * 64 bit hash (currently truncated sha1) of the string, stored in the offset
- * field of the key - using linear probing to resolve hash collisions. This also
- * provides us with the readdir cookie posix requires.
- *
- * Linear probing requires us to use whiteouts for deletions, in the event of a
- * collision:
- */
-
-struct bch_dirent {
-	struct bch_val		v;
-
-	/* Target inode number: */
-	union {
-	__le64			d_inum;
-	struct {		/* DT_SUBVOL */
-	__le32			d_child_subvol;
-	__le32			d_parent_subvol;
-	};
-	};
-
-	/*
-	 * Copy of mode bits 12-15 from the target inode - so userspace can get
-	 * the filetype without having to do a stat()
-	 */
-	__u8			d_type;
-
-	__u8			d_name[];
-} __packed __aligned(8);
-
-#define DT_SUBVOL	16
-#define BCH_DT_MAX	17
-
-#define BCH_NAME_MAX	512
-
-/* Xattrs */
-
-#define KEY_TYPE_XATTR_INDEX_USER			0
-#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS	1
-#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT	2
-#define KEY_TYPE_XATTR_INDEX_TRUSTED			3
-#define KEY_TYPE_XATTR_INDEX_SECURITY	        4
-
-struct bch_xattr {
-	struct bch_val		v;
-	__u8			x_type;
-	__u8			x_name_len;
-	__le16			x_val_len;
-	__u8			x_name[];
-} __packed __aligned(8);
-
-/* Bucket/allocation information: */
-
-struct bch_alloc {
-	struct bch_val		v;
-	__u8			fields;
-	__u8			gen;
-	__u8			data[];
-} __packed __aligned(8);
-
-#define BCH_ALLOC_FIELDS_V1()			\
-	x(read_time,		16)		\
-	x(write_time,		16)		\
-	x(data_type,		8)		\
-	x(dirty_sectors,	16)		\
-	x(cached_sectors,	16)		\
-	x(oldest_gen,		8)		\
-	x(stripe,		32)		\
-	x(stripe_redundancy,	8)
-
-enum {
-#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name,
-	BCH_ALLOC_FIELDS_V1()
-#undef x
-};
-
-struct bch_alloc_v2 {
-	struct bch_val		v;
-	__u8			nr_fields;
-	__u8			gen;
-	__u8			oldest_gen;
-	__u8			data_type;
-	__u8			data[];
-} __packed __aligned(8);
-
-#define BCH_ALLOC_FIELDS_V2()			\
-	x(read_time,		64)		\
-	x(write_time,		64)		\
-	x(dirty_sectors,	32)		\
-	x(cached_sectors,	32)		\
-	x(stripe,		32)		\
-	x(stripe_redundancy,	8)
-
-struct bch_alloc_v3 {
-	struct bch_val		v;
-	__le64			journal_seq;
-	__le32			flags;
-	__u8			nr_fields;
-	__u8			gen;
-	__u8			oldest_gen;
-	__u8			data_type;
-	__u8			data[];
-} __packed __aligned(8);
-
-LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags,  0,  1)
-LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags,  1,  2)
-
-struct bch_alloc_v4 {
-	struct bch_val		v;
-	__u64			journal_seq;
-	__u32			flags;
-	__u8			gen;
-	__u8			oldest_gen;
-	__u8			data_type;
-	__u8			stripe_redundancy;
-	__u32			dirty_sectors;
-	__u32			cached_sectors;
-	__u64			io_time[2];
-	__u32			stripe;
-	__u32			nr_external_backpointers;
-	__u64			fragmentation_lru;
-} __packed __aligned(8);
-
-#define BCH_ALLOC_V4_U64s_V0	6
-#define BCH_ALLOC_V4_U64s	(sizeof(struct bch_alloc_v4) / sizeof(__u64))
-
-BITMASK(BCH_ALLOC_V4_NEED_DISCARD,	struct bch_alloc_v4, flags,  0,  1)
-BITMASK(BCH_ALLOC_V4_NEED_INC_GEN,	struct bch_alloc_v4, flags,  1,  2)
-BITMASK(BCH_ALLOC_V4_BACKPOINTERS_START,struct bch_alloc_v4, flags,  2,  8)
-BITMASK(BCH_ALLOC_V4_NR_BACKPOINTERS,	struct bch_alloc_v4, flags,  8,  14)
-
-#define BCH_ALLOC_V4_NR_BACKPOINTERS_MAX	40
-
 struct bch_backpointer {
 	struct bch_val		v;
 	__u8			btree_id;
@@ -1021,154 +433,6 @@ struct bch_backpointer {
 	struct bpos		pos;
 } __packed __aligned(8);
 
-#define KEY_TYPE_BUCKET_GENS_BITS	8
-#define KEY_TYPE_BUCKET_GENS_NR		(1U << KEY_TYPE_BUCKET_GENS_BITS)
-#define KEY_TYPE_BUCKET_GENS_MASK	(KEY_TYPE_BUCKET_GENS_NR - 1)
-
-struct bch_bucket_gens {
-	struct bch_val		v;
-	u8			gens[KEY_TYPE_BUCKET_GENS_NR];
-} __packed __aligned(8);
-
-/* Quotas: */
-
-enum quota_types {
-	QTYP_USR		= 0,
-	QTYP_GRP		= 1,
-	QTYP_PRJ		= 2,
-	QTYP_NR			= 3,
-};
-
-enum quota_counters {
-	Q_SPC			= 0,
-	Q_INO			= 1,
-	Q_COUNTERS		= 2,
-};
-
-struct bch_quota_counter {
-	__le64			hardlimit;
-	__le64			softlimit;
-};
-
-struct bch_quota {
-	struct bch_val		v;
-	struct bch_quota_counter c[Q_COUNTERS];
-} __packed __aligned(8);
-
-/* Erasure coding */
-
-struct bch_stripe {
-	struct bch_val		v;
-	__le16			sectors;
-	__u8			algorithm;
-	__u8			nr_blocks;
-	__u8			nr_redundant;
-
-	__u8			csum_granularity_bits;
-	__u8			csum_type;
-	__u8			pad;
-
-	struct bch_extent_ptr	ptrs[];
-} __packed __aligned(8);
-
-/* Reflink: */
-
-struct bch_reflink_p {
-	struct bch_val		v;
-	__le64			idx;
-	/*
-	 * A reflink pointer might point to an indirect extent which is then
-	 * later split (by copygc or rebalance). If we only pointed to part of
-	 * the original indirect extent, and then one of the fragments is
-	 * outside the range we point to, we'd leak a refcount: so when creating
-	 * reflink pointers, we need to store pad values to remember the full
-	 * range we were taking a reference on.
-	 */
-	__le32			front_pad;
-	__le32			back_pad;
-} __packed __aligned(8);
-
-struct bch_reflink_v {
-	struct bch_val		v;
-	__le64			refcount;
-	union bch_extent_entry	start[0];
-	__u64			_data[];
-} __packed __aligned(8);
-
-struct bch_indirect_inline_data {
-	struct bch_val		v;
-	__le64			refcount;
-	u8			data[];
-};
-
-/* Inline data */
-
-struct bch_inline_data {
-	struct bch_val		v;
-	u8			data[];
-};
-
-/* Subvolumes: */
-
-#define SUBVOL_POS_MIN		POS(0, 1)
-#define SUBVOL_POS_MAX		POS(0, S32_MAX)
-#define BCACHEFS_ROOT_SUBVOL	1
-
-struct bch_subvolume {
-	struct bch_val		v;
-	__le32			flags;
-	__le32			snapshot;
-	__le64			inode;
-	/*
-	 * Snapshot subvolumes form a tree, separate from the snapshot nodes
-	 * tree - if this subvolume is a snapshot, this is the ID of the
-	 * subvolume it was created from:
-	 */
-	__le32			parent;
-	__le32			pad;
-	bch_le128		otime;
-};
-
-LE32_BITMASK(BCH_SUBVOLUME_RO,		struct bch_subvolume, flags,  0,  1)
-/*
- * We need to know whether a subvolume is a snapshot so we can know whether we
- * can delete it (or whether it should just be rm -rf'd)
- */
-LE32_BITMASK(BCH_SUBVOLUME_SNAP,	struct bch_subvolume, flags,  1,  2)
-LE32_BITMASK(BCH_SUBVOLUME_UNLINKED,	struct bch_subvolume, flags,  2,  3)
-
-/* Snapshots */
-
-struct bch_snapshot {
-	struct bch_val		v;
-	__le32			flags;
-	__le32			parent;
-	__le32			children[2];
-	__le32			subvol;
-	/* corresponds to a bch_snapshot_tree in BTREE_ID_snapshot_trees */
-	__le32			tree;
-	__le32			depth;
-	__le32			skip[3];
-};
-
-LE32_BITMASK(BCH_SNAPSHOT_DELETED,	struct bch_snapshot, flags,  0,  1)
-
-/* True if a subvolume points to this snapshot node: */
-LE32_BITMASK(BCH_SNAPSHOT_SUBVOL,	struct bch_snapshot, flags,  1,  2)
-
-/*
- * Snapshot trees:
- *
- * The snapshot_trees btree gives us persistent indentifier for each tree of
- * bch_snapshot nodes, and allow us to record and easily find the root/master
- * subvolume that other snapshots were created from:
- */
-struct bch_snapshot_tree {
-	struct bch_val		v;
-	__le32			master_subvol;
-	__le32			root_snapshot;
-};
-
 /* LRU btree: */
 
 struct bch_lru {
@@ -1178,33 +442,6 @@ struct bch_lru {
 
 #define LRU_ID_STRIPES		(1U << 16)
 
-/* Logged operations btree: */
-
-struct bch_logged_op_truncate {
-	struct bch_val		v;
-	__le32			subvol;
-	__le32			pad;
-	__le64			inum;
-	__le64			new_i_size;
-};
-
-enum logged_op_finsert_state {
-	LOGGED_OP_FINSERT_start,
-	LOGGED_OP_FINSERT_shift_extents,
-	LOGGED_OP_FINSERT_finish,
-};
-
-struct bch_logged_op_finsert {
-	struct bch_val		v;
-	__u8			state;
-	__u8			pad[3];
-	__le32			subvol;
-	__le64			inum;
-	__le64			dst_offset;
-	__le64			src_offset;
-	__le64			pos;
-};
-
 /* Optional/variable size superblock sections: */
 
 struct bch_sb_field {
@@ -1230,6 +467,19 @@ struct bch_sb_field {
 	x(ext,				13)	\
 	x(downgrade,			14)
 
+#include "alloc_background_format.h"
+#include "extents_format.h"
+#include "reflink_format.h"
+#include "ec_format.h"
+#include "inode_format.h"
+#include "dirent_format.h"
+#include "xattr_format.h"
+#include "quota_format.h"
+#include "logged_ops_format.h"
+#include "snapshot_format.h"
+#include "subvolume_format.h"
+#include "sb-counters_format.h"
+
 enum bch_sb_field_type {
 #define x(f, nr)	BCH_SB_FIELD_##f = nr,
 	BCH_SB_FIELDS()
@@ -1465,23 +715,6 @@ struct bch_sb_field_replicas {
 	struct bch_replicas_entry_v1 entries[];
 } __packed __aligned(8);
 
-/* BCH_SB_FIELD_quota: */
-
-struct bch_sb_quota_counter {
-	__le32				timelimit;
-	__le32				warnlimit;
-};
-
-struct bch_sb_quota_type {
-	__le64				flags;
-	struct bch_sb_quota_counter	c[Q_COUNTERS];
-};
-
-struct bch_sb_field_quota {
-	struct bch_sb_field		field;
-	struct bch_sb_quota_type	q[QTYP_NR];
-} __packed __aligned(8);
-
 /* BCH_SB_FIELD_disk_groups: */
 
 #define BCH_SB_LABEL_SIZE		32
@@ -1500,101 +733,6 @@ struct bch_sb_field_disk_groups {
 	struct bch_disk_group	entries[];
 } __packed __aligned(8);
 
-/* BCH_SB_FIELD_counters */
-
-#define BCH_PERSISTENT_COUNTERS()				\
-	x(io_read,					0)	\
-	x(io_write,					1)	\
-	x(io_move,					2)	\
-	x(bucket_invalidate,				3)	\
-	x(bucket_discard,				4)	\
-	x(bucket_alloc,					5)	\
-	x(bucket_alloc_fail,				6)	\
-	x(btree_cache_scan,				7)	\
-	x(btree_cache_reap,				8)	\
-	x(btree_cache_cannibalize,			9)	\
-	x(btree_cache_cannibalize_lock,			10)	\
-	x(btree_cache_cannibalize_lock_fail,		11)	\
-	x(btree_cache_cannibalize_unlock,		12)	\
-	x(btree_node_write,				13)	\
-	x(btree_node_read,				14)	\
-	x(btree_node_compact,				15)	\
-	x(btree_node_merge,				16)	\
-	x(btree_node_split,				17)	\
-	x(btree_node_rewrite,				18)	\
-	x(btree_node_alloc,				19)	\
-	x(btree_node_free,				20)	\
-	x(btree_node_set_root,				21)	\
-	x(btree_path_relock_fail,			22)	\
-	x(btree_path_upgrade_fail,			23)	\
-	x(btree_reserve_get_fail,			24)	\
-	x(journal_entry_full,				25)	\
-	x(journal_full,					26)	\
-	x(journal_reclaim_finish,			27)	\
-	x(journal_reclaim_start,			28)	\
-	x(journal_write,				29)	\
-	x(read_promote,					30)	\
-	x(read_bounce,					31)	\
-	x(read_split,					33)	\
-	x(read_retry,					32)	\
-	x(read_reuse_race,				34)	\
-	x(move_extent_read,				35)	\
-	x(move_extent_write,				36)	\
-	x(move_extent_finish,				37)	\
-	x(move_extent_fail,				38)	\
-	x(move_extent_start_fail,			39)	\
-	x(copygc,					40)	\
-	x(copygc_wait,					41)	\
-	x(gc_gens_end,					42)	\
-	x(gc_gens_start,				43)	\
-	x(trans_blocked_journal_reclaim,		44)	\
-	x(trans_restart_btree_node_reused,		45)	\
-	x(trans_restart_btree_node_split,		46)	\
-	x(trans_restart_fault_inject,			47)	\
-	x(trans_restart_iter_upgrade,			48)	\
-	x(trans_restart_journal_preres_get,		49)	\
-	x(trans_restart_journal_reclaim,		50)	\
-	x(trans_restart_journal_res_get,		51)	\
-	x(trans_restart_key_cache_key_realloced,	52)	\
-	x(trans_restart_key_cache_raced,		53)	\
-	x(trans_restart_mark_replicas,			54)	\
-	x(trans_restart_mem_realloced,			55)	\
-	x(trans_restart_memory_allocation_failure,	56)	\
-	x(trans_restart_relock,				57)	\
-	x(trans_restart_relock_after_fill,		58)	\
-	x(trans_restart_relock_key_cache_fill,		59)	\
-	x(trans_restart_relock_next_node,		60)	\
-	x(trans_restart_relock_parent_for_fill,		61)	\
-	x(trans_restart_relock_path,			62)	\
-	x(trans_restart_relock_path_intent,		63)	\
-	x(trans_restart_too_many_iters,			64)	\
-	x(trans_restart_traverse,			65)	\
-	x(trans_restart_upgrade,			66)	\
-	x(trans_restart_would_deadlock,			67)	\
-	x(trans_restart_would_deadlock_write,		68)	\
-	x(trans_restart_injected,			69)	\
-	x(trans_restart_key_cache_upgrade,		70)	\
-	x(trans_traverse_all,				71)	\
-	x(transaction_commit,				72)	\
-	x(write_super,					73)	\
-	x(trans_restart_would_deadlock_recursion_limit,	74)	\
-	x(trans_restart_write_buffer_flush,		75)	\
-	x(trans_restart_split_race,			76)	\
-	x(write_buffer_flush_slowpath,			77)	\
-	x(write_buffer_flush_sync,			78)
-
-enum bch_persistent_counters {
-#define x(t, n, ...) BCH_COUNTER_##t,
-	BCH_PERSISTENT_COUNTERS()
-#undef x
-	BCH_COUNTER_NR
-};
-
-struct bch_sb_field_counters {
-	struct bch_sb_field	field;
-	__le64			d[];
-};
-
 /*
  * On clean shutdown, store btree roots and current journal sequence number in
  * the superblock:
diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c
index abdb05507d16..76e79a15ba08 100644
--- a/fs/bcachefs/bkey.c
+++ b/fs/bcachefs/bkey.c
@@ -33,7 +33,7 @@ void bch2_bkey_packed_to_binary_text(struct printbuf *out,
 			next_key_bits -= 64;
 		}
 
-		bch2_prt_u64_binary(out, v, min(word_bits, nr_key_bits));
+		bch2_prt_u64_base2_nbits(out, v, min(word_bits, nr_key_bits));
 
 		if (!next_key_bits)
 			break;
diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
index 761f5e33b1e6..5e52684764eb 100644
--- a/fs/bcachefs/bkey_methods.c
+++ b/fs/bcachefs/bkey_methods.c
@@ -63,8 +63,17 @@ static int key_type_cookie_invalid(struct bch_fs *c, struct bkey_s_c k,
 	return 0;
 }
 
+static void key_type_cookie_to_text(struct printbuf *out, struct bch_fs *c,
+				    struct bkey_s_c k)
+{
+	struct bkey_s_c_cookie ck = bkey_s_c_to_cookie(k);
+
+	prt_printf(out, "%llu", le64_to_cpu(ck.v->cookie));
+}
+
 #define bch2_bkey_ops_cookie ((struct bkey_ops) {	\
 	.key_invalid	= key_type_cookie_invalid,	\
+	.val_to_text	= key_type_cookie_to_text,	\
 	.min_val_size	= 8,				\
 })
 
diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h
index ee82283722b7..03efe8ee565a 100644
--- a/fs/bcachefs/bkey_methods.h
+++ b/fs/bcachefs/bkey_methods.h
@@ -83,9 +83,10 @@ enum btree_update_flags {
 
 	__BTREE_TRIGGER_NORUN,
 	__BTREE_TRIGGER_TRANSACTIONAL,
+	__BTREE_TRIGGER_ATOMIC,
+	__BTREE_TRIGGER_GC,
 	__BTREE_TRIGGER_INSERT,
 	__BTREE_TRIGGER_OVERWRITE,
-	__BTREE_TRIGGER_GC,
 	__BTREE_TRIGGER_BUCKET_INVALIDATE,
 };
 
@@ -107,6 +108,10 @@ enum btree_update_flags {
  * causing us to go emergency read-only)
  */
 #define BTREE_TRIGGER_TRANSACTIONAL	(1U << __BTREE_TRIGGER_TRANSACTIONAL)
+#define BTREE_TRIGGER_ATOMIC		(1U << __BTREE_TRIGGER_ATOMIC)
+
+/* We're in gc/fsck: running triggers to recalculate e.g. disk usage */
+#define BTREE_TRIGGER_GC		(1U << __BTREE_TRIGGER_GC)
 
 /* @new is entering the btree */
 #define BTREE_TRIGGER_INSERT		(1U << __BTREE_TRIGGER_INSERT)
@@ -114,9 +119,6 @@ enum btree_update_flags {
 /* @old is leaving the btree */
 #define BTREE_TRIGGER_OVERWRITE		(1U << __BTREE_TRIGGER_OVERWRITE)
 
-/* We're in gc/fsck: running triggers to recalculate e.g. disk usage */
-#define BTREE_TRIGGER_GC		(1U << __BTREE_TRIGGER_GC)
-
 /* signal from bucket invalidate path to alloc trigger */
 #define BTREE_TRIGGER_BUCKET_INVALIDATE	(1U << __BTREE_TRIGGER_BUCKET_INVALIDATE)
 
diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c
index 74bf8eb90a4c..3fd1085b6c61 100644
--- a/fs/bcachefs/bset.c
+++ b/fs/bcachefs/bset.c
@@ -720,7 +720,7 @@ static noinline void __build_ro_aux_tree(struct btree *b, struct bset_tree *t)
 {
 	struct bkey_packed *prev = NULL, *k = btree_bkey_first(b, t);
 	struct bkey_i min_key, max_key;
-	unsigned j, cacheline = 1;
+	unsigned cacheline = 1;
 
 	t->size = min(bkey_to_cacheline(b, t, btree_bkey_last(b, t)),
 		      bset_ro_tree_capacity(b, t));
@@ -823,13 +823,12 @@ void bch2_bset_init_first(struct btree *b, struct bset *i)
 	set_btree_bset(b, t, i);
 }
 
-void bch2_bset_init_next(struct bch_fs *c, struct btree *b,
-			 struct btree_node_entry *bne)
+void bch2_bset_init_next(struct btree *b, struct btree_node_entry *bne)
 {
 	struct bset *i = &bne->keys;
 	struct bset_tree *t;
 
-	BUG_ON(bset_byte_offset(b, bne) >= btree_bytes(c));
+	BUG_ON(bset_byte_offset(b, bne) >= btree_buf_bytes(b));
 	BUG_ON((void *) bne < (void *) btree_bkey_last(b, bset_tree_last(b)));
 	BUG_ON(b->nsets >= MAX_BSETS);
 
diff --git a/fs/bcachefs/bset.h b/fs/bcachefs/bset.h
index 632c2b8c5460..79c77baaa383 100644
--- a/fs/bcachefs/bset.h
+++ b/fs/bcachefs/bset.h
@@ -264,8 +264,7 @@ static inline struct bset *bset_next_set(struct btree *b,
 void bch2_btree_keys_init(struct btree *);
 
 void bch2_bset_init_first(struct btree *, struct bset *);
-void bch2_bset_init_next(struct bch_fs *, struct btree *,
-			 struct btree_node_entry *);
+void bch2_bset_init_next(struct btree *, struct btree_node_entry *);
 void bch2_bset_build_aux_tree(struct btree *, struct bset_tree *, bool);
 
 void bch2_bset_insert(struct btree *, struct btree_node_iter *,
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 8e2488a4b58d..d7c81beac14a 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -60,7 +60,7 @@ static void btree_node_data_free(struct bch_fs *c, struct btree *b)
 
 	clear_btree_node_just_written(b);
 
-	kvpfree(b->data, btree_bytes(c));
+	kvpfree(b->data, btree_buf_bytes(b));
 	b->data = NULL;
 #ifdef __KERNEL__
 	kvfree(b->aux_data);
@@ -94,7 +94,7 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
 {
 	BUG_ON(b->data || b->aux_data);
 
-	b->data = kvpmalloc(btree_bytes(c), gfp);
+	b->data = kvpmalloc(btree_buf_bytes(b), gfp);
 	if (!b->data)
 		return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
 #ifdef __KERNEL__
@@ -107,7 +107,7 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
 		b->aux_data = NULL;
 #endif
 	if (!b->aux_data) {
-		kvpfree(b->data, btree_bytes(c));
+		kvpfree(b->data, btree_buf_bytes(b));
 		b->data = NULL;
 		return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
 	}
@@ -126,7 +126,7 @@ static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp)
 	bkey_btree_ptr_init(&b->key);
 	INIT_LIST_HEAD(&b->list);
 	INIT_LIST_HEAD(&b->write_blocked);
-	b->byte_order = ilog2(btree_bytes(c));
+	b->byte_order = ilog2(c->opts.btree_node_size);
 	return b;
 }
 
@@ -408,7 +408,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
 	if (c->verify_data)
 		list_move(&c->verify_data->list, &bc->live);
 
-	kvpfree(c->verify_ondisk, btree_bytes(c));
+	kvpfree(c->verify_ondisk, c->opts.btree_node_size);
 
 	for (i = 0; i < btree_id_nr_alive(c); i++) {
 		struct btree_root *r = bch2_btree_id_root(c, i);
@@ -1192,7 +1192,7 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struc
 	       "    failed unpacked %zu\n",
 	       b->unpack_fn_len,
 	       b->nr.live_u64s * sizeof(u64),
-	       btree_bytes(c) - sizeof(struct btree_node),
+	       btree_buf_bytes(b) - sizeof(struct btree_node),
 	       b->nr.live_u64s * 100 / btree_max_u64s(c),
 	       b->sib_u64s[0],
 	       b->sib_u64s[1],
diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h
index 4e1af5882052..6d33885fdbde 100644
--- a/fs/bcachefs/btree_cache.h
+++ b/fs/bcachefs/btree_cache.h
@@ -74,22 +74,27 @@ static inline bool btree_node_hashed(struct btree *b)
 	     _iter = 0;	_iter < (_tbl)->size; _iter++)			\
 		rht_for_each_entry_rcu((_b), (_pos), _tbl, _iter, hash)
 
-static inline size_t btree_bytes(struct bch_fs *c)
+static inline size_t btree_buf_bytes(const struct btree *b)
 {
-	return c->opts.btree_node_size;
+	return 1UL << b->byte_order;
 }
 
-static inline size_t btree_max_u64s(struct bch_fs *c)
+static inline size_t btree_buf_max_u64s(const struct btree *b)
 {
-	return (btree_bytes(c) - sizeof(struct btree_node)) / sizeof(u64);
+	return (btree_buf_bytes(b) - sizeof(struct btree_node)) / sizeof(u64);
 }
 
-static inline size_t btree_pages(struct bch_fs *c)
+static inline size_t btree_max_u64s(const struct bch_fs *c)
 {
-	return btree_bytes(c) / PAGE_SIZE;
+	return (c->opts.btree_node_size - sizeof(struct btree_node)) / sizeof(u64);
 }
 
-static inline unsigned btree_blocks(struct bch_fs *c)
+static inline size_t btree_sectors(const struct bch_fs *c)
+{
+	return c->opts.btree_node_size >> SECTOR_SHIFT;
+}
+
+static inline unsigned btree_blocks(const struct bch_fs *c)
 {
 	return btree_sectors(c) >> c->block_bits;
 }
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 49b4ade758c3..1102995643b1 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -597,7 +597,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
 			      "bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n"
 			      "while marking %s",
 			      p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
-			      bch2_data_types[ptr_data_type(k->k, &p.ptr)],
+			      bch2_data_type_str(ptr_data_type(k->k, &p.ptr)),
 			      p.ptr.gen,
 			      (printbuf_reset(&buf),
 			       bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) {
@@ -615,7 +615,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
 			      "bucket %u:%zu data type %s ptr gen in the future: %u > %u\n"
 			      "while marking %s",
 			      p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
-			      bch2_data_types[ptr_data_type(k->k, &p.ptr)],
+			      bch2_data_type_str(ptr_data_type(k->k, &p.ptr)),
 			      p.ptr.gen, g->gen,
 			      (printbuf_reset(&buf),
 			       bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) {
@@ -637,7 +637,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
 			      "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
 			      "while marking %s",
 			      p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen,
-			      bch2_data_types[ptr_data_type(k->k, &p.ptr)],
+			      bch2_data_type_str(ptr_data_type(k->k, &p.ptr)),
 			      p.ptr.gen,
 			      (printbuf_reset(&buf),
 			       bch2_bkey_val_to_text(&buf, c, *k), buf.buf))))
@@ -649,7 +649,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
 			      "bucket %u:%zu data type %s stale dirty ptr: %u < %u\n"
 			      "while marking %s",
 			      p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
-			      bch2_data_types[ptr_data_type(k->k, &p.ptr)],
+			      bch2_data_type_str(ptr_data_type(k->k, &p.ptr)),
 			      p.ptr.gen, g->gen,
 			      (printbuf_reset(&buf),
 			       bch2_bkey_val_to_text(&buf, c, *k), buf.buf))))
@@ -664,8 +664,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
 				"bucket %u:%zu different types of data in same bucket: %s, %s\n"
 				"while marking %s",
 				p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
-				bch2_data_types[g->data_type],
-				bch2_data_types[data_type],
+				bch2_data_type_str(g->data_type),
+				bch2_data_type_str(data_type),
 				(printbuf_reset(&buf),
 				 bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
 			if (data_type == BCH_DATA_btree) {
@@ -1238,11 +1238,11 @@ static int bch2_gc_done(struct bch_fs *c,
 
 		for (i = 0; i < BCH_DATA_NR; i++) {
 			copy_dev_field(dev_usage_buckets_wrong,
-				       d[i].buckets,	"%s buckets", bch2_data_types[i]);
+				       d[i].buckets,	"%s buckets", bch2_data_type_str(i));
 			copy_dev_field(dev_usage_sectors_wrong,
-				       d[i].sectors,	"%s sectors", bch2_data_types[i]);
+				       d[i].sectors,	"%s sectors", bch2_data_type_str(i));
 			copy_dev_field(dev_usage_fragmented_wrong,
-				       d[i].fragmented,	"%s fragmented", bch2_data_types[i]);
+				       d[i].fragmented,	"%s fragmented", bch2_data_type_str(i));
 		}
 	}
 
@@ -1253,19 +1253,19 @@ static int bch2_gc_done(struct bch_fs *c,
 			bch2_acc_percpu_u64s((u64 __percpu *) c->usage_gc, nr);
 
 		copy_fs_field(fs_usage_hidden_wrong,
-			      hidden,		"hidden");
+			      b.hidden,		"hidden");
 		copy_fs_field(fs_usage_btree_wrong,
-			      btree,		"btree");
+			      b.btree,		"btree");
 
 		if (!metadata_only) {
 			copy_fs_field(fs_usage_data_wrong,
-				      data,	"data");
+				      b.data,	"data");
 			copy_fs_field(fs_usage_cached_wrong,
-				      cached,	"cached");
+				      b.cached,	"cached");
 			copy_fs_field(fs_usage_reserved_wrong,
-				      reserved,	"reserved");
+				      b.reserved,	"reserved");
 			copy_fs_field(fs_usage_nr_inodes_wrong,
-				      nr_inodes,"nr_inodes");
+				      b.nr_inodes,"nr_inodes");
 
 			for (i = 0; i < BCH_REPLICAS_MAX; i++)
 				copy_fs_field(fs_usage_persistent_reserved_wrong,
@@ -1417,8 +1417,8 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
 			": got %s, should be %s",
 			iter->pos.inode, iter->pos.offset,
 			gc.gen,
-			bch2_data_types[new.data_type],
-			bch2_data_types[gc.data_type]))
+			bch2_data_type_str(new.data_type),
+			bch2_data_type_str(gc.data_type)))
 		new.data_type = gc.data_type;
 
 #define copy_bucket_field(_errtype, _f)					\
@@ -1428,7 +1428,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
 			": got %u, should be %u",			\
 			iter->pos.inode, iter->pos.offset,		\
 			gc.gen,						\
-			bch2_data_types[gc.data_type],			\
+			bch2_data_type_str(gc.data_type),		\
 			new._f, gc._f))					\
 		new._f = gc._f;						\
 
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 33db48e2153f..aa9b6cbe3226 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -112,7 +112,7 @@ static void *btree_bounce_alloc(struct bch_fs *c, size_t size,
 	unsigned flags = memalloc_nofs_save();
 	void *p;
 
-	BUG_ON(size > btree_bytes(c));
+	BUG_ON(size > c->opts.btree_node_size);
 
 	*used_mempool = false;
 	p = vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT);
@@ -174,8 +174,8 @@ static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b)
 
 	ptrs = ptrs_end = ((void *) new_whiteouts + bytes);
 
-	for (k = unwritten_whiteouts_start(c, b);
-	     k != unwritten_whiteouts_end(c, b);
+	for (k = unwritten_whiteouts_start(b);
+	     k != unwritten_whiteouts_end(b);
 	     k = bkey_p_next(k))
 		*--ptrs = k;
 
@@ -192,7 +192,7 @@ static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b)
 	verify_no_dups(b, new_whiteouts,
 		       (void *) ((u64 *) new_whiteouts + b->whiteout_u64s));
 
-	memcpy_u64s(unwritten_whiteouts_start(c, b),
+	memcpy_u64s(unwritten_whiteouts_start(b),
 		    new_whiteouts, b->whiteout_u64s);
 
 	btree_bounce_free(c, bytes, used_mempool, new_whiteouts);
@@ -313,7 +313,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
 	}
 
 	bytes = sorting_entire_node
-		? btree_bytes(c)
+		? btree_buf_bytes(b)
 		: __vstruct_bytes(struct btree_node, u64s);
 
 	out = btree_bounce_alloc(c, bytes, &used_mempool);
@@ -338,7 +338,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
 	if (sorting_entire_node) {
 		u64s = le16_to_cpu(out->keys.u64s);
 
-		BUG_ON(bytes != btree_bytes(c));
+		BUG_ON(bytes != btree_buf_bytes(b));
 
 		/*
 		 * Our temporary buffer is the same size as the btree node's
@@ -502,7 +502,7 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b)
 
 	bne = want_new_bset(c, b);
 	if (bne)
-		bch2_bset_init_next(c, b, bne);
+		bch2_bset_init_next(b, bne);
 
 	bch2_btree_build_aux_trees(b);
 
@@ -1160,7 +1160,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 			     ptr_written, b->written);
 	} else {
 		for (bne = write_block(b);
-		     bset_byte_offset(b, bne) < btree_bytes(c);
+		     bset_byte_offset(b, bne) < btree_buf_bytes(b);
 		     bne = (void *) bne + block_bytes(c))
 			btree_err_on(bne->keys.seq == b->data->keys.seq &&
 				     !bch2_journal_seq_is_blacklisted(c,
@@ -1172,7 +1172,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 				     "found bset signature after last bset");
 	}
 
-	sorted = btree_bounce_alloc(c, btree_bytes(c), &used_mempool);
+	sorted = btree_bounce_alloc(c, btree_buf_bytes(b), &used_mempool);
 	sorted->keys.u64s = 0;
 
 	set_btree_bset(b, b->set, &b->data->keys);
@@ -1188,7 +1188,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 
 	BUG_ON(b->nr.live_u64s != u64s);
 
-	btree_bounce_free(c, btree_bytes(c), used_mempool, sorted);
+	btree_bounce_free(c, btree_buf_bytes(b), used_mempool, sorted);
 
 	if (updated_range)
 		bch2_btree_node_drop_keys_outside_node(b);
@@ -1284,7 +1284,7 @@ static void btree_node_read_work(struct work_struct *work)
 		rb->have_ioref		= bch2_dev_get_ioref(ca, READ);
 		bio_reset(bio, NULL, REQ_OP_READ|REQ_SYNC|REQ_META);
 		bio->bi_iter.bi_sector	= rb->pick.ptr.offset;
-		bio->bi_iter.bi_size	= btree_bytes(c);
+		bio->bi_iter.bi_size	= btree_buf_bytes(b);
 
 		if (rb->have_ioref) {
 			bio_set_dev(bio, ca->disk_sb.bdev);
@@ -1512,7 +1512,7 @@ fsck_err:
 	}
 
 	if (best >= 0) {
-		memcpy(b->data, ra->buf[best], btree_bytes(c));
+		memcpy(b->data, ra->buf[best], btree_buf_bytes(b));
 		ret = bch2_btree_node_read_done(c, NULL, b, false, saw_error);
 	} else {
 		ret = -1;
@@ -1578,7 +1578,7 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool
 	for (i = 0; i < ra->nr; i++) {
 		ra->buf[i] = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS);
 		ra->bio[i] = bio_alloc_bioset(NULL,
-					      buf_pages(ra->buf[i], btree_bytes(c)),
+					      buf_pages(ra->buf[i], btree_buf_bytes(b)),
 					      REQ_OP_READ|REQ_SYNC|REQ_META,
 					      GFP_NOFS,
 					      &c->btree_bio);
@@ -1598,7 +1598,7 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool
 		rb->pick		= pick;
 		rb->bio.bi_iter.bi_sector = pick.ptr.offset;
 		rb->bio.bi_end_io	= btree_node_read_all_replicas_endio;
-		bch2_bio_map(&rb->bio, ra->buf[i], btree_bytes(c));
+		bch2_bio_map(&rb->bio, ra->buf[i], btree_buf_bytes(b));
 
 		if (rb->have_ioref) {
 			this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree],
@@ -1665,7 +1665,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
 	ca = bch_dev_bkey_exists(c, pick.ptr.dev);
 
 	bio = bio_alloc_bioset(NULL,
-			       buf_pages(b->data, btree_bytes(c)),
+			       buf_pages(b->data, btree_buf_bytes(b)),
 			       REQ_OP_READ|REQ_SYNC|REQ_META,
 			       GFP_NOFS,
 			       &c->btree_bio);
@@ -1679,7 +1679,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
 	INIT_WORK(&rb->work, btree_node_read_work);
 	bio->bi_iter.bi_sector	= pick.ptr.offset;
 	bio->bi_end_io		= btree_node_read_endio;
-	bch2_bio_map(bio, b->data, btree_bytes(c));
+	bch2_bio_map(bio, b->data, btree_buf_bytes(b));
 
 	if (rb->have_ioref) {
 		this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree],
@@ -2074,8 +2074,8 @@ do_write:
 	i->u64s		= 0;
 
 	sort_iter_add(&sort_iter.iter,
-		      unwritten_whiteouts_start(c, b),
-		      unwritten_whiteouts_end(c, b));
+		      unwritten_whiteouts_start(b),
+		      unwritten_whiteouts_end(b));
 	SET_BSET_SEPARATE_WHITEOUTS(i, false);
 
 	b->whiteout_u64s = 0;
@@ -2251,7 +2251,7 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)
 
 	bne = want_new_bset(c, b);
 	if (bne)
-		bch2_bset_init_next(c, b, bne);
+		bch2_bset_init_next(b, bne);
 
 	bch2_btree_build_aux_trees(b);
 
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index fa298289e016..5467a8635be1 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -1337,7 +1337,7 @@ void bch2_path_put(struct btree_trans *trans, btree_path_idx_t path_idx, bool in
 
 	if (path->should_be_locked &&
 	    !trans->restarted &&
-	    (!dup || !bch2_btree_path_relock_norestart(trans, dup, _THIS_IP_)))
+	    (!dup || !bch2_btree_path_relock_norestart(trans, dup)))
 		return;
 
 	if (dup) {
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index da2b74fa63fc..24772538e4cc 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -819,6 +819,11 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
 #define for_each_btree_key_continue_norestart(_iter, _flags, _k, _ret)	\
 	for_each_btree_key_upto_continue_norestart(_iter, SPOS_MAX, _flags, _k, _ret)
 
+/*
+ * This should not be used in a fastpath, without first trying _do in
+ * nonblocking mode - it will cause excessive transaction restarts and
+ * potentially livelocking:
+ */
 #define drop_locks_do(_trans, _do)					\
 ({									\
 	bch2_trans_unlock(_trans);					\
diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c
index 2d1c95c42f24..bed75c93c069 100644
--- a/fs/bcachefs/btree_locking.c
+++ b/fs/bcachefs/btree_locking.c
@@ -631,8 +631,7 @@ int bch2_btree_path_relock_intent(struct btree_trans *trans,
 }
 
 __flatten
-bool bch2_btree_path_relock_norestart(struct btree_trans *trans,
-			struct btree_path *path, unsigned long trace_ip)
+bool bch2_btree_path_relock_norestart(struct btree_trans *trans, struct btree_path *path)
 {
 	struct get_locks_fail f;
 
@@ -642,7 +641,7 @@ bool bch2_btree_path_relock_norestart(struct btree_trans *trans,
 int __bch2_btree_path_relock(struct btree_trans *trans,
 			struct btree_path *path, unsigned long trace_ip)
 {
-	if (!bch2_btree_path_relock_norestart(trans, path, trace_ip)) {
+	if (!bch2_btree_path_relock_norestart(trans, path)) {
 		trace_and_count(trans->c, trans_restart_relock_path, trans, trace_ip, path);
 		return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path);
 	}
@@ -759,12 +758,39 @@ int bch2_trans_relock(struct btree_trans *trans)
 	if (unlikely(trans->restarted))
 		return -((int) trans->restarted);
 
-	trans_for_each_path(trans, path, i)
+	trans_for_each_path(trans, path, i) {
+		struct get_locks_fail f;
+
 		if (path->should_be_locked &&
-		    !bch2_btree_path_relock_norestart(trans, path, _RET_IP_)) {
-			trace_and_count(trans->c, trans_restart_relock, trans, _RET_IP_, path);
+		    !btree_path_get_locks(trans, path, false, &f)) {
+			if (trace_trans_restart_relock_enabled()) {
+				struct printbuf buf = PRINTBUF;
+
+				bch2_bpos_to_text(&buf, path->pos);
+				prt_printf(&buf, " l=%u seq=%u node seq=",
+					   f.l, path->l[f.l].lock_seq);
+				if (IS_ERR_OR_NULL(f.b)) {
+					prt_str(&buf, bch2_err_str(PTR_ERR(f.b)));
+				} else {
+					prt_printf(&buf, "%u", f.b->c.lock.seq);
+
+					struct six_lock_count c =
+						bch2_btree_node_lock_counts(trans, NULL, &f.b->c, f.l);
+					prt_printf(&buf, " self locked %u.%u.%u", c.n[0], c.n[1], c.n[2]);
+
+					c = six_lock_counts(&f.b->c.lock);
+					prt_printf(&buf, " total locked %u.%u.%u", c.n[0], c.n[1], c.n[2]);
+				}
+
+				trace_trans_restart_relock(trans, _RET_IP_, buf.buf);
+				printbuf_exit(&buf);
+			}
+
+			count_event(trans->c, trans_restart_relock);
 			return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
 		}
+	}
+
 	return 0;
 }
 
@@ -778,7 +804,7 @@ int bch2_trans_relock_notrace(struct btree_trans *trans)
 
 	trans_for_each_path(trans, path, i)
 		if (path->should_be_locked &&
-		    !bch2_btree_path_relock_norestart(trans, path, _RET_IP_)) {
+		    !bch2_btree_path_relock_norestart(trans, path)) {
 			return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
 		}
 	return 0;
diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h
index cc5500a957a1..4bd72c855da1 100644
--- a/fs/bcachefs/btree_locking.h
+++ b/fs/bcachefs/btree_locking.h
@@ -312,8 +312,7 @@ void bch2_btree_node_lock_write_nofail(struct btree_trans *,
 
 /* relock: */
 
-bool bch2_btree_path_relock_norestart(struct btree_trans *,
-				      struct btree_path *, unsigned long);
+bool bch2_btree_path_relock_norestart(struct btree_trans *, struct btree_path *);
 int __bch2_btree_path_relock(struct btree_trans *,
 			     struct btree_path *, unsigned long);
 
@@ -353,12 +352,6 @@ static inline bool bch2_btree_node_relock_notrace(struct btree_trans *trans,
 
 /* upgrade */
 
-
-struct get_locks_fail {
-	unsigned	l;
-	struct btree	*b;
-};
-
 bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *,
 			       struct btree_path *, unsigned,
 			       struct get_locks_fail *);
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index 90eb8065ff2d..30d69a6d133e 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -139,8 +139,7 @@ bool bch2_btree_bset_insert_key(struct btree_trans *trans,
 	EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k));
 	EBUG_ON(bpos_lt(insert->k.p, b->data->min_key));
 	EBUG_ON(bpos_gt(insert->k.p, b->data->max_key));
-	EBUG_ON(insert->k.u64s >
-		bch_btree_keys_u64s_remaining(trans->c, b));
+	EBUG_ON(insert->k.u64s > bch2_btree_keys_u64s_remaining(b));
 	EBUG_ON(!b->c.level && !bpos_eq(insert->k.p, path->pos));
 
 	k = bch2_btree_node_iter_peek_all(node_iter, b);
@@ -160,7 +159,7 @@ bool bch2_btree_bset_insert_key(struct btree_trans *trans,
 		k->type = KEY_TYPE_deleted;
 
 		if (k->needs_whiteout)
-			push_whiteout(trans->c, b, insert->k.p);
+			push_whiteout(b, insert->k.p);
 		k->needs_whiteout = false;
 
 		if (k >= btree_bset_last(b)->start) {
@@ -348,9 +347,7 @@ static noinline void journal_transaction_name(struct btree_trans *trans)
 static inline int btree_key_can_insert(struct btree_trans *trans,
 				       struct btree *b, unsigned u64s)
 {
-	struct bch_fs *c = trans->c;
-
-	if (!bch2_btree_node_insert_fits(c, b, u64s))
+	if (!bch2_btree_node_insert_fits(b, u64s))
 		return -BCH_ERR_btree_insert_btree_node_full;
 
 	return 0;
@@ -418,7 +415,7 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags
 		return 0;
 
 	new_u64s	= roundup_pow_of_two(u64s);
-	new_k		= krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOWAIT);
+	new_k		= krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOWAIT|__GFP_NOWARN);
 	if (unlikely(!new_k))
 		return btree_key_can_insert_cached_slowpath(trans, flags, path, new_u64s);
 
@@ -448,9 +445,6 @@ static int run_one_mem_trigger(struct btree_trans *trans,
 	if (unlikely(flags & BTREE_TRIGGER_NORUN))
 		return 0;
 
-	if (!btree_node_type_needs_gc(__btree_node_type(i->level, i->btree_id)))
-		return 0;
-
 	if (old_ops->trigger == new_ops->trigger) {
 		ret   = bch2_key_trigger(trans, i->btree_id, i->level,
 				old, bkey_i_to_s(new),
@@ -586,9 +580,6 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
 
 static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans)
 {
-	struct bch_fs *c = trans->c;
-	int ret = 0;
-
 	trans_for_each_update(trans, i) {
 		/*
 		 * XXX: synchronization of cached update triggers with gc
@@ -596,14 +587,15 @@ static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans)
 		 */
 		BUG_ON(i->cached || i->level);
 
-		if (gc_visited(c, gc_pos_btree_node(insert_l(trans, i)->b))) {
-			ret = run_one_mem_trigger(trans, i, i->flags|BTREE_TRIGGER_GC);
+		if (btree_node_type_needs_gc(__btree_node_type(i->level, i->btree_id)) &&
+		    gc_visited(trans->c, gc_pos_btree_node(insert_l(trans, i)->b))) {
+			int ret = run_one_mem_trigger(trans, i, i->flags|BTREE_TRIGGER_GC);
 			if (ret)
-				break;
+				return ret;
 		}
 	}
 
-	return ret;
+	return 0;
 }
 
 static inline int
@@ -680,6 +672,9 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
 	    bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas))
 		return -BCH_ERR_btree_insert_need_mark_replicas;
 
+	/* XXX: we only want to run this if deltas are nonzero */
+	bch2_trans_account_disk_usage_change(trans);
+
 	h = trans->hooks;
 	while (h) {
 		ret = h->fn(trans, h);
@@ -689,8 +684,8 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
 	}
 
 	trans_for_each_update(trans, i)
-		if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type)) {
-			ret = run_one_mem_trigger(trans, i, i->flags);
+		if (BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS & (1U << i->bkey_type)) {
+			ret = run_one_mem_trigger(trans, i, BTREE_TRIGGER_ATOMIC|i->flags);
 			if (ret)
 				goto fatal_err;
 		}
@@ -994,6 +989,8 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
 	    !trans->journal_entries_u64s)
 		goto out_reset;
 
+	memset(&trans->fs_usage_delta, 0, sizeof(trans->fs_usage_delta));
+
 	ret = bch2_trans_commit_run_triggers(trans);
 	if (ret)
 		goto out_reset;
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index d530307046f4..4a5a64499eb7 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -430,6 +430,9 @@ struct btree_trans {
 	struct journal_res	journal_res;
 	u64			*journal_seq;
 	struct disk_reservation *disk_res;
+
+	struct bch_fs_usage_base fs_usage_delta;
+
 	unsigned		journal_u64s;
 	unsigned		extra_disk_res; /* XXX kill */
 	struct replicas_delta_list *fs_usage_deltas;
@@ -653,7 +656,7 @@ const char *bch2_btree_node_type_str(enum btree_node_type);
 	 BIT_ULL(BKEY_TYPE_reflink)|			\
 	 BIT_ULL(BKEY_TYPE_btree))
 
-#define BTREE_NODE_TYPE_HAS_MEM_TRIGGERS		\
+#define BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS		\
 	(BIT_ULL(BKEY_TYPE_alloc)|			\
 	 BIT_ULL(BKEY_TYPE_inodes)|			\
 	 BIT_ULL(BKEY_TYPE_stripes)|			\
@@ -661,7 +664,7 @@ const char *bch2_btree_node_type_str(enum btree_node_type);
 
 #define BTREE_NODE_TYPE_HAS_TRIGGERS			\
 	(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS|		\
-	 BTREE_NODE_TYPE_HAS_MEM_TRIGGERS)
+	 BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS)
 
 static inline bool btree_node_type_needs_gc(enum btree_node_type type)
 {
@@ -738,4 +741,9 @@ enum btree_node_sibling {
 	btree_next_sib,
 };
 
+struct get_locks_fail {
+	unsigned	l;
+	struct btree	*b;
+};
+
 #endif /* _BCACHEFS_BTREE_TYPES_H */
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 44f9dfa28a09..17a5938aa71a 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -159,7 +159,7 @@ static bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b,
 {
 	size_t u64s = btree_node_u64s_with_format(nr, &b->format, new_f);
 
-	return __vstruct_bytes(struct btree_node, u64s) < btree_bytes(c);
+	return __vstruct_bytes(struct btree_node, u64s) < btree_buf_bytes(b);
 }
 
 /* Btree node freeing/allocation: */
@@ -1097,7 +1097,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
 		 * Always check for space for two keys, even if we won't have to
 		 * split at prior level - it might have been a merge instead:
 		 */
-		if (bch2_btree_node_insert_fits(c, path->l[update_level].b,
+		if (bch2_btree_node_insert_fits(path->l[update_level].b,
 						BKEY_BTREE_PTR_U64s_MAX * 2))
 			break;
 
@@ -1401,7 +1401,7 @@ static void __btree_split_node(struct btree_update *as,
 
 		unsigned u64s = nr_keys[i].nr_keys * n[i]->data->format.key_u64s +
 			nr_keys[i].val_u64s;
-		if (__vstruct_bytes(struct btree_node, u64s) > btree_bytes(as->c))
+		if (__vstruct_bytes(struct btree_node, u64s) > btree_buf_bytes(b))
 			n[i]->data->format = b->format;
 
 		btree_node_set_format(n[i], n[i]->data->format);
@@ -1703,7 +1703,7 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
 
 	bch2_btree_node_prep_for_write(trans, path, b);
 
-	if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) {
+	if (!bch2_btree_node_insert_fits(b, bch2_keylist_u64s(keys))) {
 		bch2_btree_node_unlock_write(trans, path, b);
 		goto split;
 	}
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index adfc62083844..c593c925d1e3 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -184,21 +184,19 @@ static inline void btree_node_reset_sib_u64s(struct btree *b)
 	b->sib_u64s[1] = b->nr.live_u64s;
 }
 
-static inline void *btree_data_end(struct bch_fs *c, struct btree *b)
+static inline void *btree_data_end(struct btree *b)
 {
-	return (void *) b->data + btree_bytes(c);
+	return (void *) b->data + btree_buf_bytes(b);
 }
 
-static inline struct bkey_packed *unwritten_whiteouts_start(struct bch_fs *c,
-							    struct btree *b)
+static inline struct bkey_packed *unwritten_whiteouts_start(struct btree *b)
 {
-	return (void *) ((u64 *) btree_data_end(c, b) - b->whiteout_u64s);
+	return (void *) ((u64 *) btree_data_end(b) - b->whiteout_u64s);
 }
 
-static inline struct bkey_packed *unwritten_whiteouts_end(struct bch_fs *c,
-							  struct btree *b)
+static inline struct bkey_packed *unwritten_whiteouts_end(struct btree *b)
 {
-	return btree_data_end(c, b);
+	return btree_data_end(b);
 }
 
 static inline void *write_block(struct btree *b)
@@ -221,13 +219,11 @@ static inline bool bkey_written(struct btree *b, struct bkey_packed *k)
 	return __btree_addr_written(b, k);
 }
 
-static inline ssize_t __bch_btree_u64s_remaining(struct bch_fs *c,
-						 struct btree *b,
-						 void *end)
+static inline ssize_t __bch2_btree_u64s_remaining(struct btree *b, void *end)
 {
 	ssize_t used = bset_byte_offset(b, end) / sizeof(u64) +
 		b->whiteout_u64s;
-	ssize_t total = c->opts.btree_node_size >> 3;
+	ssize_t total = btree_buf_bytes(b) >> 3;
 
 	/* Always leave one extra u64 for bch2_varint_decode: */
 	used++;
@@ -235,10 +231,9 @@ static inline ssize_t __bch_btree_u64s_remaining(struct bch_fs *c,
 	return total - used;
 }
 
-static inline size_t bch_btree_keys_u64s_remaining(struct bch_fs *c,
-						   struct btree *b)
+static inline size_t bch2_btree_keys_u64s_remaining(struct btree *b)
 {
-	ssize_t remaining = __bch_btree_u64s_remaining(c, b,
+	ssize_t remaining = __bch2_btree_u64s_remaining(b,
 				btree_bkey_last(b, bset_tree_last(b)));
 
 	BUG_ON(remaining < 0);
@@ -260,14 +255,13 @@ static inline unsigned btree_write_set_buffer(struct btree *b)
 	return 8 << BTREE_WRITE_SET_U64s_BITS;
 }
 
-static inline struct btree_node_entry *want_new_bset(struct bch_fs *c,
-						     struct btree *b)
+static inline struct btree_node_entry *want_new_bset(struct bch_fs *c, struct btree *b)
 {
 	struct bset_tree *t = bset_tree_last(b);
 	struct btree_node_entry *bne = max(write_block(b),
 			(void *) btree_bkey_last(b, bset_tree_last(b)));
 	ssize_t remaining_space =
-		__bch_btree_u64s_remaining(c, b, bne->keys.start);
+		__bch2_btree_u64s_remaining(b, bne->keys.start);
 
 	if (unlikely(bset_written(b, bset(b, t)))) {
 		if (remaining_space > (ssize_t) (block_bytes(c) >> 3))
@@ -281,12 +275,11 @@ static inline struct btree_node_entry *want_new_bset(struct bch_fs *c,
 	return NULL;
 }
 
-static inline void push_whiteout(struct bch_fs *c, struct btree *b,
-				 struct bpos pos)
+static inline void push_whiteout(struct btree *b, struct bpos pos)
 {
 	struct bkey_packed k;
 
-	BUG_ON(bch_btree_keys_u64s_remaining(c, b) < BKEY_U64s);
+	BUG_ON(bch2_btree_keys_u64s_remaining(b) < BKEY_U64s);
 	EBUG_ON(btree_node_just_written(b));
 
 	if (!bkey_pack_pos(&k, pos, b)) {
@@ -299,20 +292,19 @@ static inline void push_whiteout(struct bch_fs *c, struct btree *b,
 	k.needs_whiteout = true;
 
 	b->whiteout_u64s += k.u64s;
-	bkey_p_copy(unwritten_whiteouts_start(c, b), &k);
+	bkey_p_copy(unwritten_whiteouts_start(b), &k);
 }
 
 /*
  * write lock must be held on @b (else the dirty bset that we were going to
  * insert into could be written out from under us)
  */
-static inline bool bch2_btree_node_insert_fits(struct bch_fs *c,
-					       struct btree *b, unsigned u64s)
+static inline bool bch2_btree_node_insert_fits(struct btree *b, unsigned u64s)
 {
 	if (unlikely(btree_node_need_rewrite(b)))
 		return false;
 
-	return u64s <= bch_btree_keys_u64s_remaining(c, b);
+	return u64s <= bch2_btree_keys_u64s_remaining(b);
 }
 
 void bch2_btree_updates_to_text(struct printbuf *, struct bch_fs *);
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index 5c1169c78daf..ac7844861966 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -125,13 +125,12 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite
 			       struct btree_write_buffered_key *wb,
 			       bool *write_locked, size_t *fast)
 {
-	struct bch_fs *c = trans->c;
 	struct btree_path *path;
 	int ret;
 
 	EBUG_ON(!wb->journal_seq);
-	EBUG_ON(!c->btree_write_buffer.flushing.pin.seq);
-	EBUG_ON(c->btree_write_buffer.flushing.pin.seq > wb->journal_seq);
+	EBUG_ON(!trans->c->btree_write_buffer.flushing.pin.seq);
+	EBUG_ON(trans->c->btree_write_buffer.flushing.pin.seq > wb->journal_seq);
 
 	ret = bch2_btree_iter_traverse(iter);
 	if (ret)
@@ -155,7 +154,7 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite
 		*write_locked = true;
 	}
 
-	if (unlikely(!bch2_btree_node_insert_fits(c, path->l[0].b, wb->k.k.u64s))) {
+	if (unlikely(!bch2_btree_node_insert_fits(path->l[0].b, wb->k.k.u64s))) {
 		*write_locked = false;
 		return wb_flush_one_slowpath(trans, iter, wb);
 	}
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index d83ea0e53df3..54f7826ac498 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -25,7 +25,7 @@
 
 #include <linux/preempt.h>
 
-static inline void fs_usage_data_type_to_base(struct bch_fs_usage *fs_usage,
+static inline void fs_usage_data_type_to_base(struct bch_fs_usage_base *fs_usage,
 					      enum bch_data_type data_type,
 					      s64 sectors)
 {
@@ -54,20 +54,20 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
 		bch2_fs_usage_acc_to_base(c, i);
 
 	for (unsigned i = 0; i < BCH_REPLICAS_MAX; i++)
-		usage->reserved += usage->persistent_reserved[i];
+		usage->b.reserved += usage->persistent_reserved[i];
 
 	for (unsigned i = 0; i < c->replicas.nr; i++) {
 		struct bch_replicas_entry_v1 *e =
 			cpu_replicas_entry(&c->replicas, i);
 
-		fs_usage_data_type_to_base(usage, e->data_type, usage->replicas[i]);
+		fs_usage_data_type_to_base(&usage->b, e->data_type, usage->replicas[i]);
 	}
 
 	for_each_member_device(c, ca) {
 		struct bch_dev_usage dev = bch2_dev_usage_read(ca);
 
-		usage->hidden += (dev.d[BCH_DATA_sb].buckets +
-				  dev.d[BCH_DATA_journal].buckets) *
+		usage->b.hidden += (dev.d[BCH_DATA_sb].buckets +
+				    dev.d[BCH_DATA_journal].buckets) *
 			ca->mi.bucket_size;
 	}
 
@@ -188,15 +188,15 @@ void bch2_fs_usage_to_text(struct printbuf *out,
 	prt_printf(out, "capacity:\t\t\t%llu\n", c->capacity);
 
 	prt_printf(out, "hidden:\t\t\t\t%llu\n",
-	       fs_usage->u.hidden);
+	       fs_usage->u.b.hidden);
 	prt_printf(out, "data:\t\t\t\t%llu\n",
-	       fs_usage->u.data);
+	       fs_usage->u.b.data);
 	prt_printf(out, "cached:\t\t\t\t%llu\n",
-	       fs_usage->u.cached);
+	       fs_usage->u.b.cached);
 	prt_printf(out, "reserved:\t\t\t%llu\n",
-	       fs_usage->u.reserved);
+	       fs_usage->u.b.reserved);
 	prt_printf(out, "nr_inodes:\t\t\t%llu\n",
-	       fs_usage->u.nr_inodes);
+	       fs_usage->u.b.nr_inodes);
 	prt_printf(out, "online reserved:\t\t%llu\n",
 	       fs_usage->online_reserved);
 
@@ -225,10 +225,10 @@ static u64 reserve_factor(u64 r)
 
 u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage_online *fs_usage)
 {
-	return min(fs_usage->u.hidden +
-		   fs_usage->u.btree +
-		   fs_usage->u.data +
-		   reserve_factor(fs_usage->u.reserved +
+	return min(fs_usage->u.b.hidden +
+		   fs_usage->u.b.btree +
+		   fs_usage->u.b.data +
+		   reserve_factor(fs_usage->u.b.reserved +
 				  fs_usage->online_reserved),
 		   c->capacity);
 }
@@ -240,17 +240,17 @@ __bch2_fs_usage_read_short(struct bch_fs *c)
 	u64 data, reserved;
 
 	ret.capacity = c->capacity -
-		bch2_fs_usage_read_one(c, &c->usage_base->hidden);
+		bch2_fs_usage_read_one(c, &c->usage_base->b.hidden);
 
-	data		= bch2_fs_usage_read_one(c, &c->usage_base->data) +
-		bch2_fs_usage_read_one(c, &c->usage_base->btree);
-	reserved	= bch2_fs_usage_read_one(c, &c->usage_base->reserved) +
+	data		= bch2_fs_usage_read_one(c, &c->usage_base->b.data) +
+		bch2_fs_usage_read_one(c, &c->usage_base->b.btree);
+	reserved	= bch2_fs_usage_read_one(c, &c->usage_base->b.reserved) +
 		percpu_u64_get(c->online_reserved);
 
 	ret.used	= min(ret.capacity, data + reserve_factor(reserved));
 	ret.free	= ret.capacity - ret.used;
 
-	ret.nr_inodes	= bch2_fs_usage_read_one(c, &c->usage_base->nr_inodes);
+	ret.nr_inodes	= bch2_fs_usage_read_one(c, &c->usage_base->b.nr_inodes);
 
 	return ret;
 }
@@ -284,7 +284,7 @@ void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev_usage *usage)
 	prt_newline(out);
 
 	for (unsigned i = 0; i < BCH_DATA_NR; i++) {
-		prt_str(out, bch2_data_types[i]);
+		bch2_prt_data_type(out, i);
 		prt_tab(out);
 		prt_u64(out, usage->d[i].buckets);
 		prt_tab_rjust(out);
@@ -308,9 +308,9 @@ void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
 	fs_usage = fs_usage_ptr(c, journal_seq, gc);
 
 	if (data_type_is_hidden(old->data_type))
-		fs_usage->hidden -= ca->mi.bucket_size;
+		fs_usage->b.hidden -= ca->mi.bucket_size;
 	if (data_type_is_hidden(new->data_type))
-		fs_usage->hidden += ca->mi.bucket_size;
+		fs_usage->b.hidden += ca->mi.bucket_size;
 
 	u = dev_usage_ptr(ca, journal_seq, gc);
 
@@ -359,7 +359,7 @@ static inline int __update_replicas(struct bch_fs *c,
 	if (idx < 0)
 		return -1;
 
-	fs_usage_data_type_to_base(fs_usage, r->data_type, sectors);
+	fs_usage_data_type_to_base(&fs_usage->b, r->data_type, sectors);
 	fs_usage->replicas[idx]		+= sectors;
 	return 0;
 }
@@ -394,7 +394,7 @@ int bch2_update_replicas(struct bch_fs *c, struct bkey_s_c k,
 
 	preempt_disable();
 	fs_usage = fs_usage_ptr(c, journal_seq, gc);
-	fs_usage_data_type_to_base(fs_usage, r->data_type, sectors);
+	fs_usage_data_type_to_base(&fs_usage->b, r->data_type, sectors);
 	fs_usage->replicas[idx]		+= sectors;
 	preempt_enable();
 err:
@@ -523,8 +523,8 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
 	if (bch2_fs_inconsistent_on(g->data_type &&
 			g->data_type != data_type, c,
 			"different types of data in same bucket: %s, %s",
-			bch2_data_types[g->data_type],
-			bch2_data_types[data_type])) {
+			bch2_data_type_str(g->data_type),
+			bch2_data_type_str(data_type))) {
 		ret = -EIO;
 		goto err;
 	}
@@ -532,7 +532,7 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
 	if (bch2_fs_inconsistent_on((u64) g->dirty_sectors + sectors > ca->mi.bucket_size, c,
 			"bucket %u:%zu gen %u data type %s sector count overflow: %u + %u > bucket size",
 			ca->dev_idx, b, g->gen,
-			bch2_data_types[g->data_type ?: data_type],
+			bch2_data_type_str(g->data_type ?: data_type),
 			g->dirty_sectors, sectors)) {
 		ret = -EIO;
 		goto err;
@@ -575,7 +575,7 @@ int bch2_check_bucket_ref(struct btree_trans *trans,
 			"bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n"
 			"while marking %s",
 			ptr->dev, bucket_nr, b_gen,
-			bch2_data_types[bucket_data_type ?: ptr_data_type],
+			bch2_data_type_str(bucket_data_type ?: ptr_data_type),
 			ptr->gen,
 			(bch2_bkey_val_to_text(&buf, c, k), buf.buf));
 		ret = -EIO;
@@ -588,7 +588,7 @@ int bch2_check_bucket_ref(struct btree_trans *trans,
 			"bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
 			"while marking %s",
 			ptr->dev, bucket_nr, b_gen,
-			bch2_data_types[bucket_data_type ?: ptr_data_type],
+			bch2_data_type_str(bucket_data_type ?: ptr_data_type),
 			ptr->gen,
 			(printbuf_reset(&buf),
 			 bch2_bkey_val_to_text(&buf, c, k), buf.buf));
@@ -603,7 +603,7 @@ int bch2_check_bucket_ref(struct btree_trans *trans,
 			"while marking %s",
 			ptr->dev, bucket_nr, b_gen,
 			*bucket_gen(ca, bucket_nr),
-			bch2_data_types[bucket_data_type ?: ptr_data_type],
+			bch2_data_type_str(bucket_data_type ?: ptr_data_type),
 			ptr->gen,
 			(printbuf_reset(&buf),
 			 bch2_bkey_val_to_text(&buf, c, k), buf.buf));
@@ -624,8 +624,8 @@ int bch2_check_bucket_ref(struct btree_trans *trans,
 			"bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
 			"while marking %s",
 			ptr->dev, bucket_nr, b_gen,
-			bch2_data_types[bucket_data_type],
-			bch2_data_types[ptr_data_type],
+			bch2_data_type_str(bucket_data_type),
+			bch2_data_type_str(ptr_data_type),
 			(printbuf_reset(&buf),
 			 bch2_bkey_val_to_text(&buf, c, k), buf.buf));
 		ret = -EIO;
@@ -638,7 +638,7 @@ int bch2_check_bucket_ref(struct btree_trans *trans,
 			"bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX\n"
 			"while marking %s",
 			ptr->dev, bucket_nr, b_gen,
-			bch2_data_types[bucket_data_type ?: ptr_data_type],
+			bch2_data_type_str(bucket_data_type ?: ptr_data_type),
 			bucket_sectors, sectors,
 			(printbuf_reset(&buf),
 			 bch2_bkey_val_to_text(&buf, c, k), buf.buf));
@@ -677,11 +677,11 @@ void bch2_trans_fs_usage_revert(struct btree_trans *trans,
 		BUG_ON(__update_replicas(c, dst, &d->r, -d->delta));
 	}
 
-	dst->nr_inodes -= deltas->nr_inodes;
+	dst->b.nr_inodes -= deltas->nr_inodes;
 
 	for (i = 0; i < BCH_REPLICAS_MAX; i++) {
 		added				-= deltas->persistent_reserved[i];
-		dst->reserved			-= deltas->persistent_reserved[i];
+		dst->b.reserved			-= deltas->persistent_reserved[i];
 		dst->persistent_reserved[i]	-= deltas->persistent_reserved[i];
 	}
 
@@ -694,48 +694,25 @@ void bch2_trans_fs_usage_revert(struct btree_trans *trans,
 	percpu_up_read(&c->mark_lock);
 }
 
-int bch2_trans_fs_usage_apply(struct btree_trans *trans,
-			      struct replicas_delta_list *deltas)
+void bch2_trans_account_disk_usage_change(struct btree_trans *trans)
 {
 	struct bch_fs *c = trans->c;
+	u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
 	static int warned_disk_usage = 0;
 	bool warn = false;
-	u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
-	struct replicas_delta *d, *d2;
-	struct replicas_delta *top = (void *) deltas->d + deltas->used;
-	struct bch_fs_usage *dst;
-	s64 added = 0, should_not_have_added;
-	unsigned i;
 
 	percpu_down_read(&c->mark_lock);
 	preempt_disable();
-	dst = fs_usage_ptr(c, trans->journal_res.seq, false);
-
-	for (d = deltas->d; d != top; d = replicas_delta_next(d)) {
-		switch (d->r.data_type) {
-		case BCH_DATA_btree:
-		case BCH_DATA_user:
-		case BCH_DATA_parity:
-			added += d->delta;
-		}
+	struct bch_fs_usage_base *dst = &fs_usage_ptr(c, trans->journal_res.seq, false)->b;
+	struct bch_fs_usage_base *src = &trans->fs_usage_delta;
 
-		if (__update_replicas(c, dst, &d->r, d->delta))
-			goto need_mark;
-	}
-
-	dst->nr_inodes += deltas->nr_inodes;
-
-	for (i = 0; i < BCH_REPLICAS_MAX; i++) {
-		added				+= deltas->persistent_reserved[i];
-		dst->reserved			+= deltas->persistent_reserved[i];
-		dst->persistent_reserved[i]	+= deltas->persistent_reserved[i];
-	}
+	s64 added = src->btree + src->data + src->reserved;
 
 	/*
 	 * Not allowed to reduce sectors_available except by getting a
 	 * reservation:
 	 */
-	should_not_have_added = added - (s64) disk_res_sectors;
+	s64 should_not_have_added = added - (s64) disk_res_sectors;
 	if (unlikely(should_not_have_added > 0)) {
 		u64 old, new, v = atomic64_read(&c->sectors_available);
 
@@ -754,6 +731,13 @@ int bch2_trans_fs_usage_apply(struct btree_trans *trans,
 		this_cpu_sub(*c->online_reserved, added);
 	}
 
+	dst->hidden	+= src->hidden;
+	dst->btree	+= src->btree;
+	dst->data	+= src->data;
+	dst->cached	+= src->cached;
+	dst->reserved	+= src->reserved;
+	dst->nr_inodes	+= src->nr_inodes;
+
 	preempt_enable();
 	percpu_up_read(&c->mark_lock);
 
@@ -761,6 +745,34 @@ int bch2_trans_fs_usage_apply(struct btree_trans *trans,
 		bch2_trans_inconsistent(trans,
 					"disk usage increased %lli more than %llu sectors reserved)",
 					should_not_have_added, disk_res_sectors);
+}
+
+int bch2_trans_fs_usage_apply(struct btree_trans *trans,
+			      struct replicas_delta_list *deltas)
+{
+	struct bch_fs *c = trans->c;
+	struct replicas_delta *d, *d2;
+	struct replicas_delta *top = (void *) deltas->d + deltas->used;
+	struct bch_fs_usage *dst;
+	unsigned i;
+
+	percpu_down_read(&c->mark_lock);
+	preempt_disable();
+	dst = fs_usage_ptr(c, trans->journal_res.seq, false);
+
+	for (d = deltas->d; d != top; d = replicas_delta_next(d))
+		if (__update_replicas(c, dst, &d->r, d->delta))
+			goto need_mark;
+
+	dst->b.nr_inodes += deltas->nr_inodes;
+
+	for (i = 0; i < BCH_REPLICAS_MAX; i++) {
+		dst->b.reserved			+= deltas->persistent_reserved[i];
+		dst->persistent_reserved[i]	+= deltas->persistent_reserved[i];
+	}
+
+	preempt_enable();
+	percpu_up_read(&c->mark_lock);
 	return 0;
 need_mark:
 	/* revert changes: */
@@ -1084,7 +1096,7 @@ static int __trigger_reservation(struct btree_trans *trans,
 		struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage_gc);
 
 		replicas = min(replicas, ARRAY_SIZE(fs_usage->persistent_reserved));
-		fs_usage->reserved				+= sectors;
+		fs_usage->b.reserved				+= sectors;
 		fs_usage->persistent_reserved[replicas - 1]	+= sectors;
 
 		preempt_enable();
@@ -1130,9 +1142,9 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
 			"bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n"
 			"while marking %s",
 			iter.pos.inode, iter.pos.offset, a->v.gen,
-			bch2_data_types[a->v.data_type],
-			bch2_data_types[type],
-			bch2_data_types[type]);
+			bch2_data_type_str(a->v.data_type),
+			bch2_data_type_str(type),
+			bch2_data_type_str(type));
 		ret = -EIO;
 		goto err;
 	}
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 2c95cc5d86be..6387e039f789 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -356,6 +356,8 @@ int bch2_trigger_reservation(struct btree_trans *, enum btree_id, unsigned,
 	ret;											\
 })
 
+void bch2_trans_account_disk_usage_change(struct btree_trans *);
+
 void bch2_trans_fs_usage_revert(struct btree_trans *, struct replicas_delta_list *);
 int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *);
 
@@ -385,6 +387,21 @@ static inline bool is_superblock_bucket(struct bch_dev *ca, u64 b)
 	return false;
 }
 
+static inline const char *bch2_data_type_str(enum bch_data_type type)
+{
+	return type < BCH_DATA_NR
+		? __bch2_data_types[type]
+		: "(invalid data type)";
+}
+
+static inline void bch2_prt_data_type(struct printbuf *out, enum bch_data_type type)
+{
+	if (type < BCH_DATA_NR)
+		prt_str(out, __bch2_data_types[type]);
+	else
+		prt_printf(out, "(invalid data type %u)", type);
+}
+
 /* disk reservations: */
 
 static inline void bch2_disk_reservation_put(struct bch_fs *c,
diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h
index 783f71017204..6a31740222a7 100644
--- a/fs/bcachefs/buckets_types.h
+++ b/fs/bcachefs/buckets_types.h
@@ -45,23 +45,18 @@ struct bch_dev_usage {
 	}			d[BCH_DATA_NR];
 };
 
-struct bch_fs_usage {
-	/* all fields are in units of 512 byte sectors: */
+struct bch_fs_usage_base {
 	u64			hidden;
 	u64			btree;
 	u64			data;
 	u64			cached;
 	u64			reserved;
 	u64			nr_inodes;
+};
 
-	/* XXX: add stats for compression ratio */
-#if 0
-	u64			uncompressed;
-	u64			compressed;
-#endif
-
-	/* broken out: */
-
+struct bch_fs_usage {
+	/* all fields are in units of 512 byte sectors: */
+	struct bch_fs_usage_base b;
 	u64			persistent_reserved[BCH_REPLICAS_MAX];
 	u64			replicas[];
 };
diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c
index f41889093a2c..363644451106 100644
--- a/fs/bcachefs/clock.c
+++ b/fs/bcachefs/clock.c
@@ -109,7 +109,7 @@ void bch2_kthread_io_clock_wait(struct io_clock *clock,
 	if (cpu_timeout != MAX_SCHEDULE_TIMEOUT)
 		mod_timer(&wait.cpu_timer, cpu_timeout + jiffies);
 
-	while (1) {
+	do {
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (kthread && kthread_should_stop())
 			break;
@@ -119,7 +119,7 @@ void bch2_kthread_io_clock_wait(struct io_clock *clock,
 
 		schedule();
 		try_to_freeze();
-	}
+	} while (0);
 
 	__set_current_state(TASK_RUNNING);
 	del_timer_sync(&wait.cpu_timer);
diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h
index 607fd5e232c9..58c2eb45570f 100644
--- a/fs/bcachefs/compress.h
+++ b/fs/bcachefs/compress.h
@@ -47,6 +47,14 @@ static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v)
 	return __bch2_compression_opt_to_type[bch2_compression_decode(v).type];
 }
 
+static inline void bch2_prt_compression_type(struct printbuf *out, enum bch_compression_type type)
+{
+	if (type < BCH_COMPRESSION_TYPE_NR)
+		prt_str(out, __bch2_compression_types[type]);
+	else
+		prt_printf(out, "(invalid compression type %u)", type);
+}
+
 int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *,
 				struct bch_extent_crc_unpacked *);
 int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *,
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 6f13477ff652..4150feca42a2 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -285,9 +285,7 @@ restart_drop_extra_replicas:
 						k.k->p, bkey_start_pos(&insert->k)) ?:
 			bch2_insert_snapshot_whiteouts(trans, m->btree_id,
 						k.k->p, insert->k.p) ?:
-			bch2_bkey_set_needs_rebalance(c, insert,
-						      op->opts.background_target,
-						      op->opts.background_compression) ?:
+			bch2_bkey_set_needs_rebalance(c, insert, &op->opts) ?:
 			bch2_trans_update(trans, &iter, insert,
 				BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
 			bch2_trans_commit(trans, &op->res,
@@ -529,7 +527,7 @@ int bch2_data_update_init(struct btree_trans *trans,
 		BCH_WRITE_DATA_ENCODED|
 		BCH_WRITE_MOVE|
 		m->data_opts.write_flags;
-	m->op.compression_opt	= io_opts.background_compression ?: io_opts.compression;
+	m->op.compression_opt	= background_compression(io_opts);
 	m->op.watermark		= m->data_opts.btree_insert_flags & BCH_WATERMARK_MASK;
 
 	bkey_for_each_ptr(ptrs, ptr)
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
index d6418948495f..cadda9bbe4a4 100644
--- a/fs/bcachefs/debug.c
+++ b/fs/bcachefs/debug.c
@@ -44,19 +44,19 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b,
 		return false;
 
 	bio = bio_alloc_bioset(ca->disk_sb.bdev,
-			       buf_pages(n_sorted, btree_bytes(c)),
+			       buf_pages(n_sorted, btree_buf_bytes(b)),
 			       REQ_OP_READ|REQ_META,
 			       GFP_NOFS,
 			       &c->btree_bio);
 	bio->bi_iter.bi_sector	= pick.ptr.offset;
-	bch2_bio_map(bio, n_sorted, btree_bytes(c));
+	bch2_bio_map(bio, n_sorted, btree_buf_bytes(b));
 
 	submit_bio_wait(bio);
 
 	bio_put(bio);
 	percpu_ref_put(&ca->io_ref);
 
-	memcpy(n_ondisk, n_sorted, btree_bytes(c));
+	memcpy(n_ondisk, n_sorted, btree_buf_bytes(b));
 
 	v->written = 0;
 	if (bch2_btree_node_read_done(c, ca, v, false, &saw_error) || saw_error)
@@ -137,7 +137,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
 	mutex_lock(&c->verify_lock);
 
 	if (!c->verify_ondisk) {
-		c->verify_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL);
+		c->verify_ondisk = kvpmalloc(btree_buf_bytes(b), GFP_KERNEL);
 		if (!c->verify_ondisk)
 			goto out;
 	}
@@ -199,19 +199,19 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c,
 		return;
 	}
 
-	n_ondisk = kvpmalloc(btree_bytes(c), GFP_KERNEL);
+	n_ondisk = kvpmalloc(btree_buf_bytes(b), GFP_KERNEL);
 	if (!n_ondisk) {
 		prt_printf(out, "memory allocation failure\n");
 		goto out;
 	}
 
 	bio = bio_alloc_bioset(ca->disk_sb.bdev,
-			       buf_pages(n_ondisk, btree_bytes(c)),
+			       buf_pages(n_ondisk, btree_buf_bytes(b)),
 			       REQ_OP_READ|REQ_META,
 			       GFP_NOFS,
 			       &c->btree_bio);
 	bio->bi_iter.bi_sector	= pick.ptr.offset;
-	bch2_bio_map(bio, n_ondisk, btree_bytes(c));
+	bch2_bio_map(bio, n_ondisk, btree_buf_bytes(b));
 
 	ret = submit_bio_wait(bio);
 	if (ret) {
@@ -293,7 +293,7 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c,
 out:
 	if (bio)
 		bio_put(bio);
-	kvpfree(n_ondisk, btree_bytes(c));
+	kvpfree(n_ondisk, btree_buf_bytes(b));
 	percpu_ref_put(&ca->io_ref);
 }
 
diff --git a/fs/bcachefs/dirent_format.h b/fs/bcachefs/dirent_format.h
new file mode 100644
index 000000000000..5e116b88e814
--- /dev/null
+++ b/fs/bcachefs/dirent_format.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_DIRENT_FORMAT_H
+#define _BCACHEFS_DIRENT_FORMAT_H
+
+/*
+ * Dirents (and xattrs) have to implement string lookups; since our b-tree
+ * doesn't support arbitrary length strings for the key, we instead index by a
+ * 64 bit hash (currently truncated sha1) of the string, stored in the offset
+ * field of the key - using linear probing to resolve hash collisions. This also
+ * provides us with the readdir cookie posix requires.
+ *
+ * Linear probing requires us to use whiteouts for deletions, in the event of a
+ * collision:
+ */
+
+struct bch_dirent {
+	struct bch_val		v;
+
+	/* Target inode number: */
+	union {
+	__le64			d_inum;
+	struct {		/* DT_SUBVOL */
+	__le32			d_child_subvol;
+	__le32			d_parent_subvol;
+	};
+	};
+
+	/*
+	 * Copy of mode bits 12-15 from the target inode - so userspace can get
+	 * the filetype without having to do a stat()
+	 */
+	__u8			d_type;
+
+	__u8			d_name[];
+} __packed __aligned(8);
+
+#define DT_SUBVOL	16
+#define BCH_DT_MAX	17
+
+#define BCH_NAME_MAX	512
+
+#endif /* _BCACHEFS_DIRENT_FORMAT_H */
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index d802bc63c8d0..d503af270024 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -190,7 +190,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
 					       a->v.stripe_redundancy, trans,
 				"bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)",
 				iter.pos.inode, iter.pos.offset, a->v.gen,
-				bch2_data_types[a->v.data_type],
+				bch2_data_type_str(a->v.data_type),
 				a->v.dirty_sectors,
 				a->v.stripe, s.k->p.offset)) {
 			ret = -EIO;
@@ -200,7 +200,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
 		if (bch2_trans_inconsistent_on(data_type && a->v.dirty_sectors, trans,
 				"bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu",
 				iter.pos.inode, iter.pos.offset, a->v.gen,
-				bch2_data_types[a->v.data_type],
+				bch2_data_type_str(a->v.data_type),
 				a->v.dirty_sectors,
 				s.k->p.offset)) {
 			ret = -EIO;
@@ -367,7 +367,7 @@ int bch2_trigger_stripe(struct btree_trans *trans,
 		}
 	}
 
-	if (!(flags & (BTREE_TRIGGER_TRANSACTIONAL|BTREE_TRIGGER_GC))) {
+	if (flags & BTREE_TRIGGER_ATOMIC) {
 		struct stripe *m = genradix_ptr(&c->stripes, idx);
 
 		if (!m) {
diff --git a/fs/bcachefs/ec_format.h b/fs/bcachefs/ec_format.h
new file mode 100644
index 000000000000..44ce88ba08d7
--- /dev/null
+++ b/fs/bcachefs/ec_format.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_EC_FORMAT_H
+#define _BCACHEFS_EC_FORMAT_H
+
+struct bch_stripe {
+	struct bch_val		v;
+	__le16			sectors;
+	__u8			algorithm;
+	__u8			nr_blocks;
+	__u8			nr_redundant;
+
+	__u8			csum_granularity_bits;
+	__u8			csum_type;
+	__u8			pad;
+
+	struct bch_extent_ptr	ptrs[];
+} __packed __aligned(8);
+
+#endif /* _BCACHEFS_EC_FORMAT_H */
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 82ec056f4cdb..61395b113df9 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -8,6 +8,7 @@
 
 #include "bcachefs.h"
 #include "bkey_methods.h"
+#include "btree_cache.h"
 #include "btree_gc.h"
 #include "btree_io.h"
 #include "btree_iter.h"
@@ -1018,12 +1019,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
 			struct bch_extent_crc_unpacked crc =
 				bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
 
-			prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress %s",
+			prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress ",
 			       crc.compressed_size,
 			       crc.uncompressed_size,
 			       crc.offset, crc.nonce,
-			       bch2_csum_types[crc.csum_type],
-			       bch2_compression_types[crc.compression_type]);
+			       bch2_csum_types[crc.csum_type]);
+			bch2_prt_compression_type(out, crc.compression_type);
 			break;
 		}
 		case BCH_EXTENT_ENTRY_stripe_ptr: {
@@ -1334,10 +1335,12 @@ bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k)
 }
 
 int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k,
-				  unsigned target, unsigned compression)
+				  struct bch_io_opts *opts)
 {
 	struct bkey_s k = bkey_i_to_s(_k);
 	struct bch_extent_rebalance *r;
+	unsigned target = opts->background_target;
+	unsigned compression = background_compression(*opts);
 	bool needs_rebalance;
 
 	if (!bkey_extent_is_direct_data(k.k))
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index a855c94d43dd..6bf839d69e84 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -708,7 +708,7 @@ unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bkey_s_c,
 bool bch2_bkey_needs_rebalance(struct bch_fs *, struct bkey_s_c);
 
 int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bkey_i *,
-				  unsigned, unsigned);
+				  struct bch_io_opts *);
 
 /* Generic extent code: */
 
diff --git a/fs/bcachefs/extents_format.h b/fs/bcachefs/extents_format.h
new file mode 100644
index 000000000000..3bd2fdbb0817
--- /dev/null
+++ b/fs/bcachefs/extents_format.h
@@ -0,0 +1,295 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_EXTENTS_FORMAT_H
+#define _BCACHEFS_EXTENTS_FORMAT_H
+
+/*
+ * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally
+ * preceded by checksum/compression information (bch_extent_crc32 or
+ * bch_extent_crc64).
+ *
+ * One major determining factor in the format of extents is how we handle and
+ * represent extents that have been partially overwritten and thus trimmed:
+ *
+ * If an extent is not checksummed or compressed, when the extent is trimmed we
+ * don't have to remember the extent we originally allocated and wrote: we can
+ * merely adjust ptr->offset to point to the start of the data that is currently
+ * live. The size field in struct bkey records the current (live) size of the
+ * extent, and is also used to mean "size of region on disk that we point to" in
+ * this case.
+ *
+ * Thus an extent that is not checksummed or compressed will consist only of a
+ * list of bch_extent_ptrs, with none of the fields in
+ * bch_extent_crc32/bch_extent_crc64.
+ *
+ * When an extent is checksummed or compressed, it's not possible to read only
+ * the data that is currently live: we have to read the entire extent that was
+ * originally written, and then return only the part of the extent that is
+ * currently live.
+ *
+ * Thus, in addition to the current size of the extent in struct bkey, we need
+ * to store the size of the originally allocated space - this is the
+ * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also,
+ * when the extent is trimmed, instead of modifying the offset field of the
+ * pointer, we keep a second smaller offset field - "offset into the original
+ * extent of the currently live region".
+ *
+ * The other major determining factor is replication and data migration:
+ *
+ * Each pointer may have its own bch_extent_crc32/64. When doing a replicated
+ * write, we will initially write all the replicas in the same format, with the
+ * same checksum type and compression format - however, when copygc runs later (or
+ * tiering/cache promotion, anything that moves data), it is not in general
+ * going to rewrite all the pointers at once - one of the replicas may be in a
+ * bucket on one device that has very little fragmentation while another lives
+ * in a bucket that has become heavily fragmented, and thus is being rewritten
+ * sooner than the rest.
+ *
+ * Thus it will only move a subset of the pointers (or in the case of
+ * tiering/cache promotion perhaps add a single pointer without dropping any
+ * current pointers), and if the extent has been partially overwritten it must
+ * write only the currently live portion (or copygc would not be able to reduce
+ * fragmentation!) - which necessitates a different bch_extent_crc format for
+ * the new pointer.
+ *
+ * But in the interests of space efficiency, we don't want to store one
+ * bch_extent_crc for each pointer if we don't have to.
+ *
+ * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and
+ * bch_extent_ptrs appended arbitrarily one after the other. We determine the
+ * type of a given entry with a scheme similar to utf8 (except we're encoding a
+ * type, not a size), encoding the type in the position of the first set bit:
+ *
+ * bch_extent_crc32	- 0b10
+ * bch_extent_ptr	- 0b1
+ * bch_extent_crc64	- 0b100
+ *
+ * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and
+ * bch_extent_crc64 is the least constrained).
+ *
+ * Then, each bch_extent_crc32/64 applies to the pointers that follow after it,
+ * until the next bch_extent_crc32/64.
+ *
+ * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer
+ * is neither checksummed nor compressed.
+ */
+
+#define BCH_EXTENT_ENTRY_TYPES()		\
+	x(ptr,			0)		\
+	x(crc32,		1)		\
+	x(crc64,		2)		\
+	x(crc128,		3)		\
+	x(stripe_ptr,		4)		\
+	x(rebalance,		5)
+#define BCH_EXTENT_ENTRY_MAX	6
+
+enum bch_extent_entry_type {
+#define x(f, n) BCH_EXTENT_ENTRY_##f = n,
+	BCH_EXTENT_ENTRY_TYPES()
+#undef x
+};
+
+/* Compressed/uncompressed size are stored biased by 1: */
+struct bch_extent_crc32 {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u32			type:2,
+				_compressed_size:7,
+				_uncompressed_size:7,
+				offset:7,
+				_unused:1,
+				csum_type:4,
+				compression_type:4;
+	__u32			csum;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+	__u32			csum;
+	__u32			compression_type:4,
+				csum_type:4,
+				_unused:1,
+				offset:7,
+				_uncompressed_size:7,
+				_compressed_size:7,
+				type:2;
+#endif
+} __packed __aligned(8);
+
+#define CRC32_SIZE_MAX		(1U << 7)
+#define CRC32_NONCE_MAX		0
+
+struct bch_extent_crc64 {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u64			type:3,
+				_compressed_size:9,
+				_uncompressed_size:9,
+				offset:9,
+				nonce:10,
+				csum_type:4,
+				compression_type:4,
+				csum_hi:16;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+	__u64			csum_hi:16,
+				compression_type:4,
+				csum_type:4,
+				nonce:10,
+				offset:9,
+				_uncompressed_size:9,
+				_compressed_size:9,
+				type:3;
+#endif
+	__u64			csum_lo;
+} __packed __aligned(8);
+
+#define CRC64_SIZE_MAX		(1U << 9)
+#define CRC64_NONCE_MAX		((1U << 10) - 1)
+
+struct bch_extent_crc128 {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u64			type:4,
+				_compressed_size:13,
+				_uncompressed_size:13,
+				offset:13,
+				nonce:13,
+				csum_type:4,
+				compression_type:4;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+	__u64			compression_type:4,
+				csum_type:4,
+				nonce:13,
+				offset:13,
+				_uncompressed_size:13,
+				_compressed_size:13,
+				type:4;
+#endif
+	struct bch_csum		csum;
+} __packed __aligned(8);
+
+#define CRC128_SIZE_MAX		(1U << 13)
+#define CRC128_NONCE_MAX	((1U << 13) - 1)
+
+/*
+ * @unwritten - pointer hasn't been written to, just reserved
+ */
+struct bch_extent_ptr {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u64			type:1,
+				cached:1,
+				unused:1,
+				unwritten:1,
+				offset:44, /* 8 petabytes */
+				dev:8,
+				gen:8;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+	__u64			gen:8,
+				dev:8,
+				offset:44,
+				unwritten:1,
+				unused:1,
+				cached:1,
+				type:1;
+#endif
+} __packed __aligned(8);
+
+struct bch_extent_stripe_ptr {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u64			type:5,
+				block:8,
+				redundancy:4,
+				idx:47;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+	__u64			idx:47,
+				redundancy:4,
+				block:8,
+				type:5;
+#endif
+};
+
+struct bch_extent_rebalance {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u64			type:6,
+				unused:34,
+				compression:8, /* enum bch_compression_opt */
+				target:16;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+	__u64			target:16,
+				compression:8,
+				unused:34,
+				type:6;
+#endif
+};
+
+union bch_extent_entry {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ||  __BITS_PER_LONG == 64
+	unsigned long			type;
+#elif __BITS_PER_LONG == 32
+	struct {
+		unsigned long		pad;
+		unsigned long		type;
+	};
+#else
+#error edit for your odd byteorder.
+#endif
+
+#define x(f, n) struct bch_extent_##f	f;
+	BCH_EXTENT_ENTRY_TYPES()
+#undef x
+};
+
+struct bch_btree_ptr {
+	struct bch_val		v;
+
+	__u64			_data[0];
+	struct bch_extent_ptr	start[];
+} __packed __aligned(8);
+
+struct bch_btree_ptr_v2 {
+	struct bch_val		v;
+
+	__u64			mem_ptr;
+	__le64			seq;
+	__le16			sectors_written;
+	__le16			flags;
+	struct bpos		min_key;
+	__u64			_data[0];
+	struct bch_extent_ptr	start[];
+} __packed __aligned(8);
+
+LE16_BITMASK(BTREE_PTR_RANGE_UPDATED,	struct bch_btree_ptr_v2, flags, 0, 1);
+
+struct bch_extent {
+	struct bch_val		v;
+
+	__u64			_data[0];
+	union bch_extent_entry	start[];
+} __packed __aligned(8);
+
+/* Maximum size (in u64s) a single pointer could be: */
+#define BKEY_EXTENT_PTR_U64s_MAX\
+	((sizeof(struct bch_extent_crc128) +			\
+	  sizeof(struct bch_extent_ptr)) / sizeof(__u64))
+
+/* Maximum possible size of an entire extent value: */
+#define BKEY_EXTENT_VAL_U64s_MAX				\
+	(1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))
+
+/* Maximum possible size of an entire extent, key + value: */
+#define BKEY_EXTENT_U64s_MAX		(BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX)
+
+/* Btree pointers don't carry around checksums: */
+#define BKEY_BTREE_PTR_VAL_U64s_MAX				\
+	((sizeof(struct bch_btree_ptr_v2) +			\
+	  sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64))
+#define BKEY_BTREE_PTR_U64s_MAX					\
+	(BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX)
+
+struct bch_reservation {
+	struct bch_val		v;
+
+	__le32			generation;
+	__u8			nr_replicas;
+	__u8			pad[3];
+} __packed __aligned(8);
+
+struct bch_inline_data {
+	struct bch_val		v;
+	u8			data[];
+};
+
+#endif /* _BCACHEFS_EXTENTS_FORMAT_H */
diff --git a/fs/bcachefs/eytzinger.h b/fs/bcachefs/eytzinger.h
index 9637f636e32d..b04750dbf870 100644
--- a/fs/bcachefs/eytzinger.h
+++ b/fs/bcachefs/eytzinger.h
@@ -156,7 +156,7 @@ static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size)
 }
 
 #define eytzinger1_for_each(_i, _size)			\
-	for ((_i) = eytzinger1_first((_size));		\
+	for (unsigned (_i) = eytzinger1_first((_size));	\
 	     (_i) != 0;					\
 	     (_i) = eytzinger1_next((_i), (_size)))
 
@@ -227,7 +227,7 @@ static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
 }
 
 #define eytzinger0_for_each(_i, _size)			\
-	for ((_i) = eytzinger0_first((_size));		\
+	for (unsigned (_i) = eytzinger0_first((_size));	\
 	     (_i) != -1;				\
 	     (_i) = eytzinger0_next((_i), (_size)))
 
diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c
index fdd57c5785c9..e3b219e19e10 100644
--- a/fs/bcachefs/fs-io-direct.c
+++ b/fs/bcachefs/fs-io-direct.c
@@ -77,6 +77,10 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
 
 	bch2_inode_opts_get(&opts, c, &inode->ei_inode);
 
+	/* bios must be 512 byte aligned: */
+	if ((offset|iter->count) & (SECTOR_SIZE - 1))
+		return -EINVAL;
+
 	ret = min_t(loff_t, iter->count,
 		    max_t(loff_t, 0, i_size_read(&inode->v) - offset));
 
diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c
index ff664fd0d8ef..d359aa9b33b8 100644
--- a/fs/bcachefs/fs-io-pagecache.c
+++ b/fs/bcachefs/fs-io-pagecache.c
@@ -309,39 +309,49 @@ void bch2_mark_pagecache_unallocated(struct bch_inode_info *inode,
 	}
 }
 
-void bch2_mark_pagecache_reserved(struct bch_inode_info *inode,
-				  u64 start, u64 end)
+int bch2_mark_pagecache_reserved(struct bch_inode_info *inode,
+				 u64 *start, u64 end,
+				 bool nonblocking)
 {
 	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-	pgoff_t index = start >> PAGE_SECTORS_SHIFT;
+	pgoff_t index = *start >> PAGE_SECTORS_SHIFT;
 	pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
 	struct folio_batch fbatch;
 	s64 i_sectors_delta = 0;
-	unsigned i, j;
+	int ret = 0;
 
-	if (end <= start)
-		return;
+	if (end <= *start)
+		return 0;
 
 	folio_batch_init(&fbatch);
 
 	while (filemap_get_folios(inode->v.i_mapping,
 				  &index, end_index, &fbatch)) {
-		for (i = 0; i < folio_batch_count(&fbatch); i++) {
+		for (unsigned i = 0; i < folio_batch_count(&fbatch); i++) {
 			struct folio *folio = fbatch.folios[i];
+
+			if (!nonblocking)
+				folio_lock(folio);
+			else if (!folio_trylock(folio)) {
+				folio_batch_release(&fbatch);
+				ret = -EAGAIN;
+				break;
+			}
+
 			u64 folio_start = folio_sector(folio);
 			u64 folio_end = folio_end_sector(folio);
-			unsigned folio_offset = max(start, folio_start) - folio_start;
-			unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
-			struct bch_folio *s;
 
 			BUG_ON(end <= folio_start);
 
-			folio_lock(folio);
-			s = bch2_folio(folio);
+			/* Derive the range from the caller's *start before advancing it: */
+			unsigned folio_offset = max(*start, folio_start) - folio_start;
+			unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
+			*start = min(end, folio_end);
 
+			struct bch_folio *s = bch2_folio(folio);
 			if (s) {
 				spin_lock(&s->lock);
-				for (j = folio_offset; j < folio_offset + folio_len; j++) {
+				for (unsigned j = folio_offset; j < folio_offset + folio_len; j++) {
 					i_sectors_delta -= s->s[j].state == SECTOR_dirty;
 					bch2_folio_sector_set(folio, s, j,
 						folio_sector_reserve(s->s[j].state));
@@ -356,6 +366,7 @@ void bch2_mark_pagecache_reserved(struct bch_inode_info *inode,
 	}
 
 	bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
+	return ret;
 }
 
 static inline unsigned sectors_to_reserve(struct bch_folio_sector *s,
diff --git a/fs/bcachefs/fs-io-pagecache.h b/fs/bcachefs/fs-io-pagecache.h
index 27f712ae37a6..8cbaba6565b4 100644
--- a/fs/bcachefs/fs-io-pagecache.h
+++ b/fs/bcachefs/fs-io-pagecache.h
@@ -143,7 +143,7 @@ int bch2_folio_set(struct bch_fs *, subvol_inum, struct folio **, unsigned);
 void bch2_bio_page_state_set(struct bio *, struct bkey_s_c);
 
 void bch2_mark_pagecache_unallocated(struct bch_inode_info *, u64, u64);
-void bch2_mark_pagecache_reserved(struct bch_inode_info *, u64, u64);
+int bch2_mark_pagecache_reserved(struct bch_inode_info *, u64 *, u64, bool);
 
 int bch2_get_folio_disk_reservation(struct bch_fs *,
 				struct bch_inode_info *,
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index 98bd5babab19..dc52918d06ef 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -675,8 +675,11 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
 
 		bch2_i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
 
-		drop_locks_do(trans,
-			(bch2_mark_pagecache_reserved(inode, hole_start, iter.pos.offset), 0));
+		if (bch2_mark_pagecache_reserved(inode, &hole_start,
+						 iter.pos.offset, true))
+			drop_locks_do(trans,
+				bch2_mark_pagecache_reserved(inode, &hole_start,
+							     iter.pos.offset, false));
 bkey_err:
 		bch2_quota_reservation_put(c, inode, &quota_res);
 		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
index 1cbc5807bc80..3a4c24c28e7f 100644
--- a/fs/bcachefs/fs-ioctl.c
+++ b/fs/bcachefs/fs-ioctl.c
@@ -337,11 +337,12 @@ static long __bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
 	if (arg.flags & BCH_SUBVOL_SNAPSHOT_RO)
 		create_flags |= BCH_CREATE_SNAPSHOT_RO;
 
-	/* why do we need this lock? */
-	down_read(&c->vfs_sb->s_umount);
-
-	if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE)
+	if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) {
+		/* sync_inodes_sb() enforces that s_umount is locked */
+		down_read(&c->vfs_sb->s_umount);
 		sync_inodes_sb(c->vfs_sb);
+		up_read(&c->vfs_sb->s_umount);
+	}
 retry:
 	if (arg.src_ptr) {
 		error = user_path_at(arg.dirfd,
@@ -425,8 +426,6 @@ err2:
 		goto retry;
 	}
 err1:
-	up_read(&c->vfs_sb->s_umount);
-
 	return error;
 }
 
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index 37dce96f48ac..086f0090b03a 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -506,22 +506,33 @@ fsck_err:
 static void __bch2_inode_unpacked_to_text(struct printbuf *out,
 					  struct bch_inode_unpacked *inode)
 {
-	prt_printf(out, "mode=%o ", inode->bi_mode);
+	printbuf_indent_add(out, 2);
+	prt_printf(out, "mode=%o", inode->bi_mode);
+	prt_newline(out);
 
 	prt_str(out, "flags=");
 	prt_bitflags(out, bch2_inode_flag_strs, inode->bi_flags & ((1U << 20) - 1));
 	prt_printf(out, " (%x)", inode->bi_flags);
+	prt_newline(out);
 
-	prt_printf(out, " journal_seq=%llu bi_size=%llu bi_sectors=%llu bi_version=%llu",
-	       inode->bi_journal_seq,
-	       inode->bi_size,
-	       inode->bi_sectors,
-	       inode->bi_version);
+	prt_printf(out, "journal_seq=%llu", inode->bi_journal_seq);
+	prt_newline(out);
+
+	prt_printf(out, "bi_size=%llu", inode->bi_size);
+	prt_newline(out);
+
+	prt_printf(out, "bi_sectors=%llu", inode->bi_sectors);
+	prt_newline(out);
+
+	prt_printf(out, "bi_version=%llu", inode->bi_version);
+	prt_newline(out);
 
 #define x(_name, _bits)						\
-	prt_printf(out, " "#_name "=%llu", (u64) inode->_name);
+	prt_printf(out, #_name "=%llu", (u64) inode->_name);	\
+	prt_newline(out);
 	BCH_INODE_FIELDS_v3()
 #undef  x
+	printbuf_indent_sub(out, 2);
 }
 
 void bch2_inode_unpacked_to_text(struct printbuf *out, struct bch_inode_unpacked *inode)
@@ -587,7 +598,7 @@ int bch2_trigger_inode(struct btree_trans *trans,
 		}
 	}
 
-	if (!(flags & BTREE_TRIGGER_TRANSACTIONAL) && (flags & BTREE_TRIGGER_INSERT)) {
+	if ((flags & BTREE_TRIGGER_ATOMIC) && (flags & BTREE_TRIGGER_INSERT)) {
 		BUG_ON(!trans->journal_res.seq);
 
 		bkey_s_to_inode_v3(new).v->bi_journal_seq = cpu_to_le64(trans->journal_res.seq);
@@ -597,7 +608,7 @@ int bch2_trigger_inode(struct btree_trans *trans,
 		struct bch_fs *c = trans->c;
 
 		percpu_down_read(&c->mark_lock);
-		this_cpu_add(c->usage_gc->nr_inodes, nr);
+		this_cpu_add(c->usage_gc->b.nr_inodes, nr);
 		percpu_up_read(&c->mark_lock);
 	}
 
diff --git a/fs/bcachefs/inode_format.h b/fs/bcachefs/inode_format.h
new file mode 100644
index 000000000000..83d107331edf
--- /dev/null
+++ b/fs/bcachefs/inode_format.h
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_INODE_FORMAT_H
+#define _BCACHEFS_INODE_FORMAT_H
+
+#define BLOCKDEV_INODE_MAX	4096
+#define BCACHEFS_ROOT_INO	4096
+
+struct bch_inode {
+	struct bch_val		v;
+
+	__le64			bi_hash_seed;
+	__le32			bi_flags;
+	__le16			bi_mode;
+	__u8			fields[];
+} __packed __aligned(8);
+
+struct bch_inode_v2 {
+	struct bch_val		v;
+
+	__le64			bi_journal_seq;
+	__le64			bi_hash_seed;
+	__le64			bi_flags;
+	__le16			bi_mode;
+	__u8			fields[];
+} __packed __aligned(8);
+
+struct bch_inode_v3 {
+	struct bch_val		v;
+
+	__le64			bi_journal_seq;
+	__le64			bi_hash_seed;
+	__le64			bi_flags;
+	__le64			bi_sectors;
+	__le64			bi_size;
+	__le64			bi_version;
+	__u8			fields[];
+} __packed __aligned(8);
+
+#define INODEv3_FIELDS_START_INITIAL	6
+#define INODEv3_FIELDS_START_CUR	(offsetof(struct bch_inode_v3, fields) / sizeof(__u64))
+
+struct bch_inode_generation {
+	struct bch_val		v;
+
+	__le32			bi_generation;
+	__le32			pad;
+} __packed __aligned(8);
+
+/*
+ * bi_subvol and bi_parent_subvol are only set for subvolume roots:
+ */
+
+#define BCH_INODE_FIELDS_v2()			\
+	x(bi_atime,			96)	\
+	x(bi_ctime,			96)	\
+	x(bi_mtime,			96)	\
+	x(bi_otime,			96)	\
+	x(bi_size,			64)	\
+	x(bi_sectors,			64)	\
+	x(bi_uid,			32)	\
+	x(bi_gid,			32)	\
+	x(bi_nlink,			32)	\
+	x(bi_generation,		32)	\
+	x(bi_dev,			32)	\
+	x(bi_data_checksum,		8)	\
+	x(bi_compression,		8)	\
+	x(bi_project,			32)	\
+	x(bi_background_compression,	8)	\
+	x(bi_data_replicas,		8)	\
+	x(bi_promote_target,		16)	\
+	x(bi_foreground_target,		16)	\
+	x(bi_background_target,		16)	\
+	x(bi_erasure_code,		16)	\
+	x(bi_fields_set,		16)	\
+	x(bi_dir,			64)	\
+	x(bi_dir_offset,		64)	\
+	x(bi_subvol,			32)	\
+	x(bi_parent_subvol,		32)
+
+#define BCH_INODE_FIELDS_v3()			\
+	x(bi_atime,			96)	\
+	x(bi_ctime,			96)	\
+	x(bi_mtime,			96)	\
+	x(bi_otime,			96)	\
+	x(bi_uid,			32)	\
+	x(bi_gid,			32)	\
+	x(bi_nlink,			32)	\
+	x(bi_generation,		32)	\
+	x(bi_dev,			32)	\
+	x(bi_data_checksum,		8)	\
+	x(bi_compression,		8)	\
+	x(bi_project,			32)	\
+	x(bi_background_compression,	8)	\
+	x(bi_data_replicas,		8)	\
+	x(bi_promote_target,		16)	\
+	x(bi_foreground_target,		16)	\
+	x(bi_background_target,		16)	\
+	x(bi_erasure_code,		16)	\
+	x(bi_fields_set,		16)	\
+	x(bi_dir,			64)	\
+	x(bi_dir_offset,		64)	\
+	x(bi_subvol,			32)	\
+	x(bi_parent_subvol,		32)	\
+	x(bi_nocow,			8)
+
+/* subset of BCH_INODE_FIELDS */
+#define BCH_INODE_OPTS()			\
+	x(data_checksum,		8)	\
+	x(compression,			8)	\
+	x(project,			32)	\
+	x(background_compression,	8)	\
+	x(data_replicas,		8)	\
+	x(promote_target,		16)	\
+	x(foreground_target,		16)	\
+	x(background_target,		16)	\
+	x(erasure_code,			16)	\
+	x(nocow,			8)
+
+enum inode_opt_id {
+#define x(name, ...)				\
+	Inode_opt_##name,
+	BCH_INODE_OPTS()
+#undef  x
+	Inode_opt_nr,
+};
+
+#define BCH_INODE_FLAGS()			\
+	x(sync,				0)	\
+	x(immutable,			1)	\
+	x(append,			2)	\
+	x(nodump,			3)	\
+	x(noatime,			4)	\
+	x(i_size_dirty,			5)	\
+	x(i_sectors_dirty,		6)	\
+	x(unlinked,			7)	\
+	x(backptr_untrusted,		8)
+
+/* bits 20+ reserved for packed fields below: */
+
+enum bch_inode_flags {
+#define x(t, n)	BCH_INODE_##t = 1U << n,
+	BCH_INODE_FLAGS()
+#undef x
+};
+
+enum __bch_inode_flags {
+#define x(t, n)	__BCH_INODE_##t = n,
+	BCH_INODE_FLAGS()
+#undef x
+};
+
+LE32_BITMASK(INODE_STR_HASH,	struct bch_inode, bi_flags, 20, 24);
+LE32_BITMASK(INODE_NR_FIELDS,	struct bch_inode, bi_flags, 24, 31);
+LE32_BITMASK(INODE_NEW_VARINT,	struct bch_inode, bi_flags, 31, 32);
+
+LE64_BITMASK(INODEv2_STR_HASH,	struct bch_inode_v2, bi_flags, 20, 24);
+LE64_BITMASK(INODEv2_NR_FIELDS,	struct bch_inode_v2, bi_flags, 24, 31);
+
+LE64_BITMASK(INODEv3_STR_HASH,	struct bch_inode_v3, bi_flags, 20, 24);
+LE64_BITMASK(INODEv3_NR_FIELDS,	struct bch_inode_v3, bi_flags, 24, 31);
+
+LE64_BITMASK(INODEv3_FIELDS_START,
+				struct bch_inode_v3, bi_flags, 31, 36);
+LE64_BITMASK(INODEv3_MODE,	struct bch_inode_v3, bi_flags, 36, 52);
+
+#endif /* _BCACHEFS_INODE_FORMAT_H */
diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c
index ca6d5f516aa2..1baf78594cca 100644
--- a/fs/bcachefs/io_misc.c
+++ b/fs/bcachefs/io_misc.c
@@ -442,9 +442,7 @@ case LOGGED_OP_FINSERT_shift_extents:
 
 		op->v.pos = cpu_to_le64(insert ? bkey_start_offset(&delete.k) : delete.k.p.offset);
 
-		ret =   bch2_bkey_set_needs_rebalance(c, copy,
-					opts.background_target,
-					opts.background_compression) ?:
+		ret =   bch2_bkey_set_needs_rebalance(c, copy, &opts) ?:
 			bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?:
 			bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?:
 			bch2_logged_op_update(trans, &op->k_i) ?:
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index 33c0e783d546..ef3a53f9045a 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -362,9 +362,7 @@ static int bch2_write_index_default(struct bch_write_op *op)
 				     bkey_start_pos(&sk.k->k),
 				     BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
 
-		ret =   bch2_bkey_set_needs_rebalance(c, sk.k,
-					op->opts.background_target,
-					op->opts.background_compression) ?:
+		ret =   bch2_bkey_set_needs_rebalance(c, sk.k, &op->opts) ?:
 			bch2_extent_update(trans, inum, &iter, sk.k,
 					&op->res,
 					op->new_i_size, &op->i_sectors_delta,
@@ -1447,10 +1445,11 @@ err:
 			op->flags |= BCH_WRITE_DONE;
 
 			if (ret < 0) {
-				bch_err_inum_offset_ratelimited(c,
-					op->pos.inode,
-					op->pos.offset << 9,
-					"%s(): error: %s", __func__, bch2_err_str(ret));
+				if (!(op->flags & BCH_WRITE_ALLOC_NOWAIT))
+					bch_err_inum_offset_ratelimited(c,
+						op->pos.inode,
+						op->pos.offset << 9,
+						"%s(): error: %s", __func__, bch2_err_str(ret));
 				op->error = ret;
 				break;
 			}
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 8538ef34f62b..d71d26e39521 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -27,6 +27,47 @@ static const char * const bch2_journal_errors[] = {
 	NULL
 };
 
+static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u64 seq)
+{
+	union journal_res_state s = READ_ONCE(j->reservations);
+	unsigned i = seq & JOURNAL_BUF_MASK;
+	struct journal_buf *buf = j->buf + i;
+
+	prt_printf(out, "seq:");
+	prt_tab(out);
+	prt_printf(out, "%llu", seq);
+	prt_newline(out);
+	printbuf_indent_add(out, 2);
+
+	prt_printf(out, "refcount:");
+	prt_tab(out);
+	prt_printf(out, "%u", journal_state_count(s, i));
+	prt_newline(out);
+
+	prt_printf(out, "size:");
+	prt_tab(out);
+	prt_human_readable_u64(out, vstruct_bytes(buf->data));
+	prt_newline(out);
+
+	prt_printf(out, "expires:");
+	prt_tab(out);
+	prt_printf(out, "%li jiffies", buf->expires - jiffies);
+	prt_newline(out);
+
+	printbuf_indent_sub(out, 2);
+}
+
+static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j)
+{
+	if (!out->nr_tabstops)
+		printbuf_tabstop_push(out, 24);
+
+	for (u64 seq = journal_last_unwritten_seq(j);
+	     seq <= journal_cur_seq(j);
+	     seq++)
+		bch2_journal_buf_to_text(out, j, seq);
+}
+
 static inline bool journal_seq_unwritten(struct journal *j, u64 seq)
 {
 	return seq > j->seq_ondisk;
@@ -156,7 +197,7 @@ void bch2_journal_buf_put_final(struct journal *j, u64 seq, bool write)
  * We don't close a journal_buf until the next journal_buf is finished writing,
  * and can be opened again - this also initializes the next journal_buf:
  */
-static void __journal_entry_close(struct journal *j, unsigned closed_val)
+static void __journal_entry_close(struct journal *j, unsigned closed_val, bool trace)
 {
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	struct journal_buf *buf = journal_cur_buf(j);
@@ -185,7 +226,17 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val)
 	/* Close out old buffer: */
 	buf->data->u64s		= cpu_to_le32(old.cur_entry_offset);
 
-	trace_journal_entry_close(c, vstruct_bytes(buf->data));
+	if (trace_journal_entry_close_enabled() && trace) {
+		struct printbuf pbuf = PRINTBUF;
+		pbuf.atomic++;
+
+		prt_str(&pbuf, "entry size: ");
+		prt_human_readable_u64(&pbuf, vstruct_bytes(buf->data));
+		prt_newline(&pbuf);
+		bch2_prt_task_backtrace(&pbuf, current, 1);
+		trace_journal_entry_close(c, pbuf.buf);
+		printbuf_exit(&pbuf);
+	}
 
 	sectors = vstruct_blocks_plus(buf->data, c->block_bits,
 				      buf->u64s_reserved) << c->block_bits;
@@ -225,7 +276,7 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val)
 void bch2_journal_halt(struct journal *j)
 {
 	spin_lock(&j->lock);
-	__journal_entry_close(j, JOURNAL_ENTRY_ERROR_VAL);
+	__journal_entry_close(j, JOURNAL_ENTRY_ERROR_VAL, true);
 	if (!j->err_seq)
 		j->err_seq = journal_cur_seq(j);
 	journal_wake(j);
@@ -239,7 +290,7 @@ static bool journal_entry_want_write(struct journal *j)
 
 	/* Don't close it yet if we already have a write in flight: */
 	if (ret)
-		__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
+		__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true);
 	else if (nr_unwritten_journal_entries(j)) {
 		struct journal_buf *buf = journal_cur_buf(j);
 
@@ -406,7 +457,7 @@ static void journal_write_work(struct work_struct *work)
 	if (delta > 0)
 		mod_delayed_work(c->io_complete_wq, &j->write_work, delta);
 	else
-		__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
+		__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true);
 unlock:
 	spin_unlock(&j->lock);
 }
@@ -463,13 +514,21 @@ retry:
 	    buf->buf_size < JOURNAL_ENTRY_SIZE_MAX)
 		j->buf_size_want = max(j->buf_size_want, buf->buf_size << 1);
 
-	__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
+	__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, false);
 	ret = journal_entry_open(j);
 
 	if (ret == JOURNAL_ERR_max_in_flight) {
 		track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight],
 				   &j->max_in_flight_start, true);
-		trace_and_count(c, journal_entry_full, c);
+		if (trace_journal_entry_full_enabled()) {
+			struct printbuf buf = PRINTBUF;
+			buf.atomic++;
+
+			bch2_journal_bufs_to_text(&buf, j);
+			trace_journal_entry_full(c, buf.buf);
+			printbuf_exit(&buf);
+		}
+		count_event(c, journal_entry_full);
 	}
 unlock:
 	can_discard = j->can_discard;
@@ -549,7 +608,7 @@ void bch2_journal_entry_res_resize(struct journal *j,
 		/*
 		 * Not enough room in current journal entry, have to flush it:
 		 */
-		__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
+		__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true);
 	} else {
 		journal_cur_buf(j)->u64s_reserved += d;
 	}
@@ -606,7 +665,7 @@ recheck_need_open:
 		struct journal_res res = { 0 };
 
 		if (journal_entry_is_open(j))
-			__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
+			__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true);
 
 		spin_unlock(&j->lock);
 
@@ -786,7 +845,7 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou
 
 		if (buf->need_flush_to_write_buffer) {
 			if (seq == journal_cur_seq(j))
-				__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
+				__journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true);
 
 			union journal_res_state s;
 			s.v = atomic64_read_acquire(&j->reservations.counter);
@@ -1339,35 +1398,9 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
 	}
 
 	prt_newline(out);
-
-	for (u64 seq = journal_cur_seq(j);
-	     seq >= journal_last_unwritten_seq(j);
-	     --seq) {
-		unsigned i = seq & JOURNAL_BUF_MASK;
-
-		prt_printf(out, "unwritten entry:");
-		prt_tab(out);
-		prt_printf(out, "%llu", seq);
-		prt_newline(out);
-		printbuf_indent_add(out, 2);
-
-		prt_printf(out, "refcount:");
-		prt_tab(out);
-		prt_printf(out, "%u", journal_state_count(s, i));
-		prt_newline(out);
-
-		prt_printf(out, "sectors:");
-		prt_tab(out);
-		prt_printf(out, "%u", j->buf[i].sectors);
-		prt_newline(out);
-
-		prt_printf(out, "expires");
-		prt_tab(out);
-		prt_printf(out, "%li jiffies", j->buf[i].expires - jiffies);
-		prt_newline(out);
-
-		printbuf_indent_sub(out, 2);
-	}
+	prt_printf(out, "unwritten entries:");
+	prt_newline(out);
+	bch2_journal_bufs_to_text(out, j);
 
 	prt_printf(out,
 	       "replay done:\t\t%i\n",
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index b0f4dd491e12..04a1e79a5ed3 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -683,10 +683,7 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs
 	prt_printf(out, "dev=%u", le32_to_cpu(u->dev));
 
 	for (i = 0; i < nr_types; i++) {
-		if (i < BCH_DATA_NR)
-			prt_printf(out, " %s", bch2_data_types[i]);
-		else
-			prt_printf(out, " (unknown data type %u)", i);
+		bch2_prt_data_type(out, i);
 		prt_printf(out, ": buckets=%llu sectors=%llu fragmented=%llu",
 		       le64_to_cpu(u->d[i].buckets),
 		       le64_to_cpu(u->d[i].sectors),
diff --git a/fs/bcachefs/logged_ops_format.h b/fs/bcachefs/logged_ops_format.h
new file mode 100644
index 000000000000..6a4bf7129dba
--- /dev/null
+++ b/fs/bcachefs/logged_ops_format.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_LOGGED_OPS_FORMAT_H
+#define _BCACHEFS_LOGGED_OPS_FORMAT_H
+
+struct bch_logged_op_truncate {
+	struct bch_val		v;
+	__le32			subvol;
+	__le32			pad;
+	__le64			inum;
+	__le64			new_i_size;
+};
+
+enum logged_op_finsert_state {
+	LOGGED_OP_FINSERT_start,
+	LOGGED_OP_FINSERT_shift_extents,
+	LOGGED_OP_FINSERT_finish,
+};
+
+struct bch_logged_op_finsert {
+	struct bch_val		v;
+	__u8			state;
+	__u8			pad[3];
+	__le32			subvol;
+	__le64			inum;
+	__le64			dst_offset;
+	__le64			src_offset;
+	__le64			pos;
+};
+
+#endif /* _BCACHEFS_LOGGED_OPS_FORMAT_H */
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 7a33319dcd16..bf68ea49447b 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -6,9 +6,11 @@
 #include "backpointers.h"
 #include "bkey_buf.h"
 #include "btree_gc.h"
+#include "btree_io.h"
 #include "btree_update.h"
 #include "btree_update_interior.h"
 #include "btree_write_buffer.h"
+#include "compress.h"
 #include "disk_groups.h"
 #include "ec.h"
 #include "errcode.h"
@@ -34,12 +36,46 @@ const char * const bch2_data_ops_strs[] = {
 	NULL
 };
 
-static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k)
+static void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
+					  struct bch_io_opts *io_opts,
+					  struct data_update_opts *data_opts)
+{
+	printbuf_tabstop_push(out, 20);
+	prt_str(out, "rewrite ptrs:");
+	prt_tab(out);
+	bch2_prt_u64_base2(out, data_opts->rewrite_ptrs);
+	prt_newline(out);
+
+	prt_str(out, "kill ptrs: ");
+	prt_tab(out);
+	bch2_prt_u64_base2(out, data_opts->kill_ptrs);
+	prt_newline(out);
+
+	prt_str(out, "target: ");
+	prt_tab(out);
+	bch2_target_to_text(out, c, data_opts->target);
+	prt_newline(out);
+
+	prt_str(out, "compression: ");
+	prt_tab(out);
+	bch2_compression_opt_to_text(out, background_compression(*io_opts));
+	prt_newline(out);
+
+	prt_str(out, "extra replicas: ");
+	prt_tab(out);
+	prt_u64(out, data_opts->extra_replicas);
+}
+
+static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k,
+			       struct bch_io_opts *io_opts,
+			       struct data_update_opts *data_opts)
 {
 	if (trace_move_extent_enabled()) {
 		struct printbuf buf = PRINTBUF;
 
 		bch2_bkey_val_to_text(&buf, c, k);
+		prt_newline(&buf);
+		bch2_data_update_opts_to_text(&buf, c, io_opts, data_opts);
 		trace_move_extent(c, buf.buf);
 		printbuf_exit(&buf);
 	}
@@ -111,6 +147,15 @@ static void move_write(struct moving_io *io)
 		return;
 	}
 
+	if (trace_move_extent_write_enabled()) {
+		struct bch_fs *c = io->write.op.c;
+		struct printbuf buf = PRINTBUF;
+
+		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(io->write.k.k));
+		trace_move_extent_write(c, buf.buf);
+		printbuf_exit(&buf);
+	}
+
 	closure_get(&io->write.ctxt->cl);
 	atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);
 	atomic_inc(&io->write.ctxt->write_ios);
@@ -241,9 +286,10 @@ int bch2_move_extent(struct moving_context *ctxt,
 	unsigned sectors = k.k->size, pages;
 	int ret = -ENOMEM;
 
+	trace_move_extent2(c, k, &io_opts, &data_opts);
+
 	if (ctxt->stats)
 		ctxt->stats->pos = BBPOS(iter->btree_id, iter->pos);
-	trace_move_extent2(c, k);
 
 	bch2_data_update_opts_normalize(k, &data_opts);
 
@@ -759,6 +805,8 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
 			if (!b)
 				goto next;
 
+			unsigned sectors = btree_ptr_sectors_written(&b->key);
+
 			ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
 			bch2_trans_iter_exit(trans, &iter);
 
@@ -768,11 +816,10 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
 				goto err;
 
 			if (ctxt->rate)
-				bch2_ratelimit_increment(ctxt->rate,
-							 c->opts.btree_node_size >> 9);
+				bch2_ratelimit_increment(ctxt->rate, sectors);
 			if (ctxt->stats) {
-				atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_seen);
-				atomic64_add(c->opts.btree_node_size >> 9, &ctxt->stats->sectors_moved);
+				atomic64_add(sectors, &ctxt->stats->sectors_seen);
+				atomic64_add(sectors, &ctxt->stats->sectors_moved);
 			}
 		}
 next:
@@ -1083,9 +1130,9 @@ int bch2_data_job(struct bch_fs *c,
 
 void bch2_move_stats_to_text(struct printbuf *out, struct bch_move_stats *stats)
 {
-	prt_printf(out, "%s: data type=%s pos=",
-		   stats->name,
-		   bch2_data_types[stats->data_type]);
+	prt_printf(out, "%s: data type=", stats->name);
+	bch2_prt_data_type(out, stats->data_type);
+	prt_str(out, " pos=");
 	bch2_bbpos_to_text(out, stats->pos);
 	prt_newline(out);
 	printbuf_indent_add(out, 2);
diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c
index 8e6f230eac38..b1ed0b9a20d3 100644
--- a/fs/bcachefs/opts.c
+++ b/fs/bcachefs/opts.c
@@ -52,7 +52,7 @@ const char * const bch2_csum_opts[] = {
 	NULL
 };
 
-const char * const bch2_compression_types[] = {
+const char * const __bch2_compression_types[] = {
 	BCH_COMPRESSION_TYPES()
 	NULL
 };
@@ -72,7 +72,7 @@ const char * const bch2_str_hash_opts[] = {
 	NULL
 };
 
-const char * const bch2_data_types[] = {
+const char * const __bch2_data_types[] = {
 	BCH_DATA_TYPES()
 	NULL
 };
diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h
index 93a24fef4214..9a4b7faa3765 100644
--- a/fs/bcachefs/opts.h
+++ b/fs/bcachefs/opts.h
@@ -18,11 +18,11 @@ extern const char * const bch2_sb_compat[];
 extern const char * const __bch2_btree_ids[];
 extern const char * const bch2_csum_types[];
 extern const char * const bch2_csum_opts[];
-extern const char * const bch2_compression_types[];
+extern const char * const __bch2_compression_types[];
 extern const char * const bch2_compression_opts[];
 extern const char * const bch2_str_hash_types[];
 extern const char * const bch2_str_hash_opts[];
-extern const char * const bch2_data_types[];
+extern const char * const __bch2_data_types[];
 extern const char * const bch2_member_states[];
 extern const char * const bch2_jset_entry_types[];
 extern const char * const bch2_fs_usage_types[];
@@ -564,6 +564,11 @@ struct bch_io_opts {
 #undef x
 };
 
+static inline unsigned background_compression(struct bch_io_opts opts)
+{
+	return opts.background_compression ?: opts.compression;
+}
+
 struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts);
 bool bch2_opt_is_inode_opt(enum bch_opt_id);
 
diff --git a/fs/bcachefs/quota_format.h b/fs/bcachefs/quota_format.h
new file mode 100644
index 000000000000..dc34347ef6c7
--- /dev/null
+++ b/fs/bcachefs/quota_format.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_QUOTA_FORMAT_H
+#define _BCACHEFS_QUOTA_FORMAT_H
+
+/* KEY_TYPE_quota: */
+
+enum quota_types {
+	QTYP_USR		= 0,
+	QTYP_GRP		= 1,
+	QTYP_PRJ		= 2,
+	QTYP_NR			= 3,
+};
+
+enum quota_counters {
+	Q_SPC			= 0,
+	Q_INO			= 1,
+	Q_COUNTERS		= 2,
+};
+
+struct bch_quota_counter {
+	__le64			hardlimit;
+	__le64			softlimit;
+};
+
+struct bch_quota {
+	struct bch_val		v;
+	struct bch_quota_counter c[Q_COUNTERS];
+} __packed __aligned(8);
+
+/* BCH_SB_FIELD_quota: */
+
+struct bch_sb_quota_counter {
+	__le32				timelimit;
+	__le32				warnlimit;
+};
+
+struct bch_sb_quota_type {
+	__le64				flags;
+	struct bch_sb_quota_counter	c[Q_COUNTERS];
+};
+
+struct bch_sb_field_quota {
+	struct bch_sb_field		field;
+	struct bch_sb_quota_type	q[QTYP_NR];
+} __packed __aligned(8);
+
+#endif /* _BCACHEFS_QUOTA_FORMAT_H */
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index 95f46cb3b5bd..22d1017aa49b 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -177,8 +177,7 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
 		prt_str(&buf, "target=");
 		bch2_target_to_text(&buf, c, r->target);
 		prt_str(&buf, " compression=");
-		struct bch_compression_opt opt = __bch2_compression_decode(r->compression);
-		prt_str(&buf, bch2_compression_opts[opt.type]);
+		bch2_compression_opt_to_text(&buf, r->compression);
 		prt_str(&buf, " ");
 		bch2_bkey_val_to_text(&buf, c, k);
 
@@ -254,13 +253,12 @@ static bool rebalance_pred(struct bch_fs *c, void *arg,
 
 	if (k.k->p.inode) {
 		target		= io_opts->background_target;
-		compression	= io_opts->background_compression ?: io_opts->compression;
+		compression	= background_compression(*io_opts);
 	} else {
 		const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k);
 
 		target		= r ? r->target : io_opts->background_target;
-		compression	= r ? r->compression :
-			(io_opts->background_compression ?: io_opts->compression);
+		compression	= r ? r->compression : background_compression(*io_opts);
 	}
 
 	data_opts->rewrite_ptrs		= bch2_bkey_ptrs_need_rebalance(c, k, target, compression);
@@ -371,6 +369,7 @@ static int do_rebalance(struct moving_context *ctxt)
 	    !kthread_should_stop() &&
 	    !atomic64_read(&r->work_stats.sectors_seen) &&
 	    !atomic64_read(&r->scan_stats.sectors_seen)) {
+		bch2_moving_ctxt_flush_all(ctxt);
 		bch2_trans_unlock_long(trans);
 		rebalance_wait(c);
 	}
@@ -385,7 +384,6 @@ static int bch2_rebalance_thread(void *arg)
 	struct bch_fs *c = arg;
 	struct bch_fs_rebalance *r = &c->rebalance;
 	struct moving_context ctxt;
-	int ret;
 
 	set_freezable();
 
@@ -393,8 +391,7 @@ static int bch2_rebalance_thread(void *arg)
 			      writepoint_ptr(&c->rebalance_write_point),
 			      true);
 
-	while (!kthread_should_stop() &&
-	       !(ret = do_rebalance(&ctxt)))
+	while (!kthread_should_stop() && !do_rebalance(&ctxt))
 		;
 
 	bch2_moving_ctxt_exit(&ctxt);
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 725214605a05..9127d0e3ca2f 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -280,7 +280,7 @@ static int journal_replay_entry_early(struct bch_fs *c,
 					le64_to_cpu(u->v);
 			break;
 		case BCH_FS_USAGE_inodes:
-			c->usage_base->nr_inodes = le64_to_cpu(u->v);
+			c->usage_base->b.nr_inodes = le64_to_cpu(u->v);
 			break;
 		case BCH_FS_USAGE_key_version:
 			atomic64_set(&c->key_version,
diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
index faa5d3670058..c47c66c2b394 100644
--- a/fs/bcachefs/reflink.c
+++ b/fs/bcachefs/reflink.c
@@ -292,10 +292,10 @@ static inline void check_indirect_extent_deleting(struct bkey_s new, unsigned *f
 	}
 }
 
-int bch2_trans_mark_reflink_v(struct btree_trans *trans,
-			      enum btree_id btree_id, unsigned level,
-			      struct bkey_s_c old, struct bkey_s new,
-			      unsigned flags)
+int bch2_trigger_reflink_v(struct btree_trans *trans,
+			   enum btree_id btree_id, unsigned level,
+			   struct bkey_s_c old, struct bkey_s new,
+			   unsigned flags)
 {
 	if ((flags & BTREE_TRIGGER_TRANSACTIONAL) &&
 	    (flags & BTREE_TRIGGER_INSERT))
@@ -324,7 +324,7 @@ void bch2_indirect_inline_data_to_text(struct printbuf *out,
 	       min(datalen, 32U), d.v->data);
 }
 
-int bch2_trans_mark_indirect_inline_data(struct btree_trans *trans,
+int bch2_trigger_indirect_inline_data(struct btree_trans *trans,
 			      enum btree_id btree_id, unsigned level,
 			      struct bkey_s_c old, struct bkey_s new,
 			      unsigned flags)
@@ -486,6 +486,13 @@ s64 bch2_remap_range(struct bch_fs *c,
 
 		bch2_btree_iter_set_snapshot(&dst_iter, dst_snapshot);
 
+		if (dst_inum.inum < src_inum.inum) {
+			/* Avoid some lock cycle transaction restarts */
+			ret = bch2_btree_iter_traverse(&dst_iter);
+			if (ret)
+				continue;
+		}
+
 		dst_done = dst_iter.pos.offset - dst_start.offset;
 		src_want = POS(src_start.inode, src_start.offset + dst_done);
 		bch2_btree_iter_set_pos(&src_iter, src_want);
@@ -538,9 +545,7 @@ s64 bch2_remap_range(struct bch_fs *c,
 				min(src_k.k->p.offset - src_want.offset,
 				    dst_end.offset - dst_iter.pos.offset));
 
-		ret =   bch2_bkey_set_needs_rebalance(c, new_dst.k,
-					opts.background_target,
-					opts.background_compression) ?:
+		ret =   bch2_bkey_set_needs_rebalance(c, new_dst.k, &opts) ?:
 			bch2_extent_update(trans, dst_inum, &dst_iter,
 					new_dst.k, &disk_res,
 					new_i_size, i_sectors_delta,
diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h
index 8ee778ec0022..4d8867289717 100644
--- a/fs/bcachefs/reflink.h
+++ b/fs/bcachefs/reflink.h
@@ -24,14 +24,14 @@ int bch2_reflink_v_invalid(struct bch_fs *, struct bkey_s_c,
 			   enum bkey_invalid_flags, struct printbuf *);
 void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *,
 			    struct bkey_s_c);
-int bch2_trans_mark_reflink_v(struct btree_trans *, enum btree_id, unsigned,
+int bch2_trigger_reflink_v(struct btree_trans *, enum btree_id, unsigned,
 			      struct bkey_s_c, struct bkey_s, unsigned);
 
 #define bch2_bkey_ops_reflink_v ((struct bkey_ops) {		\
 	.key_invalid	= bch2_reflink_v_invalid,		\
 	.val_to_text	= bch2_reflink_v_to_text,		\
 	.swab		= bch2_ptr_swab,			\
-	.trigger	= bch2_trans_mark_reflink_v,		\
+	.trigger	= bch2_trigger_reflink_v,		\
 	.min_val_size	= 8,					\
 })
 
@@ -39,7 +39,7 @@ int bch2_indirect_inline_data_invalid(struct bch_fs *, struct bkey_s_c,
 				      enum bkey_invalid_flags, struct printbuf *);
 void bch2_indirect_inline_data_to_text(struct printbuf *,
 				struct bch_fs *, struct bkey_s_c);
-int bch2_trans_mark_indirect_inline_data(struct btree_trans *,
+int bch2_trigger_indirect_inline_data(struct btree_trans *,
 					 enum btree_id, unsigned,
 			      struct bkey_s_c, struct bkey_s,
 			      unsigned);
@@ -47,7 +47,7 @@ int bch2_trans_mark_indirect_inline_data(struct btree_trans *,
 #define bch2_bkey_ops_indirect_inline_data ((struct bkey_ops) {	\
 	.key_invalid	= bch2_indirect_inline_data_invalid,	\
 	.val_to_text	= bch2_indirect_inline_data_to_text,	\
-	.trigger	= bch2_trans_mark_indirect_inline_data,	\
+	.trigger	= bch2_trigger_indirect_inline_data,	\
 	.min_val_size	= 8,					\
 })
 
diff --git a/fs/bcachefs/reflink_format.h b/fs/bcachefs/reflink_format.h
new file mode 100644
index 000000000000..6772eebb1fc6
--- /dev/null
+++ b/fs/bcachefs/reflink_format.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_REFLINK_FORMAT_H
+#define _BCACHEFS_REFLINK_FORMAT_H
+
+struct bch_reflink_p {
+	struct bch_val		v;
+	__le64			idx;
+	/*
+	 * A reflink pointer might point to an indirect extent which is then
+	 * later split (by copygc or rebalance). If we only pointed to part of
+	 * the original indirect extent, and then one of the fragments is
+	 * outside the range we point to, we'd leak a refcount: so when creating
+	 * reflink pointers, we need to store pad values to remember the full
+	 * range we were taking a reference on.
+	 */
+	__le32			front_pad;
+	__le32			back_pad;
+} __packed __aligned(8);
+
+struct bch_reflink_v {
+	struct bch_val		v;
+	__le64			refcount;
+	union bch_extent_entry	start[0];
+	__u64			_data[];
+} __packed __aligned(8);
+
+struct bch_indirect_inline_data {
+	struct bch_val		v;
+	__le64			refcount;
+	u8			data[];
+};
+
+#endif /* _BCACHEFS_REFLINK_FORMAT_H */
diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
index 92ba56ef1fc8..cc2672c12031 100644
--- a/fs/bcachefs/replicas.c
+++ b/fs/bcachefs/replicas.c
@@ -9,6 +9,12 @@
 static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
 					    struct bch_replicas_cpu *);
 
+/* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */
+static int bch2_memcmp(const void *l, const void *r, size_t size)
+{
+	return memcmp(l, r, size);
+}
+
 /* Replicas tracking - in memory: */
 
 static void verify_replicas_entry(struct bch_replicas_entry_v1 *e)
@@ -33,21 +39,16 @@ void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e)
 
 static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
 {
-	eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL);
+	eytzinger0_sort(r->entries, r->nr, r->entry_size, bch2_memcmp, NULL);
 }
 
 static void bch2_replicas_entry_v0_to_text(struct printbuf *out,
 					   struct bch_replicas_entry_v0 *e)
 {
-	unsigned i;
-
-	if (e->data_type < BCH_DATA_NR)
-		prt_printf(out, "%s", bch2_data_types[e->data_type]);
-	else
-		prt_printf(out, "(invalid data type %u)", e->data_type);
+	bch2_prt_data_type(out, e->data_type);
 
 	prt_printf(out, ": %u [", e->nr_devs);
-	for (i = 0; i < e->nr_devs; i++)
+	for (unsigned i = 0; i < e->nr_devs; i++)
 		prt_printf(out, i ? " %u" : "%u", e->devs[i]);
 	prt_printf(out, "]");
 }
@@ -55,15 +56,10 @@ static void bch2_replicas_entry_v0_to_text(struct printbuf *out,
 void bch2_replicas_entry_to_text(struct printbuf *out,
 				 struct bch_replicas_entry_v1 *e)
 {
-	unsigned i;
-
-	if (e->data_type < BCH_DATA_NR)
-		prt_printf(out, "%s", bch2_data_types[e->data_type]);
-	else
-		prt_printf(out, "(invalid data type %u)", e->data_type);
+	bch2_prt_data_type(out, e->data_type);
 
 	prt_printf(out, ": %u/%u [", e->nr_required, e->nr_devs);
-	for (i = 0; i < e->nr_devs; i++)
+	for (unsigned i = 0; i < e->nr_devs; i++)
 		prt_printf(out, i ? " %u" : "%u", e->devs[i]);
 	prt_printf(out, "]");
 }
@@ -831,7 +827,7 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
 	sort_cmp_size(cpu_r->entries,
 		      cpu_r->nr,
 		      cpu_r->entry_size,
-		      memcmp, NULL);
+		      bch2_memcmp, NULL);
 
 	for (i = 0; i < cpu_r->nr; i++) {
 		struct bch_replicas_entry_v1 *e =
diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c
index 9632f36f5f31..b6bf0ebe7e84 100644
--- a/fs/bcachefs/sb-clean.c
+++ b/fs/bcachefs/sb-clean.c
@@ -207,7 +207,7 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c,
 
 		u->entry.type	= BCH_JSET_ENTRY_usage;
 		u->entry.btree_id = BCH_FS_USAGE_inodes;
-		u->v		= cpu_to_le64(c->usage_base->nr_inodes);
+		u->v		= cpu_to_le64(c->usage_base->b.nr_inodes);
 	}
 
 	{
diff --git a/fs/bcachefs/counters.c b/fs/bcachefs/sb-counters.c
index 02a996e06a64..7dc898761bb3 100644
--- a/fs/bcachefs/counters.c
+++ b/fs/bcachefs/sb-counters.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "super-io.h"
-#include "counters.h"
+#include "sb-counters.h"
 
 /* BCH_SB_FIELD_counters */
 
diff --git a/fs/bcachefs/counters.h b/fs/bcachefs/sb-counters.h
index 4778aa19bf34..81f8aec9fcb1 100644
--- a/fs/bcachefs/counters.h
+++ b/fs/bcachefs/sb-counters.h
@@ -1,11 +1,10 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _BCACHEFS_COUNTERS_H
-#define _BCACHEFS_COUNTERS_H
+#ifndef _BCACHEFS_SB_COUNTERS_H
+#define _BCACHEFS_SB_COUNTERS_H
 
 #include "bcachefs.h"
 #include "super-io.h"
 
-
 int bch2_sb_counters_to_cpu(struct bch_fs *);
 int bch2_sb_counters_from_cpu(struct bch_fs *);
 
@@ -14,4 +13,4 @@ int bch2_fs_counters_init(struct bch_fs *);
 
 extern const struct bch_sb_field_ops bch_sb_field_ops_counters;
 
-#endif // _BCACHEFS_COUNTERS_H
+#endif // _BCACHEFS_SB_COUNTERS_H
diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h
new file mode 100644
index 000000000000..62ea478215d0
--- /dev/null
+++ b/fs/bcachefs/sb-counters_format.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SB_COUNTERS_FORMAT_H
+#define _BCACHEFS_SB_COUNTERS_FORMAT_H
+
+#define BCH_PERSISTENT_COUNTERS()				\
+	x(io_read,					0)	\
+	x(io_write,					1)	\
+	x(io_move,					2)	\
+	x(bucket_invalidate,				3)	\
+	x(bucket_discard,				4)	\
+	x(bucket_alloc,					5)	\
+	x(bucket_alloc_fail,				6)	\
+	x(btree_cache_scan,				7)	\
+	x(btree_cache_reap,				8)	\
+	x(btree_cache_cannibalize,			9)	\
+	x(btree_cache_cannibalize_lock,			10)	\
+	x(btree_cache_cannibalize_lock_fail,		11)	\
+	x(btree_cache_cannibalize_unlock,		12)	\
+	x(btree_node_write,				13)	\
+	x(btree_node_read,				14)	\
+	x(btree_node_compact,				15)	\
+	x(btree_node_merge,				16)	\
+	x(btree_node_split,				17)	\
+	x(btree_node_rewrite,				18)	\
+	x(btree_node_alloc,				19)	\
+	x(btree_node_free,				20)	\
+	x(btree_node_set_root,				21)	\
+	x(btree_path_relock_fail,			22)	\
+	x(btree_path_upgrade_fail,			23)	\
+	x(btree_reserve_get_fail,			24)	\
+	x(journal_entry_full,				25)	\
+	x(journal_full,					26)	\
+	x(journal_reclaim_finish,			27)	\
+	x(journal_reclaim_start,			28)	\
+	x(journal_write,				29)	\
+	x(read_promote,					30)	\
+	x(read_bounce,					31)	\
+	x(read_split,					33)	\
+	x(read_retry,					32)	\
+	x(read_reuse_race,				34)	\
+	x(move_extent_read,				35)	\
+	x(move_extent_write,				36)	\
+	x(move_extent_finish,				37)	\
+	x(move_extent_fail,				38)	\
+	x(move_extent_start_fail,			39)	\
+	x(copygc,					40)	\
+	x(copygc_wait,					41)	\
+	x(gc_gens_end,					42)	\
+	x(gc_gens_start,				43)	\
+	x(trans_blocked_journal_reclaim,		44)	\
+	x(trans_restart_btree_node_reused,		45)	\
+	x(trans_restart_btree_node_split,		46)	\
+	x(trans_restart_fault_inject,			47)	\
+	x(trans_restart_iter_upgrade,			48)	\
+	x(trans_restart_journal_preres_get,		49)	\
+	x(trans_restart_journal_reclaim,		50)	\
+	x(trans_restart_journal_res_get,		51)	\
+	x(trans_restart_key_cache_key_realloced,	52)	\
+	x(trans_restart_key_cache_raced,		53)	\
+	x(trans_restart_mark_replicas,			54)	\
+	x(trans_restart_mem_realloced,			55)	\
+	x(trans_restart_memory_allocation_failure,	56)	\
+	x(trans_restart_relock,				57)	\
+	x(trans_restart_relock_after_fill,		58)	\
+	x(trans_restart_relock_key_cache_fill,		59)	\
+	x(trans_restart_relock_next_node,		60)	\
+	x(trans_restart_relock_parent_for_fill,		61)	\
+	x(trans_restart_relock_path,			62)	\
+	x(trans_restart_relock_path_intent,		63)	\
+	x(trans_restart_too_many_iters,			64)	\
+	x(trans_restart_traverse,			65)	\
+	x(trans_restart_upgrade,			66)	\
+	x(trans_restart_would_deadlock,			67)	\
+	x(trans_restart_would_deadlock_write,		68)	\
+	x(trans_restart_injected,			69)	\
+	x(trans_restart_key_cache_upgrade,		70)	\
+	x(trans_traverse_all,				71)	\
+	x(transaction_commit,				72)	\
+	x(write_super,					73)	\
+	x(trans_restart_would_deadlock_recursion_limit,	74)	\
+	x(trans_restart_write_buffer_flush,		75)	\
+	x(trans_restart_split_race,			76)	\
+	x(write_buffer_flush_slowpath,			77)	\
+	x(write_buffer_flush_sync,			78)
+
+enum bch_persistent_counters {
+#define x(t, n, ...) BCH_COUNTER_##t,
+	BCH_PERSISTENT_COUNTERS()
+#undef x
+	BCH_COUNTER_NR
+};
+
+struct bch_sb_field_counters {
+	struct bch_sb_field	field;
+	__le64			d[];
+};
+
+#endif /* _BCACHEFS_SB_COUNTERS_FORMAT_H */
diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c
index a44a238bf8b5..a45354d2acde 100644
--- a/fs/bcachefs/sb-members.c
+++ b/fs/bcachefs/sb-members.c
@@ -251,7 +251,7 @@ static void member_to_text(struct printbuf *out,
 	prt_printf(out, "Data allowed:");
 	prt_tab(out);
 	if (BCH_MEMBER_DATA_ALLOWED(&m))
-		prt_bitflags(out, bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m));
+		prt_bitflags(out, __bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m));
 	else
 		prt_printf(out, "(none)");
 	prt_newline(out);
@@ -259,7 +259,7 @@ static void member_to_text(struct printbuf *out,
 	prt_printf(out, "Has data:");
 	prt_tab(out);
 	if (data_have)
-		prt_bitflags(out, bch2_data_types, data_have);
+		prt_bitflags(out, __bch2_data_types, data_have);
 	else
 		prt_printf(out, "(none)");
 	prt_newline(out);
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index 56af937523ff..45f67e8b29eb 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -1053,6 +1053,8 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
 		n->v.subvol	= cpu_to_le32(snapshot_subvols[i]);
 		n->v.tree	= cpu_to_le32(tree);
 		n->v.depth	= cpu_to_le32(depth);
+		n->v.btime.lo	= cpu_to_le64(bch2_current_time(c));
+		n->v.btime.hi	= 0;
 
 		for (j = 0; j < ARRAY_SIZE(n->v.skip); j++)
 			n->v.skip[j] = cpu_to_le32(bch2_snapshot_skiplist_get(c, parent));
@@ -1681,5 +1683,5 @@ int bch2_snapshots_read(struct bch_fs *c)
 
 void bch2_fs_snapshots_exit(struct bch_fs *c)
 {
-	kfree(rcu_dereference_protected(c->snapshots, true));
+	kvfree(rcu_dereference_protected(c->snapshots, true));
 }
diff --git a/fs/bcachefs/snapshot_format.h b/fs/bcachefs/snapshot_format.h
new file mode 100644
index 000000000000..aabcd3a74cd9
--- /dev/null
+++ b/fs/bcachefs/snapshot_format.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SNAPSHOT_FORMAT_H
+#define _BCACHEFS_SNAPSHOT_FORMAT_H
+
+struct bch_snapshot {
+	struct bch_val		v;
+	__le32			flags;
+	__le32			parent;
+	__le32			children[2];
+	__le32			subvol;
+	/* corresponds to a bch_snapshot_tree in BTREE_ID_snapshot_trees */
+	__le32			tree;
+	__le32			depth;
+	__le32			skip[3];
+	bch_le128		btime;
+};
+
+LE32_BITMASK(BCH_SNAPSHOT_DELETED,	struct bch_snapshot, flags,  0,  1)
+
+/* True if a subvolume points to this snapshot node: */
+LE32_BITMASK(BCH_SNAPSHOT_SUBVOL,	struct bch_snapshot, flags,  1,  2)
+
+/*
+ * Snapshot trees:
+ *
+ * The snapshot_trees btree gives us a persistent identifier for each tree of
+ * bch_snapshot nodes, and allows us to record and easily find the root/master
+ * subvolume that other snapshots were created from:
+ */
+struct bch_snapshot_tree {
+	struct bch_val		v;
+	__le32			master_subvol;
+	__le32			root_snapshot;
+};
+
+#endif /* _BCACHEFS_SNAPSHOT_FORMAT_H */
diff --git a/fs/bcachefs/subvolume_format.h b/fs/bcachefs/subvolume_format.h
new file mode 100644
index 000000000000..af79134b07d6
--- /dev/null
+++ b/fs/bcachefs/subvolume_format.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SUBVOLUME_FORMAT_H
+#define _BCACHEFS_SUBVOLUME_FORMAT_H
+
+#define SUBVOL_POS_MIN		POS(0, 1)
+#define SUBVOL_POS_MAX		POS(0, S32_MAX)
+#define BCACHEFS_ROOT_SUBVOL	1
+
+struct bch_subvolume {
+	struct bch_val		v;
+	__le32			flags;
+	__le32			snapshot;
+	__le64			inode;
+	/*
+	 * Snapshot subvolumes form a tree, separate from the snapshot nodes
+	 * tree - if this subvolume is a snapshot, this is the ID of the
+	 * subvolume it was created from:
+	 *
+	 * This is _not_ necessarily the subvolume of the directory containing
+	 * this subvolume:
+	 */
+	__le32			parent;
+	__le32			pad;
+	bch_le128		otime;
+};
+
+LE32_BITMASK(BCH_SUBVOLUME_RO,		struct bch_subvolume, flags,  0,  1)
+/*
+ * We need to know whether a subvolume is a snapshot so we can know whether we
+ * can delete it (or whether it should just be rm -rf'd)
+ */
+LE32_BITMASK(BCH_SUBVOLUME_SNAP,	struct bch_subvolume, flags,  1,  2)
+LE32_BITMASK(BCH_SUBVOLUME_UNLINKED,	struct bch_subvolume, flags,  2,  3)
+
+#endif /* _BCACHEFS_SUBVOLUME_FORMAT_H */
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index 6d3db5cce5f6..d60c7d27a047 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -2,7 +2,6 @@
 
 #include "bcachefs.h"
 #include "checksum.h"
-#include "counters.h"
 #include "disk_groups.h"
 #include "ec.h"
 #include "error.h"
@@ -13,6 +12,7 @@
 #include "replicas.h"
 #include "quota.h"
 #include "sb-clean.h"
+#include "sb-counters.h"
 #include "sb-downgrade.h"
 #include "sb-errors.h"
 #include "sb-members.h"
@@ -1321,7 +1321,9 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
 
 	prt_printf(out, "Superblock size:");
 	prt_tab(out);
-	prt_printf(out, "%zu", vstruct_bytes(sb));
+	prt_units_u64(out, vstruct_bytes(sb));
+	prt_str(out, "/");
+	prt_units_u64(out, 512ULL << sb->layout.sb_max_size_bits);
 	prt_newline(out);
 
 	prt_printf(out, "Clean:");
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 9dbc35940197..b9911402b175 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -23,7 +23,6 @@
 #include "checksum.h"
 #include "clock.h"
 #include "compress.h"
-#include "counters.h"
 #include "debug.h"
 #include "disk_groups.h"
 #include "ec.h"
@@ -49,6 +48,7 @@
 #include "recovery.h"
 #include "replicas.h"
 #include "sb-clean.h"
+#include "sb-counters.h"
 #include "sb-errors.h"
 #include "sb-members.h"
 #include "snapshot.h"
@@ -883,7 +883,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 	    !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) ||
 	    !(c->online_reserved = alloc_percpu(u64)) ||
 	    mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
-					btree_bytes(c)) ||
+					c->opts.btree_node_size) ||
 	    mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) ||
 	    !(c->unused_inode_hints = kcalloc(1U << c->inode_shard_bits,
 					      sizeof(u64), GFP_KERNEL))) {
@@ -1386,8 +1386,8 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
 	prt_bdevname(&name, ca->disk_sb.bdev);
 
 	if (c->sb.nr_devices == 1)
-		strlcpy(c->name, name.buf, sizeof(c->name));
-	strlcpy(ca->name, name.buf, sizeof(ca->name));
+		strscpy(c->name, name.buf, sizeof(c->name));
+	strscpy(ca->name, name.buf, sizeof(ca->name));
 
 	printbuf_exit(&name);
 
@@ -1625,7 +1625,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
 	if (data) {
 		struct printbuf data_has = PRINTBUF;
 
-		prt_bitflags(&data_has, bch2_data_types, data);
+		prt_bitflags(&data_has, __bch2_data_types, data);
 		bch_err(ca, "Remove failed, still has data (%s)", data_has.buf);
 		printbuf_exit(&data_has);
 		ret = -EBUSY;
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index 8ed52319ff68..cee80c47feea 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -21,6 +21,7 @@
 #include "btree_gc.h"
 #include "buckets.h"
 #include "clock.h"
+#include "compress.h"
 #include "disk_groups.h"
 #include "ec.h"
 #include "inode.h"
@@ -247,7 +248,7 @@ static size_t bch2_btree_cache_size(struct bch_fs *c)
 
 	mutex_lock(&c->btree_cache.lock);
 	list_for_each_entry(b, &c->btree_cache.live, list)
-		ret += btree_bytes(c);
+		ret += btree_buf_bytes(b);
 
 	mutex_unlock(&c->btree_cache.lock);
 	return ret;
@@ -330,7 +331,7 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
 	prt_newline(out);
 
 	for (unsigned i = 0; i < ARRAY_SIZE(s); i++) {
-		prt_str(out, bch2_compression_types[i]);
+		bch2_prt_compression_type(out, i);
 		prt_tab(out);
 
 		prt_human_readable_u64(out, s[i].sectors_compressed << 9);
@@ -725,8 +726,10 @@ STORE(bch2_fs_opts_dir)
 	bch2_opt_set_sb(c, opt, v);
 	bch2_opt_set_by_id(&c->opts, id, v);
 
-	if ((id == Opt_background_target ||
-	     id == Opt_background_compression) && v)
+	if (v &&
+	    (id == Opt_background_target ||
+	     id == Opt_background_compression ||
+	     (id == Opt_compression && !c->opts.background_compression)))
 		bch2_set_rebalance_needs_scan(c, 0);
 
 	ret = size;
@@ -883,7 +886,7 @@ static void dev_io_done_to_text(struct printbuf *out, struct bch_dev *ca)
 
 		for (i = 1; i < BCH_DATA_NR; i++)
 			prt_printf(out, "%-12s:%12llu\n",
-			       bch2_data_types[i],
+			       bch2_data_type_str(i),
 			       percpu_u64_get(&ca->io_done->sectors[rw][i]) << 9);
 	}
 }
@@ -908,7 +911,7 @@ SHOW(bch2_dev)
 	}
 
 	if (attr == &sysfs_has_data) {
-		prt_bitflags(out, bch2_data_types, bch2_dev_has_data(c, ca));
+		prt_bitflags(out, __bch2_data_types, bch2_dev_has_data(c, ca));
 		prt_char(out, '\n');
 	}
 
diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
index c94876b3bb06..293b90d704fb 100644
--- a/fs/bcachefs/trace.h
+++ b/fs/bcachefs/trace.h
@@ -46,7 +46,7 @@ DECLARE_EVENT_CLASS(fs_str,
 		__assign_str(str, str);
 	),
 
-	TP_printk("%d,%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(str))
+	TP_printk("%d,%d\n%s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(str))
 );
 
 DECLARE_EVENT_CLASS(trans_str,
@@ -273,28 +273,14 @@ DEFINE_EVENT(bch_fs, journal_full,
 	TP_ARGS(c)
 );
 
-DEFINE_EVENT(bch_fs, journal_entry_full,
-	TP_PROTO(struct bch_fs *c),
-	TP_ARGS(c)
+DEFINE_EVENT(fs_str, journal_entry_full,
+	TP_PROTO(struct bch_fs *c, const char *str),
+	TP_ARGS(c, str)
 );
 
-TRACE_EVENT(journal_entry_close,
-	TP_PROTO(struct bch_fs *c, unsigned bytes),
-	TP_ARGS(c, bytes),
-
-	TP_STRUCT__entry(
-		__field(dev_t,		dev			)
-		__field(u32,		bytes			)
-	),
-
-	TP_fast_assign(
-		__entry->dev			= c->dev;
-		__entry->bytes			= bytes;
-	),
-
-	TP_printk("%d,%d entry bytes %u",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->bytes)
+DEFINE_EVENT(fs_str, journal_entry_close,
+	TP_PROTO(struct bch_fs *c, const char *str),
+	TP_ARGS(c, str)
 );
 
 DEFINE_EVENT(bio, journal_write,
@@ -542,7 +528,7 @@ TRACE_EVENT(btree_path_relock_fail,
 		__entry->level			= path->level;
 		TRACE_BPOS_assign(pos, path->pos);
 
-		c = bch2_btree_node_lock_counts(trans, NULL, &path->l[level].b->c, level),
+		c = bch2_btree_node_lock_counts(trans, NULL, &path->l[level].b->c, level);
 		__entry->self_read_count	= c.n[SIX_LOCK_read];
 		__entry->self_intent_count	= c.n[SIX_LOCK_intent];
 
@@ -827,40 +813,28 @@ TRACE_EVENT(bucket_evacuate,
 );
 
 DEFINE_EVENT(fs_str, move_extent,
-	TP_PROTO(struct bch_fs *c, const char *k),
-	TP_ARGS(c, k)
+	TP_PROTO(struct bch_fs *c, const char *str),
+	TP_ARGS(c, str)
 );
 
 DEFINE_EVENT(fs_str, move_extent_read,
-	TP_PROTO(struct bch_fs *c, const char *k),
-	TP_ARGS(c, k)
+	TP_PROTO(struct bch_fs *c, const char *str),
+	TP_ARGS(c, str)
 );
 
 DEFINE_EVENT(fs_str, move_extent_write,
-	TP_PROTO(struct bch_fs *c, const char *k),
-	TP_ARGS(c, k)
+	TP_PROTO(struct bch_fs *c, const char *str),
+	TP_ARGS(c, str)
 );
 
 DEFINE_EVENT(fs_str, move_extent_finish,
-	TP_PROTO(struct bch_fs *c, const char *k),
-	TP_ARGS(c, k)
+	TP_PROTO(struct bch_fs *c, const char *str),
+	TP_ARGS(c, str)
 );
 
-TRACE_EVENT(move_extent_fail,
-	TP_PROTO(struct bch_fs *c, const char *msg),
-	TP_ARGS(c, msg),
-
-	TP_STRUCT__entry(
-		__field(dev_t,		dev			)
-		__string(msg,		msg			)
-	),
-
-	TP_fast_assign(
-		__entry->dev		= c->dev;
-		__assign_str(msg, msg);
-	),
-
-	TP_printk("%d:%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(msg))
+DEFINE_EVENT(fs_str, move_extent_fail,
+	TP_PROTO(struct bch_fs *c, const char *str),
+	TP_ARGS(c, str)
 );
 
 DEFINE_EVENT(fs_str, move_extent_start_fail,
@@ -1039,7 +1013,7 @@ TRACE_EVENT(trans_restart_split_race,
 		__entry->level		= b->c.level;
 		__entry->written	= b->written;
 		__entry->blocks		= btree_blocks(trans->c);
-		__entry->u64s_remaining	= bch_btree_keys_u64s_remaining(trans->c, b);
+		__entry->u64s_remaining	= bch2_btree_keys_u64s_remaining(b);
 	),
 
 	TP_printk("%s %pS l=%u written %u/%u u64s remaining %u",
@@ -1146,8 +1120,6 @@ DEFINE_EVENT(transaction_restart_iter,	trans_restart_btree_node_split,
 	TP_ARGS(trans, caller_ip, path)
 );
 
-struct get_locks_fail;
-
 TRACE_EVENT(trans_restart_upgrade,
 	TP_PROTO(struct btree_trans *trans,
 		 unsigned long caller_ip,
@@ -1195,11 +1167,9 @@ TRACE_EVENT(trans_restart_upgrade,
 		  __entry->node_seq)
 );
 
-DEFINE_EVENT(transaction_restart_iter,	trans_restart_relock,
-	TP_PROTO(struct btree_trans *trans,
-		 unsigned long caller_ip,
-		 struct btree_path *path),
-	TP_ARGS(trans, caller_ip, path)
+DEFINE_EVENT(trans_str,	trans_restart_relock,
+	TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, const char *str),
+	TP_ARGS(trans, caller_ip, str)
 );
 
 DEFINE_EVENT(transaction_restart_iter,	trans_restart_relock_next_node,
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index c2ef7cddaa4f..a135136adeee 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -241,12 +241,17 @@ bool bch2_is_zero(const void *_p, size_t n)
 	return true;
 }
 
-void bch2_prt_u64_binary(struct printbuf *out, u64 v, unsigned nr_bits)
+void bch2_prt_u64_base2_nbits(struct printbuf *out, u64 v, unsigned nr_bits)
 {
 	while (nr_bits)
 		prt_char(out, '0' + ((v >> --nr_bits) & 1));
 }
 
+void bch2_prt_u64_base2(struct printbuf *out, u64 v)
+{
+	bch2_prt_u64_base2_nbits(out, v, fls64(v) ?: 1);
+}
+
 void bch2_print_string_as_lines(const char *prefix, const char *lines)
 {
 	const char *p;
@@ -1186,7 +1191,9 @@ int bch2_split_devs(const char *_dev_name, darray_str *ret)
 {
 	darray_init(ret);
 
-	char *dev_name = kstrdup(_dev_name, GFP_KERNEL), *s = dev_name;
+	char *dev_name, *s, *orig;
+
+	dev_name = orig = kstrdup(_dev_name, GFP_KERNEL);
 	if (!dev_name)
 		return -ENOMEM;
 
@@ -1201,10 +1208,10 @@ int bch2_split_devs(const char *_dev_name, darray_str *ret)
 		}
 	}
 
-	kfree(dev_name);
+	kfree(orig);
 	return 0;
 err:
 	bch2_darray_str_exit(ret);
-	kfree(dev_name);
+	kfree(orig);
 	return -ENOMEM;
 }
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index c75fc31915d3..df67bf55fe2b 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -342,7 +342,8 @@ bool bch2_is_zero(const void *, size_t);
 
 u64 bch2_read_flag_list(char *, const char * const[]);
 
-void bch2_prt_u64_binary(struct printbuf *, u64, unsigned);
+void bch2_prt_u64_base2_nbits(struct printbuf *, u64, unsigned);
+void bch2_prt_u64_base2(struct printbuf *, u64);
 
 void bch2_print_string_as_lines(const char *prefix, const char *lines);
 
diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c
index 5a1858fb9879..9c0d2316031b 100644
--- a/fs/bcachefs/xattr.c
+++ b/fs/bcachefs/xattr.c
@@ -590,8 +590,9 @@ err:
 	mutex_unlock(&inode->ei_update_lock);
 
 	if (value &&
-	    (opt_id == Opt_background_compression ||
-	     opt_id == Opt_background_target))
+	    (opt_id == Opt_background_target ||
+	     opt_id == Opt_background_compression ||
+	     (opt_id == Opt_compression && !inode_opt_get(c, &inode->ei_inode, background_compression))))
 		bch2_set_rebalance_needs_scan(c, inode->ei_inode.bi_inum);
 
 	return bch2_err_class(ret);
diff --git a/fs/bcachefs/xattr_format.h b/fs/bcachefs/xattr_format.h
new file mode 100644
index 000000000000..e9f810539552
--- /dev/null
+++ b/fs/bcachefs/xattr_format.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_XATTR_FORMAT_H
+#define _BCACHEFS_XATTR_FORMAT_H
+
+#define KEY_TYPE_XATTR_INDEX_USER		0
+#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS	1
+#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT	2
+#define KEY_TYPE_XATTR_INDEX_TRUSTED		3
+#define KEY_TYPE_XATTR_INDEX_SECURITY	        4
+
+struct bch_xattr {
+	struct bch_val		v;
+	__u8			x_type;
+	__u8			x_name_len;
+	__le16			x_val_len;
+	__u8			x_name[];
+} __packed __aligned(8);
+
+#endif /* _BCACHEFS_XATTR_FORMAT_H */
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 193168214eeb..68345f73d429 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -141,16 +141,16 @@ static int compression_decompress_bio(struct list_head *ws,
 }
 
 static int compression_decompress(int type, struct list_head *ws,
-               const u8 *data_in, struct page *dest_page,
-               unsigned long start_byte, size_t srclen, size_t destlen)
+		const u8 *data_in, struct page *dest_page,
+		unsigned long dest_pgoff, size_t srclen, size_t destlen)
 {
 	switch (type) {
 	case BTRFS_COMPRESS_ZLIB: return zlib_decompress(ws, data_in, dest_page,
-						start_byte, srclen, destlen);
+						dest_pgoff, srclen, destlen);
 	case BTRFS_COMPRESS_LZO:  return lzo_decompress(ws, data_in, dest_page,
-						start_byte, srclen, destlen);
+						dest_pgoff, srclen, destlen);
 	case BTRFS_COMPRESS_ZSTD: return zstd_decompress(ws, data_in, dest_page,
-						start_byte, srclen, destlen);
+						dest_pgoff, srclen, destlen);
 	case BTRFS_COMPRESS_NONE:
 	default:
 		/*
@@ -1037,14 +1037,23 @@ static int btrfs_decompress_bio(struct compressed_bio *cb)
  * start_byte tells us the offset into the compressed data we're interested in
  */
 int btrfs_decompress(int type, const u8 *data_in, struct page *dest_page,
-		     unsigned long start_byte, size_t srclen, size_t destlen)
+		     unsigned long dest_pgoff, size_t srclen, size_t destlen)
 {
+	struct btrfs_fs_info *fs_info = btrfs_sb(dest_page->mapping->host->i_sb);
 	struct list_head *workspace;
+	const u32 sectorsize = fs_info->sectorsize;
 	int ret;
 
+	/*
+	 * The full destination page range should not exceed the page size.
+	 * And the @destlen should not exceed sectorsize, as this is only called for
+	 * inline file extents, which should not exceed sectorsize.
+	 */
+	ASSERT(dest_pgoff + destlen <= PAGE_SIZE && destlen <= sectorsize);
+
 	workspace = get_workspace(type, 0);
 	ret = compression_decompress(type, workspace, data_in, dest_page,
-				     start_byte, srclen, destlen);
+				     dest_pgoff, srclen, destlen);
 	put_workspace(type, workspace);
 
 	return ret;
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 93cc92974dee..afd7e50d073d 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -148,7 +148,7 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
 		unsigned long *total_in, unsigned long *total_out);
 int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
 int zlib_decompress(struct list_head *ws, const u8 *data_in,
-		struct page *dest_page, unsigned long start_byte, size_t srclen,
+		struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
 		size_t destlen);
 struct list_head *zlib_alloc_workspace(unsigned int level);
 void zlib_free_workspace(struct list_head *ws);
@@ -159,7 +159,7 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
 		unsigned long *total_in, unsigned long *total_out);
 int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
 int lzo_decompress(struct list_head *ws, const u8 *data_in,
-		struct page *dest_page, unsigned long start_byte, size_t srclen,
+		struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
 		size_t destlen);
 struct list_head *lzo_alloc_workspace(unsigned int level);
 void lzo_free_workspace(struct list_head *ws);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f396aba92c57..8e8cc1111277 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1260,7 +1260,8 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
 	u64 bytes_left, end;
 	u64 aligned_start = ALIGN(start, 1 << SECTOR_SHIFT);
 
-	if (WARN_ON(start != aligned_start)) {
+	/* Adjust the range to be aligned to 512B sectors if necessary. */
+	if (start != aligned_start) {
 		len -= aligned_start - start;
 		len = round_down(len, 1 << SECTOR_SHIFT);
 		start = aligned_start;
@@ -4298,6 +4299,42 @@ static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
 	return 0;
 }
 
+static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
+				    struct find_free_extent_ctl *ffe_ctl)
+{
+	if (ffe_ctl->for_treelog) {
+		spin_lock(&fs_info->treelog_bg_lock);
+		if (fs_info->treelog_bg)
+			ffe_ctl->hint_byte = fs_info->treelog_bg;
+		spin_unlock(&fs_info->treelog_bg_lock);
+	} else if (ffe_ctl->for_data_reloc) {
+		spin_lock(&fs_info->relocation_bg_lock);
+		if (fs_info->data_reloc_bg)
+			ffe_ctl->hint_byte = fs_info->data_reloc_bg;
+		spin_unlock(&fs_info->relocation_bg_lock);
+	} else if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
+		struct btrfs_block_group *block_group;
+
+		spin_lock(&fs_info->zone_active_bgs_lock);
+		list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) {
+			/*
+			 * No lock is OK here because avail is monotonically
+			 * decreasing, and this is just a hint.
+			 */
+			u64 avail = block_group->zone_capacity - block_group->alloc_offset;
+
+			if (block_group_bits(block_group, ffe_ctl->flags) &&
+			    avail >= ffe_ctl->num_bytes) {
+				ffe_ctl->hint_byte = block_group->start;
+				break;
+			}
+		}
+		spin_unlock(&fs_info->zone_active_bgs_lock);
+	}
+
+	return 0;
+}
+
 static int prepare_allocation(struct btrfs_fs_info *fs_info,
 			      struct find_free_extent_ctl *ffe_ctl,
 			      struct btrfs_space_info *space_info,
@@ -4308,19 +4345,7 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info,
 		return prepare_allocation_clustered(fs_info, ffe_ctl,
 						    space_info, ins);
 	case BTRFS_EXTENT_ALLOC_ZONED:
-		if (ffe_ctl->for_treelog) {
-			spin_lock(&fs_info->treelog_bg_lock);
-			if (fs_info->treelog_bg)
-				ffe_ctl->hint_byte = fs_info->treelog_bg;
-			spin_unlock(&fs_info->treelog_bg_lock);
-		}
-		if (ffe_ctl->for_data_reloc) {
-			spin_lock(&fs_info->relocation_bg_lock);
-			if (fs_info->data_reloc_bg)
-				ffe_ctl->hint_byte = fs_info->data_reloc_bg;
-			spin_unlock(&fs_info->relocation_bg_lock);
-		}
-		return 0;
+		return prepare_allocation_zoned(fs_info, ffe_ctl);
 	default:
 		BUG();
 	}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 809b11472a80..1eb93d3962aa 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4458,6 +4458,8 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
 	u64 root_flags;
 	int ret;
 
+	down_write(&fs_info->subvol_sem);
+
 	/*
 	 * Don't allow to delete a subvolume with send in progress. This is
 	 * inside the inode lock so the error handling that has to drop the bit
@@ -4469,25 +4471,25 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
 		btrfs_warn(fs_info,
 			   "attempt to delete subvolume %llu during send",
 			   dest->root_key.objectid);
-		return -EPERM;
+		ret = -EPERM;
+		goto out_up_write;
 	}
 	if (atomic_read(&dest->nr_swapfiles)) {
 		spin_unlock(&dest->root_item_lock);
 		btrfs_warn(fs_info,
 			   "attempt to delete subvolume %llu with active swapfile",
 			   root->root_key.objectid);
-		return -EPERM;
+		ret = -EPERM;
+		goto out_up_write;
 	}
 	root_flags = btrfs_root_flags(&dest->root_item);
 	btrfs_set_root_flags(&dest->root_item,
 			     root_flags | BTRFS_ROOT_SUBVOL_DEAD);
 	spin_unlock(&dest->root_item_lock);
 
-	down_write(&fs_info->subvol_sem);
-
 	ret = may_destroy_subvol(dest);
 	if (ret)
-		goto out_up_write;
+		goto out_undead;
 
 	btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
 	/*
@@ -4497,7 +4499,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
 	 */
 	ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
 	if (ret)
-		goto out_up_write;
+		goto out_undead;
 
 	trans = btrfs_start_transaction(root, 0);
 	if (IS_ERR(trans)) {
@@ -4563,15 +4565,17 @@ out_end_trans:
 	inode->i_flags |= S_DEAD;
 out_release:
 	btrfs_subvolume_release_metadata(root, &block_rsv);
-out_up_write:
-	up_write(&fs_info->subvol_sem);
+out_undead:
 	if (ret) {
 		spin_lock(&dest->root_item_lock);
 		root_flags = btrfs_root_flags(&dest->root_item);
 		btrfs_set_root_flags(&dest->root_item,
 				root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
 		spin_unlock(&dest->root_item_lock);
-	} else {
+	}
+out_up_write:
+	up_write(&fs_info->subvol_sem);
+	if (!ret) {
 		d_invalidate(dentry);
 		btrfs_prune_dentries(dest);
 		ASSERT(dest->send_in_progress == 0);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 41b479861b3c..dfed9dd9c2d7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -790,6 +790,9 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
 		return -EOPNOTSUPP;
 	}
 
+	if (btrfs_root_refs(&root->root_item) == 0)
+		return -ENOENT;
+
 	if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
 		return -EINVAL;
 
@@ -2608,6 +2611,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
 				ret = -EFAULT;
 				goto out;
 			}
+			if (range.flags & ~BTRFS_DEFRAG_RANGE_FLAGS_SUPP) {
+				ret = -EOPNOTSUPP;
+				goto out;
+			}
 			/* compression requires us to start the IO */
 			if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
 				range.flags |= BTRFS_DEFRAG_RANGE_START_IO;
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index 1131d5a29d61..e43bc0fdc74e 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -425,16 +425,16 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
 }
 
 int lzo_decompress(struct list_head *ws, const u8 *data_in,
-		struct page *dest_page, unsigned long start_byte, size_t srclen,
+		struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
 		size_t destlen)
 {
 	struct workspace *workspace = list_entry(ws, struct workspace, list);
+	struct btrfs_fs_info *fs_info = btrfs_sb(dest_page->mapping->host->i_sb);
+	const u32 sectorsize = fs_info->sectorsize;
 	size_t in_len;
 	size_t out_len;
 	size_t max_segment_len = WORKSPACE_BUF_LENGTH;
 	int ret = 0;
-	char *kaddr;
-	unsigned long bytes;
 
 	if (srclen < LZO_LEN || srclen > max_segment_len + LZO_LEN * 2)
 		return -EUCLEAN;
@@ -451,7 +451,7 @@ int lzo_decompress(struct list_head *ws, const u8 *data_in,
 	}
 	data_in += LZO_LEN;
 
-	out_len = PAGE_SIZE;
+	out_len = sectorsize;
 	ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len);
 	if (ret != LZO_E_OK) {
 		pr_warn("BTRFS: decompress failed!\n");
@@ -459,29 +459,13 @@ int lzo_decompress(struct list_head *ws, const u8 *data_in,
 		goto out;
 	}
 
-	if (out_len < start_byte) {
+	ASSERT(out_len <= sectorsize);
+	memcpy_to_page(dest_page, dest_pgoff, workspace->buf, out_len);
+	/* Early end, considered as an error. */
+	if (unlikely(out_len < destlen)) {
 		ret = -EIO;
-		goto out;
+		memzero_page(dest_page, dest_pgoff + out_len, destlen - out_len);
 	}
-
-	/*
-	 * the caller is already checking against PAGE_SIZE, but lets
-	 * move this check closer to the memcpy/memset
-	 */
-	destlen = min_t(unsigned long, destlen, PAGE_SIZE);
-	bytes = min_t(unsigned long, destlen, out_len - start_byte);
-
-	kaddr = kmap_local_page(dest_page);
-	memcpy(kaddr, workspace->buf + start_byte, bytes);
-
-	/*
-	 * btrfs_getblock is doing a zero on the tail of the page too,
-	 * but this will cover anything missing from the decompressed
-	 * data.
-	 */
-	if (bytes < destlen)
-		memset(kaddr+bytes, 0, destlen-bytes);
-	kunmap_local(kaddr);
 out:
 	return ret;
 }
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index 6486f0d7e993..8c4fc98ca9ce 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -889,8 +889,10 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
 out_unlock:
 	spin_unlock(&fs_info->ref_verify_lock);
 out:
-	if (ret)
+	if (ret) {
+		btrfs_free_ref_cache(fs_info);
 		btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
+	}
 	return ret;
 }
 
@@ -1021,8 +1023,8 @@ int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
 		}
 	}
 	if (ret) {
-		btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
 		btrfs_free_ref_cache(fs_info);
+		btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
 	}
 	btrfs_free_path(path);
 	return ret;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index a01807cbd4d4..0123d2728923 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1098,12 +1098,22 @@ out:
 static void scrub_read_endio(struct btrfs_bio *bbio)
 {
 	struct scrub_stripe *stripe = bbio->private;
+	struct bio_vec *bvec;
+	int sector_nr = calc_sector_number(stripe, bio_first_bvec_all(&bbio->bio));
+	int num_sectors;
+	u32 bio_size = 0;
+	int i;
+
+	ASSERT(sector_nr < stripe->nr_sectors);
+	bio_for_each_bvec_all(bvec, &bbio->bio, i)
+		bio_size += bvec->bv_len;
+	num_sectors = bio_size >> stripe->bg->fs_info->sectorsize_bits;
 
 	if (bbio->bio.bi_status) {
-		bitmap_set(&stripe->io_error_bitmap, 0, stripe->nr_sectors);
-		bitmap_set(&stripe->error_bitmap, 0, stripe->nr_sectors);
+		bitmap_set(&stripe->io_error_bitmap, sector_nr, num_sectors);
+		bitmap_set(&stripe->error_bitmap, sector_nr, num_sectors);
 	} else {
-		bitmap_clear(&stripe->io_error_bitmap, 0, stripe->nr_sectors);
+		bitmap_clear(&stripe->io_error_bitmap, sector_nr, num_sectors);
 	}
 	bio_put(&bbio->bio);
 	if (atomic_dec_and_test(&stripe->pending_io)) {
@@ -1636,6 +1646,9 @@ static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx,
 {
 	struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
 	struct btrfs_bio *bbio = NULL;
+	unsigned int nr_sectors = min(BTRFS_STRIPE_LEN, stripe->bg->start +
+				      stripe->bg->length - stripe->logical) >>
+				  fs_info->sectorsize_bits;
 	u64 stripe_len = BTRFS_STRIPE_LEN;
 	int mirror = stripe->mirror_num;
 	int i;
@@ -1646,6 +1659,10 @@ static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx,
 		struct page *page = scrub_stripe_get_page(stripe, i);
 		unsigned int pgoff = scrub_stripe_get_page_offset(stripe, i);
 
+		/* We're beyond the chunk boundary, no need to read anymore. */
+		if (i >= nr_sectors)
+			break;
+
 		/* The current sector cannot be merged, submit the bio. */
 		if (bbio &&
 		    ((i > 0 &&
@@ -1701,6 +1718,9 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx,
 {
 	struct btrfs_fs_info *fs_info = sctx->fs_info;
 	struct btrfs_bio *bbio;
+	unsigned int nr_sectors = min(BTRFS_STRIPE_LEN, stripe->bg->start +
+				      stripe->bg->length - stripe->logical) >>
+				  fs_info->sectorsize_bits;
 	int mirror = stripe->mirror_num;
 
 	ASSERT(stripe->bg);
@@ -1715,14 +1735,16 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx,
 	bbio = btrfs_bio_alloc(SCRUB_STRIPE_PAGES, REQ_OP_READ, fs_info,
 			       scrub_read_endio, stripe);
 
-	/* Read the whole stripe. */
 	bbio->bio.bi_iter.bi_sector = stripe->logical >> SECTOR_SHIFT;
-	for (int i = 0; i < BTRFS_STRIPE_LEN >> PAGE_SHIFT; i++) {
+	/* Read the whole range inside the chunk boundary. */
+	for (unsigned int cur = 0; cur < nr_sectors; cur++) {
+		struct page *page = scrub_stripe_get_page(stripe, cur);
+		unsigned int pgoff = scrub_stripe_get_page_offset(stripe, cur);
 		int ret;
 
-		ret = bio_add_page(&bbio->bio, stripe->pages[i], PAGE_SIZE, 0);
+		ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff);
 		/* We should have allocated enough bio vectors. */
-		ASSERT(ret == PAGE_SIZE);
+		ASSERT(ret == fs_info->sectorsize);
 	}
 	atomic_inc(&stripe->pending_io);
 
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 4e36550618e5..2d7519a6ce72 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -8205,8 +8205,8 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
 		goto out;
 	}
 
-	sctx->clone_roots = kvcalloc(sizeof(*sctx->clone_roots),
-				     arg->clone_sources_count + 1,
+	sctx->clone_roots = kvcalloc(arg->clone_sources_count + 1,
+				     sizeof(*sctx->clone_roots),
 				     GFP_KERNEL);
 	if (!sctx->clone_roots) {
 		ret = -ENOMEM;
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index 93511d54abf8..0e49dab8dad2 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -475,7 +475,8 @@ void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info,
 
 	spin_lock_irqsave(&subpage->lock, flags);
 	bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
-	folio_start_writeback(folio);
+	if (!folio_test_writeback(folio))
+		folio_start_writeback(folio);
 	spin_unlock_irqrestore(&subpage->lock, flags);
 }
 
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 896acfda1789..101f786963d4 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1457,6 +1457,14 @@ static int btrfs_reconfigure(struct fs_context *fc)
 
 	btrfs_info_to_ctx(fs_info, &old_ctx);
 
+	/*
+	 * This is our "bind mount" trick, we don't want to allow the user to do
+	 * anything other than mount a different ro/rw and a different subvol,
+	 * all of the mount options should be maintained.
+	 */
+	if (mount_reconfigure)
+		ctx->mount_opt = old_ctx.mount_opt;
+
 	sync_filesystem(sb);
 	set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
 
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 50fdc69fdddf..6eccf8496486 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -1436,7 +1436,7 @@ static int check_extent_item(struct extent_buffer *leaf,
 		if (unlikely(ptr + btrfs_extent_inline_ref_size(inline_type) > end)) {
 			extent_err(leaf, slot,
 "inline ref item overflows extent item, ptr %lu iref size %u end %lu",
-				   ptr, inline_type, end);
+				   ptr, btrfs_extent_inline_ref_size(inline_type), end);
 			return -EUCLEAN;
 		}
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 4c32497311d2..d67785be2c77 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3087,7 +3087,6 @@ struct btrfs_chunk_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
 	map = btrfs_find_chunk_map(fs_info, logical, length);
 
 	if (unlikely(!map)) {
-		read_unlock(&fs_info->mapping_tree_lock);
 		btrfs_crit(fs_info,
 			   "unable to find chunk map for logical %llu length %llu",
 			   logical, length);
@@ -3095,7 +3094,6 @@ struct btrfs_chunk_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
 	}
 
 	if (unlikely(map->start > logical || map->start + map->chunk_len <= logical)) {
-		read_unlock(&fs_info->mapping_tree_lock);
 		btrfs_crit(fs_info,
 			   "found a bad chunk map, wanted %llu-%llu, found %llu-%llu",
 			   logical, logical + length, map->start,
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 36cf1f0e338e..8da66ea699e8 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -354,18 +354,13 @@ done:
 }
 
 int zlib_decompress(struct list_head *ws, const u8 *data_in,
-		struct page *dest_page, unsigned long start_byte, size_t srclen,
+		struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
 		size_t destlen)
 {
 	struct workspace *workspace = list_entry(ws, struct workspace, list);
 	int ret = 0;
 	int wbits = MAX_WBITS;
-	unsigned long bytes_left;
-	unsigned long total_out = 0;
-	unsigned long pg_offset = 0;
-
-	destlen = min_t(unsigned long, destlen, PAGE_SIZE);
-	bytes_left = destlen;
+	unsigned long to_copy;
 
 	workspace->strm.next_in = data_in;
 	workspace->strm.avail_in = srclen;
@@ -390,60 +385,30 @@ int zlib_decompress(struct list_head *ws, const u8 *data_in,
 		return -EIO;
 	}
 
-	while (bytes_left > 0) {
-		unsigned long buf_start;
-		unsigned long buf_offset;
-		unsigned long bytes;
-
-		ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH);
-		if (ret != Z_OK && ret != Z_STREAM_END)
-			break;
-
-		buf_start = total_out;
-		total_out = workspace->strm.total_out;
-
-		if (total_out == buf_start) {
-			ret = -EIO;
-			break;
-		}
-
-		if (total_out <= start_byte)
-			goto next;
-
-		if (total_out > start_byte && buf_start < start_byte)
-			buf_offset = start_byte - buf_start;
-		else
-			buf_offset = 0;
-
-		bytes = min(PAGE_SIZE - pg_offset,
-			    PAGE_SIZE - (buf_offset % PAGE_SIZE));
-		bytes = min(bytes, bytes_left);
+	/*
+	 * Everything (in/out buf) should be at most one sector, there should
+	 * be no need to switch any input/output buffer.
+	 */
+	ret = zlib_inflate(&workspace->strm, Z_FINISH);
+	to_copy = min(workspace->strm.total_out, destlen);
+	if (ret != Z_STREAM_END)
+		goto out;
 
-		memcpy_to_page(dest_page, pg_offset,
-			       workspace->buf + buf_offset, bytes);
+	memcpy_to_page(dest_page, dest_pgoff, workspace->buf, to_copy);
 
-		pg_offset += bytes;
-		bytes_left -= bytes;
-next:
-		workspace->strm.next_out = workspace->buf;
-		workspace->strm.avail_out = workspace->buf_size;
-	}
-
-	if (ret != Z_STREAM_END && bytes_left != 0)
+out:
+	if (unlikely(to_copy != destlen)) {
+		pr_warn_ratelimited("BTRFS: infalte failed, decompressed=%lu expected=%zu\n",
+					to_copy, destlen);
 		ret = -EIO;
-	else
+	} else {
 		ret = 0;
+	}
 
 	zlib_inflateEnd(&workspace->strm);
 
-	/*
-	 * this should only happen if zlib returned fewer bytes than we
-	 * expected.  btrfs_get_block is responsible for zeroing from the
-	 * end of the inline extent (destlen) to the end of the page
-	 */
-	if (pg_offset < destlen) {
-		memzero_page(dest_page, pg_offset, destlen - pg_offset);
-	}
+	if (unlikely(to_copy < destlen))
+		memzero_page(dest_page, dest_pgoff + to_copy, destlen - to_copy);
 	return ret;
 }
 
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 5bd76813b23f..168af9d000d1 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -2055,6 +2055,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 
 	map = block_group->physical_map;
 
+	spin_lock(&fs_info->zone_active_bgs_lock);
 	spin_lock(&block_group->lock);
 	if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) {
 		ret = true;
@@ -2067,7 +2068,6 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 		goto out_unlock;
 	}
 
-	spin_lock(&fs_info->zone_active_bgs_lock);
 	for (i = 0; i < map->num_stripes; i++) {
 		struct btrfs_zoned_device_info *zinfo;
 		int reserved = 0;
@@ -2087,20 +2087,17 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 		 */
 		if (atomic_read(&zinfo->active_zones_left) <= reserved) {
 			ret = false;
-			spin_unlock(&fs_info->zone_active_bgs_lock);
 			goto out_unlock;
 		}
 
 		if (!btrfs_dev_set_active_zone(device, physical)) {
 			/* Cannot activate the zone */
 			ret = false;
-			spin_unlock(&fs_info->zone_active_bgs_lock);
 			goto out_unlock;
 		}
 		if (!is_data)
 			zinfo->reserved_active_zones--;
 	}
-	spin_unlock(&fs_info->zone_active_bgs_lock);
 
 	/* Successfully activated all the zones */
 	set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
@@ -2108,8 +2105,6 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 
 	/* For the active block group list */
 	btrfs_get_block_group(block_group);
-
-	spin_lock(&fs_info->zone_active_bgs_lock);
 	list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs);
 	spin_unlock(&fs_info->zone_active_bgs_lock);
 
@@ -2117,6 +2112,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 
 out_unlock:
 	spin_unlock(&block_group->lock);
+	spin_unlock(&fs_info->zone_active_bgs_lock);
 	return ret;
 }
 
diff --git a/fs/cachefiles/Kconfig b/fs/cachefiles/Kconfig
index 8df715640a48..c5a070550ee3 100644
--- a/fs/cachefiles/Kconfig
+++ b/fs/cachefiles/Kconfig
@@ -2,7 +2,7 @@
 
 config CACHEFILES
 	tristate "Filesystem caching on files"
-	depends on FSCACHE && BLOCK
+	depends on NETFS_SUPPORT && FSCACHE && BLOCK
 	help
 	  This permits use of a mounted filesystem as a cache for other
 	  filesystems - primarily networking filesystems - thus allowing fast
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 4a87c9d714a9..d33169f0018b 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -246,7 +246,7 @@ extern bool cachefiles_begin_operation(struct netfs_cache_resources *cres,
 				       enum fscache_want_state want_state);
 extern int __cachefiles_prepare_write(struct cachefiles_object *object,
 				      struct file *file,
-				      loff_t *_start, size_t *_len,
+				      loff_t *_start, size_t *_len, size_t upper_len,
 				      bool no_space_allocated_yet);
 extern int __cachefiles_write(struct cachefiles_object *object,
 			      struct file *file,
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index 5857241c5918..1d685357e67f 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -517,18 +517,26 @@ cachefiles_prepare_ondemand_read(struct netfs_cache_resources *cres,
  */
 int __cachefiles_prepare_write(struct cachefiles_object *object,
 			       struct file *file,
-			       loff_t *_start, size_t *_len,
+			       loff_t *_start, size_t *_len, size_t upper_len,
 			       bool no_space_allocated_yet)
 {
 	struct cachefiles_cache *cache = object->volume->cache;
 	loff_t start = *_start, pos;
-	size_t len = *_len, down;
+	size_t len = *_len;
 	int ret;
 
 	/* Round to DIO size */
-	down = start - round_down(start, PAGE_SIZE);
-	*_start = start - down;
-	*_len = round_up(down + len, PAGE_SIZE);
+	start = round_down(*_start, PAGE_SIZE);
+	if (start != *_start || *_len > upper_len) {
+		/* Probably asked to cache a streaming write written into the
+		 * pagecache when the cookie was temporarily out of service due
+		 * to culling.
+		 */
+		fscache_count_dio_misfit();
+		return -ENOBUFS;
+	}
+
+	*_len = round_up(len, PAGE_SIZE);
 
 	/* We need to work out whether there's sufficient disk space to perform
 	 * the write - but we can skip that check if we have space already
@@ -539,7 +547,7 @@ int __cachefiles_prepare_write(struct cachefiles_object *object,
 
 	pos = cachefiles_inject_read_error();
 	if (pos == 0)
-		pos = vfs_llseek(file, *_start, SEEK_DATA);
+		pos = vfs_llseek(file, start, SEEK_DATA);
 	if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
 		if (pos == -ENXIO)
 			goto check_space; /* Unallocated tail */
@@ -547,7 +555,7 @@ int __cachefiles_prepare_write(struct cachefiles_object *object,
 					  cachefiles_trace_seek_error);
 		return pos;
 	}
-	if ((u64)pos >= (u64)*_start + *_len)
+	if ((u64)pos >= (u64)start + *_len)
 		goto check_space; /* Unallocated region */
 
 	/* We have a block that's at least partially filled - if we're low on
@@ -560,13 +568,13 @@ int __cachefiles_prepare_write(struct cachefiles_object *object,
 
 	pos = cachefiles_inject_read_error();
 	if (pos == 0)
-		pos = vfs_llseek(file, *_start, SEEK_HOLE);
+		pos = vfs_llseek(file, start, SEEK_HOLE);
 	if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
 		trace_cachefiles_io_error(object, file_inode(file), pos,
 					  cachefiles_trace_seek_error);
 		return pos;
 	}
-	if ((u64)pos >= (u64)*_start + *_len)
+	if ((u64)pos >= (u64)start + *_len)
 		return 0; /* Fully allocated */
 
 	/* Partially allocated, but insufficient space: cull. */
@@ -574,7 +582,7 @@ int __cachefiles_prepare_write(struct cachefiles_object *object,
 	ret = cachefiles_inject_remove_error();
 	if (ret == 0)
 		ret = vfs_fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
-				    *_start, *_len);
+				    start, *_len);
 	if (ret < 0) {
 		trace_cachefiles_io_error(object, file_inode(file), ret,
 					  cachefiles_trace_fallocate_error);
@@ -591,8 +599,8 @@ check_space:
 }
 
 static int cachefiles_prepare_write(struct netfs_cache_resources *cres,
-				    loff_t *_start, size_t *_len, loff_t i_size,
-				    bool no_space_allocated_yet)
+				    loff_t *_start, size_t *_len, size_t upper_len,
+				    loff_t i_size, bool no_space_allocated_yet)
 {
 	struct cachefiles_object *object = cachefiles_cres_object(cres);
 	struct cachefiles_cache *cache = object->volume->cache;
@@ -608,7 +616,7 @@ static int cachefiles_prepare_write(struct netfs_cache_resources *cres,
 
 	cachefiles_begin_secure(cache, &saved_cred);
 	ret = __cachefiles_prepare_write(object, cachefiles_cres_file(cres),
-					 _start, _len,
+					 _start, _len, upper_len,
 					 no_space_allocated_yet);
 	cachefiles_end_secure(cache, saved_cred);
 	return ret;
diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c
index b8fbbb1961bb..4ba42f1fa3b4 100644
--- a/fs/cachefiles/ondemand.c
+++ b/fs/cachefiles/ondemand.c
@@ -50,7 +50,7 @@ static ssize_t cachefiles_ondemand_fd_write_iter(struct kiocb *kiocb,
 		return -ENOBUFS;
 
 	cachefiles_begin_secure(cache, &saved_cred);
-	ret = __cachefiles_prepare_write(object, file, &pos, &len, true);
+	ret = __cachefiles_prepare_write(object, file, &pos, &len, len, true);
 	cachefiles_end_secure(cache, saved_cred);
 	if (ret < 0)
 		return ret;
@@ -539,6 +539,9 @@ int cachefiles_ondemand_init_object(struct cachefiles_object *object)
 	struct fscache_volume *volume = object->volume->vcookie;
 	size_t volume_key_size, cookie_key_size, data_len;
 
+	if (!object->ondemand)
+		return 0;
+
 	/*
 	 * CacheFiles will firstly check the cache file under the root cache
 	 * directory. If the coherency check failed, it will fallback to
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index 94df854147d3..7249d70e1a43 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -7,6 +7,7 @@ config CEPH_FS
 	select CRYPTO_AES
 	select CRYPTO
 	select NETFS_SUPPORT
+	select FS_ENCRYPTION_ALGS if FS_ENCRYPTION
 	default n
 	help
 	  Choose Y or M here to include support for mounting the
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 13af429ab030..1340d77124ae 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -159,27 +159,7 @@ static void ceph_invalidate_folio(struct folio *folio, size_t offset,
 		ceph_put_snap_context(snapc);
 	}
 
-	folio_wait_fscache(folio);
-}
-
-static bool ceph_release_folio(struct folio *folio, gfp_t gfp)
-{
-	struct inode *inode = folio->mapping->host;
-	struct ceph_client *cl = ceph_inode_to_client(inode);
-
-	doutc(cl, "%llx.%llx idx %lu (%sdirty)\n", ceph_vinop(inode),
-	      folio->index, folio_test_dirty(folio) ? "" : "not ");
-
-	if (folio_test_private(folio))
-		return false;
-
-	if (folio_test_fscache(folio)) {
-		if (current_is_kswapd() || !(gfp & __GFP_FS))
-			return false;
-		folio_wait_fscache(folio);
-	}
-	ceph_fscache_note_page_release(inode);
-	return true;
+	netfs_invalidate_folio(folio, offset, length);
 }
 
 static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq)
@@ -357,6 +337,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
 	u64 len = subreq->len;
 	bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD);
 	u64 off = subreq->start;
+	int extent_cnt;
 
 	if (ceph_inode_is_shutdown(inode)) {
 		err = -EIO;
@@ -370,8 +351,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
 
 	req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino,
 			off, &len, 0, 1, sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ,
-			CEPH_OSD_FLAG_READ | fsc->client->osdc.client->options->read_from_replica,
-			NULL, ci->i_truncate_seq, ci->i_truncate_size, false);
+			CEPH_OSD_FLAG_READ, NULL, ci->i_truncate_seq,
+			ci->i_truncate_size, false);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		req = NULL;
@@ -379,7 +360,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
 	}
 
 	if (sparse) {
-		err = ceph_alloc_sparse_ext_map(&req->r_ops[0]);
+		extent_cnt = __ceph_sparse_read_ext_count(inode, len);
+		err = ceph_alloc_sparse_ext_map(&req->r_ops[0], extent_cnt);
 		if (err)
 			goto out;
 	}
@@ -509,7 +491,6 @@ static void ceph_netfs_free_request(struct netfs_io_request *rreq)
 const struct netfs_request_ops ceph_netfs_ops = {
 	.init_request		= ceph_init_request,
 	.free_request		= ceph_netfs_free_request,
-	.begin_cache_operation	= ceph_begin_cache_operation,
 	.issue_read		= ceph_netfs_issue_read,
 	.expand_readahead	= ceph_netfs_expand_readahead,
 	.clamp_length		= ceph_netfs_clamp_length,
@@ -1586,7 +1567,7 @@ const struct address_space_operations ceph_aops = {
 	.write_end = ceph_write_end,
 	.dirty_folio = ceph_dirty_folio,
 	.invalidate_folio = ceph_invalidate_folio,
-	.release_folio = ceph_release_folio,
+	.release_folio = netfs_release_folio,
 	.direct_IO = noop_direct_IO,
 };
 
diff --git a/fs/ceph/cache.h b/fs/ceph/cache.h
index dc502daac49a..20efac020394 100644
--- a/fs/ceph/cache.h
+++ b/fs/ceph/cache.h
@@ -43,38 +43,19 @@ static inline void ceph_fscache_resize(struct inode *inode, loff_t to)
 	}
 }
 
-static inline void ceph_fscache_unpin_writeback(struct inode *inode,
+static inline int ceph_fscache_unpin_writeback(struct inode *inode,
 						struct writeback_control *wbc)
 {
-	fscache_unpin_writeback(wbc, ceph_fscache_cookie(ceph_inode(inode)));
+	return netfs_unpin_writeback(inode, wbc);
 }
 
-static inline int ceph_fscache_dirty_folio(struct address_space *mapping,
-		struct folio *folio)
-{
-	struct ceph_inode_info *ci = ceph_inode(mapping->host);
-
-	return fscache_dirty_folio(mapping, folio, ceph_fscache_cookie(ci));
-}
-
-static inline int ceph_begin_cache_operation(struct netfs_io_request *rreq)
-{
-	struct fscache_cookie *cookie = ceph_fscache_cookie(ceph_inode(rreq->inode));
-
-	return fscache_begin_read_operation(&rreq->cache_resources, cookie);
-}
+#define ceph_fscache_dirty_folio netfs_dirty_folio
 
 static inline bool ceph_is_cache_enabled(struct inode *inode)
 {
 	return fscache_cookie_enabled(ceph_fscache_cookie(ceph_inode(inode)));
 }
 
-static inline void ceph_fscache_note_page_release(struct inode *inode)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-
-	fscache_note_page_release(ceph_fscache_cookie(ci));
-}
 #else /* CONFIG_CEPH_FSCACHE */
 static inline int ceph_fscache_register_fs(struct ceph_fs_client* fsc,
 					   struct fs_context *fc)
@@ -119,30 +100,18 @@ static inline void ceph_fscache_resize(struct inode *inode, loff_t to)
 {
 }
 
-static inline void ceph_fscache_unpin_writeback(struct inode *inode,
-						struct writeback_control *wbc)
+static inline int ceph_fscache_unpin_writeback(struct inode *inode,
+					       struct writeback_control *wbc)
 {
+	return 0;
 }
 
-static inline int ceph_fscache_dirty_folio(struct address_space *mapping,
-		struct folio *folio)
-{
-	return filemap_dirty_folio(mapping, folio);
-}
+#define ceph_fscache_dirty_folio filemap_dirty_folio
 
 static inline bool ceph_is_cache_enabled(struct inode *inode)
 {
 	return false;
 }
-
-static inline int ceph_begin_cache_operation(struct netfs_io_request *rreq)
-{
-	return -ENOBUFS;
-}
-
-static inline void ceph_fscache_note_page_release(struct inode *inode)
-{
-}
 #endif /* CONFIG_CEPH_FSCACHE */
 
 #endif
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 2c0b8dc3dd0d..9c02f328c966 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -4887,13 +4887,15 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
 			       struct inode *dir,
 			       int mds, int drop, int unless)
 {
-	struct dentry *parent = NULL;
 	struct ceph_mds_request_release *rel = *p;
 	struct ceph_dentry_info *di = ceph_dentry(dentry);
 	struct ceph_client *cl;
 	int force = 0;
 	int ret;
 
+	/* This shouldn't happen */
+	BUG_ON(!dir);
+
 	/*
 	 * force an record for the directory caps if we have a dentry lease.
 	 * this is racy (can't take i_ceph_lock and d_lock together), but it
@@ -4903,14 +4905,9 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
 	spin_lock(&dentry->d_lock);
 	if (di->lease_session && di->lease_session->s_mds == mds)
 		force = 1;
-	if (!dir) {
-		parent = dget(dentry->d_parent);
-		dir = d_inode(parent);
-	}
 	spin_unlock(&dentry->d_lock);
 
 	ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force);
-	dput(parent);
 
 	cl = ceph_inode_to_client(dir);
 	spin_lock(&dentry->d_lock);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 678596684596..0e9f56eaba1e 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1593,10 +1593,12 @@ struct ceph_lease_walk_control {
 	unsigned long dir_lease_ttl;
 };
 
+static int __dir_lease_check(const struct dentry *, struct ceph_lease_walk_control *);
+static int __dentry_lease_check(const struct dentry *);
+
 static unsigned long
 __dentry_leases_walk(struct ceph_mds_client *mdsc,
-		     struct ceph_lease_walk_control *lwc,
-		     int (*check)(struct dentry*, void*))
+		     struct ceph_lease_walk_control *lwc)
 {
 	struct ceph_dentry_info *di, *tmp;
 	struct dentry *dentry, *last = NULL;
@@ -1624,7 +1626,10 @@ __dentry_leases_walk(struct ceph_mds_client *mdsc,
 			goto next;
 		}
 
-		ret = check(dentry, lwc);
+		if (lwc->dir_lease)
+			ret = __dir_lease_check(dentry, lwc);
+		else
+			ret = __dentry_lease_check(dentry);
 		if (ret & TOUCH) {
 			/* move it into tail of dir lease list */
 			__dentry_dir_lease_touch(mdsc, di);
@@ -1681,7 +1686,7 @@ next:
 	return freed;
 }
 
-static int __dentry_lease_check(struct dentry *dentry, void *arg)
+static int __dentry_lease_check(const struct dentry *dentry)
 {
 	struct ceph_dentry_info *di = ceph_dentry(dentry);
 	int ret;
@@ -1696,9 +1701,9 @@ static int __dentry_lease_check(struct dentry *dentry, void *arg)
 	return DELETE;
 }
 
-static int __dir_lease_check(struct dentry *dentry, void *arg)
+static int __dir_lease_check(const struct dentry *dentry,
+			     struct ceph_lease_walk_control *lwc)
 {
-	struct ceph_lease_walk_control *lwc = arg;
 	struct ceph_dentry_info *di = ceph_dentry(dentry);
 
 	int ret = __dir_lease_try_check(dentry);
@@ -1737,7 +1742,7 @@ int ceph_trim_dentries(struct ceph_mds_client *mdsc)
 
 	lwc.dir_lease = false;
 	lwc.nr_to_scan  = CEPH_CAPS_PER_RELEASE * 2;
-	freed = __dentry_leases_walk(mdsc, &lwc, __dentry_lease_check);
+	freed = __dentry_leases_walk(mdsc, &lwc);
 	if (!lwc.nr_to_scan) /* more invalid leases */
 		return -EAGAIN;
 
@@ -1747,7 +1752,7 @@ int ceph_trim_dentries(struct ceph_mds_client *mdsc)
 	lwc.dir_lease = true;
 	lwc.expire_dir_lease = freed < count;
 	lwc.dir_lease_ttl = mdsc->fsc->mount_options->caps_wanted_delay_max * HZ;
-	freed +=__dentry_leases_walk(mdsc, &lwc, __dir_lease_check);
+	freed +=__dentry_leases_walk(mdsc, &lwc);
 	if (!lwc.nr_to_scan) /* more to check */
 		return -EAGAIN;
 
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 726af69d4d62..a79f163ae4ed 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -286,8 +286,6 @@ static struct dentry *__snapfh_to_dentry(struct super_block *sb,
 		doutc(cl, "%llx.%llx parent %llx hash %x err=%d", vino.ino,
 		      vino.snap, sfh->parent_ino, sfh->hash, err);
 	}
-	if (IS_ERR(inode))
-		return ERR_CAST(inode);
 	/* see comments in ceph_get_parent() */
 	return unlinked ? d_obtain_root(inode) : d_obtain_alias(inode);
 }
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index d380d9dad0e0..abe8028d95bf 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1029,6 +1029,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
 		struct ceph_osd_req_op *op;
 		u64 read_off = off;
 		u64 read_len = len;
+		int extent_cnt;
 
 		/* determine new offset/length if encrypted */
 		ceph_fscrypt_adjust_off_and_len(inode, &read_off, &read_len);
@@ -1068,7 +1069,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
 
 		op = &req->r_ops[0];
 		if (sparse) {
-			ret = ceph_alloc_sparse_ext_map(op);
+			extent_cnt = __ceph_sparse_read_ext_count(inode, read_len);
+			ret = ceph_alloc_sparse_ext_map(op, extent_cnt);
 			if (ret) {
 				ceph_osdc_put_request(req);
 				break;
@@ -1465,6 +1467,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
 		ssize_t len;
 		struct ceph_osd_req_op *op;
 		int readop = sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ;
+		int extent_cnt;
 
 		if (write)
 			size = min_t(u64, size, fsc->mount_options->wsize);
@@ -1528,7 +1531,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
 		osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len);
 		op = &req->r_ops[0];
 		if (sparse) {
-			ret = ceph_alloc_sparse_ext_map(op);
+			extent_cnt = __ceph_sparse_read_ext_count(inode, size);
+			ret = ceph_alloc_sparse_ext_map(op, extent_cnt);
 			if (ret) {
 				ceph_osdc_put_request(req);
 				break;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 0679240f06db..0c25d326afc4 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -574,7 +574,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 	doutc(fsc->client, "%p\n", &ci->netfs.inode);
 
 	/* Set parameters for the netfs library */
-	netfs_inode_init(&ci->netfs, &ceph_netfs_ops);
+	netfs_inode_init(&ci->netfs, &ceph_netfs_ops, false);
 
 	spin_lock_init(&ci->i_ceph_lock);
 
@@ -694,7 +694,7 @@ void ceph_evict_inode(struct inode *inode)
 	percpu_counter_dec(&mdsc->metric.total_inodes);
 
 	truncate_inode_pages_final(&inode->i_data);
-	if (inode->i_state & I_PINNING_FSCACHE_WB)
+	if (inode->i_state & I_PINNING_NETFS_WB)
 		ceph_fscache_unuse_cookie(inode, true);
 	clear_inode(inode);
 
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 02ebfabfc8ee..548d1de379f3 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1534,7 +1534,8 @@ static int encode_metric_spec(void **p, void *end)
  * session message, specialization for CEPH_SESSION_REQUEST_OPEN
  * to include additional client metadata fields.
  */
-static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u64 seq)
+static struct ceph_msg *
+create_session_full_msg(struct ceph_mds_client *mdsc, int op, u64 seq)
 {
 	struct ceph_msg *msg;
 	struct ceph_mds_session_head *h;
@@ -1578,6 +1579,9 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
 		size = METRIC_BYTES(count);
 	extra_bytes += 2 + 4 + 4 + size;
 
+	/* flags, mds auth caps and oldest_client_tid */
+	extra_bytes += 4 + 4 + 8;
+
 	/* Allocate the message */
 	msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes,
 			   GFP_NOFS, false);
@@ -1589,16 +1593,16 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
 	end = p + msg->front.iov_len;
 
 	h = p;
-	h->op = cpu_to_le32(CEPH_SESSION_REQUEST_OPEN);
+	h->op = cpu_to_le32(op);
 	h->seq = cpu_to_le64(seq);
 
 	/*
 	 * Serialize client metadata into waiting buffer space, using
 	 * the format that userspace expects for map<string, string>
 	 *
-	 * ClientSession messages with metadata are v4
+	 * ClientSession messages with metadata are v7
 	 */
-	msg->hdr.version = cpu_to_le16(4);
+	msg->hdr.version = cpu_to_le16(7);
 	msg->hdr.compat_version = cpu_to_le16(1);
 
 	/* The write pointer, following the session_head structure */
@@ -1634,6 +1638,15 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
 		return ERR_PTR(ret);
 	}
 
+	/* version == 5, flags */
+	ceph_encode_32(&p, 0);
+
+	/* version == 6, mds auth caps */
+	ceph_encode_32(&p, 0);
+
+	/* version == 7, oldest_client_tid */
+	ceph_encode_64(&p, mdsc->oldest_tid);
+
 	msg->front.iov_len = p - msg->front.iov_base;
 	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
 
@@ -1663,7 +1676,8 @@ static int __open_session(struct ceph_mds_client *mdsc,
 	session->s_renew_requested = jiffies;
 
 	/* send connect message */
-	msg = create_session_open_msg(mdsc, session->s_seq);
+	msg = create_session_full_msg(mdsc, CEPH_SESSION_REQUEST_OPEN,
+				      session->s_seq);
 	if (IS_ERR(msg))
 		return PTR_ERR(msg);
 	ceph_con_send(&session->s_con, msg);
@@ -2028,10 +2042,10 @@ static int send_renew_caps(struct ceph_mds_client *mdsc,
 
 	doutc(cl, "to mds%d (%s)\n", session->s_mds,
 	      ceph_mds_state_name(state));
-	msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS,
+	msg = create_session_full_msg(mdsc, CEPH_SESSION_REQUEST_RENEWCAPS,
 				      ++session->s_renew_seq);
-	if (!msg)
-		return -ENOMEM;
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
 	ceph_con_send(&session->s_con, msg);
 	return 0;
 }
@@ -4128,12 +4142,12 @@ static void handle_session(struct ceph_mds_session *session,
 			pr_info_client(cl, "mds%d reconnect success\n",
 				       session->s_mds);
 
+		session->s_features = features;
 		if (session->s_state == CEPH_MDS_SESSION_OPEN) {
 			pr_notice_client(cl, "mds%d is already opened\n",
 					 session->s_mds);
 		} else {
 			session->s_state = CEPH_MDS_SESSION_OPEN;
-			session->s_features = features;
 			renewed_caps(mdsc, session, 0);
 			if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT,
 				     &session->s_features))
@@ -5870,7 +5884,8 @@ static void mds_peer_reset(struct ceph_connection *con)
 
 	pr_warn_client(mdsc->fsc->client, "mds%d closed our session\n",
 		       s->s_mds);
-	if (READ_ONCE(mdsc->fsc->mount_state) != CEPH_MOUNT_FENCE_IO)
+	if (READ_ONCE(mdsc->fsc->mount_state) != CEPH_MOUNT_FENCE_IO &&
+	    ceph_mdsmap_get_state(mdsc->mdsmap, s->s_mds) >= CEPH_MDS_STATE_RECONNECT)
 		send_mds_reconnect(mdsc, s);
 }
 
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index 9d36c3532de1..06ee397e0c3a 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -197,10 +197,10 @@ void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
 }
 
 /*
- * This function walks through the snaprealm for an inode and returns the
- * ceph_snap_realm for the first snaprealm that has quotas set (max_files,
+ * This function walks through the snaprealm for an inode and sets
+ * *realmp to the first snaprealm that has quotas set (max_files,
  * max_bytes, or any, depending on the 'which_quota' argument).  If the root is
- * reached, return the root ceph_snap_realm instead.
+ * reached, *realmp is set to the root ceph_snap_realm instead.
  *
  * Note that the caller is responsible for calling ceph_put_snap_realm() on the
  * returned realm.
@@ -211,10 +211,9 @@ void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
  * this function will return -EAGAIN; otherwise, the snaprealms walk-through
  * will be restarted.
  */
-static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
-					       struct inode *inode,
-					       enum quota_get_realm which_quota,
-					       bool retry)
+static int get_quota_realm(struct ceph_mds_client *mdsc, struct inode *inode,
+			   enum quota_get_realm which_quota,
+			   struct ceph_snap_realm **realmp, bool retry)
 {
 	struct ceph_client *cl = mdsc->fsc->client;
 	struct ceph_inode_info *ci = NULL;
@@ -222,8 +221,10 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
 	struct inode *in;
 	bool has_quota;
 
+	if (realmp)
+		*realmp = NULL;
 	if (ceph_snap(inode) != CEPH_NOSNAP)
-		return NULL;
+		return 0;
 
 restart:
 	realm = ceph_inode(inode)->i_snap_realm;
@@ -250,7 +251,7 @@ restart:
 				break;
 			ceph_put_snap_realm(mdsc, realm);
 			if (!retry)
-				return ERR_PTR(-EAGAIN);
+				return -EAGAIN;
 			goto restart;
 		}
 
@@ -259,8 +260,11 @@ restart:
 		iput(in);
 
 		next = realm->parent;
-		if (has_quota || !next)
-		       return realm;
+		if (has_quota || !next) {
+			if (realmp)
+				*realmp = realm;
+			return 0;
+		}
 
 		ceph_get_snap_realm(mdsc, next);
 		ceph_put_snap_realm(mdsc, realm);
@@ -269,7 +273,7 @@ restart:
 	if (realm)
 		ceph_put_snap_realm(mdsc, realm);
 
-	return NULL;
+	return 0;
 }
 
 bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
@@ -277,6 +281,7 @@ bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
 	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb);
 	struct ceph_snap_realm *old_realm, *new_realm;
 	bool is_same;
+	int ret;
 
 restart:
 	/*
@@ -286,9 +291,9 @@ restart:
 	 * dropped and we can then restart the whole operation.
 	 */
 	down_read(&mdsc->snap_rwsem);
-	old_realm = get_quota_realm(mdsc, old, QUOTA_GET_ANY, true);
-	new_realm = get_quota_realm(mdsc, new, QUOTA_GET_ANY, false);
-	if (PTR_ERR(new_realm) == -EAGAIN) {
+	get_quota_realm(mdsc, old, QUOTA_GET_ANY, &old_realm, true);
+	ret = get_quota_realm(mdsc, new, QUOTA_GET_ANY, &new_realm, false);
+	if (ret == -EAGAIN) {
 		up_read(&mdsc->snap_rwsem);
 		if (old_realm)
 			ceph_put_snap_realm(mdsc, old_realm);
@@ -492,8 +497,8 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
 	bool is_updated = false;
 
 	down_read(&mdsc->snap_rwsem);
-	realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root),
-				QUOTA_GET_MAX_BYTES, true);
+	get_quota_realm(mdsc, d_inode(fsc->sb->s_root), QUOTA_GET_MAX_BYTES,
+			&realm, true);
 	up_read(&mdsc->snap_rwsem);
 	if (!realm)
 		return false;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index fe0f64a0acb2..b06e2bc86221 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -3,6 +3,7 @@
 #define _FS_CEPH_SUPER_H
 
 #include <linux/ceph/ceph_debug.h>
+#include <linux/ceph/osd_client.h>
 
 #include <asm/unaligned.h>
 #include <linux/backing-dev.h>
@@ -1407,6 +1408,19 @@ static inline void __ceph_update_quota(struct ceph_inode_info *ci,
 		ceph_adjust_quota_realms_count(&ci->netfs.inode, has_quota);
 }
 
+static inline int __ceph_sparse_read_ext_count(struct inode *inode, u64 len)
+{
+	int cnt = 0;
+
+	if (IS_ENCRYPTED(inode)) {
+		cnt = len >> CEPH_FSCRYPT_BLOCK_SHIFT;
+		if (cnt > CEPH_SPARSE_EXT_ARRAY_INITIAL)
+			cnt = 0;
+	}
+
+	return cnt;
+}
+
 extern void ceph_handle_quota(struct ceph_mds_client *mdsc,
 			      struct ceph_mds_session *session,
 			      struct ceph_msg *msg);
diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig
index 1d318f85232d..fffd3919343e 100644
--- a/fs/erofs/Kconfig
+++ b/fs/erofs/Kconfig
@@ -114,8 +114,11 @@ config EROFS_FS_ZIP_DEFLATE
 
 config EROFS_FS_ONDEMAND
 	bool "EROFS fscache-based on-demand read support"
-	depends on CACHEFILES_ONDEMAND && (EROFS_FS=m && FSCACHE || EROFS_FS=y && FSCACHE=y)
-	default n
+	depends on EROFS_FS
+	select NETFS_SUPPORT
+	select FSCACHE
+	select CACHEFILES
+	select CACHEFILES_ONDEMAND
 	help
 	  This permits EROFS to use fscache-backed data blobs with on-demand
 	  read support.
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 1d65b9f60a39..072ef6a66823 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -408,7 +408,7 @@ int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb)
 	int size, ret = 0;
 
 	if (!erofs_sb_has_compr_cfgs(sbi)) {
-		sbi->available_compr_algs = Z_EROFS_COMPRESSION_LZ4;
+		sbi->available_compr_algs = 1 << Z_EROFS_COMPRESSION_LZ4;
 		return z_erofs_load_lz4_config(sb, dsb, NULL, 0);
 	}
 
diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index 87ff35bff8d5..bc12030393b2 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -165,10 +165,10 @@ static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
 static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
 {
 	int ret;
-	struct erofs_fscache *ctx = folio_mapping(folio)->host->i_private;
+	struct erofs_fscache *ctx = folio->mapping->host->i_private;
 	struct erofs_fscache_request *req;
 
-	req = erofs_fscache_req_alloc(folio_mapping(folio),
+	req = erofs_fscache_req_alloc(folio->mapping,
 				folio_pos(folio), folio_size(folio));
 	if (IS_ERR(req)) {
 		folio_unlock(folio);
@@ -276,7 +276,7 @@ static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
 	struct erofs_fscache_request *req;
 	int ret;
 
-	req = erofs_fscache_req_alloc(folio_mapping(folio),
+	req = erofs_fscache_req_alloc(folio->mapping,
 			folio_pos(folio), folio_size(folio));
 	if (IS_ERR(req)) {
 		folio_unlock(folio);
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 9753875e41cb..e313c936351d 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -454,7 +454,7 @@ static int z_erofs_do_map_blocks(struct inode *inode,
 		.map = map,
 	};
 	int err = 0;
-	unsigned int lclusterbits, endoff;
+	unsigned int lclusterbits, endoff, afmt;
 	unsigned long initial_lcn;
 	unsigned long long ofs, end;
 
@@ -543,17 +543,20 @@ static int z_erofs_do_map_blocks(struct inode *inode,
 			err = -EFSCORRUPTED;
 			goto unmap_out;
 		}
-		if (vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER)
-			map->m_algorithmformat =
-				Z_EROFS_COMPRESSION_INTERLACED;
-		else
-			map->m_algorithmformat =
-				Z_EROFS_COMPRESSION_SHIFTED;
-	} else if (m.headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) {
-		map->m_algorithmformat = vi->z_algorithmtype[1];
+		afmt = vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER ?
+			Z_EROFS_COMPRESSION_INTERLACED :
+			Z_EROFS_COMPRESSION_SHIFTED;
 	} else {
-		map->m_algorithmformat = vi->z_algorithmtype[0];
+		afmt = m.headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2 ?
+			vi->z_algorithmtype[1] : vi->z_algorithmtype[0];
+		if (!(EROFS_I_SB(inode)->available_compr_algs & (1 << afmt))) {
+			erofs_err(inode->i_sb, "inconsistent algorithmtype %u for nid %llu",
+				  afmt, vi->nid);
+			err = -EFSCORRUPTED;
+			goto unmap_out;
+		}
 	}
+	map->m_algorithmformat = afmt;
 
 	if ((flags & EROFS_GET_BLOCKS_FIEMAP) ||
 	    ((flags & EROFS_GET_BLOCKS_READMORE) &&
diff --git a/fs/exec.c b/fs/exec.c
index 73e4045df271..af4fbb61cd53 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -128,7 +128,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
 	struct filename *tmp = getname(library);
 	int error = PTR_ERR(tmp);
 	static const struct open_flags uselib_flags = {
-		.open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
+		.open_flag = O_LARGEFILE | O_RDONLY,
 		.acc_mode = MAY_READ | MAY_EXEC,
 		.intent = LOOKUP_OPEN,
 		.lookup_flags = LOOKUP_FOLLOW,
@@ -904,6 +904,10 @@ EXPORT_SYMBOL(transfer_args_to_stack);
 
 #endif /* CONFIG_MMU */
 
+/*
+ * On success, caller must call do_close_execat() on the returned
+ * struct file to close it.
+ */
 static struct file *do_open_execat(int fd, struct filename *name, int flags)
 {
 	struct file *file;
@@ -948,6 +952,17 @@ exit:
 	return ERR_PTR(err);
 }
 
+/**
+ * open_exec - Open a path name for execution
+ *
+ * @name: path name to open with the intent of executing it.
+ *
+ * Returns ERR_PTR on failure or allocated struct file on success.
+ *
+ * As this is a wrapper for the internal do_open_execat(), callers
+ * must call allow_write_access() before fput() on release. Also see
+ * do_close_execat().
+ */
 struct file *open_exec(const char *name)
 {
 	struct filename *filename = getname_kernel(name);
@@ -1409,6 +1424,9 @@ int begin_new_exec(struct linux_binprm * bprm)
 
 out_unlock:
 	up_write(&me->signal->exec_update_lock);
+	if (!bprm->cred)
+		mutex_unlock(&me->signal->cred_guard_mutex);
+
 out:
 	return retval;
 }
@@ -1484,6 +1502,15 @@ static int prepare_bprm_creds(struct linux_binprm *bprm)
 	return -ENOMEM;
 }
 
+/* Matches do_open_execat() */
+static void do_close_execat(struct file *file)
+{
+	if (!file)
+		return;
+	allow_write_access(file);
+	fput(file);
+}
+
 static void free_bprm(struct linux_binprm *bprm)
 {
 	if (bprm->mm) {
@@ -1495,10 +1522,7 @@ static void free_bprm(struct linux_binprm *bprm)
 		mutex_unlock(&current->signal->cred_guard_mutex);
 		abort_creds(bprm->cred);
 	}
-	if (bprm->file) {
-		allow_write_access(bprm->file);
-		fput(bprm->file);
-	}
+	do_close_execat(bprm->file);
 	if (bprm->executable)
 		fput(bprm->executable);
 	/* If a binfmt changed the interp, free it. */
@@ -1508,12 +1532,23 @@ static void free_bprm(struct linux_binprm *bprm)
 	kfree(bprm);
 }
 
-static struct linux_binprm *alloc_bprm(int fd, struct filename *filename)
+static struct linux_binprm *alloc_bprm(int fd, struct filename *filename, int flags)
 {
-	struct linux_binprm *bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
+	struct linux_binprm *bprm;
+	struct file *file;
 	int retval = -ENOMEM;
-	if (!bprm)
-		goto out;
+
+	file = do_open_execat(fd, filename, flags);
+	if (IS_ERR(file))
+		return ERR_CAST(file);
+
+	bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
+	if (!bprm) {
+		do_close_execat(file);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	bprm->file = file;
 
 	if (fd == AT_FDCWD || filename->name[0] == '/') {
 		bprm->filename = filename->name;
@@ -1526,18 +1561,28 @@ static struct linux_binprm *alloc_bprm(int fd, struct filename *filename)
 		if (!bprm->fdpath)
 			goto out_free;
 
+		/*
+		 * Record that a name derived from an O_CLOEXEC fd will be
+		 * inaccessible after exec.  This allows the code in exec to
+		 * choose to fail when the executable is not mmaped into the
+		 * interpreter and an open file descriptor is not passed to
+		 * the interpreter.  This makes for a better user experience
+		 * than having the interpreter start and then immediately fail
+		 * when it finds the executable is inaccessible.
+		 */
+		if (get_close_on_exec(fd))
+			bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
+
 		bprm->filename = bprm->fdpath;
 	}
 	bprm->interp = bprm->filename;
 
 	retval = bprm_mm_init(bprm);
-	if (retval)
-		goto out_free;
-	return bprm;
+	if (!retval)
+		return bprm;
 
 out_free:
 	free_bprm(bprm);
-out:
 	return ERR_PTR(retval);
 }
 
@@ -1588,6 +1633,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
 	}
 	rcu_read_unlock();
 
+	/* "users" and "in_exec" locked for copy_fs() */
 	if (p->fs->users > n_fs)
 		bprm->unsafe |= LSM_UNSAFE_SHARE;
 	else
@@ -1804,13 +1850,8 @@ static int exec_binprm(struct linux_binprm *bprm)
 	return 0;
 }
 
-/*
- * sys_execve() executes a new program.
- */
-static int bprm_execve(struct linux_binprm *bprm,
-		       int fd, struct filename *filename, int flags)
+static int bprm_execve(struct linux_binprm *bprm)
 {
-	struct file *file;
 	int retval;
 
 	retval = prepare_bprm_creds(bprm);
@@ -1826,26 +1867,8 @@ static int bprm_execve(struct linux_binprm *bprm,
 	current->in_execve = 1;
 	sched_mm_cid_before_execve(current);
 
-	file = do_open_execat(fd, filename, flags);
-	retval = PTR_ERR(file);
-	if (IS_ERR(file))
-		goto out_unmark;
-
 	sched_exec();
 
-	bprm->file = file;
-	/*
-	 * Record that a name derived from an O_CLOEXEC fd will be
-	 * inaccessible after exec.  This allows the code in exec to
-	 * choose to fail when the executable is not mmaped into the
-	 * interpreter and an open file descriptor is not passed to
-	 * the interpreter.  This makes for a better user experience
-	 * than having the interpreter start and then immediately fail
-	 * when it finds the executable is inaccessible.
-	 */
-	if (bprm->fdpath && get_close_on_exec(fd))
-		bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
-
 	/* Set the unchanging part of bprm->cred */
 	retval = security_bprm_creds_for_exec(bprm);
 	if (retval)
@@ -1875,7 +1898,6 @@ out:
 	if (bprm->point_of_no_return && !fatal_signal_pending(current))
 		force_fatal_sig(SIGSEGV);
 
-out_unmark:
 	sched_mm_cid_after_execve(current);
 	current->fs->in_exec = 0;
 	current->in_execve = 0;
@@ -1910,7 +1932,7 @@ static int do_execveat_common(int fd, struct filename *filename,
 	 * further execve() calls fail. */
 	current->flags &= ~PF_NPROC_EXCEEDED;
 
-	bprm = alloc_bprm(fd, filename);
+	bprm = alloc_bprm(fd, filename, flags);
 	if (IS_ERR(bprm)) {
 		retval = PTR_ERR(bprm);
 		goto out_ret;
@@ -1959,7 +1981,7 @@ static int do_execveat_common(int fd, struct filename *filename,
 		bprm->argc = 1;
 	}
 
-	retval = bprm_execve(bprm, fd, filename, flags);
+	retval = bprm_execve(bprm);
 out_free:
 	free_bprm(bprm);
 
@@ -1984,7 +2006,7 @@ int kernel_execve(const char *kernel_filename,
 	if (IS_ERR(filename))
 		return PTR_ERR(filename);
 
-	bprm = alloc_bprm(fd, filename);
+	bprm = alloc_bprm(fd, filename, 0);
 	if (IS_ERR(bprm)) {
 		retval = PTR_ERR(bprm);
 		goto out_ret;
@@ -2019,7 +2041,7 @@ int kernel_execve(const char *kernel_filename,
 	if (retval < 0)
 		goto out_free;
 
-	retval = bprm_execve(bprm, fd, filename, 0);
+	retval = bprm_execve(bprm);
 out_free:
 	free_bprm(bprm);
 out_ret:
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1767493dffda..3d84fcc471c6 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1675,11 +1675,11 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 
 	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
 		inode->i_state |= I_DIRTY_PAGES;
-	else if (unlikely(inode->i_state & I_PINNING_FSCACHE_WB)) {
+	else if (unlikely(inode->i_state & I_PINNING_NETFS_WB)) {
 		if (!(inode->i_state & I_DIRTY_PAGES)) {
-			inode->i_state &= ~I_PINNING_FSCACHE_WB;
-			wbc->unpinned_fscache_wb = true;
-			dirty |= I_PINNING_FSCACHE_WB; /* Cause write_inode */
+			inode->i_state &= ~I_PINNING_NETFS_WB;
+			wbc->unpinned_netfs_wb = true;
+			dirty |= I_PINNING_NETFS_WB; /* Cause write_inode */
 		}
 	}
 
@@ -1691,7 +1691,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 		if (ret == 0)
 			ret = err;
 	}
-	wbc->unpinned_fscache_wb = false;
+	wbc->unpinned_netfs_wb = false;
 	trace_writeback_single_inode(inode, wbc, nr_to_write);
 	return ret;
 }
diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig
deleted file mode 100644
index b313a978ae0a..000000000000
--- a/fs/fscache/Kconfig
+++ /dev/null
@@ -1,40 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-
-config FSCACHE
-	tristate "General filesystem local caching manager"
-	select NETFS_SUPPORT
-	help
-	  This option enables a generic filesystem caching manager that can be
-	  used by various network and other filesystems to cache data locally.
-	  Different sorts of caches can be plugged in, depending on the
-	  resources available.
-
-	  See Documentation/filesystems/caching/fscache.rst for more information.
-
-config FSCACHE_STATS
-	bool "Gather statistical information on local caching"
-	depends on FSCACHE && PROC_FS
-	select NETFS_STATS
-	help
-	  This option causes statistical information to be gathered on local
-	  caching and exported through file:
-
-		/proc/fs/fscache/stats
-
-	  The gathering of statistics adds a certain amount of overhead to
-	  execution as there are a quite a few stats gathered, and on a
-	  multi-CPU system these may be on cachelines that keep bouncing
-	  between CPUs.  On the other hand, the stats are very useful for
-	  debugging purposes.  Saying 'Y' here is recommended.
-
-	  See Documentation/filesystems/caching/fscache.rst for more information.
-
-config FSCACHE_DEBUG
-	bool "Debug FS-Cache"
-	depends on FSCACHE
-	help
-	  This permits debugging to be dynamically enabled in the local caching
-	  management module.  If this is set, the debugging output may be
-	  enabled by setting bits in /sys/modules/fscache/parameter/debug.
-
-	  See Documentation/filesystems/caching/fscache.rst for more information.
diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile
deleted file mode 100644
index afb090ea16c4..000000000000
--- a/fs/fscache/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for general filesystem caching code
-#
-
-fscache-y := \
-	cache.o \
-	cookie.o \
-	io.o \
-	main.o \
-	volume.o
-
-fscache-$(CONFIG_PROC_FS) += proc.o
-fscache-$(CONFIG_FSCACHE_STATS) += stats.o
-
-obj-$(CONFIG_FSCACHE) := fscache.o
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
deleted file mode 100644
index 1336f517e9b1..000000000000
--- a/fs/fscache/internal.h
+++ /dev/null
@@ -1,277 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* Internal definitions for FS-Cache
- *
- * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- */
-
-#ifdef pr_fmt
-#undef pr_fmt
-#endif
-
-#define pr_fmt(fmt) "FS-Cache: " fmt
-
-#include <linux/slab.h>
-#include <linux/fscache-cache.h>
-#include <trace/events/fscache.h>
-#include <linux/sched.h>
-#include <linux/seq_file.h>
-
-/*
- * cache.c
- */
-#ifdef CONFIG_PROC_FS
-extern const struct seq_operations fscache_caches_seq_ops;
-#endif
-bool fscache_begin_cache_access(struct fscache_cache *cache, enum fscache_access_trace why);
-void fscache_end_cache_access(struct fscache_cache *cache, enum fscache_access_trace why);
-struct fscache_cache *fscache_lookup_cache(const char *name, bool is_cache);
-void fscache_put_cache(struct fscache_cache *cache, enum fscache_cache_trace where);
-
-static inline enum fscache_cache_state fscache_cache_state(const struct fscache_cache *cache)
-{
-	return smp_load_acquire(&cache->state);
-}
-
-static inline bool fscache_cache_is_live(const struct fscache_cache *cache)
-{
-	return fscache_cache_state(cache) == FSCACHE_CACHE_IS_ACTIVE;
-}
-
-static inline void fscache_set_cache_state(struct fscache_cache *cache,
-					   enum fscache_cache_state new_state)
-{
-	smp_store_release(&cache->state, new_state);
-
-}
-
-static inline bool fscache_set_cache_state_maybe(struct fscache_cache *cache,
-						 enum fscache_cache_state old_state,
-						 enum fscache_cache_state new_state)
-{
-	return try_cmpxchg_release(&cache->state, &old_state, new_state);
-}
-
-/*
- * cookie.c
- */
-extern struct kmem_cache *fscache_cookie_jar;
-#ifdef CONFIG_PROC_FS
-extern const struct seq_operations fscache_cookies_seq_ops;
-#endif
-extern struct timer_list fscache_cookie_lru_timer;
-
-extern void fscache_print_cookie(struct fscache_cookie *cookie, char prefix);
-extern bool fscache_begin_cookie_access(struct fscache_cookie *cookie,
-					enum fscache_access_trace why);
-
-static inline void fscache_see_cookie(struct fscache_cookie *cookie,
-				      enum fscache_cookie_trace where)
-{
-	trace_fscache_cookie(cookie->debug_id, refcount_read(&cookie->ref),
-			     where);
-}
-
-/*
- * main.c
- */
-extern unsigned fscache_debug;
-
-extern unsigned int fscache_hash(unsigned int salt, const void *data, size_t len);
-
-/*
- * proc.c
- */
-#ifdef CONFIG_PROC_FS
-extern int __init fscache_proc_init(void);
-extern void fscache_proc_cleanup(void);
-#else
-#define fscache_proc_init()	(0)
-#define fscache_proc_cleanup()	do {} while (0)
-#endif
-
-/*
- * stats.c
- */
-#ifdef CONFIG_FSCACHE_STATS
-extern atomic_t fscache_n_volumes;
-extern atomic_t fscache_n_volumes_collision;
-extern atomic_t fscache_n_volumes_nomem;
-extern atomic_t fscache_n_cookies;
-extern atomic_t fscache_n_cookies_lru;
-extern atomic_t fscache_n_cookies_lru_expired;
-extern atomic_t fscache_n_cookies_lru_removed;
-extern atomic_t fscache_n_cookies_lru_dropped;
-
-extern atomic_t fscache_n_acquires;
-extern atomic_t fscache_n_acquires_ok;
-extern atomic_t fscache_n_acquires_oom;
-
-extern atomic_t fscache_n_invalidates;
-
-extern atomic_t fscache_n_relinquishes;
-extern atomic_t fscache_n_relinquishes_retire;
-extern atomic_t fscache_n_relinquishes_dropped;
-
-extern atomic_t fscache_n_resizes;
-extern atomic_t fscache_n_resizes_null;
-
-static inline void fscache_stat(atomic_t *stat)
-{
-	atomic_inc(stat);
-}
-
-static inline void fscache_stat_d(atomic_t *stat)
-{
-	atomic_dec(stat);
-}
-
-#define __fscache_stat(stat) (stat)
-
-int fscache_stats_show(struct seq_file *m, void *v);
-#else
-
-#define __fscache_stat(stat) (NULL)
-#define fscache_stat(stat) do {} while (0)
-#define fscache_stat_d(stat) do {} while (0)
-#endif
-
-/*
- * volume.c
- */
-#ifdef CONFIG_PROC_FS
-extern const struct seq_operations fscache_volumes_seq_ops;
-#endif
-
-struct fscache_volume *fscache_get_volume(struct fscache_volume *volume,
-					  enum fscache_volume_trace where);
-void fscache_put_volume(struct fscache_volume *volume,
-			enum fscache_volume_trace where);
-bool fscache_begin_volume_access(struct fscache_volume *volume,
-				 struct fscache_cookie *cookie,
-				 enum fscache_access_trace why);
-void fscache_create_volume(struct fscache_volume *volume, bool wait);
-
-
-/*****************************************************************************/
-/*
- * debug tracing
- */
-#define dbgprintk(FMT, ...) \
-	printk("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)
-
-#define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
-#define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
-#define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__)
-
-#define kjournal(FMT, ...) no_printk(FMT, ##__VA_ARGS__)
-
-#ifdef __KDEBUG
-#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__)
-#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__)
-#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__)
-
-#elif defined(CONFIG_FSCACHE_DEBUG)
-#define _enter(FMT, ...)			\
-do {						\
-	if (__do_kdebug(ENTER))			\
-		kenter(FMT, ##__VA_ARGS__);	\
-} while (0)
-
-#define _leave(FMT, ...)			\
-do {						\
-	if (__do_kdebug(LEAVE))			\
-		kleave(FMT, ##__VA_ARGS__);	\
-} while (0)
-
-#define _debug(FMT, ...)			\
-do {						\
-	if (__do_kdebug(DEBUG))			\
-		kdebug(FMT, ##__VA_ARGS__);	\
-} while (0)
-
-#else
-#define _enter(FMT, ...) no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__)
-#define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
-#define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__)
-#endif
-
-/*
- * determine whether a particular optional debugging point should be logged
- * - we need to go through three steps to persuade cpp to correctly join the
- *   shorthand in FSCACHE_DEBUG_LEVEL with its prefix
- */
-#define ____do_kdebug(LEVEL, POINT) \
-	unlikely((fscache_debug & \
-		  (FSCACHE_POINT_##POINT << (FSCACHE_DEBUG_ ## LEVEL * 3))))
-#define ___do_kdebug(LEVEL, POINT) \
-	____do_kdebug(LEVEL, POINT)
-#define __do_kdebug(POINT) \
-	___do_kdebug(FSCACHE_DEBUG_LEVEL, POINT)
-
-#define FSCACHE_DEBUG_CACHE	0
-#define FSCACHE_DEBUG_COOKIE	1
-#define FSCACHE_DEBUG_OBJECT	2
-#define FSCACHE_DEBUG_OPERATION	3
-
-#define FSCACHE_POINT_ENTER	1
-#define FSCACHE_POINT_LEAVE	2
-#define FSCACHE_POINT_DEBUG	4
-
-#ifndef FSCACHE_DEBUG_LEVEL
-#define FSCACHE_DEBUG_LEVEL CACHE
-#endif
-
-/*
- * assertions
- */
-#if 1 /* defined(__KDEBUGALL) */
-
-#define ASSERT(X)							\
-do {									\
-	if (unlikely(!(X))) {						\
-		pr_err("\n");					\
-		pr_err("Assertion failed\n");	\
-		BUG();							\
-	}								\
-} while (0)
-
-#define ASSERTCMP(X, OP, Y)						\
-do {									\
-	if (unlikely(!((X) OP (Y)))) {					\
-		pr_err("\n");					\
-		pr_err("Assertion failed\n");	\
-		pr_err("%lx " #OP " %lx is false\n",		\
-		       (unsigned long)(X), (unsigned long)(Y));		\
-		BUG();							\
-	}								\
-} while (0)
-
-#define ASSERTIF(C, X)							\
-do {									\
-	if (unlikely((C) && !(X))) {					\
-		pr_err("\n");					\
-		pr_err("Assertion failed\n");	\
-		BUG();							\
-	}								\
-} while (0)
-
-#define ASSERTIFCMP(C, X, OP, Y)					\
-do {									\
-	if (unlikely((C) && !((X) OP (Y)))) {				\
-		pr_err("\n");					\
-		pr_err("Assertion failed\n");	\
-		pr_err("%lx " #OP " %lx is false\n",		\
-		       (unsigned long)(X), (unsigned long)(Y));		\
-		BUG();							\
-	}								\
-} while (0)
-
-#else
-
-#define ASSERT(X)			do {} while (0)
-#define ASSERTCMP(X, OP, Y)		do {} while (0)
-#define ASSERTIF(C, X)			do {} while (0)
-#define ASSERTIFCMP(C, X, OP, Y)	do {} while (0)
-
-#endif /* assert or not */
diff --git a/fs/netfs/Kconfig b/fs/netfs/Kconfig
index b4db21022cb4..bec805e0c44c 100644
--- a/fs/netfs/Kconfig
+++ b/fs/netfs/Kconfig
@@ -21,3 +21,42 @@ config NETFS_STATS
 	  multi-CPU system these may be on cachelines that keep bouncing
 	  between CPUs.  On the other hand, the stats are very useful for
 	  debugging purposes.  Saying 'Y' here is recommended.
+
+config FSCACHE
+	bool "General filesystem local caching manager"
+	depends on NETFS_SUPPORT
+	help
+	  This option enables a generic filesystem caching manager that can be
+	  used by various network and other filesystems to cache data locally.
+	  Different sorts of caches can be plugged in, depending on the
+	  resources available.
+
+	  See Documentation/filesystems/caching/fscache.rst for more information.
+
+config FSCACHE_STATS
+	bool "Gather statistical information on local caching"
+	depends on FSCACHE && PROC_FS
+	select NETFS_STATS
+	help
+	  This option causes statistical information to be gathered on local
+	  caching and exported through file:
+
+		/proc/fs/fscache/stats
+
+	  The gathering of statistics adds a certain amount of overhead to
+	  execution as there are quite a few stats gathered, and on a
+	  multi-CPU system these may be on cachelines that keep bouncing
+	  between CPUs.  On the other hand, the stats are very useful for
+	  debugging purposes.  Saying 'Y' here is recommended.
+
+	  See Documentation/filesystems/caching/fscache.rst for more information.
+
+config FSCACHE_DEBUG
+	bool "Debug FS-Cache"
+	depends on FSCACHE
+	help
+	  This permits debugging to be dynamically enabled in the local caching
+	  management module.  If this is set, the debugging output may be
+	  enabled by setting bits in /sys/module/fscache/parameters/debug.
+
+	  See Documentation/filesystems/caching/fscache.rst for more information.
diff --git a/fs/netfs/Makefile b/fs/netfs/Makefile
index 386d6fb92793..d4d1d799819e 100644
--- a/fs/netfs/Makefile
+++ b/fs/netfs/Makefile
@@ -2,11 +2,29 @@
 
 netfs-y := \
 	buffered_read.o \
+	buffered_write.o \
+	direct_read.o \
+	direct_write.o \
 	io.o \
 	iterator.o \
+	locking.o \
 	main.o \
-	objects.o
+	misc.o \
+	objects.o \
+	output.o
 
 netfs-$(CONFIG_NETFS_STATS) += stats.o
 
-obj-$(CONFIG_NETFS_SUPPORT) := netfs.o
+netfs-$(CONFIG_FSCACHE) += \
+	fscache_cache.o \
+	fscache_cookie.o \
+	fscache_io.o \
+	fscache_main.o \
+	fscache_volume.o
+
+ifeq ($(CONFIG_PROC_FS),y)
+netfs-$(CONFIG_FSCACHE) += fscache_proc.o
+endif
+netfs-$(CONFIG_FSCACHE_STATS) += fscache_stats.o
+
+obj-$(CONFIG_NETFS_SUPPORT) += netfs.o
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index 2cd3ccf4c439..3298c29b5548 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -16,6 +16,7 @@
 void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
 {
 	struct netfs_io_subrequest *subreq;
+	struct netfs_folio *finfo;
 	struct folio *folio;
 	pgoff_t start_page = rreq->start / PAGE_SIZE;
 	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
@@ -63,6 +64,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
 				break;
 			}
 			if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) {
+				trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
 				folio_start_fscache(folio);
 				folio_started = true;
 			}
@@ -86,11 +88,20 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
 
 		if (!pg_failed) {
 			flush_dcache_folio(folio);
+			finfo = netfs_folio_info(folio);
+			if (finfo) {
+				trace_netfs_folio(folio, netfs_folio_trace_filled_gaps);
+				if (finfo->netfs_group)
+					folio_change_private(folio, finfo->netfs_group);
+				else
+					folio_detach_private(folio);
+				kfree(finfo);
+			}
 			folio_mark_uptodate(folio);
 		}
 
 		if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
-			if (folio_index(folio) == rreq->no_unlock_folio &&
+			if (folio->index == rreq->no_unlock_folio &&
 			    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
 				_debug("no unlock");
 			else
@@ -147,6 +158,15 @@ static void netfs_rreq_expand(struct netfs_io_request *rreq,
 	}
 }
 
+/*
+ * Begin an operation, and fetch the stored zero point value from the cookie if
+ * available.
+ */
+static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_inode *ctx)
+{
+	return fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx));
+}
+
 /**
  * netfs_readahead - Helper to manage a read request
  * @ractl: The description of the readahead request
@@ -180,11 +200,9 @@ void netfs_readahead(struct readahead_control *ractl)
 	if (IS_ERR(rreq))
 		return;
 
-	if (ctx->ops->begin_cache_operation) {
-		ret = ctx->ops->begin_cache_operation(rreq);
-		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
-			goto cleanup_free;
-	}
+	ret = netfs_begin_cache_read(rreq, ctx);
+	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
+		goto cleanup_free;
 
 	netfs_stat(&netfs_n_rh_readahead);
 	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
@@ -192,6 +210,10 @@ void netfs_readahead(struct readahead_control *ractl)
 
 	netfs_rreq_expand(rreq, ractl);
 
+	/* Set up the output buffer */
+	iov_iter_xarray(&rreq->iter, ITER_DEST, &ractl->mapping->i_pages,
+			rreq->start, rreq->len);
+
 	/* Drop the refs on the folios here rather than in the cache or
 	 * filesystem.  The locks will be dropped in netfs_rreq_unlock().
 	 */
@@ -199,6 +221,7 @@ void netfs_readahead(struct readahead_control *ractl)
 		;
 
 	netfs_begin_read(rreq, false);
+	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
 	return;
 
 cleanup_free:
@@ -223,12 +246,13 @@ EXPORT_SYMBOL(netfs_readahead);
  */
 int netfs_read_folio(struct file *file, struct folio *folio)
 {
-	struct address_space *mapping = folio_file_mapping(folio);
+	struct address_space *mapping = folio->mapping;
 	struct netfs_io_request *rreq;
 	struct netfs_inode *ctx = netfs_inode(mapping->host);
+	struct folio *sink = NULL;
 	int ret;
 
-	_enter("%lx", folio_index(folio));
+	_enter("%lx", folio->index);
 
 	rreq = netfs_alloc_request(mapping, file,
 				   folio_file_pos(folio), folio_size(folio),
@@ -238,15 +262,64 @@ int netfs_read_folio(struct file *file, struct folio *folio)
 		goto alloc_error;
 	}
 
-	if (ctx->ops->begin_cache_operation) {
-		ret = ctx->ops->begin_cache_operation(rreq);
-		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
-			goto discard;
-	}
+	ret = netfs_begin_cache_read(rreq, ctx);
+	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
+		goto discard;
 
 	netfs_stat(&netfs_n_rh_readpage);
 	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);
-	return netfs_begin_read(rreq, true);
+
+	/* Set up the output buffer */
+	if (folio_test_dirty(folio)) {
+		/* Handle someone trying to read from an unflushed streaming
+		 * write.  We fiddle the buffer so that a gap at the beginning
+		 * and/or a gap at the end get copied to, but the middle is
+		 * discarded.
+		 */
+		struct netfs_folio *finfo = netfs_folio_info(folio);
+		struct bio_vec *bvec;
+		unsigned int from = finfo->dirty_offset;
+		unsigned int to = from + finfo->dirty_len;
+		unsigned int off = 0, i = 0;
+		size_t flen = folio_size(folio);
+		size_t nr_bvec = flen / PAGE_SIZE + 2;
+		size_t part;
+
+		ret = -ENOMEM;
+		bvec = kmalloc_array(nr_bvec, sizeof(*bvec), GFP_KERNEL);
+		if (!bvec)
+			goto discard;
+
+		sink = folio_alloc(GFP_KERNEL, 0);
+		if (!sink)
+			goto discard;
+
+		trace_netfs_folio(folio, netfs_folio_trace_read_gaps);
+
+		rreq->direct_bv = bvec;
+		rreq->direct_bv_count = nr_bvec;
+		if (from > 0) {
+			bvec_set_folio(&bvec[i++], folio, from, 0);
+			off = from;
+		}
+		while (off < to) {
+			part = min_t(size_t, to - off, PAGE_SIZE);
+			bvec_set_folio(&bvec[i++], sink, part, 0);
+			off += part;
+		}
+		if (to < flen)
+			bvec_set_folio(&bvec[i++], folio, flen - to, to);
+		iov_iter_bvec(&rreq->iter, ITER_DEST, bvec, i, rreq->len);
+	} else {
+		iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages,
+				rreq->start, rreq->len);
+	}
+
+	ret = netfs_begin_read(rreq, true);
+	if (sink)
+		folio_put(sink);
+	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
+	return ret < 0 ? ret : 0;
 
 discard:
 	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
@@ -387,14 +460,12 @@ retry:
 		ret = PTR_ERR(rreq);
 		goto error;
 	}
-	rreq->no_unlock_folio	= folio_index(folio);
+	rreq->no_unlock_folio	= folio->index;
 	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
 
-	if (ctx->ops->begin_cache_operation) {
-		ret = ctx->ops->begin_cache_operation(rreq);
-		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
-			goto error_put;
-	}
+	ret = netfs_begin_cache_read(rreq, ctx);
+	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
+		goto error_put;
 
 	netfs_stat(&netfs_n_rh_write_begin);
 	trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);
@@ -405,6 +476,10 @@ retry:
 	ractl._nr_pages = folio_nr_pages(folio);
 	netfs_rreq_expand(rreq, &ractl);
 
+	/* Set up the output buffer */
+	iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages,
+			rreq->start, rreq->len);
+
 	/* We hold the folio locks, so we can drop the references */
 	folio_get(folio);
 	while (readahead_folio(&ractl))
@@ -413,6 +488,7 @@ retry:
 	ret = netfs_begin_read(rreq, true);
 	if (ret < 0)
 		goto error;
+	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
 
 have_folio:
 	ret = folio_wait_fscache_killable(folio);
@@ -434,3 +510,124 @@ error:
 	return ret;
 }
 EXPORT_SYMBOL(netfs_write_begin);
+
+/*
+ * Preload the data into a page we're proposing to write into.
+ */
+int netfs_prefetch_for_write(struct file *file, struct folio *folio,
+			     size_t offset, size_t len)
+{
+	struct netfs_io_request *rreq;
+	struct address_space *mapping = folio->mapping;
+	struct netfs_inode *ctx = netfs_inode(mapping->host);
+	unsigned long long start = folio_pos(folio);
+	size_t flen = folio_size(folio);
+	int ret;
+
+	_enter("%zx @%llx", flen, start);
+
+	ret = -ENOMEM;
+
+	rreq = netfs_alloc_request(mapping, file, start, flen,
+				   NETFS_READ_FOR_WRITE);
+	if (IS_ERR(rreq)) {
+		ret = PTR_ERR(rreq);
+		goto error;
+	}
+
+	rreq->no_unlock_folio = folio->index;
+	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
+	ret = netfs_begin_cache_read(rreq, ctx);
+	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
+		goto error_put;
+
+	netfs_stat(&netfs_n_rh_write_begin);
+	trace_netfs_read(rreq, start, flen, netfs_read_trace_prefetch_for_write);
+
+	/* Set up the output buffer */
+	iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages,
+			rreq->start, rreq->len);
+
+	ret = netfs_begin_read(rreq, true);
+	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
+	return ret;
+
+error_put:
+	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
+error:
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/**
+ * netfs_buffered_read_iter - Filesystem buffered I/O read routine
+ * @iocb: kernel I/O control block
+ * @iter: destination for the data read
+ *
+ * This is the ->read_iter() routine for all filesystems that can use the page
+ * cache directly.
+ *
+ * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
+ * returned when no data can be read without waiting for I/O requests to
+ * complete; it doesn't prevent readahead.
+ *
+ * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
+ * shall be made for the read or for readahead.  When no data can be read,
+ * -EAGAIN shall be returned.  When readahead would be triggered, a partial,
+ * possibly empty read shall be returned.
+ *
+ * Return:
+ * * number of bytes copied, even for partial reads
+ * * negative error code (or 0 if IOCB_NOIO) if nothing was read
+ */
+ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+	struct netfs_inode *ictx = netfs_inode(inode);
+	ssize_t ret;
+
+	if (WARN_ON_ONCE((iocb->ki_flags & IOCB_DIRECT) ||
+			 test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags)))
+		return -EINVAL;
+
+	ret = netfs_start_io_read(inode);
+	if (ret == 0) {
+		ret = filemap_read(iocb, iter, 0);
+		netfs_end_io_read(inode);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(netfs_buffered_read_iter);
+
+/**
+ * netfs_file_read_iter - Generic filesystem read routine
+ * @iocb: kernel I/O control block
+ * @iter: destination for the data read
+ *
+ * Dispatch a read to netfs_unbuffered_read_iter() if IOCB_DIRECT or
+ * NETFS_ICTX_UNBUFFERED is set and to netfs_buffered_read_iter() otherwise.
+ *
+ * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
+ * returned when no data can be read without waiting for I/O requests to
+ * complete; it doesn't prevent readahead.
+ *
+ * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
+ * shall be made for the read or for readahead.  When no data can be read,
+ * -EAGAIN shall be returned.  When readahead would be triggered, a partial,
+ * possibly empty read shall be returned.
+ *
+ * Return:
+ * * number of bytes copied, even for partial reads
+ * * negative error code (or 0 if IOCB_NOIO) if nothing was read
+ */
+ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct netfs_inode *ictx = netfs_inode(iocb->ki_filp->f_mapping->host);
+
+	if ((iocb->ki_flags & IOCB_DIRECT) ||
+	    test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
+		return netfs_unbuffered_read_iter(iocb, iter);
+
+	return netfs_buffered_read_iter(iocb, iter);
+}
+EXPORT_SYMBOL(netfs_file_read_iter);
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
new file mode 100644
index 000000000000..a3059b3168fd
--- /dev/null
+++ b/fs/netfs/buffered_write.c
@@ -0,0 +1,1254 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Network filesystem high-level write support.
+ *
+ * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/export.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/pagevec.h>
+#include "internal.h"
+
+/*
+ * Determined write method.  Adjust netfs_folio_traces if this is changed.
+ */
+enum netfs_how_to_modify {
+	NETFS_FOLIO_IS_UPTODATE,	/* Folio is uptodate already */
+	NETFS_JUST_PREFETCH,		/* We have to read the folio anyway */
+	NETFS_WHOLE_FOLIO_MODIFY,	/* We're going to overwrite the whole folio */
+	NETFS_MODIFY_AND_CLEAR,		/* We can assume there is no data to be downloaded. */
+	NETFS_STREAMING_WRITE,		/* Store incomplete data in non-uptodate page. */
+	NETFS_STREAMING_WRITE_CONT,	/* Continue streaming write. */
+	NETFS_FLUSH_CONTENT,		/* Flush incompatible content. */
+};
+
+static void netfs_cleanup_buffered_write(struct netfs_io_request *wreq);
+
+static void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
+{
+	if (netfs_group && !folio_get_private(folio))
+		folio_attach_private(folio, netfs_get_group(netfs_group));
+}
+
+#if IS_ENABLED(CONFIG_FSCACHE)
+static void netfs_folio_start_fscache(bool caching, struct folio *folio)
+{
+	if (caching)
+		folio_start_fscache(folio);
+}
+#else
+static void netfs_folio_start_fscache(bool caching, struct folio *folio)
+{
+}
+#endif
+
+/*
+ * Decide how we should modify a folio.  We might be attempting to do
+ * write-streaming, in which case we don't want to do a local RMW cycle if we can
+ * avoid it.  If we're doing local caching or content crypto, we award that
+ * priority over avoiding RMW.  If the file is open readably, then we also
+ * assume that we may want to read what we wrote.
+ */
+static enum netfs_how_to_modify netfs_how_to_modify(struct netfs_inode *ctx,
+						    struct file *file,
+						    struct folio *folio,
+						    void *netfs_group,
+						    size_t flen,
+						    size_t offset,
+						    size_t len,
+						    bool maybe_trouble)
+{
+	struct netfs_folio *finfo = netfs_folio_info(folio);
+	loff_t pos = folio_file_pos(folio);
+
+	_enter("");
+
+	if (netfs_folio_group(folio) != netfs_group)
+		return NETFS_FLUSH_CONTENT;
+
+	if (folio_test_uptodate(folio))
+		return NETFS_FOLIO_IS_UPTODATE;
+
+	if (pos >= ctx->zero_point)
+		return NETFS_MODIFY_AND_CLEAR;
+
+	if (!maybe_trouble && offset == 0 && len >= flen)
+		return NETFS_WHOLE_FOLIO_MODIFY;
+
+	if (file->f_mode & FMODE_READ)
+		goto no_write_streaming;
+	if (test_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags))
+		goto no_write_streaming;
+
+	if (netfs_is_cache_enabled(ctx)) {
+		/* We don't want to get a streaming write on a file that loses
+		 * caching service temporarily because the backing store got
+		 * culled.
+		 */
+		if (!test_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags))
+			set_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags);
+		goto no_write_streaming;
+	}
+
+	if (!finfo)
+		return NETFS_STREAMING_WRITE;
+
+	/* We can continue a streaming write only if it continues on from the
+	 * previous.  If it overlaps, we must flush lest we suffer a partial
+	 * copy and disjoint dirty regions.
+	 */
+	if (offset == finfo->dirty_offset + finfo->dirty_len)
+		return NETFS_STREAMING_WRITE_CONT;
+	return NETFS_FLUSH_CONTENT;
+
+no_write_streaming:
+	if (finfo) {
+		netfs_stat(&netfs_n_wh_wstream_conflict);
+		return NETFS_FLUSH_CONTENT;
+	}
+	return NETFS_JUST_PREFETCH;
+}
+
+/*
+ * Grab a folio for writing and lock it.  Attempt to allocate as large a folio
+ * as possible to hold as much of the remaining length as possible in one go.
+ */
+static struct folio *netfs_grab_folio_for_write(struct address_space *mapping,
+						loff_t pos, size_t part)
+{
+	pgoff_t index = pos / PAGE_SIZE;
+	fgf_t fgp_flags = FGP_WRITEBEGIN;
+
+	if (mapping_large_folio_support(mapping))
+		fgp_flags |= fgf_set_order(pos % PAGE_SIZE + part);
+
+	return __filemap_get_folio(mapping, index, fgp_flags,
+				   mapping_gfp_mask(mapping));
+}
+
+/**
+ * netfs_perform_write - Copy data into the pagecache.
+ * @iocb: The operation parameters
+ * @iter: The source buffer
+ * @netfs_group: Grouping for dirty pages (eg. ceph snaps).
+ *
+ * Copy data into pagecache pages attached to the inode specified by @iocb.
+ * The caller must hold appropriate inode locks.
+ *
+ * Dirty pages are tagged with a netfs_folio struct if they're not up to date
+ * to indicate the range modified.  Dirty pages may also be tagged with a
+ * netfs-specific grouping such that data from an old group gets flushed before
+ * a new one is started.
+ */
+ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
+			    struct netfs_group *netfs_group)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file_inode(file);
+	struct address_space *mapping = inode->i_mapping;
+	struct netfs_inode *ctx = netfs_inode(inode);
+	struct writeback_control wbc = {
+		.sync_mode	= WB_SYNC_NONE,
+		.for_sync	= true,
+		.nr_to_write	= LONG_MAX,
+		.range_start	= iocb->ki_pos,
+		.range_end	= iocb->ki_pos + iter->count,
+	};
+	struct netfs_io_request *wreq = NULL;
+	struct netfs_folio *finfo;
+	struct folio *folio;
+	enum netfs_how_to_modify howto;
+	enum netfs_folio_trace trace;
+	unsigned int bdp_flags = (iocb->ki_flags & IOCB_SYNC) ? 0: BDP_ASYNC;
+	ssize_t written = 0, ret;
+	loff_t i_size, pos = iocb->ki_pos, from, to;
+	size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
+	bool maybe_trouble = false;
+
+	if (unlikely(test_bit(NETFS_ICTX_WRITETHROUGH, &ctx->flags) ||
+		     iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC))
+	    ) {
+		if (pos < i_size_read(inode)) {
+			ret = filemap_write_and_wait_range(mapping, pos, pos + iter->count);
+			if (ret < 0) {
+				goto out;
+			}
+		}
+
+		wbc_attach_fdatawrite_inode(&wbc, mapping->host);
+
+		wreq = netfs_begin_writethrough(iocb, iter->count);
+		if (IS_ERR(wreq)) {
+			wbc_detach_inode(&wbc);
+			ret = PTR_ERR(wreq);
+			wreq = NULL;
+			goto out;
+		}
+		if (!is_sync_kiocb(iocb))
+			wreq->iocb = iocb;
+		wreq->cleanup = netfs_cleanup_buffered_write;
+	}
+
+	do {
+		size_t flen;
+		size_t offset;	/* Offset into pagecache folio */
+		size_t part;	/* Bytes to write to folio */
+		size_t copied;	/* Bytes copied from user */
+
+		ret = balance_dirty_pages_ratelimited_flags(mapping, bdp_flags);
+		if (unlikely(ret < 0))
+			break;
+
+		offset = pos & (max_chunk - 1);
+		part = min(max_chunk - offset, iov_iter_count(iter));
+
+		/* Bring in the user pages that we will copy from _first_ lest
+		 * we hit a nasty deadlock on copying from the same page as
+		 * we're writing to, without it being marked uptodate.
+		 *
+		 * Not only is this an optimisation, but it is also required to
+		 * check that the address is actually valid, when atomic
+		 * usercopies are used below.
+		 *
+		 * We rely on the page being held onto long enough by the LRU
+		 * that we can grab it below if this causes it to be read.
+		 */
+		ret = -EFAULT;
+		if (unlikely(fault_in_iov_iter_readable(iter, part) == part))
+			break;
+
+		folio = netfs_grab_folio_for_write(mapping, pos, part);
+		if (IS_ERR(folio)) {
+			ret = PTR_ERR(folio);
+			break;
+		}
+
+		flen = folio_size(folio);
+		offset = pos & (flen - 1);
+		part = min_t(size_t, flen - offset, part);
+
+		if (signal_pending(current)) {
+			ret = written ? -EINTR : -ERESTARTSYS;
+			goto error_folio_unlock;
+		}
+
+		/* See if we need to prefetch the area we're going to modify.
+		 * We need to do this before we get a lock on the folio in case
+		 * there's more than one writer competing for the same cache
+		 * block.
+		 */
+		howto = netfs_how_to_modify(ctx, file, folio, netfs_group,
+					    flen, offset, part, maybe_trouble);
+		_debug("howto %u", howto);
+		switch (howto) {
+		case NETFS_JUST_PREFETCH:
+			ret = netfs_prefetch_for_write(file, folio, offset, part);
+			if (ret < 0) {
+				_debug("prefetch = %zd", ret);
+				goto error_folio_unlock;
+			}
+			break;
+		case NETFS_FOLIO_IS_UPTODATE:
+		case NETFS_WHOLE_FOLIO_MODIFY:
+		case NETFS_STREAMING_WRITE_CONT:
+			break;
+		case NETFS_MODIFY_AND_CLEAR:
+			zero_user_segment(&folio->page, 0, offset);
+			break;
+		case NETFS_STREAMING_WRITE:
+			ret = -EIO;
+			if (WARN_ON(folio_get_private(folio)))
+				goto error_folio_unlock;
+			break;
+		case NETFS_FLUSH_CONTENT:
+			trace_netfs_folio(folio, netfs_flush_content);
+			from = folio_pos(folio);
+			to = from + folio_size(folio) - 1;
+			folio_unlock(folio);
+			folio_put(folio);
+			ret = filemap_write_and_wait_range(mapping, from, to);
+			if (ret < 0)
+				goto out; /* folio already unlocked and put */
+			continue;
+		}
+
+		if (mapping_writably_mapped(mapping))
+			flush_dcache_folio(folio);
+
+		copied = copy_folio_from_iter_atomic(folio, offset, part, iter);
+
+		flush_dcache_folio(folio);
+
+		/* Deal with a (partially) failed copy */
+		if (copied == 0) {
+			ret = -EFAULT;
+			goto error_folio_unlock;
+		}
+
+		trace = (enum netfs_folio_trace)howto;
+		switch (howto) {
+		case NETFS_FOLIO_IS_UPTODATE:
+		case NETFS_JUST_PREFETCH:
+			netfs_set_group(folio, netfs_group);
+			break;
+		case NETFS_MODIFY_AND_CLEAR:
+			zero_user_segment(&folio->page, offset + copied, flen);
+			netfs_set_group(folio, netfs_group);
+			folio_mark_uptodate(folio);
+			break;
+		case NETFS_WHOLE_FOLIO_MODIFY:
+			if (unlikely(copied < part)) {
+				maybe_trouble = true;
+				iov_iter_revert(iter, copied);
+				copied = 0;
+				goto retry;
+			}
+			netfs_set_group(folio, netfs_group);
+			folio_mark_uptodate(folio);
+			break;
+		case NETFS_STREAMING_WRITE:
+			if (offset == 0 && copied == flen) {
+				netfs_set_group(folio, netfs_group);
+				folio_mark_uptodate(folio);
+				trace = netfs_streaming_filled_page;
+				break;
+			}
+			finfo = kzalloc(sizeof(*finfo), GFP_KERNEL);
+			if (!finfo) {
+				iov_iter_revert(iter, copied);
+				ret = -ENOMEM;
+				goto error_folio_unlock;
+			}
+			finfo->netfs_group = netfs_get_group(netfs_group);
+			finfo->dirty_offset = offset;
+			finfo->dirty_len = copied;
+			folio_attach_private(folio, (void *)((unsigned long)finfo |
+							     NETFS_FOLIO_INFO));
+			break;
+		case NETFS_STREAMING_WRITE_CONT:
+			finfo = netfs_folio_info(folio);
+			finfo->dirty_len += copied;
+			if (finfo->dirty_offset == 0 && finfo->dirty_len == flen) {
+				if (finfo->netfs_group)
+					folio_change_private(folio, finfo->netfs_group);
+				else
+					folio_detach_private(folio);
+				folio_mark_uptodate(folio);
+				kfree(finfo);
+				trace = netfs_streaming_cont_filled_page;
+			}
+			break;
+		default:
+			WARN(true, "Unexpected modify type %u ix=%lx\n",
+			     howto, folio->index);
+			ret = -EIO;
+			goto error_folio_unlock;
+		}
+
+		trace_netfs_folio(folio, trace);
+
+		/* Update the inode size if we moved the EOF marker */
+		i_size = i_size_read(inode);
+		pos += copied;
+		if (pos > i_size) {
+			if (ctx->ops->update_i_size) {
+				ctx->ops->update_i_size(inode, pos);
+			} else {
+				i_size_write(inode, pos);
+#if IS_ENABLED(CONFIG_FSCACHE)
+				fscache_update_cookie(ctx->cache, NULL, &pos);
+#endif
+			}
+		}
+		written += copied;
+
+		if (likely(!wreq)) {
+			folio_mark_dirty(folio);
+		} else {
+			if (folio_test_dirty(folio))
+				/* Sigh.  mmap. */
+				folio_clear_dirty_for_io(folio);
+			/* We make multiple writes to the folio... */
+			if (!folio_test_writeback(folio)) {
+				folio_wait_fscache(folio);
+				folio_start_writeback(folio);
+				folio_start_fscache(folio);
+				if (wreq->iter.count == 0)
+					trace_netfs_folio(folio, netfs_folio_trace_wthru);
+				else
+					trace_netfs_folio(folio, netfs_folio_trace_wthru_plus);
+			}
+			netfs_advance_writethrough(wreq, copied,
+						   offset + copied == flen);
+		}
+	retry:
+		folio_unlock(folio);
+		folio_put(folio);
+		folio = NULL;
+
+		cond_resched();
+	} while (iov_iter_count(iter));
+
+out:
+	if (unlikely(wreq)) {
+		ret = netfs_end_writethrough(wreq, iocb);
+		wbc_detach_inode(&wbc);
+		if (ret == -EIOCBQUEUED)
+			return ret;
+	}
+
+	iocb->ki_pos += written;
+	_leave(" = %zd [%zd]", written, ret);
+	return written ? written : ret;
+
+error_folio_unlock:
+	folio_unlock(folio);
+	folio_put(folio);
+	goto out;
+}
+EXPORT_SYMBOL(netfs_perform_write);
+
+/**
+ * netfs_buffered_write_iter_locked - write data to a file
+ * @iocb:	IO state structure (file, offset, etc.)
+ * @from:	iov_iter with data to write
+ * @netfs_group: Grouping for dirty pages (eg. ceph snaps).
+ *
+ * This function does the work needed for a buffered write to a file.  It
+ * removes SUID from the file, updates the modification times and then calls
+ * netfs_perform_write() to copy the data into the pagecache.  It does not
+ * handle direct or unbuffered I/O.
+ *
+ * The caller must hold appropriate locks around this function and have called
+ * generic_write_checks() already.  The caller is also responsible for doing
+ * any necessary syncing afterwards.
+ *
+ * This function does *not* take care of syncing data in case of O_SYNC write.
+ * A caller has to handle it. This is mainly due to the fact that we want to
+ * avoid syncing under i_rwsem.
+ *
+ * Return:
+ * * number of bytes written, even for truncated writes
+ * * negative error code if no data has been written at all
+ */
+ssize_t netfs_buffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *from,
+					 struct netfs_group *netfs_group)
+{
+	struct file *file = iocb->ki_filp;
+	ssize_t ret;
+
+	trace_netfs_write_iter(iocb, from);
+
+	ret = file_remove_privs(file);
+	if (ret)
+		return ret;
+
+	ret = file_update_time(file);
+	if (ret)
+		return ret;
+
+	return netfs_perform_write(iocb, from, netfs_group);
+}
+EXPORT_SYMBOL(netfs_buffered_write_iter_locked);
+
+/**
+ * netfs_file_write_iter - write data to a file
+ * @iocb: IO state structure
+ * @from: iov_iter with data to write
+ *
+ * Perform a write to a file, writing into the pagecache if possible and doing
+ * an unbuffered write instead if not.
+ *
+ * Return:
+ * * Negative error code if no data has been written at all or
+ *   vfs_fsync_range() failed for a synchronous write
+ * * Number of bytes written, even for truncated writes
+ */
+ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+	struct netfs_inode *ictx = netfs_inode(inode);
+	ssize_t ret;
+
+	_enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
+
+	if ((iocb->ki_flags & IOCB_DIRECT) ||
+	    test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
+		return netfs_unbuffered_write_iter(iocb, from);
+
+	ret = netfs_start_io_write(inode);
+	if (ret < 0)
+		return ret;
+
+	ret = generic_write_checks(iocb, from);
+	if (ret > 0)
+		ret = netfs_buffered_write_iter_locked(iocb, from, NULL);
+	netfs_end_io_write(inode);
+	if (ret > 0)
+		ret = generic_write_sync(iocb, ret);
+	return ret;
+}
+EXPORT_SYMBOL(netfs_file_write_iter);
+
+/*
+ * Notification that a previously read-only page is about to become writable.
+ * Note that the caller indicates a single page of a multipage folio.
+ */
+vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group)
+{
+	struct folio *folio = page_folio(vmf->page);
+	struct file *file = vmf->vma->vm_file;
+	struct inode *inode = file_inode(file);
+	vm_fault_t ret = VM_FAULT_RETRY;
+	int err;
+
+	_enter("%lx", folio->index);
+
+	sb_start_pagefault(inode->i_sb);
+
+	if (folio_wait_writeback_killable(folio))
+		goto out;
+	if (folio_lock_killable(folio) < 0)
+		goto out;
+
+	/* Can we see a streaming write here? */
+	if (WARN_ON(!folio_test_uptodate(folio))) {
+		folio_unlock(folio); /* error returns must not hold the lock */
+		ret = VM_FAULT_SIGBUS;
+		goto out;
+	}
+
+	if (netfs_folio_group(folio) != netfs_group) {
+		folio_unlock(folio);
+		err = filemap_fdatawait_range(inode->i_mapping,
+					      folio_pos(folio),
+					      folio_pos(folio) + folio_size(folio) - 1);
+		switch (err) {
+		case 0:
+			ret = VM_FAULT_RETRY;
+			goto out;
+		case -ENOMEM:
+			ret = VM_FAULT_OOM;
+			goto out;
+		default:
+			ret = VM_FAULT_SIGBUS;
+			goto out;
+		}
+	}
+
+	if (folio_test_dirty(folio))
+		trace_netfs_folio(folio, netfs_folio_trace_mkwrite_plus);
+	else
+		trace_netfs_folio(folio, netfs_folio_trace_mkwrite);
+	netfs_set_group(folio, netfs_group);
+	file_update_time(file);
+	ret = VM_FAULT_LOCKED;
+out:
+	sb_end_pagefault(inode->i_sb);
+	return ret;
+}
+EXPORT_SYMBOL(netfs_page_mkwrite);
+
+/*
+ * Kill all the pages in the given range
+ */
+static void netfs_kill_pages(struct address_space *mapping,
+			     loff_t start, loff_t len)
+{
+	struct folio *folio;
+	pgoff_t index = start / PAGE_SIZE;
+	pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
+
+	_enter("%llx-%llx", start, start + len - 1);
+
+	do {
+		_debug("kill %lx (to %lx)", index, last);
+
+		folio = filemap_get_folio(mapping, index);
+		if (IS_ERR(folio)) {
+			next = index + 1;
+			continue;
+		}
+
+		next = folio_next_index(folio);
+
+		trace_netfs_folio(folio, netfs_folio_trace_kill);
+		folio_clear_uptodate(folio);
+		if (folio_test_fscache(folio))
+			folio_end_fscache(folio);
+		folio_end_writeback(folio);
+		folio_lock(folio);
+		generic_error_remove_folio(mapping, folio);
+		folio_unlock(folio);
+		folio_put(folio);
+
+	} while (index = next, index <= last);
+
+	_leave("");
+}
+
+/*
+ * Redirty all the pages in a given range.
+ */
+static void netfs_redirty_pages(struct address_space *mapping,
+				loff_t start, loff_t len)
+{
+	struct folio *folio;
+	pgoff_t index = start / PAGE_SIZE;
+	pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
+
+	_enter("%llx-%llx", start, start + len - 1);
+
+	do {
+		_debug("redirty %llx @%llx", len, start);
+
+		folio = filemap_get_folio(mapping, index);
+		if (IS_ERR(folio)) {
+			next = index + 1;
+			continue;
+		}
+
+		next = folio_next_index(folio);
+		trace_netfs_folio(folio, netfs_folio_trace_redirty);
+		filemap_dirty_folio(mapping, folio);
+		if (folio_test_fscache(folio))
+			folio_end_fscache(folio);
+		folio_end_writeback(folio);
+		folio_put(folio);
+	} while (index = next, index <= last);
+
+	balance_dirty_pages_ratelimited(mapping);
+
+	_leave("");
+}
+
+/*
+ * Completion of write to server: end writeback on wreq's folios, dropping stale private data.
+ */
+static void netfs_pages_written_back(struct netfs_io_request *wreq)
+{
+	struct address_space *mapping = wreq->mapping;
+	struct netfs_folio *finfo;
+	struct netfs_group *group = NULL;
+	struct folio *folio;
+	pgoff_t last;
+	int gcount = 0; /* Number of group refs to drop at the end */
+
+	XA_STATE(xas, &mapping->i_pages, wreq->start / PAGE_SIZE);
+
+	_enter("%llx-%llx", wreq->start, wreq->start + wreq->len);
+
+	rcu_read_lock();
+
+	last = (wreq->start + wreq->len - 1) / PAGE_SIZE;
+	xas_for_each(&xas, folio, last) {
+		WARN(!folio_test_writeback(folio),
+		     "bad %zx @%llx page %lx %lx\n",
+		     wreq->len, wreq->start, folio->index, last);
+
+		if ((finfo = netfs_folio_info(folio))) {
+			/* Streaming writes cannot be redirtied whilst under
+			 * writeback, so discard the streaming record.
+			 */
+			folio_detach_private(folio);
+			group = finfo->netfs_group;
+			gcount++;
+			trace_netfs_folio(folio, netfs_folio_trace_clear_s);
+			kfree(finfo);
+		} else if ((group = netfs_folio_group(folio))) {
+			/* Need to detach the group pointer if the page didn't
+			 * get redirtied.  If it has been redirtied, then it
+			 * must be within the same group.
+			 */
+			if (folio_test_dirty(folio)) {
+				trace_netfs_folio(folio, netfs_folio_trace_redirtied);
+				goto end_wb;
+			}
+			if (folio_trylock(folio)) {
+				if (!folio_test_dirty(folio)) { /* Recheck now the lock is held */
+					folio_detach_private(folio);
+					gcount++;
+					trace_netfs_folio(folio, netfs_folio_trace_clear_g);
+				} else {
+					trace_netfs_folio(folio, netfs_folio_trace_redirtied);
+				}
+				folio_unlock(folio);
+				goto end_wb;
+			}
+
+			/* Trylock failed: leave the RCU section before the blocking lock. */
+			xas_pause(&xas);
+			rcu_read_unlock();
+			folio_lock(folio);
+			if (!folio_test_dirty(folio)) {
+				folio_detach_private(folio);
+				gcount++;
+				trace_netfs_folio(folio, netfs_folio_trace_clear_g);
+			} else {
+				trace_netfs_folio(folio, netfs_folio_trace_redirtied);
+			}
+			folio_unlock(folio);
+			rcu_read_lock();
+		} else {
+			trace_netfs_folio(folio, netfs_folio_trace_clear);
+		}
+	end_wb:
+		if (folio_test_fscache(folio))
+			folio_end_fscache(folio);
+		xas_advance(&xas, folio_next_index(folio) - 1); /* Skip the rest of a multi-page folio */
+		folio_end_writeback(folio);
+	}
+
+	rcu_read_unlock();
+	netfs_put_group_many(group, gcount); /* Uses the last group seen in the walk */
+	_leave("");
+}
+
+/*
+ * Deal with the disposition of the folios that are under writeback to close
+ * out the operation: clean on success, else redirty or kill by wreq->error.
+ */
+static void netfs_cleanup_buffered_write(struct netfs_io_request *wreq)
+{
+	struct address_space *mapping = wreq->mapping;
+
+	_enter("");
+
+	switch (wreq->error) {
+	case 0:
+		netfs_pages_written_back(wreq);
+		break;
+
+	default: /* Unlisted errors are logged, then treated like the redirty set */
+		pr_notice("R=%08x Unexpected error %d\n", wreq->debug_id, wreq->error);
+		fallthrough;
+	case -EACCES:
+	case -EPERM:
+	case -ENOKEY:
+	case -EKEYEXPIRED:
+	case -EKEYREJECTED:
+	case -EKEYREVOKED:
+	case -ENETRESET:
+	case -EDQUOT:
+	case -ENOSPC:
+		netfs_redirty_pages(mapping, wreq->start, wreq->len); /* Keep the data dirty */
+		break;
+
+	case -EROFS:
+	case -EIO:
+	case -EREMOTEIO:
+	case -EFBIG:
+	case -ENOENT:
+	case -ENOMEDIUM:
+	case -ENXIO:
+		netfs_kill_pages(mapping, wreq->start, wreq->len); /* Discard the folios */
+		break;
+	}
+
+	if (wreq->error)
+		mapping_set_error(mapping, wreq->error);
+	if (wreq->netfs_ops->done) /* Optional filesystem completion hook */
+		wreq->netfs_ops->done(wreq);
+}
+
+/*
+ * Extend the region to be written back to include subsequent contiguously
+ * dirty folios of @group; folio locks are only tried, never slept on.
+ * *_len grows by the dirty bytes added, *_top by whole folio sizes and
+ * *_count drops by the pages taken.  Gathering ends at @max_len, when
+ * *_count <= 0, or at a folio that can't be appended.
+ */
+static void netfs_extend_writeback(struct address_space *mapping,
+				   struct netfs_group *group,
+				   struct xa_state *xas,
+				   long *_count,
+				   loff_t start,
+				   loff_t max_len,
+				   bool caching,
+				   size_t *_len,
+				   size_t *_top)
+{
+	struct netfs_folio *finfo;
+	struct folio_batch fbatch;
+	struct folio *folio;
+	unsigned int i;
+	pgoff_t index = (start + *_len) / PAGE_SIZE; /* Page index expected next */
+	size_t len;
+	void *priv;
+	bool stop = true;
+
+	folio_batch_init(&fbatch);
+
+	do {
+		/* Firstly, we gather up a batch of contiguous dirty pages
+		 * under the RCU read lock - but we can't clear the dirty flags
+		 * there if any of those pages are mapped.
+		 */
+		rcu_read_lock();
+
+		xas_for_each(xas, folio, ULONG_MAX) {
+			stop = true;
+			if (xas_retry(xas, folio))
+				continue;
+			if (xa_is_value(folio))
+				break;
+			if (folio->index != index) { /* Gap: no longer contiguous */
+				xas_reset(xas);
+				break;
+			}
+
+			if (!folio_try_get_rcu(folio)) {
+				xas_reset(xas);
+				continue;
+			}
+
+			/* Has the folio moved or been split? */
+			if (unlikely(folio != xas_reload(xas))) {
+				folio_put(folio);
+				xas_reset(xas);
+				break;
+			}
+
+			if (!folio_trylock(folio)) {
+				folio_put(folio);
+				xas_reset(xas);
+				break;
+			}
+			if (!folio_test_dirty(folio) ||
+			    folio_test_writeback(folio) ||
+			    folio_test_fscache(folio)) {
+				folio_unlock(folio);
+				folio_put(folio);
+				xas_reset(xas);
+				break;
+			}
+
+			stop = false;
+			len = folio_size(folio);
+			priv = folio_get_private(folio);
+			if ((const struct netfs_group *)priv != group) {
+				stop = true; /* A partial folio ends the run even if taken */
+				finfo = netfs_folio_info(folio);
+				if (finfo->netfs_group != group ||
+				    finfo->dirty_offset > 0) {
+					folio_unlock(folio);
+					folio_put(folio);
+					xas_reset(xas);
+					break;
+				}
+				len = finfo->dirty_len;
+			}
+
+			*_top += folio_size(folio);
+			index += folio_nr_pages(folio);
+			*_count -= folio_nr_pages(folio);
+			*_len += len;
+			if (*_len >= max_len || *_count <= 0)
+				stop = true;
+
+			if (!folio_batch_add(&fbatch, folio)) /* Batch full: flush it below */
+				break;
+			if (stop)
+				break;
+		}
+
+		xas_pause(xas);
+		rcu_read_unlock();
+
+		/* Now, if we obtained any folios, we can shift them to being
+		 * writable and mark them for caching.
+		 */
+		if (!folio_batch_count(&fbatch))
+			break;
+
+		for (i = 0; i < folio_batch_count(&fbatch); i++) {
+			folio = fbatch.folios[i];
+			trace_netfs_folio(folio, netfs_folio_trace_store_plus);
+
+			if (!folio_clear_dirty_for_io(folio))
+				BUG(); /* Was seen dirty under the folio lock above */
+			folio_start_writeback(folio);
+			netfs_folio_start_fscache(caching, folio);
+			folio_unlock(folio);
+		}
+
+		folio_batch_release(&fbatch);
+		cond_resched();
+	} while (!stop);
+}
+
+/*
+ * Synchronously write back the locked folio and any subsequent non-locked dirty
+ * folios.  Unlocks @folio on every path; as written, returns 1 or an alloc error.
+ */
+static ssize_t netfs_write_back_from_locked_folio(struct address_space *mapping,
+						  struct writeback_control *wbc,
+						  struct netfs_group *group,
+						  struct xa_state *xas,
+						  struct folio *folio,
+						  unsigned long long start,
+						  unsigned long long end)
+{
+	struct netfs_io_request *wreq;
+	struct netfs_folio *finfo;
+	struct netfs_inode *ctx = netfs_inode(mapping->host);
+	unsigned long long i_size = i_size_read(&ctx->inode);
+	size_t len, max_len;
+	bool caching = netfs_is_cache_enabled(ctx);
+	long count = wbc->nr_to_write; /* Page budget passed on to the extender */
+	int ret;
+
+	_enter(",%lx,%llx-%llx,%u", folio->index, start, end, caching);
+
+	wreq = netfs_alloc_request(mapping, NULL, start, folio_size(folio),
+				   NETFS_WRITEBACK);
+	if (IS_ERR(wreq)) {
+		folio_unlock(folio);
+		return PTR_ERR(wreq);
+	}
+
+	if (!folio_clear_dirty_for_io(folio))
+		BUG();
+	folio_start_writeback(folio);
+	netfs_folio_start_fscache(caching, folio);
+
+	count -= folio_nr_pages(folio);
+
+	/* Find all consecutive lockable dirty pages that have contiguous
+	 * written regions, stopping when we find a page that is not
+	 * immediately lockable, is not dirty or is missing, or we reach the
+	 * end of the range.
+	 */
+	trace_netfs_folio(folio, netfs_folio_trace_store);
+
+	len = wreq->len;
+	finfo = netfs_folio_info(folio);
+	if (finfo) {
+		start += finfo->dirty_offset;
+		if (finfo->dirty_offset + finfo->dirty_len != len) {
+			len = finfo->dirty_len; /* Dirty data stops short of the folio end */
+			goto cant_expand;
+		}
+		len = finfo->dirty_len;
+	}
+
+	if (start < i_size) {
+		/* Trim the write to the EOF; the extra data is ignored.  Also
+		 * put an upper limit on the size of a single storedata op.
+		 */
+		max_len = 65536 * 4096;
+		max_len = min_t(unsigned long long, max_len, end - start + 1);
+		max_len = min_t(unsigned long long, max_len, i_size - start);
+
+		if (len < max_len)
+			netfs_extend_writeback(mapping, group, xas, &count, start,
+					       max_len, caching, &len, &wreq->upper_len);
+	}
+
+cant_expand:
+	len = min_t(unsigned long long, len, i_size - start);
+
+	/* We now have a contiguous set of dirty pages, each with writeback
+	 * set; the first page is still locked at this point, but all the rest
+	 * have been unlocked.
+	 */
+	folio_unlock(folio);
+	wreq->start = start;
+	wreq->len = len;
+
+	if (start < i_size) {
+		_debug("write back %zx @%llx [%llx]", len, start, i_size);
+
+		/* Speculatively write to the cache.  We have to fix this up
+		 * later if the store fails.
+		 */
+		wreq->cleanup = netfs_cleanup_buffered_write;
+
+		iov_iter_xarray(&wreq->iter, ITER_SOURCE, &mapping->i_pages, start,
+				wreq->upper_len);
+		__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
+		ret = netfs_begin_write(wreq, true, netfs_write_trace_writeback);
+		if (ret == 0 || ret == -EIOCBQUEUED)
+			wbc->nr_to_write -= len / PAGE_SIZE;
+	} else {
+		_debug("write discard %zx @%llx [%llx]", len, start, i_size);
+
+		/* The dirty region was entirely beyond the EOF. */
+		fscache_clear_page_bits(mapping, start, len, caching);
+		netfs_pages_written_back(wreq);
+		ret = 0;
+	}
+
+	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+	_leave(" = 1");
+	return 1; /* NOTE(review): ret is computed above but never returned - confirm intended */
+}
+
+/*
+ * Find the next dirty folio of @group at or after the xas position and write from it.
+ */
+static ssize_t netfs_writepages_begin(struct address_space *mapping,
+				      struct writeback_control *wbc,
+				      struct netfs_group *group,
+				      struct xa_state *xas,
+				      unsigned long long *_start,
+				      unsigned long long end)
+{
+	const struct netfs_folio *finfo;
+	struct folio *folio;
+	unsigned long long start = *_start;
+	ssize_t ret;
+	void *priv;
+	int skips = 0; /* Busy folios skipped so far (WB_SYNC_NONE only) */
+
+	_enter("%llx,%llx,", start, end);
+
+search_again:
+	/* Find the first dirty page in the group. */
+	rcu_read_lock();
+
+	for (;;) {
+		folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY);
+		if (xas_retry(xas, folio) || xa_is_value(folio))
+			continue;
+		if (!folio)
+			break;
+
+		if (!folio_try_get_rcu(folio)) {
+			xas_reset(xas);
+			continue;
+		}
+
+		if (unlikely(folio != xas_reload(xas))) {
+			folio_put(folio);
+			xas_reset(xas);
+			continue;
+		}
+
+		/* Skip any dirty folio that's not in the group of interest. */
+		priv = folio_get_private(folio);
+		if ((const struct netfs_group *)priv != group) {
+			finfo = netfs_folio_info(folio);
+			if (finfo->netfs_group != group) {
+				folio_put(folio);
+				continue;
+			}
+		}
+
+		xas_pause(xas);
+		break;
+	}
+	rcu_read_unlock();
+	if (!folio)
+		return 0; /* Nothing dirty left in the range */
+
+	start = folio_pos(folio); /* May regress with THPs */
+
+	_debug("wback %lx", folio->index);
+
+	/* At this point we hold neither the i_pages lock nor the page lock:
+	 * the page may be truncated or invalidated (changing page->mapping to
+	 * NULL), or even swizzled back from swapper_space to tmpfs file
+	 * mapping
+	 */
+lock_again:
+	if (wbc->sync_mode != WB_SYNC_NONE) {
+		ret = folio_lock_killable(folio);
+		if (ret < 0)
+			return ret; /* NOTE(review): folio ref from the search is not put here - confirm */
+	} else {
+		if (!folio_trylock(folio))
+			goto search_again;
+	}
+
+	if (folio->mapping != mapping ||
+	    !folio_test_dirty(folio)) {
+		start += folio_size(folio);
+		folio_unlock(folio);
+		goto search_again;
+	}
+
+	if (folio_test_writeback(folio) ||
+	    folio_test_fscache(folio)) {
+		folio_unlock(folio);
+		if (wbc->sync_mode != WB_SYNC_NONE) {
+			folio_wait_writeback(folio);
+#ifdef CONFIG_FSCACHE
+			folio_wait_fscache(folio);
+#endif
+			goto lock_again;
+		}
+
+		start += folio_size(folio);
+		if (wbc->sync_mode == WB_SYNC_NONE) { /* Always true once this line is reached */
+			if (skips >= 5 || need_resched()) {
+				ret = 0;
+				goto out;
+			}
+			skips++;
+		}
+		goto search_again;
+	}
+
+	ret = netfs_write_back_from_locked_folio(mapping, wbc, group, xas,
+						 folio, start, end);
+out:
+	if (ret > 0)
+		*_start = start + ret; /* Report the position back to the caller */
+	_leave(" = %zd [%llx]", ret, *_start);
+	return ret;
+}
+
+/*
+ * Repeat netfs_writepages_begin() over a region while it progresses and budget remains.
+ */
+static int netfs_writepages_region(struct address_space *mapping,
+				   struct writeback_control *wbc,
+				   struct netfs_group *group,
+				   unsigned long long *_start,
+				   unsigned long long end)
+{
+	ssize_t ret;
+
+	XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE);
+
+	for (;;) {
+		ret = netfs_writepages_begin(mapping, wbc, group, &xas, _start, end);
+		if (ret <= 0 || wbc->nr_to_write <= 0)
+			break; /* Error, nothing more found, or budget used up */
+		cond_resched();
+	}
+
+	return ret > 0 ? 0 : ret;
+}
+
+/*
+ * Write some of the pending data back to the server, as directed by @wbc.
+ */
+int netfs_writepages(struct address_space *mapping,
+		     struct writeback_control *wbc)
+{
+	struct netfs_group *group = NULL;
+	loff_t start, end;
+	int ret;
+
+	_enter("");
+
+	/* We have to be careful as we can end up racing with setattr()
+	 * truncating the pagecache since the caller doesn't take a lock here
+	 * to prevent it.
+	 */
+
+	if (wbc->range_cyclic && mapping->writeback_index) {
+		/* Cyclic: first from the saved index to the end of the file... */
+		start = mapping->writeback_index * PAGE_SIZE;
+		ret = netfs_writepages_region(mapping, wbc, group,
+					      &start, LLONG_MAX);
+		if (ret < 0)
+			goto out;
+
+		if (wbc->nr_to_write <= 0) {
+			mapping->writeback_index = start / PAGE_SIZE;
+			goto out;
+		}
+
+		/* ...then wrap round and do from 0 up to the saved index. */
+		start = 0;
+		end = mapping->writeback_index * PAGE_SIZE;
+		mapping->writeback_index = 0;
+		ret = netfs_writepages_region(mapping, wbc, group, &start, end);
+		if (ret == 0)
+			mapping->writeback_index = start / PAGE_SIZE;
+	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
+		start = 0; /* Whole-file writeback */
+		ret = netfs_writepages_region(mapping, wbc, group,
+					      &start, LLONG_MAX);
+		if (wbc->nr_to_write > 0 && ret == 0)
+			mapping->writeback_index = start / PAGE_SIZE;
+	} else {
+		start = wbc->range_start; /* Explicit range; writeback_index untouched */
+		ret = netfs_writepages_region(mapping, wbc, group,
+					      &start, wbc->range_end);
+	}
+
+out:
+	_leave(" = %d", ret);
+	return ret;
+}
+EXPORT_SYMBOL(netfs_writepages);
+
+/*
+ * Launder-write completion: log any error and record it on the mapping.
+ */
+static void netfs_cleanup_launder_folio(struct netfs_io_request *wreq)
+{
+	if (!wreq->error)
+		return;
+	pr_notice("R=%08x Laundering error %d\n", wreq->debug_id, wreq->error);
+	mapping_set_error(wreq->mapping, wreq->error);
+}
+
+/**
+ * netfs_launder_folio - Clean up a dirty folio that's being invalidated
+ * @folio: The folio to clean
+ *
+ * This is called to write back a folio that's being invalidated when an inode
+ * is getting torn down.  Ideally, writepages would be used instead.
+ */
+int netfs_launder_folio(struct folio *folio)
+{
+	struct netfs_io_request *wreq;
+	struct address_space *mapping = folio->mapping;
+	struct netfs_folio *finfo = netfs_folio_info(folio);
+	struct netfs_group *group = netfs_folio_group(folio);
+	struct bio_vec bvec;
+	unsigned long long i_size = i_size_read(mapping->host);
+	unsigned long long start = folio_pos(folio);
+	size_t offset = 0, len;
+	int ret = 0;
+
+	if (finfo) { /* Streaming-write record: only part of the folio is dirty */
+		offset = finfo->dirty_offset;
+		start += offset;
+		len = finfo->dirty_len;
+	} else {
+		len = folio_size(folio);
+	}
+	len = min_t(unsigned long long, len, i_size - start); /* Trim to EOF */
+
+	wreq = netfs_alloc_request(mapping, NULL, start, len, NETFS_LAUNDER_WRITE);
+	if (IS_ERR(wreq)) {
+		ret = PTR_ERR(wreq);
+		goto out;
+	}
+
+	if (!folio_clear_dirty_for_io(folio))
+		goto out_put; /* Not dirty: no write, but private data is still detached */
+
+	trace_netfs_folio(folio, netfs_folio_trace_launder);
+
+	_debug("launder %llx-%llx", start, start + len - 1);
+
+	/* Speculatively write to the cache.  We have to fix this up later if
+	 * the store fails.
+	 */
+	wreq->cleanup = netfs_cleanup_launder_folio;
+
+	bvec_set_folio(&bvec, folio, len, offset);
+	iov_iter_bvec(&wreq->iter, ITER_SOURCE, &bvec, 1, len);
+	__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
+	ret = netfs_begin_write(wreq, true, netfs_write_trace_launder);
+
+out_put:
+	folio_detach_private(folio);
+	netfs_put_group(group);
+	kfree(finfo);
+	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+out:
+	folio_wait_fscache(folio);
+	_leave(" = %d", ret);
+	return ret;
+}
+EXPORT_SYMBOL(netfs_launder_folio);
diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c
new file mode 100644
index 000000000000..ad4370b3935d
--- /dev/null
+++ b/fs/netfs/direct_read.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Direct I/O support.
+ *
+ * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/export.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/uio.h>
+#include <linux/sched/mm.h>
+#include <linux/task_io_accounting_ops.h>
+#include <linux/netfs.h>
+#include "internal.h"
+
+/**
+ * netfs_unbuffered_read_iter_locked - Perform an unbuffered or direct I/O read
+ * @iocb: The I/O control descriptor describing the read
+ * @iter: The output buffer (also specifies read length)
+ *
+ * Perform an unbuffered I/O or direct I/O from the file in @iocb to the
+ * output buffer.  No use is made of the pagecache.
+ *
+ * The caller must hold any appropriate locks.
+ */
+static ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct netfs_io_request *rreq;
+	ssize_t ret;
+	size_t orig_count = iov_iter_count(iter);
+	bool async = !is_sync_kiocb(iocb);
+
+	_enter("");
+
+	if (!orig_count)
+		return 0; /* Don't update atime */
+
+	ret = kiocb_write_and_wait(iocb, orig_count);
+	if (ret < 0)
+		return ret;
+	file_accessed(iocb->ki_filp);
+
+	rreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp,
+				   iocb->ki_pos, orig_count,
+				   NETFS_DIO_READ);
+	if (IS_ERR(rreq))
+		return PTR_ERR(rreq);
+
+	netfs_stat(&netfs_n_rh_dio_read);
+	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_dio_read);
+
+	/* If this is an async op, we have to keep track of the destination
+	 * buffer for ourselves as the caller's iterator will be trashed when
+	 * we return.
+	 *
+	 * In such a case, extract an iterator to represent as much of the
+	 * output buffer as we can manage.  Note that the extraction might not
+	 * be able to allocate a sufficiently large bvec array and may shorten
+	 * the request.
+	 */
+	if (user_backed_iter(iter)) {
+		ret = netfs_extract_user_iter(iter, rreq->len, &rreq->iter, 0);
+		if (ret < 0)
+			goto out;
+		rreq->direct_bv = (struct bio_vec *)rreq->iter.bvec;
+		rreq->direct_bv_count = ret;
+		rreq->direct_bv_unpin = iov_iter_extract_will_pin(iter);
+		rreq->len = iov_iter_count(&rreq->iter); /* May be shorter than asked for */
+	} else {
+		rreq->iter = *iter;
+		rreq->len = orig_count;
+		rreq->direct_bv_unpin = false;
+		iov_iter_advance(iter, orig_count);
+	}
+
+	// TODO: Set up bounce buffer if needed
+
+	if (async)
+		rreq->iocb = iocb;
+
+	ret = netfs_begin_read(rreq, is_sync_kiocb(iocb));
+	if (ret < 0)
+		goto out; /* May be -EIOCBQUEUED */
+	if (!async) {
+		// TODO: Copy from bounce buffer
+		iocb->ki_pos += rreq->transferred;
+		ret = rreq->transferred;
+	}
+
+out:
+	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
+	if (ret > 0)
+		orig_count -= ret;
+	if (ret != -EIOCBQUEUED) /* Hand back whatever part of the buffer went unused */
+		iov_iter_revert(iter, orig_count - iov_iter_count(iter));
+	return ret;
+}
+
+/**
+ * netfs_unbuffered_read_iter - Perform an unbuffered or direct I/O read
+ * @iocb: The I/O control descriptor describing the read
+ * @iter: The output buffer (also specifies read length)
+ *
+ * Read from the file in @iocb straight into the output buffer under the
+ * direct-I/O lock, bypassing the pagecache.  A zero-length read returns 0.
+ */
+ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+	ssize_t ret;
+
+	if (iter->count == 0)
+		return 0; /* Don't update atime */
+
+	ret = netfs_start_io_direct(inode);
+	if (ret != 0)
+		return ret;
+	ret = netfs_unbuffered_read_iter_locked(iocb, iter);
+	netfs_end_io_direct(inode);
+	return ret;
+}
+EXPORT_SYMBOL(netfs_unbuffered_read_iter);
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
new file mode 100644
index 000000000000..60a40d293c87
--- /dev/null
+++ b/fs/netfs/direct_write.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Unbuffered and direct write support.
+ *
+ * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/export.h>
+#include <linux/uio.h>
+#include "internal.h"
+
+static void netfs_cleanup_dio_write(struct netfs_io_request *wreq)
+{
+	struct inode *inode = wreq->inode;
+	unsigned long long end = wreq->start + wreq->len;
+
+	/* Only an error-free write ending beyond the current i_size updates it. */
+	if (wreq->error || i_size_read(inode) >= end)
+		return;
+	if (wreq->netfs_ops->update_i_size)
+		wreq->netfs_ops->update_i_size(inode, end);
+	else
+		i_size_write(inode, end);
+}
+
+/*
+ * Perform an unbuffered write where we may have to do an RMW operation on an
+ * encrypted file.  This can also be used for direct I/O writes.
+ */
+static ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *iter,
+						  struct netfs_group *netfs_group)
+{
+	struct netfs_io_request *wreq;
+	unsigned long long start = iocb->ki_pos;
+	unsigned long long end = start + iov_iter_count(iter);
+	ssize_t ret, n;
+	bool async = !is_sync_kiocb(iocb);
+
+	_enter("");
+
+	/* We're going to need a bounce buffer if what we transmit is going to
+	 * be different in some way to the source buffer, e.g. because it gets
+	 * encrypted/compressed or because it needs expanding to a block size.
+	 */
+	// TODO
+
+	_debug("uw %llx-%llx", start, end);
+
+	wreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp,
+				   start, end - start,
+				   iocb->ki_flags & IOCB_DIRECT ?
+				   NETFS_DIO_WRITE : NETFS_UNBUFFERED_WRITE);
+	if (IS_ERR(wreq))
+		return PTR_ERR(wreq);
+
+	{
+		/* If this is an async op and we're not using a bounce buffer,
+		 * we have to save the source buffer as the iterator is only
+		 * good until we return.  In such a case, extract an iterator
+		 * to represent as much of the output buffer as we can
+		 * manage.  Note that the extraction might not be able to
+		 * allocate a sufficiently large bvec array and may shorten the
+		 * request.
+		 */
+		if (async || user_backed_iter(iter)) {
+			n = netfs_extract_user_iter(iter, wreq->len, &wreq->iter, 0);
+			if (n < 0) {
+				ret = n;
+				goto out;
+			}
+			wreq->direct_bv = (struct bio_vec *)wreq->iter.bvec;
+			wreq->direct_bv_count = n;
+			wreq->direct_bv_unpin = iov_iter_extract_will_pin(iter);
+			wreq->len = iov_iter_count(&wreq->iter); /* May be shorter than asked for */
+		} else {
+			wreq->iter = *iter;
+		}
+
+		wreq->io_iter = wreq->iter;
+	}
+
+	/* Copy the data into the bounce buffer and encrypt it. */
+	// TODO
+
+	/* Dispatch the write. */
+	__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
+	if (async)
+		wreq->iocb = iocb;
+	wreq->cleanup = netfs_cleanup_dio_write;
+	ret = netfs_begin_write(wreq, is_sync_kiocb(iocb),
+				iocb->ki_flags & IOCB_DIRECT ?
+				netfs_write_trace_dio_write :
+				netfs_write_trace_unbuffered_write);
+	if (ret < 0) {
+		_debug("begin = %zd", ret);
+		goto out;
+	}
+
+	if (!async) {
+		trace_netfs_rreq(wreq, netfs_rreq_trace_wait_ip);
+		wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
+			    TASK_UNINTERRUPTIBLE);
+
+		ret = wreq->error;
+		_debug("waited = %zd", ret);
+		if (ret == 0) {
+			ret = wreq->transferred;
+			iocb->ki_pos += ret; /* Advance the file position by what was written */
+		}
+	} else {
+		ret = -EIOCBQUEUED;
+	}
+
+out:
+	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+	return ret;
+}
+
+/**
+ * netfs_unbuffered_write_iter - Unbuffered write to a file
+ * @iocb: IO state structure
+ * @from: iov_iter with data to write
+ *
+ * Do an unbuffered write to a file, writing the data directly to the server
+ * and not lodging the data in the pagecache.
+ *
+ * Return:
+ * * Negative error code if no data has been written at all or
+ *   vfs_fsync_range() failed for a synchronous write
+ * * Number of bytes written (0 for an empty write), even for truncated writes
+ */
+ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+	struct netfs_inode *ictx = netfs_inode(inode);
+	unsigned long long end;
+	ssize_t ret;
+
+	_enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
+
+	trace_netfs_write_iter(iocb, from);
+	netfs_stat(&netfs_n_rh_dio_write);
+
+	ret = netfs_start_io_direct(inode);
+	if (ret < 0)
+		return ret;
+	ret = generic_write_checks(iocb, from);
+	if (ret <= 0) /* Error, or nothing to write: skip invalidation and the I/O */
+		goto out;
+	ret = file_remove_privs(file);
+	if (ret < 0)
+		goto out;
+	ret = file_update_time(file);
+	if (ret < 0)
+		goto out;
+	ret = kiocb_invalidate_pages(iocb, iov_iter_count(from));
+	if (ret < 0)
+		goto out;
+	end = iocb->ki_pos + iov_iter_count(from);
+	if (end > ictx->zero_point) /* Only ever moves zero_point forwards */
+		ictx->zero_point = end;
+
+	fscache_invalidate(netfs_i_cookie(ictx), NULL, i_size_read(inode),
+			   FSCACHE_INVAL_DIO_WRITE);
+	ret = netfs_unbuffered_write_iter_locked(iocb, from, NULL);
+out:
+	netfs_end_io_direct(inode); /* Pairs with netfs_start_io_direct() above */
+	return ret;
+}
+EXPORT_SYMBOL(netfs_unbuffered_write_iter);
diff --git a/fs/fscache/cache.c b/fs/netfs/fscache_cache.c
index d645f8b302a2..9397ed39b0b4 100644
--- a/fs/fscache/cache.c
+++ b/fs/netfs/fscache_cache.c
@@ -179,13 +179,14 @@ EXPORT_SYMBOL(fscache_acquire_cache);
 void fscache_put_cache(struct fscache_cache *cache,
 		       enum fscache_cache_trace where)
 {
-	unsigned int debug_id = cache->debug_id;
+	unsigned int debug_id;
 	bool zero;
 	int ref;
 
 	if (IS_ERR_OR_NULL(cache))
 		return;
 
+	debug_id = cache->debug_id;
 	zero = __refcount_dec_and_test(&cache->ref, &ref);
 	trace_fscache_cache(debug_id, ref - 1, where);
 
diff --git a/fs/fscache/cookie.c b/fs/netfs/fscache_cookie.c
index bce2492186d0..bce2492186d0 100644
--- a/fs/fscache/cookie.c
+++ b/fs/netfs/fscache_cookie.c
diff --git a/fs/netfs/fscache_internal.h b/fs/netfs/fscache_internal.h
new file mode 100644
index 000000000000..a09b948fcef2
--- /dev/null
+++ b/fs/netfs/fscache_internal.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Internal definitions for FS-Cache
+ *
+ * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include "internal.h"
+
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) "FS-Cache: " fmt
diff --git a/fs/fscache/io.c b/fs/netfs/fscache_io.c
index 0d2b8dec8f82..ad572f7ee897 100644
--- a/fs/fscache/io.c
+++ b/fs/netfs/fscache_io.c
@@ -158,46 +158,6 @@ int __fscache_begin_write_operation(struct netfs_cache_resources *cres,
 }
 EXPORT_SYMBOL(__fscache_begin_write_operation);
 
-/**
- * fscache_dirty_folio - Mark folio dirty and pin a cache object for writeback
- * @mapping: The mapping the folio belongs to.
- * @folio: The folio being dirtied.
- * @cookie: The cookie referring to the cache object
- *
- * Set the dirty flag on a folio and pin an in-use cache object in memory
- * so that writeback can later write to it.  This is intended
- * to be called from the filesystem's ->dirty_folio() method.
- *
- * Return: true if the dirty flag was set on the folio, false otherwise.
- */
-bool fscache_dirty_folio(struct address_space *mapping, struct folio *folio,
-				struct fscache_cookie *cookie)
-{
-	struct inode *inode = mapping->host;
-	bool need_use = false;
-
-	_enter("");
-
-	if (!filemap_dirty_folio(mapping, folio))
-		return false;
-	if (!fscache_cookie_valid(cookie))
-		return true;
-
-	if (!(inode->i_state & I_PINNING_FSCACHE_WB)) {
-		spin_lock(&inode->i_lock);
-		if (!(inode->i_state & I_PINNING_FSCACHE_WB)) {
-			inode->i_state |= I_PINNING_FSCACHE_WB;
-			need_use = true;
-		}
-		spin_unlock(&inode->i_lock);
-
-		if (need_use)
-			fscache_use_cookie(cookie, true);
-	}
-	return true;
-}
-EXPORT_SYMBOL(fscache_dirty_folio);
-
 struct fscache_write_request {
 	struct netfs_cache_resources cache_resources;
 	struct address_space	*mapping;
@@ -277,7 +237,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
 				    fscache_access_io_write) < 0)
 		goto abandon_free;
 
-	ret = cres->ops->prepare_write(cres, &start, &len, i_size, false);
+	ret = cres->ops->prepare_write(cres, &start, &len, len, i_size, false);
 	if (ret < 0)
 		goto abandon_end;
 
diff --git a/fs/fscache/main.c b/fs/netfs/fscache_main.c
index dad85fd84f6f..42e98bb523e3 100644
--- a/fs/fscache/main.c
+++ b/fs/netfs/fscache_main.c
@@ -8,18 +8,9 @@
 #define FSCACHE_DEBUG_LEVEL CACHE
 #include <linux/module.h>
 #include <linux/init.h>
-#define CREATE_TRACE_POINTS
 #include "internal.h"
-
-MODULE_DESCRIPTION("FS Cache Manager");
-MODULE_AUTHOR("Red Hat, Inc.");
-MODULE_LICENSE("GPL");
-
-unsigned fscache_debug;
-module_param_named(debug, fscache_debug, uint,
-		   S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(fscache_debug,
-		 "FS-Cache debugging mask");
+#define CREATE_TRACE_POINTS
+#include <trace/events/fscache.h>
 
 EXPORT_TRACEPOINT_SYMBOL(fscache_access_cache);
 EXPORT_TRACEPOINT_SYMBOL(fscache_access_volume);
@@ -71,7 +62,7 @@ unsigned int fscache_hash(unsigned int salt, const void *data, size_t len)
 /*
  * initialise the fs caching module
  */
-static int __init fscache_init(void)
+int __init fscache_init(void)
 {
 	int ret = -ENOMEM;
 
@@ -92,7 +83,7 @@ static int __init fscache_init(void)
 		goto error_cookie_jar;
 	}
 
-	pr_notice("Loaded\n");
+	pr_notice("FS-Cache loaded\n");
 	return 0;
 
 error_cookie_jar:
@@ -103,19 +94,15 @@ error_wq:
 	return ret;
 }
 
-fs_initcall(fscache_init);
-
 /*
  * clean up on module removal
  */
-static void __exit fscache_exit(void)
+void __exit fscache_exit(void)
 {
 	_enter("");
 
 	kmem_cache_destroy(fscache_cookie_jar);
 	fscache_proc_cleanup();
 	destroy_workqueue(fscache_wq);
-	pr_notice("Unloaded\n");
+	pr_notice("FS-Cache unloaded\n");
 }
-
-module_exit(fscache_exit);
diff --git a/fs/fscache/proc.c b/fs/netfs/fscache_proc.c
index dc3b0e9c8cce..874d951bc390 100644
--- a/fs/fscache/proc.c
+++ b/fs/netfs/fscache_proc.c
@@ -12,41 +12,34 @@
 #include "internal.h"
 
 /*
- * initialise the /proc/fs/fscache/ directory
+ * Add files to /proc/fs/netfs/.
  */
 int __init fscache_proc_init(void)
 {
-	if (!proc_mkdir("fs/fscache", NULL))
-		goto error_dir;
+	if (!proc_symlink("fs/fscache", NULL, "netfs"))
+		goto error_sym;
 
-	if (!proc_create_seq("fs/fscache/caches", S_IFREG | 0444, NULL,
+	if (!proc_create_seq("fs/netfs/caches", S_IFREG | 0444, NULL,
 			     &fscache_caches_seq_ops))
 		goto error;
 
-	if (!proc_create_seq("fs/fscache/volumes", S_IFREG | 0444, NULL,
+	if (!proc_create_seq("fs/netfs/volumes", S_IFREG | 0444, NULL,
 			     &fscache_volumes_seq_ops))
 		goto error;
 
-	if (!proc_create_seq("fs/fscache/cookies", S_IFREG | 0444, NULL,
+	if (!proc_create_seq("fs/netfs/cookies", S_IFREG | 0444, NULL,
 			     &fscache_cookies_seq_ops))
 		goto error;
-
-#ifdef CONFIG_FSCACHE_STATS
-	if (!proc_create_single("fs/fscache/stats", S_IFREG | 0444, NULL,
-				fscache_stats_show))
-		goto error;
-#endif
-
 	return 0;
 
 error:
 	remove_proc_entry("fs/fscache", NULL);
-error_dir:
+error_sym:
 	return -ENOMEM;
 }
 
 /*
- * clean up the /proc/fs/fscache/ directory
+ * Clean up the /proc/fs/fscache symlink.
  */
 void fscache_proc_cleanup(void)
 {
diff --git a/fs/fscache/stats.c b/fs/netfs/fscache_stats.c
index fc94e5e79f1c..add21abdf713 100644
--- a/fs/fscache/stats.c
+++ b/fs/netfs/fscache_stats.c
@@ -48,13 +48,15 @@ atomic_t fscache_n_no_create_space;
 EXPORT_SYMBOL(fscache_n_no_create_space);
 atomic_t fscache_n_culled;
 EXPORT_SYMBOL(fscache_n_culled);
+atomic_t fscache_n_dio_misfit;
+EXPORT_SYMBOL(fscache_n_dio_misfit);
 
 /*
  * display the general statistics
  */
-int fscache_stats_show(struct seq_file *m, void *v)
+int fscache_stats_show(struct seq_file *m)
 {
-	seq_puts(m, "FS-Cache statistics\n");
+	seq_puts(m, "-- FS-Cache statistics --\n");
 	seq_printf(m, "Cookies: n=%d v=%d vcol=%u voom=%u\n",
 		   atomic_read(&fscache_n_cookies),
 		   atomic_read(&fscache_n_volumes),
@@ -93,10 +95,9 @@ int fscache_stats_show(struct seq_file *m, void *v)
 		   atomic_read(&fscache_n_no_create_space),
 		   atomic_read(&fscache_n_culled));
 
-	seq_printf(m, "IO     : rd=%u wr=%u\n",
+	seq_printf(m, "IO     : rd=%u wr=%u mis=%u\n",
 		   atomic_read(&fscache_n_read),
-		   atomic_read(&fscache_n_write));
-
-	netfs_stats_show(m);
+		   atomic_read(&fscache_n_write),
+		   atomic_read(&fscache_n_dio_misfit));
 	return 0;
 }
diff --git a/fs/fscache/volume.c b/fs/netfs/fscache_volume.c
index cdf991bdd9de..cdf991bdd9de 100644
--- a/fs/fscache/volume.c
+++ b/fs/netfs/fscache_volume.c
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index 43fac1b14e40..ec7045d24400 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -5,9 +5,13 @@
  * Written by David Howells (dhowells@redhat.com)
  */
 
+#include <linux/slab.h>
+#include <linux/seq_file.h>
 #include <linux/netfs.h>
 #include <linux/fscache.h>
+#include <linux/fscache-cache.h>
 #include <trace/events/netfs.h>
+#include <trace/events/fscache.h>
 
 #ifdef pr_fmt
 #undef pr_fmt
@@ -19,6 +23,8 @@
  * buffered_read.c
  */
 void netfs_rreq_unlock_folios(struct netfs_io_request *rreq);
+int netfs_prefetch_for_write(struct file *file, struct folio *folio,
+			     size_t offset, size_t len);
 
 /*
  * io.c
@@ -29,6 +35,41 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync);
  * main.c
  */
 extern unsigned int netfs_debug;
+extern struct list_head netfs_io_requests;
+extern spinlock_t netfs_proc_lock;
+
+#ifdef CONFIG_PROC_FS
+static inline void netfs_proc_add_rreq(struct netfs_io_request *rreq)
+{
+	spin_lock(&netfs_proc_lock);
+	list_add_tail_rcu(&rreq->proc_link, &netfs_io_requests);
+	spin_unlock(&netfs_proc_lock);
+}
+static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq)
+{
+	if (!list_empty(&rreq->proc_link)) {
+		spin_lock(&netfs_proc_lock);
+		list_del_rcu(&rreq->proc_link);
+		spin_unlock(&netfs_proc_lock);
+	}
+}
+#else
+static inline void netfs_proc_add_rreq(struct netfs_io_request *rreq) {}
+static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq) {}
+#endif
+
+/*
+ * misc.c
+ */
+#define NETFS_FLAG_PUT_MARK		BIT(0)
+#define NETFS_FLAG_PAGECACHE_MARK	BIT(1)
+int netfs_xa_store_and_mark(struct xarray *xa, unsigned long index,
+			    struct folio *folio, unsigned int flags,
+			    gfp_t gfp_mask);
+int netfs_add_folios_to_buffer(struct xarray *buffer,
+			       struct address_space *mapping,
+			       pgoff_t index, pgoff_t to, gfp_t gfp_mask);
+void netfs_clear_buffer(struct xarray *buffer);
 
 /*
  * objects.c
@@ -50,9 +91,20 @@ static inline void netfs_see_request(struct netfs_io_request *rreq,
 }
 
 /*
+ * output.c
+ */
+int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait,
+		      enum netfs_write_trace what);
+struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len);
+int netfs_advance_writethrough(struct netfs_io_request *wreq, size_t copied, bool to_page_end);
+int netfs_end_writethrough(struct netfs_io_request *wreq, struct kiocb *iocb);
+
+/*
  * stats.c
  */
 #ifdef CONFIG_NETFS_STATS
+extern atomic_t netfs_n_rh_dio_read;
+extern atomic_t netfs_n_rh_dio_write;
 extern atomic_t netfs_n_rh_readahead;
 extern atomic_t netfs_n_rh_readpage;
 extern atomic_t netfs_n_rh_rreq;
@@ -71,7 +123,15 @@ extern atomic_t netfs_n_rh_write_begin;
 extern atomic_t netfs_n_rh_write_done;
 extern atomic_t netfs_n_rh_write_failed;
 extern atomic_t netfs_n_rh_write_zskip;
+extern atomic_t netfs_n_wh_wstream_conflict;
+extern atomic_t netfs_n_wh_upload;
+extern atomic_t netfs_n_wh_upload_done;
+extern atomic_t netfs_n_wh_upload_failed;
+extern atomic_t netfs_n_wh_write;
+extern atomic_t netfs_n_wh_write_done;
+extern atomic_t netfs_n_wh_write_failed;
 
+int netfs_stats_show(struct seq_file *m, void *v);
 
 static inline void netfs_stat(atomic_t *stat)
 {
@@ -103,6 +163,176 @@ static inline bool netfs_is_cache_enabled(struct netfs_inode *ctx)
 #endif
 }
 
+/*
+ * Get a ref on a netfs group attached to a dirty page (e.g. a ceph snap).
+ */
+static inline struct netfs_group *netfs_get_group(struct netfs_group *netfs_group)
+{
+	if (netfs_group)
+		refcount_inc(&netfs_group->ref);
+	return netfs_group;
+}
+
+/*
+ * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap).
+ */
+static inline void netfs_put_group(struct netfs_group *netfs_group)
+{
+	if (netfs_group && refcount_dec_and_test(&netfs_group->ref))
+		netfs_group->free(netfs_group);
+}
+
+/*
+ * Drop nr refs on a netfs group (e.g. a ceph snap), freeing it on the last.
+ */
+static inline void netfs_put_group_many(struct netfs_group *netfs_group, int nr)
+{
+	if (netfs_group && refcount_sub_and_test(nr, &netfs_group->ref))
+		netfs_group->free(netfs_group);
+}
+
+/*
+ * fscache-cache.c
+ */
+#ifdef CONFIG_PROC_FS
+extern const struct seq_operations fscache_caches_seq_ops;
+#endif
+bool fscache_begin_cache_access(struct fscache_cache *cache, enum fscache_access_trace why);
+void fscache_end_cache_access(struct fscache_cache *cache, enum fscache_access_trace why);
+struct fscache_cache *fscache_lookup_cache(const char *name, bool is_cache);
+void fscache_put_cache(struct fscache_cache *cache, enum fscache_cache_trace where);
+
+static inline enum fscache_cache_state fscache_cache_state(const struct fscache_cache *cache)
+{
+	return smp_load_acquire(&cache->state);
+}
+
+static inline bool fscache_cache_is_live(const struct fscache_cache *cache)
+{
+	return fscache_cache_state(cache) == FSCACHE_CACHE_IS_ACTIVE;
+}
+
+static inline void fscache_set_cache_state(struct fscache_cache *cache,
+					   enum fscache_cache_state new_state)
+{
+	smp_store_release(&cache->state, new_state);
+
+}
+
+static inline bool fscache_set_cache_state_maybe(struct fscache_cache *cache,
+						 enum fscache_cache_state old_state,
+						 enum fscache_cache_state new_state)
+{
+	return try_cmpxchg_release(&cache->state, &old_state, new_state);
+}
+
+/*
+ * fscache-cookie.c
+ */
+extern struct kmem_cache *fscache_cookie_jar;
+#ifdef CONFIG_PROC_FS
+extern const struct seq_operations fscache_cookies_seq_ops;
+#endif
+extern struct timer_list fscache_cookie_lru_timer;
+
+extern void fscache_print_cookie(struct fscache_cookie *cookie, char prefix);
+extern bool fscache_begin_cookie_access(struct fscache_cookie *cookie,
+					enum fscache_access_trace why);
+
+static inline void fscache_see_cookie(struct fscache_cookie *cookie,
+				      enum fscache_cookie_trace where)
+{
+	trace_fscache_cookie(cookie->debug_id, refcount_read(&cookie->ref),
+			     where);
+}
+
+/*
+ * fscache-main.c
+ */
+extern unsigned int fscache_hash(unsigned int salt, const void *data, size_t len);
+#ifdef CONFIG_FSCACHE
+int __init fscache_init(void);
+void __exit fscache_exit(void);
+#else
+static inline int fscache_init(void) { return 0; }
+static inline void fscache_exit(void) {}
+#endif
+
+/*
+ * fscache-proc.c
+ */
+#ifdef CONFIG_PROC_FS
+extern int __init fscache_proc_init(void);
+extern void fscache_proc_cleanup(void);
+#else
+#define fscache_proc_init()	(0)
+#define fscache_proc_cleanup()	do {} while (0)
+#endif
+
+/*
+ * fscache-stats.c
+ */
+#ifdef CONFIG_FSCACHE_STATS
+extern atomic_t fscache_n_volumes;
+extern atomic_t fscache_n_volumes_collision;
+extern atomic_t fscache_n_volumes_nomem;
+extern atomic_t fscache_n_cookies;
+extern atomic_t fscache_n_cookies_lru;
+extern atomic_t fscache_n_cookies_lru_expired;
+extern atomic_t fscache_n_cookies_lru_removed;
+extern atomic_t fscache_n_cookies_lru_dropped;
+
+extern atomic_t fscache_n_acquires;
+extern atomic_t fscache_n_acquires_ok;
+extern atomic_t fscache_n_acquires_oom;
+
+extern atomic_t fscache_n_invalidates;
+
+extern atomic_t fscache_n_relinquishes;
+extern atomic_t fscache_n_relinquishes_retire;
+extern atomic_t fscache_n_relinquishes_dropped;
+
+extern atomic_t fscache_n_resizes;
+extern atomic_t fscache_n_resizes_null;
+
+static inline void fscache_stat(atomic_t *stat)
+{
+	atomic_inc(stat);
+}
+
+static inline void fscache_stat_d(atomic_t *stat)
+{
+	atomic_dec(stat);
+}
+
+#define __fscache_stat(stat) (stat)
+
+int fscache_stats_show(struct seq_file *m);
+#else
+
+#define __fscache_stat(stat) (NULL)
+#define fscache_stat(stat) do {} while (0)
+#define fscache_stat_d(stat) do {} while (0)
+
+static inline int fscache_stats_show(struct seq_file *m) { return 0; }
+#endif
+
+/*
+ * fscache-volume.c
+ */
+#ifdef CONFIG_PROC_FS
+extern const struct seq_operations fscache_volumes_seq_ops;
+#endif
+
+struct fscache_volume *fscache_get_volume(struct fscache_volume *volume,
+					  enum fscache_volume_trace where);
+void fscache_put_volume(struct fscache_volume *volume,
+			enum fscache_volume_trace where);
+bool fscache_begin_volume_access(struct fscache_volume *volume,
+				 struct fscache_cookie *cookie,
+				 enum fscache_access_trace why);
+void fscache_create_volume(struct fscache_volume *volume, bool wait);
+
 /*****************************************************************************/
 /*
  * debug tracing
@@ -143,3 +373,57 @@ do {						\
 #define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
 #define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__)
 #endif
+
+/*
+ * assertions
+ */
+#if 1 /* defined(__KDEBUGALL) */
+
+#define ASSERT(X)							\
+do {									\
+	if (unlikely(!(X))) {						\
+		pr_err("\n");					\
+		pr_err("Assertion failed\n");	\
+		BUG();							\
+	}								\
+} while (0)
+
+#define ASSERTCMP(X, OP, Y)						\
+do {									\
+	if (unlikely(!((X) OP (Y)))) {					\
+		pr_err("\n");					\
+		pr_err("Assertion failed\n");	\
+		pr_err("%lx " #OP " %lx is false\n",		\
+		       (unsigned long)(X), (unsigned long)(Y));		\
+		BUG();							\
+	}								\
+} while (0)
+
+#define ASSERTIF(C, X)							\
+do {									\
+	if (unlikely((C) && !(X))) {					\
+		pr_err("\n");					\
+		pr_err("Assertion failed\n");	\
+		BUG();							\
+	}								\
+} while (0)
+
+#define ASSERTIFCMP(C, X, OP, Y)					\
+do {									\
+	if (unlikely((C) && !((X) OP (Y)))) {				\
+		pr_err("\n");					\
+		pr_err("Assertion failed\n");	\
+		pr_err("%lx " #OP " %lx is false\n",		\
+		       (unsigned long)(X), (unsigned long)(Y));		\
+		BUG();							\
+	}								\
+} while (0)
+
+#else
+
+#define ASSERT(X)			do {} while (0)
+#define ASSERTCMP(X, OP, Y)		do {} while (0)
+#define ASSERTIF(C, X)			do {} while (0)
+#define ASSERTIFCMP(C, X, OP, Y)	do {} while (0)
+
+#endif /* assert or not */
diff --git a/fs/netfs/io.c b/fs/netfs/io.c
index 7f753380e047..e8ff1e61ce79 100644
--- a/fs/netfs/io.c
+++ b/fs/netfs/io.c
@@ -21,12 +21,7 @@
  */
 static void netfs_clear_unread(struct netfs_io_subrequest *subreq)
 {
-	struct iov_iter iter;
-
-	iov_iter_xarray(&iter, ITER_DEST, &subreq->rreq->mapping->i_pages,
-			subreq->start + subreq->transferred,
-			subreq->len   - subreq->transferred);
-	iov_iter_zero(iov_iter_count(&iter), &iter);
+	iov_iter_zero(iov_iter_count(&subreq->io_iter), &subreq->io_iter);
 }
 
 static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error,
@@ -46,14 +41,9 @@ static void netfs_read_from_cache(struct netfs_io_request *rreq,
 				  enum netfs_read_from_hole read_hole)
 {
 	struct netfs_cache_resources *cres = &rreq->cache_resources;
-	struct iov_iter iter;
 
 	netfs_stat(&netfs_n_rh_read);
-	iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages,
-			subreq->start + subreq->transferred,
-			subreq->len   - subreq->transferred);
-
-	cres->ops->read(cres, subreq->start, &iter, read_hole,
+	cres->ops->read(cres, subreq->start, &subreq->io_iter, read_hole,
 			netfs_cache_read_terminated, subreq);
 }
 
@@ -88,6 +78,13 @@ static void netfs_read_from_server(struct netfs_io_request *rreq,
 				   struct netfs_io_subrequest *subreq)
 {
 	netfs_stat(&netfs_n_rh_download);
+
+	if (rreq->origin != NETFS_DIO_READ &&
+	    iov_iter_count(&subreq->io_iter) != subreq->len - subreq->transferred)
+		pr_warn("R=%08x[%u] ITER PRE-MISMATCH %zx != %zx-%zx %lx\n",
+			rreq->debug_id, subreq->debug_index,
+			iov_iter_count(&subreq->io_iter), subreq->len,
+			subreq->transferred, subreq->flags);
 	rreq->netfs_ops->issue_read(subreq);
 }
 
@@ -127,9 +124,10 @@ static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq,
 			/* We might have multiple writes from the same huge
 			 * folio, but we mustn't unlock a folio more than once.
 			 */
-			if (have_unlocked && folio_index(folio) <= unlocked)
+			if (have_unlocked && folio->index <= unlocked)
 				continue;
-			unlocked = folio_index(folio);
+			unlocked = folio_next_index(folio) - 1;
+			trace_netfs_folio(folio, netfs_folio_trace_end_copy);
 			folio_end_fscache(folio);
 			have_unlocked = true;
 		}
@@ -201,7 +199,7 @@ static void netfs_rreq_do_write_to_cache(struct netfs_io_request *rreq)
 		}
 
 		ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len,
-					       rreq->i_size, true);
+					       subreq->len, rreq->i_size, true);
 		if (ret < 0) {
 			trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write);
 			trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip);
@@ -260,6 +258,30 @@ static void netfs_rreq_short_read(struct netfs_io_request *rreq,
 }
 
 /*
+ * Reset the subrequest iterator prior to resubmission.
+ */
+static void netfs_reset_subreq_iter(struct netfs_io_request *rreq,
+				    struct netfs_io_subrequest *subreq)
+{
+	size_t remaining = subreq->len - subreq->transferred;
+	size_t count = iov_iter_count(&subreq->io_iter);
+
+	if (count == remaining)
+		return;
+
+	/* Arguments follow the format: count != len-transferred-i_size. */
+	_debug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n",
+	       rreq->debug_id, subreq->debug_index, count,
+	       subreq->len, subreq->transferred, rreq->i_size,
+	       subreq->io_iter.iter_type);
+
+	if (count < remaining)
+		iov_iter_revert(&subreq->io_iter, remaining - count);
+	else
+		iov_iter_advance(&subreq->io_iter, count - remaining);
+}
+
+/*
  * Resubmit any short or failed operations.  Returns true if we got the rreq
  * ref back.
  */
@@ -287,6 +309,7 @@ static bool netfs_rreq_perform_resubmissions(struct netfs_io_request *rreq)
 			trace_netfs_sreq(subreq, netfs_sreq_trace_download_instead);
 			netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
 			atomic_inc(&rreq->nr_outstanding);
+			netfs_reset_subreq_iter(rreq, subreq);
 			netfs_read_from_server(rreq, subreq);
 		} else if (test_bit(NETFS_SREQ_SHORT_IO, &subreq->flags)) {
 			netfs_rreq_short_read(rreq, subreq);
@@ -321,6 +344,43 @@ static void netfs_rreq_is_still_valid(struct netfs_io_request *rreq)
 }
 
 /*
+ * Determine how much we can admit to having read from a DIO read.
+ */
+static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
+{
+	struct netfs_io_subrequest *subreq;
+	unsigned int i;
+	size_t transferred = 0;
+
+	for (i = 0; i < rreq->direct_bv_count; i++)
+		flush_dcache_page(rreq->direct_bv[i].bv_page);
+
+	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
+		if (subreq->error || subreq->transferred == 0)
+			break;
+		transferred += subreq->transferred;
+		if (subreq->transferred < subreq->len)
+			break;
+	}
+
+	for (i = 0; i < rreq->direct_bv_count; i++)
+		flush_dcache_page(rreq->direct_bv[i].bv_page);
+
+	rreq->transferred = transferred;
+	task_io_account_read(transferred);
+
+	if (rreq->iocb) {
+		rreq->iocb->ki_pos += transferred;
+		if (rreq->iocb->ki_complete)
+			rreq->iocb->ki_complete(
+				rreq->iocb, rreq->error ? rreq->error : transferred);
+	}
+	if (rreq->netfs_ops->done)
+		rreq->netfs_ops->done(rreq);
+	inode_dio_end(rreq->inode);
+}
+
+/*
  * Assess the state of a read request and decide what to do next.
  *
  * Note that we could be in an ordinary kernel thread, on a workqueue or in
@@ -340,8 +400,12 @@ again:
 		return;
 	}
 
-	netfs_rreq_unlock_folios(rreq);
+	if (rreq->origin != NETFS_DIO_READ)
+		netfs_rreq_unlock_folios(rreq);
+	else
+		netfs_rreq_assess_dio(rreq);
 
+	trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip);
 	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
 	wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS);
 
@@ -399,9 +463,9 @@ void netfs_subreq_terminated(struct netfs_io_subrequest *subreq,
 	struct netfs_io_request *rreq = subreq->rreq;
 	int u;
 
-	_enter("[%u]{%llx,%lx},%zd",
-	       subreq->debug_index, subreq->start, subreq->flags,
-	       transferred_or_error);
+	_enter("R=%x[%x]{%llx,%lx},%zd",
+	       rreq->debug_id, subreq->debug_index,
+	       subreq->start, subreq->flags, transferred_or_error);
 
 	switch (subreq->source) {
 	case NETFS_READ_FROM_CACHE:
@@ -501,15 +565,20 @@ static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_subrequest
  */
 static enum netfs_io_source
 netfs_rreq_prepare_read(struct netfs_io_request *rreq,
-			struct netfs_io_subrequest *subreq)
+			struct netfs_io_subrequest *subreq,
+			struct iov_iter *io_iter)
 {
-	enum netfs_io_source source;
+	enum netfs_io_source source = NETFS_DOWNLOAD_FROM_SERVER;
+	struct netfs_inode *ictx = netfs_inode(rreq->inode);
+	size_t lsize;
 
 	_enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);
 
-	source = netfs_cache_prepare_read(subreq, rreq->i_size);
-	if (source == NETFS_INVALID_READ)
-		goto out;
+	if (rreq->origin != NETFS_DIO_READ) {
+		source = netfs_cache_prepare_read(subreq, rreq->i_size);
+		if (source == NETFS_INVALID_READ)
+			goto out;
+	}
 
 	if (source == NETFS_DOWNLOAD_FROM_SERVER) {
 		/* Call out to the netfs to let it shrink the request to fit
@@ -518,19 +587,52 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq,
 		 * to make serial calls, it can indicate a short read and then
 		 * we will call it again.
 		 */
+		if (rreq->origin != NETFS_DIO_READ) {
+			if (subreq->start >= ictx->zero_point) {
+				source = NETFS_FILL_WITH_ZEROES;
+				goto set;
+			}
+			if (subreq->len > ictx->zero_point - subreq->start)
+				subreq->len = ictx->zero_point - subreq->start;
+		}
 		if (subreq->len > rreq->i_size - subreq->start)
 			subreq->len = rreq->i_size - subreq->start;
+		if (rreq->rsize && subreq->len > rreq->rsize)
+			subreq->len = rreq->rsize;
 
 		if (rreq->netfs_ops->clamp_length &&
 		    !rreq->netfs_ops->clamp_length(subreq)) {
 			source = NETFS_INVALID_READ;
 			goto out;
 		}
+
+		if (subreq->max_nr_segs) {
+			lsize = netfs_limit_iter(io_iter, 0, subreq->len,
+						 subreq->max_nr_segs);
+			if (subreq->len > lsize) {
+				subreq->len = lsize;
+				trace_netfs_sreq(subreq, netfs_sreq_trace_limited);
+			}
+		}
 	}
 
-	if (WARN_ON(subreq->len == 0))
+set:
+	if (subreq->len > rreq->len)
+		pr_warn("R=%08x[%u] SREQ>RREQ %zx > %zx\n",
+			rreq->debug_id, subreq->debug_index,
+			subreq->len, rreq->len);
+
+	if (WARN_ON(subreq->len == 0)) {
 		source = NETFS_INVALID_READ;
+		goto out;
+	}
 
+	subreq->source = source;
+	trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
+
+	subreq->io_iter = *io_iter;
+	iov_iter_truncate(&subreq->io_iter, subreq->len);
+	iov_iter_advance(io_iter, subreq->len);
 out:
 	subreq->source = source;
 	trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
@@ -541,6 +643,7 @@ out:
  * Slice off a piece of a read request and submit an I/O request for it.
  */
 static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq,
+				    struct iov_iter *io_iter,
 				    unsigned int *_debug_index)
 {
 	struct netfs_io_subrequest *subreq;
@@ -552,7 +655,7 @@ static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq,
 
 	subreq->debug_index	= (*_debug_index)++;
 	subreq->start		= rreq->start + rreq->submitted;
-	subreq->len		= rreq->len   - rreq->submitted;
+	subreq->len		= io_iter->count;
 
 	_debug("slice %llx,%zx,%zx", subreq->start, subreq->len, rreq->submitted);
 	list_add_tail(&subreq->rreq_link, &rreq->subrequests);
@@ -565,7 +668,7 @@ static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq,
 	 * (the starts must coincide), in which case, we go around the loop
 	 * again and ask it to download the next piece.
 	 */
-	source = netfs_rreq_prepare_read(rreq, subreq);
+	source = netfs_rreq_prepare_read(rreq, subreq, io_iter);
 	if (source == NETFS_INVALID_READ)
 		goto subreq_failed;
 
@@ -603,6 +706,7 @@ subreq_failed:
  */
 int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
 {
+	struct iov_iter io_iter;
 	unsigned int debug_index = 0;
 	int ret;
 
@@ -611,50 +715,76 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
 
 	if (rreq->len == 0) {
 		pr_err("Zero-sized read [R=%x]\n", rreq->debug_id);
-		netfs_put_request(rreq, false, netfs_rreq_trace_put_zero_len);
 		return -EIO;
 	}
 
-	INIT_WORK(&rreq->work, netfs_rreq_work);
+	if (rreq->origin == NETFS_DIO_READ)
+		inode_dio_begin(rreq->inode);
 
-	if (sync)
-		netfs_get_request(rreq, netfs_rreq_trace_get_hold);
+	// TODO: Use bounce buffer if requested
+	rreq->io_iter = rreq->iter;
+
+	INIT_WORK(&rreq->work, netfs_rreq_work);
 
 	/* Chop the read into slices according to what the cache and the netfs
 	 * want and submit each one.
 	 */
+	netfs_get_request(rreq, netfs_rreq_trace_get_for_outstanding);
 	atomic_set(&rreq->nr_outstanding, 1);
+	io_iter = rreq->io_iter;
 	do {
-		if (!netfs_rreq_submit_slice(rreq, &debug_index))
+		_debug("submit %llx + %zx >= %llx",
+		       rreq->start, rreq->submitted, rreq->i_size);
+		if (rreq->origin == NETFS_DIO_READ &&
+		    rreq->start + rreq->submitted >= rreq->i_size)
+			break;
+		if (!netfs_rreq_submit_slice(rreq, &io_iter, &debug_index))
+			break;
+		if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) &&
+		    test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags))
 			break;
 
 	} while (rreq->submitted < rreq->len);
 
+	if (!rreq->submitted) {
+		/* Nothing was issued, so netfs_rreq_assess_dio() will never
+		 * run to balance the inode_dio_begin() above; undo it here.
+		 */
+		if (rreq->origin == NETFS_DIO_READ)
+			inode_dio_end(rreq->inode);
+		netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit);
+		ret = 0;
+		goto out;
+	}
+
 	if (sync) {
-		/* Keep nr_outstanding incremented so that the ref always belongs to
-		 * us, and the service code isn't punted off to a random thread pool to
-		 * process.
+		/* Keep nr_outstanding incremented so that the ref always
+		 * belongs to us, and the service code isn't punted off to a
+		 * random thread pool to process.  Note that this might start
+		 * further work, such as writing to the cache.
 		 */
-		for (;;) {
-			wait_var_event(&rreq->nr_outstanding,
-				       atomic_read(&rreq->nr_outstanding) == 1);
+		wait_var_event(&rreq->nr_outstanding,
+			       atomic_read(&rreq->nr_outstanding) == 1);
+		if (atomic_dec_and_test(&rreq->nr_outstanding))
 			netfs_rreq_assess(rreq, false);
-			if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
-				break;
-			cond_resched();
-		}
+
+		trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip);
+		wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS,
+			    TASK_UNINTERRUPTIBLE);
 
 		ret = rreq->error;
-		if (ret == 0 && rreq->submitted < rreq->len) {
+		if (ret == 0 && rreq->submitted < rreq->len &&
+		    rreq->origin != NETFS_DIO_READ) {
 			trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
 			ret = -EIO;
 		}
-		netfs_put_request(rreq, false, netfs_rreq_trace_put_hold);
 	} else {
 		/* If we decrement nr_outstanding to 0, the ref belongs to us. */
 		if (atomic_dec_and_test(&rreq->nr_outstanding))
 			netfs_rreq_assess(rreq, false);
-		ret = 0;
+		ret = -EIOCBQUEUED;
 	}
+
+out:
 	return ret;
 }
diff --git a/fs/netfs/iterator.c b/fs/netfs/iterator.c
index 2ff07ba655a0..b781bbbf1d8d 100644
--- a/fs/netfs/iterator.c
+++ b/fs/netfs/iterator.c
@@ -101,3 +101,100 @@ ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
 	return npages;
 }
 EXPORT_SYMBOL_GPL(netfs_extract_user_iter);
+
+/*
+ * Select the span of a bvec iterator we're going to use.  Limit it by both maximum
+ * size and maximum number of segments.  Returns the size of the span in bytes.
+ */
+static size_t netfs_limit_bvec(const struct iov_iter *iter, size_t start_offset,
+			       size_t max_size, size_t max_segs)
+{
+	const struct bio_vec *bvecs = iter->bvec;
+	unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
+	size_t len, span = 0, n = iter->count;
+	size_t skip = iter->iov_offset + start_offset;
+
+	if (WARN_ON(!iov_iter_is_bvec(iter)) ||
+	    WARN_ON(start_offset > n) ||
+	    n == 0)
+		return 0;
+
+	while (n && ix < nbv && skip) {
+		len = bvecs[ix].bv_len;
+		if (skip < len)
+			break;
+		skip -= len;
+		n -= len;
+		ix++;
+	}
+
+	while (n && ix < nbv) {
+		len = min3(n, bvecs[ix].bv_len - skip, max_size);
+		span += len;
+		nsegs++;
+		ix++;
+		if (span >= max_size || nsegs >= max_segs)
+			break;
+		skip = 0;
+		n -= len;
+	}
+
+	return min(span, max_size);
+}
+
+/*
+ * Select the span of an xarray iterator we're going to use.  Limit it by both
+ * maximum size and maximum number of segments.  It is assumed that segments
+ * can be larger than a page in size, provided they're physically contiguous.
+ * Returns the size of the span in bytes.
+ */
+static size_t netfs_limit_xarray(const struct iov_iter *iter, size_t start_offset,
+				 size_t max_size, size_t max_segs)
+{
+	struct folio *folio;
+	unsigned int nsegs = 0;
+	loff_t pos = iter->xarray_start + iter->iov_offset;
+	pgoff_t index = pos / PAGE_SIZE;
+	size_t span = 0, n = iter->count;
+
+	XA_STATE(xas, iter->xarray, index);
+
+	if (WARN_ON(!iov_iter_is_xarray(iter)) ||
+	    WARN_ON(start_offset > n) ||
+	    n == 0)
+		return 0;
+	max_size = min(max_size, n - start_offset);
+
+	rcu_read_lock();
+	xas_for_each(&xas, folio, ULONG_MAX) {
+		size_t offset, flen, len;
+		if (xas_retry(&xas, folio))
+			continue;
+		if (WARN_ON(xa_is_value(folio)))
+			break;
+		if (WARN_ON(folio_test_hugetlb(folio)))
+			break;
+
+		flen = folio_size(folio);
+		offset = offset_in_folio(folio, pos);
+		len = min(max_size, flen - offset);
+		span += len;
+		nsegs++;
+		if (span >= max_size || nsegs >= max_segs)
+			break;
+	}
+
+	rcu_read_unlock();
+	return min(span, max_size);
+}
+
+size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset,
+			size_t max_size, size_t max_segs)
+{
+	if (iov_iter_is_bvec(iter))
+		return netfs_limit_bvec(iter, start_offset, max_size, max_segs);
+	if (iov_iter_is_xarray(iter))
+		return netfs_limit_xarray(iter, start_offset, max_size, max_segs);
+	BUG();
+}
+EXPORT_SYMBOL(netfs_limit_iter);
diff --git a/fs/netfs/locking.c b/fs/netfs/locking.c
new file mode 100644
index 000000000000..75dc52a49b3a
--- /dev/null
+++ b/fs/netfs/locking.c
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * I/O and data path helper functionality.
+ *
+ * Borrowed from NFS Copyright (c) 2016 Trond Myklebust
+ */
+
+#include <linux/kernel.h>
+#include <linux/netfs.h>
+#include "internal.h"
+
+/*
+ * inode_dio_wait_interruptible - wait for outstanding DIO requests to finish
+ * @inode: inode to wait for
+ *
+ * Waits for all pending direct I/O requests to finish so that we can
+ * proceed with a truncate or equivalent operation.
+ *
+ * Must be called under a lock that serializes taking new references
+ * to i_dio_count, usually inode->i_rwsem.
+ */
+static int inode_dio_wait_interruptible(struct inode *inode)
+{
+	if (!atomic_read(&inode->i_dio_count))
+		return 0;
+
+	wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
+	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
+
+	for (;;) {
+		prepare_to_wait(wq, &q.wq_entry, TASK_INTERRUPTIBLE);
+		if (!atomic_read(&inode->i_dio_count))
+			break;
+		if (signal_pending(current))
+			break;
+		schedule();
+	}
+	finish_wait(wq, &q.wq_entry);
+
+	return atomic_read(&inode->i_dio_count) ? -ERESTARTSYS : 0;
+}
+
+/* Call with exclusively locked inode->i_rwsem */
+static int netfs_block_o_direct(struct netfs_inode *ictx)
+{
+	if (!test_bit(NETFS_ICTX_ODIRECT, &ictx->flags))
+		return 0;
+	clear_bit(NETFS_ICTX_ODIRECT, &ictx->flags);
+	return inode_dio_wait_interruptible(&ictx->inode);
+}
+
+/**
+ * netfs_start_io_read - declare the file is being used for buffered reads
+ * @inode: file inode
+ *
+ * Declare that a buffered read operation is about to start, and ensure
+ * that we block all direct I/O.
+ * On exit, the function ensures that the NETFS_ICTX_ODIRECT flag is unset,
+ * and holds a shared lock on inode->i_rwsem to ensure that the flag
+ * cannot be changed.
+ * In practice, this means that buffered read operations are allowed to
+ * execute in parallel, thanks to the shared lock, whereas direct I/O
+ * operations need to wait to grab an exclusive lock in order to set
+ * NETFS_ICTX_ODIRECT.
+ * Note that buffered writes and truncates both take a write lock on
+ * inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
+ */
+int netfs_start_io_read(struct inode *inode)
+	__acquires(inode->i_rwsem)
+{
+	struct netfs_inode *ictx = netfs_inode(inode);
+
+	/* Be an optimist! */
+	if (down_read_interruptible(&inode->i_rwsem) < 0)
+		return -ERESTARTSYS;
+	if (test_bit(NETFS_ICTX_ODIRECT, &ictx->flags) == 0)
+		return 0;
+	up_read(&inode->i_rwsem);
+
+	/* Slow path.... */
+	if (down_write_killable(&inode->i_rwsem) < 0)
+		return -ERESTARTSYS;
+	if (netfs_block_o_direct(ictx) < 0) {
+		up_write(&inode->i_rwsem);
+		return -ERESTARTSYS;
+	}
+	downgrade_write(&inode->i_rwsem);
+	return 0;
+}
+EXPORT_SYMBOL(netfs_start_io_read);
+
+/**
+ * netfs_end_io_read - declare that the buffered read operation is done
+ * @inode: file inode
+ *
+ * Declare that a buffered read operation is done, and release the shared
+ * lock on inode->i_rwsem.
+ */
+void netfs_end_io_read(struct inode *inode)
+	__releases(inode->i_rwsem)
+{
+	up_read(&inode->i_rwsem);
+}
+EXPORT_SYMBOL(netfs_end_io_read);
+
+/**
+ * netfs_start_io_write - declare the file is being used for buffered writes
+ * @inode: file inode
+ *
+ * Declare that a buffered write operation is about to start, and ensure
+ * that we block all direct I/O.
+ */
+int netfs_start_io_write(struct inode *inode)
+	__acquires(inode->i_rwsem)
+{
+	struct netfs_inode *ictx = netfs_inode(inode);
+
+	if (down_write_killable(&inode->i_rwsem) < 0)
+		return -ERESTARTSYS;
+	if (netfs_block_o_direct(ictx) < 0) {
+		up_write(&inode->i_rwsem);
+		return -ERESTARTSYS;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(netfs_start_io_write);
+
+/**
+ * netfs_end_io_write - declare that the buffered write operation is done
+ * @inode: file inode
+ *
+ * Declare that a buffered write operation is done, and release the
+ * lock on inode->i_rwsem.
+ */
+void netfs_end_io_write(struct inode *inode)
+	__releases(inode->i_rwsem)
+{
+	up_write(&inode->i_rwsem);
+}
+EXPORT_SYMBOL(netfs_end_io_write);
+
+/* Call with exclusively locked inode->i_rwsem */
+static int netfs_block_buffered(struct inode *inode)
+{
+	struct netfs_inode *ictx = netfs_inode(inode);
+	int ret;
+
+	if (!test_bit(NETFS_ICTX_ODIRECT, &ictx->flags)) {
+		set_bit(NETFS_ICTX_ODIRECT, &ictx->flags);
+		if (inode->i_mapping->nrpages != 0) {
+			unmap_mapping_range(inode->i_mapping, 0, 0, 0);
+			ret = filemap_fdatawait(inode->i_mapping);
+			if (ret < 0) {
+				clear_bit(NETFS_ICTX_ODIRECT, &ictx->flags);
+				return ret;
+			}
+		}
+	}
+	return 0;
+}
+
+/**
+ * netfs_start_io_direct - declare the file is being used for direct i/o
+ * @inode: file inode
+ *
+ * Declare that a direct I/O operation is about to start, and ensure
+ * that we block all buffered I/O.
+ * On exit, the function ensures that the NETFS_ICTX_ODIRECT flag is set,
+ * and holds a shared lock on inode->i_rwsem to ensure that the flag
+ * cannot be changed.
+ * In practice, this means that direct I/O operations are allowed to
+ * execute in parallel, thanks to the shared lock, whereas buffered I/O
+ * operations need to wait to grab an exclusive lock in order to clear
+ * NETFS_ICTX_ODIRECT.
+ * Note that buffered writes and truncates both take a write lock on
+ * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
+ */
+int netfs_start_io_direct(struct inode *inode)
+	__acquires(inode->i_rwsem)
+{
+	struct netfs_inode *ictx = netfs_inode(inode);
+	int ret;
+
+	/* Be an optimist! */
+	if (down_read_interruptible(&inode->i_rwsem) < 0)
+		return -ERESTARTSYS;
+	if (test_bit(NETFS_ICTX_ODIRECT, &ictx->flags) != 0)
+		return 0;
+	up_read(&inode->i_rwsem);
+
+	/* Slow path.... */
+	if (down_write_killable(&inode->i_rwsem) < 0)
+		return -ERESTARTSYS;
+	ret = netfs_block_buffered(inode);
+	if (ret < 0) {
+		up_write(&inode->i_rwsem);
+		return ret;
+	}
+	downgrade_write(&inode->i_rwsem);
+	return 0;
+}
+EXPORT_SYMBOL(netfs_start_io_direct);
+
+/**
+ * netfs_end_io_direct - declare that the direct i/o operation is done
+ * @inode: file inode
+ *
+ * Declare that a direct I/O operation is done, and release the shared
+ * lock on inode->i_rwsem.
+ */
+void netfs_end_io_direct(struct inode *inode)
+	__releases(inode->i_rwsem)
+{
+	up_read(&inode->i_rwsem);
+}
+EXPORT_SYMBOL(netfs_end_io_direct);
diff --git a/fs/netfs/main.c b/fs/netfs/main.c
index 068568702957..5e77618a7940 100644
--- a/fs/netfs/main.c
+++ b/fs/netfs/main.c
@@ -7,6 +7,8 @@
 
 #include <linux/module.h>
 #include <linux/export.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include "internal.h"
 #define CREATE_TRACE_POINTS
 #include <trace/events/netfs.h>
@@ -15,6 +17,113 @@ MODULE_DESCRIPTION("Network fs support");
 MODULE_AUTHOR("Red Hat, Inc.");
 MODULE_LICENSE("GPL");
 
+EXPORT_TRACEPOINT_SYMBOL(netfs_sreq);
+
 unsigned netfs_debug;
 module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask");
+
+#ifdef CONFIG_PROC_FS
+LIST_HEAD(netfs_io_requests);
+DEFINE_SPINLOCK(netfs_proc_lock);
+
+static const char *netfs_origins[nr__netfs_io_origin] = {
+	[NETFS_READAHEAD]		= "RA",
+	[NETFS_READPAGE]		= "RP",
+	[NETFS_READ_FOR_WRITE]		= "RW",
+	[NETFS_WRITEBACK]		= "WB",
+	[NETFS_WRITETHROUGH]		= "WT",
+	[NETFS_LAUNDER_WRITE]		= "LW",
+	[NETFS_UNBUFFERED_WRITE]	= "UW",
+	[NETFS_DIO_READ]		= "DR",
+	[NETFS_DIO_WRITE]		= "DW",
+};
+
+/*
+ * Show one line of /proc/fs/netfs/requests: the header or a single request
+ */
+static int netfs_requests_seq_show(struct seq_file *m, void *v)
+{
+	struct netfs_io_request *rreq;
+
+	if (v == &netfs_io_requests) {
+		seq_puts(m,
+			 "REQUEST  OR REF FL ERR  OPS COVERAGE\n"
+			 "======== == === == ==== === =========\n"
+			 );
+		return 0;
+	}
+
+	rreq = list_entry(v, struct netfs_io_request, proc_link);
+	seq_printf(m,
+		   "%08x %s %3d %2lx %4d %3d @%04llx %zx/%zx",
+		   rreq->debug_id,
+		   netfs_origins[rreq->origin],
+		   refcount_read(&rreq->ref),
+		   rreq->flags,
+		   rreq->error,
+		   atomic_read(&rreq->nr_outstanding),
+		   rreq->start, rreq->submitted, rreq->len);
+	seq_putc(m, '\n');
+	return 0;
+}
+
+static void *netfs_requests_seq_start(struct seq_file *m, loff_t *_pos)
+	__acquires(rcu)
+{
+	rcu_read_lock();
+	return seq_list_start_head(&netfs_io_requests, *_pos);
+}
+
+static void *netfs_requests_seq_next(struct seq_file *m, void *v, loff_t *_pos)
+{
+	return seq_list_next(v, &netfs_io_requests, _pos);
+}
+
+static void netfs_requests_seq_stop(struct seq_file *m, void *v)
+	__releases(rcu)
+{
+	rcu_read_unlock();
+}
+
+static const struct seq_operations netfs_requests_seq_ops = {
+	.start  = netfs_requests_seq_start,
+	.next   = netfs_requests_seq_next,
+	.stop   = netfs_requests_seq_stop,
+	.show   = netfs_requests_seq_show,
+};
+#endif /* CONFIG_PROC_FS */
+
+static int __init netfs_init(void)
+{
+	int ret = -ENOMEM;
+
+	if (!proc_mkdir("fs/netfs", NULL))
+		goto error;
+	if (!proc_create_seq("fs/netfs/requests", S_IFREG | 0444, NULL,
+			     &netfs_requests_seq_ops))
+		goto error_proc;
+#ifdef CONFIG_FSCACHE_STATS
+	if (!proc_create_single("fs/netfs/stats", S_IFREG | 0444, NULL,
+				netfs_stats_show))
+		goto error_proc;
+#endif
+
+	ret = fscache_init();
+	if (ret < 0)
+		goto error_proc;
+	return 0;
+
+error_proc:	/* "requests"/"stats" may already exist, so drop the subtree */
+	remove_proc_subtree("fs/netfs", NULL);
+error:
+	return ret;
+}
+fs_initcall(netfs_init);
+
+static void __exit netfs_exit(void)
+{
+	fscache_exit();
+	remove_proc_subtree("fs/netfs", NULL);	/* directory is not empty */
+}
+module_exit(netfs_exit);
diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
new file mode 100644
index 000000000000..90051ced8e2a
--- /dev/null
+++ b/fs/netfs/misc.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Miscellaneous routines.
+ *
+ * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/swap.h>
+#include "internal.h"
+
+/*
+ * Store a folio into the buffer xarray at @index and, if asked to by @flags,
+ * tag the slot so that the folio gets put and/or its pagecache flags updated.
+ */
+int netfs_xa_store_and_mark(struct xarray *xa, unsigned long index,
+			    struct folio *folio, unsigned int flags,
+			    gfp_t gfp_mask)
+{
+	XA_STATE_ORDER(xas, xa, index, folio_order(folio));
+
+retry:
+	xas_lock(&xas);
+	xas_store(&xas, folio);
+	if (xas_error(&xas)) {
+		/* Drop the lock before asking xas_nomem() whether to retry. */
+		xas_unlock(&xas);
+		if (xas_nomem(&xas, gfp_mask))
+			goto retry;
+		return xas_error(&xas);
+	}
+
+	/* The store worked; apply whichever marks the caller asked for. */
+	if (flags & NETFS_FLAG_PUT_MARK)
+		xas_set_mark(&xas, NETFS_BUF_PUT_MARK);
+	if (flags & NETFS_FLAG_PAGECACHE_MARK)
+		xas_set_mark(&xas, NETFS_BUF_PAGECACHE_MARK);
+	xas_unlock(&xas);
+	return xas_error(&xas);
+}
+
+/*
+ * Allocate folios to cover the inclusive page range @index to @to and store
+ * them in the buffer.  Each is marked with NETFS_BUF_PUT_MARK so that we know
+ * it needs putting later.
+ */
+int netfs_add_folios_to_buffer(struct xarray *buffer,
+			       struct address_space *mapping,
+			       pgoff_t index, pgoff_t to, gfp_t gfp_mask)
+{
+	struct folio *folio;
+	int err;
+
+	/* The range is inclusive, so @to == @index - 1 means it is empty. */
+	if (index == to + 1)
+		return 0;
+	for (;;) {
+		/* TODO: Figure out what order folio can be allocated here */
+		folio = filemap_alloc_folio(readahead_gfp_mask(mapping), 0);
+		if (!folio)
+			return -ENOMEM;
+		folio->index = index;
+		err = netfs_xa_store_and_mark(buffer, index, folio,
+					      NETFS_FLAG_PUT_MARK, gfp_mask);
+		if (err < 0) {
+			folio_put(folio);
+			return err;
+		}
+		index += folio_nr_pages(folio);
+		/* Stop once past @to or if the index wrapped round to 0. */
+		if (index > to || index == 0)
+			return 0;
+	}
+}
+
+/*
+ * Tear down an xarray buffer, dropping the ref held on each folio that is
+ * tagged with NETFS_BUF_PUT_MARK before destroying the array itself.
+ */
+void netfs_clear_buffer(struct xarray *buffer)
+{
+	XA_STATE(xas, buffer, 0);
+	struct folio *folio;
+
+	rcu_read_lock();
+	/* Only folios carrying the put mark have their ref dropped here. */
+	xas_for_each_marked(&xas, folio, ULONG_MAX, NETFS_BUF_PUT_MARK)
+		folio_put(folio);
+	rcu_read_unlock();
+	xa_destroy(buffer);
+}
+
+/**
+ * netfs_dirty_folio - Mark folio dirty and pin a cache object for writeback
+ * @mapping: The mapping the folio belongs to.
+ * @folio: The folio being dirtied.
+ *
+ * Set the dirty flag on a folio and pin an in-use cache object in memory so
+ * that writeback can later write to it.  This is intended to be called from
+ * the filesystem's ->dirty_folio() method.
+ *
+ * Return: true if the dirty flag was set on the folio, false otherwise.
+ */
+bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio)
+{
+	struct inode *inode = mapping->host;
+	struct fscache_cookie *cookie = netfs_i_cookie(netfs_inode(inode));
+	bool pin = false;
+
+	_enter("");
+
+	if (!filemap_dirty_folio(mapping, folio))
+		return false;
+	/* No pinning is needed without a valid cookie or if the inode already
+	 * has the pin; the unlocked flag test is repeated under i_lock below.
+	 */
+	if (!fscache_cookie_valid(cookie) ||
+	    (inode->i_state & I_PINNING_NETFS_WB))
+		return true;
+
+	spin_lock(&inode->i_lock);
+	if (!(inode->i_state & I_PINNING_NETFS_WB)) {
+		inode->i_state |= I_PINNING_NETFS_WB;
+		pin = true;
+	}
+	spin_unlock(&inode->i_lock);
+	if (pin)
+		fscache_use_cookie(cookie, true);
+	return true;
+}
+EXPORT_SYMBOL(netfs_dirty_folio);
+
+/**
+ * netfs_unpin_writeback - Unpin writeback resources
+ * @inode: The inode on which the cookie resides
+ * @wbc: The writeback control
+ *
+ * Unuse the cookie pinned by netfs_dirty_folio() if @wbc->unpinned_netfs_wb is
+ * set.  Intended to be called as/by ->write_inode().  Always returns 0.
+ */
+int netfs_unpin_writeback(struct inode *inode, struct writeback_control *wbc)
+{
+	struct fscache_cookie *cookie = netfs_i_cookie(netfs_inode(inode));
+
+	if (wbc->unpinned_netfs_wb)
+		fscache_unuse_cookie(cookie, NULL, NULL);
+	return 0;
+}
+EXPORT_SYMBOL(netfs_unpin_writeback);
+
+/**
+ * netfs_clear_inode_writeback - Clear writeback resources pinned by an inode
+ * @inode: The inode to clean up
+ * @aux: Auxiliary data handed to fscache_unuse_cookie()
+ *
+ * On eviction, unuse the cookie of an inode that has I_PINNING_NETFS_WB set.
+ * @aux and i_size are passed on.  Call this before clear_inode() is called.
+ */
+void netfs_clear_inode_writeback(struct inode *inode, const void *aux)
+{
+	struct fscache_cookie *cookie = netfs_i_cookie(netfs_inode(inode));
+
+	if (inode->i_state & I_PINNING_NETFS_WB) {
+		loff_t i_size = i_size_read(inode);
+		fscache_unuse_cookie(cookie, aux, &i_size);
+	}
+}
+EXPORT_SYMBOL(netfs_clear_inode_writeback);
+
+/**
+ * netfs_invalidate_folio - Invalidate or partially invalidate a folio
+ * @folio: Folio being invalidated
+ * @offset: Offset of the invalidated region
+ * @length: Length of the invalidated region
+ *
+ * Invalidate part or all of a folio for a network filesystem.  The folio will
+ * be removed afterwards if the invalidated region covers the entire folio.
+ */
+void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
+{
+	struct netfs_folio *finfo = NULL;
+	size_t flen = folio_size(folio);
+
+	_enter("{%lx},%zx,%zx", folio->index, offset, length);
+
+	folio_wait_fscache(folio);
+
+	if (!folio_test_private(folio))
+		return;
+
+	finfo = netfs_folio_info(folio);
+
+	if (offset == 0 && length >= flen)
+		goto erase_completely;
+
+	if (finfo) {
+		/* We have a partially uptodate page from a streaming write. */
+		unsigned int fstart = finfo->dirty_offset;
+		unsigned int fend = fstart + finfo->dirty_len;
+		unsigned int end = offset + length;
+
+		if (offset >= fend)
+			return;
+		if (end <= fstart)
+			return;
+		if (offset <= fstart && end >= fend)
+			goto erase_completely;
+		if (offset <= fstart) {
+			/* The front of the data went: keep [end, fend). */
+			finfo->dirty_offset = end;
+			finfo->dirty_len = fend - end;
+			return;
+		}
+		if (end >= fend) {
+			/* The tail of the data went: keep [fstart, offset). */
+			finfo->dirty_len = offset - fstart;
+			return;
+		}
+		/* A partial write was split.  The caller has already zeroed
+		 * it, so just absorb the hole.
+		 */
+	}
+	return;
+
+erase_completely:
+	netfs_put_group(netfs_folio_group(folio));
+	folio_detach_private(folio);
+	folio_clear_uptodate(folio);
+	kfree(finfo);
+}
+EXPORT_SYMBOL(netfs_invalidate_folio);
+
+/**
+ * netfs_release_folio - Try to release a folio
+ * @folio: Folio proposed for release
+ * @gfp: Flags qualifying the release
+ *
+ * Request release of a folio and clean up its private state if it's not busy.
+ * Returns true if the folio can now be released, false if not
+ */
+bool netfs_release_folio(struct folio *folio, gfp_t gfp)
+{
+	struct netfs_inode *ctx = netfs_inode(folio_inode(folio));
+	unsigned long long end = folio_pos(folio) + folio_size(folio);
+
+	/* Advance the zero point to the end of this folio if it lies below. */
+	if (ctx->zero_point < end)
+		ctx->zero_point = end;
+
+	if (folio_test_private(folio))
+		return false;
+	if (folio_test_fscache(folio)) {
+		if (current_is_kswapd() || !(gfp & __GFP_FS))
+			return false;
+		folio_wait_fscache(folio);
+	}
+
+	fscache_note_page_release(netfs_i_cookie(ctx));
+	return true;
+}
+EXPORT_SYMBOL(netfs_release_folio);
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index e17cdf53f6a7..610ceb5bd86c 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -20,14 +20,20 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
 	struct inode *inode = file ? file_inode(file) : mapping->host;
 	struct netfs_inode *ctx = netfs_inode(inode);
 	struct netfs_io_request *rreq;
+	bool is_unbuffered = (origin == NETFS_UNBUFFERED_WRITE ||
+			      origin == NETFS_DIO_READ ||
+			      origin == NETFS_DIO_WRITE);
+	bool cached = !is_unbuffered && netfs_is_cache_enabled(ctx);
 	int ret;
 
-	rreq = kzalloc(sizeof(struct netfs_io_request), GFP_KERNEL);
+	rreq = kzalloc(ctx->ops->io_request_size ?: sizeof(struct netfs_io_request),
+		       GFP_KERNEL);
 	if (!rreq)
 		return ERR_PTR(-ENOMEM);
 
 	rreq->start	= start;
 	rreq->len	= len;
+	rreq->upper_len	= len;
 	rreq->origin	= origin;
 	rreq->netfs_ops	= ctx->ops;
 	rreq->mapping	= mapping;
@@ -35,8 +41,14 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
 	rreq->i_size	= i_size_read(inode);
 	rreq->debug_id	= atomic_inc_return(&debug_ids);
 	INIT_LIST_HEAD(&rreq->subrequests);
+	INIT_WORK(&rreq->work, NULL);
 	refcount_set(&rreq->ref, 1);
+
 	__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
+	if (cached)
+		__set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags);
+	if (file && file->f_flags & O_NONBLOCK)
+		__set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags);
 	if (rreq->netfs_ops->init_request) {
 		ret = rreq->netfs_ops->init_request(rreq, file);
 		if (ret < 0) {
@@ -45,6 +57,8 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
 		}
 	}
 
+	trace_netfs_rreq_ref(rreq->debug_id, 1, netfs_rreq_trace_new);
+	netfs_proc_add_rreq(rreq);
 	netfs_stat(&netfs_n_rh_rreq);
 	return rreq;
 }
@@ -74,33 +88,47 @@ static void netfs_free_request(struct work_struct *work)
 {
 	struct netfs_io_request *rreq =
 		container_of(work, struct netfs_io_request, work);
+	unsigned int i;
 
 	trace_netfs_rreq(rreq, netfs_rreq_trace_free);
+	netfs_proc_del_rreq(rreq);
 	netfs_clear_subrequests(rreq, false);
 	if (rreq->netfs_ops->free_request)
 		rreq->netfs_ops->free_request(rreq);
 	if (rreq->cache_resources.ops)
 		rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
-	kfree(rreq);
+	if (rreq->direct_bv) {
+		for (i = 0; i < rreq->direct_bv_count; i++) {
+			if (rreq->direct_bv[i].bv_page) {
+				if (rreq->direct_bv_unpin)
+					unpin_user_page(rreq->direct_bv[i].bv_page);
+			}
+		}
+		kvfree(rreq->direct_bv);
+	}
+	kfree_rcu(rreq, rcu);
 	netfs_stat_d(&netfs_n_rh_rreq);
 }
 
 void netfs_put_request(struct netfs_io_request *rreq, bool was_async,
 		       enum netfs_rreq_ref_trace what)
 {
-	unsigned int debug_id = rreq->debug_id;
+	unsigned int debug_id;
 	bool dead;
 	int r;
 
-	dead = __refcount_dec_and_test(&rreq->ref, &r);
-	trace_netfs_rreq_ref(debug_id, r - 1, what);
-	if (dead) {
-		if (was_async) {
-			rreq->work.func = netfs_free_request;
-			if (!queue_work(system_unbound_wq, &rreq->work))
-				BUG();
-		} else {
-			netfs_free_request(&rreq->work);
+	if (rreq) {
+		debug_id = rreq->debug_id;
+		dead = __refcount_dec_and_test(&rreq->ref, &r);
+		trace_netfs_rreq_ref(debug_id, r - 1, what);
+		if (dead) {
+			if (was_async) {
+				rreq->work.func = netfs_free_request;
+				if (!queue_work(system_unbound_wq, &rreq->work))
+					BUG();
+			} else {
+				netfs_free_request(&rreq->work);
+			}
 		}
 	}
 }
@@ -112,8 +140,11 @@ struct netfs_io_subrequest *netfs_alloc_subrequest(struct netfs_io_request *rreq
 {
 	struct netfs_io_subrequest *subreq;
 
-	subreq = kzalloc(sizeof(struct netfs_io_subrequest), GFP_KERNEL);
+	subreq = kzalloc(rreq->netfs_ops->io_subrequest_size ?:
+			 sizeof(struct netfs_io_subrequest),
+			 GFP_KERNEL);
 	if (subreq) {
+		INIT_WORK(&subreq->work, NULL);
 		INIT_LIST_HEAD(&subreq->rreq_link);
 		refcount_set(&subreq->ref, 2);
 		subreq->rreq = rreq;
@@ -140,6 +171,8 @@ static void netfs_free_subrequest(struct netfs_io_subrequest *subreq,
 	struct netfs_io_request *rreq = subreq->rreq;
 
 	trace_netfs_sreq(subreq, netfs_sreq_trace_free);
+	if (rreq->netfs_ops->free_subrequest)
+		rreq->netfs_ops->free_subrequest(subreq);
 	kfree(subreq);
 	netfs_stat_d(&netfs_n_rh_sreq);
 	netfs_put_request(rreq, was_async, netfs_rreq_trace_put_subreq);
diff --git a/fs/netfs/output.c b/fs/netfs/output.c
new file mode 100644
index 000000000000..625eb68f3e5a
--- /dev/null
+++ b/fs/netfs/output.c
@@ -0,0 +1,478 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Network filesystem high-level write support.
+ *
+ * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/writeback.h>
+#include <linux/pagevec.h>
+#include "internal.h"
+
+/**
+ * netfs_create_write_request - Create a write operation.
+ * @wreq: The write request this is storing from.
+ * @dest: The destination type
+ * @start: Start of the region this write will modify
+ * @len: Length of the modification
+ * @worker: The worker function to handle the write(s)
+ *
+ * Allocate a write operation, set it up and add it to the list on a write
+ * request.
+ */
+struct netfs_io_subrequest *netfs_create_write_request(struct netfs_io_request *wreq,
+						       enum netfs_io_source dest,
+						       loff_t start, size_t len,
+						       work_func_t worker)
+{
+	struct netfs_io_subrequest *subreq;
+
+	subreq = netfs_alloc_subrequest(wreq);
+	if (!subreq)
+		return NULL;
+
+	INIT_WORK(&subreq->work, worker);
+	subreq->source	= dest;
+	subreq->start	= start;
+	subreq->len	= len;
+	subreq->debug_index = wreq->subreq_counter++;
+
+	switch (subreq->source) {
+	case NETFS_UPLOAD_TO_SERVER:
+		netfs_stat(&netfs_n_wh_upload);
+		break;
+	case NETFS_WRITE_TO_CACHE:
+		netfs_stat(&netfs_n_wh_write);
+		break;
+	default:
+		BUG();
+	}
+
+	/* Give the subrequest an iterator over just its slice of the data. */
+	subreq->io_iter = wreq->io_iter;
+	iov_iter_advance(&subreq->io_iter, subreq->start - wreq->start);
+	iov_iter_truncate(&subreq->io_iter, subreq->len);
+
+	trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
+			     refcount_read(&subreq->ref), netfs_sreq_trace_new);
+	atomic_inc(&wreq->nr_outstanding);
+	list_add_tail(&subreq->rreq_link, &wreq->subrequests);
+	trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
+	return subreq;
+}
+EXPORT_SYMBOL(netfs_create_write_request);
+
+/*
+ * Wrap up a write request once every subrequest has finished: total up what
+ * was written, collect errors, clean up, and wake or call back the issuer.
+ */
+static void netfs_write_terminated(struct netfs_io_request *wreq, bool was_async)
+{
+	struct netfs_io_subrequest *subreq;
+	struct netfs_inode *ctx = netfs_inode(wreq->inode);
+	size_t transferred = 0;
+
+	_enter("R=%x[]", wreq->debug_id);
+
+	trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);
+
+	list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
+		if (subreq->error || subreq->transferred == 0)
+			break;
+		transferred += subreq->transferred;
+		if (subreq->transferred < subreq->len)
+			break;
+	}
+	wreq->transferred = transferred;
+
+	list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
+		if (!subreq->error)
+			continue;
+		switch (subreq->source) {
+		case NETFS_UPLOAD_TO_SERVER:
+			/* Depending on the type of failure, this may prevent
+			 * writeback completion unless we're in disconnected
+			 * mode.
+			 */
+			if (!wreq->error)
+				wreq->error = subreq->error;
+			break;
+
+		case NETFS_WRITE_TO_CACHE:
+			/* Failure doesn't prevent writeback completion unless
+			 * we're in disconnected mode.
+			 */
+			if (subreq->error != -ENOBUFS)
+				ctx->ops->invalidate_cache(wreq);
+			break;
+
+		default:
+			WARN_ON_ONCE(1);
+			if (!wreq->error)
+				wreq->error = -EIO;
+			return;
+		}
+	}
+
+	wreq->cleanup(wreq);
+
+	if (wreq->origin == NETFS_DIO_WRITE && wreq->transferred > 0 &&
+	    wreq->mapping->nrpages) {	/* "- 1" below needs transferred > 0 */
+		pgoff_t first = wreq->start >> PAGE_SHIFT;
+		pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
+		invalidate_inode_pages2_range(wreq->mapping, first, last);
+	}
+
+	if (wreq->origin == NETFS_DIO_WRITE)
+		inode_dio_end(wreq->inode);
+
+	_debug("finished");
+	trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
+	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
+	wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);
+
+	if (wreq->iocb) {
+		wreq->iocb->ki_pos += transferred;
+		if (wreq->iocb->ki_complete)
+			wreq->iocb->ki_complete(
+				wreq->iocb, wreq->error ? wreq->error : transferred);
+	}
+
+	netfs_clear_subrequests(wreq, was_async);
+	netfs_put_request(wreq, was_async, netfs_rreq_trace_put_complete);
+}
+
+/*
+ * Deal with the completion of a write subrequest to the server or the cache.
+ */
+void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
+				       bool was_async)
+{
+	struct netfs_io_subrequest *subreq = _op;
+	struct netfs_io_request *wreq = subreq->rreq;
+	unsigned int u;
+
+	_enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);
+
+	switch (subreq->source) {
+	case NETFS_UPLOAD_TO_SERVER:
+		netfs_stat(&netfs_n_wh_upload_done);
+		break;
+	case NETFS_WRITE_TO_CACHE:
+		netfs_stat(&netfs_n_wh_write_done);
+		break;
+	case NETFS_INVALID_WRITE:
+		break;
+	default:
+		BUG();
+	}
+
+	if (IS_ERR_VALUE(transferred_or_error)) {
+		subreq->error = transferred_or_error;
+		trace_netfs_failure(wreq, subreq, transferred_or_error,
+				    netfs_fail_write);
+		goto failed;
+	}
+
+	if (WARN(transferred_or_error > subreq->len - subreq->transferred,
+		 "Subreq excess write: R%x[%x] %zd > %zu - %zu",
+		 wreq->debug_id, subreq->debug_index,
+		 transferred_or_error, subreq->len, subreq->transferred))
+		transferred_or_error = subreq->len - subreq->transferred;
+
+	subreq->error = 0;
+	subreq->transferred += transferred_or_error;
+
+	if (iov_iter_count(&subreq->io_iter) != subreq->len - subreq->transferred)
+		pr_warn("R=%08x[%u] ITER POST-MISMATCH %zx != %zx-%zx %x\n",
+			wreq->debug_id, subreq->debug_index,
+			iov_iter_count(&subreq->io_iter), subreq->len,
+			subreq->transferred, subreq->io_iter.iter_type);
+
+	if (subreq->transferred < subreq->len)
+		goto incomplete;
+
+	__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
+out:
+	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
+
+	/* If we decrement nr_outstanding to 0, the ref belongs to us. */
+	u = atomic_dec_return(&wreq->nr_outstanding);
+	if (u == 0)
+		netfs_write_terminated(wreq, was_async);
+	else if (u == 1)
+		wake_up_var(&wreq->nr_outstanding);
+
+	netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
+	return;
+
+incomplete:
+	if (transferred_or_error == 0) {
+		if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
+			subreq->error = -ENODATA;
+			goto failed;
+		}
+	} else {
+		__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
+	}
+
+	__set_bit(NETFS_SREQ_SHORT_IO, &subreq->flags);
+	set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags);
+	goto out;
+
+failed:
+	switch (subreq->source) {
+	case NETFS_WRITE_TO_CACHE:
+		netfs_stat(&netfs_n_wh_write_failed);
+		set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags);
+		break;
+	case NETFS_UPLOAD_TO_SERVER:
+		netfs_stat(&netfs_n_wh_upload_failed);
+		set_bit(NETFS_RREQ_FAILED, &wreq->flags);
+		wreq->error = subreq->error;
+		break;
+	default:
+		break;
+	}
+	goto out;
+}
+EXPORT_SYMBOL(netfs_write_subrequest_terminated);
+
+static void netfs_write_to_cache_op(struct netfs_io_subrequest *subreq)
+{
+	struct netfs_io_request *wreq = subreq->rreq;
+	struct netfs_cache_resources *cres = &wreq->cache_resources;
+
+	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
+
+	cres->ops->write(cres, subreq->start, &subreq->io_iter,
+			 netfs_write_subrequest_terminated, subreq);
+}
+
+static void netfs_write_to_cache_op_worker(struct work_struct *work)
+{
+	struct netfs_io_subrequest *subreq =
+		container_of(work, struct netfs_io_subrequest, work);
+
+	netfs_write_to_cache_op(subreq);
+}
+
+/**
+ * netfs_queue_write_request - Queue a write request for attention
+ * @subreq: The write request to be queued
+ *
+ * Queue the specified write request on system_unbound_wq, passing the caller's
+ * ref to the worker.  If queue_work() returns false, the ref is put here.
+ */
+void netfs_queue_write_request(struct netfs_io_subrequest *subreq)
+{
+	if (!queue_work(system_unbound_wq, &subreq->work))
+		netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_wip);
+}
+EXPORT_SYMBOL(netfs_queue_write_request);
+
+/*
+ * Set up an op for writing to the cache.
+ */
+static void netfs_set_up_write_to_cache(struct netfs_io_request *wreq)
+{
+	struct netfs_cache_resources *cres = &wreq->cache_resources;
+	struct netfs_io_subrequest *subreq;
+	struct netfs_inode *ctx = netfs_inode(wreq->inode);
+	struct fscache_cookie *cookie = netfs_i_cookie(ctx);
+	loff_t start = wreq->start;
+	size_t len = wreq->len;
+	int ret;
+
+	if (!fscache_cookie_enabled(cookie)) {
+		clear_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags);
+		return;
+	}
+
+	_debug("write to cache");
+	ret = fscache_begin_write_operation(cres, cookie);
+	if (ret < 0)
+		return;
+
+	ret = cres->ops->prepare_write(cres, &start, &len, wreq->upper_len,
+				       i_size_read(wreq->inode), true);
+	if (ret < 0)
+		return;
+
+	subreq = netfs_create_write_request(wreq, NETFS_WRITE_TO_CACHE, start, len,
+					    netfs_write_to_cache_op_worker);
+	if (!subreq)
+		return;
+
+	netfs_write_to_cache_op(subreq);
+}
+
+/*
+ * Begin the process of writing out a chunk of data.
+ *
+ * We are given a write request that holds a series of dirty regions and
+ * (partially) covers a sequence of folios, all of which are present.  The
+ * pages must have been marked as writeback as appropriate.
+ *
+ * We need to perform the following steps:
+ *
+ * (1) If encrypting, create an output buffer and encrypt each block of the
+ *     data into it, otherwise the output buffer will point to the original
+ *     folios.
+ *
+ * (2) If the data is to be cached, set up a write op for the entire output
+ *     buffer to the cache, if the cache wants to accept it.
+ *
+ * (3) If the data is to be uploaded (ie. not merely cached):
+ *
+ *     (a) If the data is to be compressed, create a compression buffer and
+ *         compress the data into it.
+ *
+ *     (b) For each destination we want to upload to, set up write ops to write
+ *         to that destination.  We may need multiple writes if the data is not
+ *         contiguous or the span exceeds wsize for a server.
+ */
+int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait,
+		      enum netfs_write_trace what)
+{
+	struct netfs_inode *ctx = netfs_inode(wreq->inode);
+
+	_enter("R=%x %llx-%llx f=%lx",
+	       wreq->debug_id, wreq->start, wreq->start + wreq->len - 1,
+	       wreq->flags);
+
+	trace_netfs_write(wreq, what);
+	if (wreq->len == 0 || wreq->iter.count == 0) {
+		pr_err("Zero-sized write [R=%x]\n", wreq->debug_id);
+		return -EIO;
+	}
+
+	if (wreq->origin == NETFS_DIO_WRITE)
+		inode_dio_begin(wreq->inode);
+
+	wreq->io_iter = wreq->iter;
+
+	/* ->outstanding > 0 carries a ref */
+	netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding);
+	atomic_set(&wreq->nr_outstanding, 1);
+
+	/* Start the encryption/compression going.  We can do that in the
+	 * background whilst we generate a list of write ops that we want to
+	 * perform.
+	 */
+	// TODO: Encrypt or compress the region as appropriate
+
+	/* We need to write all of the region to the cache */
+	if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags))
+		netfs_set_up_write_to_cache(wreq);
+
+	/* However, we don't necessarily write all of the region to the server.
+	 * Caching of reads is being managed this way also.
+	 */
+	if (test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
+		ctx->ops->create_write_requests(wreq, wreq->start, wreq->len);
+
+	if (atomic_dec_and_test(&wreq->nr_outstanding))
+		netfs_write_terminated(wreq, false);
+
+	if (!may_wait)
+		return -EIOCBQUEUED;
+
+	wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
+		    TASK_UNINTERRUPTIBLE);
+	return wreq->error;
+}
+
+/*
+ * Begin a write operation for writing through the pagecache.
+ */
+struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len)
+{
+	struct netfs_io_request *wreq;
+	struct file *file = iocb->ki_filp;
+
+	wreq = netfs_alloc_request(file->f_mapping, file, iocb->ki_pos, len,
+				   NETFS_WRITETHROUGH);
+	if (IS_ERR(wreq))
+		return wreq;
+
+	trace_netfs_write(wreq, netfs_write_trace_writethrough);
+
+	__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
+	iov_iter_xarray(&wreq->iter, ITER_SOURCE, &wreq->mapping->i_pages, wreq->start, 0);
+	wreq->io_iter = wreq->iter;
+
+	/* ->outstanding > 0 carries a ref */
+	netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding);
+	atomic_set(&wreq->nr_outstanding, 1);
+	return wreq;
+}
+
+static void netfs_submit_writethrough(struct netfs_io_request *wreq, bool final)
+{
+	struct netfs_inode *ictx = netfs_inode(wreq->inode);
+	unsigned long long from = wreq->start + wreq->submitted;
+	size_t part = wreq->iter.count - wreq->submitted;
+
+	if (!test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
+		return;
+
+	/* Unless this is the final flush, only send whole maximum-sized
+	 * packets and leave the remainder to be submitted later.
+	 */
+	if (!final)
+		part -= part % wreq->wsize;
+
+	/* Hand the unsubmitted data to the filesystem to turn into writes. */
+	ictx->ops->create_write_requests(wreq, from, part);
+	wreq->submitted += part;
+}
+
+/*
+ * Advance the state of the write operation used when writing through the
+ * pagecache.  Data has been copied into the pagecache that we need to append
+ * to the request.  If we've added more than wsize then we need to create a new
+ * subrequest.
+ */
+int netfs_advance_writethrough(struct netfs_io_request *wreq, size_t copied, bool to_page_end)
+{
+	_enter("ic=%zu sb=%zu ws=%u cp=%zu tp=%u",
+	       wreq->iter.count, wreq->submitted, wreq->wsize, copied, to_page_end);
+
+	wreq->iter.count += copied;
+	wreq->io_iter.count += copied;
+	if (to_page_end && wreq->io_iter.count - wreq->submitted >= wreq->wsize)
+		netfs_submit_writethrough(wreq, false);
+
+	return wreq->error;
+}
+
+/*
+ * End a write operation used when writing through the pagecache.
+ */
+int netfs_end_writethrough(struct netfs_io_request *wreq, struct kiocb *iocb)
+{
+	int ret = -EIOCBQUEUED;
+
+	_enter("ic=%zu sb=%zu ws=%u",
+	       wreq->iter.count, wreq->submitted, wreq->wsize);
+
+	if (wreq->submitted < wreq->io_iter.count)
+		netfs_submit_writethrough(wreq, true);
+
+	if (atomic_dec_and_test(&wreq->nr_outstanding))
+		netfs_write_terminated(wreq, false);
+
+	if (is_sync_kiocb(iocb)) {
+		wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
+			    TASK_UNINTERRUPTIBLE);
+		ret = wreq->error;
+	}
+
+	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+	return ret;
+}
diff --git a/fs/netfs/stats.c b/fs/netfs/stats.c
index 5510a7a14a40..deeba9f9dcf5 100644
--- a/fs/netfs/stats.c
+++ b/fs/netfs/stats.c
@@ -9,6 +9,8 @@
 #include <linux/seq_file.h>
 #include "internal.h"
 
+atomic_t netfs_n_rh_dio_read;
+atomic_t netfs_n_rh_dio_write;
 atomic_t netfs_n_rh_readahead;
 atomic_t netfs_n_rh_readpage;
 atomic_t netfs_n_rh_rreq;
@@ -27,32 +29,48 @@ atomic_t netfs_n_rh_write_begin;
 atomic_t netfs_n_rh_write_done;
 atomic_t netfs_n_rh_write_failed;
 atomic_t netfs_n_rh_write_zskip;
+atomic_t netfs_n_wh_wstream_conflict;
+atomic_t netfs_n_wh_upload;
+atomic_t netfs_n_wh_upload_done;
+atomic_t netfs_n_wh_upload_failed;
+atomic_t netfs_n_wh_write;
+atomic_t netfs_n_wh_write_done;
+atomic_t netfs_n_wh_write_failed;
 
-void netfs_stats_show(struct seq_file *m)
+int netfs_stats_show(struct seq_file *m, void *v)
 {
-	seq_printf(m, "RdHelp : RA=%u RP=%u WB=%u WBZ=%u rr=%u sr=%u\n",
+	seq_printf(m, "Netfs  : DR=%u DW=%u RA=%u RP=%u WB=%u WBZ=%u\n",
+		   atomic_read(&netfs_n_rh_dio_read),
+		   atomic_read(&netfs_n_rh_dio_write),
 		   atomic_read(&netfs_n_rh_readahead),
 		   atomic_read(&netfs_n_rh_readpage),
 		   atomic_read(&netfs_n_rh_write_begin),
-		   atomic_read(&netfs_n_rh_write_zskip),
-		   atomic_read(&netfs_n_rh_rreq),
-		   atomic_read(&netfs_n_rh_sreq));
-	seq_printf(m, "RdHelp : ZR=%u sh=%u sk=%u\n",
+		   atomic_read(&netfs_n_rh_write_zskip));
+	seq_printf(m, "Netfs  : ZR=%u sh=%u sk=%u\n",
 		   atomic_read(&netfs_n_rh_zero),
 		   atomic_read(&netfs_n_rh_short_read),
 		   atomic_read(&netfs_n_rh_write_zskip));
-	seq_printf(m, "RdHelp : DL=%u ds=%u df=%u di=%u\n",
+	seq_printf(m, "Netfs  : DL=%u ds=%u df=%u di=%u\n",
 		   atomic_read(&netfs_n_rh_download),
 		   atomic_read(&netfs_n_rh_download_done),
 		   atomic_read(&netfs_n_rh_download_failed),
 		   atomic_read(&netfs_n_rh_download_instead));
-	seq_printf(m, "RdHelp : RD=%u rs=%u rf=%u\n",
+	seq_printf(m, "Netfs  : RD=%u rs=%u rf=%u\n",
 		   atomic_read(&netfs_n_rh_read),
 		   atomic_read(&netfs_n_rh_read_done),
 		   atomic_read(&netfs_n_rh_read_failed));
-	seq_printf(m, "RdHelp : WR=%u ws=%u wf=%u\n",
-		   atomic_read(&netfs_n_rh_write),
-		   atomic_read(&netfs_n_rh_write_done),
-		   atomic_read(&netfs_n_rh_write_failed));
+	seq_printf(m, "Netfs  : UL=%u us=%u uf=%u\n",
+		   atomic_read(&netfs_n_wh_upload),
+		   atomic_read(&netfs_n_wh_upload_done),
+		   atomic_read(&netfs_n_wh_upload_failed));
+	seq_printf(m, "Netfs  : WR=%u ws=%u wf=%u\n",
+		   atomic_read(&netfs_n_wh_write),
+		   atomic_read(&netfs_n_wh_write_done),
+		   atomic_read(&netfs_n_wh_write_failed));
+	seq_printf(m, "Netfs  : rr=%u sr=%u wsc=%u\n",
+		   atomic_read(&netfs_n_rh_rreq),
+		   atomic_read(&netfs_n_rh_sreq),
+		   atomic_read(&netfs_n_wh_wstream_conflict));
+	return fscache_stats_show(m);
 }
 EXPORT_SYMBOL(netfs_stats_show);
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 01ac733a6320..f7e32d76e34d 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -169,8 +169,8 @@ config ROOT_NFS
 
 config NFS_FSCACHE
 	bool "Provide NFS client caching support"
-	depends on NFS_FS=m && FSCACHE || NFS_FS=y && FSCACHE=y
-	select NETFS_SUPPORT
+	depends on NFS_FS=m && NETFS_SUPPORT || NFS_FS=y && NETFS_SUPPORT=y
+	select FSCACHE
 	help
 	  Say Y here if you want NFS data to be cached locally on disc through
 	  the general filesystem cache manager
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index b05717fe0d4e..2d1bfee225c3 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -274,12 +274,6 @@ static void nfs_netfs_free_request(struct netfs_io_request *rreq)
 	put_nfs_open_context(rreq->netfs_priv);
 }
 
-static inline int nfs_netfs_begin_cache_operation(struct netfs_io_request *rreq)
-{
-	return fscache_begin_read_operation(&rreq->cache_resources,
-					    netfs_i_cookie(netfs_inode(rreq->inode)));
-}
-
 static struct nfs_netfs_io_data *nfs_netfs_alloc(struct netfs_io_subrequest *sreq)
 {
 	struct nfs_netfs_io_data *netfs;
@@ -387,7 +381,6 @@ void nfs_netfs_read_completion(struct nfs_pgio_header *hdr)
 const struct netfs_request_ops nfs_netfs_ops = {
 	.init_request		= nfs_netfs_init_request,
 	.free_request		= nfs_netfs_free_request,
-	.begin_cache_operation	= nfs_netfs_begin_cache_operation,
 	.issue_read		= nfs_netfs_issue_read,
 	.clamp_length		= nfs_netfs_clamp_length
 };
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index 5407ab8c8783..e3cb4923316b 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -80,7 +80,7 @@ static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs)
 }
 static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi)
 {
-	netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops);
+	netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops, false);
 }
 extern void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr);
 extern void nfs_netfs_read_completion(struct nfs_pgio_header *hdr);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2fa54cfd4882..6dc6340e2852 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -7911,14 +7911,16 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
 {
 	struct file_lock *fl;
 	int status = false;
-	struct nfsd_file *nf = find_any_file(fp);
+	struct nfsd_file *nf;
 	struct inode *inode;
 	struct file_lock_context *flctx;
 
+	spin_lock(&fp->fi_lock);
+	nf = find_any_file_locked(fp);
 	if (!nf) {
 		/* Any valid lock stateid should have some sort of access */
 		WARN_ON_ONCE(1);
-		return status;
+		goto out;
 	}
 
 	inode = file_inode(nf->nf_file);
@@ -7934,7 +7936,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
 		}
 		spin_unlock(&flctx->flc_lock);
 	}
-	nfsd_file_put(nf);
+out:
+	spin_unlock(&fp->fi_lock);
 	return status;
 }
 
@@ -7944,10 +7947,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
  * @cstate: NFSv4 COMPOUND state
  * @u: RELEASE_LOCKOWNER arguments
  *
- * The lockowner's so_count is bumped when a lock record is added
- * or when copying a conflicting lock. The latter case is brief,
- * but can lead to fleeting false positives when looking for
- * locks-in-use.
+ * Check if there are any locks still held and if not, free the lockowner
+ * and any lock state that is owned.
  *
  * Return values:
  *   %nfs_ok: lockowner released or not found
@@ -7983,10 +7984,13 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
 		spin_unlock(&clp->cl_lock);
 		return nfs_ok;
 	}
-	if (atomic_read(&lo->lo_owner.so_count) != 2) {
-		spin_unlock(&clp->cl_lock);
-		nfs4_put_stateowner(&lo->lo_owner);
-		return nfserr_locks_held;
+
+	list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
+		if (check_for_locks(stp->st_stid.sc_file, lo)) {
+			spin_unlock(&clp->cl_lock);
+			nfs4_put_stateowner(&lo->lo_owner);
+			return nfserr_locks_held;
+		}
 	}
 	unhash_lockowner_locked(lo);
 	while (!list_empty(&lo->lo_owner.so_stateids)) {
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 984ffdaeed6c..5764f91d283e 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -18,10 +18,11 @@
 
 struct ovl_lookup_data {
 	struct super_block *sb;
-	struct vfsmount *mnt;
+	const struct ovl_layer *layer;
 	struct qstr name;
 	bool is_dir;
 	bool opaque;
+	bool xwhiteouts;
 	bool stop;
 	bool last;
 	char *redirect;
@@ -201,17 +202,13 @@ struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
 	return real;
 }
 
-static bool ovl_is_opaquedir(struct ovl_fs *ofs, const struct path *path)
-{
-	return ovl_path_check_dir_xattr(ofs, path, OVL_XATTR_OPAQUE);
-}
-
 static struct dentry *ovl_lookup_positive_unlocked(struct ovl_lookup_data *d,
 						   const char *name,
 						   struct dentry *base, int len,
 						   bool drop_negative)
 {
-	struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->mnt), name, base, len);
+	struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->layer->mnt), name,
+						 base, len);
 
 	if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
 		if (drop_negative && ret->d_lockref.count == 1) {
@@ -232,10 +229,13 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
 			     size_t prelen, const char *post,
 			     struct dentry **ret, bool drop_negative)
 {
+	struct ovl_fs *ofs = OVL_FS(d->sb);
 	struct dentry *this;
 	struct path path;
 	int err;
 	bool last_element = !post[0];
+	bool is_upper = d->layer->idx == 0;
+	char val;
 
 	this = ovl_lookup_positive_unlocked(d, name, base, namelen, drop_negative);
 	if (IS_ERR(this)) {
@@ -253,8 +253,8 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
 	}
 
 	path.dentry = this;
-	path.mnt = d->mnt;
-	if (ovl_path_is_whiteout(OVL_FS(d->sb), &path)) {
+	path.mnt = d->layer->mnt;
+	if (ovl_path_is_whiteout(ofs, &path)) {
 		d->stop = d->opaque = true;
 		goto put_and_out;
 	}
@@ -272,7 +272,7 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
 			d->stop = true;
 			goto put_and_out;
 		}
-		err = ovl_check_metacopy_xattr(OVL_FS(d->sb), &path, NULL);
+		err = ovl_check_metacopy_xattr(ofs, &path, NULL);
 		if (err < 0)
 			goto out_err;
 
@@ -292,7 +292,12 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
 		if (d->last)
 			goto out;
 
-		if (ovl_is_opaquedir(OVL_FS(d->sb), &path)) {
+		/* overlay.opaque=x means xwhiteouts directory */
+		val = ovl_get_opaquedir_val(ofs, &path);
+		if (last_element && !is_upper && val == 'x') {
+			d->xwhiteouts = true;
+			ovl_layer_set_xwhiteouts(ofs, d->layer);
+		} else if (val == 'y') {
 			d->stop = true;
 			if (last_element)
 				d->opaque = true;
@@ -863,7 +868,8 @@ fail:
  * Returns next layer in stack starting from top.
  * Returns -1 if this is the last layer.
  */
-int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
+int ovl_path_next(int idx, struct dentry *dentry, struct path *path,
+		  const struct ovl_layer **layer)
 {
 	struct ovl_entry *oe = OVL_E(dentry);
 	struct ovl_path *lowerstack = ovl_lowerstack(oe);
@@ -871,13 +877,16 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
 	BUG_ON(idx < 0);
 	if (idx == 0) {
 		ovl_path_upper(dentry, path);
-		if (path->dentry)
+		if (path->dentry) {
+			*layer = &OVL_FS(dentry->d_sb)->layers[0];
 			return ovl_numlower(oe) ? 1 : -1;
+		}
 		idx++;
 	}
 	BUG_ON(idx > ovl_numlower(oe));
 	path->dentry = lowerstack[idx - 1].dentry;
-	path->mnt = lowerstack[idx - 1].layer->mnt;
+	*layer = lowerstack[idx - 1].layer;
+	path->mnt = (*layer)->mnt;
 
 	return (idx < ovl_numlower(oe)) ? idx + 1 : -1;
 }
@@ -1055,7 +1064,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 	old_cred = ovl_override_creds(dentry->d_sb);
 	upperdir = ovl_dentry_upper(dentry->d_parent);
 	if (upperdir) {
-		d.mnt = ovl_upper_mnt(ofs);
+		d.layer = &ofs->layers[0];
 		err = ovl_lookup_layer(upperdir, &d, &upperdentry, true);
 		if (err)
 			goto out;
@@ -1111,7 +1120,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		else if (d.is_dir || !ofs->numdatalayer)
 			d.last = lower.layer->idx == ovl_numlower(roe);
 
-		d.mnt = lower.layer->mnt;
+		d.layer = lower.layer;
 		err = ovl_lookup_layer(lower.dentry, &d, &this, false);
 		if (err)
 			goto out_put;
@@ -1278,6 +1287,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 
 	if (upperopaque)
 		ovl_dentry_set_opaque(dentry);
+	if (d.xwhiteouts)
+		ovl_dentry_set_xwhiteouts(dentry);
 
 	if (upperdentry)
 		ovl_dentry_set_upper_alias(dentry);
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 5ba11eb43767..ee949f3e7c77 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -50,7 +50,6 @@ enum ovl_xattr {
 	OVL_XATTR_METACOPY,
 	OVL_XATTR_PROTATTR,
 	OVL_XATTR_XWHITEOUT,
-	OVL_XATTR_XWHITEOUTS,
 };
 
 enum ovl_inode_flag {
@@ -70,6 +69,8 @@ enum ovl_entry_flag {
 	OVL_E_UPPER_ALIAS,
 	OVL_E_OPAQUE,
 	OVL_E_CONNECTED,
+	/* Lower stack may contain xwhiteout entries */
+	OVL_E_XWHITEOUTS,
 };
 
 enum {
@@ -477,6 +478,10 @@ bool ovl_dentry_test_flag(unsigned long flag, struct dentry *dentry);
 bool ovl_dentry_is_opaque(struct dentry *dentry);
 bool ovl_dentry_is_whiteout(struct dentry *dentry);
 void ovl_dentry_set_opaque(struct dentry *dentry);
+bool ovl_dentry_has_xwhiteouts(struct dentry *dentry);
+void ovl_dentry_set_xwhiteouts(struct dentry *dentry);
+void ovl_layer_set_xwhiteouts(struct ovl_fs *ofs,
+			      const struct ovl_layer *layer);
 bool ovl_dentry_has_upper_alias(struct dentry *dentry);
 void ovl_dentry_set_upper_alias(struct dentry *dentry);
 bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags);
@@ -494,11 +499,10 @@ struct file *ovl_path_open(const struct path *path, int flags);
 int ovl_copy_up_start(struct dentry *dentry, int flags);
 void ovl_copy_up_end(struct dentry *dentry);
 bool ovl_already_copied_up(struct dentry *dentry, int flags);
-bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path,
-			      enum ovl_xattr ox);
+char ovl_get_dir_xattr_val(struct ovl_fs *ofs, const struct path *path,
+			   enum ovl_xattr ox);
 bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path);
 bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path);
-bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const struct path *path);
 bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs,
 			 const struct path *upperpath);
 
@@ -573,7 +577,13 @@ static inline bool ovl_is_impuredir(struct super_block *sb,
 		.mnt = ovl_upper_mnt(ofs),
 	};
 
-	return ovl_path_check_dir_xattr(ofs, &upperpath, OVL_XATTR_IMPURE);
+	return ovl_get_dir_xattr_val(ofs, &upperpath, OVL_XATTR_IMPURE) == 'y';
+}
+
+static inline char ovl_get_opaquedir_val(struct ovl_fs *ofs,
+					 const struct path *path)
+{
+	return ovl_get_dir_xattr_val(ofs, path, OVL_XATTR_OPAQUE);
 }
 
 static inline bool ovl_redirect_follow(struct ovl_fs *ofs)
@@ -680,7 +690,8 @@ int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
 struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh);
 struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
 				struct dentry *origin, bool verify);
-int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
+int ovl_path_next(int idx, struct dentry *dentry, struct path *path,
+		  const struct ovl_layer **layer);
 int ovl_verify_lowerdata(struct dentry *dentry);
 struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 			  unsigned int flags);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 5fa9c58af65f..cb449ab310a7 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -40,6 +40,8 @@ struct ovl_layer {
 	int idx;
 	/* One fsid per unique underlying sb (upper fsid == 0) */
 	int fsid;
+	/* xwhiteouts were found on this layer */
+	bool has_xwhiteouts;
 };
 
 struct ovl_path {
@@ -59,7 +61,7 @@ struct ovl_fs {
 	unsigned int numfs;
 	/* Number of data-only lower layers */
 	unsigned int numdatalayer;
-	const struct ovl_layer *layers;
+	struct ovl_layer *layers;
 	struct ovl_sb *fs;
 	/* workbasedir is the path at workdir= mount option */
 	struct dentry *workbasedir;
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index e71156baa7bc..0ca8af060b0c 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -305,8 +305,6 @@ static inline int ovl_dir_read(const struct path *realpath,
 	if (IS_ERR(realfile))
 		return PTR_ERR(realfile);
 
-	rdd->in_xwhiteouts_dir = rdd->dentry &&
-		ovl_path_check_xwhiteouts_xattr(OVL_FS(rdd->dentry->d_sb), realpath);
 	rdd->first_maybe_whiteout = NULL;
 	rdd->ctx.pos = 0;
 	do {
@@ -359,10 +357,13 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
 		.is_lowest = false,
 	};
 	int idx, next;
+	const struct ovl_layer *layer;
 
 	for (idx = 0; idx != -1; idx = next) {
-		next = ovl_path_next(idx, dentry, &realpath);
+		next = ovl_path_next(idx, dentry, &realpath, &layer);
 		rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
+		rdd.in_xwhiteouts_dir = layer->has_xwhiteouts &&
+					ovl_dentry_has_xwhiteouts(dentry);
 
 		if (next != -1) {
 			err = ovl_dir_read(&realpath, &rdd);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 4ab66e3d4cff..2eef6c70b2ae 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1249,6 +1249,7 @@ static struct dentry *ovl_get_root(struct super_block *sb,
 				   struct ovl_entry *oe)
 {
 	struct dentry *root;
+	struct ovl_fs *ofs = OVL_FS(sb);
 	struct ovl_path *lowerpath = ovl_lowerstack(oe);
 	unsigned long ino = d_inode(lowerpath->dentry)->i_ino;
 	int fsid = lowerpath->layer->fsid;
@@ -1270,6 +1271,20 @@ static struct dentry *ovl_get_root(struct super_block *sb,
 			ovl_set_flag(OVL_IMPURE, d_inode(root));
 	}
 
+	/* Look for xwhiteouts marker except in the lowermost layer */
+	for (int i = 0; i < ovl_numlower(oe) - 1; i++, lowerpath++) {
+		struct path path = {
+			.mnt = lowerpath->layer->mnt,
+			.dentry = lowerpath->dentry,
+		};
+
+		/* overlay.opaque=x means xwhiteouts directory */
+		if (ovl_get_opaquedir_val(ofs, &path) == 'x') {
+			ovl_layer_set_xwhiteouts(ofs, lowerpath->layer);
+			ovl_dentry_set_xwhiteouts(root);
+		}
+	}
+
 	/* Root is always merge -> can have whiteouts */
 	ovl_set_flag(OVL_WHITEOUTS, d_inode(root));
 	ovl_dentry_set_flag(OVL_E_CONNECTED, root);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 0217094c23ea..a8e17f14d7a2 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -461,6 +461,33 @@ void ovl_dentry_set_opaque(struct dentry *dentry)
 	ovl_dentry_set_flag(OVL_E_OPAQUE, dentry);
 }
 
+bool ovl_dentry_has_xwhiteouts(struct dentry *dentry)
+{
+	return ovl_dentry_test_flag(OVL_E_XWHITEOUTS, dentry);
+}
+
+void ovl_dentry_set_xwhiteouts(struct dentry *dentry)
+{
+	ovl_dentry_set_flag(OVL_E_XWHITEOUTS, dentry);
+}
+
+/*
+ * ovl_layer_set_xwhiteouts() is called before adding the overlay dir
+ * dentry to dcache, while readdir of that same directory happens after
+ * the overlay dir dentry is in dcache, so if some cpu observes that
+ * ovl_dentry_has_xwhiteouts(), it will also observe layer->has_xwhiteouts
+ * for the layers where xwhiteouts marker was found in that merge dir.
+ */
+void ovl_layer_set_xwhiteouts(struct ovl_fs *ofs,
+			      const struct ovl_layer *layer)
+{
+	if (layer->has_xwhiteouts)
+		return;
+
+	/* Write once to read-mostly layer properties */
+	ofs->layers[layer->idx].has_xwhiteouts = true;
+}
+
 /*
  * For hard links and decoded file handles, it's possible for ovl_dentry_upper()
  * to return positive, while there's no actual upper alias for the inode.
@@ -739,19 +766,6 @@ bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path)
 	return res >= 0;
 }
 
-bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const struct path *path)
-{
-	struct dentry *dentry = path->dentry;
-	int res;
-
-	/* xattr.whiteouts must be a directory */
-	if (!d_is_dir(dentry))
-		return false;
-
-	res = ovl_path_getxattr(ofs, path, OVL_XATTR_XWHITEOUTS, NULL, 0);
-	return res >= 0;
-}
-
 /*
  * Load persistent uuid from xattr into s_uuid if found, or store a new
  * random generated value in s_uuid and in xattr.
@@ -811,20 +825,17 @@ fail:
 	return false;
 }
 
-bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path,
-			       enum ovl_xattr ox)
+char ovl_get_dir_xattr_val(struct ovl_fs *ofs, const struct path *path,
+			   enum ovl_xattr ox)
 {
 	int res;
 	char val;
 
 	if (!d_is_dir(path->dentry))
-		return false;
+		return 0;
 
 	res = ovl_path_getxattr(ofs, path, ox, &val, 1);
-	if (res == 1 && val == 'y')
-		return true;
-
-	return false;
+	return res == 1 ? val : 0;
 }
 
 #define OVL_XATTR_OPAQUE_POSTFIX	"opaque"
@@ -837,7 +848,6 @@ bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path,
 #define OVL_XATTR_METACOPY_POSTFIX	"metacopy"
 #define OVL_XATTR_PROTATTR_POSTFIX	"protattr"
 #define OVL_XATTR_XWHITEOUT_POSTFIX	"whiteout"
-#define OVL_XATTR_XWHITEOUTS_POSTFIX	"whiteouts"
 
 #define OVL_XATTR_TAB_ENTRY(x) \
 	[x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \
@@ -854,7 +864,6 @@ const char *const ovl_xattr_table[][2] = {
 	OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY),
 	OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR),
 	OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUT),
-	OVL_XATTR_TAB_ENTRY(OVL_XATTR_XWHITEOUTS),
 };
 
 int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry,
diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index d64a306a414b..971892620504 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -151,7 +151,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 		return -EOPNOTSUPP;
 
 	ses = tcon->ses;
-	server = ses->server;
+	server = cifs_pick_channel(ses);
 	cfids = tcon->cfids;
 
 	if (!server->ops->new_lease_key)
diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c
index 60027f5aebe8..3e4209f41c18 100644
--- a/fs/smb/client/cifs_debug.c
+++ b/fs/smb/client/cifs_debug.c
@@ -659,6 +659,7 @@ static ssize_t cifs_stats_proc_write(struct file *file,
 					spin_lock(&tcon->stat_lock);
 					tcon->bytes_read = 0;
 					tcon->bytes_written = 0;
+					tcon->stats_from_time = ktime_get_real_seconds();
 					spin_unlock(&tcon->stat_lock);
 					if (server->ops->clear_stats)
 						server->ops->clear_stats(tcon);
@@ -737,8 +738,9 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v)
 				seq_printf(m, "\n%d) %s", i, tcon->tree_name);
 				if (tcon->need_reconnect)
 					seq_puts(m, "\tDISCONNECTED ");
-				seq_printf(m, "\nSMBs: %d",
-					   atomic_read(&tcon->num_smbs_sent));
+				seq_printf(m, "\nSMBs: %d since %ptTs UTC",
+					   atomic_read(&tcon->num_smbs_sent),
+					   &tcon->stats_from_time);
 				if (server->ops->print_stats)
 					server->ops->print_stats(m, tcon);
 			}
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index 99b0ade833aa..e902de4e475a 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -430,7 +430,7 @@ static void
 cifs_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages_final(&inode->i_data);
-	if (inode->i_state & I_PINNING_FSCACHE_WB)
+	if (inode->i_state & I_PINNING_NETFS_WB)
 		cifs_fscache_unuse_inode_cookie(inode, true);
 	cifs_fscache_release_inode_cookie(inode);
 	clear_inode(inode);
@@ -681,6 +681,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
 		seq_printf(s, ",rasize=%u", cifs_sb->ctx->rasize);
 	if (tcon->ses->server->min_offload)
 		seq_printf(s, ",esize=%u", tcon->ses->server->min_offload);
+	if (tcon->ses->server->retrans)
+		seq_printf(s, ",retrans=%u", tcon->ses->server->retrans);
 	seq_printf(s, ",echo_interval=%lu",
 			tcon->ses->server->echo_interval / HZ);
 
@@ -793,8 +795,7 @@ static int cifs_show_stats(struct seq_file *s, struct dentry *root)
 
 static int cifs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
-	fscache_unpin_writeback(wbc, cifs_inode_cookie(inode));
-	return 0;
+	return netfs_unpin_writeback(inode, wbc);
 }
 
 static int cifs_drop_inode(struct inode *inode)
@@ -1222,7 +1223,7 @@ static int cifs_precopy_set_eof(struct inode *src_inode, struct cifsInodeInfo *s
 	if (rc < 0)
 		goto set_failed;
 
-	netfs_resize_file(&src_cifsi->netfs, src_end);
+	netfs_resize_file(&src_cifsi->netfs, src_end, true);
 	fscache_resize_cookie(cifs_inode_cookie(src_inode), src_end);
 	return 0;
 
@@ -1353,7 +1354,7 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
 			smb_file_src, smb_file_target, off, len, destoff);
 		if (rc == 0 && new_size > i_size_read(target_inode)) {
 			truncate_setsize(target_inode, new_size);
-			netfs_resize_file(&target_cifsi->netfs, new_size);
+			netfs_resize_file(&target_cifsi->netfs, new_size, true);
 			fscache_resize_cookie(cifs_inode_cookie(target_inode),
 					      new_size);
 		}
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 879d5ef8a66e..20036fb16cec 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -204,6 +204,8 @@ struct cifs_open_info_data {
 		};
 	} reparse;
 	char *symlink_target;
+	struct cifs_sid posix_owner;
+	struct cifs_sid posix_group;
 	union {
 		struct smb2_file_all_info fi;
 		struct smb311_posix_qinfo posix_fi;
@@ -751,6 +753,7 @@ struct TCP_Server_Info {
 	unsigned int	max_read;
 	unsigned int	max_write;
 	unsigned int	min_offload;
+	unsigned int	retrans;
 	__le16	compress_algorithm;
 	__u16	signing_algorithm;
 	__le16	cipher_type;
@@ -1207,6 +1210,7 @@ struct cifs_tcon {
 	__u64    bytes_read;
 	__u64    bytes_written;
 	spinlock_t stat_lock;  /* protects the two fields above */
+	time64_t stats_from_time;
 	FILE_SYSTEM_DEVICE_INFO fsDevInfo;
 	FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */
 	FILE_SYSTEM_UNIX_INFO fsUnixInfo;
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 3052a208c6ca..bfd568f89710 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -1574,6 +1574,9 @@ static int match_server(struct TCP_Server_Info *server,
 	if (server->min_offload != ctx->min_offload)
 		return 0;
 
+	if (server->retrans != ctx->retrans)
+		return 0;
+
 	return 1;
 }
 
@@ -1798,6 +1801,7 @@ smbd_connected:
 		goto out_err_crypto_release;
 	}
 	tcp_ses->min_offload = ctx->min_offload;
+	tcp_ses->retrans = ctx->retrans;
 	/*
 	 * at this point we are the only ones with the pointer
 	 * to the struct since the kernel thread not created yet
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 1b4262aff8fa..90da81d0372a 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -87,7 +87,7 @@ void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len
 			continue;
 		if (!folio_test_writeback(folio)) {
 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
-				  len, start, folio_index(folio), end);
+				  len, start, folio->index, end);
 			continue;
 		}
 
@@ -120,7 +120,7 @@ void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len
 			continue;
 		if (!folio_test_writeback(folio)) {
 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
-				  len, start, folio_index(folio), end);
+				  len, start, folio->index, end);
 			continue;
 		}
 
@@ -151,7 +151,7 @@ void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int le
 	xas_for_each(&xas, folio, end) {
 		if (!folio_test_writeback(folio)) {
 			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
-				  len, start, folio_index(folio), end);
+				  len, start, folio->index, end);
 			continue;
 		}
 
@@ -2651,7 +2651,7 @@ static void cifs_extend_writeback(struct address_space *mapping,
 				continue;
 			if (xa_is_value(folio))
 				break;
-			if (folio_index(folio) != index)
+			if (folio->index != index)
 				break;
 			if (!folio_try_get_rcu(folio)) {
 				xas_reset(&xas);
@@ -2899,7 +2899,7 @@ redo_folio:
 					goto skip_write;
 			}
 
-			if (folio_mapping(folio) != mapping ||
+			if (folio->mapping != mapping ||
 			    !folio_test_dirty(folio)) {
 				start += folio_size(folio);
 				folio_unlock(folio);
@@ -5043,27 +5043,13 @@ static void cifs_swap_deactivate(struct file *file)
 	/* do we need to unpin (or unlock) the file */
 }
 
-/*
- * Mark a page as having been made dirty and thus needing writeback.  We also
- * need to pin the cache object to write back to.
- */
-#ifdef CONFIG_CIFS_FSCACHE
-static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
-{
-	return fscache_dirty_folio(mapping, folio,
-					cifs_inode_cookie(mapping->host));
-}
-#else
-#define cifs_dirty_folio filemap_dirty_folio
-#endif
-
 const struct address_space_operations cifs_addr_ops = {
 	.read_folio = cifs_read_folio,
 	.readahead = cifs_readahead,
 	.writepages = cifs_writepages,
 	.write_begin = cifs_write_begin,
 	.write_end = cifs_write_end,
-	.dirty_folio = cifs_dirty_folio,
+	.dirty_folio = netfs_dirty_folio,
 	.release_folio = cifs_release_folio,
 	.direct_IO = cifs_direct_io,
 	.invalidate_folio = cifs_invalidate_folio,
@@ -5087,7 +5073,7 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
 	.writepages = cifs_writepages,
 	.write_begin = cifs_write_begin,
 	.write_end = cifs_write_end,
-	.dirty_folio = cifs_dirty_folio,
+	.dirty_folio = netfs_dirty_folio,
 	.release_folio = cifs_release_folio,
 	.invalidate_folio = cifs_invalidate_folio,
 	.launder_folio = cifs_launder_folio,
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index a3493da12ad1..52cbef2eeb28 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -139,6 +139,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = {
 	fsparam_u32("dir_mode", Opt_dirmode),
 	fsparam_u32("port", Opt_port),
 	fsparam_u32("min_enc_offload", Opt_min_enc_offload),
+	fsparam_u32("retrans", Opt_retrans),
 	fsparam_u32("esize", Opt_min_enc_offload),
 	fsparam_u32("bsize", Opt_blocksize),
 	fsparam_u32("rasize", Opt_rasize),
@@ -1064,6 +1065,9 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
 	case Opt_min_enc_offload:
 		ctx->min_offload = result.uint_32;
 		break;
+	case Opt_retrans:
+		ctx->retrans = result.uint_32;
+		break;
 	case Opt_blocksize:
 		/*
 		 * inode blocksize realistically should never need to be
@@ -1619,6 +1623,8 @@ int smb3_init_fs_context(struct fs_context *fc)
 	ctx->backupuid_specified = false; /* no backup intent for a user */
 	ctx->backupgid_specified = false; /* no backup intent for a group */
 
+	ctx->retrans = 1;
+
 /*
  *	short int override_uid = -1;
  *	short int override_gid = -1;
diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h
index cf46916286d0..182ce11cbe93 100644
--- a/fs/smb/client/fs_context.h
+++ b/fs/smb/client/fs_context.h
@@ -118,6 +118,7 @@ enum cifs_param {
 	Opt_file_mode,
 	Opt_dirmode,
 	Opt_min_enc_offload,
+	Opt_retrans,
 	Opt_blocksize,
 	Opt_rasize,
 	Opt_rsize,
@@ -245,6 +246,7 @@ struct smb3_fs_context {
 	unsigned int rsize;
 	unsigned int wsize;
 	unsigned int min_offload;
+	unsigned int retrans;
 	bool sockopt_tcp_nodelay:1;
 	/* attribute cache timemout for files and directories in jiffies */
 	unsigned long acregmax;
diff --git a/fs/smb/client/fscache.c b/fs/smb/client/fscache.c
index e5cad149f5a2..c4a3cb736881 100644
--- a/fs/smb/client/fscache.c
+++ b/fs/smb/client/fscache.c
@@ -180,7 +180,7 @@ static int fscache_fallback_write_pages(struct inode *inode, loff_t start, size_
 	if (ret < 0)
 		return ret;
 
-	ret = cres.ops->prepare_write(&cres, &start, &len, i_size_read(inode),
+	ret = cres.ops->prepare_write(&cres, &start, &len, len, i_size_read(inode),
 				      no_space_allocated_yet);
 	if (ret == 0)
 		ret = fscache_write(&cres, start, &iter, NULL, NULL);
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index 9f37c1758f73..f0989484f2c6 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -665,8 +665,6 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path,
 /* Fill a cifs_fattr struct with info from POSIX info struct */
 static void smb311_posix_info_to_fattr(struct cifs_fattr *fattr,
 				       struct cifs_open_info_data *data,
-				       struct cifs_sid *owner,
-				       struct cifs_sid *group,
 				       struct super_block *sb)
 {
 	struct smb311_posix_qinfo *info = &data->posix_fi;
@@ -722,8 +720,8 @@ out_reparse:
 		fattr->cf_symlink_target = data->symlink_target;
 		data->symlink_target = NULL;
 	}
-	sid_to_id(cifs_sb, owner, fattr, SIDOWNER);
-	sid_to_id(cifs_sb, group, fattr, SIDGROUP);
+	sid_to_id(cifs_sb, &data->posix_owner, fattr, SIDOWNER);
+	sid_to_id(cifs_sb, &data->posix_group, fattr, SIDGROUP);
 
 	cifs_dbg(FYI, "POSIX query info: mode 0x%x uniqueid 0x%llx nlink %d\n",
 		fattr->cf_mode, fattr->cf_uniqueid, fattr->cf_nlink);
@@ -1070,9 +1068,7 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data,
 				 const unsigned int xid,
 				 struct cifs_tcon *tcon,
 				 const char *full_path,
-				 struct cifs_fattr *fattr,
-				 struct cifs_sid *owner,
-				 struct cifs_sid *group)
+				 struct cifs_fattr *fattr)
 {
 	struct TCP_Server_Info *server = tcon->ses->server;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
@@ -1117,7 +1113,7 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data,
 	}
 
 	if (tcon->posix_extensions)
-		smb311_posix_info_to_fattr(fattr, data, owner, group, sb);
+		smb311_posix_info_to_fattr(fattr, data, sb);
 	else
 		cifs_open_info_to_fattr(fattr, data, sb);
 out:
@@ -1171,8 +1167,7 @@ static int cifs_get_fattr(struct cifs_open_info_data *data,
 		 */
 		if (cifs_open_data_reparse(data)) {
 			rc = reparse_info_to_fattr(data, sb, xid, tcon,
-						   full_path, fattr,
-						   NULL, NULL);
+						   full_path, fattr);
 		} else {
 			cifs_open_info_to_fattr(fattr, data, sb);
 		}
@@ -1317,10 +1312,10 @@ static int smb311_posix_get_fattr(struct cifs_open_info_data *data,
 				  const unsigned int xid)
 {
 	struct cifs_open_info_data tmp_data = {};
+	struct TCP_Server_Info *server;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 	struct cifs_tcon *tcon;
 	struct tcon_link *tlink;
-	struct cifs_sid owner, group;
 	int tmprc;
 	int rc = 0;
 
@@ -1328,14 +1323,14 @@ static int smb311_posix_get_fattr(struct cifs_open_info_data *data,
 	if (IS_ERR(tlink))
 		return PTR_ERR(tlink);
 	tcon = tlink_tcon(tlink);
+	server = tcon->ses->server;
 
 	/*
 	 * 1. Fetch file metadata if not provided (data)
 	 */
 	if (!data) {
-		rc = smb311_posix_query_path_info(xid, tcon, cifs_sb,
-						  full_path, &tmp_data,
-						  &owner, &group);
+		rc = server->ops->query_path_info(xid, tcon, cifs_sb,
+						  full_path, &tmp_data);
 		data = &tmp_data;
 	}
 
@@ -1347,11 +1342,9 @@ static int smb311_posix_get_fattr(struct cifs_open_info_data *data,
 	case 0:
 		if (cifs_open_data_reparse(data)) {
 			rc = reparse_info_to_fattr(data, sb, xid, tcon,
-						   full_path, fattr,
-						   &owner, &group);
+						   full_path, fattr);
 		} else {
-			smb311_posix_info_to_fattr(fattr, data,
-						   &owner, &group, sb);
+			smb311_posix_info_to_fattr(fattr, data, sb);
 		}
 		break;
 	case -EREMOTE:
diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c
index c2137ea3c253..0748d7b757b9 100644
--- a/fs/smb/client/misc.c
+++ b/fs/smb/client/misc.c
@@ -140,6 +140,7 @@ tcon_info_alloc(bool dir_leases_enabled)
 	spin_lock_init(&ret_buf->stat_lock);
 	atomic_set(&ret_buf->num_local_opens, 0);
 	atomic_set(&ret_buf->num_remote_opens, 0);
+	ret_buf->stats_from_time = ktime_get_real_seconds();
 #ifdef CONFIG_CIFS_DFS_UPCALL
 	INIT_LIST_HEAD(&ret_buf->dfs_ses_list);
 #endif
diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c
index 056cae1ddcce..94255401b38d 100644
--- a/fs/smb/client/readdir.c
+++ b/fs/smb/client/readdir.c
@@ -133,14 +133,14 @@ retry:
 				 * Query dir responses don't provide enough
 				 * information about reparse points other than
 				 * their reparse tags.  Save an invalidation by
-				 * not clobbering the existing mode, size and
-				 * symlink target (if any) when reparse tag and
-				 * ctime haven't changed.
+				 * not clobbering some existing attributes when
+				 * reparse tag and ctime haven't changed.
 				 */
 				rc = 0;
 				if (fattr->cf_cifsattrs & ATTR_REPARSE) {
 					if (likely(reparse_inode_match(inode, fattr))) {
 						fattr->cf_mode = inode->i_mode;
+						fattr->cf_rdev = inode->i_rdev;
 						fattr->cf_eof = CIFS_I(inode)->server_eof;
 						fattr->cf_symlink_target = NULL;
 					} else {
@@ -645,10 +645,10 @@ static int cifs_entry_is_dot(struct cifs_dirent *de, bool is_unicode)
 static int is_dir_changed(struct file *file)
 {
 	struct inode *inode = file_inode(file);
-	struct cifsInodeInfo *cifsInfo = CIFS_I(inode);
+	struct cifsInodeInfo *cifs_inode_info = CIFS_I(inode);
 
-	if (cifsInfo->time == 0)
-		return 1; /* directory was changed, perhaps due to unlink */
+	if (cifs_inode_info->time == 0)
+		return 1; /* directory was changed, e.g. unlink or new file */
 	else
 		return 0;
 
diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c
index 5053a5550abe..a652200540c8 100644
--- a/fs/smb/client/smb2inode.c
+++ b/fs/smb/client/smb2inode.c
@@ -56,6 +56,35 @@ static inline __u32 file_create_options(struct dentry *dentry)
 	return 0;
 }
 
+/*
+ * Copy the owner and group SIDs that follow the fixed-size POSIX info in an
+ * SMB3.1.1 POSIX query info response into @data. -EINVAL if missing/invalid.
+ */
+static int parse_posix_sids(struct cifs_open_info_data *data,
+			    struct kvec *rsp_iov)
+{
+	struct smb2_query_info_rsp *rsp = rsp_iov->iov_base;
+	unsigned int total = le32_to_cpu(rsp->OutputBufferLength);
+	unsigned int fixed = sizeof(data->posix_fi);
+	u8 *owner, *group, *end;
+	int len;
+
+	if (total <= fixed)
+		return -EINVAL;
+	owner = (u8 *)rsp + le16_to_cpu(rsp->OutputBufferOffset) + fixed;
+	end = owner + total - fixed;
+	len = posix_info_sid_size(owner, end);
+	if (len == -1)
+		return -EINVAL;
+	memcpy(&data->posix_owner, owner, len);
+	group = owner + len;
+	len = posix_info_sid_size(group, end);
+	if (len == -1)
+		return -EINVAL;
+	memcpy(&data->posix_group, group, len);
+	return 0;
+}
+
 /*
  * note: If cfile is passed, the reference to it is dropped here.
  * So make sure that you do not reuse cfile after return from this func.
@@ -69,7 +98,6 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
 			    __u32 desired_access, __u32 create_disposition,
 			    __u32 create_options, umode_t mode, struct kvec *in_iov,
 			    int *cmds, int num_cmds, struct cifsFileInfo *cfile,
-			    __u8 **extbuf, size_t *extbuflen,
 			    struct kvec *out_iov, int *out_buftype)
 {
 
@@ -494,21 +522,9 @@ finished:
 					&rsp_iov[i + 1], sizeof(idata->posix_fi) /* add SIDs */,
 					(char *)&idata->posix_fi);
 			}
-			if (rc == 0) {
-				unsigned int length = le32_to_cpu(qi_rsp->OutputBufferLength);
-
-				if (length > sizeof(idata->posix_fi)) {
-					char *base = (char *)rsp_iov[i + 1].iov_base +
-						le16_to_cpu(qi_rsp->OutputBufferOffset) +
-						sizeof(idata->posix_fi);
-					*extbuflen = length - sizeof(idata->posix_fi);
-					*extbuf = kmemdup(base, *extbuflen, GFP_KERNEL);
-					if (!*extbuf)
-						rc = -ENOMEM;
-				} else {
-					rc = -EINVAL;
-				}
-			}
+			if (rc == 0)
+				rc = parse_posix_sids(idata, &rsp_iov[i + 1]);
+
 			SMB2_query_info_free(&rqst[num_rqst++]);
 			if (rc)
 				trace_smb3_posix_query_info_compound_err(xid,  ses->Suid,
@@ -662,7 +678,7 @@ int smb2_query_path_info(const unsigned int xid,
 	struct smb2_hdr *hdr;
 	struct kvec in_iov[2], out_iov[3] = {};
 	int out_buftype[3] = {};
-	int cmds[2] = { SMB2_OP_QUERY_INFO,  };
+	int cmds[2];
 	bool islink;
 	int i, num_cmds;
 	int rc, rc2;
@@ -670,20 +686,36 @@ int smb2_query_path_info(const unsigned int xid,
 	data->adjust_tz = false;
 	data->reparse_point = false;
 
-	if (strcmp(full_path, ""))
-		rc = -ENOENT;
-	else
-		rc = open_cached_dir(xid, tcon, full_path, cifs_sb, false, &cfid);
-	/* If it is a root and its handle is cached then use it */
-	if (!rc) {
-		if (cfid->file_all_info_is_valid) {
-			memcpy(&data->fi, &cfid->file_all_info, sizeof(data->fi));
+	/*
+	 * BB TODO: Add support for using cached root handle in SMB3.1.1 POSIX.
+	 * Create SMB2_query_posix_info worker function to do non-compounded
+	 * query when we already have an open file handle for this. For now this
+	 * is fast enough (always using the compounded version).
+	 */
+	if (!tcon->posix_extensions) {
+		if (*full_path) {
+			rc = -ENOENT;
 		} else {
-			rc = SMB2_query_info(xid, tcon, cfid->fid.persistent_fid,
-					     cfid->fid.volatile_fid, &data->fi);
+			rc = open_cached_dir(xid, tcon, full_path,
+					     cifs_sb, false, &cfid);
+		}
+		/* If it is a root and its handle is cached then use it */
+		if (!rc) {
+			if (cfid->file_all_info_is_valid) {
+				memcpy(&data->fi, &cfid->file_all_info,
+				       sizeof(data->fi));
+			} else {
+				rc = SMB2_query_info(xid, tcon,
+						     cfid->fid.persistent_fid,
+						     cfid->fid.volatile_fid,
+						     &data->fi);
+			}
+			close_cached_dir(cfid);
+			return rc;
 		}
-		close_cached_dir(cfid);
-		return rc;
+		cmds[0] = SMB2_OP_QUERY_INFO;
+	} else {
+		cmds[0] = SMB2_OP_POSIX_QUERY_INFO;
 	}
 
 	in_iov[0].iov_base = data;
@@ -693,9 +725,8 @@ int smb2_query_path_info(const unsigned int xid,
 	cifs_get_readable_path(tcon, full_path, &cfile);
 	rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
 			      FILE_READ_ATTRIBUTES, FILE_OPEN,
-			      create_options, ACL_NO_MODE,
-			      in_iov, cmds, 1, cfile,
-			      NULL, NULL, out_iov, out_buftype);
+			      create_options, ACL_NO_MODE, in_iov,
+			      cmds, 1, cfile, out_iov, out_buftype);
 	hdr = out_iov[0].iov_base;
 	/*
 	 * If first iov is unset, then SMB session was dropped or we've got a
@@ -707,6 +738,10 @@ int smb2_query_path_info(const unsigned int xid,
 	switch (rc) {
 	case 0:
 	case -EOPNOTSUPP:
+		/*
+		 * BB TODO: When support for special files added to Samba
+		 * re-verify this path.
+		 */
 		rc = parse_create_response(data, cifs_sb, &out_iov[0]);
 		if (rc || !data->reparse_point)
 			goto out;
@@ -722,8 +757,8 @@ int smb2_query_path_info(const unsigned int xid,
 		cifs_get_readable_path(tcon, full_path, &cfile);
 		rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
 				      FILE_READ_ATTRIBUTES, FILE_OPEN,
-				      create_options, ACL_NO_MODE, in_iov, cmds,
-				      num_cmds, cfile, NULL, NULL, NULL, NULL);
+				      create_options, ACL_NO_MODE, in_iov,
+				      cmds, num_cmds, cfile, NULL, NULL);
 		break;
 	case -EREMOTE:
 		break;
@@ -746,101 +781,6 @@ out:
 	return rc;
 }
 
-int smb311_posix_query_path_info(const unsigned int xid,
-				 struct cifs_tcon *tcon,
-				 struct cifs_sb_info *cifs_sb,
-				 const char *full_path,
-				 struct cifs_open_info_data *data,
-				 struct cifs_sid *owner,
-				 struct cifs_sid *group)
-{
-	int rc;
-	__u32 create_options = 0;
-	struct cifsFileInfo *cfile;
-	struct kvec in_iov[2], out_iov[3] = {};
-	int out_buftype[3] = {};
-	__u8 *sidsbuf = NULL;
-	__u8 *sidsbuf_end = NULL;
-	size_t sidsbuflen = 0;
-	size_t owner_len, group_len;
-	int cmds[2] = { SMB2_OP_POSIX_QUERY_INFO,  };
-	int i, num_cmds;
-
-	data->adjust_tz = false;
-	data->reparse_point = false;
-
-	/*
-	 * BB TODO: Add support for using the cached root handle.
-	 * Create SMB2_query_posix_info worker function to do non-compounded query
-	 * when we already have an open file handle for this. For now this is fast enough
-	 * (always using the compounded version).
-	 */
-	in_iov[0].iov_base = data;
-	in_iov[0].iov_len = sizeof(*data);
-	in_iov[1] = in_iov[0];
-
-	cifs_get_readable_path(tcon, full_path, &cfile);
-	rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
-			      FILE_READ_ATTRIBUTES, FILE_OPEN,
-			      create_options, ACL_NO_MODE, in_iov, cmds, 1,
-			      cfile, &sidsbuf, &sidsbuflen, out_iov, out_buftype);
-	/*
-	 * If first iov is unset, then SMB session was dropped or we've got a
-	 * cached open file (@cfile).
-	 */
-	if (!out_iov[0].iov_base || out_buftype[0] == CIFS_NO_BUFFER)
-		goto out;
-
-	switch (rc) {
-	case 0:
-	case -EOPNOTSUPP:
-		/* BB TODO: When support for special files added to Samba re-verify this path */
-		rc = parse_create_response(data, cifs_sb, &out_iov[0]);
-		if (rc || !data->reparse_point)
-			goto out;
-
-		if (data->reparse.tag == IO_REPARSE_TAG_SYMLINK) {
-			/* symlink already parsed in create response */
-			num_cmds = 1;
-		} else {
-			cmds[1] = SMB2_OP_GET_REPARSE;
-			num_cmds = 2;
-		}
-		create_options |= OPEN_REPARSE_POINT;
-		cifs_get_readable_path(tcon, full_path, &cfile);
-		rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
-				      FILE_READ_ATTRIBUTES, FILE_OPEN,
-				      create_options, ACL_NO_MODE, in_iov, cmds,
-				      num_cmds, cfile, &sidsbuf, &sidsbuflen, NULL, NULL);
-		break;
-	}
-
-out:
-	if (rc == 0) {
-		sidsbuf_end = sidsbuf + sidsbuflen;
-
-		owner_len = posix_info_sid_size(sidsbuf, sidsbuf_end);
-		if (owner_len == -1) {
-			rc = -EINVAL;
-			goto out;
-		}
-		memcpy(owner, sidsbuf, owner_len);
-
-		group_len = posix_info_sid_size(
-			sidsbuf + owner_len, sidsbuf_end);
-		if (group_len == -1) {
-			rc = -EINVAL;
-			goto out;
-		}
-		memcpy(group, sidsbuf + owner_len, group_len);
-	}
-
-	kfree(sidsbuf);
-	for (i = 0; i < ARRAY_SIZE(out_buftype); i++)
-		free_rsp_buf(out_buftype[i], out_iov[i].iov_base);
-	return rc;
-}
-
 int
 smb2_mkdir(const unsigned int xid, struct inode *parent_inode, umode_t mode,
 	   struct cifs_tcon *tcon, const char *name,
@@ -848,9 +788,9 @@ smb2_mkdir(const unsigned int xid, struct inode *parent_inode, umode_t mode,
 {
 	return smb2_compound_op(xid, tcon, cifs_sb, name,
 				FILE_WRITE_ATTRIBUTES, FILE_CREATE,
-				CREATE_NOT_FILE, mode, NULL,
-				&(int){SMB2_OP_MKDIR}, 1,
-				NULL, NULL, NULL, NULL, NULL);
+				CREATE_NOT_FILE, mode,
+				NULL, &(int){SMB2_OP_MKDIR}, 1,
+				NULL, NULL, NULL);
 }
 
 void
@@ -875,7 +815,7 @@ smb2_mkdir_setinfo(struct inode *inode, const char *name,
 				 FILE_WRITE_ATTRIBUTES, FILE_CREATE,
 				 CREATE_NOT_FILE, ACL_NO_MODE, &in_iov,
 				 &(int){SMB2_OP_SET_INFO}, 1,
-				 cfile, NULL, NULL, NULL, NULL);
+				 cfile, NULL, NULL);
 	if (tmprc == 0)
 		cifs_i->cifsAttrs = dosattrs;
 }
@@ -887,8 +827,9 @@ smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
 	drop_cached_dir_by_name(xid, tcon, name, cifs_sb);
 	return smb2_compound_op(xid, tcon, cifs_sb, name,
 				DELETE, FILE_OPEN, CREATE_NOT_FILE,
-				ACL_NO_MODE, NULL, &(int){SMB2_OP_RMDIR}, 1,
-				NULL, NULL, NULL, NULL, NULL);
+				ACL_NO_MODE, NULL,
+				&(int){SMB2_OP_RMDIR}, 1,
+				NULL, NULL, NULL);
 }
 
 int
@@ -897,8 +838,9 @@ smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
 {
 	return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
 				CREATE_DELETE_ON_CLOSE | OPEN_REPARSE_POINT,
-				ACL_NO_MODE, NULL, &(int){SMB2_OP_DELETE}, 1,
-				NULL, NULL, NULL, NULL, NULL);
+				ACL_NO_MODE, NULL,
+				&(int){SMB2_OP_DELETE}, 1,
+				NULL, NULL, NULL);
 }
 
 static int smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon,
@@ -919,8 +861,8 @@ static int smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon,
 	in_iov.iov_base = smb2_to_name;
 	in_iov.iov_len = 2 * UniStrnlen((wchar_t *)smb2_to_name, PATH_MAX);
 	rc = smb2_compound_op(xid, tcon, cifs_sb, from_name, access,
-			      FILE_OPEN, create_options, ACL_NO_MODE, &in_iov,
-			      &command, 1, cfile, NULL, NULL, NULL, NULL);
+			      FILE_OPEN, create_options, ACL_NO_MODE,
+			      &in_iov, &command, 1, cfile, NULL, NULL);
 smb2_rename_path:
 	kfree(smb2_to_name);
 	return rc;
@@ -971,7 +913,7 @@ smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon,
 				FILE_WRITE_DATA, FILE_OPEN,
 				0, ACL_NO_MODE, &in_iov,
 				&(int){SMB2_OP_SET_EOF}, 1,
-				cfile, NULL, NULL, NULL, NULL);
+				cfile, NULL, NULL);
 }
 
 int
@@ -999,8 +941,8 @@ smb2_set_file_info(struct inode *inode, const char *full_path,
 	rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
 			      FILE_WRITE_ATTRIBUTES, FILE_OPEN,
 			      0, ACL_NO_MODE, &in_iov,
-			      &(int){SMB2_OP_SET_INFO}, 1, cfile,
-			      NULL, NULL, NULL, NULL);
+			      &(int){SMB2_OP_SET_INFO}, 1,
+			      cfile, NULL, NULL);
 	cifs_put_tlink(tlink);
 	return rc;
 }
@@ -1035,7 +977,7 @@ struct inode *smb2_get_reparse_inode(struct cifs_open_info_data *data,
 		cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile);
 		rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
 				      da, cd, co, ACL_NO_MODE, in_iov,
-				      cmds, 2, cfile, NULL, NULL, NULL, NULL);
+				      cmds, 2, cfile, NULL, NULL);
 		if (!rc) {
 			rc = smb311_posix_get_inode_info(&new, full_path,
 							 data, sb, xid);
@@ -1045,7 +987,7 @@ struct inode *smb2_get_reparse_inode(struct cifs_open_info_data *data,
 		cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile);
 		rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
 				      da, cd, co, ACL_NO_MODE, in_iov,
-				      cmds, 2, cfile, NULL, NULL, NULL, NULL);
+				      cmds, 2, cfile, NULL, NULL);
 		if (!rc) {
 			rc = cifs_get_inode_info(&new, full_path,
 						 data, sb, xid, NULL);
@@ -1072,8 +1014,8 @@ int smb2_query_reparse_point(const unsigned int xid,
 	rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
 			      FILE_READ_ATTRIBUTES, FILE_OPEN,
 			      OPEN_REPARSE_POINT, ACL_NO_MODE, &in_iov,
-			      &(int){SMB2_OP_GET_REPARSE}, 1, cfile,
-			      NULL, NULL, NULL, NULL);
+			      &(int){SMB2_OP_GET_REPARSE}, 1,
+			      cfile, NULL, NULL);
 	if (rc)
 		goto out;
 
diff --git a/fs/smb/client/smb2maperror.c b/fs/smb/client/smb2maperror.c
index 1a90dd78b238..ac1895358908 100644
--- a/fs/smb/client/smb2maperror.c
+++ b/fs/smb/client/smb2maperror.c
@@ -1210,6 +1210,8 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
 	{STATUS_INVALID_TASK_INDEX, -EIO, "STATUS_INVALID_TASK_INDEX"},
 	{STATUS_THREAD_ALREADY_IN_TASK, -EIO, "STATUS_THREAD_ALREADY_IN_TASK"},
 	{STATUS_CALLBACK_BYPASS, -EIO, "STATUS_CALLBACK_BYPASS"},
+	{STATUS_SERVER_UNAVAILABLE, -EAGAIN, "STATUS_SERVER_UNAVAILABLE"},
+	{STATUS_FILE_NOT_AVAILABLE, -EAGAIN, "STATUS_FILE_NOT_AVAILABLE"},
 	{STATUS_PORT_CLOSED, -EIO, "STATUS_PORT_CLOSED"},
 	{STATUS_MESSAGE_LOST, -EIO, "STATUS_MESSAGE_LOST"},
 	{STATUS_INVALID_MESSAGE, -EIO, "STATUS_INVALID_MESSAGE"},
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 01a5bd7e6a30..d9553c2556a2 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -614,7 +614,8 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
 				 "multichannel not available\n"
 				 "Empty network interface list returned by server %s\n",
 				 ses->server->hostname);
-		rc = -EINVAL;
+		rc = -EOPNOTSUPP;
+		ses->iface_last_update = jiffies;
 		goto out;
 	}
 
@@ -712,7 +713,6 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
 
 		ses->iface_count++;
 		spin_unlock(&ses->iface_lock);
-		ses->iface_last_update = jiffies;
 next_iface:
 		nb_iface++;
 		next = le32_to_cpu(p->Next);
@@ -734,11 +734,7 @@ next_iface:
 	if ((bytes_left > 8) || p->Next)
 		cifs_dbg(VFS, "%s: incomplete interface info\n", __func__);
 
-
-	if (!ses->iface_count) {
-		rc = -EINVAL;
-		goto out;
-	}
+	ses->iface_last_update = jiffies;
 
 out:
 	/*
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index bd25c34dc398..288199f0b987 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -156,6 +156,57 @@ out:
 	return;
 }
 
+/*
+ * Called when the server no longer supports multichannel: drop @server if it
+ * is a secondary channel (returns -EHOSTDOWN), else disable all secondary
+ * channels (returns 0). Leaves ses->session_mutex alone; callers unlock it.
+ */
+static int
+cifs_chan_skip_or_disable(struct cifs_ses *ses,
+			  struct TCP_Server_Info *server,
+			  bool from_reconnect)
+{
+	struct TCP_Server_Info *pserver;
+	unsigned int chan_index;
+
+	if (SERVER_IS_CHAN(server)) {
+		cifs_dbg(VFS,
+			"server %s does not support multichannel anymore. Skip secondary channel\n",
+			 ses->server->hostname);
+
+		spin_lock(&ses->chan_lock);
+		chan_index = cifs_ses_get_chan_index(ses, server);
+		if (chan_index == CIFS_INVAL_CHAN_INDEX) {
+			spin_unlock(&ses->chan_lock);
+			goto skip_terminate;
+		}
+
+		ses->chans[chan_index].server = NULL;
+		spin_unlock(&ses->chan_lock);
+
+		/*
+		 * Drop the channel's reference to server only after releasing
+		 * chan_lock: cifs_put_tcp_session takes cifs_tcp_ses_lock.
+		 */
+		cifs_put_tcp_session(server, from_reconnect);
+
+		server->terminate = true;
+		cifs_signal_cifsd_for_reconnect(server, false);
+
+		/* mark primary server as needing reconnect */
+		pserver = server->primary_server;
+		cifs_signal_cifsd_for_reconnect(pserver, false);
+skip_terminate:
+		return -EHOSTDOWN;
+	}
+
+	cifs_server_dbg(VFS,
+		"server does not support multichannel anymore. Disable all other channels\n");
+	cifs_disable_secondary_channels(ses);
+
+	return 0;
+}
+
 static int
 smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
 	       struct TCP_Server_Info *server, bool from_reconnect)
@@ -164,8 +215,6 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
 	struct nls_table *nls_codepage = NULL;
 	struct cifs_ses *ses;
 	int xid;
-	struct TCP_Server_Info *pserver;
-	unsigned int chan_index;
 
 	/*
 	 * SMB2s NegProt, SessSetup, Logoff do not have tcon yet so
@@ -310,44 +359,11 @@ again:
 		 */
 		if (ses->chan_count > 1 &&
 		    !(server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) {
-			if (SERVER_IS_CHAN(server)) {
-				cifs_dbg(VFS, "server %s does not support " \
-					 "multichannel anymore. skipping secondary channel\n",
-					 ses->server->hostname);
-
-				spin_lock(&ses->chan_lock);
-				chan_index = cifs_ses_get_chan_index(ses, server);
-				if (chan_index == CIFS_INVAL_CHAN_INDEX) {
-					spin_unlock(&ses->chan_lock);
-					goto skip_terminate;
-				}
-
-				ses->chans[chan_index].server = NULL;
-				spin_unlock(&ses->chan_lock);
-
-				/*
-				 * the above reference of server by channel
-				 * needs to be dropped without holding chan_lock
-				 * as cifs_put_tcp_session takes a higher lock
-				 * i.e. cifs_tcp_ses_lock
-				 */
-				cifs_put_tcp_session(server, from_reconnect);
-
-				server->terminate = true;
-				cifs_signal_cifsd_for_reconnect(server, false);
-
-				/* mark primary server as needing reconnect */
-				pserver = server->primary_server;
-				cifs_signal_cifsd_for_reconnect(pserver, false);
-
-skip_terminate:
+			rc = cifs_chan_skip_or_disable(ses, server,
+						       from_reconnect);
+			if (rc) {
 				mutex_unlock(&ses->session_mutex);
-				rc = -EHOSTDOWN;
 				goto out;
-			} else {
-				cifs_server_dbg(VFS, "does not support " \
-					 "multichannel anymore. disabling all other channels\n");
-				cifs_disable_secondary_channels(ses);
 			}
 		}
 
@@ -395,20 +411,35 @@ skip_sess_setup:
 		rc = SMB3_request_interfaces(xid, tcon, false);
 		free_xid(xid);
 
-		if (rc)
+		if (rc == -EOPNOTSUPP) {
+			/*
+			 * some servers like Azure SMB server do not advertise
+			 * that multichannel has been disabled with server
+			 * capabilities, rather return STATUS_NOT_IMPLEMENTED.
+			 * treat this as server not supporting multichannel
+			 */
+
+			rc = cifs_chan_skip_or_disable(ses, server,
+						       from_reconnect);
+			goto skip_add_channels;
+		} else if (rc)
 			cifs_dbg(FYI, "%s: failed to query server interfaces: %d\n",
 				 __func__, rc);
 
 		if (ses->chan_max > ses->chan_count &&
+		    ses->iface_count &&
 		    !SERVER_IS_CHAN(server)) {
 			if (ses->chan_count == 1)
 				cifs_server_dbg(VFS, "supports multichannel now\n");
 
 			cifs_try_adding_channels(ses);
+			queue_delayed_work(cifsiod_wq, &tcon->query_interfaces,
+					   (SMB_INTERFACE_POLL_INTERVAL * HZ));
 		}
 	} else {
 		mutex_unlock(&ses->session_mutex);
 	}
+skip_add_channels:
 
 	if (smb2_command != SMB2_INTERNAL_CMD)
 		mod_delayed_work(cifsiod_wq, &server->reconnect, 0);
@@ -1958,10 +1989,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
 	__le16 *unc_path = NULL;
 	int flags = 0;
 	unsigned int total_len;
-	struct TCP_Server_Info *server;
-
-	/* always use master channel */
-	server = ses->server;
+	struct TCP_Server_Info *server = cifs_pick_channel(ses);
 
 	cifs_dbg(FYI, "TCON\n");
 
@@ -2094,6 +2122,7 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
 	struct smb2_tree_disconnect_req *req; /* response is trivial */
 	int rc = 0;
 	struct cifs_ses *ses = tcon->ses;
+	struct TCP_Server_Info *server = cifs_pick_channel(ses);
 	int flags = 0;
 	unsigned int total_len;
 	struct kvec iov[1];
@@ -2116,7 +2145,7 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
 
 	invalidate_all_cached_dirs(tcon);
 
-	rc = smb2_plain_req_init(SMB2_TREE_DISCONNECT, tcon, ses->server,
+	rc = smb2_plain_req_init(SMB2_TREE_DISCONNECT, tcon, server,
 				 (void **) &req,
 				 &total_len);
 	if (rc)
@@ -2134,7 +2163,7 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
 	rqst.rq_iov = iov;
 	rqst.rq_nvec = 1;
 
-	rc = cifs_send_recv(xid, ses, ses->server,
+	rc = cifs_send_recv(xid, ses, server,
 			    &rqst, &resp_buf_type, flags, &rsp_iov);
 	cifs_small_buf_release(req);
 	if (rc) {
@@ -2279,7 +2308,7 @@ int smb2_parse_contexts(struct TCP_Server_Info *server,
 
 		noff = le16_to_cpu(cc->NameOffset);
 		nlen = le16_to_cpu(cc->NameLength);
-		if (noff + nlen >= doff)
+		if (noff + nlen > doff)
 			return -EINVAL;
 
 		name = (char *)cc + noff;
@@ -3918,7 +3947,7 @@ void smb2_reconnect_server(struct work_struct *work)
 	struct cifs_ses *ses, *ses2;
 	struct cifs_tcon *tcon, *tcon2;
 	struct list_head tmp_list, tmp_ses_list;
-	bool tcon_exist = false, ses_exist = false;
+	bool ses_exist = false;
 	bool tcon_selected = false;
 	int rc;
 	bool resched = false;
@@ -3964,7 +3993,7 @@ void smb2_reconnect_server(struct work_struct *work)
 			if (tcon->need_reconnect || tcon->need_reopen_files) {
 				tcon->tc_count++;
 				list_add_tail(&tcon->rlist, &tmp_list);
-				tcon_selected = tcon_exist = true;
+				tcon_selected = true;
 			}
 		}
 		/*
@@ -3973,7 +4002,7 @@ void smb2_reconnect_server(struct work_struct *work)
 		 */
 		if (ses->tcon_ipc && ses->tcon_ipc->need_reconnect) {
 			list_add_tail(&ses->tcon_ipc->rlist, &tmp_list);
-			tcon_selected = tcon_exist = true;
+			tcon_selected = true;
 			cifs_smb_ses_inc_refcount(ses);
 		}
 		/*
diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h
index 343ada691e76..0034b537b0b3 100644
--- a/fs/smb/client/smb2proto.h
+++ b/fs/smb/client/smb2proto.h
@@ -299,9 +299,7 @@ int smb311_posix_query_path_info(const unsigned int xid,
 				 struct cifs_tcon *tcon,
 				 struct cifs_sb_info *cifs_sb,
 				 const char *full_path,
-				 struct cifs_open_info_data *data,
-				 struct cifs_sid *owner,
-				 struct cifs_sid *group);
+				 struct cifs_open_info_data *data);
 int posix_info_parse(const void *beg, const void *end,
 		     struct smb2_posix_info_parsed *out);
 int posix_info_sid_size(const void *beg, const void *end);
diff --git a/fs/smb/client/smb2status.h b/fs/smb/client/smb2status.h
index a9e958166fc5..9c6d79b0bd49 100644
--- a/fs/smb/client/smb2status.h
+++ b/fs/smb/client/smb2status.h
@@ -982,6 +982,8 @@ struct ntstatus {
 #define STATUS_INVALID_TASK_INDEX cpu_to_le32(0xC0000501)
 #define STATUS_THREAD_ALREADY_IN_TASK cpu_to_le32(0xC0000502)
 #define STATUS_CALLBACK_BYPASS cpu_to_le32(0xC0000503)
+#define STATUS_SERVER_UNAVAILABLE cpu_to_le32(0xC0000466)
+#define STATUS_FILE_NOT_AVAILABLE cpu_to_le32(0xC0000467)
 #define STATUS_PORT_CLOSED cpu_to_le32(0xC0000700)
 #define STATUS_MESSAGE_LOST cpu_to_le32(0xC0000701)
 #define STATUS_INVALID_MESSAGE cpu_to_le32(0xC0000702)
diff --git a/fs/smb/server/asn1.c b/fs/smb/server/asn1.c
index 4a4b2b03ff33..b931a99ab9c8 100644
--- a/fs/smb/server/asn1.c
+++ b/fs/smb/server/asn1.c
@@ -214,10 +214,15 @@ static int ksmbd_neg_token_alloc(void *context, size_t hdrlen,
 {
 	struct ksmbd_conn *conn = context;
 
+	if (!vlen)
+		return -EINVAL;
+
 	conn->mechToken = kmemdup_nul(value, vlen, GFP_KERNEL);
 	if (!conn->mechToken)
 		return -ENOMEM;
 
+	conn->mechTokenLen = (unsigned int)vlen;
+
 	return 0;
 }
 
diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c
index d311c2ee10bd..09e1e7771592 100644
--- a/fs/smb/server/connection.c
+++ b/fs/smb/server/connection.c
@@ -416,13 +416,7 @@ static void stop_sessions(void)
 again:
 	down_read(&conn_list_lock);
 	list_for_each_entry(conn, &conn_list, conns_list) {
-		struct task_struct *task;
-
 		t = conn->transport;
-		task = t->handler;
-		if (task)
-			ksmbd_debug(CONN, "Stop session handler %s/%d\n",
-				    task->comm, task_pid_nr(task));
 		ksmbd_conn_set_exiting(conn);
 		if (t->ops->shutdown) {
 			up_read(&conn_list_lock);
diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h
index 3c005246a32e..0e04cf8b1d89 100644
--- a/fs/smb/server/connection.h
+++ b/fs/smb/server/connection.h
@@ -88,6 +88,7 @@ struct ksmbd_conn {
 	__u16				dialect;
 
 	char				*mechToken;
+	unsigned int			mechTokenLen;
 
 	struct ksmbd_conn_ops	*conn_ops;
 
@@ -134,7 +135,6 @@ struct ksmbd_transport_ops {
 struct ksmbd_transport {
 	struct ksmbd_conn		*conn;
 	struct ksmbd_transport_ops	*ops;
-	struct task_struct		*handler;
 };
 
 #define KSMBD_TCP_RECV_TIMEOUT	(7 * HZ)
diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c
index 001926d3b348..53dfaac425c6 100644
--- a/fs/smb/server/oplock.c
+++ b/fs/smb/server/oplock.c
@@ -1197,6 +1197,12 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid,
 	bool prev_op_has_lease;
 	__le32 prev_op_state = 0;
 
+	/* Only v2 leases handle the directory */
+	if (S_ISDIR(file_inode(fp->filp)->i_mode)) {
+		if (!lctx || lctx->version != 2)
+			return 0;
+	}
+
 	opinfo = alloc_opinfo(work, pid, tid);
 	if (!opinfo)
 		return -ENOMEM;
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 3143819935dc..ba7a72a6a4f4 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -1414,7 +1414,10 @@ static struct ksmbd_user *session_user(struct ksmbd_conn *conn,
 	char *name;
 	unsigned int name_off, name_len, secbuf_len;
 
-	secbuf_len = le16_to_cpu(req->SecurityBufferLength);
+	if (conn->use_spnego && conn->mechToken)
+		secbuf_len = conn->mechTokenLen;
+	else
+		secbuf_len = le16_to_cpu(req->SecurityBufferLength);
 	if (secbuf_len < sizeof(struct authenticate_message)) {
 		ksmbd_debug(SMB, "blob len %d too small\n", secbuf_len);
 		return NULL;
@@ -1505,7 +1508,10 @@ static int ntlm_authenticate(struct ksmbd_work *work,
 		struct authenticate_message *authblob;
 
 		authblob = user_authblob(conn, req);
-		sz = le16_to_cpu(req->SecurityBufferLength);
+		if (conn->use_spnego && conn->mechToken)
+			sz = conn->mechTokenLen;
+		else
+			sz = le16_to_cpu(req->SecurityBufferLength);
 		rc = ksmbd_decode_ntlmssp_auth_blob(authblob, sz, conn, sess);
 		if (rc) {
 			set_user_flag(sess->user, KSMBD_USER_FLAG_BAD_PASSWORD);
@@ -1778,8 +1784,7 @@ int smb2_sess_setup(struct ksmbd_work *work)
 
 	negblob_off = le16_to_cpu(req->SecurityBufferOffset);
 	negblob_len = le16_to_cpu(req->SecurityBufferLength);
-	if (negblob_off < offsetof(struct smb2_sess_setup_req, Buffer) ||
-	    negblob_len < offsetof(struct negotiate_message, NegotiateFlags)) {
+	if (negblob_off < offsetof(struct smb2_sess_setup_req, Buffer)) {
 		rc = -EINVAL;
 		goto out_err;
 	}
@@ -1788,8 +1793,15 @@ int smb2_sess_setup(struct ksmbd_work *work)
 			negblob_off);
 
 	if (decode_negotiation_token(conn, negblob, negblob_len) == 0) {
-		if (conn->mechToken)
+		if (conn->mechToken) {
 			negblob = (struct negotiate_message *)conn->mechToken;
+			negblob_len = conn->mechTokenLen;
+		}
+	}
+
+	if (negblob_len < offsetof(struct negotiate_message, NegotiateFlags)) {
+		rc = -EINVAL;
+		goto out_err;
 	}
 
 	if (server_conf.auth_mechs & conn->auth_mechs) {
diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
index c5629a68c8b7..8faa25c6e129 100644
--- a/fs/smb/server/transport_rdma.c
+++ b/fs/smb/server/transport_rdma.c
@@ -2039,6 +2039,7 @@ static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
 static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
 {
 	struct smb_direct_transport *t;
+	struct task_struct *handler;
 	int ret;
 
 	if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) {
@@ -2056,11 +2057,11 @@ static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
 	if (ret)
 		goto out_err;
 
-	KSMBD_TRANS(t)->handler = kthread_run(ksmbd_conn_handler_loop,
-					      KSMBD_TRANS(t)->conn, "ksmbd:r%u",
-					      smb_direct_port);
-	if (IS_ERR(KSMBD_TRANS(t)->handler)) {
-		ret = PTR_ERR(KSMBD_TRANS(t)->handler);
+	handler = kthread_run(ksmbd_conn_handler_loop,
+			      KSMBD_TRANS(t)->conn, "ksmbd:r%u",
+			      smb_direct_port);
+	if (IS_ERR(handler)) {
+		ret = PTR_ERR(handler);
 		pr_err("Can't start thread\n");
 		goto out_err;
 	}
diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c
index eff7a1d793f0..9d4222154dcc 100644
--- a/fs/smb/server/transport_tcp.c
+++ b/fs/smb/server/transport_tcp.c
@@ -185,6 +185,7 @@ static int ksmbd_tcp_new_connection(struct socket *client_sk)
 	struct sockaddr *csin;
 	int rc = 0;
 	struct tcp_transport *t;
+	struct task_struct *handler;
 
 	t = alloc_transport(client_sk);
 	if (!t) {
@@ -199,13 +200,13 @@ static int ksmbd_tcp_new_connection(struct socket *client_sk)
 		goto out_error;
 	}
 
-	KSMBD_TRANS(t)->handler = kthread_run(ksmbd_conn_handler_loop,
-					      KSMBD_TRANS(t)->conn,
-					      "ksmbd:%u",
-					      ksmbd_tcp_get_port(csin));
-	if (IS_ERR(KSMBD_TRANS(t)->handler)) {
+	handler = kthread_run(ksmbd_conn_handler_loop,
+			      KSMBD_TRANS(t)->conn,
+			      "ksmbd:%u",
+			      ksmbd_tcp_get_port(csin));
+	if (IS_ERR(handler)) {
 		pr_err("cannot start conn thread\n");
-		rc = PTR_ERR(KSMBD_TRANS(t)->handler);
+		rc = PTR_ERR(handler);
 		free_transport(t);
 	}
 	return rc;
diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c
index 6795fda2af19..6b211522a13e 100644
--- a/fs/tracefs/event_inode.c
+++ b/fs/tracefs/event_inode.c
@@ -34,7 +34,15 @@ static DEFINE_MUTEX(eventfs_mutex);
 
 /* Choose something "unique" ;-) */
 #define EVENTFS_FILE_INODE_INO		0x12c4e37
-#define EVENTFS_DIR_INODE_INO		0x134b2f5
+
+/* Return @ei's inode number, allocating it via get_next_ino() on first use */
+static unsigned int eventfs_dir_ino(struct eventfs_inode *ei)
+{
+	if (!ei->ino)
+		ei->ino = get_next_ino();
+	/* unsigned like ei->ino: an int return would sign-extend large values */
+	return ei->ino;
+}
 
 /*
  * The eventfs_inode (ei) itself is protected by SRCU. It is released from
@@ -396,7 +404,7 @@ static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent
 	inode->i_fop = &eventfs_file_operations;
 
-	/* All directories will have the same inode number */
+	/* Use the per-eventfs_inode number handed out by eventfs_dir_ino() */
-	inode->i_ino = EVENTFS_DIR_INODE_INO;
+	inode->i_ino = eventfs_dir_ino(ei);
 
 	ti = get_tracefs(inode);
 	ti->flags |= TRACEFS_EVENT_INODE;
@@ -802,7 +810,7 @@ static int eventfs_iterate(struct file *file, struct dir_context *ctx)
 
 		name = ei_child->name;
 
-		ino = EVENTFS_DIR_INODE_INO;
+		ino = eventfs_dir_ino(ei_child);
 
 		if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR))
 			goto out_dec;
diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h
index 12b7d0150ae9..45397df9bb65 100644
--- a/fs/tracefs/internal.h
+++ b/fs/tracefs/internal.h
@@ -55,6 +55,10 @@ struct eventfs_inode {
 	struct eventfs_attr		*entry_attrs;
 	struct eventfs_attr		attr;
 	void				*data;
+	unsigned int			is_freed:1;
+	unsigned int			is_events:1;
+	unsigned int			nr_entries:30;
+	unsigned int			ino;
 	/*
 	 * Union - used for deletion
 	 * @llist:	for calling dput() if needed after RCU
@@ -64,9 +68,6 @@ struct eventfs_inode {
 		struct llist_node	llist;
 		struct rcu_head		rcu;
 	};
-	unsigned int			is_freed:1;
-	unsigned int			is_events:1;
-	unsigned int			nr_entries:30;
 };
 
 static inline struct tracefs_inode *get_tracefs(const struct inode *inode)
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 98aaca933bdd..f362345467fa 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3277,7 +3277,7 @@ xfs_bmap_alloc_account(
 	struct xfs_bmalloca	*ap)
 {
 	bool			isrt = XFS_IS_REALTIME_INODE(ap->ip) &&
-					(ap->flags & XFS_BMAPI_ATTRFORK);
+					!(ap->flags & XFS_BMAPI_ATTRFORK);
 	uint			fld;
 
 	if (ap->flags & XFS_BMAPI_COWFORK) {
diff --git a/include/asm-generic/checksum.h b/include/asm-generic/checksum.h
index 43e18db89c14..ad928cce268b 100644
--- a/include/asm-generic/checksum.h
+++ b/include/asm-generic/checksum.h
@@ -2,6 +2,8 @@
 #ifndef __ASM_GENERIC_CHECKSUM_H
 #define __ASM_GENERIC_CHECKSUM_H
 
+#include <linux/bitops.h>
+
 /*
  * computes the checksum of a memory block at buff, length len,
  * and adds in "sum" (32-bit)
@@ -31,9 +33,7 @@ extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
 static inline __sum16 csum_fold(__wsum csum)
 {
 	u32 sum = (__force u32)csum;
-	sum = (sum & 0xffff) + (sum >> 16);
-	sum = (sum & 0xffff) + (sum >> 16);
-	return (__force __sum16)~sum;
+	return (__force __sum16)((~sum - ror32(sum, 16)) >> 16);
 }
 #endif
 
diff --git a/include/dt-bindings/dma/fsl-edma.h b/include/dt-bindings/dma/fsl-edma.h
new file mode 100644
index 000000000000..fd11478cfe9c
--- /dev/null
+++ b/include/dt-bindings/dma/fsl-edma.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+
+#ifndef _FSL_EDMA_DT_BINDING_H_
+#define _FSL_EDMA_DT_BINDING_H_
+
+/* Receive Channel */
+#define FSL_EDMA_RX		0x1
+
+/* iMX8 audio remote DMA */
+#define FSL_EDMA_REMOTE		0x2
+
+/* FIFO is a contiguous memory region */
+#define FSL_EDMA_MULTI_FIFO	0x4
+
+/* Channel needs to stick to an even channel */
+#define FSL_EDMA_EVEN_CH	0x8
+
+/* Channel needs to stick to an odd channel */
+#define FSL_EDMA_ODD_CH		0x10
+
+#endif
diff --git a/include/linux/bio.h b/include/linux/bio.h
index ec4db73e5f4e..875d792bffff 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -286,6 +286,11 @@ static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio,
 {
 	struct bio_vec *bvec = bio_first_bvec_all(bio) + i;
 
+	if (unlikely(i >= bio->bi_vcnt)) {
+		fi->folio = NULL;
+		return;
+	}
+
 	fi->folio = page_folio(bvec->bv_page);
 	fi->offset = bvec->bv_offset +
 			PAGE_SIZE * (bvec->bv_page - &fi->folio->page);
@@ -303,10 +308,8 @@ static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio)
 		fi->offset = 0;
 		fi->length = min(folio_size(fi->folio), fi->_seg_count);
 		fi->_next = folio_next(fi->folio);
-	} else if (fi->_i + 1 < bio->bi_vcnt) {
-		bio_first_folio(fi, bio, fi->_i + 1);
 	} else {
-		fi->folio = NULL;
+		bio_first_folio(fi, bio, fi->_i + 1);
 	}
 }
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index a676e116085f..7a8150a5f051 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -391,9 +391,6 @@ struct blk_mq_hw_ctx {
 	 */
 	struct blk_mq_tags	*sched_tags;
 
-	/** @run: Number of dispatched requests. */
-	unsigned long		run;
-
 	/** @numa_node: NUMA node the storage adapter has been connected to. */
 	unsigned int		numa_node;
 	/** @queue_num: Index of this hardware queue. */
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index b8610e9d2471..fa018d5864e7 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -572,9 +572,12 @@ int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt);
  */
 #define CEPH_SPARSE_EXT_ARRAY_INITIAL  16
 
-static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op)
+static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt)
 {
-	return __ceph_alloc_sparse_ext_map(op, CEPH_SPARSE_EXT_ARRAY_INITIAL);
+	if (!cnt)
+		cnt = CEPH_SPARSE_EXT_ARRAY_INITIAL;
+
+	return __ceph_alloc_sparse_ext_map(op, cnt);
 }
 
 extern void ceph_osdc_get_request(struct ceph_osd_request *req);
diff --git a/include/linux/export.h b/include/linux/export.h
index 9911508a9604..0bbd02fd351d 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -7,15 +7,6 @@
 #include <linux/stringify.h>
 
 /*
- * Export symbols from the kernel to modules.  Forked from module.h
- * to reduce the amount of pointless cruft we feed to gcc when only
- * exporting a simple symbol or two.
- *
- * Try not to add #includes here.  It slows compilation and makes kernel
- * hackers place grumpy comments in header files.
- */
-
-/*
  * This comment block is used by fixdep. Please do not remove.
  *
  * When CONFIG_MODVERSIONS is changed from n to y, all source files having
@@ -23,15 +14,6 @@
  * side effect of the *.o build rule.
  */
 
-#ifndef __ASSEMBLY__
-#ifdef MODULE
-extern struct module __this_module;
-#define THIS_MODULE (&__this_module)
-#else
-#define THIS_MODULE ((struct module *)0)
-#endif
-#endif /* __ASSEMBLY__ */
-
 #ifdef CONFIG_64BIT
 #define __EXPORT_SYMBOL_REF(sym)			\
 	.balign 8				ASM_NL	\
diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index 79ef6ac4c021..89a6888f2f9e 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -214,51 +214,6 @@ __kernel_size_t __fortify_strlen(const char * const POS p)
 	return ret;
 }
 
-/* Defined after fortified strlen() to reuse it. */
-extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy);
-/**
- * strlcpy - Copy a string into another string buffer
- *
- * @p: pointer to destination of copy
- * @q: pointer to NUL-terminated source string to copy
- * @size: maximum number of bytes to write at @p
- *
- * If strlen(@q) >= @size, the copy of @q will be truncated at
- * @size - 1 bytes. @p will always be NUL-terminated.
- *
- * Do not use this function. While FORTIFY_SOURCE tries to avoid
- * over-reads when calculating strlen(@q), it is still possible.
- * Prefer strscpy(), though note its different return values for
- * detecting truncation.
- *
- * Returns total number of bytes written to @p, including terminating NUL.
- *
- */
-__FORTIFY_INLINE size_t strlcpy(char * const POS p, const char * const POS q, size_t size)
-{
-	const size_t p_size = __member_size(p);
-	const size_t q_size = __member_size(q);
-	size_t q_len;	/* Full count of source string length. */
-	size_t len;	/* Count of characters going into destination. */
-
-	if (p_size == SIZE_MAX && q_size == SIZE_MAX)
-		return __real_strlcpy(p, q, size);
-	q_len = strlen(q);
-	len = (q_len >= size) ? size - 1 : q_len;
-	if (__builtin_constant_p(size) && __builtin_constant_p(q_len) && size) {
-		/* Write size is always larger than destination. */
-		if (len >= p_size)
-			__write_overflow();
-	}
-	if (size) {
-		if (len >= p_size)
-			fortify_panic(__func__);
-		__underlying_memcpy(p, q, len);
-		p[len] = '\0';
-	}
-	return q_len;
-}
-
 /* Defined after fortified strnlen() to reuse it. */
 extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy);
 /**
@@ -272,12 +227,6 @@ extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy);
  * @p buffer. The behavior is undefined if the string buffers overlap. The
  * destination @p buffer is always NUL terminated, unless it's zero-sized.
  *
- * Preferred to strlcpy() since the API doesn't require reading memory
- * from the source @q string beyond the specified @size bytes, and since
- * the return value is easier to error-check than strlcpy()'s.
- * In addition, the implementation is robust to the string changing out
- * from underneath it, unlike the current strlcpy() implementation.
- *
  * Preferred to strncpy() since it always returns a valid string, and
  * doesn't unnecessarily force the tail of the destination buffer to be
  * zero padded. If padding is desired please use strscpy_pad().
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e6ba0cc6f2ee..ed5966a70495 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2371,7 +2371,7 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
 #define I_CREATING		(1 << 15)
 #define I_DONTCACHE		(1 << 16)
 #define I_SYNC_QUEUED		(1 << 17)
-#define I_PINNING_FSCACHE_WB	(1 << 18)
+#define I_PINNING_NETFS_WB	(1 << 18)
 
 #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
 #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index a174cedf4d90..bdf7f3eddf0a 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -189,17 +189,20 @@ extern atomic_t fscache_n_write;
 extern atomic_t fscache_n_no_write_space;
 extern atomic_t fscache_n_no_create_space;
 extern atomic_t fscache_n_culled;
+extern atomic_t fscache_n_dio_misfit;
 #define fscache_count_read() atomic_inc(&fscache_n_read)
 #define fscache_count_write() atomic_inc(&fscache_n_write)
 #define fscache_count_no_write_space() atomic_inc(&fscache_n_no_write_space)
 #define fscache_count_no_create_space() atomic_inc(&fscache_n_no_create_space)
 #define fscache_count_culled() atomic_inc(&fscache_n_culled)
+#define fscache_count_dio_misfit() atomic_inc(&fscache_n_dio_misfit)
 #else
 #define fscache_count_read() do {} while(0)
 #define fscache_count_write() do {} while(0)
 #define fscache_count_no_write_space() do {} while(0)
 #define fscache_count_no_create_space() do {} while(0)
 #define fscache_count_culled() do {} while(0)
+#define fscache_count_dio_misfit() do {} while(0)
 #endif
 
 #endif /* _LINUX_FSCACHE_CACHE_H */
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 8e312c8323a8..6e8562cbcc43 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -437,9 +437,6 @@ const struct netfs_cache_ops *fscache_operation_valid(const struct netfs_cache_r
  * indicates the cache resources to which the operation state should be
  * attached; @cookie indicates the cache object that will be accessed.
  *
- * This is intended to be called from the ->begin_cache_operation() netfs lib
- * operation as implemented by the network filesystem.
- *
  * @cres->inval_counter is set from @cookie->inval_counter for comparison at
  * the end of the operation.  This allows invalidation during the operation to
  * be detected by the caller.
@@ -629,48 +626,6 @@ static inline void fscache_write_to_cache(struct fscache_cookie *cookie,
 
 }
 
-#if __fscache_available
-bool fscache_dirty_folio(struct address_space *mapping, struct folio *folio,
-		struct fscache_cookie *cookie);
-#else
-#define fscache_dirty_folio(MAPPING, FOLIO, COOKIE) \
-		filemap_dirty_folio(MAPPING, FOLIO)
-#endif
-
-/**
- * fscache_unpin_writeback - Unpin writeback resources
- * @wbc: The writeback control
- * @cookie: The cookie referring to the cache object
- *
- * Unpin the writeback resources pinned by fscache_dirty_folio().  This is
- * intended to be called by the netfs's ->write_inode() method.
- */
-static inline void fscache_unpin_writeback(struct writeback_control *wbc,
-					   struct fscache_cookie *cookie)
-{
-	if (wbc->unpinned_fscache_wb)
-		fscache_unuse_cookie(cookie, NULL, NULL);
-}
-
-/**
- * fscache_clear_inode_writeback - Clear writeback resources pinned by an inode
- * @cookie: The cookie referring to the cache object
- * @inode: The inode to clean up
- * @aux: Auxiliary data to apply to the inode
- *
- * Clear any writeback resources held by an inode when the inode is evicted.
- * This must be called before clear_inode() is called.
- */
-static inline void fscache_clear_inode_writeback(struct fscache_cookie *cookie,
-						 struct inode *inode,
-						 const void *aux)
-{
-	if (inode->i_state & I_PINNING_FSCACHE_WB) {
-		loff_t i_size = i_size_read(inode);
-		fscache_unuse_cookie(cookie, aux, &i_size);
-	}
-}
-
 /**
  * fscache_note_page_release - Note that a netfs page got released
  * @cookie: The cookie corresponding to the file
diff --git a/include/linux/init.h b/include/linux/init.h
index 01b52c9c7526..3fa3f6241350 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -179,6 +179,13 @@ extern void (*late_time_init)(void);
 
 extern bool initcall_debug;
 
+#ifdef MODULE
+extern struct module __this_module;
+#define THIS_MODULE (&__this_module)
+#else
+#define THIS_MODULE ((struct module *)0)
+#endif
+
 #endif
   
 #ifndef MODULE
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
index 7578d4f6a969..db1249cd9692 100644
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -47,7 +47,30 @@ static inline int task_nice_ioclass(struct task_struct *task)
 }
 
 #ifdef CONFIG_BLOCK
-int __get_task_ioprio(struct task_struct *p);
+/*
+ * If the task has set an I/O priority, use that. Otherwise, return
+ * the default I/O priority.
+ *
+ * Expected to be called for current task or with task_lock() held to keep
+ * io_context stable.
+ */
+static inline int __get_task_ioprio(struct task_struct *p)
+{
+	struct io_context *ioc = p->io_context;
+	int prio;
+
+	if (!ioc)
+		return IOPRIO_DEFAULT;
+
+	if (p != current)
+		lockdep_assert_held(&p->alloc_lock);
+
+	prio = ioc->ioprio;
+	if (IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_NONE)
+		prio = IOPRIO_PRIO_VALUE(task_nice_ioclass(p),
+					 task_nice_ioprio(p));
+	return prio;
+}
 #else
 static inline int __get_task_ioprio(struct task_struct *p)
 {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 8c55ff351e5f..41f03b352401 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -681,6 +681,7 @@ struct mlx5e_resources {
 		struct mlx5_sq_bfreg       bfreg;
 #define MLX5_MAX_NUM_TC 8
 		u32                        tisn[MLX5_MAX_PORTS][MLX5_MAX_NUM_TC];
+		bool			   tisn_valid;
 	} hw_objs;
 	struct net_device *uplink_netdev;
 	struct mutex uplink_netdev_lock;
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 6f7725238abc..3fb428ce7d1c 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -132,6 +132,7 @@ struct mlx5_flow_handle;
 
 enum {
 	FLOW_CONTEXT_HAS_TAG = BIT(0),
+	FLOW_CONTEXT_UPLINK_HAIRPIN_EN = BIT(1),
 };
 
 struct mlx5_flow_context {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index bf5320b28b8b..c726f90ab752 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -3576,7 +3576,7 @@ struct mlx5_ifc_flow_context_bits {
 	u8         action[0x10];
 
 	u8         extended_destination[0x1];
-	u8         reserved_at_81[0x1];
+	u8         uplink_hairpin_en[0x1];
 	u8         flow_source[0x2];
 	u8         encrypt_decrypt_type[0x4];
 	u8         destination_list_size[0x18];
@@ -4036,8 +4036,13 @@ struct mlx5_ifc_nic_vport_context_bits {
 	u8	   affiliation_criteria[0x4];
 	u8	   affiliated_vhca_id[0x10];
 
-	u8	   reserved_at_60[0xd0];
+	u8	   reserved_at_60[0xa0];
+
+	u8	   reserved_at_100[0x1];
+	u8         sd_group[0x3];
+	u8	   reserved_at_104[0x1c];
 
+	u8	   reserved_at_120[0x10];
 	u8         mtu[0x10];
 
 	u8         system_image_guid[0x40];
@@ -10122,8 +10127,7 @@ struct mlx5_ifc_mpir_reg_bits {
 	u8         reserved_at_20[0x20];
 
 	u8         local_port[0x8];
-	u8         reserved_at_28[0x15];
-	u8         sd_group[0x3];
+	u8         reserved_at_28[0x18];
 
 	u8         reserved_at_60[0x20];
 };
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index fbb9bf447889..c36cc6d82926 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -72,6 +72,7 @@ int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu);
 int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu);
 int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
 					   u64 *system_image_guid);
+int mlx5_query_nic_vport_sd_group(struct mlx5_core_dev *mdev, u8 *sd_group);
 int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid);
 int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
 				    u16 vport, u64 node_guid);
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index b11a84f6c32b..100cbb261269 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -109,11 +109,18 @@ static inline int wait_on_page_fscache_killable(struct page *page)
 	return folio_wait_private_2_killable(page_folio(page));
 }
 
+/* Marks used on xarray-based buffers */
+#define NETFS_BUF_PUT_MARK	XA_MARK_0	/* - Page needs putting  */
+#define NETFS_BUF_PAGECACHE_MARK XA_MARK_1	/* - Page needs wb/dirty flag wrangling */
+
 enum netfs_io_source {
 	NETFS_FILL_WITH_ZEROES,
 	NETFS_DOWNLOAD_FROM_SERVER,
 	NETFS_READ_FROM_CACHE,
 	NETFS_INVALID_READ,
+	NETFS_UPLOAD_TO_SERVER,
+	NETFS_WRITE_TO_CACHE,
+	NETFS_INVALID_WRITE,
 } __mode(byte);
 
 typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error,
@@ -129,9 +136,57 @@ struct netfs_inode {
 	struct fscache_cookie	*cache;
 #endif
 	loff_t			remote_i_size;	/* Size of the remote file */
+	loff_t			zero_point;	/* Size after which we assume there's no data
+						 * on the server */
+	unsigned long		flags;
+#define NETFS_ICTX_ODIRECT	0		/* The file has DIO in progress */
+#define NETFS_ICTX_UNBUFFERED	1		/* I/O should not use the pagecache */
+#define NETFS_ICTX_WRITETHROUGH	2		/* Write-through caching */
+#define NETFS_ICTX_NO_WRITE_STREAMING	3	/* Don't engage in write-streaming */
+};
+
+/*
+ * A netfs group - for instance a ceph snap.  This is marked on dirty pages and
+ * pages marked with a group must be flushed before they can be written under
+ * the domain of another group.
+ */
+struct netfs_group {
+	refcount_t		ref;
+	void (*free)(struct netfs_group *netfs_group);
 };
 
 /*
+ * Information about a dirty page (attached only if necessary).
+ * folio->private
+ */
+struct netfs_folio {
+	struct netfs_group	*netfs_group;	/* Filesystem's grouping marker (or NULL). */
+	unsigned int		dirty_offset;	/* Write-streaming dirty data offset */
+	unsigned int		dirty_len;	/* Write-streaming dirty data length */
+};
+#define NETFS_FOLIO_INFO	0x1UL	/* OR'd with folio->private. */
+
+static inline struct netfs_folio *netfs_folio_info(struct folio *folio)
+{
+	void *priv = folio_get_private(folio);
+
+	if ((unsigned long)priv & NETFS_FOLIO_INFO)
+		return (struct netfs_folio *)((unsigned long)priv & ~NETFS_FOLIO_INFO);
+	return NULL;
+}
+
+static inline struct netfs_group *netfs_folio_group(struct folio *folio)
+{
+	struct netfs_folio *finfo;
+	void *priv = folio_get_private(folio);
+
+	finfo = netfs_folio_info(folio);
+	if (finfo)
+		return finfo->netfs_group;
+	return priv;
+}
+
+/*
  * Resources required to do operations on a cache.
  */
 struct netfs_cache_resources {
@@ -143,17 +198,24 @@ struct netfs_cache_resources {
 };
 
 /*
- * Descriptor for a single component subrequest.
+ * Descriptor for a single component subrequest.  Each operation represents an
+ * individual read/write from/to a server, a cache, a journal, etc.
+ *
+ * The buffer iterator is persistent for the life of the subrequest struct and
+ * the pages it points to can be relied on to exist for the duration.
  */
 struct netfs_io_subrequest {
 	struct netfs_io_request *rreq;		/* Supervising I/O request */
+	struct work_struct	work;
 	struct list_head	rreq_link;	/* Link in rreq->subrequests */
+	struct iov_iter		io_iter;	/* Iterator for this subrequest */
 	loff_t			start;		/* Where to start the I/O */
 	size_t			len;		/* Size of the I/O */
 	size_t			transferred;	/* Amount of data transferred */
 	refcount_t		ref;
 	short			error;		/* 0 or error that occurred */
 	unsigned short		debug_index;	/* Index in list (for debugging output) */
+	unsigned int		max_nr_segs;	/* 0 or max number of segments in an iterator */
 	enum netfs_io_source	source;		/* Where to read from/write to */
 	unsigned long		flags;
 #define NETFS_SREQ_COPY_TO_CACHE	0	/* Set if should copy the data to the cache */
@@ -168,6 +230,13 @@ enum netfs_io_origin {
 	NETFS_READAHEAD,		/* This read was triggered by readahead */
 	NETFS_READPAGE,			/* This read is a synchronous read */
 	NETFS_READ_FOR_WRITE,		/* This read is to prepare a write */
+	NETFS_WRITEBACK,		/* This write was triggered by writepages */
+	NETFS_WRITETHROUGH,		/* This write was made by netfs_perform_write() */
+	NETFS_LAUNDER_WRITE,		/* This is triggered by ->launder_folio() */
+	NETFS_UNBUFFERED_WRITE,		/* This is an unbuffered write */
+	NETFS_DIO_READ,			/* This is a direct I/O read */
+	NETFS_DIO_WRITE,		/* This is a direct I/O write */
+	nr__netfs_io_origin
 } __mode(byte);
 
 /*
@@ -175,19 +244,34 @@ enum netfs_io_origin {
  * operations to a variety of data stores and then stitch the result together.
  */
 struct netfs_io_request {
-	struct work_struct	work;
+	union {
+		struct work_struct work;
+		struct rcu_head rcu;
+	};
 	struct inode		*inode;		/* The file being accessed */
 	struct address_space	*mapping;	/* The mapping being accessed */
+	struct kiocb		*iocb;		/* AIO completion vector */
 	struct netfs_cache_resources cache_resources;
+	struct list_head	proc_link;	/* Link in netfs_iorequests */
 	struct list_head	subrequests;	/* Contributory I/O operations */
+	struct iov_iter		iter;		/* Unencrypted-side iterator */
+	struct iov_iter		io_iter;	/* I/O (Encrypted-side) iterator */
 	void			*netfs_priv;	/* Private data for the netfs */
+	struct bio_vec		*direct_bv;	/* DIO buffer list (when handling iovec-iter) */
+	unsigned int		direct_bv_count; /* Number of elements in direct_bv[] */
 	unsigned int		debug_id;
+	unsigned int		rsize;		/* Maximum read size (0 for none) */
+	unsigned int		wsize;		/* Maximum write size (0 for none) */
+	unsigned int		subreq_counter;	/* Next subreq->debug_index */
 	atomic_t		nr_outstanding;	/* Number of ops in progress */
 	atomic_t		nr_copy_ops;	/* Number of copy-to-cache ops in progress */
 	size_t			submitted;	/* Amount submitted for I/O so far */
 	size_t			len;		/* Length of the request */
+	size_t			upper_len;	/* Length can be extended to here */
+	size_t			transferred;	/* Amount to be indicated as transferred */
 	short			error;		/* 0 or error that occurred */
 	enum netfs_io_origin	origin;		/* Origin of the request */
+	bool			direct_bv_unpin; /* T if direct_bv[] must be unpinned */
 	loff_t			i_size;		/* Size of the file */
 	loff_t			start;		/* Start position */
 	pgoff_t			no_unlock_folio; /* Don't unlock this folio after read */
@@ -199,17 +283,25 @@ struct netfs_io_request {
 #define NETFS_RREQ_DONT_UNLOCK_FOLIOS	3	/* Don't unlock the folios on completion */
 #define NETFS_RREQ_FAILED		4	/* The request failed */
 #define NETFS_RREQ_IN_PROGRESS		5	/* Unlocked when the request completes */
+#define NETFS_RREQ_WRITE_TO_CACHE	7	/* Need to write to the cache */
+#define NETFS_RREQ_UPLOAD_TO_SERVER	8	/* Need to write to the server */
+#define NETFS_RREQ_NONBLOCK		9	/* Don't block if possible (O_NONBLOCK) */
+#define NETFS_RREQ_BLOCKED		10	/* We blocked */
 	const struct netfs_request_ops *netfs_ops;
+	void (*cleanup)(struct netfs_io_request *req);
 };
 
 /*
  * Operations the network filesystem can/must provide to the helpers.
  */
 struct netfs_request_ops {
+	unsigned int	io_request_size;	/* Alloc size for netfs_io_request struct */
+	unsigned int	io_subrequest_size;	/* Alloc size for netfs_io_subrequest struct */
 	int (*init_request)(struct netfs_io_request *rreq, struct file *file);
 	void (*free_request)(struct netfs_io_request *rreq);
-	int (*begin_cache_operation)(struct netfs_io_request *rreq);
+	void (*free_subrequest)(struct netfs_io_subrequest *rreq);
 
+	/* Read request handling */
 	void (*expand_readahead)(struct netfs_io_request *rreq);
 	bool (*clamp_length)(struct netfs_io_subrequest *subreq);
 	void (*issue_read)(struct netfs_io_subrequest *subreq);
@@ -217,6 +309,14 @@ struct netfs_request_ops {
 	int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
 				 struct folio **foliop, void **_fsdata);
 	void (*done)(struct netfs_io_request *rreq);
+
+	/* Modification handling */
+	void (*update_i_size)(struct inode *inode, loff_t i_size);
+
+	/* Write request handling */
+	void (*create_write_requests)(struct netfs_io_request *wreq,
+				      loff_t start, size_t len);
+	void (*invalidate_cache)(struct netfs_io_request *wreq);
 };
 
 /*
@@ -229,8 +329,7 @@ enum netfs_read_from_hole {
 };
 
 /*
- * Table of operations for access to a cache.  This is obtained by
- * rreq->ops->begin_cache_operation().
+ * Table of operations for access to a cache.
  */
 struct netfs_cache_ops {
 	/* End an operation */
@@ -265,8 +364,8 @@ struct netfs_cache_ops {
 	 * actually do.
 	 */
 	int (*prepare_write)(struct netfs_cache_resources *cres,
-			     loff_t *_start, size_t *_len, loff_t i_size,
-			     bool no_space_allocated_yet);
+			     loff_t *_start, size_t *_len, size_t upper_len,
+			     loff_t i_size, bool no_space_allocated_yet);
 
 	/* Prepare an on-demand read operation, shortening it to a cached/uncached
 	 * boundary as appropriate.
@@ -284,22 +383,62 @@ struct netfs_cache_ops {
 			       loff_t *_data_start, size_t *_data_len);
 };
 
+/* High-level read API. */
+ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter);
+ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter);
+ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter);
+
+/* High-level write API */
+ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
+			    struct netfs_group *netfs_group);
+ssize_t netfs_buffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *from,
+					 struct netfs_group *netfs_group);
+ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from);
+ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from);
+
+/* Address operations API */
 struct readahead_control;
 void netfs_readahead(struct readahead_control *);
 int netfs_read_folio(struct file *, struct folio *);
 int netfs_write_begin(struct netfs_inode *, struct file *,
-		struct address_space *, loff_t pos, unsigned int len,
-		struct folio **, void **fsdata);
-
+		      struct address_space *, loff_t pos, unsigned int len,
+		      struct folio **, void **fsdata);
+int netfs_writepages(struct address_space *mapping,
+		     struct writeback_control *wbc);
+bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio);
+int netfs_unpin_writeback(struct inode *inode, struct writeback_control *wbc);
+void netfs_clear_inode_writeback(struct inode *inode, const void *aux);
+void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length);
+bool netfs_release_folio(struct folio *folio, gfp_t gfp);
+int netfs_launder_folio(struct folio *folio);
+
+/* VMA operations API. */
+vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group);
+
+/* (Sub)request management API. */
 void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool);
 void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
 			  enum netfs_sreq_ref_trace what);
 void netfs_put_subrequest(struct netfs_io_subrequest *subreq,
 			  bool was_async, enum netfs_sreq_ref_trace what);
-void netfs_stats_show(struct seq_file *);
 ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
 				struct iov_iter *new,
 				iov_iter_extraction_t extraction_flags);
+size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset,
+			size_t max_size, size_t max_segs);
+struct netfs_io_subrequest *netfs_create_write_request(
+	struct netfs_io_request *wreq, enum netfs_io_source dest,
+	loff_t start, size_t len, work_func_t worker);
+void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
+				       bool was_async);
+void netfs_queue_write_request(struct netfs_io_subrequest *subreq);
+
+int netfs_start_io_read(struct inode *inode);
+void netfs_end_io_read(struct inode *inode);
+int netfs_start_io_write(struct inode *inode);
+void netfs_end_io_write(struct inode *inode);
+int netfs_start_io_direct(struct inode *inode);
+void netfs_end_io_direct(struct inode *inode);
 
 /**
  * netfs_inode - Get the netfs inode context from the inode
@@ -317,30 +456,44 @@ static inline struct netfs_inode *netfs_inode(struct inode *inode)
  * netfs_inode_init - Initialise a netfslib inode context
  * @ctx: The netfs inode to initialise
  * @ops: The netfs's operations list
+ * @use_zero_point: True to use the zero_point read optimisation
  *
  * Initialise the netfs library context struct.  This is expected to follow on
  * directly from the VFS inode struct.
  */
 static inline void netfs_inode_init(struct netfs_inode *ctx,
-				    const struct netfs_request_ops *ops)
+				    const struct netfs_request_ops *ops,
+				    bool use_zero_point)
 {
 	ctx->ops = ops;
 	ctx->remote_i_size = i_size_read(&ctx->inode);
+	ctx->zero_point = LLONG_MAX;
+	ctx->flags = 0;
 #if IS_ENABLED(CONFIG_FSCACHE)
 	ctx->cache = NULL;
 #endif
+	/* ->release_folio() drives zero_point */
+	if (use_zero_point) {
+		ctx->zero_point = ctx->remote_i_size;
+		mapping_set_release_always(ctx->inode.i_mapping);
+	}
 }
 
 /**
  * netfs_resize_file - Note that a file got resized
  * @ctx: The netfs inode being resized
  * @new_i_size: The new file size
+ * @changed_on_server: The change was applied to the server
  *
  * Inform the netfs lib that a file got resized so that it can adjust its state.
  */
-static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size)
+static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size,
+				     bool changed_on_server)
 {
-	ctx->remote_i_size = new_i_size;
+	if (changed_on_server)
+		ctx->remote_i_size = new_i_size;
+	if (new_i_size < ctx->zero_point)
+		ctx->zero_point = new_i_size;
 }
 
 /**
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 44325c068b6a..462c21e0e417 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -20,7 +20,6 @@
 #define NVMF_TRSVCID_SIZE	32
 #define NVMF_TRADDR_SIZE	256
 #define NVMF_TSAS_SIZE		256
-#define NVMF_AUTH_HASH_LEN	64
 
 #define NVME_DISC_SUBSYS_NAME	"nqn.2014-08.org.nvmexpress.discovery"
 
diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index a72661e47faa..9042bca5bb84 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -2,10 +2,7 @@
 #ifndef _LINUX_OF_DEVICE_H
 #define _LINUX_OF_DEVICE_H
 
-#include <linux/platform_device.h>
-#include <linux/of_platform.h> /* temporary until merge */
-
-#include <linux/of.h>
+#include <linux/device/driver.h>
 
 struct device;
 struct of_device_id;
diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index fadfea575485..a2ff1ad48f7f 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -7,11 +7,11 @@
  */
 
 #include <linux/mod_devicetable.h>
-#include <linux/of_device.h>
-#include <linux/platform_device.h>
 
 struct device;
+struct device_node;
 struct of_device_id;
+struct platform_device;
 
 /**
  * struct of_dev_auxdata - lookup table entry for device names & platform_data
diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h
index 7c8d65414a70..7d8025fb74b7 100644
--- a/include/linux/power/bq27xxx_battery.h
+++ b/include/linux/power/bq27xxx_battery.h
@@ -83,5 +83,6 @@ struct bq27xxx_device_info {
 void bq27xxx_battery_update(struct bq27xxx_device_info *di);
 int bq27xxx_battery_setup(struct bq27xxx_device_info *di);
 void bq27xxx_battery_teardown(struct bq27xxx_device_info *di);
+extern const struct dev_pm_ops bq27xxx_battery_battery_pm_ops;
 
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index cdb8ea53c365..ffe8f618ab86 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -920,7 +920,7 @@ struct task_struct {
 	unsigned			sched_rt_mutex:1;
 #endif
 
-	/* Bit to tell LSMs we're in execve(): */
+	/* Bit to tell TOMOYO we're in execve(): */
 	unsigned			in_execve:1;
 	unsigned			in_iowait:1;
 #ifndef TIF_RESTORE_SIGMASK
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 888a4b217829..e65ec3fd2799 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -505,12 +505,6 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
 	return !!psock->saved_data_ready;
 }
 
-static inline bool sk_is_udp(const struct sock *sk)
-{
-	return sk->sk_type == SOCK_DGRAM &&
-	       sk->sk_protocol == IPPROTO_UDP;
-}
-
 #if IS_ENABLED(CONFIG_NET_SOCK_MSG)
 
 #define BPF_F_STRPARSER	(1UL << 1)
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index eaac8b0da25b..3fcd20de6ca8 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -449,6 +449,12 @@ static __always_inline int spin_is_contended(spinlock_t *lock)
 	return raw_spin_is_contended(&lock->rlock);
 }
 
+#define assert_spin_locked(lock)	assert_raw_spin_locked(&(lock)->rlock)
+
+#else  /* !CONFIG_PREEMPT_RT */
+# include <linux/spinlock_rt.h>
+#endif /* CONFIG_PREEMPT_RT */
+
 /*
  * Does a critical section need to be broken due to another
  * task waiting?: (technically does not depend on CONFIG_PREEMPTION,
@@ -480,12 +486,6 @@ static inline int rwlock_needbreak(rwlock_t *lock)
 #endif
 }
 
-#define assert_spin_locked(lock)	assert_raw_spin_locked(&(lock)->rlock)
-
-#else  /* !CONFIG_PREEMPT_RT */
-# include <linux/spinlock_rt.h>
-#endif /* CONFIG_PREEMPT_RT */
-
 /*
  * Pull the atomic_t declaration:
  * (asm-mips/atomic.h needs above definitions)
diff --git a/include/linux/string.h b/include/linux/string.h
index ce137830a0b9..ab148d8dbfc1 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -66,9 +66,6 @@ extern char * strcpy(char *,const char *);
 #ifndef __HAVE_ARCH_STRNCPY
 extern char * strncpy(char *,const char *, __kernel_size_t);
 #endif
-#ifndef __HAVE_ARCH_STRLCPY
-size_t strlcpy(char *, const char *, size_t);
-#endif
 #ifndef __HAVE_ARCH_STRSCPY
 ssize_t strscpy(char *, const char *, size_t);
 #endif
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 6d0a14f7019d..453736fd1d23 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -60,7 +60,7 @@ struct writeback_control {
 	unsigned for_reclaim:1;		/* Invoked from the page allocator */
 	unsigned range_cyclic:1;	/* range_start is cyclic */
 	unsigned for_sync:1;		/* sync(2) WB_SYNC_ALL writeback */
-	unsigned unpinned_fscache_wb:1;	/* Cleared I_PINNING_FSCACHE_WB */
+	unsigned unpinned_netfs_wb:1;	/* Cleared I_PINNING_NETFS_WB */
 
 	/*
 	 * When writeback IOs are bounced through async layers, only the
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index d0a2f827d5f2..9ab4bf704e86 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -357,4 +357,12 @@ static inline bool inet_csk_has_ulp(const struct sock *sk)
 	return inet_test_bit(IS_ICSK, sk) && !!inet_csk(sk)->icsk_ulp_ops;
 }
 
+static inline void inet_init_csk_locks(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	spin_lock_init(&icsk->icsk_accept_queue.rskq_lock);
+	spin_lock_init(&icsk->icsk_accept_queue.fastopenq.lock);
+}
+
 #endif /* _INET_CONNECTION_SOCK_H */
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index aa86453f6b9b..d94c242eb3ed 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -307,11 +307,6 @@ static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet)
 #define inet_assign_bit(nr, sk, val)		\
 	assign_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags, val)
 
-static inline bool sk_is_inet(struct sock *sk)
-{
-	return sk->sk_family == AF_INET || sk->sk_family == AF_INET6;
-}
-
 /**
  * sk_to_full_sk - Access to a full socket
  * @sk: pointer to a socket
diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h
index 7e73f8e5e497..1d55ba7c45be 100644
--- a/include/net/llc_pdu.h
+++ b/include/net/llc_pdu.h
@@ -262,8 +262,7 @@ static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type,
  */
 static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa)
 {
-	if (skb->protocol == htons(ETH_P_802_2))
-		memcpy(sa, eth_hdr(skb)->h_source, ETH_ALEN);
+	memcpy(sa, eth_hdr(skb)->h_source, ETH_ALEN);
 }
 
 /**
@@ -275,8 +274,7 @@ static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa)
  */
 static inline void llc_pdu_decode_da(struct sk_buff *skb, u8 *da)
 {
-	if (skb->protocol == htons(ETH_P_802_2))
-		memcpy(da, eth_hdr(skb)->h_dest, ETH_ALEN);
+	memcpy(da, eth_hdr(skb)->h_dest, ETH_ALEN);
 }
 
 /**
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index b157c5cafd14..4e1ea18eb5f0 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -205,6 +205,7 @@ static inline void nft_data_copy(u32 *dst, const struct nft_data *src,
  *	@nla: netlink attributes
  *	@portid: netlink portID of the original message
  *	@seq: netlink sequence number
+ *	@flags: modifiers to new request
  *	@family: protocol family
  *	@level: depth of the chains
  *	@report: notify via unicast netlink message
@@ -282,6 +283,7 @@ struct nft_elem_priv { };
  *
  *	@key: element key
  *	@key_end: closing element key
+ *	@data: element data
  *	@priv: element private data and extensions
  */
 struct nft_set_elem {
@@ -325,10 +327,10 @@ struct nft_set_iter {
  *	@dtype: data type
  *	@dlen: data length
  *	@objtype: object type
- *	@flags: flags
  *	@size: number of set elements
  *	@policy: set policy
  *	@gc_int: garbage collector interval
+ *	@timeout: element timeout
  *	@field_len: length of each field in concatenation, bytes
  *	@field_count: number of concatenated fields in element
  *	@expr: set must support for expressions
@@ -351,9 +353,9 @@ struct nft_set_desc {
 /**
  *	enum nft_set_class - performance class
  *
- *	@NFT_LOOKUP_O_1: constant, O(1)
- *	@NFT_LOOKUP_O_LOG_N: logarithmic, O(log N)
- *	@NFT_LOOKUP_O_N: linear, O(N)
+ *	@NFT_SET_CLASS_O_1: constant, O(1)
+ *	@NFT_SET_CLASS_O_LOG_N: logarithmic, O(log N)
+ *	@NFT_SET_CLASS_O_N: linear, O(N)
  */
 enum nft_set_class {
 	NFT_SET_CLASS_O_1,
@@ -422,9 +424,13 @@ struct nft_set_ext;
  *	@remove: remove element from set
  *	@walk: iterate over all set elements
  *	@get: get set elements
+ *	@commit: commit set elements
+ *	@abort: abort set elements
  *	@privsize: function to return size of set private data
+ *	@estimate: estimate the required memory size and the lookup complexity class
  *	@init: initialize private data of new set instance
  *	@destroy: destroy private data of set instance
+ *	@gc_init: initialize garbage collection
  *	@elemsize: element private size
  *
  *	Operations lookup, update and delete have simpler interfaces, are faster
@@ -540,13 +546,16 @@ struct nft_set_elem_expr {
  *	@policy: set parameterization (see enum nft_set_policies)
  *	@udlen: user data length
  *	@udata: user data
- *	@expr: stateful expression
+ *	@pending_update: list of pending set element updates
  * 	@ops: set ops
  * 	@flags: set flags
  *	@dead: set will be freed, never cleared
  *	@genmask: generation mask
  * 	@klen: key length
  * 	@dlen: data length
+ *	@num_exprs: number of stateful expressions
+ *	@exprs: stateful expressions
+ *	@catchall_list: list of catch-all set elements
  * 	@data: private set data
  */
 struct nft_set {
@@ -692,6 +701,7 @@ extern const struct nft_set_ext_type nft_set_ext_types[];
  *
  *	@len: length of extension area
  *	@offset: offsets of individual extension types
+ *	@ext_len: length of the expected extension (used as a sanity check)
  */
 struct nft_set_ext_tmpl {
 	u16	len;
@@ -840,6 +850,7 @@ struct nft_expr_ops;
  *	@select_ops: function to select nft_expr_ops
  *	@release_ops: release nft_expr_ops
  *	@ops: default ops, used when no select_ops functions is present
+ *	@inner_ops: inner ops, used for inner packet operation
  *	@list: used internally
  *	@name: Identifier
  *	@owner: module reference
@@ -881,14 +892,22 @@ struct nft_offload_ctx;
  *	struct nft_expr_ops - nf_tables expression operations
  *
  *	@eval: Expression evaluation function
+ *	@clone: Expression clone function
  *	@size: full expression size, including private data size
  *	@init: initialization function
  *	@activate: activate expression in the next generation
  *	@deactivate: deactivate expression in next generation
  *	@destroy: destruction function, called after synchronize_rcu
+ *	@destroy_clone: destruction clone function
  *	@dump: function to dump parameters
- *	@type: expression type
  *	@validate: validate expression, called during loop detection
+ *	@reduce: reduce expression
+ *	@gc: garbage collection expression
+ *	@offload: hardware offload expression
+ *	@offload_action: function to report true/false to allocate one slot or not in the flow
+ *			 offload array
+ *	@offload_stats: function to synchronize hardware stats via updating the counter expression
+ *	@type: expression type
  *	@data: extra data to attach to this expression operation
  */
 struct nft_expr_ops {
@@ -1041,14 +1060,21 @@ struct nft_rule_blob {
 /**
  *	struct nft_chain - nf_tables chain
  *
+ *	@blob_gen_0: rule blob pointer to the current generation
+ *	@blob_gen_1: rule blob pointer to the future generation
  *	@rules: list of rules in the chain
  *	@list: used internally
  *	@rhlhead: used internally
  *	@table: table that this chain belongs to
  *	@handle: chain handle
  *	@use: number of jump references to this chain
- *	@flags: bitmask of enum nft_chain_flags
+ *	@flags: bitmask of enum NFTA_CHAIN_FLAGS
+ *	@bound: bind or not
+ *	@genmask: generation mask
  *	@name: name of the chain
+ *	@udlen: user data length
+ *	@udata: user data in the chain
+ *	@blob_next: rule blob pointer to the next in the chain
  */
 struct nft_chain {
 	struct nft_rule_blob		__rcu *blob_gen_0;
@@ -1146,6 +1172,7 @@ struct nft_hook {
  *	@hook_list: list of netfilter hooks (for NFPROTO_NETDEV family)
  *	@type: chain type
  *	@policy: default policy
+ *	@flags: indicates whether the base chain is disabled or not
  *	@stats: per-cpu chain stats
  *	@chain: the chain
  *	@flow_block: flow block (for hardware offload)
@@ -1274,11 +1301,13 @@ struct nft_object_hash_key {
  *	struct nft_object - nf_tables stateful object
  *
  *	@list: table stateful object list node
- *	@key:  keys that identify this object
  *	@rhlhead: nft_objname_ht node
+ *	@key: keys that identify this object
  *	@genmask: generation mask
  *	@use: number of references to this stateful object
  *	@handle: unique object handle
+ *	@udlen: length of user data
+ *	@udata: user data
  *	@ops: object operations
  *	@data: object data, layout depends on type
  */
@@ -1344,6 +1373,7 @@ struct nft_object_type {
  *	@destroy: release existing stateful object
  *	@dump: netlink dump stateful object
  *	@update: update stateful object
+ *	@type: pointer to object type
  */
 struct nft_object_ops {
 	void				(*eval)(struct nft_object *obj,
@@ -1379,9 +1409,8 @@ void nft_unregister_obj(struct nft_object_type *obj_type);
  *	@genmask: generation mask
  *	@use: number of references to this flow table
  * 	@handle: unique object handle
- *	@dev_name: array of device names
+ *	@hook_list: hook list for hooks per net_device in flowtables
  *	@data: rhashtable and garbage collector
- * 	@ops: array of hooks
  */
 struct nft_flowtable {
 	struct list_head		list;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index ba3e1b315de8..934fdb977551 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -375,6 +375,10 @@ struct tcf_proto_ops {
 						struct nlattr **tca,
 						struct netlink_ext_ack *extack);
 	void			(*tmplt_destroy)(void *tmplt_priv);
+	void			(*tmplt_reoffload)(struct tcf_chain *chain,
+						   bool add,
+						   flow_setup_cb_t *cb,
+						   void *cb_priv);
 	struct tcf_exts *	(*get_exts)(const struct tcf_proto *tp,
 					    u32 handle);
 
diff --git a/include/net/sock.h b/include/net/sock.h
index a7f815c7cfdf..54ca8dcbfb43 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2765,9 +2765,25 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags)
 			   &skb_shinfo(skb)->tskey);
 }
 
+static inline bool sk_is_inet(const struct sock *sk)
+{
+	int family = READ_ONCE(sk->sk_family);
+
+	return family == AF_INET || family == AF_INET6;
+}
+
 static inline bool sk_is_tcp(const struct sock *sk)
 {
-	return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP;
+	return sk_is_inet(sk) &&
+	       sk->sk_type == SOCK_STREAM &&
+	       sk->sk_protocol == IPPROTO_TCP;
+}
+
+static inline bool sk_is_udp(const struct sock *sk)
+{
+	return sk_is_inet(sk) &&
+	       sk->sk_type == SOCK_DGRAM &&
+	       sk->sk_protocol == IPPROTO_UDP;
 }
 
 static inline bool sk_is_stream_unix(const struct sock *sk)
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 526c1e7f505e..c9aec9ab6191 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -159,11 +159,29 @@ static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
 	return ret;
 }
 
+static inline void xsk_buff_del_tail(struct xdp_buff *tail)
+{
+	struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp);
+
+	list_del(&xskb->xskb_list_node);
+}
+
+static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
+{
+	struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
+	struct xdp_buff_xsk *frag;
+
+	frag = list_last_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk,
+			       xskb_list_node);
+	return &frag->xdp;
+}
+
 static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
 {
 	xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM;
 	xdp->data_meta = xdp->data;
 	xdp->data_end = xdp->data + size;
+	xdp->flags = 0;
 }
 
 static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool,
@@ -350,6 +368,15 @@ static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
 	return NULL;
 }
 
+static inline void xsk_buff_del_tail(struct xdp_buff *tail)
+{
+}
+
+static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
+{
+	return NULL;
+}
+
 static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
 {
 }
diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h
index 0a86ab8d47b9..b00d65417c31 100644
--- a/include/sound/tas2781.h
+++ b/include/sound/tas2781.h
@@ -1,13 +1,13 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 //
-// ALSA SoC Texas Instruments TAS2781 Audio Smart Amplifier
+// ALSA SoC Texas Instruments TAS2563/TAS2781 Audio Smart Amplifier
 //
 // Copyright (C) 2022 - 2023 Texas Instruments Incorporated
 // https://www.ti.com
 //
-// The TAS2781 driver implements a flexible and configurable
+// The TAS2563/TAS2781 driver implements a flexible and configurable
 // algo coefficient setting for one, two, or even multiple
-// TAS2781 chips.
+// TAS2563/TAS2781 chips.
 //
 // Author: Shenghao Ding <shenghao-ding@ti.com>
 // Author: Kevin Lu <kevin-lu@ti.com>
@@ -60,7 +60,8 @@
 #define TASDEVICE_CMD_FIELD_W		0x4
 
 enum audio_device {
-	TAS2781	= 0,
+	TAS2563,
+	TAS2781,
 };
 
 enum device_catlog_id {
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 5194b7e6dc8d..08f2c93d6b16 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -902,37 +902,6 @@ TRACE_EVENT(afs_dir_check_failed,
 		      __entry->vnode, __entry->off, __entry->i_size)
 	    );
 
-TRACE_EVENT(afs_folio_dirty,
-	    TP_PROTO(struct afs_vnode *vnode, const char *where, struct folio *folio),
-
-	    TP_ARGS(vnode, where, folio),
-
-	    TP_STRUCT__entry(
-		    __field(struct afs_vnode *,		vnode)
-		    __field(const char *,		where)
-		    __field(pgoff_t,			index)
-		    __field(unsigned long,		from)
-		    __field(unsigned long,		to)
-			     ),
-
-	    TP_fast_assign(
-		    unsigned long priv = (unsigned long)folio_get_private(folio);
-		    __entry->vnode = vnode;
-		    __entry->where = where;
-		    __entry->index = folio_index(folio);
-		    __entry->from  = afs_folio_dirty_from(folio, priv);
-		    __entry->to    = afs_folio_dirty_to(folio, priv);
-		    __entry->to   |= (afs_is_folio_dirty_mmapped(priv) ?
-				      (1UL << (BITS_PER_LONG - 1)) : 0);
-			   ),
-
-	    TP_printk("vn=%p %lx %s %lx-%lx%s",
-		      __entry->vnode, __entry->index, __entry->where,
-		      __entry->from,
-		      __entry->to & ~(1UL << (BITS_PER_LONG - 1)),
-		      __entry->to & (1UL << (BITS_PER_LONG - 1)) ? " M" : "")
-	    );
-
 TRACE_EVENT(afs_call_state,
 	    TP_PROTO(struct afs_call *call,
 		     enum afs_call_state from,
@@ -1102,6 +1071,31 @@ TRACE_EVENT(afs_file_error,
 		      __print_symbolic(__entry->where, afs_file_errors))
 	    );
 
+TRACE_EVENT(afs_bulkstat_error,
+	    TP_PROTO(struct afs_operation *op, struct afs_fid *fid, unsigned int index, s32 abort),
+
+	    TP_ARGS(op, fid, index, abort),
+
+	    TP_STRUCT__entry(
+		    __field_struct(struct afs_fid,	fid)
+		    __field(unsigned int,		op)
+		    __field(unsigned int,		index)
+		    __field(s32,			abort)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->op = op->debug_id;
+		    __entry->fid = *fid;
+		    __entry->index = index;
+		    __entry->abort = abort;
+			   ),
+
+	    TP_printk("OP=%08x[%02x] %llx:%llx:%x a=%d",
+		      __entry->op, __entry->index,
+		      __entry->fid.vid, __entry->fid.vnode, __entry->fid.unique,
+		      __entry->abort)
+	    );
+
 TRACE_EVENT(afs_cm_no_server,
 	    TP_PROTO(struct afs_call *call, struct sockaddr_rxrpc *srx),
 
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index beec534cbaab..447a8c21cf57 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -16,34 +16,57 @@
  * Define enums for tracing information.
  */
 #define netfs_read_traces					\
+	EM(netfs_read_trace_dio_read,		"DIO-READ ")	\
 	EM(netfs_read_trace_expanded,		"EXPANDED ")	\
 	EM(netfs_read_trace_readahead,		"READAHEAD")	\
 	EM(netfs_read_trace_readpage,		"READPAGE ")	\
+	EM(netfs_read_trace_prefetch_for_write,	"PREFETCHW")	\
 	E_(netfs_read_trace_write_begin,	"WRITEBEGN")
 
+#define netfs_write_traces					\
+	EM(netfs_write_trace_dio_write,		"DIO-WRITE")	\
+	EM(netfs_write_trace_launder,		"LAUNDER  ")	\
+	EM(netfs_write_trace_unbuffered_write,	"UNB-WRITE")	\
+	EM(netfs_write_trace_writeback,		"WRITEBACK")	\
+	E_(netfs_write_trace_writethrough,	"WRITETHRU")
+
 #define netfs_rreq_origins					\
 	EM(NETFS_READAHEAD,			"RA")		\
 	EM(NETFS_READPAGE,			"RP")		\
-	E_(NETFS_READ_FOR_WRITE,		"RW")
+	EM(NETFS_READ_FOR_WRITE,		"RW")		\
+	EM(NETFS_WRITEBACK,			"WB")		\
+	EM(NETFS_WRITETHROUGH,			"WT")		\
+	EM(NETFS_LAUNDER_WRITE,			"LW")		\
+	EM(NETFS_UNBUFFERED_WRITE,		"UW")		\
+	EM(NETFS_DIO_READ,			"DR")		\
+	E_(NETFS_DIO_WRITE,			"DW")
 
 #define netfs_rreq_traces					\
 	EM(netfs_rreq_trace_assess,		"ASSESS ")	\
 	EM(netfs_rreq_trace_copy,		"COPY   ")	\
 	EM(netfs_rreq_trace_done,		"DONE   ")	\
 	EM(netfs_rreq_trace_free,		"FREE   ")	\
+	EM(netfs_rreq_trace_redirty,		"REDIRTY")	\
 	EM(netfs_rreq_trace_resubmit,		"RESUBMT")	\
 	EM(netfs_rreq_trace_unlock,		"UNLOCK ")	\
-	E_(netfs_rreq_trace_unmark,		"UNMARK ")
+	EM(netfs_rreq_trace_unmark,		"UNMARK ")	\
+	EM(netfs_rreq_trace_wait_ip,		"WAIT-IP")	\
+	EM(netfs_rreq_trace_wake_ip,		"WAKE-IP")	\
+	E_(netfs_rreq_trace_write_done,		"WR-DONE")
 
 #define netfs_sreq_sources					\
 	EM(NETFS_FILL_WITH_ZEROES,		"ZERO")		\
 	EM(NETFS_DOWNLOAD_FROM_SERVER,		"DOWN")		\
 	EM(NETFS_READ_FROM_CACHE,		"READ")		\
-	E_(NETFS_INVALID_READ,			"INVL")		\
+	EM(NETFS_INVALID_READ,			"INVL")		\
+	EM(NETFS_UPLOAD_TO_SERVER,		"UPLD")		\
+	EM(NETFS_WRITE_TO_CACHE,		"WRIT")		\
+	E_(NETFS_INVALID_WRITE,			"INVL")
 
 #define netfs_sreq_traces					\
 	EM(netfs_sreq_trace_download_instead,	"RDOWN")	\
 	EM(netfs_sreq_trace_free,		"FREE ")	\
+	EM(netfs_sreq_trace_limited,		"LIMIT")	\
 	EM(netfs_sreq_trace_prepare,		"PREP ")	\
 	EM(netfs_sreq_trace_resubmit_short,	"SHORT")	\
 	EM(netfs_sreq_trace_submit,		"SUBMT")	\
@@ -55,19 +78,24 @@
 #define netfs_failures							\
 	EM(netfs_fail_check_write_begin,	"check-write-begin")	\
 	EM(netfs_fail_copy_to_cache,		"copy-to-cache")	\
+	EM(netfs_fail_dio_read_short,		"dio-read-short")	\
+	EM(netfs_fail_dio_read_zero,		"dio-read-zero")	\
 	EM(netfs_fail_read,			"read")			\
 	EM(netfs_fail_short_read,		"short-read")		\
-	E_(netfs_fail_prepare_write,		"prep-write")
+	EM(netfs_fail_prepare_write,		"prep-write")		\
+	E_(netfs_fail_write,			"write")
 
 #define netfs_rreq_ref_traces					\
-	EM(netfs_rreq_trace_get_hold,		"GET HOLD   ")	\
+	EM(netfs_rreq_trace_get_for_outstanding,"GET OUTSTND")	\
 	EM(netfs_rreq_trace_get_subreq,		"GET SUBREQ ")	\
 	EM(netfs_rreq_trace_put_complete,	"PUT COMPLT ")	\
 	EM(netfs_rreq_trace_put_discard,	"PUT DISCARD")	\
 	EM(netfs_rreq_trace_put_failed,		"PUT FAILED ")	\
-	EM(netfs_rreq_trace_put_hold,		"PUT HOLD   ")	\
+	EM(netfs_rreq_trace_put_no_submit,	"PUT NO-SUBM")	\
+	EM(netfs_rreq_trace_put_return,		"PUT RETURN ")	\
 	EM(netfs_rreq_trace_put_subreq,		"PUT SUBREQ ")	\
-	EM(netfs_rreq_trace_put_zero_len,	"PUT ZEROLEN")	\
+	EM(netfs_rreq_trace_put_work,		"PUT WORK   ")	\
+	EM(netfs_rreq_trace_see_work,		"SEE WORK   ")	\
 	E_(netfs_rreq_trace_new,		"NEW        ")
 
 #define netfs_sreq_ref_traces					\
@@ -76,11 +104,44 @@
 	EM(netfs_sreq_trace_get_short_read,	"GET SHORTRD")	\
 	EM(netfs_sreq_trace_new,		"NEW        ")	\
 	EM(netfs_sreq_trace_put_clear,		"PUT CLEAR  ")	\
+	EM(netfs_sreq_trace_put_discard,	"PUT DISCARD")	\
 	EM(netfs_sreq_trace_put_failed,		"PUT FAILED ")	\
 	EM(netfs_sreq_trace_put_merged,		"PUT MERGED ")	\
 	EM(netfs_sreq_trace_put_no_copy,	"PUT NO COPY")	\
+	EM(netfs_sreq_trace_put_wip,		"PUT WIP    ")	\
+	EM(netfs_sreq_trace_put_work,		"PUT WORK   ")	\
 	E_(netfs_sreq_trace_put_terminated,	"PUT TERM   ")
 
+#define netfs_folio_traces					\
+	/* The first few correspond to enum netfs_how_to_modify */	\
+	EM(netfs_folio_is_uptodate,		"mod-uptodate")	\
+	EM(netfs_just_prefetch,			"mod-prefetch")	\
+	EM(netfs_whole_folio_modify,		"mod-whole-f")	\
+	EM(netfs_modify_and_clear,		"mod-n-clear")	\
+	EM(netfs_streaming_write,		"mod-streamw")	\
+	EM(netfs_streaming_write_cont,		"mod-streamw+")	\
+	EM(netfs_flush_content,			"flush")	\
+	EM(netfs_streaming_filled_page,		"mod-streamw-f") \
+	EM(netfs_streaming_cont_filled_page,	"mod-streamw-f+") \
+	/* The rest are for writeback */			\
+	EM(netfs_folio_trace_clear,		"clear")	\
+	EM(netfs_folio_trace_clear_s,		"clear-s")	\
+	EM(netfs_folio_trace_clear_g,		"clear-g")	\
+	EM(netfs_folio_trace_copy_to_cache,	"copy")		\
+	EM(netfs_folio_trace_end_copy,		"end-copy")	\
+	EM(netfs_folio_trace_filled_gaps,	"filled-gaps")	\
+	EM(netfs_folio_trace_kill,		"kill")		\
+	EM(netfs_folio_trace_launder,		"launder")	\
+	EM(netfs_folio_trace_mkwrite,		"mkwrite")	\
+	EM(netfs_folio_trace_mkwrite_plus,	"mkwrite+")	\
+	EM(netfs_folio_trace_read_gaps,		"read-gaps")	\
+	EM(netfs_folio_trace_redirty,		"redirty")	\
+	EM(netfs_folio_trace_redirtied,		"redirtied")	\
+	EM(netfs_folio_trace_store,		"store")	\
+	EM(netfs_folio_trace_store_plus,	"store+")	\
+	EM(netfs_folio_trace_wthru,		"wthru")	\
+	E_(netfs_folio_trace_wthru_plus,	"wthru+")
+
 #ifndef __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY
 #define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY
 
@@ -90,11 +151,13 @@
 #define E_(a, b) a
 
 enum netfs_read_trace { netfs_read_traces } __mode(byte);
+enum netfs_write_trace { netfs_write_traces } __mode(byte);
 enum netfs_rreq_trace { netfs_rreq_traces } __mode(byte);
 enum netfs_sreq_trace { netfs_sreq_traces } __mode(byte);
 enum netfs_failure { netfs_failures } __mode(byte);
 enum netfs_rreq_ref_trace { netfs_rreq_ref_traces } __mode(byte);
 enum netfs_sreq_ref_trace { netfs_sreq_ref_traces } __mode(byte);
+enum netfs_folio_trace { netfs_folio_traces } __mode(byte);
 
 #endif
 
@@ -107,6 +170,7 @@ enum netfs_sreq_ref_trace { netfs_sreq_ref_traces } __mode(byte);
 #define E_(a, b) TRACE_DEFINE_ENUM(a);
 
 netfs_read_traces;
+netfs_write_traces;
 netfs_rreq_origins;
 netfs_rreq_traces;
 netfs_sreq_sources;
@@ -114,6 +178,7 @@ netfs_sreq_traces;
 netfs_failures;
 netfs_rreq_ref_traces;
 netfs_sreq_ref_traces;
+netfs_folio_traces;
 
 /*
  * Now redefine the EM() and E_() macros to map the enums to the strings that
@@ -314,6 +379,82 @@ TRACE_EVENT(netfs_sreq_ref,
 		      __entry->ref)
 	    );
 
+TRACE_EVENT(netfs_folio,
+	    TP_PROTO(struct folio *folio, enum netfs_folio_trace why),
+
+	    TP_ARGS(folio, why),
+
+	    TP_STRUCT__entry(
+		    __field(ino_t,			ino)
+		    __field(pgoff_t,			index)
+		    __field(unsigned int,		nr)
+		    __field(enum netfs_folio_trace,	why)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->ino = folio->mapping->host->i_ino;
+		    __entry->why = why;
+		    __entry->index = folio_index(folio);
+		    __entry->nr = folio_nr_pages(folio);
+			   ),
+
+	    TP_printk("i=%05lx ix=%05lx-%05lx %s",
+		      __entry->ino, __entry->index, __entry->index + __entry->nr - 1,
+		      __print_symbolic(__entry->why, netfs_folio_traces))
+	    );
+
+TRACE_EVENT(netfs_write_iter,
+	    TP_PROTO(const struct kiocb *iocb, const struct iov_iter *from),
+
+	    TP_ARGS(iocb, from),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned long long,		start		)
+		    __field(size_t,			len		)
+		    __field(unsigned int,		flags		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->start	= iocb->ki_pos;
+		    __entry->len	= iov_iter_count(from);
+		    __entry->flags	= iocb->ki_flags;
+			   ),
+
+	    TP_printk("WRITE-ITER s=%llx l=%zx f=%x",
+		      __entry->start, __entry->len, __entry->flags)
+	    );
+
+TRACE_EVENT(netfs_write,
+	    TP_PROTO(const struct netfs_io_request *wreq,
+		     enum netfs_write_trace what),
+
+	    TP_ARGS(wreq, what),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		wreq		)
+		    __field(unsigned int,		cookie		)
+		    __field(enum netfs_write_trace,	what		)
+		    __field(unsigned long long,		start		)
+		    __field(size_t,			len		)
+			     ),
+
+	    TP_fast_assign(
+		    struct netfs_inode *__ctx = netfs_inode(wreq->inode);
+		    struct fscache_cookie *__cookie = netfs_i_cookie(__ctx);
+		    __entry->wreq	= wreq->debug_id;
+		    __entry->cookie	= __cookie ? __cookie->debug_id : 0;
+		    __entry->what	= what;
+		    __entry->start	= wreq->start;
+		    __entry->len	= wreq->len;
+			   ),
+
+	    TP_printk("R=%08x %s c=%08x by=%llx-%llx",
+		      __entry->wreq,
+		      __print_symbolic(__entry->what, netfs_write_traces),
+		      __entry->cookie,
+		      __entry->start, __entry->start + __entry->len - 1)
+	    );
+
 #undef EM
 #undef E_
 #endif /* _TRACE_NETFS_H */
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 7c29d82db9ee..f8bc34a6bcfa 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -614,6 +614,9 @@ struct btrfs_ioctl_clone_range_args {
  */
 #define BTRFS_DEFRAG_RANGE_COMPRESS 1
 #define BTRFS_DEFRAG_RANGE_START_IO 2
+#define BTRFS_DEFRAG_RANGE_FLAGS_SUPP	(BTRFS_DEFRAG_RANGE_COMPRESS |		\
+					 BTRFS_DEFRAG_RANGE_START_IO)
+
 struct btrfs_ioctl_defrag_range_args {
 	/* start of the defrag operation */
 	__u64 start;
diff --git a/init/Kconfig b/init/Kconfig
index 8df18f3a9748..8d4e836e1b6b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -876,6 +876,18 @@ config CC_NO_ARRAY_BOUNDS
 	bool
 	default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC11_NO_ARRAY_BOUNDS
 
+# Currently, disable -Wstringop-overflow for GCC 11, globally.
+config GCC11_NO_STRINGOP_OVERFLOW
+	def_bool y
+
+config CC_NO_STRINGOP_OVERFLOW
+	bool
+	default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC_VERSION < 120000 && GCC11_NO_STRINGOP_OVERFLOW
+
+config CC_STRINGOP_OVERFLOW
+	bool
+	default y if CC_IS_GCC && !CC_NO_STRINGOP_OVERFLOW
+
 #
 # For architectures that know their GCC __int128 support is sound
 #
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 86761ec623f9..cd9a137ad6ce 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -137,6 +137,14 @@ struct io_defer_entry {
 #define IO_DISARM_MASK (REQ_F_ARM_LTIMEOUT | REQ_F_LINK_TIMEOUT | REQ_F_FAIL)
 #define IO_REQ_LINK_FLAGS (REQ_F_LINK | REQ_F_HARDLINK)
 
+/*
+ * No waiters. It's larger than any valid value of the tw counter
+ * so that tests against ->cq_wait_nr would fail and skip wake_up().
+ */
+#define IO_CQ_WAKE_INIT		(-1U)
+/* Forced wake up if there is a waiter regardless of ->cq_wait_nr */
+#define IO_CQ_WAKE_FORCE	(IO_CQ_WAKE_INIT >> 1)
+
 static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 					 struct task_struct *task,
 					 bool cancel_all);
@@ -303,6 +311,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 		goto err;
 
 	ctx->flags = p->flags;
+	atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT);
 	init_waitqueue_head(&ctx->sqo_sq_wait);
 	INIT_LIST_HEAD(&ctx->sqd_list);
 	INIT_LIST_HEAD(&ctx->cq_overflow_list);
@@ -1304,16 +1313,23 @@ static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	unsigned nr_wait, nr_tw, nr_tw_prev;
-	struct llist_node *first;
+	struct llist_node *head;
+
+	/* See comment above IO_CQ_WAKE_INIT */
+	BUILD_BUG_ON(IO_CQ_WAKE_FORCE <= IORING_MAX_CQ_ENTRIES);
 
+	/*
+	 * We don't know how many requests there are in the link or whether
+	 * they can even be queued lazily, so fall back to non-lazy.
+	 */
 	if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK))
 		flags &= ~IOU_F_TWQ_LAZY_WAKE;
 
-	first = READ_ONCE(ctx->work_llist.first);
+	head = READ_ONCE(ctx->work_llist.first);
 	do {
 		nr_tw_prev = 0;
-		if (first) {
-			struct io_kiocb *first_req = container_of(first,
+		if (head) {
+			struct io_kiocb *first_req = container_of(head,
 							struct io_kiocb,
 							io_task_work.node);
 			/*
@@ -1322,17 +1338,29 @@ static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
 			 */
 			nr_tw_prev = READ_ONCE(first_req->nr_tw);
 		}
+
+		/*
+		 * Theoretically, it can overflow, but that's fine as one of
+		 * the previous adds should've tried to wake the task.
+		 */
 		nr_tw = nr_tw_prev + 1;
-		/* Large enough to fail the nr_wait comparison below */
 		if (!(flags & IOU_F_TWQ_LAZY_WAKE))
-			nr_tw = -1U;
+			nr_tw = IO_CQ_WAKE_FORCE;
 
 		req->nr_tw = nr_tw;
-		req->io_task_work.node.next = first;
-	} while (!try_cmpxchg(&ctx->work_llist.first, &first,
+		req->io_task_work.node.next = head;
+	} while (!try_cmpxchg(&ctx->work_llist.first, &head,
 			      &req->io_task_work.node));
 
-	if (!first) {
+	/*
+	 * cmpxchg implies a full barrier, which pairs with the barrier
+	 * in set_current_state() on the io_cqring_wait() side. It's used
+	 * to ensure that either we see updated ->cq_wait_nr, or waiters
+	 * going to sleep will observe the work added to the list, which
+	 * is similar to the wait/wake task state sync.
+	 */
+
+	if (!head) {
 		if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
 			atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
 		if (ctx->has_evfd)
@@ -1340,14 +1368,12 @@ static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
 	}
 
 	nr_wait = atomic_read(&ctx->cq_wait_nr);
-	/* no one is waiting */
-	if (!nr_wait)
+	/* not enough or no one is waiting */
+	if (nr_tw < nr_wait)
 		return;
-	/* either not enough or the previous add has already woken it up */
-	if (nr_wait > nr_tw || nr_tw_prev >= nr_wait)
+	/* the previous add has already woken it up */
+	if (nr_tw_prev >= nr_wait)
 		return;
-	/* pairs with set_current_state() in io_cqring_wait() */
-	smp_mb__after_atomic();
 	wake_up_state(ctx->submitter_task, TASK_INTERRUPTIBLE);
 }
 
@@ -2000,9 +2026,10 @@ inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
 		goto out;
 	fd = array_index_nospec(fd, ctx->nr_user_files);
 	slot = io_fixed_file_slot(&ctx->file_table, fd);
-	file = io_slot_file(slot);
+	if (!req->rsrc_node)
+		__io_req_set_rsrc_node(req, ctx);
 	req->flags |= io_slot_flags(slot);
-	io_req_set_rsrc_node(req, ctx, 0);
+	file = io_slot_file(slot);
 out:
 	io_ring_submit_unlock(ctx, issue_flags);
 	return file;
@@ -2613,7 +2640,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 
 		ret = io_cqring_wait_schedule(ctx, &iowq);
 		__set_current_state(TASK_RUNNING);
-		atomic_set(&ctx->cq_wait_nr, 0);
+		atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT);
 
 		/*
 		 * Run task_work after scheduling and before io_should_wake().
diff --git a/io_uring/register.c b/io_uring/register.c
index 708dd1d89add..5e62c1208996 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/nospec.h>
+#include <linux/compat.h>
 #include <linux/io_uring.h>
 #include <linux/io_uring_types.h>
 
@@ -278,13 +279,14 @@ static __cold int io_register_iowq_aff(struct io_ring_ctx *ctx,
 	if (len > cpumask_size())
 		len = cpumask_size();
 
-	if (in_compat_syscall()) {
+#ifdef CONFIG_COMPAT
+	if (in_compat_syscall())
 		ret = compat_get_bitmap(cpumask_bits(new_mask),
 					(const compat_ulong_t __user *)arg,
 					len * 8 /* CHAR_BIT */);
-	} else {
+	else
+#endif
 		ret = copy_from_user(new_mask, arg, len);
-	}
 
 	if (ret) {
 		free_cpumask_var(new_mask);
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index 7238b9cfe33b..c6f199bbee28 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -102,17 +102,21 @@ static inline void io_charge_rsrc_node(struct io_ring_ctx *ctx,
 	node->refs++;
 }
 
+static inline void __io_req_set_rsrc_node(struct io_kiocb *req,
+					  struct io_ring_ctx *ctx)
+{
+	lockdep_assert_held(&ctx->uring_lock);
+	req->rsrc_node = ctx->rsrc_node;
+	io_charge_rsrc_node(ctx, ctx->rsrc_node);
+}
+
 static inline void io_req_set_rsrc_node(struct io_kiocb *req,
 					struct io_ring_ctx *ctx,
 					unsigned int issue_flags)
 {
 	if (!req->rsrc_node) {
 		io_ring_submit_lock(ctx, issue_flags);
-
-		lockdep_assert_held(&ctx->uring_lock);
-
-		req->rsrc_node = ctx->rsrc_node;
-		io_charge_rsrc_node(ctx, ctx->rsrc_node);
+		__io_req_set_rsrc_node(req, ctx);
 		io_ring_submit_unlock(ctx, issue_flags);
 	}
 }
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 0c856726b15d..118cc9f1cf16 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -168,27 +168,6 @@ void io_readv_writev_cleanup(struct io_kiocb *req)
 	kfree(io->free_iovec);
 }
 
-static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
-{
-	switch (ret) {
-	case -EIOCBQUEUED:
-		break;
-	case -ERESTARTSYS:
-	case -ERESTARTNOINTR:
-	case -ERESTARTNOHAND:
-	case -ERESTART_RESTARTBLOCK:
-		/*
-		 * We can't just restart the syscall, since previously
-		 * submitted sqes may already be in progress. Just fail this
-		 * IO with EINTR.
-		 */
-		ret = -EINTR;
-		fallthrough;
-	default:
-		kiocb->ki_complete(kiocb, ret);
-	}
-}
-
 static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req)
 {
 	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
@@ -371,6 +350,33 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
 	smp_store_release(&req->iopoll_completed, 1);
 }
 
+static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
+{
+	/* IO was queued async, completion will happen later */
+	if (ret == -EIOCBQUEUED)
+		return;
+
+	/* transform internal restart error codes */
+	if (unlikely(ret < 0)) {
+		switch (ret) {
+		case -ERESTARTSYS:
+		case -ERESTARTNOINTR:
+		case -ERESTARTNOHAND:
+		case -ERESTART_RESTARTBLOCK:
+			/*
+			 * We can't just restart the syscall, since previously
+			 * submitted sqes may already be in progress. Just fail
+			 * this IO with EINTR.
+			 */
+			ret = -EINTR;
+			break;
+		}
+	}
+
+	INDIRECT_CALL_2(kiocb->ki_complete, io_complete_rw_iopoll,
+			io_complete_rw, kiocb, ret);
+}
+
 static int kiocb_done(struct io_kiocb *req, ssize_t ret,
 		       unsigned int issue_flags)
 {
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 6b213c8252d6..d05066cb40b2 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1348,8 +1348,6 @@ do_full_getstr:
 		/* PROMPT can only be set if we have MEM_READ permission. */
 		snprintf(kdb_prompt_str, CMD_BUFLEN, kdbgetenv("PROMPT"),
 			 raw_smp_processor_id());
-		if (defcmd_in_progress)
-			strncat(kdb_prompt_str, "[defcmd]", CMD_BUFLEN);
 
 		/*
 		 * Fetch command from keyboard
diff --git a/kernel/fork.c b/kernel/fork.c
index 47ff3b35352e..0d944e92a43f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1748,6 +1748,7 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
 	if (clone_flags & CLONE_FS) {
 		/* tsk->fs is already what we want */
 		spin_lock(&fs->lock);
+		/* "users" and "in_exec" locked for check_unsafe_exec() */
 		if (fs->in_exec) {
 			spin_unlock(&fs->lock);
 			return -EAGAIN;
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1ae851777806..b2bccfd37c38 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1013,6 +1013,38 @@ static bool rcu_future_gp_cleanup(struct rcu_node *rnp)
 	return needmore;
 }
 
+static void swake_up_one_online_ipi(void *arg)
+{
+	struct swait_queue_head *wqh = arg;
+
+	swake_up_one(wqh);
+}
+
+static void swake_up_one_online(struct swait_queue_head *wqh)
+{
+	int cpu = get_cpu();
+
+	/*
+	 * If called from rcutree_report_cpu_starting(), wake up
+	 * is dangerous that late in the CPU-down hotplug process. The
+	 * scheduler might queue an ignored hrtimer. Defer the wake up
+	 * to an online CPU instead.
+	 */
+	if (unlikely(cpu_is_offline(cpu))) {
+		int target;
+
+		target = cpumask_any_and(housekeeping_cpumask(HK_TYPE_RCU),
+					 cpu_online_mask);
+
+		smp_call_function_single(target, swake_up_one_online_ipi,
+					 wqh, 0);
+		put_cpu();
+	} else {
+		put_cpu();
+		swake_up_one(wqh);
+	}
+}
+
 /*
  * Awaken the grace-period kthread.  Don't do a self-awaken (unless in an
  * interrupt or softirq handler, in which case we just might immediately
@@ -1037,7 +1069,7 @@ static void rcu_gp_kthread_wake(void)
 		return;
 	WRITE_ONCE(rcu_state.gp_wake_time, jiffies);
 	WRITE_ONCE(rcu_state.gp_wake_seq, READ_ONCE(rcu_state.gp_seq));
-	swake_up_one(&rcu_state.gp_wq);
+	swake_up_one_online(&rcu_state.gp_wq);
 }
 
 /*
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 6d7cea5d591f..2ac440bc7e10 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -173,7 +173,6 @@ static bool sync_rcu_exp_done_unlocked(struct rcu_node *rnp)
 	return ret;
 }
 
-
 /*
  * Report the exit from RCU read-side critical section for the last task
  * that queued itself during or before the current expedited preemptible-RCU
@@ -201,7 +200,7 @@ static void __rcu_report_exp_rnp(struct rcu_node *rnp,
 			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 			if (wake) {
 				smp_mb(); /* EGP done before wake_up(). */
-				swake_up_one(&rcu_state.expedited_wq);
+				swake_up_one_online(&rcu_state.expedited_wq);
 			}
 			break;
 		}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index a17d26002831..d2501673028d 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1576,13 +1576,18 @@ void tick_setup_sched_timer(void)
 void tick_cancel_sched_timer(int cpu)
 {
 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+	ktime_t idle_sleeptime, iowait_sleeptime;
 
 # ifdef CONFIG_HIGH_RES_TIMERS
 	if (ts->sched_timer.base)
 		hrtimer_cancel(&ts->sched_timer);
 # endif
 
+	idle_sleeptime = ts->idle_sleeptime;
+	iowait_sleeptime = ts->iowait_sleeptime;
 	memset(ts, 0, sizeof(*ts));
+	ts->idle_sleeptime = idle_sleeptime;
+	ts->iowait_sleeptime = iowait_sleeptime;
 }
 #endif
 
diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c
index c774e560f2f9..a4dcf0f24352 100644
--- a/kernel/trace/tracing_map.c
+++ b/kernel/trace/tracing_map.c
@@ -574,7 +574,12 @@ __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only)
 				}
 
 				memcpy(elt->key, key, map->key_size);
-				entry->val = elt;
+				/*
+				 * Ensure the initialization is visible and
+				 * publish the elt.
+				 */
+				smp_wmb();
+				WRITE_ONCE(entry->val, elt);
 				atomic64_inc(&map->hits);
 
 				return entry->val;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ba25129563ad..975a07f9f1cc 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -231,9 +231,10 @@ config DEBUG_INFO
 	  in the "Debug information" choice below, indicating that debug
 	  information will be generated for build targets.
 
-# Clang is known to generate .{s,u}leb128 with symbol deltas with DWARF5, which
-# some targets may not support: https://sourceware.org/bugzilla/show_bug.cgi?id=27215
-config AS_HAS_NON_CONST_LEB128
+# Clang generates .uleb128 with label differences for DWARF v5, a feature that
+# older binutils ports do not support when utilizing RISC-V style linker
+# relaxation: https://sourceware.org/bugzilla/show_bug.cgi?id=27215
+config AS_HAS_NON_CONST_ULEB128
 	def_bool $(as-instr,.uleb128 .Lexpr_end4 - .Lexpr_start3\n.Lexpr_start3:\n.Lexpr_end4:)
 
 choice
@@ -258,7 +259,7 @@ config DEBUG_INFO_NONE
 config DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT
 	bool "Rely on the toolchain's implicit default DWARF version"
 	select DEBUG_INFO
-	depends on !CC_IS_CLANG || AS_IS_LLVM || CLANG_VERSION < 140000 || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_LEB128)
+	depends on !CC_IS_CLANG || AS_IS_LLVM || CLANG_VERSION < 140000 || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_ULEB128)
 	help
 	  The implicit default version of DWARF debug info produced by a
 	  toolchain changes over time.
@@ -282,7 +283,8 @@ config DEBUG_INFO_DWARF4
 config DEBUG_INFO_DWARF5
 	bool "Generate DWARF Version 5 debuginfo"
 	select DEBUG_INFO
-	depends on !CC_IS_CLANG || AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_LEB128)
+	depends on !ARCH_HAS_BROKEN_DWARF5
+	depends on !CC_IS_CLANG || AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_ULEB128)
 	help
 	  Generate DWARF v5 debug info. Requires binutils 2.35.2, gcc 5.0+ (gcc
 	  5.0+ accepts the -gdwarf-5 flag but only had partial support for some
diff --git a/lib/checksum_kunit.c b/lib/checksum_kunit.c
index 0eed92b77ba3..225bb7701460 100644
--- a/lib/checksum_kunit.c
+++ b/lib/checksum_kunit.c
@@ -1,15 +1,21 @@
 // SPDX-License-Identifier: GPL-2.0+
 /*
- * Test cases csum_partial and csum_fold
+ * Test cases for csum_partial, csum_fold, ip_fast_csum, csum_ipv6_magic
  */
 
 #include <kunit/test.h>
 #include <asm/checksum.h>
+#include <net/ip6_checksum.h>
 
 #define MAX_LEN 512
 #define MAX_ALIGN 64
 #define TEST_BUFLEN (MAX_LEN + MAX_ALIGN)
 
+#define IPv4_MIN_WORDS 5
+#define IPv4_MAX_WORDS 15
+#define NUM_IPv6_TESTS 200
+#define NUM_IP_FAST_CSUM_TESTS 181
+
 /* Values for a little endian CPU. Byte swap each half on big endian CPU. */
 static const u32 random_init_sum = 0x2847aab;
 static const u8 random_buf[] = {
@@ -209,6 +215,237 @@ static const u32 init_sums_no_overflow[] = {
 	0xffff0000, 0xfffffffb,
 };
 
+static const __sum16 expected_csum_ipv6_magic[] = {
+	0x18d4, 0x3085, 0x2e4b, 0xd9f4, 0xbdc8, 0x78f,	0x1034, 0x8422, 0x6fc0,
+	0xd2f6, 0xbeb5, 0x9d3,	0x7e2a, 0x312e, 0x778e, 0xc1bb, 0x7cf2, 0x9d1e,
+	0xca21, 0xf3ff, 0x7569, 0xb02e, 0xca86, 0x7e76, 0x4539, 0x45e3, 0xf28d,
+	0xdf81, 0x8fd5, 0x3b5d, 0x8324, 0xf471, 0x83be, 0x1daf, 0x8c46, 0xe682,
+	0xd1fb, 0x6b2e, 0xe687, 0x2a33, 0x4833, 0x2d67, 0x660f, 0x2e79, 0xd65e,
+	0x6b62, 0x6672, 0x5dbd, 0x8680, 0xbaa5, 0x2229, 0x2125, 0x2d01, 0x1cc0,
+	0x6d36, 0x33c0, 0xee36, 0xd832, 0x9820, 0x8a31, 0x53c5, 0x2e2,	0xdb0e,
+	0x49ed, 0x17a7, 0x77a0, 0xd72e, 0x3d72, 0x7dc8, 0x5b17, 0xf55d, 0xa4d9,
+	0x1446, 0x5d56, 0x6b2e, 0x69a5, 0xadb6, 0xff2a, 0x92e,	0xe044, 0x3402,
+	0xbb60, 0xec7f, 0xe7e6, 0x1986, 0x32f4, 0x8f8,	0x5e00, 0x47c6, 0x3059,
+	0x3969, 0xe957, 0x4388, 0x2854, 0x3334, 0xea71, 0xa6de, 0x33f9, 0x83fc,
+	0x37b4, 0x5531, 0x3404, 0x1010, 0xed30, 0x610a, 0xc95,	0x9aed, 0x6ff,
+	0x5136, 0x2741, 0x660e, 0x8b80, 0xf71,	0xa263, 0x88af, 0x7a73, 0x3c37,
+	0x1908, 0x6db5, 0x2e92, 0x1cd2, 0x70c8, 0xee16, 0xe80,	0xcd55, 0x6e6,
+	0x6434, 0x127,	0x655d, 0x2ea0, 0xb4f4, 0xdc20, 0x5671, 0xe462, 0xe52b,
+	0xdb44, 0x3589, 0xc48f, 0xe60b, 0xd2d2, 0x66ad, 0x498,	0x436,	0xb917,
+	0xf0ca, 0x1a6e, 0x1cb7, 0xbf61, 0x2870, 0xc7e8, 0x5b30, 0xe4a5, 0x168,
+	0xadfc, 0xd035, 0xe690, 0xe283, 0xfb27, 0xe4ad, 0xb1a5, 0xf2d5, 0xc4b6,
+	0x8a30, 0xd7d5, 0x7df9, 0x91d5, 0x63ed, 0x2d21, 0x312b, 0xab19, 0xa632,
+	0x8d2e, 0xef06, 0x57b9, 0xc373, 0xbd1f, 0xa41f, 0x8444, 0x9975, 0x90cb,
+	0xc49c, 0xe965, 0x4eff, 0x5a,	0xef6d, 0xe81a, 0xe260, 0x853a, 0xff7a,
+	0x99aa, 0xb06b, 0xee19, 0xcc2c, 0xf34c, 0x7c49, 0xdac3, 0xa71e, 0xc988,
+	0x3845, 0x1014
+};
+
+static const __sum16 expected_fast_csum[] = {
+	0xda83, 0x45da, 0x4f46, 0x4e4f, 0x34e,	0xe902, 0xa5e9, 0x87a5, 0x7187,
+	0x5671, 0xf556, 0x6df5, 0x816d, 0x8f81, 0xbb8f, 0xfbba, 0x5afb, 0xbe5a,
+	0xedbe, 0xabee, 0x6aac, 0xe6b,	0xea0d, 0x67ea, 0x7e68, 0x8a7e, 0x6f8a,
+	0x3a70, 0x9f3a, 0xe89e, 0x75e8, 0x7976, 0xfa79, 0x2cfa, 0x3c2c, 0x463c,
+	0x7146, 0x7a71, 0x547a, 0xfd53, 0x99fc, 0xb699, 0x92b6, 0xdb91, 0xe8da,
+	0x5fe9, 0x1e60, 0xae1d, 0x39ae, 0xf439, 0xa1f4, 0xdda1, 0xede,	0x790f,
+	0x579,	0x1206, 0x9012, 0x2490, 0xd224, 0x5cd2, 0xa65d, 0xca7,	0x220d,
+	0xf922, 0xbf9,	0x920b, 0x1b92, 0x361c, 0x2e36, 0x4d2e, 0x24d,	0x2,
+	0xcfff, 0x90cf, 0xa591, 0x93a5, 0x7993, 0x9579, 0xc894, 0x50c8, 0x5f50,
+	0xd55e, 0xcad5, 0xf3c9, 0x8f4,	0x4409, 0x5043, 0x5b50, 0x55b,	0x2205,
+	0x1e22, 0x801e, 0x3780, 0xe137, 0x7ee0, 0xf67d, 0x3cf6, 0xa53c, 0x2ea5,
+	0x472e, 0x5147, 0xcf51, 0x1bcf, 0x951c, 0x1e95, 0xc71e, 0xe4c7, 0xc3e4,
+	0x3dc3, 0xee3d, 0xa4ed, 0xf9a4, 0xcbf8, 0x75cb, 0xb375, 0x50b4, 0x3551,
+	0xf835, 0x19f8, 0x8c1a, 0x538c, 0xad52, 0xa3ac, 0xb0a3, 0x5cb0, 0x6c5c,
+	0x5b6c, 0xc05a, 0x92c0, 0x4792, 0xbe47, 0x53be, 0x1554, 0x5715, 0x4b57,
+	0xe54a, 0x20e5, 0x21,	0xd500, 0xa1d4, 0xa8a1, 0x57a9, 0xca57, 0x5ca,
+	0x1c06, 0x4f1c, 0xe24e, 0xd9e2, 0xf0d9, 0x4af1, 0x474b, 0x8146, 0xe81,
+	0xfd0e, 0x84fd, 0x7c85, 0xba7c, 0x17ba, 0x4a17, 0x964a, 0xf595, 0xff5,
+	0x5310, 0x3253, 0x6432, 0x4263, 0x2242, 0xe121, 0x32e1, 0xf632, 0xc5f5,
+	0x21c6, 0x7d22, 0x8e7c, 0x418e, 0x5641, 0x3156, 0x7c31, 0x737c, 0x373,
+	0x2503, 0xc22a, 0x3c2,	0x4a04, 0x8549, 0x5285, 0xa352, 0xe8a3, 0x6fe8,
+	0x1a6f, 0x211a, 0xe021, 0x38e0, 0x7638, 0xf575, 0x9df5, 0x169e, 0xf116,
+	0x23f1, 0xcd23, 0xece,	0x660f, 0x4866, 0x6a48, 0x716a, 0xee71, 0xa2ee,
+	0xb8a2, 0x61b9, 0xa361, 0xf7a2, 0x26f7, 0x1127, 0x6611, 0xe065, 0x36e0,
+	0x1837, 0x3018, 0x1c30, 0x721b, 0x3e71, 0xe43d, 0x99e4, 0x9e9a, 0xb79d,
+	0xa9b7, 0xcaa,	0xeb0c, 0x4eb,	0x1305, 0x8813, 0xb687, 0xa9b6, 0xfba9,
+	0xd7fb, 0xccd8, 0x2ecd, 0x652f, 0xae65, 0x3fae, 0x3a40, 0x563a, 0x7556,
+	0x2776, 0x1228, 0xef12, 0xf9ee, 0xcef9, 0x56cf, 0xa956, 0x24a9, 0xba24,
+	0x5fba, 0x665f, 0xf465, 0x8ff4, 0x6d8f, 0x346d, 0x5f34, 0x385f, 0xd137,
+	0xb8d0, 0xacb8, 0x55ac, 0x7455, 0xe874, 0x89e8, 0xd189, 0xa0d1, 0xb2a0,
+	0xb8b2, 0x36b8, 0x5636, 0xd355, 0x8d3,	0x1908, 0x2118, 0xc21,	0x990c,
+	0x8b99, 0x158c, 0x7815, 0x9e78, 0x6f9e, 0x4470, 0x1d44, 0x341d, 0x2634,
+	0x3f26, 0x793e, 0xc79,	0xcc0b, 0x26cc, 0xd126, 0x1fd1, 0xb41f, 0xb6b4,
+	0x22b7, 0xa122, 0xa1,	0x7f01, 0x837e, 0x3b83, 0xaf3b, 0x6fae, 0x916f,
+	0xb490, 0xffb3, 0xceff, 0x50cf, 0x7550, 0x7275, 0x1272, 0x2613, 0xaa26,
+	0xd5aa, 0x7d5,	0x9607, 0x96,	0xb100, 0xf8b0, 0x4bf8, 0xdd4c, 0xeddd,
+	0x98ed, 0x2599, 0x9325, 0xeb92, 0x8feb, 0xcc8f, 0x2acd, 0x392b, 0x3b39,
+	0xcb3b, 0x6acb, 0xd46a, 0xb8d4, 0x6ab8, 0x106a, 0x2f10, 0x892f, 0x789,
+	0xc806, 0x45c8, 0x7445, 0x3c74, 0x3a3c, 0xcf39, 0xd7ce, 0x58d8, 0x6e58,
+	0x336e, 0x1034, 0xee10, 0xe9ed, 0xc2e9, 0x3fc2, 0xd53e, 0xd2d4, 0xead2,
+	0x8fea, 0x2190, 0x1162, 0xbe11, 0x8cbe, 0x6d8c, 0xfb6c, 0x6dfb, 0xd36e,
+	0x3ad3, 0xf3a,	0x870e, 0xc287, 0x53c3, 0xc54,	0x5b0c, 0x7d5a, 0x797d,
+	0xec79, 0x5dec, 0x4d5e, 0x184e, 0xd618, 0x60d6, 0xb360, 0x98b3, 0xf298,
+	0xb1f2, 0x69b1, 0xf969, 0xef9,	0xab0e, 0x21ab, 0xe321, 0x24e3, 0x8224,
+	0x5481, 0x5954, 0x7a59, 0xff7a, 0x7dff, 0x1a7d, 0xa51a, 0x46a5, 0x6b47,
+	0xe6b,	0x830e, 0xa083, 0xff9f, 0xd0ff, 0xffd0, 0xe6ff, 0x7de7, 0xc67d,
+	0xd0c6, 0x61d1, 0x3a62, 0xc3b,	0x150c, 0x1715, 0x4517, 0x5345, 0x3954,
+	0xdd39, 0xdadd, 0x32db, 0x6a33, 0xd169, 0x86d1, 0xb687, 0x3fb6, 0x883f,
+	0xa487, 0x39a4, 0x2139, 0xbe20, 0xffbe, 0xedfe, 0x8ded, 0x368e, 0xc335,
+	0x51c3, 0x9851, 0xf297, 0xd6f2, 0xb9d6, 0x95ba, 0x2096, 0xea1f, 0x76e9,
+	0x4e76, 0xe04d, 0xd0df, 0x80d0, 0xa280, 0xfca2, 0x75fc, 0xef75, 0x32ef,
+	0x6833, 0xdf68, 0xc4df, 0x76c4, 0xb77,	0xb10a, 0xbfb1, 0x58bf, 0x5258,
+	0x4d52, 0x6c4d, 0x7e6c, 0xb67e, 0xccb5, 0x8ccc, 0xbe8c, 0xc8bd, 0x9ac8,
+	0xa99b, 0x52a9, 0x2f53, 0xc30,	0x3e0c, 0xb83d, 0x83b7, 0x5383, 0x7e53,
+	0x4f7e, 0xe24e, 0xb3e1, 0x8db3, 0x618e, 0xc861, 0xfcc8, 0x34fc, 0x9b35,
+	0xaa9b, 0xb1aa, 0x5eb1, 0x395e, 0x8639, 0xd486, 0x8bd4, 0x558b, 0x2156,
+	0xf721, 0x4ef6, 0x14f,	0x7301, 0xdd72, 0x49de, 0x894a, 0x9889, 0x8898,
+	0x7788, 0x7b77, 0x637b, 0xb963, 0xabb9, 0x7cab, 0xc87b, 0x21c8, 0xcb21,
+	0xdfca, 0xbfdf, 0xf2bf, 0x6af2, 0x626b, 0xb261, 0x3cb2, 0xc63c, 0xc9c6,
+	0xc9c9, 0xb4c9, 0xf9b4, 0x91f9, 0x4091, 0x3a40, 0xcc39, 0xd1cb, 0x7ed1,
+	0x537f, 0x6753, 0xa167, 0xba49, 0x88ba, 0x7789, 0x3877, 0xf037, 0xd3ef,
+	0xb5d4, 0x55b6, 0xa555, 0xeca4, 0xa1ec, 0xb6a2, 0x7b7,	0x9507, 0xfd94,
+	0x82fd, 0x5c83, 0x765c, 0x9676, 0x3f97, 0xda3f, 0x6fda, 0x646f, 0x3064,
+	0x5e30, 0x655e, 0x6465, 0xcb64, 0xcdca, 0x4ccd, 0x3f4c, 0x243f, 0x6f24,
+	0x656f, 0x6065, 0x3560, 0x3b36, 0xac3b, 0x4aac, 0x714a, 0x7e71, 0xda7e,
+	0x7fda, 0xda7f, 0x6fda, 0xff6f, 0xc6ff, 0xedc6, 0xd4ed, 0x70d5, 0xeb70,
+	0xa3eb, 0x80a3, 0xca80, 0x3fcb, 0x2540, 0xf825, 0x7ef8, 0xf87e, 0x73f8,
+	0xb474, 0xb4b4, 0x92b5, 0x9293, 0x93,	0x3500, 0x7134, 0x9071, 0xfa8f,
+	0x51fa, 0x1452, 0xba13, 0x7ab9, 0x957a, 0x8a95, 0x6e8a, 0x6d6e, 0x7c6d,
+	0x447c, 0x9744, 0x4597, 0x8945, 0xef88, 0x8fee, 0x3190, 0x4831, 0x8447,
+	0xa183, 0x1da1, 0xd41d, 0x2dd4, 0x4f2e, 0xc94e, 0xcbc9, 0xc9cb, 0x9ec9,
+	0x319e, 0xd531, 0x20d5, 0x4021, 0xb23f, 0x29b2, 0xd828, 0xecd8, 0x5ded,
+	0xfc5d, 0x4dfc, 0xd24d, 0x6bd2, 0x5f6b, 0xb35e, 0x7fb3, 0xee7e, 0x56ee,
+	0xa657, 0x68a6, 0x8768, 0x7787, 0xb077, 0x4cb1, 0x764c, 0xb175, 0x7b1,
+	0x3d07, 0x603d, 0x3560, 0x3e35, 0xb03d, 0xd6b0, 0xc8d6, 0xd8c8, 0x8bd8,
+	0x3e8c, 0x303f, 0xd530, 0xf1d4, 0x42f1, 0xca42, 0xddca, 0x41dd, 0x3141,
+	0x132,	0xe901, 0x8e9,	0xbe09, 0xe0bd, 0x2ce0, 0x862d, 0x3986, 0x9139,
+	0x6d91, 0x6a6d, 0x8d6a, 0x1b8d, 0xac1b, 0xedab, 0x54ed, 0xc054, 0xcebf,
+	0xc1ce, 0x5c2,	0x3805, 0x6038, 0x5960, 0xd359, 0xdd3,	0xbe0d, 0xafbd,
+	0x6daf, 0x206d, 0x2c20, 0x862c, 0x8e86, 0xec8d, 0xa2ec, 0xa3a2, 0x51a3,
+	0x8051, 0xfd7f, 0x91fd, 0xa292, 0xaf14, 0xeeae, 0x59ef, 0x535a, 0x8653,
+	0x3986, 0x9539, 0xb895, 0xa0b8, 0x26a0, 0x2227, 0xc022, 0x77c0, 0xad77,
+	0x46ad, 0xaa46, 0x60aa, 0x8560, 0x4785, 0xd747, 0x45d7, 0x2346, 0x5f23,
+	0x25f,	0x1d02, 0x71d,	0x8206, 0xc82,	0x180c, 0x3018, 0x4b30, 0x4b,
+	0x3001, 0x1230, 0x2d12, 0x8c2d, 0x148d, 0x4015, 0x5f3f, 0x3d5f, 0x6b3d,
+	0x396b, 0x473a, 0xf746, 0x44f7, 0x8945, 0x3489, 0xcb34, 0x84ca, 0xd984,
+	0xf0d9, 0xbcf0, 0x63bd, 0x3264, 0xf332, 0x45f3, 0x7346, 0x5673, 0xb056,
+	0xd3b0, 0x4ad4, 0x184b, 0x7d18, 0x6c7d, 0xbb6c, 0xfeba, 0xe0fe, 0x10e1,
+	0x5410, 0x2954, 0x9f28, 0x3a9f, 0x5a3a, 0xdb59, 0xbdc,	0xb40b, 0x1ab4,
+	0x131b, 0x5d12, 0x6d5c, 0xe16c, 0xb0e0, 0x89b0, 0xba88, 0xbb,	0x3c01,
+	0xe13b, 0x6fe1, 0x446f, 0xa344, 0x81a3, 0xfe81, 0xc7fd, 0x38c8, 0xb38,
+	0x1a0b, 0x6d19, 0xf36c, 0x47f3, 0x6d48, 0xb76d, 0xd3b7, 0xd8d2, 0x52d9,
+	0x4b53, 0xa54a, 0x34a5, 0xc534, 0x9bc4, 0xed9b, 0xbeed, 0x3ebe, 0x233e,
+	0x9f22, 0x4a9f, 0x774b, 0x4577, 0xa545, 0x64a5, 0xb65,	0x870b, 0x487,
+	0x9204, 0x5f91, 0xd55f, 0x35d5, 0x1a35, 0x71a,	0x7a07, 0x4e7a, 0xfc4e,
+	0x1efc, 0x481f, 0x7448, 0xde74, 0xa7dd, 0x1ea7, 0xaa1e, 0xcfaa, 0xfbcf,
+	0xedfb, 0x6eee, 0x386f, 0x4538, 0x6e45, 0xd96d, 0x11d9, 0x7912, 0x4b79,
+	0x494b, 0x6049, 0xac5f, 0x65ac, 0x1366, 0x5913, 0xe458, 0x7ae4, 0x387a,
+	0x3c38, 0xb03c, 0x76b0, 0x9376, 0xe193, 0x42e1, 0x7742, 0x6476, 0x3564,
+	0x3c35, 0x6a3c, 0xcc69, 0x94cc, 0x5d95, 0xe5e,	0xee0d, 0x4ced, 0xce4c,
+	0x52ce, 0xaa52, 0xdaaa, 0xe4da, 0x1de5, 0x4530, 0x5445, 0x3954, 0xb639,
+	0x81b6, 0x7381, 0x1574, 0xc215, 0x10c2, 0x3f10, 0x6b3f, 0xe76b, 0x7be7,
+	0xbc7b, 0xf7bb, 0x41f7, 0xcc41, 0x38cc, 0x4239, 0xa942, 0x4a9,	0xc504,
+	0x7cc4, 0x437c, 0x6743, 0xea67, 0x8dea, 0xe88d, 0xd8e8, 0xdcd8, 0x17dd,
+	0x5718, 0x958,	0xa609, 0x41a5, 0x5842, 0x159,	0x9f01, 0x269f, 0x5a26,
+	0x405a, 0xc340, 0xb4c3, 0xd4b4, 0xf4d3, 0xf1f4, 0x39f2, 0xe439, 0x67e4,
+	0x4168, 0xa441, 0xdda3, 0xdedd, 0x9df,	0xab0a, 0xa5ab, 0x9a6,	0xba09,
+	0x9ab9, 0xad9a, 0x5ae,	0xe205, 0xece2, 0xecec, 0x14ed, 0xd614, 0x6bd5,
+	0x916c, 0x3391, 0x6f33, 0x206f, 0x8020, 0x780,	0x7207, 0x2472, 0x8a23,
+	0xb689, 0x3ab6, 0xf739, 0x97f6, 0xb097, 0xa4b0, 0xe6a4, 0x88e6, 0x2789,
+	0xb28,	0x350b, 0x1f35, 0x431e, 0x1043, 0xc30f, 0x79c3, 0x379,	0x5703,
+	0x3256, 0x4732, 0x7247, 0x9d72, 0x489d, 0xd348, 0xa4d3, 0x7ca4, 0xbf7b,
+	0x45c0, 0x7b45, 0x337b, 0x4034, 0x843f, 0xd083, 0x35d0, 0x6335, 0x4d63,
+	0xe14c, 0xcce0, 0xfecc, 0x35ff, 0x5636, 0xf856, 0xeef8, 0x2def, 0xfc2d,
+	0x4fc,	0x6e04, 0xb66d, 0x78b6, 0xbb78, 0x3dbb, 0x9a3d, 0x839a, 0x9283,
+	0x593,	0xd504, 0x23d5, 0x5424, 0xd054, 0x61d0, 0xdb61, 0x17db, 0x1f18,
+	0x381f, 0x9e37, 0x679e, 0x1d68, 0x381d, 0x8038, 0x917f, 0x491,	0xbb04,
+	0x23bb, 0x4124, 0xd41,	0xa30c, 0x8ba3, 0x8b8b, 0xc68b, 0xd2c6, 0xebd2,
+	0x93eb, 0xbd93, 0x99bd, 0x1a99, 0xea19, 0x58ea, 0xcf58, 0x73cf, 0x1073,
+	0x9e10, 0x139e, 0xea13, 0xcde9, 0x3ecd, 0x883f, 0xf89,	0x180f, 0x2a18,
+	0x212a, 0xce20, 0x73ce, 0xf373, 0x60f3, 0xad60, 0x4093, 0x8e40, 0xb98e,
+	0xbfb9, 0xf1bf, 0x8bf1, 0x5e8c, 0xe95e, 0x14e9, 0x4e14, 0x1c4e, 0x7f1c,
+	0xe77e, 0x6fe7, 0xf26f, 0x13f2, 0x8b13, 0xda8a, 0x5fda, 0xea5f, 0x4eea,
+	0xa84f, 0x88a8, 0x1f88, 0x2820, 0x9728, 0x5a97, 0x3f5b, 0xb23f, 0x70b2,
+	0x2c70, 0x232d, 0xf623, 0x4f6,	0x905,	0x7509, 0xd675, 0x28d7, 0x9428,
+	0x3794, 0xf036, 0x2bf0, 0xba2c, 0xedb9, 0xd7ed, 0x59d8, 0xed59, 0x4ed,
+	0xe304, 0x18e3, 0x5c19, 0x3d5c, 0x753d, 0x6d75, 0x956d, 0x7f95, 0xc47f,
+	0x83c4, 0xa84,	0x2e0a, 0x5f2e, 0xb95f, 0x77b9, 0x6d78, 0xf46d, 0x1bf4,
+	0xed1b, 0xd6ed, 0xe0d6, 0x5e1,	0x3905, 0x5638, 0xa355, 0x99a2, 0xbe99,
+	0xb4bd, 0x85b4, 0x2e86, 0x542e, 0x6654, 0xd765, 0x73d7, 0x3a74, 0x383a,
+	0x2638, 0x7826, 0x7677, 0x9a76, 0x7e99, 0x2e7e, 0xea2d, 0xa6ea, 0x8a7,
+	0x109,	0x3300, 0xad32, 0x5fad, 0x465f, 0x2f46, 0xc62f, 0xd4c5, 0xad5,
+	0xcb0a, 0x4cb,	0xb004, 0x7baf, 0xe47b, 0x92e4, 0x8e92, 0x638e, 0x1763,
+	0xc17,	0xf20b, 0x1ff2, 0x8920, 0x5889, 0xcb58, 0xf8cb, 0xcaf8, 0x84cb,
+	0x9f84, 0x8a9f, 0x918a, 0x4991, 0x8249, 0xff81, 0x46ff, 0x5046, 0x5f50,
+	0x725f, 0xf772, 0x8ef7, 0xe08f, 0xc1e0, 0x1fc2, 0x9e1f, 0x8b9d, 0x108b,
+	0x411,	0x2b04, 0xb02a, 0x1fb0, 0x1020, 0x7a0f, 0x587a, 0x8958, 0xb188,
+	0xb1b1, 0x49b2, 0xb949, 0x7ab9, 0x917a, 0xfc91, 0xe6fc, 0x47e7, 0xbc47,
+	0x8fbb, 0xea8e, 0x34ea, 0x2635, 0x1726, 0x9616, 0xc196, 0xa6c1, 0xf3a6,
+	0x11f3, 0x4811, 0x3e48, 0xeb3e, 0xf7ea, 0x1bf8, 0xdb1c, 0x8adb, 0xe18a,
+	0x42e1, 0x9d42, 0x5d9c, 0x6e5d, 0x286e, 0x4928, 0x9a49, 0xb09c, 0xa6b0,
+	0x2a7,	0xe702, 0xf5e6, 0x9af5, 0xf9b,	0x810f, 0x8080, 0x180,	0x1702,
+	0x5117, 0xa650, 0x11a6, 0x1011, 0x550f, 0xd554, 0xbdd5, 0x6bbe, 0xc66b,
+	0xfc7,	0x5510, 0x5555, 0x7655, 0x177,	0x2b02, 0x6f2a, 0xb70,	0x9f0b,
+	0xcf9e, 0xf3cf, 0x3ff4, 0xcb40, 0x8ecb, 0x768e, 0x5277, 0x8652, 0x9186,
+	0x9991, 0x5099, 0xd350, 0x93d3, 0x6d94, 0xe6d,	0x530e, 0x3153, 0xa531,
+	0x64a5, 0x7964, 0x7c79, 0x467c, 0x1746, 0x3017, 0x3730, 0x538,	0x5,
+	0x1e00, 0x5b1e, 0x955a, 0xae95, 0x3eaf, 0xff3e, 0xf8ff, 0xb2f9, 0xa1b3,
+	0xb2a1, 0x5b2,	0xad05, 0x7cac, 0x2d7c, 0xd32c, 0x80d2, 0x7280, 0x8d72,
+	0x1b8e, 0x831b, 0xac82, 0xfdac, 0xa7fd, 0x15a8, 0xd614, 0xe0d5, 0x7be0,
+	0xb37b, 0x61b3, 0x9661, 0x9d95, 0xc79d, 0x83c7, 0xd883, 0xead7, 0xceb,
+	0xf60c, 0xa9f5, 0x19a9, 0xa019, 0x8f9f, 0xd48f, 0x3ad5, 0x853a, 0x985,
+	0x5309, 0x6f52, 0x1370, 0x6e13, 0xa96d, 0x98a9, 0x5198, 0x9f51, 0xb69f,
+	0xa1b6, 0x2ea1, 0x672e, 0x2067, 0x6520, 0xaf65, 0x6eaf, 0x7e6f, 0xee7e,
+	0x17ef, 0xa917, 0xcea8, 0x9ace, 0xff99, 0x5dff, 0xdf5d, 0x38df, 0xa39,
+	0x1c0b, 0xe01b, 0x46e0, 0xcb46, 0x90cb, 0xba90, 0x4bb,	0x9104, 0x9d90,
+	0xc89c, 0xf6c8, 0x6cf6, 0x886c, 0x1789, 0xbd17, 0x70bc, 0x7e71, 0x17e,
+	0x1f01, 0xa01f, 0xbaa0, 0x14bb, 0xfc14, 0x7afb, 0xa07a, 0x3da0, 0xbf3d,
+	0x48bf, 0x8c48, 0x968b, 0x9d96, 0xfd9d, 0x96fd, 0x9796, 0x6b97, 0xd16b,
+	0xf4d1, 0x3bf4, 0x253c, 0x9125, 0x6691, 0xc166, 0x34c1, 0x5735, 0x1a57,
+	0xdc19, 0x77db, 0x8577, 0x4a85, 0x824a, 0x9182, 0x7f91, 0xfd7f, 0xb4c3,
+	0xb5b4, 0xb3b5, 0x7eb3, 0x617e, 0x4e61, 0xa4f,	0x530a, 0x3f52, 0xa33e,
+	0x34a3, 0x9234, 0xf091, 0xf4f0, 0x1bf5, 0x311b, 0x9631, 0x6a96, 0x386b,
+	0x1d39, 0xe91d, 0xe8e9, 0x69e8, 0x426a, 0xee42, 0x89ee, 0x368a, 0x2837,
+	0x7428, 0x5974, 0x6159, 0x1d62, 0x7b1d, 0xf77a, 0x7bf7, 0x6b7c, 0x696c,
+	0xf969, 0x4cf9, 0x714c, 0x4e71, 0x6b4e, 0x256c, 0x6e25, 0xe96d, 0x94e9,
+	0x8f94, 0x3e8f, 0x343e, 0x4634, 0xb646, 0x97b5, 0x8997, 0xe8a,	0x900e,
+	0x8090, 0xfd80, 0xa0fd, 0x16a1, 0xf416, 0xebf4, 0x95ec, 0x1196, 0x8911,
+	0x3d89, 0xda3c, 0x9fd9, 0xd79f, 0x4bd7, 0x214c, 0x3021, 0x4f30, 0x994e,
+	0x5c99, 0x6f5d, 0x326f, 0xab31, 0x6aab, 0xe969, 0x90e9, 0x1190, 0xff10,
+	0xa2fe, 0xe0a2, 0x66e1, 0x4067, 0x9e3f, 0x2d9e, 0x712d, 0x8170, 0xd180,
+	0xffd1, 0x25ff, 0x3826, 0x2538, 0x5f24, 0xc45e, 0x1cc4, 0xdf1c, 0x93df,
+	0xc793, 0x80c7, 0x2380, 0xd223, 0x7ed2, 0xfc7e, 0x22fd, 0x7422, 0x1474,
+	0xb714, 0x7db6, 0x857d, 0xa85,	0xa60a, 0x88a6, 0x4289, 0x7842, 0xc278,
+	0xf7c2, 0xcdf7, 0x84cd, 0xae84, 0x8cae, 0xb98c, 0x1aba, 0x4d1a, 0x884c,
+	0x4688, 0xcc46, 0xd8cb, 0x2bd9, 0xbe2b, 0xa2be, 0x72a2, 0xf772, 0xd2f6,
+	0x75d2, 0xc075, 0xa3c0, 0x63a3, 0xae63, 0x8fae, 0x2a90, 0x5f2a, 0xef5f,
+	0x5cef, 0xa05c, 0x89a0, 0x5e89, 0x6b5e, 0x736b, 0x773,	0x9d07, 0xe99c,
+	0x27ea, 0x2028, 0xc20,	0x980b, 0x4797, 0x2848, 0x9828, 0xc197, 0x48c2,
+	0x2449, 0x7024, 0x570,	0x3e05, 0xd3e,	0xf60c, 0xbbf5, 0x69bb, 0x3f6a,
+	0x740,	0xf006, 0xe0ef, 0xbbe0, 0xadbb, 0x56ad, 0xcf56, 0xbfce, 0xa9bf,
+	0x205b, 0x6920, 0xae69, 0x50ae, 0x2050, 0xf01f, 0x27f0, 0x9427, 0x8993,
+	0x8689, 0x4087, 0x6e40, 0xb16e, 0xa1b1, 0xe8a1, 0x87e8, 0x6f88, 0xfe6f,
+	0x4cfe, 0xe94d, 0xd5e9, 0x47d6, 0x3148, 0x5f31, 0xc35f, 0x13c4, 0xa413,
+	0x5a5,	0x2405, 0xc223, 0x66c2, 0x3667, 0x5e37, 0x5f5e, 0x2f5f, 0x8c2f,
+	0xe48c, 0xd0e4, 0x4d1,	0xd104, 0xe4d0, 0xcee4, 0xfcf,	0x480f, 0xa447,
+	0x5ea4, 0xff5e, 0xbefe, 0x8dbe, 0x1d8e, 0x411d, 0x1841, 0x6918, 0x5469,
+	0x1155, 0xc611, 0xaac6, 0x37ab, 0x2f37, 0xca2e, 0x87ca, 0xbd87, 0xabbd,
+	0xb3ab, 0xcb4,	0xce0c, 0xfccd, 0xa5fd, 0x72a5, 0xf072, 0x83f0, 0xfe83,
+	0x97fd, 0xc997, 0xb0c9, 0xadb0, 0xe6ac, 0x88e6, 0x1088, 0xbe10, 0x16be,
+	0xa916, 0xa3a8, 0x46a3, 0x5447, 0xe953, 0x84e8, 0x2085, 0xa11f, 0xfa1,
+	0xdd0f, 0xbedc, 0x5abe, 0x805a, 0xc97f, 0x6dc9, 0x826d, 0x4a82, 0x934a,
+	0x5293, 0xd852, 0xd3d8, 0xadd3, 0xf4ad, 0xf3f4, 0xfcf3, 0xfefc, 0xcafe,
+	0xb7ca, 0x3cb8, 0xa13c, 0x18a1, 0x1418, 0xea13, 0x91ea, 0xf891, 0x53f8,
+	0xa254, 0xe9a2, 0x87ea, 0x4188, 0x1c41, 0xdc1b, 0xf5db, 0xcaf5, 0x45ca,
+	0x6d45, 0x396d, 0xde39, 0x90dd, 0x1e91, 0x1e,	0x7b00, 0x6a7b, 0xa46a,
+	0xc9a3, 0x9bc9, 0x389b, 0x1139, 0x5211, 0x1f52, 0xeb1f, 0xabeb, 0x48ab,
+	0x9348, 0xb392, 0x17b3, 0x1618, 0x5b16, 0x175b, 0xdc17, 0xdedb, 0x1cdf,
+	0xeb1c, 0xd1ea, 0x4ad2, 0xd4b,	0xc20c, 0x24c2, 0x7b25, 0x137b, 0x8b13,
+	0x618b, 0xa061, 0xff9f, 0xfffe, 0x72ff, 0xf572, 0xe2f5, 0xcfe2, 0xd2cf,
+	0x75d3, 0x6a76, 0xc469, 0x1ec4, 0xfc1d, 0x59fb, 0x455a, 0x7a45, 0xa479,
+	0xb7a4
+};
+
 static u8 tmp_buf[TEST_BUFLEN];
 
 #define full_csum(buff, len, sum) csum_fold(csum_partial(buff, len, sum))
@@ -338,10 +575,57 @@ static void test_csum_no_carry_inputs(struct kunit *test)
 	}
 }
 
+static void test_ip_fast_csum(struct kunit *test)
+{
+	__sum16 csum_result, expected;
+
+	for (int len = IPv4_MIN_WORDS; len < IPv4_MAX_WORDS; len++) {
+		for (int index = 0; index < NUM_IP_FAST_CSUM_TESTS; index++) {
+			csum_result = ip_fast_csum(random_buf + index, len);
+			expected =
+				expected_fast_csum[(len - IPv4_MIN_WORDS) *
+						   NUM_IP_FAST_CSUM_TESTS +
+						   index];
+			CHECK_EQ(expected, csum_result);
+		}
+	}
+}
+
+static void test_csum_ipv6_magic(struct kunit *test)
+{
+#if defined(CONFIG_NET)
+	const struct in6_addr *saddr;
+	const struct in6_addr *daddr;
+	unsigned int len;
+	unsigned char proto;
+	unsigned int csum;
+
+	const int daddr_offset = sizeof(struct in6_addr);
+	const int len_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr);
+	const int proto_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr) +
+			     sizeof(int);
+	const int csum_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr) +
+			    sizeof(int) + sizeof(char);
+
+	for (int i = 0; i < NUM_IPv6_TESTS; i++) {
+		saddr = (const struct in6_addr *)(random_buf + i);
+		daddr = (const struct in6_addr *)(random_buf + i +
+						  daddr_offset);
+		len = *(unsigned int *)(random_buf + i + len_offset);
+		proto = *(random_buf + i + proto_offset);
+		csum = *(unsigned int *)(random_buf + i + csum_offset);
+		CHECK_EQ(expected_csum_ipv6_magic[i],
+			 csum_ipv6_magic(saddr, daddr, len, proto, csum));
+	}
+#endif /* CONFIG_NET */
+}
+
 static struct kunit_case __refdata checksum_test_cases[] = {
 	KUNIT_CASE(test_csum_fixed_random_inputs),
 	KUNIT_CASE(test_csum_all_carry_inputs),
 	KUNIT_CASE(test_csum_no_carry_inputs),
+	KUNIT_CASE(test_ip_fast_csum),
+	KUNIT_CASE(test_csum_ipv6_magic),
 	{}
 };
 
diff --git a/lib/nlattr.c b/lib/nlattr.c
index dc15e7888fc1..ed2ab43e1b22 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -758,7 +758,7 @@ EXPORT_SYMBOL(nla_find);
  * @dstsize: Size of destination buffer.
  *
  * Copies at most dstsize - 1 bytes into the destination buffer.
- * Unlike strlcpy the destination buffer is always padded out.
+ * Unlike strscpy() the destination buffer is always padded out.
  *
  * Return:
  * * srclen - Returns @nla length (not including the trailing %NUL).
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index d0a5081dfd12..92c6b1fd8989 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -388,11 +388,6 @@ static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq,
 	unsigned int shallow_depth;
 
 	/*
-	 * For each batch, we wake up one queue. We need to make sure that our
-	 * batch size is small enough that the full depth of the bitmap,
-	 * potentially limited by a shallow depth, is enough to wake up all of
-	 * the queues.
-	 *
 	 * Each full word of the bitmap has bits_per_word bits, and there might
 	 * be a partial word. There are depth / bits_per_word full words and
 	 * depth % bits_per_word bits left over. In bitwise arithmetic:
diff --git a/lib/string.c b/lib/string.c
index be26623953d2..6891d15ce991 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -103,21 +103,6 @@ char *strncpy(char *dest, const char *src, size_t count)
 EXPORT_SYMBOL(strncpy);
 #endif
 
-#ifndef __HAVE_ARCH_STRLCPY
-size_t strlcpy(char *dest, const char *src, size_t size)
-{
-	size_t ret = strlen(src);
-
-	if (size) {
-		size_t len = (ret >= size) ? size - 1 : ret;
-		__builtin_memcpy(dest, src, len);
-		dest[len] = '\0';
-	}
-	return ret;
-}
-EXPORT_SYMBOL(strlcpy);
-#endif
-
 #ifndef __HAVE_ARCH_STRSCPY
 ssize_t strscpy(char *dest, const char *src, size_t count)
 {
diff --git a/lib/test_fortify/write_overflow-strlcpy-src.c b/lib/test_fortify/write_overflow-strlcpy-src.c
deleted file mode 100644
index 91bf83ebd34a..000000000000
--- a/lib/test_fortify/write_overflow-strlcpy-src.c
+++ /dev/null
@@ -1,5 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#define TEST	\
-	strlcpy(small, large_src, sizeof(small) + 1)
-
-#include "test_fortify.h"
diff --git a/lib/test_fortify/write_overflow-strlcpy.c b/lib/test_fortify/write_overflow-strlcpy.c
deleted file mode 100644
index 1883db7c0cd6..000000000000
--- a/lib/test_fortify/write_overflow-strlcpy.c
+++ /dev/null
@@ -1,5 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#define TEST	\
-	strlcpy(instance.buf, large_src, sizeof(instance.buf) + 1)
-
-#include "test_fortify.h"
diff --git a/mm/filemap.c b/mm/filemap.c
index ea49677c6338..750e779c23db 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2688,6 +2688,7 @@ int kiocb_write_and_wait(struct kiocb *iocb, size_t count)
 
 	return filemap_write_and_wait_range(mapping, pos, end);
 }
+EXPORT_SYMBOL_GPL(kiocb_write_and_wait);
 
 int kiocb_invalidate_pages(struct kiocb *iocb, size_t count)
 {
@@ -2715,6 +2716,7 @@ int kiocb_invalidate_pages(struct kiocb *iocb, size_t count)
 	return invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT,
 					     end >> PAGE_SHIFT);
 }
+EXPORT_SYMBOL_GPL(kiocb_invalidate_pages);
 
 /**
  * generic_file_read_iter - generic filesystem read routine
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 214532173536..a3b68243fd4b 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -118,12 +118,16 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[],
 	}
 	if (data[IFLA_VLAN_INGRESS_QOS]) {
 		nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) {
+			if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING)
+				continue;
 			m = nla_data(attr);
 			vlan_dev_set_ingress_priority(dev, m->to, m->from);
 		}
 	}
 	if (data[IFLA_VLAN_EGRESS_QOS]) {
 		nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) {
+			if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING)
+				continue;
 			m = nla_data(attr);
 			err = vlan_dev_set_egress_priority(dev, m->from, m->to);
 			if (err)
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index d3a759e052c8..625622016f57 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -5850,8 +5850,6 @@ static inline void convert_extent_map(struct ceph_sparse_read *sr)
 }
 #endif
 
-#define MAX_EXTENTS 4096
-
 static int osd_sparse_read(struct ceph_connection *con,
 			   struct ceph_msg_data_cursor *cursor,
 			   char **pbuf)
@@ -5882,23 +5880,16 @@ next_op:
 
 		if (count > 0) {
 			if (!sr->sr_extent || count > sr->sr_ext_len) {
-				/*
-				 * Apply a hard cap to the number of extents.
-				 * If we have more, assume something is wrong.
-				 */
-				if (count > MAX_EXTENTS) {
-					dout("%s: OSD returned 0x%x extents in a single reply!\n",
-					     __func__, count);
-					return -EREMOTEIO;
-				}
-
 				/* no extent array provided, or too short */
 				kfree(sr->sr_extent);
 				sr->sr_extent = kmalloc_array(count,
 							      sizeof(*sr->sr_extent),
 							      GFP_NOIO);
-				if (!sr->sr_extent)
+				if (!sr->sr_extent) {
+					pr_err("%s: failed to allocate %u extents\n",
+					       __func__, count);
 					return -ENOMEM;
+				}
 				sr->sr_ext_len = count;
 			}
 			ret = count * sizeof(*sr->sr_extent);
diff --git a/net/core/dev.c b/net/core/dev.c
index f01a9b858347..cb2dab0feee0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -11551,6 +11551,7 @@ static struct pernet_operations __net_initdata netdev_net_ops = {
 
 static void __net_exit default_device_exit_net(struct net *net)
 {
+	struct netdev_name_node *name_node, *tmp;
 	struct net_device *dev, *aux;
 	/*
 	 * Push all migratable network devices back to the
@@ -11573,6 +11574,14 @@ static void __net_exit default_device_exit_net(struct net *net)
 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
 		if (netdev_name_in_use(&init_net, fb_name))
 			snprintf(fb_name, IFNAMSIZ, "dev%%d");
+
+		netdev_for_each_altname_safe(dev, name_node, tmp)
+			if (netdev_name_in_use(&init_net, name_node->name)) {
+				netdev_name_node_del(name_node);
+				synchronize_rcu();
+				__netdev_name_node_alt_destroy(name_node);
+			}
+
 		err = dev_change_net_namespace(dev, &init_net, fb_name);
 		if (err) {
 			pr_emerg("%s: failed to move %s to init_net: %d\n",
diff --git a/net/core/dev.h b/net/core/dev.h
index cf93e188785b..7480b4c84298 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -63,6 +63,9 @@ int dev_change_name(struct net_device *dev, const char *newname);
 
 #define netdev_for_each_altname(dev, namenode)				\
 	list_for_each_entry((namenode), &(dev)->name_node->list, list)
+/* As netdev_for_each_altname(), but the body may unlink @namenode. */
+#define netdev_for_each_altname_safe(dev, namenode, next)		\
+	list_for_each_entry_safe((namenode), (next), &(dev)->name_node->list, list)
 
 int netdev_name_node_alt_create(struct net_device *dev, const char *name);
 int netdev_name_node_alt_destroy(struct net_device *dev, const char *name);
diff --git a/net/core/filter.c b/net/core/filter.c
index 24061f29c9dd..ef3e78b6a39c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -83,6 +83,7 @@
 #include <net/netfilter/nf_conntrack_bpf.h>
 #include <net/netkit.h>
 #include <linux/un.h>
+#include <net/xdp_sock_drv.h>
 
 #include "dev.h"
 
@@ -4092,10 +4093,46 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
 	memset(skb_frag_address(frag) + skb_frag_size(frag), 0, offset);
 	skb_frag_size_add(frag, offset);
 	sinfo->xdp_frags_size += offset;
+	if (rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL)
+		xsk_buff_get_tail(xdp)->data_end += offset;
 
 	return 0;
 }
 
+/* Zero-copy tail frag: free it when @release, else trim @shrink bytes. */
+static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
+				   struct xdp_mem_info *mem_info, bool release)
+{
+	struct xdp_buff *tail = xsk_buff_get_tail(xdp);
+
+	if (!release) {
+		tail->data_end -= shrink;
+		return;
+	}
+	xsk_buff_del_tail(tail);
+	__xdp_return(NULL, mem_info, false, tail);
+}
+
+/*
+ * Returns true when @shrink covers all of @frag, in which case its buffer is
+ * handed back via __xdp_return(). A partial shrink is applied here only for
+ * XSK pool memory; on a false return the caller adjusts the frag size itself.
+ */
+static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
+				int shrink)
+{
+	struct xdp_mem_info *mem_info = &xdp->rxq->mem;
+	bool release = skb_frag_size(frag) == shrink;
+
+	if (mem_info->type == MEM_TYPE_XSK_BUFF_POOL)
+		bpf_xdp_shrink_data_zc(xdp, shrink, mem_info, release);
+	else if (release)
+		__xdp_return(page_address(skb_frag_page(frag)), mem_info,
+			     false, NULL);
+
+	return release;
+}
+
 static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
 {
 	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
@@ -4110,12 +4147,7 @@ static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
 
 		len_free += shrink;
 		offset -= shrink;
-
-		if (skb_frag_size(frag) == shrink) {
-			struct page *page = skb_frag_page(frag);
-
-			__xdp_return(page_address(page), &xdp->rxq->mem,
-				     false, NULL);
+		if (bpf_xdp_shrink_data(xdp, frag, shrink)) {
 			n_frags_free++;
 		} else {
 			skb_frag_size_sub(frag, shrink);
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index f35c2e998406..63de5c635842 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -33,9 +33,6 @@
 
 void reqsk_queue_alloc(struct request_sock_queue *queue)
 {
-	spin_lock_init(&queue->rskq_lock);
-
-	spin_lock_init(&queue->fastopenq.lock);
 	queue->fastopenq.rskq_rst_head = NULL;
 	queue->fastopenq.rskq_rst_tail = NULL;
 	queue->fastopenq.qlen = 0;
diff --git a/net/core/sock.c b/net/core/sock.c
index 158dbdebce6a..0a7f46c37f0c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -107,6 +107,7 @@
 #include <linux/interrupt.h>
 #include <linux/poll.h>
 #include <linux/tcp.h>
+#include <linux/udp.h>
 #include <linux/init.h>
 #include <linux/highmem.h>
 #include <linux/user_namespace.h>
@@ -4144,8 +4145,14 @@ bool sk_busy_loop_end(void *p, unsigned long start_time)
 {
 	struct sock *sk = p;
 
-	return !skb_queue_empty_lockless(&sk->sk_receive_queue) ||
-	       sk_busy_loop_timeout(sk, start_time);
+	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+		return true;
+
+	if (sk_is_udp(sk) &&
+	    !skb_queue_empty_lockless(&udp_sk(sk)->reader_queue))
+		return true;
+
+	return sk_busy_loop_timeout(sk, start_time);
 }
 EXPORT_SYMBOL(sk_busy_loop_end);
 #endif /* CONFIG_NET_RX_BUSY_POLL */
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 835f4f9d98d2..4e635dd3d3c8 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -330,6 +330,9 @@ lookup_protocol:
 	if (INET_PROTOSW_REUSE & answer_flags)
 		sk->sk_reuse = SK_CAN_REUSE;
 
+	if (INET_PROTOSW_ICSK & answer_flags)
+		inet_init_csk_locks(sk);
+
 	inet = inet_sk(sk);
 	inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags);
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 8e2eb1793685..459af1f89739 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -727,6 +727,10 @@ out:
 	}
 	if (req)
 		reqsk_put(req);
+
+	if (newsk)
+		inet_init_csk_locks(newsk);
+
 	return newsk;
 out_err:
 	newsk = NULL;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1baa484d2190..a1c6de385cce 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -722,6 +722,7 @@ void tcp_push(struct sock *sk, int flags, int mss_now,
 		if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) {
 			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING);
 			set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
+			smp_mb__after_atomic();
 		}
 		/* It is possible TX completion already happened
 		 * before we set TSQ_THROTTLED.
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 13a1833a4df5..959bfd9f6344 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -199,6 +199,9 @@ lookup_protocol:
 	if (INET_PROTOSW_REUSE & answer_flags)
 		sk->sk_reuse = SK_CAN_REUSE;
 
+	if (INET_PROTOSW_ICSK & answer_flags)
+		inet_init_csk_locks(sk);
+
 	inet = inet_sk(sk);
 	inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags);
 
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 9b06c380866b..20551cfb7da6 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -928,14 +928,15 @@ copy_uaddr:
  */
 static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 {
+	DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name);
 	struct sock *sk = sock->sk;
 	struct llc_sock *llc = llc_sk(sk);
-	DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name);
 	int flags = msg->msg_flags;
 	int noblock = flags & MSG_DONTWAIT;
+	int rc = -EINVAL, copied = 0, hdrlen, hh_len;
 	struct sk_buff *skb = NULL;
+	struct net_device *dev;
 	size_t size = 0;
-	int rc = -EINVAL, copied = 0, hdrlen;
 
 	dprintk("%s: sending from %02X to %02X\n", __func__,
 		llc->laddr.lsap, llc->daddr.lsap);
@@ -955,22 +956,29 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 		if (rc)
 			goto out;
 	}
-	hdrlen = llc->dev->hard_header_len + llc_ui_header_len(sk, addr);
+	dev = llc->dev;
+	hh_len = LL_RESERVED_SPACE(dev);
+	hdrlen = llc_ui_header_len(sk, addr);
 	size = hdrlen + len;
-	if (size > llc->dev->mtu)
-		size = llc->dev->mtu;
+	size = min_t(size_t, size, READ_ONCE(dev->mtu));
 	copied = size - hdrlen;
 	rc = -EINVAL;
 	if (copied < 0)
 		goto out;
 	release_sock(sk);
-	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
+	skb = sock_alloc_send_skb(sk, hh_len + size, noblock, &rc);
 	lock_sock(sk);
 	if (!skb)
 		goto out;
-	skb->dev      = llc->dev;
+	if (sock_flag(sk, SOCK_ZAPPED) ||
+	    llc->dev != dev ||
+	    hdrlen != llc_ui_header_len(sk, addr) ||
+	    hh_len != LL_RESERVED_SPACE(dev) ||
+	    size > READ_ONCE(dev->mtu))
+		goto out;
+	skb->dev      = dev;
 	skb->protocol = llc_proto_type(addr->sllc_arphrd);
-	skb_reserve(skb, hdrlen);
+	skb_reserve(skb, hh_len + hdrlen);
 	rc = memcpy_from_msg(skb_put(skb, copied), msg, copied);
 	if (rc)
 		goto out;
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
index 6e387aadffce..4f16d9c88350 100644
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -135,22 +135,15 @@ static struct packet_type llc_packet_type __read_mostly = {
 	.func = llc_rcv,
 };
 
-static struct packet_type llc_tr_packet_type __read_mostly = {
-	.type = cpu_to_be16(ETH_P_TR_802_2),
-	.func = llc_rcv,
-};
-
 static int __init llc_init(void)
 {
 	dev_add_pack(&llc_packet_type);
-	dev_add_pack(&llc_tr_packet_type);
 	return 0;
 }
 
 static void __exit llc_exit(void)
 {
 	dev_remove_pack(&llc_packet_type);
-	dev_remove_pack(&llc_tr_packet_type);
 }
 
 module_init(llc_init);
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index cb0291decf2e..13438cc0a6b1 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -62,7 +62,6 @@ config MAC80211_KUNIT_TEST
 	depends on KUNIT
 	depends on MAC80211
 	default KUNIT_ALL_TESTS
-	depends on !KERNEL_6_2
 	help
 	  Enable this option to test mac80211 internals with kunit.
 
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index bf1adcd96b41..4391d8dd634b 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -404,7 +404,10 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta)
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(sta->link); i++) {
-		if (!(sta->sta.valid_links & BIT(i)))
+		struct link_sta_info *link_sta;
+
+		link_sta = rcu_access_pointer(sta->link[i]);
+		if (!link_sta)
 			continue;
 
 		sta_remove_link(sta, i, false);
@@ -910,6 +913,8 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
 	if (ieee80211_vif_is_mesh(&sdata->vif))
 		mesh_accept_plinks_update(sdata);
 
+	ieee80211_check_fast_xmit(sta);
+
 	return 0;
  out_remove:
 	if (sta->sta.valid_links)
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 314998fdb1a5..68a48abc7287 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3048,7 +3048,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
 	    sdata->vif.type == NL80211_IFTYPE_STATION)
 		goto out;
 
-	if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED))
+	if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED) || !sta->uploaded)
 		goto out;
 
 	if (test_sta_flag(sta, WLAN_STA_PS_STA) ||
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4b55533ce5ca..c537104411e7 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -24,6 +24,7 @@
 #include <net/sock.h>
 
 #define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-"))
+#define NFT_SET_MAX_ANONLEN 16
 
 unsigned int nf_tables_net_id __read_mostly;
 
@@ -4413,6 +4414,9 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
 		if (p[1] != 'd' || strchr(p + 2, '%'))
 			return -EINVAL;
 
+		if (strnlen(name, NFT_SET_MAX_ANONLEN) >= NFT_SET_MAX_ANONLEN)
+			return -EINVAL;
+
 		inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL);
 		if (inuse == NULL)
 			return -ENOMEM;
@@ -10988,16 +10992,10 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
 	data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
 
 	switch (data->verdict.code) {
-	default:
-		switch (data->verdict.code & NF_VERDICT_MASK) {
-		case NF_ACCEPT:
-		case NF_DROP:
-		case NF_QUEUE:
-			break;
-		default:
-			return -EINVAL;
-		}
-		fallthrough;
+	case NF_ACCEPT:
+	case NF_DROP:
+	case NF_QUEUE:
+		break;
 	case NFT_CONTINUE:
 	case NFT_BREAK:
 	case NFT_RETURN:
@@ -11032,6 +11030,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
 
 		data->verdict.chain = chain;
 		break;
+	default:
+		return -EINVAL;
 	}
 
 	desc->len = sizeof(data->verdict);
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index 680fe557686e..274b6f7e6bb5 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -357,9 +357,10 @@ static int nf_tables_netdev_event(struct notifier_block *this,
 				  unsigned long event, void *ptr)
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct nft_base_chain *basechain;
 	struct nftables_pernet *nft_net;
-	struct nft_table *table;
 	struct nft_chain *chain, *nr;
+	struct nft_table *table;
 	struct nft_ctx ctx = {
 		.net	= dev_net(dev),
 	};
@@ -371,7 +372,8 @@ static int nf_tables_netdev_event(struct notifier_block *this,
 	nft_net = nft_pernet(ctx.net);
 	mutex_lock(&nft_net->commit_mutex);
 	list_for_each_entry(table, &nft_net->tables, list) {
-		if (table->family != NFPROTO_NETDEV)
+		if (table->family != NFPROTO_NETDEV &&
+		    table->family != NFPROTO_INET)
 			continue;
 
 		ctx.family = table->family;
@@ -380,6 +382,11 @@ static int nf_tables_netdev_event(struct notifier_block *this,
 			if (!nft_is_base_chain(chain))
 				continue;
 
+			basechain = nft_base_chain(chain);
+			if (table->family == NFPROTO_INET &&
+			    basechain->ops.hooknum != NF_INET_INGRESS)
+				continue;
+
 			ctx.chain = chain;
 			nft_netdev_event(event, dev, &ctx);
 		}
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 5284cd2ad532..f0eeda97bfcd 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -350,6 +350,12 @@ static int nft_target_validate(const struct nft_ctx *ctx,
 	unsigned int hook_mask = 0;
 	int ret;
 
+	if (ctx->family != NFPROTO_IPV4 &&
+	    ctx->family != NFPROTO_IPV6 &&
+	    ctx->family != NFPROTO_BRIDGE &&
+	    ctx->family != NFPROTO_ARP)
+		return -EOPNOTSUPP;
+
 	if (nft_is_base_chain(ctx->chain)) {
 		const struct nft_base_chain *basechain =
 						nft_base_chain(ctx->chain);
@@ -595,6 +601,12 @@ static int nft_match_validate(const struct nft_ctx *ctx,
 	unsigned int hook_mask = 0;
 	int ret;
 
+	if (ctx->family != NFPROTO_IPV4 &&
+	    ctx->family != NFPROTO_IPV6 &&
+	    ctx->family != NFPROTO_BRIDGE &&
+	    ctx->family != NFPROTO_ARP)
+		return -EOPNOTSUPP;
+
 	if (nft_is_base_chain(ctx->chain)) {
 		const struct nft_base_chain *basechain =
 						nft_base_chain(ctx->chain);
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index ab3362c483b4..397351fa4d5f 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -384,6 +384,11 @@ static int nft_flow_offload_validate(const struct nft_ctx *ctx,
 {
 	unsigned int hook_mask = (1 << NF_INET_FORWARD);
 
+	if (ctx->family != NFPROTO_IPV4 &&
+	    ctx->family != NFPROTO_IPV6 &&
+	    ctx->family != NFPROTO_INET)
+		return -EOPNOTSUPP;
+
 	return nft_chain_validate_hooks(ctx->chain, hook_mask);
 }
 
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 79039afde34e..cefa25e0dbb0 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -58,17 +58,19 @@ static inline bool nft_limit_eval(struct nft_limit_priv *priv, u64 cost)
 static int nft_limit_init(struct nft_limit_priv *priv,
 			  const struct nlattr * const tb[], bool pkts)
 {
+	u64 unit, tokens, rate_with_burst;
 	bool invert = false;
-	u64 unit, tokens;
 
 	if (tb[NFTA_LIMIT_RATE] == NULL ||
 	    tb[NFTA_LIMIT_UNIT] == NULL)
 		return -EINVAL;
 
 	priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE]));
+	if (priv->rate == 0)
+		return -EINVAL;
+
 	unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT]));
-	priv->nsecs = unit * NSEC_PER_SEC;
-	if (priv->rate == 0 || priv->nsecs < unit)
+	if (check_mul_overflow(unit, NSEC_PER_SEC, &priv->nsecs))
 		return -EOVERFLOW;
 
 	if (tb[NFTA_LIMIT_BURST])
@@ -77,18 +79,25 @@ static int nft_limit_init(struct nft_limit_priv *priv,
 	if (pkts && priv->burst == 0)
 		priv->burst = NFT_LIMIT_PKT_BURST_DEFAULT;
 
-	if (priv->rate + priv->burst < priv->rate)
+	if (check_add_overflow(priv->rate, priv->burst, &rate_with_burst))
 		return -EOVERFLOW;
 
 	if (pkts) {
-		tokens = div64_u64(priv->nsecs, priv->rate) * priv->burst;
+		u64 tmp = div64_u64(priv->nsecs, priv->rate);
+
+		if (check_mul_overflow(tmp, priv->burst, &tokens))
+			return -EOVERFLOW;
 	} else {
+		u64 tmp;
+
 		/* The token bucket size limits the number of tokens can be
 		 * accumulated. tokens_max specifies the bucket size.
 		 * tokens_max = unit * (rate + burst) / rate.
 		 */
-		tokens = div64_u64(priv->nsecs * (priv->rate + priv->burst),
-				 priv->rate);
+		if (check_mul_overflow(priv->nsecs, rate_with_burst, &tmp))
+			return -EOVERFLOW;
+
+		tokens = div64_u64(tmp, priv->rate);
 	}
 
 	if (tb[NFTA_LIMIT_FLAGS]) {
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 583885ce7232..808f5802c270 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -143,6 +143,11 @@ static int nft_nat_validate(const struct nft_ctx *ctx,
 	struct nft_nat *priv = nft_expr_priv(expr);
 	int err;
 
+	if (ctx->family != NFPROTO_IPV4 &&
+	    ctx->family != NFPROTO_IPV6 &&
+	    ctx->family != NFPROTO_INET)
+		return -EOPNOTSUPP;
+
 	err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
 	if (err < 0)
 		return err;
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index 35a2c28caa60..24d977138572 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -166,6 +166,11 @@ static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *exp
 	const struct nft_rt *priv = nft_expr_priv(expr);
 	unsigned int hooks;
 
+	if (ctx->family != NFPROTO_IPV4 &&
+	    ctx->family != NFPROTO_IPV6 &&
+	    ctx->family != NFPROTO_INET)
+		return -EOPNOTSUPP;
+
 	switch (priv->key) {
 	case NFT_RT_NEXTHOP4:
 	case NFT_RT_NEXTHOP6:
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 9ed85be79452..f30163e2ca62 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -242,6 +242,11 @@ static int nft_socket_validate(const struct nft_ctx *ctx,
 			       const struct nft_expr *expr,
 			       const struct nft_data **data)
 {
+	if (ctx->family != NFPROTO_IPV4 &&
+	    ctx->family != NFPROTO_IPV6 &&
+	    ctx->family != NFPROTO_INET)
+		return -EOPNOTSUPP;
+
 	return nft_chain_validate_hooks(ctx->chain,
 					(1 << NF_INET_PRE_ROUTING) |
 					(1 << NF_INET_LOCAL_IN) |
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index 13da882669a4..1d737f89dfc1 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -186,7 +186,6 @@ static int nft_synproxy_do_init(const struct nft_ctx *ctx,
 		break;
 #endif
 	case NFPROTO_INET:
-	case NFPROTO_BRIDGE:
 		err = nf_synproxy_ipv4_init(snet, ctx->net);
 		if (err)
 			goto nf_ct_failure;
@@ -219,7 +218,6 @@ static void nft_synproxy_do_destroy(const struct nft_ctx *ctx)
 		break;
 #endif
 	case NFPROTO_INET:
-	case NFPROTO_BRIDGE:
 		nf_synproxy_ipv4_fini(snet, ctx->net);
 		nf_synproxy_ipv6_fini(snet, ctx->net);
 		break;
@@ -253,6 +251,11 @@ static int nft_synproxy_validate(const struct nft_ctx *ctx,
 				 const struct nft_expr *expr,
 				 const struct nft_data **data)
 {
+	if (ctx->family != NFPROTO_IPV4 &&
+	    ctx->family != NFPROTO_IPV6 &&
+	    ctx->family != NFPROTO_INET)
+		return -EOPNOTSUPP;
+
 	return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) |
 						    (1 << NF_INET_FORWARD));
 }
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index ae15cd693f0e..71412adb73d4 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -316,6 +316,11 @@ static int nft_tproxy_validate(const struct nft_ctx *ctx,
 			       const struct nft_expr *expr,
 			       const struct nft_data **data)
 {
+	if (ctx->family != NFPROTO_IPV4 &&
+	    ctx->family != NFPROTO_IPV6 &&
+	    ctx->family != NFPROTO_INET)
+		return -EOPNOTSUPP;
+
 	return nft_chain_validate_hooks(ctx->chain, 1 << NF_INET_PRE_ROUTING);
 }
 
diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c
index 452f8587adda..1c866757db55 100644
--- a/net/netfilter/nft_xfrm.c
+++ b/net/netfilter/nft_xfrm.c
@@ -235,6 +235,11 @@ static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *e
 	const struct nft_xfrm *priv = nft_expr_priv(expr);
 	unsigned int hooks;
 
+	if (ctx->family != NFPROTO_IPV4 &&
+	    ctx->family != NFPROTO_IPV6 &&
+	    ctx->family != NFPROTO_INET)
+		return -EOPNOTSUPP;
+
 	switch (priv->dir) {
 	case XFRM_POLICY_IN:
 		hooks = (1 << NF_INET_FORWARD) |
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 4ed8ffd58ff3..9c962347cf85 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -374,7 +374,7 @@ static void netlink_skb_destructor(struct sk_buff *skb)
 	if (is_vmalloc_addr(skb->head)) {
 		if (!skb->cloned ||
 		    !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
-			vfree(skb->head);
+			vfree_atomic(skb->head);
 
 		skb->head = NULL;
 	}
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 01c4cdfef45d..8435a20968ef 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -419,7 +419,7 @@ static int rds_recv_track_latency(struct rds_sock *rs, sockptr_t optval,
 
 	rs->rs_rx_traces = trace.rx_traces;
 	for (i = 0; i < rs->rs_rx_traces; i++) {
-		if (trace.rx_trace_pos[i] > RDS_MSG_RX_DGRAM_TRACE_MAX) {
+		if (trace.rx_trace_pos[i] >= RDS_MSG_RX_DGRAM_TRACE_MAX) {
 			rs->rs_rx_traces = 0;
 			return -EFAULT;
 		}
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 92a12e3d0fe6..ff3d396a65aa 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1560,6 +1560,9 @@ tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
 	     chain_prev = chain,
 		     chain = __tcf_get_next_chain(block, chain),
 		     tcf_chain_put(chain_prev)) {
+		if (chain->tmplt_ops && add)
+			chain->tmplt_ops->tmplt_reoffload(chain, true, cb,
+							  cb_priv);
 		for (tp = __tcf_get_next_proto(chain, NULL); tp;
 		     tp_prev = tp,
 			     tp = __tcf_get_next_proto(chain, tp),
@@ -1575,6 +1578,9 @@ tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
 				goto err_playback_remove;
 			}
 		}
+		if (chain->tmplt_ops && !add)
+			chain->tmplt_ops->tmplt_reoffload(chain, false, cb,
+							  cb_priv);
 	}
 
 	return 0;
@@ -3000,7 +3006,8 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
 	ops = tcf_proto_lookup_ops(name, true, extack);
 	if (IS_ERR(ops))
 		return PTR_ERR(ops);
-	if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
+	if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump ||
+	    !ops->tmplt_reoffload) {
 		NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
 		module_put(ops->owner);
 		return -EOPNOTSUPP;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index e5314a31f75a..efb9d2811b73 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -2721,6 +2721,28 @@ static void fl_tmplt_destroy(void *tmplt_priv)
 	kfree(tmplt);
 }
 
+/* Send the chain's flower template to @cb as a create (@add) or destroy. */
+static void fl_tmplt_reoffload(struct tcf_chain *chain, bool add,
+			       flow_setup_cb_t *cb, void *cb_priv)
+{
+	struct fl_flow_tmplt *tmplt = chain->tmplt_priv;
+	struct flow_rule *rule = flow_rule_alloc(0);
+	struct flow_cls_offload cls_flower = {
+		.common.chain_index = chain->index,
+		.command = add ? FLOW_CLS_TMPLT_CREATE : FLOW_CLS_TMPLT_DESTROY,
+		.cookie = (unsigned long) tmplt,
+		.rule = rule,
+	};
+
+	if (!rule)
+		return;
+	rule->match.dissector = &tmplt->dissector;
+	rule->match.mask = &tmplt->mask;
+	rule->match.key = &tmplt->dummy_key;
+
+	cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv);
+	kfree(rule);
+}
+
 static int fl_dump_key_val(struct sk_buff *skb,
 			   void *val, int val_type,
 			   void *mask, int mask_type, int len)
@@ -3628,6 +3650,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
 	.bind_class	= fl_bind_class,
 	.tmplt_create	= fl_tmplt_create,
 	.tmplt_destroy	= fl_tmplt_destroy,
+	.tmplt_reoffload = fl_tmplt_reoffload,
 	.tmplt_dump	= fl_tmplt_dump,
 	.get_exts	= fl_get_exts,
 	.owner		= THIS_MODULE,
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index 32bad267fa3e..6fdb2d96777a 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -164,7 +164,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
 	}
 	if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd &&
 	    (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) &&
-	    !list_empty(&smc->conn.lgr->list)) {
+	    !list_empty(&smc->conn.lgr->list) && smc->conn.rmb_desc) {
 		struct smc_connection *conn = &smc->conn;
 		struct smcd_diag_dmbinfo dinfo;
 		struct smcd_dev *smcd = conn->lgr->smcd;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index bfb2f78523a8..545017a3daa4 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -717,12 +717,12 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
 				ARRAY_SIZE(rqstp->rq_bvec), xdr);
 
 	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
-		      count, 0);
+		      count, rqstp->rq_res.len);
 	err = sock_sendmsg(svsk->sk_sock, &msg);
 	if (err == -ECONNREFUSED) {
 		/* ICMP error on earlier request. */
 		iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
-			      count, 0);
+			      count, rqstp->rq_res.len);
 		err = sock_sendmsg(svsk->sk_sock, &msg);
 	}
 
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index a9ac85e09af3..10345388ad13 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -206,7 +206,6 @@ config CFG80211_KUNIT_TEST
 	depends on KUNIT
 	depends on CFG80211
 	default KUNIT_ALL_TESTS
-	depends on !KERNEL_6_2
 	help
 	  Enable this option to test cfg80211 functions with kunit.
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 60877b532993..b09700400d09 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4020,6 +4020,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
 		}
 		wiphy_unlock(&rdev->wiphy);
 
+		if_start = 0;
 		wp_idx++;
 	}
  out:
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 9f13aa3353e3..1eadfac03cc4 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -167,8 +167,10 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 		contd = XDP_PKT_CONTD;
 
 	err = __xsk_rcv_zc(xs, xskb, len, contd);
-	if (err || likely(!frags))
-		goto out;
+	if (err)
+		goto err;
+	if (likely(!frags))
+		return 0;
 
 	xskb_list = &xskb->pool->xskb_list;
 	list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) {
@@ -177,11 +179,13 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 		len = pos->xdp.data_end - pos->xdp.data;
 		err = __xsk_rcv_zc(xs, pos, len, contd);
 		if (err)
-			return err;
+			goto err;
 		list_del(&pos->xskb_list_node);
 	}
 
-out:
+	return 0;
+err:
+	xsk_buff_free(xdp);
 	return err;
 }
 
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 28711cc44ced..ce60ecd48a4d 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -555,6 +555,7 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
 
 	xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM;
 	xskb->xdp.data_meta = xskb->xdp.data;
+	xskb->xdp.flags = 0;
 
 	if (pool->dma_need_sync) {
 		dma_sync_single_range_for_device(pool->dev, xskb->dma, 0,
diff --git a/samples/cgroup/.gitignore b/samples/cgroup/.gitignore
new file mode 100644
index 000000000000..3a0161194cce
--- /dev/null
+++ b/samples/cgroup/.gitignore
@@ -0,0 +1,3 @@
+/cgroup_event_listener
+/memcg_event_listener
+
diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c
index e2a6a69352df..81220390851a 100644
--- a/samples/ftrace/ftrace-direct-modify.c
+++ b/samples/ftrace/ftrace-direct-modify.c
@@ -24,6 +24,41 @@ extern void my_tramp2(void *);
 
 static unsigned long my_ip = (unsigned long)schedule;
 
+#ifdef CONFIG_RISCV
+#include <asm/asm.h>
+
+asm (
+"	.pushsection    .text, \"ax\", @progbits\n"
+"	.type		my_tramp1, @function\n"
+"	.globl		my_tramp1\n"
+"   my_tramp1:\n"
+"	addi	sp,sp,-2*"SZREG"\n"
+"	"REG_S"	t0,0*"SZREG"(sp)\n"
+"	"REG_S"	ra,1*"SZREG"(sp)\n"
+"	call	my_direct_func1\n"
+"	"REG_L"	t0,0*"SZREG"(sp)\n"
+"	"REG_L"	ra,1*"SZREG"(sp)\n"
+"	addi	sp,sp,2*"SZREG"\n"
+"	jr	t0\n"
+"	.size		my_tramp1, .-my_tramp1\n"
+"	.type		my_tramp2, @function\n"
+"	.globl		my_tramp2\n"
+
+"   my_tramp2:\n"
+"	addi	sp,sp,-2*"SZREG"\n"
+"	"REG_S"	t0,0*"SZREG"(sp)\n"
+"	"REG_S"	ra,1*"SZREG"(sp)\n"
+"	call	my_direct_func2\n"
+"	"REG_L"	t0,0*"SZREG"(sp)\n"
+"	"REG_L"	ra,1*"SZREG"(sp)\n"
+"	addi	sp,sp,2*"SZREG"\n"
+"	jr	t0\n"
+"	.size		my_tramp2, .-my_tramp2\n"
+"	.popsection\n"
+);
+
+#endif /* CONFIG_RISCV */
+
 #ifdef CONFIG_X86_64
 
 #include <asm/ibt.h>
diff --git a/samples/ftrace/ftrace-direct-multi-modify.c b/samples/ftrace/ftrace-direct-multi-modify.c
index 2e349834d63c..f943e40d57fd 100644
--- a/samples/ftrace/ftrace-direct-multi-modify.c
+++ b/samples/ftrace/ftrace-direct-multi-modify.c
@@ -22,6 +22,47 @@ void my_direct_func2(unsigned long ip)
 extern void my_tramp1(void *);
 extern void my_tramp2(void *);
 
+#ifdef CONFIG_RISCV
+#include <asm/asm.h>
+
+asm (
+"	.pushsection    .text, \"ax\", @progbits\n"
+"	.type		my_tramp1, @function\n"
+"	.globl		my_tramp1\n"
+"   my_tramp1:\n"
+"       addi	sp,sp,-3*"SZREG"\n"
+"       "REG_S"	a0,0*"SZREG"(sp)\n"
+"       "REG_S"	t0,1*"SZREG"(sp)\n"
+"       "REG_S"	ra,2*"SZREG"(sp)\n"
+"       mv	a0,t0\n"
+"       call	my_direct_func1\n"
+"       "REG_L"	a0,0*"SZREG"(sp)\n"
+"       "REG_L"	t0,1*"SZREG"(sp)\n"
+"       "REG_L"	ra,2*"SZREG"(sp)\n"
+"       addi	sp,sp,3*"SZREG"\n"
+"	jr	t0\n"
+"	.size		my_tramp1, .-my_tramp1\n"
+
+"	.type		my_tramp2, @function\n"
+"	.globl		my_tramp2\n"
+"   my_tramp2:\n"
+"       addi	sp,sp,-3*"SZREG"\n"
+"       "REG_S"	a0,0*"SZREG"(sp)\n"
+"       "REG_S"	t0,1*"SZREG"(sp)\n"
+"       "REG_S"	ra,2*"SZREG"(sp)\n"
+"       mv	a0,t0\n"
+"       call	my_direct_func2\n"
+"       "REG_L"	a0,0*"SZREG"(sp)\n"
+"       "REG_L"	t0,1*"SZREG"(sp)\n"
+"       "REG_L"	ra,2*"SZREG"(sp)\n"
+"       addi	sp,sp,3*"SZREG"\n"
+"	jr	t0\n"
+"	.size		my_tramp2, .-my_tramp2\n"
+"	.popsection\n"
+);
+
+#endif /* CONFIG_RISCV */
+
 #ifdef CONFIG_X86_64
 
 #include <asm/ibt.h>
diff --git a/samples/ftrace/ftrace-direct-multi.c b/samples/ftrace/ftrace-direct-multi.c
index 9243dbfe4d0c..aed6df2927ce 100644
--- a/samples/ftrace/ftrace-direct-multi.c
+++ b/samples/ftrace/ftrace-direct-multi.c
@@ -17,6 +17,31 @@ void my_direct_func(unsigned long ip)
 
 extern void my_tramp(void *);
 
+#ifdef CONFIG_RISCV
+#include <asm/asm.h>
+
+asm (
+"       .pushsection    .text, \"ax\", @progbits\n"
+"       .type           my_tramp, @function\n"
+"       .globl          my_tramp\n"
+"   my_tramp:\n"
+"       addi	sp,sp,-3*"SZREG"\n"
+"       "REG_S"	a0,0*"SZREG"(sp)\n"
+"       "REG_S"	t0,1*"SZREG"(sp)\n"
+"       "REG_S"	ra,2*"SZREG"(sp)\n"
+"       mv	a0,t0\n"
+"       call	my_direct_func\n"
+"       "REG_L"	a0,0*"SZREG"(sp)\n"
+"       "REG_L"	t0,1*"SZREG"(sp)\n"
+"       "REG_L"	ra,2*"SZREG"(sp)\n"
+"       addi	sp,sp,3*"SZREG"\n"
+"       jr	t0\n"
+"       .size           my_tramp, .-my_tramp\n"
+"       .popsection\n"
+);
+
+#endif /* CONFIG_RISCV */
+
 #ifdef CONFIG_X86_64
 
 #include <asm/ibt.h>
diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c
index e39c3563ae4e..6ff546a5d7eb 100644
--- a/samples/ftrace/ftrace-direct-too.c
+++ b/samples/ftrace/ftrace-direct-too.c
@@ -19,6 +19,34 @@ void my_direct_func(struct vm_area_struct *vma, unsigned long address,
 
 extern void my_tramp(void *);
 
+#ifdef CONFIG_RISCV
+#include <asm/asm.h>
+
+asm (
+"       .pushsection    .text, \"ax\", @progbits\n"
+"       .type           my_tramp, @function\n"
+"       .globl          my_tramp\n"
+"   my_tramp:\n"
+"       addi	sp,sp,-5*"SZREG"\n"
+"       "REG_S"	a0,0*"SZREG"(sp)\n"
+"       "REG_S"	a1,1*"SZREG"(sp)\n"
+"       "REG_S"	a2,2*"SZREG"(sp)\n"
+"       "REG_S"	t0,3*"SZREG"(sp)\n"
+"       "REG_S"	ra,4*"SZREG"(sp)\n"
+"       call	my_direct_func\n"
+"       "REG_L"	a0,0*"SZREG"(sp)\n"
+"       "REG_L"	a1,1*"SZREG"(sp)\n"
+"       "REG_L"	a2,2*"SZREG"(sp)\n"
+"       "REG_L"	t0,3*"SZREG"(sp)\n"
+"       "REG_L"	ra,4*"SZREG"(sp)\n"
+"       addi	sp,sp,5*"SZREG"\n"
+"       jr	t0\n"
+"       .size           my_tramp, .-my_tramp\n"
+"       .popsection\n"
+);
+
+#endif /* CONFIG_RISCV */
+
 #ifdef CONFIG_X86_64
 
 #include <asm/ibt.h>
diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c
index 32c477da1e9a..ef0945670e1e 100644
--- a/samples/ftrace/ftrace-direct.c
+++ b/samples/ftrace/ftrace-direct.c
@@ -16,6 +16,30 @@ void my_direct_func(struct task_struct *p)
 
 extern void my_tramp(void *);
 
+#ifdef CONFIG_RISCV
+#include <asm/asm.h>
+
+asm (
+"       .pushsection    .text, \"ax\", @progbits\n"
+"       .type           my_tramp, @function\n"
+"       .globl          my_tramp\n"
+"   my_tramp:\n"
+"       addi	sp,sp,-3*"SZREG"\n"
+"       "REG_S"	a0,0*"SZREG"(sp)\n"
+"       "REG_S"	t0,1*"SZREG"(sp)\n"
+"       "REG_S"	ra,2*"SZREG"(sp)\n"
+"       call	my_direct_func\n"
+"       "REG_L"	a0,0*"SZREG"(sp)\n"
+"       "REG_L"	t0,1*"SZREG"(sp)\n"
+"       "REG_L"	ra,2*"SZREG"(sp)\n"
+"       addi	sp,sp,3*"SZREG"\n"
+"       jr	t0\n"
+"       .size           my_tramp, .-my_tramp\n"
+"       .popsection\n"
+);
+
+#endif /* CONFIG_RISCV */
+
 #ifdef CONFIG_X86_64
 
 #include <asm/ibt.h>
diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
index c9725685aa76..a9e552a1e910 100644
--- a/scripts/Makefile.extrawarn
+++ b/scripts/Makefile.extrawarn
@@ -82,15 +82,6 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init)
 # Warn if there is an enum types mismatch
 KBUILD_CFLAGS += $(call cc-option,-Wenum-conversion)
 
-# backward compatibility
-KBUILD_EXTRA_WARN ?= $(KBUILD_ENABLE_EXTRA_GCC_CHECKS)
-
-ifeq ("$(origin W)", "command line")
-  KBUILD_EXTRA_WARN := $(W)
-endif
-
-export KBUILD_EXTRA_WARN
-
 #
 # W=1 - warnings which may be relevant and do not occur too often
 #
@@ -106,7 +97,6 @@ KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable)
 KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned)
 KBUILD_CFLAGS += $(call cc-option, -Wformat-overflow)
 KBUILD_CFLAGS += $(call cc-option, -Wformat-truncation)
-KBUILD_CFLAGS += $(call cc-option, -Wstringop-overflow)
 KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation)
 
 KBUILD_CPPFLAGS += -Wundef
@@ -122,7 +112,6 @@ KBUILD_CFLAGS += $(call cc-disable-warning, restrict)
 KBUILD_CFLAGS += $(call cc-disable-warning, packed-not-aligned)
 KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow)
 KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation)
-KBUILD_CFLAGS += $(call cc-disable-warning, stringop-overflow)
 KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation)
 
 ifdef CONFIG_CC_IS_CLANG
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 1a965fe68e01..cd5b181060f1 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -83,8 +83,8 @@ dtb-$(CONFIG_OF_ALL_DTBS)       += $(dtb-)
 multi-dtb-y := $(call multi-search, $(dtb-y), .dtb, -dtbs)
 # Primitive DTB compiled from *.dts
 real-dtb-y := $(call real-search, $(dtb-y), .dtb, -dtbs)
-# Base DTB that overlay is applied onto (each first word of $(*-dtbs) expansion)
-base-dtb-y := $(foreach m, $(multi-dtb-y), $(firstword $(call suffix-search, $m, .dtb, -dtbs)))
+# Base DTB that overlay is applied onto
+base-dtb-y := $(filter %.dtb, $(call real-search, $(multi-dtb-y), .dtb, -dtbs))
 
 always-y			+= $(dtb-y)
 
diff --git a/scripts/Makefile.package b/scripts/Makefile.package
index 3addd1c0b989..a81dfb1f5181 100644
--- a/scripts/Makefile.package
+++ b/scripts/Makefile.package
@@ -4,27 +4,6 @@
 include $(srctree)/scripts/Kbuild.include
 include $(srctree)/scripts/Makefile.lib
 
-KERNELPATH := kernel-$(subst -,_,$(KERNELRELEASE))
-# Include only those top-level files that are needed by make, plus the GPL copy
-TAR_CONTENT := Documentation LICENSES arch block certs crypto drivers fs \
-               include init io_uring ipc kernel lib mm net rust \
-               samples scripts security sound tools usr virt \
-               .config Makefile \
-               Kbuild Kconfig COPYING $(wildcard localversion*)
-
-quiet_cmd_src_tar = TAR     $(2).tar.gz
-      cmd_src_tar = \
-if test "$(objtree)" != "$(srctree)"; then \
-	echo >&2; \
-	echo >&2 "  ERROR:"; \
-	echo >&2 "  Building source tarball is not possible outside the"; \
-	echo >&2 "  kernel source tree. Don't set KBUILD_OUTPUT"; \
-	echo >&2; \
-	false; \
-fi ; \
-tar -I $(KGZIP) -c $(RCS_TAR_IGNORE) -f $(2).tar.gz \
-	--transform 's:^:$(2)/:S' $(TAR_CONTENT) $(3)
-
 # Git
 # ---------------------------------------------------------------------------
 
@@ -130,8 +109,6 @@ debian-orig: linux.tar$(debian-orig-suffix) debian
 		cp $< ../$(orig-name); \
 	fi
 
-KBUILD_PKG_ROOTCMD ?= 'fakeroot -u'
-
 PHONY += deb-pkg srcdeb-pkg bindeb-pkg
 
 deb-pkg:    private build-type := source,binary
@@ -146,7 +123,7 @@ deb-pkg srcdeb-pkg bindeb-pkg:
 	$(if $(findstring source, $(build-type)), \
 		--unsigned-source --compression=$(KDEB_SOURCE_COMPRESS)) \
 	$(if $(findstring binary, $(build-type)), \
-		--rules-file='$(MAKE) -f debian/rules' --jobs=1 -r$(KBUILD_PKG_ROOTCMD) -a$$(cat debian/arch), \
+		-R'$(MAKE) -f debian/rules' -j1 -a$$(cat debian/arch), \
 		--no-check-builddeps) \
 	$(DPKG_FLAGS))
 
@@ -157,9 +134,8 @@ snap-pkg:
 	rm -rf $(objtree)/snap
 	mkdir $(objtree)/snap
 	$(MAKE) clean
-	$(call cmd,src_tar,$(KERNELPATH))
 	sed "s@KERNELRELEASE@$(KERNELRELEASE)@; \
-		s@SRCTREE@$(shell realpath $(KERNELPATH).tar.gz)@" \
+		s@SRCTREE@$(abs_srctree)@" \
 		$(srctree)/scripts/package/snapcraft.template > \
 		$(objtree)/snap/snapcraft.yaml
 	cd $(objtree)/snap && \
diff --git a/scripts/check-uapi.sh b/scripts/check-uapi.sh
new file mode 100755
index 000000000000..955581735cb3
--- /dev/null
+++ b/scripts/check-uapi.sh
@@ -0,0 +1,573 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+# Script to check commits for UAPI backwards compatibility
+
+set -o errexit
+set -o pipefail
+
+print_usage() {
+	name=$(basename "$0")
+	cat << EOF
+$name - check for UAPI header stability across Git commits
+
+By default, the script will check to make sure the latest commit (or current
+dirty changes) did not introduce ABI changes when compared to HEAD^1. You can
+check against additional commit ranges with the -b and -p options.
+
+The script will not check UAPI headers for architectures other than the one
+defined in ARCH.
+
+Usage: $name [-b BASE_REF] [-p PAST_REF] [-j N] [-l ERROR_LOG] [-i] [-q] [-v]
+
+Options:
+    -b BASE_REF    Base git reference to use for comparison. If unspecified or empty,
+                   will use any dirty changes in tree to UAPI files. If there are no
+                   dirty changes, HEAD will be used.
+    -p PAST_REF    Compare BASE_REF to PAST_REF (e.g. -p v6.1). If unspecified or empty,
+                   will use BASE_REF^1. Must be an ancestor of BASE_REF. Only headers
+                   that exist on PAST_REF will be checked for compatibility.
+    -j JOBS        Number of checks to run in parallel (default: number of CPU cores).
+    -l ERROR_LOG   Write error log to file (default: no error log is generated).
+    -i             Ignore ambiguous changes that may or may not break UAPI compatibility.
+    -q             Quiet operation.
+    -v             Verbose operation (print more information about each header being checked).
+
+Environmental args:
+    ABIDIFF  Custom path to abidiff binary
+    CC       C compiler (default is "gcc")
+    ARCH     Target architecture for the UAPI check (default is host arch)
+
+Exit codes:
+    $SUCCESS) Success
+    $FAIL_ABI) ABI difference detected
+    $FAIL_PREREQ) Prerequisite not met
+EOF
+}
+
+readonly SUCCESS=0
+readonly FAIL_ABI=1
+readonly FAIL_PREREQ=2
+
+# Print to stderr
+eprintf() {
+	# shellcheck disable=SC2059
+	printf "$@" >&2
+}
+
+# Expand an array with a specific character (similar to Python string.join())
+join() {
+	local IFS="$1"
+	shift
+	printf "%s" "$*"
+}
+
+# Create abidiff suppressions
+gen_suppressions() {
+	# Common enum variant names which we don't want to worry about
+	# being shifted when new variants are added.
+	local -a enum_regex=(
+		".*_AFTER_LAST$"
+		".*_CNT$"
+		".*_COUNT$"
+		".*_END$"
+		".*_LAST$"
+		".*_MASK$"
+		".*_MAX$"
+		".*_MAX_BIT$"
+		".*_MAX_BPF_ATTACH_TYPE$"
+		".*_MAX_ID$"
+		".*_MAX_SHIFT$"
+		".*_NBITS$"
+		".*_NETDEV_NUMHOOKS$"
+		".*_NFT_META_IIFTYPE$"
+		".*_NL80211_ATTR$"
+		".*_NLDEV_NUM_OPS$"
+		".*_NUM$"
+		".*_NUM_ELEMS$"
+		".*_NUM_IRQS$"
+		".*_SIZE$"
+		".*_TLSMAX$"
+		"^MAX_.*"
+		"^NUM_.*"
+	)
+
+	# Common padding field names which can be expanded into
+	# without worrying about users.
+	local -a padding_regex=(
+		".*end$"
+		".*pad$"
+		".*pad[0-9]?$"
+		".*pad_[0-9]?$"
+		".*padding$"
+		".*padding[0-9]?$"
+		".*padding_[0-9]?$"
+		".*res$"
+		".*resv$"
+		".*resv[0-9]?$"
+		".*resv_[0-9]?$"
+		".*reserved$"
+		".*reserved[0-9]?$"
+		".*reserved_[0-9]?$"
+		".*rsvd[0-9]?$"
+		".*unused$"
+	)
+
+	cat << EOF
+[suppress_type]
+  type_kind = enum
+  changed_enumerators_regexp = $(join , "${enum_regex[@]}")
+EOF
+
+	for p in "${padding_regex[@]}"; do
+		cat << EOF
+[suppress_type]
+  type_kind = struct
+  has_data_member_inserted_at = offset_of_first_data_member_regexp(${p})
+EOF
+	done
+
+if [ "$IGNORE_AMBIGUOUS_CHANGES" = "true" ]; then
+	cat << EOF
+[suppress_type]
+  type_kind = struct
+  has_data_member_inserted_at = end
+  has_size_change = yes
+EOF
+fi
+}
+
+# Check if git tree is dirty
+tree_is_dirty() {
+	! git diff --quiet
+}
+
+# Get list of files installed in $ref
+get_file_list() {
+	local -r ref="$1"
+	local -r tree="$(get_header_tree "$ref")"
+
+	# Print all installed headers, filtering out ones that can't be compiled
+	find "$tree" -type f -name '*.h' -printf '%P\n' | grep -v -f "$INCOMPAT_LIST"
+}
+
+# Add to the list of incompatible headers
+add_to_incompat_list() {
+	local -r ref="$1"
+
+	# Start with the usr/include/Makefile to get a list of the headers
+	# that don't compile using this method.
+	if [ ! -f usr/include/Makefile ]; then
+		eprintf "error - no usr/include/Makefile present at %s\n" "$ref"
+		eprintf "Note: usr/include/Makefile was added in the v5.3 kernel release\n"
+		exit "$FAIL_PREREQ"
+	fi
+	{
+		# shellcheck disable=SC2016
+		printf 'all: ; @echo $(no-header-test)\n'
+		cat usr/include/Makefile
+	} | SRCARCH="$ARCH" make --always-make -f - | tr " " "\n" \
+	  | grep -v "asm-generic" >> "$INCOMPAT_LIST"
+
+	# The makefile also skips all asm-generic files, but prints "asm-generic/%"
+	# which won't work for our grep match. Instead, print something grep will match.
+	printf "asm-generic/.*\.h\n" >> "$INCOMPAT_LIST"
+}
+
+# Compile the simple test app
+do_compile() {
+	local -r inc_dir="$1"
+	local -r header="$2"
+	local -r out="$3"
+	printf "int main(void) { return 0; }\n" | \
+		"$CC" -c \
+		  -o "$out" \
+		  -x c \
+		  -O0 \
+		  -std=c90 \
+		  -fno-eliminate-unused-debug-types \
+		  -g \
+		  "-I${inc_dir}" \
+		  -include "$header" \
+		  -
+}
+
+# Run make headers_install
+run_make_headers_install() {
+	local -r ref="$1"
+	local -r install_dir="$(get_header_tree "$ref")"
+	make -j "$MAX_THREADS" ARCH="$ARCH" INSTALL_HDR_PATH="$install_dir" \
+		headers_install > /dev/null
+}
+
+# Install headers for both git refs
+install_headers() {
+	local -r base_ref="$1"
+	local -r past_ref="$2"
+
+	for ref in "$base_ref" "$past_ref"; do
+		printf "Installing user-facing UAPI headers from %s... " "${ref:-dirty tree}"
+		if [ -n "$ref" ]; then
+			git archive --format=tar --prefix="${ref}-archive/" "$ref" \
+				| (cd "$TMP_DIR" && tar xf -)
+			(
+				cd "${TMP_DIR}/${ref}-archive"
+				run_make_headers_install "$ref"
+				add_to_incompat_list "$ref" "$INCOMPAT_LIST"
+			)
+		else
+			run_make_headers_install "$ref"
+			add_to_incompat_list "$ref" "$INCOMPAT_LIST"
+		fi
+		printf "OK\n"
+	done
+	sort -u -o "$INCOMPAT_LIST" "$INCOMPAT_LIST"
+	sed -i -e '/^$/d' "$INCOMPAT_LIST"
+}
+
+# Print the path to the headers_install tree for a given ref
+get_header_tree() {
+	local -r ref="$1"
+	printf "%s" "${TMP_DIR}/${ref}/usr"
+}
+
+# Check every header installed at $past_ref against $base_ref; returns 0 only if no check failed
+check_uapi_files() {
+	local -r base_ref="$1"
+	local -r past_ref="$2"
+	local -r abi_error_log="$3"
+
+	local passed=0;
+	local failed=0;
+	local -a threads=()
+	set -o errexit
+
+	printf "Checking changes to UAPI headers between %s and %s...\n" "$past_ref" "${base_ref:-dirty tree}"
+	# Loop over all UAPI headers that were installed by $past_ref (if they only exist on $base_ref,
+	# there's no way they're broken and no way to compare anyway)
+	while read -r file; do
+		if [ "${#threads[@]}" -ge "$MAX_THREADS" ]; then	# job limit reached: reap the oldest first
+			if wait "${threads[0]}"; then
+				passed=$((passed + 1))
+			else
+				failed=$((failed + 1))
+			fi
+			threads=("${threads[@]:1}")	# drop the job that was just reaped
+		fi
+
+		check_individual_file "$base_ref" "$past_ref" "$file" &
+		threads+=("$!")
+	done < <(get_file_list "$past_ref")
+
+	for t in "${threads[@]}"; do	# collect the results of the jobs still outstanding
+		if wait "$t"; then
+			passed=$((passed + 1))
+		else
+			failed=$((failed + 1))
+		fi
+	done
+
+	if [ -n "$abi_error_log" ]; then
+		printf 'Generated by "%s %s" from git ref %s\n\n' \
+			"$0" "$*" "$(git rev-parse HEAD)" > "$abi_error_log"
+	fi
+
+	while read -r error_file; do	# replay each per-header *.error report to stderr (and the log)
+		{
+			cat "$error_file"
+			printf "\n\n"
+		} | tee -a "${abi_error_log:-/dev/null}" >&2
+	done < <(find "$TMP_DIR" -type f -name '*.error' | sort)
+
+	total="$((passed + failed))"
+	if [ "$failed" -gt 0 ]; then
+		eprintf "error - %d/%d UAPI headers compatible with %s appear _not_ to be backwards compatible\n" \
+			"$failed" "$total" "$ARCH"
+		if [ -n "$abi_error_log" ]; then
+			eprintf "Failure summary saved to %s\n" "$abi_error_log"
+		fi
+	else
+		printf "All %d UAPI headers compatible with %s appear to be backwards compatible\n" \
+			"$total" "$ARCH"
+	fi
+
+	return "$((failed > 0))"	# a raw count would wrap modulo 256 (256 failures would read as success)
+}
+
+# Check an individual file for UAPI compatibility
+check_individual_file() {
+	local -r base_ref="$1"
+	local -r past_ref="$2"
+	local -r file="$3"
+
+	local -r base_header="$(get_header_tree "$base_ref")/${file}"
+	local -r past_header="$(get_header_tree "$past_ref")/${file}"
+
+	if [ ! -f "$base_header" ]; then
+		mkdir -p "$(dirname "$base_header")"
+		printf "==== UAPI header %s was removed between %s and %s ====" \
+			"$file" "$past_ref" "$base_ref" \
+				> "${base_header}.error"
+		return 1
+	fi
+
+	compare_abi "$file" "$base_header" "$past_header" "$base_ref" "$past_ref"
+}
+
+# Perform the A/B compilation and compare output ABI
+compare_abi() {
+	local -r file="$1"
+	local -r base_header="$2"
+	local -r past_header="$3"
+	local -r base_ref="$4"
+	local -r past_ref="$5"
+	local -r log="${TMP_DIR}/log/${file}.log"
+	local -r error_log="${TMP_DIR}/log/${file}.error"
+
+	mkdir -p "$(dirname "$log")"
+
+	if ! do_compile "$(get_header_tree "$base_ref")/include" "$base_header" "${base_header}.bin" 2> "$log"; then
+		{
+			warn_str=$(printf "==== Could not compile version of UAPI header %s at %s ====\n" \
+				"$file" "$base_ref")
+			printf "%s\n" "$warn_str"
+			cat "$log"
+			printf -- "=%.0s" $(seq 0 ${#warn_str})
+		} > "$error_log"
+		return 1
+	fi
+
+	if ! do_compile "$(get_header_tree "$past_ref")/include" "$past_header" "${past_header}.bin" 2> "$log"; then
+		{
+			warn_str=$(printf "==== Could not compile version of UAPI header %s at %s ====\n" \
+				"$file" "$past_ref")
+			printf "%s\n" "$warn_str"
+			cat "$log"
+			printf -- "=%.0s" $(seq 0 ${#warn_str})
+		} > "$error_log"
+		return 1
+	fi
+
+	local ret=0
+	"$ABIDIFF" --non-reachable-types \
+		--suppressions "$SUPPRESSIONS" \
+		"${past_header}.bin" "${base_header}.bin" > "$log" || ret="$?"
+	if [ "$ret" -eq 0 ]; then
+		if [ "$VERBOSE" = "true" ]; then
+			printf "No ABI differences detected in %s from %s -> %s\n" \
+				"$file" "$past_ref" "$base_ref"
+		fi
+	else
+		# Bits in abidiff's return code can be used to determine the type of error
+		if [ $((ret & 0x2)) -gt 0 ]; then
+			eprintf "error - abidiff did not run properly\n"
+			exit 1
+		fi
+
+		if [ "$IGNORE_AMBIGUOUS_CHANGES" = "true" ] && [ "$ret" -eq 4 ]; then
+			return 0
+		fi
+
+		# If the only changes were additions (not modifications to existing APIs), then
+		# there's no problem. Ignore these diffs.
+		if grep "Unreachable types summary" "$log" | grep -q "0 removed" &&
+		   grep "Unreachable types summary" "$log" | grep -q "0 changed"; then
+			return 0
+		fi
+
+		{
+			warn_str=$(printf "==== ABI differences detected in %s from %s -> %s ====" \
+				"$file" "$past_ref" "$base_ref")
+			printf "%s\n" "$warn_str"
+			sed  -e '/summary:/d' -e '/changed type/d' -e '/^$/d' -e 's/^/  /g' "$log"
+			printf -- "=%.0s" $(seq 0 ${#warn_str})
+			if cmp "$past_header" "$base_header" > /dev/null 2>&1; then
+				printf "\n%s did not change between %s and %s...\n" "$file" "$past_ref" "${base_ref:-dirty tree}"
+				printf "It's possible a change to one of the headers it includes caused this error:\n"
+				grep '^#include' "$base_header"
+				printf "\n"
+			fi
+		} > "$error_log"
+
+		return 1
+	fi
+}
+
+# Check that a minimum software version number is satisfied
+min_version_is_satisfied() {
+	local -r min_version="$1"
+	local -r version_installed="$2"
+
+	printf "%s\n%s\n" "$min_version" "$version_installed" \
+		| sort -Vc > /dev/null 2>&1
+}
+
+# Make sure we have the tools we need and the arguments make sense (returns 1 on the first problem)
+check_deps() {
+	ABIDIFF="${ABIDIFF:-abidiff}"
+	CC="${CC:-gcc}"
+	ARCH="${ARCH:-$(uname -m)}"
+	if [ "$ARCH" = "x86_64" ]; then
+		ARCH="x86"
+	fi
+
+	local -r abidiff_min_version="2.4"
+	local -r libdw_min_version_if_clang="0.171"
+
+	if ! command -v "$ABIDIFF" > /dev/null 2>&1; then
+		eprintf "error - abidiff not found!\n"
+		eprintf "Please install abigail-tools version %s or greater\n" "$abidiff_min_version"
+		eprintf "See: https://sourceware.org/libabigail/manual/libabigail-overview.html\n"
+		return 1
+	fi
+
+	local -r abidiff_version="$("$ABIDIFF" --version | cut -d ' ' -f 2)"
+	if ! min_version_is_satisfied "$abidiff_min_version" "$abidiff_version"; then
+		eprintf "error - abidiff version too old: %s\n" "$abidiff_version"
+		eprintf "Please install abigail-tools version %s or greater\n" "$abidiff_min_version"
+		eprintf "See: https://sourceware.org/libabigail/manual/libabigail-overview.html\n"
+		return 1
+	fi
+
+	if ! command -v "$CC" > /dev/null 2>&1; then
+		eprintf 'error - %s not found\n' "$CC"
+		return 1
+	fi
+
+	if "$CC" --version | grep -q clang; then
+		local -r libdw_version="$(ldconfig -v 2>/dev/null | grep -v SKIPPED | grep -m 1 -o 'libdw-[0-9]\+.[0-9]\+' | cut -c 7-)"
+		if ! min_version_is_satisfied "$libdw_min_version_if_clang" "$libdw_version"; then
+			eprintf "error - libdw version too old for use with clang: %s\n" "$libdw_version"
+			eprintf "Please install libdw from elfutils version %s or greater\n" "$libdw_min_version_if_clang"
+			eprintf "See: https://sourceware.org/elfutils/\n"
+			return 1
+		fi
+	fi
+
+	if [ ! -d "arch/${ARCH}" ]; then
+		eprintf 'error - ARCH "%s" is not a subdirectory under arch/\n' "$ARCH"
+		eprintf "Please set ARCH to one of:\n%s\n" "$(find arch -maxdepth 1 -mindepth 1 -type d -printf '%f ' | fmt)"
+		return 1
+	fi
+
+	if ! git rev-parse --is-inside-work-tree > /dev/null 2>&1; then
+		eprintf "error - this script requires the kernel tree to be initialized with Git\n"
+		return 1
+	fi
+
+	if ! git rev-parse --verify "$past_ref" > /dev/null 2>&1; then	# past_ref/base_ref are not set here; read from the calling scope
+		eprintf 'error - invalid git reference "%s"\n' "$past_ref"
+		return 1
+	fi
+
+	if [ -n "$base_ref" ]; then
+		if ! git merge-base --is-ancestor "$past_ref" "$base_ref" > /dev/null 2>&1; then
+			eprintf 'error - "%s" is not an ancestor of base ref "%s"\n' "$past_ref" "$base_ref"
+			return 1
+		fi
+		if [ "$(git rev-parse "$base_ref")" = "$(git rev-parse "$past_ref")" ]; then
+			eprintf 'error - "%s" and "%s" are the same reference\n' "$past_ref" "$base_ref"
+			return 1
+		fi
+	fi
+}
+
+run() {
+	local base_ref="$1"
+	local past_ref="$2"
+	local abi_error_log="$3"
+	shift 3
+
+	if [ -z "$KERNEL_SRC" ]; then
+		KERNEL_SRC="$(realpath "$(dirname "$0")"/..)"
+	fi
+
+	cd "$KERNEL_SRC"
+
+	if [ -z "$base_ref" ] && ! tree_is_dirty; then
+		base_ref=HEAD
+	fi
+
+	if [ -z "$past_ref" ]; then
+		if [ -n "$base_ref" ]; then
+			past_ref="${base_ref}^1"
+		else
+			past_ref=HEAD
+		fi
+	fi
+
+	if ! check_deps; then
+		exit "$FAIL_PREREQ"
+	fi
+
+	TMP_DIR=$(mktemp -d)
+	readonly TMP_DIR
+	trap 'rm -rf "$TMP_DIR"' EXIT
+
+	readonly INCOMPAT_LIST="${TMP_DIR}/incompat_list.txt"
+	touch "$INCOMPAT_LIST"
+
+	readonly SUPPRESSIONS="${TMP_DIR}/suppressions.txt"
+	gen_suppressions > "$SUPPRESSIONS"
+
+	# Run make headers_install for both refs
+	install_headers "$base_ref" "$past_ref"
+
+	# Check for any differences in the installed header trees
+	if diff -r -q "$(get_header_tree "$base_ref")" "$(get_header_tree "$past_ref")" > /dev/null 2>&1; then
+		printf "No changes to UAPI headers were applied between %s and %s\n" "$past_ref" "${base_ref:-dirty tree}"
+		exit "$SUCCESS"
+	fi
+
+	if ! check_uapi_files "$base_ref" "$past_ref" "$abi_error_log"; then
+		exit "$FAIL_ABI"
+	fi
+}
+
+main() {	# Parse the command-line options, then hand the refs and log path to run()
+	MAX_THREADS=$(nproc)
+	VERBOSE="false"
+	IGNORE_AMBIGUOUS_CHANGES="false"
+	quiet="false"
+	local base_ref="" past_ref="" abi_error_log=""	# start empty so same-named environment variables cannot leak in
+	while getopts "hb:p:j:l:iqv" opt; do
+		case $opt in
+		h)
+			print_usage
+			exit "$SUCCESS"
+			;;
+		b)
+			base_ref="$OPTARG"
+			;;
+		p)
+			past_ref="$OPTARG"
+			;;
+		j)
+			MAX_THREADS="$OPTARG"
+			;;
+		l)
+			abi_error_log="$OPTARG"
+			;;
+		i)
+			IGNORE_AMBIGUOUS_CHANGES="true"
+			;;
+		q)
+			quiet="true"
+			VERBOSE="false"
+			;;
+		v)
+			VERBOSE="true"
+			quiet="false"
+			;;
+		*)
+			exit "$FAIL_PREREQ"
+		esac
+	done
+
+	if [ "$quiet" = "true" ]; then
+		exec > /dev/null 2>&1	# -q silences stdout and stderr for the rest of the run
+	fi
+
+	run "$base_ref" "$past_ref" "$abi_error_log" "$@"
+}
+
+main "$@"
diff --git a/scripts/coccinelle/api/device_attr_show.cocci b/scripts/coccinelle/api/device_attr_show.cocci
index a28dc061653a..550d1d2fc02a 100644
--- a/scripts/coccinelle/api/device_attr_show.cocci
+++ b/scripts/coccinelle/api/device_attr_show.cocci
@@ -1,10 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0-only
 ///
 /// From Documentation/filesystems/sysfs.rst:
-///  show() must not use snprintf() when formatting the value to be
-///  returned to user space. If you can guarantee that an overflow
-///  will never happen you can use sprintf() otherwise you must use
-///  scnprintf().
+///  show() should only use sysfs_emit() or sysfs_emit_at() when formatting
+///  the value to be returned to user space.
 ///
 // Confidence: High
 // Copyright: (C) 2020 Denis Efremov ISPRAS
@@ -30,15 +28,16 @@ ssize_t show(struct device *dev, struct device_attribute *attr, char *buf)
 
 @rp depends on patch@
 identifier show, dev, attr, buf;
+expression BUF, SZ, FORMAT, STR;
 @@
 
 ssize_t show(struct device *dev, struct device_attribute *attr, char *buf)
 {
 	<...
 	return
--		snprintf
-+		scnprintf
-			(...);
+-		snprintf(BUF, SZ, FORMAT
++		sysfs_emit(BUF, FORMAT
+				,...);
 	...>
 }
 
@@ -46,10 +45,10 @@ ssize_t show(struct device *dev, struct device_attribute *attr, char *buf)
 p << r.p;
 @@
 
-coccilib.report.print_report(p[0], "WARNING: use scnprintf or sprintf")
+coccilib.report.print_report(p[0], "WARNING: please use sysfs_emit or sysfs_emit_at")
 
 @script: python depends on org@
 p << r.p;
 @@
 
-coccilib.org.print_todo(p[0], "WARNING: use scnprintf or sprintf")
+coccilib.org.print_todo(p[0], "WARNING: please use sysfs_emit or sysfs_emit_at")
diff --git a/scripts/gdb/linux/tasks.py b/scripts/gdb/linux/tasks.py
index aa5ab6251f76..6793d6e86e77 100644
--- a/scripts/gdb/linux/tasks.py
+++ b/scripts/gdb/linux/tasks.py
@@ -82,21 +82,12 @@ LxPs()
 
 thread_info_type = utils.CachedType("struct thread_info")
 
-ia64_task_size = None
-
 
 def get_thread_info(task):
     thread_info_ptr_type = thread_info_type.get_type().pointer()
-    if utils.is_target_arch("ia64"):
-        global ia64_task_size
-        if ia64_task_size is None:
-            ia64_task_size = gdb.parse_and_eval("sizeof(struct task_struct)")
-        thread_info_addr = task.address + ia64_task_size
-        thread_info = thread_info_addr.cast(thread_info_ptr_type)
-    else:
-        if task.type.fields()[0].type == thread_info_type.get_type():
-            return task['thread_info']
-        thread_info = task['stack'].cast(thread_info_ptr_type)
+    if task.type.fields()[0].type == thread_info_type.get_type():
+        return task['thread_info']
+    thread_info = task['stack'].cast(thread_info_ptr_type)
     return thread_info.dereference()
 
 
diff --git a/scripts/generate_rust_target.rs b/scripts/generate_rust_target.rs
index 3c6cbe2b278d..0da52b548ba5 100644
--- a/scripts/generate_rust_target.rs
+++ b/scripts/generate_rust_target.rs
@@ -161,6 +161,13 @@ fn main() {
         ts.push("features", features);
         ts.push("llvm-target", "x86_64-linux-gnu");
         ts.push("target-pointer-width", "64");
+    } else if cfg.has("LOONGARCH") {
+        ts.push("arch", "loongarch64");
+        ts.push("data-layout", "e-m:e-p:64:64-i64:64-i128:128-n64-S128");
+        ts.push("features", "-f,-d");
+        ts.push("llvm-target", "loongarch64-linux-gnusf");
+        ts.push("llvm-abiname", "lp64s");
+        ts.push("target-pointer-width", "64");
     } else {
         panic!("Unsupported architecture");
     }
diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c
index f5dfdb9d80e9..f3901c55df23 100644
--- a/scripts/genksyms/genksyms.c
+++ b/scripts/genksyms/genksyms.c
@@ -16,9 +16,7 @@
 #include <unistd.h>
 #include <assert.h>
 #include <stdarg.h>
-#ifdef __GNU_LIBRARY__
 #include <getopt.h>
-#endif				/* __GNU_LIBRARY__ */
 
 #include "genksyms.h"
 /*----------------------------------------------------------------------*/
@@ -718,8 +716,6 @@ void error_with_pos(const char *fmt, ...)
 static void genksyms_usage(void)
 {
 	fputs("Usage:\n" "genksyms [-adDTwqhVR] > /path/to/.tmp_obj.ver\n" "\n"
-#ifdef __GNU_LIBRARY__
-	      "  -s, --symbol-prefix   Select symbol prefix\n"
 	      "  -d, --debug           Increment the debug level (repeatable)\n"
 	      "  -D, --dump            Dump expanded symbol defs (for debugging only)\n"
 	      "  -r, --reference file  Read reference symbols from a file\n"
@@ -729,18 +725,6 @@ static void genksyms_usage(void)
 	      "  -q, --quiet           Disable warnings (default)\n"
 	      "  -h, --help            Print this message\n"
 	      "  -V, --version         Print the release version\n"
-#else				/* __GNU_LIBRARY__ */
-	      "  -s                    Select symbol prefix\n"
-	      "  -d                    Increment the debug level (repeatable)\n"
-	      "  -D                    Dump expanded symbol defs (for debugging only)\n"
-	      "  -r file               Read reference symbols from a file\n"
-	      "  -T file               Dump expanded types into file\n"
-	      "  -p                    Preserve reference modversions or fail\n"
-	      "  -w                    Enable warnings\n"
-	      "  -q                    Disable warnings (default)\n"
-	      "  -h                    Print this message\n"
-	      "  -V                    Print the release version\n"
-#endif				/* __GNU_LIBRARY__ */
 	      , stderr);
 }
 
@@ -749,7 +733,6 @@ int main(int argc, char **argv)
 	FILE *dumpfile = NULL, *ref_file = NULL;
 	int o;
 
-#ifdef __GNU_LIBRARY__
 	struct option long_opts[] = {
 		{"debug", 0, 0, 'd'},
 		{"warnings", 0, 0, 'w'},
@@ -763,11 +746,8 @@ int main(int argc, char **argv)
 		{0, 0, 0, 0}
 	};
 
-	while ((o = getopt_long(argc, argv, "s:dwqVDr:T:ph",
+	while ((o = getopt_long(argc, argv, "dwqVDr:T:ph",
 				&long_opts[0], NULL)) != EOF)
-#else				/* __GNU_LIBRARY__ */
-	while ((o = getopt(argc, argv, "s:dwqVDr:T:ph")) != EOF)
-#endif				/* __GNU_LIBRARY__ */
 		switch (o) {
 		case 'd':
 			flag_debug++;
diff --git a/scripts/git.orderFile b/scripts/git.orderFile
new file mode 100644
index 000000000000..5102ba73357f
--- /dev/null
+++ b/scripts/git.orderFile
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# order file for git, to produce patches which are easier to review
+# by diffing the important stuff like header changes first.
+#
+# one-off usage:
+#   git diff -O scripts/git.orderFile ...
+#
+# add to git config:
+#   git config diff.orderFile scripts/git.orderFile
+#
+
+MAINTAINERS
+
+# Documentation
+Documentation/*
+*.rst
+
+# git-specific
+.gitignore
+scripts/git.orderFile
+
+# build system
+Kconfig*
+*/Kconfig*
+Kbuild*
+*/Kbuild*
+Makefile*
+*/Makefile*
+*.mak
+*.mk
+scripts/*
+
+# semantic patches
+*.cocci
+
+# headers
+*types.h
+*.h
+
+# code
+*.c
diff --git a/scripts/head-object-list.txt b/scripts/head-object-list.txt
index 26359968744e..890f69005bab 100644
--- a/scripts/head-object-list.txt
+++ b/scripts/head-object-list.txt
@@ -17,7 +17,6 @@ arch/arm/kernel/head-nommu.o
 arch/arm/kernel/head.o
 arch/csky/kernel/head.o
 arch/hexagon/kernel/head.o
-arch/ia64/kernel/head.o
 arch/loongarch/kernel/head.o
 arch/m68k/68000/head.o
 arch/m68k/coldfire/head.o
diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index 4eee155121a8..ea1bf3b3dbde 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -27,6 +27,14 @@ KCONFIG_DEFCONFIG_LIST += \
 endif
 KCONFIG_DEFCONFIG_LIST += arch/$(SRCARCH)/configs/$(KBUILD_DEFCONFIG)
 
+ifneq ($(findstring c, $(KBUILD_EXTRA_WARN)),)
+export KCONFIG_WARN_UNKNOWN_SYMBOLS=1
+endif
+
+ifneq ($(findstring e, $(KBUILD_EXTRA_WARN)),)
+export KCONFIG_WERROR=1
+endif
+
 # We need this, in case the user has it in its environment
 unexport CONFIG_
 
@@ -99,7 +107,7 @@ config-fragments = $(call configfiles,$@)
 
 %.config: $(obj)/conf
 	$(if $(config-fragments),, $(error $@ fragment does not exists on this architecture))
-	$(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m .config $(config-fragments)
+	$(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m $(KCONFIG_CONFIG) $(config-fragments)
 	$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig
 
 PHONY += tinyconfig
@@ -166,7 +174,7 @@ conf-objs	:= conf.o $(common-objs)
 
 # nconf: Used for the nconfig target based on ncurses
 hostprogs	+= nconf
-nconf-objs	:= nconf.o nconf.gui.o $(common-objs)
+nconf-objs	:= nconf.o nconf.gui.o mnconf-common.o $(common-objs)
 
 HOSTLDLIBS_nconf       = $(call read-file, $(obj)/nconf-libs)
 HOSTCFLAGS_nconf.o     = $(call read-file, $(obj)/nconf-cflags)
@@ -179,7 +187,7 @@ $(obj)/nconf.o $(obj)/nconf.gui.o: | $(obj)/nconf-cflags
 hostprogs	+= mconf
 lxdialog	:= $(addprefix lxdialog/, \
 		     checklist.o inputbox.o menubox.o textbox.o util.o yesno.o)
-mconf-objs	:= mconf.o $(lxdialog) $(common-objs)
+mconf-objs	:= mconf.o $(lxdialog) mnconf-common.o $(common-objs)
 
 HOSTLDLIBS_mconf = $(call read-file, $(obj)/mconf-libs)
 $(foreach f, mconf.o $(lxdialog), \
diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c
index 33d19e419908..662a5e7c37c2 100644
--- a/scripts/kconfig/conf.c
+++ b/scripts/kconfig/conf.c
@@ -827,6 +827,9 @@ int main(int ac, char **av)
 		break;
 	}
 
+	if (conf_errors())
+		exit(1);
+
 	if (sync_kconfig) {
 		name = getenv("KCONFIG_NOSILENTUPDATE");
 		if (name && *name) {
@@ -890,6 +893,9 @@ int main(int ac, char **av)
 		break;
 	}
 
+	if (sym_dep_errors())
+		exit(1);
+
 	if (input_mode == savedefconfig) {
 		if (conf_write_defconfig(defconfig_file)) {
 			fprintf(stderr, "n*** Error while saving defconfig to: %s\n\n",
diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
index 4a6811d77d18..f53dcdd44597 100644
--- a/scripts/kconfig/confdata.c
+++ b/scripts/kconfig/confdata.c
@@ -155,6 +155,13 @@ static void conf_message(const char *fmt, ...)
 static const char *conf_filename;
 static int conf_lineno, conf_warnings;
 
+bool conf_errors(void)
+{
+	if (conf_warnings)	/* environment is consulted only if the counter is non-zero */
+		return getenv("KCONFIG_WERROR");	/* set (even to "") => true */
+	return false;
+}
+
 static void conf_warning(const char *fmt, ...)
 {
 	va_list ap;
@@ -289,16 +296,12 @@ static int conf_set_sym_val(struct symbol *sym, int def, int def_flags, char *p)
 #define LINE_GROWTH 16
 static int add_byte(int c, char **lineptr, size_t slen, size_t *n)
 {
-	char *nline;
 	size_t new_size = slen + 1;
+
 	if (new_size > *n) {
 		new_size += LINE_GROWTH - 1;
 		new_size *= 2;
-		nline = xrealloc(*lineptr, new_size);
-		if (!nline)
-			return -1;
-
-		*lineptr = nline;
+		*lineptr = xrealloc(*lineptr, new_size);
 		*n = new_size;
 	}
 
@@ -341,19 +344,37 @@ e_out:
 	return -1;
 }
 
+/* like getline(), but a trailing "\n" or "\r\n" is stripped from the line */
+static ssize_t getline_stripped(char **lineptr, size_t *n, FILE *stream)
+{
+	ssize_t len;
+
+	len = compat_getline(lineptr, n, stream);
+
+	if (len > 0 && (*lineptr)[len - 1] == '\n') {	/* len <= 0 passes through */
+		len--;
+		(*lineptr)[len] = '\0';
+
+		if (len > 0 && (*lineptr)[len - 1] == '\r') {	/* CR of a CRLF pair */
+			len--;
+			(*lineptr)[len] = '\0';
+		}
+	}
+
+	return len;	/* length without the stripped characters */
+}
+
 int conf_read_simple(const char *name, int def)
 {
 	FILE *in = NULL;
 	char   *line = NULL;
 	size_t  line_asize = 0;
-	char *p, *p2;
+	char *p, *val;
 	struct symbol *sym;
 	int i, def_flags;
-	const char *warn_unknown;
-	const char *werror;
+	const char *warn_unknown, *sym_name;
 
 	warn_unknown = getenv("KCONFIG_WARN_UNKNOWN_SYMBOLS");
-	werror = getenv("KCONFIG_WERROR");
 	if (name) {
 		in = zconf_fopen(name);
 	} else {
@@ -417,8 +438,7 @@ load:
 		case S_INT:
 		case S_HEX:
 		case S_STRING:
-			if (sym->def[def].val)
-				free(sym->def[def].val);
+			free(sym->def[def].val);
 			/* fall through */
 		default:
 			sym->def[def].val = NULL;
@@ -426,90 +446,68 @@ load:
 		}
 	}
 
-	while (compat_getline(&line, &line_asize, in) != -1) {
+	while (getline_stripped(&line, &line_asize, in) != -1) {
 		conf_lineno++;
-		sym = NULL;
+
+		if (!line[0]) /* blank line */
+			continue;
+
 		if (line[0] == '#') {
-			if (memcmp(line + 2, CONFIG_, strlen(CONFIG_)))
+			if (line[1] != ' ')
+				continue;
+			p = line + 2;
+			if (memcmp(p, CONFIG_, strlen(CONFIG_)))
 				continue;
-			p = strchr(line + 2 + strlen(CONFIG_), ' ');
+			sym_name = p + strlen(CONFIG_);
+			p = strchr(sym_name, ' ');
 			if (!p)
 				continue;
 			*p++ = 0;
-			if (strncmp(p, "is not set", 10))
+			if (strcmp(p, "is not set"))
 				continue;
-			if (def == S_DEF_USER) {
-				sym = sym_find(line + 2 + strlen(CONFIG_));
-				if (!sym) {
-					if (warn_unknown)
-						conf_warning("unknown symbol: %s",
-							     line + 2 + strlen(CONFIG_));
-
-					conf_set_changed(true);
-					continue;
-				}
-			} else {
-				sym = sym_lookup(line + 2 + strlen(CONFIG_), 0);
-				if (sym->type == S_UNKNOWN)
-					sym->type = S_BOOLEAN;
-			}
-			if (sym->flags & def_flags) {
-				conf_warning("override: reassigning to symbol %s", sym->name);
-			}
-			switch (sym->type) {
-			case S_BOOLEAN:
-			case S_TRISTATE:
-				sym->def[def].tri = no;
-				sym->flags |= def_flags;
-				break;
-			default:
-				;
-			}
-		} else if (memcmp(line, CONFIG_, strlen(CONFIG_)) == 0) {
-			p = strchr(line + strlen(CONFIG_), '=');
-			if (!p)
+
+			val = "n";
+		} else {
+			if (memcmp(line, CONFIG_, strlen(CONFIG_))) {
+				conf_warning("unexpected data: %s", line);
 				continue;
-			*p++ = 0;
-			p2 = strchr(p, '\n');
-			if (p2) {
-				*p2-- = 0;
-				if (*p2 == '\r')
-					*p2 = 0;
 			}
 
-			sym = sym_find(line + strlen(CONFIG_));
-			if (!sym) {
-				if (def == S_DEF_AUTO) {
-					/*
-					 * Reading from include/config/auto.conf
-					 * If CONFIG_FOO previously existed in
-					 * auto.conf but it is missing now,
-					 * include/config/FOO must be touched.
-					 */
-					conf_touch_dep(line + strlen(CONFIG_));
-				} else {
-					if (warn_unknown)
-						conf_warning("unknown symbol: %s",
-							     line + strlen(CONFIG_));
-
-					conf_set_changed(true);
-				}
+			sym_name = line + strlen(CONFIG_);
+			p = strchr(sym_name, '=');
+			if (!p) {
+				conf_warning("unexpected data: %s", line);
 				continue;
 			}
+			*p = 0;
+			val = p + 1;
+		}
 
-			if (sym->flags & def_flags) {
-				conf_warning("override: reassigning to symbol %s", sym->name);
-			}
-			if (conf_set_sym_val(sym, def, def_flags, p))
-				continue;
-		} else {
-			if (line[0] != '\r' && line[0] != '\n')
-				conf_warning("unexpected data: %.*s",
-					     (int)strcspn(line, "\r\n"), line);
+		sym = sym_find(sym_name);
+		if (!sym) {
+			if (def == S_DEF_AUTO) {
+				/*
+				 * Reading from include/config/auto.conf.
+				 * If CONFIG_FOO previously existed in auto.conf
+				 * but it is missing now, include/config/FOO
+				 * must be touched.
+				 */
+				conf_touch_dep(sym_name);
+			} else {
+				if (warn_unknown)
+					conf_warning("unknown symbol: %s", sym_name);
 
+				conf_set_changed(true);
+			}
 			continue;
 		}
 
+		if (sym->flags & def_flags)
+			conf_warning("override: reassigning to symbol %s", sym->name);
+
+		if (conf_set_sym_val(sym, def, def_flags, val))
+			continue;
+
 		if (sym && sym_is_choice_value(sym)) {
 			struct symbol *cs = prop_get_symbol(sym_get_choice_prop(sym));
 			switch (sym->def[def].tri) {
@@ -533,9 +531,6 @@ load:
 	free(line);
 	fclose(in);
 
-	if (conf_warnings && werror)
-		exit(1);
-
 	return 0;
 }
 
@@ -594,7 +589,7 @@ int conf_read(const char *name)
 				/* Reset a string value if it's out of range */
 				if (sym_string_within_range(sym, sym->def[S_DEF_USER].val))
 					break;
-				sym->flags &= ~(SYMBOL_VALID|SYMBOL_DEF_USER);
+				sym->flags &= ~SYMBOL_VALID;
 				conf_unsaved++;
 				break;
 			default:
diff --git a/scripts/kconfig/expr.c b/scripts/kconfig/expr.c
index 81ebf8108ca7..a290de36307b 100644
--- a/scripts/kconfig/expr.c
+++ b/scripts/kconfig/expr.c
@@ -1131,7 +1131,6 @@ static int expr_compare_type(enum expr_type t1, enum expr_type t2)
 	default:
 		return -1;
 	}
-	printf("[%dgt%d?]", t1, t2);
 	return 0;
 }
 
diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h
index 471a59acecec..5cdc8f5e6446 100644
--- a/scripts/kconfig/lkc.h
+++ b/scripts/kconfig/lkc.h
@@ -99,8 +99,6 @@ bool menu_is_visible(struct menu *menu);
 bool menu_has_prompt(struct menu *menu);
 const char *menu_get_prompt(struct menu *menu);
 struct menu *menu_get_parent_menu(struct menu *menu);
-bool menu_has_help(struct menu *menu);
-const char *menu_get_help(struct menu *menu);
 int get_jump_key_char(void);
 struct gstr get_relations_str(struct symbol **sym_arr, struct list_head *head);
 void menu_get_ext_help(struct menu *menu, struct gstr *help);
diff --git a/scripts/kconfig/lkc_proto.h b/scripts/kconfig/lkc_proto.h
index edd1e617b25c..a4ae5e9eadad 100644
--- a/scripts/kconfig/lkc_proto.h
+++ b/scripts/kconfig/lkc_proto.h
@@ -1,4 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LKC_PROTO_H
+#define LKC_PROTO_H
+
 #include <stdarg.h>
 
 /* confdata.c */
@@ -12,6 +15,7 @@ void conf_set_changed(bool val);
 bool conf_get_changed(void);
 void conf_set_changed_callback(void (*fn)(void));
 void conf_set_message_callback(void (*fn)(const char *s));
+bool conf_errors(void);
 
 /* symbol.c */
 extern struct symbol * symbol_hash[SYMBOL_HASHSIZE];
@@ -22,6 +26,7 @@ void print_symbol_for_listconfig(struct symbol *sym);
 struct symbol ** sym_re_search(const char *pattern);
 const char * sym_type_name(enum symbol_type type);
 void sym_calc_value(struct symbol *sym);
+bool sym_dep_errors(void);
 enum symbol_type sym_get_type(struct symbol *sym);
 bool sym_tristate_within_range(struct symbol *sym,tristate tri);
 bool sym_set_tristate_value(struct symbol *sym,tristate tri);
@@ -50,3 +55,5 @@ char *expand_one_token(const char **str);
 
 /* expr.c */
 void expr_print(struct expr *e, void (*fn)(void *, struct symbol *, const char *), void *data, int prevtoken);
+
+#endif /* LKC_PROTO_H */
diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c
index eccc87a441e7..5df32148a869 100644
--- a/scripts/kconfig/mconf.c
+++ b/scripts/kconfig/mconf.c
@@ -21,6 +21,7 @@
 
 #include "lkc.h"
 #include "lxdialog/dialog.h"
+#include "mnconf-common.h"
 
 static const char mconf_readme[] =
 "Overview\n"
@@ -247,7 +248,7 @@ search_help[] =
 	"      -> PCI support (PCI [=y])\n"
 	"(1)     -> PCI access mode (<choice> [=y])\n"
 	"  Defined at drivers/pci/Kconfig:47\n"
-	"  Depends on: X86_LOCAL_APIC && X86_IO_APIC || IA64\n"
+	"  Depends on: X86_LOCAL_APIC && X86_IO_APIC\n"
 	"  Selects: LIBCRC32\n"
 	"  Selected by: BAR [=n]\n"
 	"-----------------------------------------------------------------\n"
@@ -286,7 +287,6 @@ static int single_menu_mode;
 static int show_all_options;
 static int save_and_exit;
 static int silent;
-static int jump_key_char;
 
 static void conf(struct menu *menu, struct menu *active_menu);
 
@@ -378,58 +378,6 @@ static void show_help(struct menu *menu)
 	str_free(&help);
 }
 
-struct search_data {
-	struct list_head *head;
-	struct menu *target;
-};
-
-static int next_jump_key(int key)
-{
-	if (key < '1' || key > '9')
-		return '1';
-
-	key++;
-
-	if (key > '9')
-		key = '1';
-
-	return key;
-}
-
-static int handle_search_keys(int key, size_t start, size_t end, void *_data)
-{
-	struct search_data *data = _data;
-	struct jump_key *pos;
-	int index = 0;
-
-	if (key < '1' || key > '9')
-		return 0;
-
-	list_for_each_entry(pos, data->head, entries) {
-		index = next_jump_key(index);
-
-		if (pos->offset < start)
-			continue;
-
-		if (pos->offset >= end)
-			break;
-
-		if (key == index) {
-			data->target = pos->target;
-			return 1;
-		}
-	}
-
-	return 0;
-}
-
-int get_jump_key_char(void)
-{
-	jump_key_char = next_jump_key(jump_key_char);
-
-	return jump_key_char;
-}
-
 static void search_conf(void)
 {
 	struct symbol **sym_arr;
diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c
index 61c442d84aef..2cce8b651f61 100644
--- a/scripts/kconfig/menu.c
+++ b/scripts/kconfig/menu.c
@@ -673,19 +673,6 @@ struct menu *menu_get_parent_menu(struct menu *menu)
 	return menu;
 }
 
-bool menu_has_help(struct menu *menu)
-{
-	return menu->help != NULL;
-}
-
-const char *menu_get_help(struct menu *menu)
-{
-	if (menu->help)
-		return menu->help;
-	else
-		return "";
-}
-
 static void get_def_str(struct gstr *r, struct menu *menu)
 {
 	str_printf(r, "Defined at %s:%d\n",
@@ -856,10 +843,10 @@ void menu_get_ext_help(struct menu *menu, struct gstr *help)
 	struct symbol *sym = menu->sym;
 	const char *help_text = nohelp_text;
 
-	if (menu_has_help(menu)) {
+	if (menu->help) {
 		if (sym->name)
 			str_printf(help, "%s%s:\n\n", CONFIG_, sym->name);
-		help_text = menu_get_help(menu);
+		help_text = menu->help;
 	}
 	str_printf(help, "%s\n", help_text);
 	if (sym)
diff --git a/scripts/kconfig/mnconf-common.c b/scripts/kconfig/mnconf-common.c
new file mode 100644
index 000000000000..18cb9a6c5aaa
--- /dev/null
+++ b/scripts/kconfig/mnconf-common.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "expr.h"
+#include "list.h"
+#include "mnconf-common.h"
+
+int jump_key_char;
+
+int next_jump_key(int key)
+{
+	if (key < '1' || key > '9')	/* not a jump key (e.g. 0): start at '1' */
+		return '1';
+
+	key++;
+
+	if (key > '9')	/* wrap around after '9' */
+		key = '1';
+
+	return key;
+}
+
+int handle_search_keys(int key, size_t start, size_t end, void *_data)
+{
+	struct search_data *data = _data;
+	struct jump_key *pos;
+	int index = 0;	/* not a jump key, so the first entry is assigned '1' */
+
+	if (key < '1' || key > '9')	/* only '1'..'9' are handled here */
+		return 0;
+
+	list_for_each_entry(pos, data->head, entries) {
+		index = next_jump_key(index);	/* advances for skipped entries too */
+
+		if (pos->offset < start)	/* before the [start, end) window */
+			continue;
+
+		if (pos->offset >= end)	/* NOTE(review): presumes list is sorted by offset */
+			break;
+
+		if (key == index) {
+			data->target = pos->target;
+			return 1;	/* handled; target is left in *data for the caller */
+		}
+	}
+
+	return 0;	/* no entry inside the window carries this key */
+}
+
+int get_jump_key_char(void)
+{
+	jump_key_char = next_jump_key(jump_key_char);	/* every call advances the global */
+
+	return jump_key_char;
+}
diff --git a/scripts/kconfig/mnconf-common.h b/scripts/kconfig/mnconf-common.h
new file mode 100644
index 000000000000..ab6292cc4bf2
--- /dev/null
+++ b/scripts/kconfig/mnconf-common.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef MNCONF_COMMON_H
+#define MNCONF_COMMON_H
+
+#include <stddef.h>
+
+struct search_data {
+	struct list_head *head;	/* jump_key list walked by handle_search_keys() */
+	struct menu *target;	/* written by handle_search_keys() on a match */
+};
+
+extern int jump_key_char;
+
+int next_jump_key(int key);
+int handle_search_keys(int key, size_t start, size_t end, void *_data);
+int get_jump_key_char(void);
+
+#endif /* MNCONF_COMMON_H */
diff --git a/scripts/kconfig/nconf.c b/scripts/kconfig/nconf.c
index 143a2c351d57..1148163cfa7e 100644
--- a/scripts/kconfig/nconf.c
+++ b/scripts/kconfig/nconf.c
@@ -12,6 +12,7 @@
 #include <stdlib.h>
 
 #include "lkc.h"
+#include "mnconf-common.h"
 #include "nconf.h"
 #include <ctype.h>
 
@@ -216,7 +217,7 @@ search_help[] =
 "Symbol: FOO [ = m]\n"
 "Prompt: Foo bus is used to drive the bar HW\n"
 "Defined at drivers/pci/Kconfig:47\n"
-"Depends on: X86_LOCAL_APIC && X86_IO_APIC || IA64\n"
+"Depends on: X86_LOCAL_APIC && X86_IO_APIC\n"
 "Location:\n"
 "  -> Bus options (PCI, PCMCIA, EISA, ISA)\n"
 "    -> PCI support (PCI [ = y])\n"
@@ -279,7 +280,6 @@ static const char *current_instructions = menu_instructions;
 
 static char *dialog_input_result;
 static int dialog_input_result_len;
-static int jump_key_char;
 
 static void selected_conf(struct menu *menu, struct menu *active_menu);
 static void conf(struct menu *menu);
@@ -691,57 +691,6 @@ static int do_exit(void)
 	return 0;
 }
 
-struct search_data {
-	struct list_head *head;
-	struct menu *target;
-};
-
-static int next_jump_key(int key)
-{
-	if (key < '1' || key > '9')
-		return '1';
-
-	key++;
-
-	if (key > '9')
-		key = '1';
-
-	return key;
-}
-
-static int handle_search_keys(int key, size_t start, size_t end, void *_data)
-{
-	struct search_data *data = _data;
-	struct jump_key *pos;
-	int index = 0;
-
-	if (key < '1' || key > '9')
-		return 0;
-
-	list_for_each_entry(pos, data->head, entries) {
-		index = next_jump_key(index);
-
-		if (pos->offset < start)
-			continue;
-
-		if (pos->offset >= end)
-			break;
-
-		if (key == index) {
-			data->target = pos->target;
-			return 1;
-		}
-	}
-
-	return 0;
-}
-
-int get_jump_key_char(void)
-{
-	jump_key_char = next_jump_key(jump_key_char);
-
-	return jump_key_char;
-}
 
 static void search_conf(void)
 {
diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c
index a76925b46ce6..3e808528aaea 100644
--- a/scripts/kconfig/symbol.c
+++ b/scripts/kconfig/symbol.c
@@ -29,14 +29,9 @@ struct symbol symbol_no = {
 	.flags = SYMBOL_CONST|SYMBOL_VALID,
 };
 
-static struct symbol symbol_empty = {
-	.name = "",
-	.curr = { "", no },
-	.flags = SYMBOL_VALID,
-};
-
 struct symbol *modules_sym;
 static tristate modules_val;
+static int sym_warnings;
 
 enum symbol_type sym_get_type(struct symbol *sym)
 {
@@ -317,6 +312,14 @@ static void sym_warn_unmet_dep(struct symbol *sym)
 			       "  Selected by [m]:\n");
 
 	fputs(str_get(&gs), stderr);
+	sym_warnings++;
+}
+
+bool sym_dep_errors(void)
+{
+	if (sym_warnings)	/* bumped in sym_warn_unmet_dep() */
+		return getenv("KCONFIG_WERROR");	/* set (even to "") => true */
+	return false;
 }
 
 void sym_calc_value(struct symbol *sym)
@@ -344,9 +347,13 @@ void sym_calc_value(struct symbol *sym)
 
 	switch (sym->type) {
 	case S_INT:
+		newval.val = "0";
+		break;
 	case S_HEX:
+		newval.val = "0x0";
+		break;
 	case S_STRING:
-		newval = symbol_empty.curr;
+		newval.val = "";
 		break;
 	case S_BOOLEAN:
 	case S_TRISTATE:
@@ -697,13 +704,12 @@ const char *sym_get_string_default(struct symbol *sym)
 {
 	struct property *prop;
 	struct symbol *ds;
-	const char *str;
+	const char *str = "";
 	tristate val;
 
 	sym_calc_visibility(sym);
 	sym_calc_value(modules_sym);
 	val = symbol_no.curr.tri;
-	str = symbol_empty.curr.val;
 
 	/* If symbol has a default value look it up */
 	prop = sym_get_default_prop(sym);
@@ -753,14 +759,17 @@ const char *sym_get_string_default(struct symbol *sym)
 		case yes: return "y";
 		}
 	case S_INT:
+		if (!str[0])
+			str = "0";
+		break;
 	case S_HEX:
-		return str;
-	case S_STRING:
-		return str;
-	case S_UNKNOWN:
+		if (!str[0])
+			str = "0x0";
+		break;
+	default:
 		break;
 	}
-	return "";
+	return str;
 }
 
 const char *sym_get_string_value(struct symbol *sym)
diff --git a/scripts/kconfig/util.c b/scripts/kconfig/util.c
index b78f114ad48c..92e5b2b9761d 100644
--- a/scripts/kconfig/util.c
+++ b/scripts/kconfig/util.c
@@ -42,8 +42,7 @@ struct gstr str_new(void)
 /* Free storage for growable string */
 void str_free(struct gstr *gs)
 {
-	if (gs->s)
-		free(gs->s);
+	free(gs->s);
 	gs->s = NULL;
 	gs->len = 0;
 }
diff --git a/scripts/min-tool-version.sh b/scripts/min-tool-version.sh
index c62066825f53..9faa4d3d91e3 100755
--- a/scripts/min-tool-version.sh
+++ b/scripts/min-tool-version.sh
@@ -26,6 +26,8 @@ gcc)
 llvm)
 	if [ "$SRCARCH" = s390 ]; then
 		echo 15.0.0
+	elif [ "$SRCARCH" = loongarch ]; then
+		echo 18.0.0
 	else
 		echo 11.0.0
 	fi
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index cb6406f485a9..795b21154446 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -60,8 +60,7 @@ static unsigned int nr_unresolved;
 
 #define MODULE_NAME_LEN (64 - sizeof(Elf_Addr))
 
-void __attribute__((format(printf, 2, 3)))
-modpost_log(enum loglevel loglevel, const char *fmt, ...)
+void modpost_log(enum loglevel loglevel, const char *fmt, ...)
 {
 	va_list arglist;
 
@@ -91,6 +90,9 @@ modpost_log(enum loglevel loglevel, const char *fmt, ...)
 		error_occurred = true;
 }
 
+void __attribute__((alias("modpost_log")))
+modpost_log_noret(enum loglevel loglevel, const char *fmt, ...);
+
 static inline bool strends(const char *str, const char *postfix)
 {
 	if (strlen(str) < strlen(postfix))
@@ -474,11 +476,9 @@ static int parse_elf(struct elf_info *info, const char *filename)
 		fatal("%s: not relocatable object.", filename);
 
 	/* Check if file offset is correct */
-	if (hdr->e_shoff > info->size) {
+	if (hdr->e_shoff > info->size)
 		fatal("section header offset=%lu in file '%s' is bigger than filesize=%zu\n",
 		      (unsigned long)hdr->e_shoff, filename, info->size);
-		return 0;
-	}
 
 	if (hdr->e_shnum == SHN_UNDEF) {
 		/*
@@ -516,12 +516,11 @@ static int parse_elf(struct elf_info *info, const char *filename)
 		const char *secname;
 		int nobits = sechdrs[i].sh_type == SHT_NOBITS;
 
-		if (!nobits && sechdrs[i].sh_offset > info->size) {
+		if (!nobits && sechdrs[i].sh_offset > info->size)
 			fatal("%s is truncated. sechdrs[i].sh_offset=%lu > sizeof(*hrd)=%zu\n",
 			      filename, (unsigned long)sechdrs[i].sh_offset,
 			      sizeof(*hdr));
-			return 0;
-		}
+
 		secname = secstrings + sechdrs[i].sh_name;
 		if (strcmp(secname, ".modinfo") == 0) {
 			if (nobits)
@@ -1346,6 +1345,14 @@ static Elf_Addr addend_mips_rel(uint32_t *location, unsigned int r_type)
 #define R_LARCH_SUB32		55
 #endif
 
+#ifndef R_LARCH_RELAX
+#define R_LARCH_RELAX		100
+#endif
+
+#ifndef R_LARCH_ALIGN
+#define R_LARCH_ALIGN		102
+#endif
+
 static void get_rel_type_and_sym(struct elf_info *elf, uint64_t r_info,
 				 unsigned int *r_type, unsigned int *r_sym)
 {
@@ -1400,9 +1407,16 @@ static void section_rela(struct module *mod, struct elf_info *elf,
 				continue;
 			break;
 		case EM_LOONGARCH:
-			if (!strcmp("__ex_table", fromsec) &&
-			    r_type == R_LARCH_SUB32)
+			switch (r_type) {
+			case R_LARCH_SUB32:
+				if (!strcmp("__ex_table", fromsec))
+					continue;
+				break;
+			case R_LARCH_RELAX:
+			case R_LARCH_ALIGN:
+				/* These relocs do not refer to symbols */
 				continue;
+			}
 			break;
 		}
 
@@ -1419,7 +1433,7 @@ static void section_rel(struct module *mod, struct elf_info *elf,
 
 	for (rel = start; rel < stop; rel++) {
 		Elf_Sym *tsym;
-		Elf_Addr taddr = 0, r_offset;
+		Elf_Addr taddr, r_offset;
 		unsigned int r_type, r_sym;
 		void *loc;
 
diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
index 69baf014da4f..835cababf1b0 100644
--- a/scripts/mod/modpost.h
+++ b/scripts/mod/modpost.h
@@ -197,7 +197,11 @@ enum loglevel {
 	LOG_FATAL
 };
 
-void modpost_log(enum loglevel loglevel, const char *fmt, ...);
+void __attribute__((format(printf, 2, 3)))
+modpost_log(enum loglevel loglevel, const char *fmt, ...);
+
+void __attribute__((format(printf, 2, 3), noreturn))
+modpost_log_noret(enum loglevel loglevel, const char *fmt, ...);
 
 /*
  * warn - show the given message, then let modpost continue running, still
@@ -214,4 +218,4 @@ void modpost_log(enum loglevel loglevel, const char *fmt, ...);
  */
 #define warn(fmt, args...)	modpost_log(LOG_WARN, fmt, ##args)
 #define error(fmt, args...)	modpost_log(LOG_ERROR, fmt, ##args)
-#define fatal(fmt, args...)	modpost_log(LOG_FATAL, fmt, ##args)
+#define fatal(fmt, args...)	modpost_log_noret(LOG_FATAL, fmt, ##args)
diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index d7dd0d04c70c..bf96a3c24608 100755
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -25,35 +25,20 @@ if_enabled_echo() {
 }
 
 create_package() {
-	local pname="$1" pdir="$2"
-	local dpkg_deb_opts
-
-	mkdir -m 755 -p "$pdir/DEBIAN"
-	mkdir -p "$pdir/usr/share/doc/$pname"
-	cp debian/copyright "$pdir/usr/share/doc/$pname/"
-	cp debian/changelog "$pdir/usr/share/doc/$pname/changelog.Debian"
-	gzip -n -9 "$pdir/usr/share/doc/$pname/changelog.Debian"
-	sh -c "cd '$pdir'; find . -type f ! -path './DEBIAN/*' -printf '%P\0' \
-		| xargs -r0 md5sum > DEBIAN/md5sums"
-
-	# Fix ownership and permissions
-	if [ "$DEB_RULES_REQUIRES_ROOT" = "no" ]; then
-		dpkg_deb_opts="--root-owner-group"
-	else
-		chown -R root:root "$pdir"
-	fi
-	# a+rX in case we are in a restrictive umask environment like 0077
-	# ug-s in case we build in a setuid/setgid directory
-	chmod -R go-w,a+rX,ug-s "$pdir"
-
-	# Create the package
-	dpkg-gencontrol -p$pname -P"$pdir"
-	dpkg-deb $dpkg_deb_opts ${KDEB_COMPRESS:+-Z$KDEB_COMPRESS} --build "$pdir" ..
+	export DH_OPTIONS="-p${1}"
+
+	dh_installdocs
+	dh_installchangelogs
+	dh_compress
+	dh_fixperms
+	dh_gencontrol
+	dh_md5sums
+	dh_builddeb -- ${KDEB_COMPRESS:+-Z$KDEB_COMPRESS}
 }
 
 install_linux_image () {
-	pdir=$1
-	pname=$2
+	pname=$1
+	pdir=debian/$1
 
 	rm -rf ${pdir}
 
@@ -62,7 +47,7 @@ install_linux_image () {
 		${MAKE} -f ${srctree}/Makefile INSTALL_DTBS_PATH="${pdir}/usr/lib/linux-image-${KERNELRELEASE}" dtbs_install
 	fi
 
-	${MAKE} -f ${srctree}/Makefile INSTALL_MOD_PATH="${pdir}" modules_install
+	${MAKE} -f ${srctree}/Makefile INSTALL_MOD_PATH="${pdir}" INSTALL_MOD_STRIP=1 modules_install
 	rm -f "${pdir}/lib/modules/${KERNELRELEASE}/build"
 
 	# Install the kernel
@@ -122,26 +107,22 @@ install_linux_image () {
 }
 
 install_linux_image_dbg () {
-	pdir=$1
-	image_pdir=$2
+	pdir=debian/$1
 
 	rm -rf ${pdir}
 
-	for module in $(find ${image_pdir}/lib/modules/ -name *.ko -printf '%P\n'); do
-		module=lib/modules/${module}
-		mkdir -p $(dirname ${pdir}/usr/lib/debug/${module})
-		# only keep debug symbols in the debug file
-		${OBJCOPY} --only-keep-debug ${image_pdir}/${module} ${pdir}/usr/lib/debug/${module}
-		# strip original module from debug symbols
-		${OBJCOPY} --strip-debug ${image_pdir}/${module}
-		# then add a link to those
-		${OBJCOPY} --add-gnu-debuglink=${pdir}/usr/lib/debug/${module} ${image_pdir}/${module}
-	done
+	# Parse modules.order directly because 'make modules_install' may sign
+	# and compress the modules, and then run an unneeded depmod.
+	while read -r mod; do
+		mod="${mod%.o}.ko"
+		dbg="${pdir}/usr/lib/debug/lib/modules/${KERNELRELEASE}/kernel/${mod}"
+		buildid=$("${READELF}" -n "${mod}" | sed -n 's@^.*Build ID: \(..\)\(.*\)@\1/\2@p')
+		link="${pdir}/usr/lib/debug/.build-id/${buildid}.debug"
 
-	# re-sign stripped modules
-	if is_enabled CONFIG_MODULE_SIG_ALL; then
-		${MAKE} -f ${srctree}/Makefile INSTALL_MOD_PATH="${image_pdir}" modules_sign
-	fi
+		mkdir -p "${dbg%/*}" "${link%/*}"
+		"${OBJCOPY}" --only-keep-debug "${mod}" "${dbg}"
+		ln -sf --relative "${dbg}" "${link}"
+	done < modules.order
 
 	# Build debug package
 	# Different tools want the image in different locations
@@ -156,8 +137,8 @@ install_linux_image_dbg () {
 }
 
 install_kernel_headers () {
-	pdir=$1
-	version=$2
+	pdir=debian/$1
+	version=${1#linux-headers-}
 
 	rm -rf $pdir
 
@@ -168,18 +149,16 @@ install_kernel_headers () {
 }
 
 install_libc_headers () {
-	pdir=$1
+	pdir=debian/$1
 
 	rm -rf $pdir
 
-	$MAKE -f $srctree/Makefile headers
 	$MAKE -f $srctree/Makefile headers_install INSTALL_HDR_PATH=$pdir/usr
 
 	# move asm headers to /usr/include/<libc-machine>/asm to match the structure
 	# used by Debian-based distros (to support multi-arch)
-	host_arch=$(dpkg-architecture -a$DEB_HOST_ARCH -qDEB_HOST_MULTIARCH)
-	mkdir $pdir/usr/include/$host_arch
-	mv $pdir/usr/include/asm $pdir/usr/include/$host_arch/
+	mkdir "$pdir/usr/include/${DEB_HOST_MULTIARCH}"
+	mv "$pdir/usr/include/asm" "$pdir/usr/include/${DEB_HOST_MULTIARCH}"
 }
 
 rm -f debian/files
@@ -190,30 +169,13 @@ for package in ${packages_enabled}
 do
 	case ${package} in
 	*-dbg)
-		# This must be done after linux-image, that is, we expect the
-		# debug package appears after linux-image in debian/control.
-		install_linux_image_dbg debian/linux-image-dbg debian/linux-image;;
-	linux-image-*|user-mode-linux-*)
-		install_linux_image debian/linux-image ${package};;
-	linux-libc-dev)
-		install_libc_headers debian/linux-libc-dev;;
-	linux-headers-*)
-		install_kernel_headers debian/linux-headers ${package#linux-headers-};;
-	esac
-done
-
-for package in ${packages_enabled}
-do
-	case ${package} in
-	*-dbg)
-		create_package ${package} debian/linux-image-dbg;;
+		install_linux_image_dbg "${package}";;
 	linux-image-*|user-mode-linux-*)
-		create_package ${package} debian/linux-image;;
+		install_linux_image "${package}";;
 	linux-libc-dev)
-		create_package ${package} debian/linux-libc-dev;;
+		install_libc_headers "${package}";;
 	linux-headers-*)
-		create_package ${package} debian/linux-headers;;
+		install_kernel_headers "${package}";;
 	esac
+	create_package "${package}"
 done
-
-exit 0
diff --git a/scripts/package/buildtar b/scripts/package/buildtar
index 65b4ea502962..72c91a1b832f 100755
--- a/scripts/package/buildtar
+++ b/scripts/package/buildtar
@@ -23,7 +23,6 @@ tmpdir=$1
 #
 rm -rf -- "${tmpdir}"
 mkdir -p -- "${tmpdir}/boot"
-dirs=boot
 
 
 #
@@ -38,12 +37,9 @@ fi
 
 
 #
-# Try to install modules
+# Install modules
 #
-if grep -q '^CONFIG_MODULES=y' include/config/auto.conf; then
-	make ARCH="${ARCH}" -f ${srctree}/Makefile INSTALL_MOD_PATH="${tmpdir}" modules_install
-	dirs="$dirs lib"
-fi
+make ARCH="${ARCH}" -f ${srctree}/Makefile INSTALL_MOD_PATH="${tmpdir}" modules_install
 
 
 #
diff --git a/scripts/package/deb-build-option b/scripts/package/deb-build-option
deleted file mode 100755
index 7950eff01781..000000000000
--- a/scripts/package/deb-build-option
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0-only
-
-# Set up CROSS_COMPILE if not defined yet
-if [ "${CROSS_COMPILE+set}" != "set" -a "${DEB_HOST_ARCH}" != "${DEB_BUILD_ARCH}" ]; then
-	echo CROSS_COMPILE=${DEB_HOST_GNU_TYPE}-
-fi
-
-version=$(dpkg-parsechangelog -S Version)
-debian_revision="${version##*-}"
-
-if [ "${version}" != "${debian_revision}" ]; then
-	echo KBUILD_BUILD_VERSION=${debian_revision}
-fi
diff --git a/scripts/package/debian/copyright b/scripts/package/debian/copyright
new file mode 100644
index 000000000000..4f1f06221f09
--- /dev/null
+++ b/scripts/package/debian/copyright
@@ -0,0 +1,16 @@
+This is a packaged upstream version of the Linux kernel.
+
+The sources may be found at most Linux archive sites, including:
+https://www.kernel.org/pub/linux/kernel
+
+Copyright: 1991 - 2023 Linus Torvalds and others.
+
+The git repository for mainline kernel development is at:
+git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; version 2 dated June, 1991.
+
+On Debian GNU/Linux systems, the complete text of the GNU General Public
+License version 2 can be found in `/usr/share/common-licenses/GPL-2'.
diff --git a/scripts/package/debian/rules b/scripts/package/debian/rules
index 3dafa9496c63..098307780062 100755
--- a/scripts/package/debian/rules
+++ b/scripts/package/debian/rules
@@ -1,33 +1,46 @@
 #!/usr/bin/make -f
 # SPDX-License-Identifier: GPL-2.0-only
 
-include debian/rules.vars
+# in case debian/rules is executed directly
+export DEB_RULES_REQUIRES_ROOT := no
 
-srctree ?= .
+include debian/rules.vars
 
 ifneq (,$(filter-out parallel=1,$(filter parallel=%,$(DEB_BUILD_OPTIONS))))
     NUMJOBS = $(patsubst parallel=%,%,$(filter parallel=%,$(DEB_BUILD_OPTIONS)))
     MAKEFLAGS += -j$(NUMJOBS)
 endif
 
+revision = $(lastword $(subst -, ,$(shell dpkg-parsechangelog -S Version)))
+CROSS_COMPILE ?= $(filter-out $(DEB_BUILD_GNU_TYPE)-, $(DEB_HOST_GNU_TYPE)-)
+make-opts = ARCH=$(ARCH) KERNELRELEASE=$(KERNELRELEASE) KBUILD_BUILD_VERSION=$(revision) $(addprefix CROSS_COMPILE=,$(CROSS_COMPILE))
+
 .PHONY: binary binary-indep binary-arch
 binary: binary-arch binary-indep
 binary-indep: build-indep
 binary-arch: build-arch
-	$(MAKE) -f $(srctree)/Makefile ARCH=$(ARCH) \
-	KERNELRELEASE=$(KERNELRELEASE) \
-	run-command KBUILD_RUN_COMMAND=+$(srctree)/scripts/package/builddeb
+	$(MAKE) $(make-opts) \
+	run-command KBUILD_RUN_COMMAND='+$$(srctree)/scripts/package/builddeb'
 
 .PHONY: build build-indep build-arch
 build: build-arch build-indep
 build-indep:
 build-arch:
-	$(MAKE) -f $(srctree)/Makefile ARCH=$(ARCH) \
-	KERNELRELEASE=$(KERNELRELEASE) \
-	$(shell $(srctree)/scripts/package/deb-build-option) \
-	olddefconfig all
+	$(MAKE) $(make-opts) olddefconfig
+	$(MAKE) $(make-opts) $(if $(filter um,$(ARCH)),,headers) all
 
 .PHONY: clean
 clean:
-	rm -rf debian/files debian/linux-*
-	$(MAKE) -f $(srctree)/Makefile ARCH=$(ARCH) clean
+	rm -rf debian/files debian/linux-* debian/deb-env.vars*
+	$(MAKE) ARCH=$(ARCH) clean
+
+# If DEB_HOST_ARCH is empty, it is likely that debian/rules was executed
+# directly. Run 'dpkg-architecture --print-set --print-format=make' to
+# generate a makefile construct that exports all DEB_* variables.
+ifndef DEB_HOST_ARCH
+include debian/deb-env.vars
+
+debian/deb-env.vars:
+	dpkg-architecture -a$$(cat debian/arch) --print-set --print-format=make > $@.tmp
+	mv $@.tmp $@
+endif
diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build
index 8a7051fad087..76e0765dfcd6 100755
--- a/scripts/package/install-extmod-build
+++ b/scripts/package/install-extmod-build
@@ -20,7 +20,7 @@ mkdir -p "${destdir}"
 	find "arch/${SRCARCH}" -maxdepth 1 -name 'Makefile*'
 	find include scripts -type f -o -type l
 	find "arch/${SRCARCH}" -name Kbuild.platforms -o -name Platform
-	find "arch/${SRCARCH}" -name include -o -name scripts -type d
+	find "arch/${SRCARCH}" -name include -type d
 ) | tar -c -f - -C "${srctree}" -T - | tar -xf - -C "${destdir}"
 
 {
diff --git a/scripts/package/kernel.spec b/scripts/package/kernel.spec
index 3eee0143e0c5..89298983a169 100644
--- a/scripts/package/kernel.spec
+++ b/scripts/package/kernel.spec
@@ -56,13 +56,7 @@ patch -p1 < %{SOURCE2}
 
 %install
 mkdir -p %{buildroot}/boot
-%ifarch ia64
-mkdir -p %{buildroot}/boot/efi
-cp $(%{make} %{makeflags} -s image_name) %{buildroot}/boot/efi/vmlinuz-%{KERNELRELEASE}
-ln -s efi/vmlinuz-%{KERNELRELEASE} %{buildroot}/boot/
-%else
 cp $(%{make} %{makeflags} -s image_name) %{buildroot}/boot/vmlinuz-%{KERNELRELEASE}
-%endif
 %{make} %{makeflags} INSTALL_MOD_PATH=%{buildroot} modules_install
 %{make} %{makeflags} INSTALL_HDR_PATH=%{buildroot}/usr headers_install
 cp System.map %{buildroot}/boot/System.map-%{KERNELRELEASE}
diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian
index 5044224cf671..070149c985fe 100755
--- a/scripts/package/mkdebian
+++ b/scripts/package/mkdebian
@@ -26,7 +26,7 @@ set_debarch() {
 
 	# Attempt to find the correct Debian architecture
 	case "$UTS_MACHINE" in
-	i386|ia64|alpha|m68k|riscv*)
+	i386|alpha|m68k|riscv*)
 		debarch="$UTS_MACHINE" ;;
 	x86_64)
 		debarch=amd64 ;;
@@ -176,8 +176,6 @@ else
 fi
 
 echo $debarch > debian/arch
-extra_build_depends=", $(if_enabled_echo CONFIG_UNWINDER_ORC libelf-dev:native)"
-extra_build_depends="$extra_build_depends, $(if_enabled_echo CONFIG_SYSTEM_TRUSTED_KEYRING libssl-dev:native)"
 
 # Generate a simple changelog template
 cat <<EOF > debian/changelog
@@ -188,26 +186,6 @@ $sourcename ($packageversion) $distribution; urgency=low
  -- $maintainer  $(date -R)
 EOF
 
-# Generate copyright file
-cat <<EOF > debian/copyright
-This is a packaged upstream version of the Linux kernel.
-
-The sources may be found at most Linux archive sites, including:
-https://www.kernel.org/pub/linux/kernel
-
-Copyright: 1991 - 2018 Linus Torvalds and others.
-
-The git repository for mainline kernel development is at:
-git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; version 2 dated June, 1991.
-
-On Debian GNU/Linux systems, the complete text of the GNU General Public
-License version 2 can be found in \`/usr/share/common-licenses/GPL-2'.
-EOF
-
 # Generate a control file
 cat <<EOF > debian/control
 Source: $sourcename
@@ -215,7 +193,8 @@ Section: kernel
 Priority: optional
 Maintainer: $maintainer
 Rules-Requires-Root: no
-Build-Depends: bc, debhelper, rsync, kmod, cpio, bison, flex $extra_build_depends
+Build-Depends: debhelper-compat (= 12)
+Build-Depends-Arch: bc, bison, cpio, flex, kmod, libelf-dev:native, libssl-dev:native, rsync
 Homepage: https://www.kernel.org/
 
 Package: $packagename-$version
@@ -268,6 +247,7 @@ ARCH := ${ARCH}
 KERNELRELEASE := ${KERNELRELEASE}
 EOF
 
+cp "${srctree}/scripts/package/debian/copyright" debian/
 cp "${srctree}/scripts/package/debian/rules" debian/
 
 exit 0
diff --git a/scripts/package/snapcraft.template b/scripts/package/snapcraft.template
index 626d278e4a5a..85d5e07d1b40 100644
--- a/scripts/package/snapcraft.template
+++ b/scripts/package/snapcraft.template
@@ -10,5 +10,5 @@ parts:
   kernel:
     plugin: kernel
     source: SRCTREE
-    source-type: tar
+    source-type: local
     kernel-with-firmware: false
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
index 40ae6b2c7a6d..3e4f54799cc0 100644
--- a/scripts/recordmcount.c
+++ b/scripts/recordmcount.c
@@ -590,7 +590,6 @@ static int do_file(char const *const fname)
 		ideal_nop = ideal_nop4_arm64;
 		is_fake_mcount64 = arm64_is_fake_mcount;
 		break;
-	case EM_IA_64:	reltype = R_IA64_IMM64; break;
 	case EM_MIPS:	/* reltype: e_class    */ break;
 	case EM_LOONGARCH:	/* reltype: e_class    */ break;
 	case EM_PPC:	reltype = R_PPC_ADDR32; break;
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 6a4645a57976..f84df9e383fd 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -275,13 +275,6 @@ if ($arch eq "x86_64") {
     $section_type = '%progbits';
     $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_AARCH64_CALL26\\s+_mcount\$";
     $type = ".quad";
-} elsif ($arch eq "ia64") {
-    $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$";
-    $type = "data8";
-
-    if ($is_module eq "0") {
-	$cc .= " -mconstant-gp";
-    }
 } elsif ($arch eq "sparc64") {
     # In the objdump output there are giblets like:
     # 0000000000000000 <igmp_net_exit-0x18>:
diff --git a/scripts/xz_wrap.sh b/scripts/xz_wrap.sh
index 76e9cbcfbeab..d06baf626abe 100755
--- a/scripts/xz_wrap.sh
+++ b/scripts/xz_wrap.sh
@@ -15,7 +15,6 @@ LZMA2OPTS=
 case $SRCARCH in
 	x86)            BCJ=--x86 ;;
 	powerpc)        BCJ=--powerpc ;;
-	ia64)           BCJ=--ia64; LZMA2OPTS=pb=4 ;;
 	arm)            BCJ=--arm ;;
 	sparc)          BCJ=--sparc ;;
 esac
diff --git a/security/apparmor/Kconfig b/security/apparmor/Kconfig
index e0d1dd0a192a..64cc3044a42c 100644
--- a/security/apparmor/Kconfig
+++ b/security/apparmor/Kconfig
@@ -57,10 +57,10 @@ config SECURITY_APPARMOR_INTROSPECT_POLICY
 	  cpu is paramount.
 
 config SECURITY_APPARMOR_HASH
-	bool "Enable introspection of sha1 hashes for loaded profiles"
+	bool "Enable introspection of sha256 hashes for loaded profiles"
 	depends on SECURITY_APPARMOR_INTROSPECT_POLICY
 	select CRYPTO
-	select CRYPTO_SHA1
+	select CRYPTO_SHA256
 	default y
 	help
 	  This option selects whether introspection of loaded policy
@@ -74,10 +74,10 @@ config SECURITY_APPARMOR_HASH_DEFAULT
        depends on SECURITY_APPARMOR_HASH
        default y
        help
-         This option selects whether sha1 hashing of loaded policy
-	 is enabled by default. The generation of sha1 hashes for
-	 loaded policy provide system administrators a quick way
-	 to verify that policy in the kernel matches what is expected,
+	 This option selects whether sha256 hashing of loaded policy
+	 is enabled by default. The generation of sha256 hashes for
+	 loaded policy provides system administrators a quick way to
+	 verify that policy in the kernel matches what is expected,
 	 however it can slow down policy load on some devices. In
 	 these cases policy hashing can be disabled by default and
 	 enabled only if needed.
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index f3c77825aa75..bcfea073e3f2 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -1474,7 +1474,7 @@ int __aa_fs_create_rawdata(struct aa_ns *ns, struct aa_loaddata *rawdata)
 	rawdata->dents[AAFS_LOADDATA_REVISION] = dent;
 
 	if (aa_g_hash_policy) {
-		dent = aafs_create_file("sha1", S_IFREG | 0444, dir,
+		dent = aafs_create_file("sha256", S_IFREG | 0444, dir,
 					      rawdata, &seq_rawdata_hash_fops);
 		if (IS_ERR(dent))
 			goto fail;
@@ -1643,11 +1643,11 @@ static const char *rawdata_get_link_base(struct dentry *dentry,
 	return target;
 }
 
-static const char *rawdata_get_link_sha1(struct dentry *dentry,
+static const char *rawdata_get_link_sha256(struct dentry *dentry,
 					 struct inode *inode,
 					 struct delayed_call *done)
 {
-	return rawdata_get_link_base(dentry, inode, done, "sha1");
+	return rawdata_get_link_base(dentry, inode, done, "sha256");
 }
 
 static const char *rawdata_get_link_abi(struct dentry *dentry,
@@ -1664,8 +1664,8 @@ static const char *rawdata_get_link_data(struct dentry *dentry,
 	return rawdata_get_link_base(dentry, inode, done, "raw_data");
 }
 
-static const struct inode_operations rawdata_link_sha1_iops = {
-	.get_link	= rawdata_get_link_sha1,
+static const struct inode_operations rawdata_link_sha256_iops = {
+	.get_link	= rawdata_get_link_sha256,
 };
 
 static const struct inode_operations rawdata_link_abi_iops = {
@@ -1738,7 +1738,7 @@ int __aafs_profile_mkdir(struct aa_profile *profile, struct dentry *parent)
 	profile->dents[AAFS_PROF_ATTACH] = dent;
 
 	if (profile->hash) {
-		dent = create_profile_file(dir, "sha1", profile,
+		dent = create_profile_file(dir, "sha256", profile,
 					   &seq_profile_hash_fops);
 		if (IS_ERR(dent))
 			goto fail;
@@ -1748,9 +1748,9 @@ int __aafs_profile_mkdir(struct aa_profile *profile, struct dentry *parent)
 #ifdef CONFIG_SECURITY_APPARMOR_EXPORT_BINARY
 	if (profile->rawdata) {
 		if (aa_g_hash_policy) {
-			dent = aafs_create("raw_sha1", S_IFLNK | 0444, dir,
+			dent = aafs_create("raw_sha256", S_IFLNK | 0444, dir,
 					   profile->label.proxy, NULL, NULL,
-					   &rawdata_link_sha1_iops);
+					   &rawdata_link_sha256_iops);
 			if (IS_ERR(dent))
 				goto fail;
 			aa_get_proxy(profile->label.proxy);
diff --git a/security/apparmor/crypto.c b/security/apparmor/crypto.c
index 6724e2ff6da8..aad486b2fca6 100644
--- a/security/apparmor/crypto.c
+++ b/security/apparmor/crypto.c
@@ -106,16 +106,16 @@ static int __init init_profile_hash(void)
 	if (!apparmor_initialized)
 		return 0;
 
-	tfm = crypto_alloc_shash("sha1", 0, 0);
+	tfm = crypto_alloc_shash("sha256", 0, 0);
 	if (IS_ERR(tfm)) {
 		int error = PTR_ERR(tfm);
-		AA_ERROR("failed to setup profile sha1 hashing: %d\n", error);
+		AA_ERROR("failed to setup profile sha256 hashing: %d\n", error);
 		return error;
 	}
 	apparmor_tfm = tfm;
 	apparmor_hash_size = crypto_shash_digestsize(apparmor_tfm);
 
-	aa_info_message("AppArmor sha1 policy hashing enabled");
+	aa_info_message("AppArmor sha256 policy hashing enabled");
 
 	return 0;
 }
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 89fbeab4b33b..571158ec6188 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -1311,7 +1311,7 @@ static int change_profile_perms_wrapper(const char *op, const char *name,
 	return error;
 }
 
-const char *stack_msg = "change_profile unprivileged unconfined converted to stacking";
+static const char *stack_msg = "change_profile unprivileged unconfined converted to stacking";
 
 /**
  * aa_change_profile - perform a one-way profile transition
diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c
index 4c198d273f09..cd569fbbfe36 100644
--- a/security/apparmor/lib.c
+++ b/security/apparmor/lib.c
@@ -41,6 +41,7 @@ void aa_free_str_table(struct aa_str_table *t)
 			kfree_sensitive(t->table[i]);
 		kfree_sensitive(t->table);
 		t->table = NULL;
+		t->size = 0;
 	}
 }
 
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index e490a7000408..98e1150bee9d 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -469,8 +469,10 @@ static int apparmor_file_open(struct file *file)
 	 * Cache permissions granted by the previous exec check, with
 	 * implicit read and executable mmap which are required to
 	 * actually execute the image.
+	 *
+	 * Illogically, FMODE_EXEC is in f_flags, not f_mode.
 	 */
-	if (current->in_execve) {
+	if (file->f_flags & __FMODE_EXEC) {
 		fctx->allow = MAY_EXEC | MAY_READ | AA_EXEC_MMAP;
 		return 0;
 	}
@@ -1023,7 +1025,6 @@ static int apparmor_task_kill(struct task_struct *target, struct kernel_siginfo
 		cl = aa_get_newest_cred_label(cred);
 		error = aa_may_signal(cred, cl, tc, tl, sig);
 		aa_put_label(cl);
-		return error;
 	} else {
 		cl = __begin_current_label_crit_section();
 		error = aa_may_signal(current_cred(), cl, tc, tl, sig);
@@ -1056,9 +1057,6 @@ static int apparmor_userns_create(const struct cred *cred)
 	return error;
 }
 
-/**
- * apparmor_sk_alloc_security - allocate and attach the sk_security field
- */
 static int apparmor_sk_alloc_security(struct sock *sk, int family, gfp_t flags)
 {
 	struct aa_sk_ctx *ctx;
@@ -1072,9 +1070,6 @@ static int apparmor_sk_alloc_security(struct sock *sk, int family, gfp_t flags)
 	return 0;
 }
 
-/**
- * apparmor_sk_free_security - free the sk_security field
- */
 static void apparmor_sk_free_security(struct sock *sk)
 {
 	struct aa_sk_ctx *ctx = aa_sock(sk);
@@ -1087,6 +1082,8 @@ static void apparmor_sk_free_security(struct sock *sk)
 
 /**
  * apparmor_sk_clone_security - clone the sk_security field
+ * @sk: sock to have security cloned
+ * @newsk: sock receiving the cloned security
  */
 static void apparmor_sk_clone_security(const struct sock *sk,
 				       struct sock *newsk)
@@ -1103,9 +1100,6 @@ static void apparmor_sk_clone_security(const struct sock *sk,
 	new->peer = aa_get_label(ctx->peer);
 }
 
-/**
- * apparmor_socket_create - check perms before creating a new socket
- */
 static int apparmor_socket_create(int family, int type, int protocol, int kern)
 {
 	struct aa_label *label;
@@ -1127,10 +1121,14 @@ static int apparmor_socket_create(int family, int type, int protocol, int kern)
 
 /**
  * apparmor_socket_post_create - setup the per-socket security struct
+ * @sock: socket that is being setup
+ * @family: family of socket being created
+ * @type: type of the socket
+ * @protocol: protocol of the socket
+ * @kern: socket is a special kernel socket
  *
  * Note:
- * -   kernel sockets currently labeled unconfined but we may want to
- *     move to a special kernel label
+ * -   kernel sockets are labeled kernel_t; they used to be labeled unconfined
  * -   socket may not have sk here if created with sock_create_lite or
  *     sock_alloc. These should be accept cases which will be handled in
  *     sock_graft.
@@ -1156,9 +1154,6 @@ static int apparmor_socket_post_create(struct socket *sock, int family,
 	return 0;
 }
 
-/**
- * apparmor_socket_bind - check perms before bind addr to socket
- */
 static int apparmor_socket_bind(struct socket *sock,
 				struct sockaddr *address, int addrlen)
 {
@@ -1172,9 +1167,6 @@ static int apparmor_socket_bind(struct socket *sock,
 			 aa_sk_perm(OP_BIND, AA_MAY_BIND, sock->sk));
 }
 
-/**
- * apparmor_socket_connect - check perms before connecting @sock to @address
- */
 static int apparmor_socket_connect(struct socket *sock,
 				   struct sockaddr *address, int addrlen)
 {
@@ -1188,9 +1180,6 @@ static int apparmor_socket_connect(struct socket *sock,
 			 aa_sk_perm(OP_CONNECT, AA_MAY_CONNECT, sock->sk));
 }
 
-/**
- * apparmor_socket_listen - check perms before allowing listen
- */
 static int apparmor_socket_listen(struct socket *sock, int backlog)
 {
 	AA_BUG(!sock);
@@ -1202,9 +1191,7 @@ static int apparmor_socket_listen(struct socket *sock, int backlog)
 			 aa_sk_perm(OP_LISTEN, AA_MAY_LISTEN, sock->sk));
 }
 
-/**
- * apparmor_socket_accept - check perms before accepting a new connection.
- *
+/*
  * Note: while @newsock is created and has some information, the accept
  *       has not been done.
  */
@@ -1233,18 +1220,12 @@ static int aa_sock_msg_perm(const char *op, u32 request, struct socket *sock,
 			 aa_sk_perm(op, request, sock->sk));
 }
 
-/**
- * apparmor_socket_sendmsg - check perms before sending msg to another socket
- */
 static int apparmor_socket_sendmsg(struct socket *sock,
 				   struct msghdr *msg, int size)
 {
 	return aa_sock_msg_perm(OP_SENDMSG, AA_MAY_SEND, sock, msg, size);
 }
 
-/**
- * apparmor_socket_recvmsg - check perms before receiving a message
- */
 static int apparmor_socket_recvmsg(struct socket *sock,
 				   struct msghdr *msg, int size, int flags)
 {
@@ -1263,17 +1244,11 @@ static int aa_sock_perm(const char *op, u32 request, struct socket *sock)
 			 aa_sk_perm(op, request, sock->sk));
 }
 
-/**
- * apparmor_socket_getsockname - check perms before getting the local address
- */
 static int apparmor_socket_getsockname(struct socket *sock)
 {
 	return aa_sock_perm(OP_GETSOCKNAME, AA_MAY_GETATTR, sock);
 }
 
-/**
- * apparmor_socket_getpeername - check perms before getting remote address
- */
 static int apparmor_socket_getpeername(struct socket *sock)
 {
 	return aa_sock_perm(OP_GETPEERNAME, AA_MAY_GETATTR, sock);
@@ -1292,9 +1267,6 @@ static int aa_sock_opt_perm(const char *op, u32 request, struct socket *sock,
 			 aa_sk_perm(op, request, sock->sk));
 }
 
-/**
- * apparmor_socket_getsockopt - check perms before getting socket options
- */
 static int apparmor_socket_getsockopt(struct socket *sock, int level,
 				      int optname)
 {
@@ -1302,9 +1274,6 @@ static int apparmor_socket_getsockopt(struct socket *sock, int level,
 				level, optname);
 }
 
-/**
- * apparmor_socket_setsockopt - check perms before setting socket options
- */
 static int apparmor_socket_setsockopt(struct socket *sock, int level,
 				      int optname)
 {
@@ -1312,9 +1281,6 @@ static int apparmor_socket_setsockopt(struct socket *sock, int level,
 				level, optname);
 }
 
-/**
- * apparmor_socket_shutdown - check perms before shutting down @sock conn
- */
 static int apparmor_socket_shutdown(struct socket *sock, int how)
 {
 	return aa_sock_perm(OP_SHUTDOWN, AA_MAY_SHUTDOWN, sock);
@@ -1323,6 +1289,8 @@ static int apparmor_socket_shutdown(struct socket *sock, int how)
 #ifdef CONFIG_NETWORK_SECMARK
 /**
  * apparmor_socket_sock_rcv_skb - check perms before associating skb to sk
+ * @sk: sk to associate @skb with
+ * @skb: skb to check for perms
  *
  * Note: can not sleep may be called with locks held
  *
@@ -1354,6 +1322,11 @@ static struct aa_label *sk_peer_label(struct sock *sk)
 
 /**
  * apparmor_socket_getpeersec_stream - get security context of peer
+ * @sock: socket that we are trying to get the peer context of
+ * @optval: output - buffer to copy peer name to
+ * @optlen: output - size of copied name in @optval
+ * @len: size of @optval buffer
+ * Returns: 0 on success, -errno on failure
  *
  * Note: for tcp only valid if using ipsec or cipso on lan
  */
@@ -2182,7 +2155,7 @@ __initcall(apparmor_nf_ip_init);
 static char nulldfa_src[] = {
 	#include "nulldfa.in"
 };
-struct aa_dfa *nulldfa;
+static struct aa_dfa *nulldfa;
 
 static char stacksplitdfa_src[] = {
 	#include "stacksplitdfa.in"
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index ed4c9803c8fa..957654d253dd 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -99,13 +99,14 @@ const char *const aa_profile_mode_names[] = {
 };
 
 
-static void aa_free_pdb(struct aa_policydb *policy)
+static void aa_free_pdb(struct aa_policydb *pdb)
 {
-	if (policy) {
-		aa_put_dfa(policy->dfa);
-		if (policy->perms)
-			kvfree(policy->perms);
-		aa_free_str_table(&policy->trans);
+	if (pdb) {
+		aa_put_dfa(pdb->dfa);
+		if (pdb->perms)
+			kvfree(pdb->perms);
+		aa_free_str_table(&pdb->trans);
+		kfree(pdb);
 	}
 }
 
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 47ec097d6741..5e578ef0ddff 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -478,6 +478,8 @@ static bool unpack_trans_table(struct aa_ext *e, struct aa_str_table *strs)
 		if (!table)
 			goto fail;
 
+		strs->table = table;
+		strs->size = size;
 		for (i = 0; i < size; i++) {
 			char *str;
 			int c, j, pos, size2 = aa_unpack_strdup(e, &str, NULL);
@@ -520,14 +522,11 @@ static bool unpack_trans_table(struct aa_ext *e, struct aa_str_table *strs)
 			goto fail;
 		if (!aa_unpack_nameX(e, AA_STRUCTEND, NULL))
 			goto fail;
-
-		strs->table = table;
-		strs->size = size;
 	}
 	return true;
 
 fail:
-	kfree_sensitive(table);
+	aa_free_str_table(strs);
 	e->pos = saved_pos;
 	return false;
 }
@@ -833,6 +832,10 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 
 	tmpname = aa_splitn_fqname(name, strlen(name), &tmpns, &ns_len);
 	if (tmpns) {
+		if (!tmpname) {
+			info = "empty profile name";
+			goto fail;
+		}
 		*ns_name = kstrndup(tmpns, ns_len, GFP_KERNEL);
 		if (!*ns_name) {
 			info = "out of memory";
@@ -1022,8 +1025,10 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 		}
 	} else if (rules->policy->dfa &&
 		   rules->policy->start[AA_CLASS_FILE]) {
+		aa_put_pdb(rules->file);
 		rules->file = aa_get_pdb(rules->policy);
 	} else {
+		aa_put_pdb(rules->file);
 		rules->file = aa_get_pdb(nullpdb);
 	}
 	error = -EPROTO;
diff --git a/security/apparmor/task.c b/security/apparmor/task.c
index f29a2e80e6bf..c87fb9f4ac18 100644
--- a/security/apparmor/task.c
+++ b/security/apparmor/task.c
@@ -278,7 +278,9 @@ static int profile_tracer_perm(const struct cred *cred,
 
 /**
  * aa_may_ptrace - test if tracer task can trace the tracee
+ * @tracer_cred: cred of task doing the tracing  (NOT NULL)
  * @tracer: label of the task doing the tracing  (NOT NULL)
+ * @tracee_cred: cred of task to be traced
  * @tracee: task label to be traced
  * @request: permission request
  *
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index 76f55dd13cb8..8af2136069d2 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -237,10 +237,6 @@ static int datablob_parse(char *datablob, const char **format,
 			break;
 		}
 		*decrypted_data = strsep(&datablob, " \t");
-		if (!*decrypted_data) {
-			pr_info("encrypted_key: decrypted_data is missing\n");
-			break;
-		}
 		ret = 0;
 		break;
 	case Opt_load:
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index 3c3af149bf1c..04a92c3d65d4 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -328,7 +328,8 @@ static int tomoyo_file_fcntl(struct file *file, unsigned int cmd,
 static int tomoyo_file_open(struct file *f)
 {
 	/* Don't check read permission here if called from execve(). */
-	if (current->in_execve)
+	/* Illogically, FMODE_EXEC is in f_flags, not f_mode. */
+	if (f->f_flags & __FMODE_EXEC)
 		return 0;
 	return tomoyo_check_open_permission(tomoyo_domain(), &f->f_path,
 					    f->f_flags);
diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c
index e87dc67f33c6..1c65e0a3b13c 100644
--- a/sound/drivers/aloop.c
+++ b/sound/drivers/aloop.c
@@ -322,6 +322,17 @@ static int loopback_snd_timer_close_cable(struct loopback_pcm *dpcm)
 	return 0;
 }
 
+static bool is_access_interleaved(snd_pcm_access_t access)
+{
+	switch (access) {
+	case SNDRV_PCM_ACCESS_MMAP_INTERLEAVED:
+	case SNDRV_PCM_ACCESS_RW_INTERLEAVED:
+		return true;	/* both interleaved variants: mmap and read/write */
+	default:
+		return false;	/* every other access value counts as not interleaved */
+	}
+}
+
 static int loopback_check_format(struct loopback_cable *cable, int stream)
 {
 	struct snd_pcm_runtime *runtime, *cruntime;
@@ -341,7 +352,8 @@ static int loopback_check_format(struct loopback_cable *cable, int stream)
 	check = runtime->format != cruntime->format ||
 		runtime->rate != cruntime->rate ||
 		runtime->channels != cruntime->channels ||
-		runtime->access != cruntime->access;
+		is_access_interleaved(runtime->access) !=
+		is_access_interleaved(cruntime->access);
 	if (!check)
 		return 0;
 	if (stream == SNDRV_PCM_STREAM_CAPTURE) {
@@ -369,7 +381,8 @@ static int loopback_check_format(struct loopback_cable *cable, int stream)
 							&setup->channels_id);
 			setup->channels = runtime->channels;
 		}
-		if (setup->access != runtime->access) {
+		if (is_access_interleaved(setup->access) !=
+		    is_access_interleaved(runtime->access)) {
 			snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_VALUE,
 							&setup->access_id);
 			setup->access = runtime->access;
@@ -584,8 +597,7 @@ static void copy_play_buf(struct loopback_pcm *play,
 			size = play->pcm_buffer_size - src_off;
 		if (dst_off + size > capt->pcm_buffer_size)
 			size = capt->pcm_buffer_size - dst_off;
-		if (runtime->access == SNDRV_PCM_ACCESS_RW_NONINTERLEAVED ||
-		    runtime->access == SNDRV_PCM_ACCESS_MMAP_NONINTERLEAVED)
+		if (!is_access_interleaved(runtime->access))
 			copy_play_buf_part_n(play, capt, size, src_off, dst_off);
 		else
 			memcpy(dst + dst_off, src + src_off, size);
@@ -1544,8 +1556,7 @@ static int loopback_access_get(struct snd_kcontrol *kcontrol,
 	mutex_lock(&loopback->cable_lock);
 	access = loopback->setup[kcontrol->id.subdevice][kcontrol->id.device].access;
 
-	ucontrol->value.enumerated.item[0] = access == SNDRV_PCM_ACCESS_RW_NONINTERLEAVED ||
-					     access == SNDRV_PCM_ACCESS_MMAP_NONINTERLEAVED;
+	ucontrol->value.enumerated.item[0] = !is_access_interleaved(access);
 
 	mutex_unlock(&loopback->cable_lock);
 	return 0;
diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index bf685d01259d..de2a3d08c73c 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -3946,7 +3946,6 @@ static int create_mute_led_cdev(struct hda_codec *codec,
 	cdev->max_brightness = 1;
 	cdev->default_trigger = micmute ? "audio-micmute" : "audio-mute";
 	cdev->brightness_set_blocking = callback;
-	cdev->brightness = ledtrig_audio_get(idx);
 	cdev->flags = LED_CORE_SUSPENDRESUME;
 
 	err = led_classdev_register(&codec->core.dev, cdev);
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 200779296a1b..495d63101186 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -2301,6 +2301,7 @@ static int generic_hdmi_build_pcms(struct hda_codec *codec)
 	codec_dbg(codec, "hdmi: pcm_num set to %d\n", pcm_num);
 
 	for (idx = 0; idx < pcm_num; idx++) {
+		struct hdmi_spec_per_cvt *per_cvt;
 		struct hda_pcm *info;
 		struct hda_pcm_stream *pstr;
 
@@ -2316,6 +2317,11 @@ static int generic_hdmi_build_pcms(struct hda_codec *codec)
 		pstr = &info->stream[SNDRV_PCM_STREAM_PLAYBACK];
 		pstr->substreams = 1;
 		pstr->ops = generic_ops;
+
+		per_cvt = get_cvt(spec, 0);
+		pstr->channels_min = per_cvt->channels_min;
+		pstr->channels_max = per_cvt->channels_max;
+
 		/* pcm number is less than pcm_rec array size */
 		if (spec->pcm_used >= ARRAY_SIZE(spec->pcm_rec))
 			break;
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index b68c94757051..f6f16622f9cc 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -9861,6 +9861,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x87f5, "HP", ALC287_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x87f6, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP),
 	SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP),
+	SND_PCI_QUIRK(0x103c, 0x87fe, "HP Laptop 15s-fq2xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
 	SND_PCI_QUIRK(0x103c, 0x8805, "HP ProBook 650 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x880d, "HP EliteBook 830 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8811, "HP Spectre x360 15-eb1xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1),
@@ -9955,6 +9956,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x8c71, "HP EliteBook 845 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8c96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+	SND_PCI_QUIRK(0x103c, 0x8c97, "HP ZBook", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
 	SND_PCI_QUIRK(0x103c, 0x8ca4, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
@@ -10231,6 +10233,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x17aa, 0x3176, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
 	SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
 	SND_PCI_QUIRK(0x17aa, 0x31af, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340),
+	SND_PCI_QUIRK(0x17aa, 0x334b, "Lenovo ThinkCentre M70 Gen5", ALC283_FIXUP_HEADSET_MIC),
 	SND_PCI_QUIRK(0x17aa, 0x3801, "Lenovo Yoga9 14IAP7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN),
 	SND_PCI_QUIRK(0x17aa, 0x3802, "Lenovo Yoga DuetITL 2021", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
 	SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS),
diff --git a/sound/pci/oxygen/oxygen_mixer.c b/sound/pci/oxygen/oxygen_mixer.c
index 46705ec77b48..eb3aca16359c 100644
--- a/sound/pci/oxygen/oxygen_mixer.c
+++ b/sound/pci/oxygen/oxygen_mixer.c
@@ -718,7 +718,7 @@ static int ac97_fp_rec_volume_put(struct snd_kcontrol *ctl,
 	oldreg = oxygen_read_ac97(chip, 1, AC97_REC_GAIN);
 	newreg = oldreg & ~0x0707;
 	newreg = newreg | (value->value.integer.value[0] & 7);
-	newreg = newreg | ((value->value.integer.value[0] & 7) << 8);
+	newreg = newreg | ((value->value.integer.value[1] & 7) << 8);
 	change = newreg != oldreg;
 	if (change)
 		oxygen_write_ac97(chip, 1, AC97_REC_GAIN, newreg);
diff --git a/sound/soc/codecs/rtq9128.c b/sound/soc/codecs/rtq9128.c
index c22b047115cc..aa3eadecd974 100644
--- a/sound/soc/codecs/rtq9128.c
+++ b/sound/soc/codecs/rtq9128.c
@@ -59,6 +59,7 @@
 
 struct rtq9128_data {
 	struct gpio_desc *enable;
+	unsigned int daifmt;
 	int tdm_slots;
 	int tdm_slot_width;
 	bool tdm_input_data2_select;
@@ -391,7 +392,11 @@ static int rtq9128_component_probe(struct snd_soc_component *comp)
 	unsigned int val;
 	int i, ret;
 
-	pm_runtime_resume_and_get(comp->dev);
+	ret = pm_runtime_resume_and_get(comp->dev);
+	if (ret < 0) {
+		dev_err(comp->dev, "Failed to resume device (%d)\n", ret);
+		return ret;
+	}
 
 	val = snd_soc_component_read(comp, RTQ9128_REG_EFUSE_DATA);
 
@@ -437,10 +442,7 @@ static const struct snd_soc_component_driver rtq9128_comp_driver = {
 static int rtq9128_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
 {
 	struct rtq9128_data *data = snd_soc_dai_get_drvdata(dai);
-	struct snd_soc_component *comp = dai->component;
 	struct device *dev = dai->dev;
-	unsigned int audfmt, fmtval;
-	int ret;
 
 	dev_dbg(dev, "%s: fmt 0x%8x\n", __func__, fmt);
 
@@ -450,35 +452,10 @@ static int rtq9128_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
 		return -EINVAL;
 	}
 
-	fmtval = fmt & SND_SOC_DAIFMT_FORMAT_MASK;
-	if (data->tdm_slots && fmtval != SND_SOC_DAIFMT_DSP_A && fmtval != SND_SOC_DAIFMT_DSP_B) {
-		dev_err(dev, "TDM is used, format only support DSP_A or DSP_B\n");
-		return -EINVAL;
-	}
+	/* Only cache the format here; it is programmed into the codec in hw_params() */
+	data->daifmt = fmt;
 
-	switch (fmtval) {
-	case SND_SOC_DAIFMT_I2S:
-		audfmt = 8;
-		break;
-	case SND_SOC_DAIFMT_LEFT_J:
-		audfmt = 9;
-		break;
-	case SND_SOC_DAIFMT_RIGHT_J:
-		audfmt = 10;
-		break;
-	case SND_SOC_DAIFMT_DSP_A:
-		audfmt = data->tdm_slots ? 12 : 11;
-		break;
-	case SND_SOC_DAIFMT_DSP_B:
-		audfmt = data->tdm_slots ? 4 : 3;
-		break;
-	default:
-		dev_err(dev, "Unsupported format 0x%8x\n", fmt);
-		return -EINVAL;
-	}
-
-	ret = snd_soc_component_write_field(comp, RTQ9128_REG_I2S_OPT, RTQ9128_AUDFMT_MASK, audfmt);
-	return ret < 0 ? ret : 0;
+	return 0;
 }
 
 static int rtq9128_dai_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask,
@@ -554,10 +531,38 @@ static int rtq9128_dai_hw_params(struct snd_pcm_substream *stream, struct snd_pc
 	unsigned int width, slot_width, bitrate, audbit, dolen;
 	struct snd_soc_component *comp = dai->component;
 	struct device *dev = dai->dev;
+	unsigned int fmtval, audfmt;
 	int ret;
 
 	dev_dbg(dev, "%s: width %d\n", __func__, params_width(param));
 
+	fmtval = FIELD_GET(SND_SOC_DAIFMT_FORMAT_MASK, data->daifmt);
+	if (data->tdm_slots && fmtval != SND_SOC_DAIFMT_DSP_A && fmtval != SND_SOC_DAIFMT_DSP_B) {
+		dev_err(dev, "TDM is used, format only support DSP_A or DSP_B\n");
+		return -EINVAL;
+	}
+
+	switch (fmtval) {
+	case SND_SOC_DAIFMT_I2S:
+		audfmt = 8;
+		break;
+	case SND_SOC_DAIFMT_LEFT_J:
+		audfmt = 9;
+		break;
+	case SND_SOC_DAIFMT_RIGHT_J:
+		audfmt = 10;
+		break;
+	case SND_SOC_DAIFMT_DSP_A:
+		audfmt = data->tdm_slots ? 12 : 11;
+		break;
+	case SND_SOC_DAIFMT_DSP_B:
+		audfmt = data->tdm_slots ? 4 : 3;
+		break;
+	default:
+		dev_err(dev, "Unsupported format 0x%8x\n", fmtval);
+		return -EINVAL;
+	}
+
 	switch (width = params_width(param)) {
 	case 16:
 		audbit = 0;
@@ -611,6 +616,10 @@ static int rtq9128_dai_hw_params(struct snd_pcm_substream *stream, struct snd_pc
 		return -EINVAL;
 	}
 
+	ret = snd_soc_component_write_field(comp, RTQ9128_REG_I2S_OPT, RTQ9128_AUDFMT_MASK, audfmt);
+	if (ret < 0)
+		return ret;
+
 	ret = snd_soc_component_write_field(comp, RTQ9128_REG_I2S_OPT, RTQ9128_AUDBIT_MASK, audbit);
 	if (ret < 0)
 		return ret;
diff --git a/sound/soc/codecs/tas2562.c b/sound/soc/codecs/tas2562.c
index 962c2cdfa017..54561ae598b8 100644
--- a/sound/soc/codecs/tas2562.c
+++ b/sound/soc/codecs/tas2562.c
@@ -59,7 +59,6 @@ struct tas2562_data {
 
 enum tas256x_model {
 	TAS2562,
-	TAS2563,
 	TAS2564,
 	TAS2110,
 };
@@ -721,7 +720,6 @@ static int tas2562_parse_dt(struct tas2562_data *tas2562)
 
 static const struct i2c_device_id tas2562_id[] = {
 	{ "tas2562", TAS2562 },
-	{ "tas2563", TAS2563 },
 	{ "tas2564", TAS2564 },
 	{ "tas2110", TAS2110 },
 	{ }
@@ -770,7 +768,6 @@ static int tas2562_probe(struct i2c_client *client)
 #ifdef CONFIG_OF
 static const struct of_device_id tas2562_of_match[] = {
 	{ .compatible = "ti,tas2562", },
-	{ .compatible = "ti,tas2563", },
 	{ .compatible = "ti,tas2564", },
 	{ .compatible = "ti,tas2110", },
 	{ },
diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c
index 917b1c15f71d..32913bd1a623 100644
--- a/sound/soc/codecs/tas2781-i2c.c
+++ b/sound/soc/codecs/tas2781-i2c.c
@@ -1,13 +1,13 @@
 // SPDX-License-Identifier: GPL-2.0
 //
-// ALSA SoC Texas Instruments TAS2781 Audio Smart Amplifier
+// ALSA SoC Texas Instruments TAS2563/TAS2781 Audio Smart Amplifier
 //
 // Copyright (C) 2022 - 2023 Texas Instruments Incorporated
 // https://www.ti.com
 //
-// The TAS2781 driver implements a flexible and configurable
+// The TAS2563/TAS2781 driver implements a flexible and configurable
 // algo coefficient setting for one, two, or even multiple
-// TAS2781 chips.
+// TAS2563/TAS2781 chips.
 //
 // Author: Shenghao Ding <shenghao-ding@ti.com>
 // Author: Kevin Lu <kevin-lu@ti.com>
@@ -32,6 +32,7 @@
 #include <sound/tas2781-tlv.h>
 
 static const struct i2c_device_id tasdevice_id[] = {
+	{ "tas2563", TAS2563 },
 	{ "tas2781", TAS2781 },
 	{}
 };
@@ -39,6 +40,7 @@ MODULE_DEVICE_TABLE(i2c, tasdevice_id);
 
 #ifdef CONFIG_OF
 static const struct of_device_id tasdevice_of_match[] = {
+	{ .compatible = "ti,tas2563" },
 	{ .compatible = "ti,tas2781" },
 	{},
 };
diff --git a/sound/soc/generic/audio-graph-card2.c b/sound/soc/generic/audio-graph-card2.c
index 9c94677f681a..62606e20be9a 100644
--- a/sound/soc/generic/audio-graph-card2.c
+++ b/sound/soc/generic/audio-graph-card2.c
@@ -556,7 +556,7 @@ static int graph_parse_node_multi_nm(struct snd_soc_dai_link *dai_link,
 		struct device_node *mcodec_port;
 		int codec_idx;
 
-		if (*nm_idx >= nm_max)
+		if (*nm_idx > nm_max)
 			break;
 
 		mcpu_ep_n = of_get_next_child(mcpu_port, mcpu_ep_n);
diff --git a/sound/soc/intel/boards/bxt_da7219_max98357a.c b/sound/soc/intel/boards/bxt_da7219_max98357a.c
index 816fad8c1ff0..540f7a29310a 100644
--- a/sound/soc/intel/boards/bxt_da7219_max98357a.c
+++ b/sound/soc/intel/boards/bxt_da7219_max98357a.c
@@ -797,6 +797,9 @@ static int broxton_audio_probe(struct platform_device *pdev)
 		broxton_audio_card.name = "glkda7219max";
 		/* Fixup the SSP entries for geminilake */
 		for (i = 0; i < ARRAY_SIZE(broxton_dais); i++) {
+			if (!broxton_dais[i].codecs->dai_name)
+				continue;
+
 			/* MAXIM_CODEC is connected to SSP1. */
 			if (!strcmp(broxton_dais[i].codecs->dai_name,
 				    BXT_MAXIM_CODEC_DAI)) {
@@ -822,6 +825,9 @@ static int broxton_audio_probe(struct platform_device *pdev)
 			broxton_audio_card.name = "cmlda7219max";
 
 		for (i = 0; i < ARRAY_SIZE(broxton_dais); i++) {
+			if (!broxton_dais[i].codecs->dai_name)
+				continue;
+
 			/* MAXIM_CODEC is connected to SSP1. */
 			if (!strcmp(broxton_dais[i].codecs->dai_name,
 					BXT_MAXIM_CODEC_DAI)) {
diff --git a/sound/soc/intel/boards/bxt_rt298.c b/sound/soc/intel/boards/bxt_rt298.c
index 4631106f2a28..c0eb65c14aa9 100644
--- a/sound/soc/intel/boards/bxt_rt298.c
+++ b/sound/soc/intel/boards/bxt_rt298.c
@@ -604,7 +604,8 @@ static int broxton_audio_probe(struct platform_device *pdev)
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(broxton_rt298_dais); i++) {
-		if (!strncmp(card->dai_link[i].codecs->name, "i2c-INT343A:00",
+		if (card->dai_link[i].codecs->name &&
+		    !strncmp(card->dai_link[i].codecs->name, "i2c-INT343A:00",
 			     I2C_NAME_SIZE)) {
 			if (!strncmp(card->name, "broxton-rt298",
 				     PLATFORM_NAME_SIZE)) {
diff --git a/sound/soc/mediatek/common/mtk-dsp-sof-common.c b/sound/soc/mediatek/common/mtk-dsp-sof-common.c
index f3894010f656..7ec8965a70c0 100644
--- a/sound/soc/mediatek/common/mtk-dsp-sof-common.c
+++ b/sound/soc/mediatek/common/mtk-dsp-sof-common.c
@@ -24,7 +24,7 @@ int mtk_sof_dai_link_fixup(struct snd_soc_pcm_runtime *rtd,
 		struct snd_soc_dai_link *sof_dai_link = NULL;
 		const struct sof_conn_stream *conn = &sof_priv->conn_streams[i];
 
-		if (strcmp(rtd->dai_link->name, conn->normal_link))
+		if (conn->normal_link && strcmp(rtd->dai_link->name, conn->normal_link))
 			continue;
 
 		for_each_card_rtds(card, runtime) {
diff --git a/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c b/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c
index 5bd6addd1450..bfcb2c486c39 100644
--- a/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c
+++ b/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c
@@ -1208,7 +1208,8 @@ static int mt8192_mt6359_dev_probe(struct platform_device *pdev)
 			dai_link->ignore = 0;
 		}
 
-		if (strcmp(dai_link->codecs[0].dai_name, RT1015_CODEC_DAI) == 0)
+		if (dai_link->num_codecs && dai_link->codecs[0].dai_name &&
+		    strcmp(dai_link->codecs[0].dai_name, RT1015_CODEC_DAI) == 0)
 			dai_link->ops = &mt8192_rt1015_i2s_ops;
 
 		if (!dai_link->platforms->name)
diff --git a/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c b/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c
index 1e33863c85ca..620d7ade1992 100644
--- a/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c
+++ b/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c
@@ -1795,10 +1795,6 @@ static const struct snd_kcontrol_new mt8195_memif_controls[] = {
 			    MT8195_AFE_IRQ_28),
 };
 
-static const struct snd_soc_component_driver mt8195_afe_pcm_dai_component = {
-	.name = "mt8195-afe-pcm-dai",
-};
-
 static const struct mtk_base_memif_data memif_data[MT8195_AFE_MEMIF_NUM] = {
 	[MT8195_AFE_MEMIF_DL2] = {
 		.name = "DL2",
@@ -3037,7 +3033,6 @@ static int mt8195_afe_pcm_dev_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct reset_control *rstc;
 	int i, irq_id, ret;
-	struct snd_soc_component *component;
 
 	ret = of_reserved_mem_device_init(dev);
 	if (ret)
@@ -3170,36 +3165,12 @@ static int mt8195_afe_pcm_dev_probe(struct platform_device *pdev)
 
 	/* register component */
 	ret = devm_snd_soc_register_component(dev, &mt8195_afe_component,
-					      NULL, 0);
+					      afe->dai_drivers, afe->num_dai_drivers);
 	if (ret) {
 		dev_warn(dev, "err_platform\n");
 		goto err_pm_put;
 	}
 
-	component = devm_kzalloc(dev, sizeof(*component), GFP_KERNEL);
-	if (!component) {
-		ret = -ENOMEM;
-		goto err_pm_put;
-	}
-
-	ret = snd_soc_component_initialize(component,
-					   &mt8195_afe_pcm_dai_component,
-					   dev);
-	if (ret)
-		goto err_pm_put;
-
-#ifdef CONFIG_DEBUG_FS
-	component->debugfs_prefix = "pcm";
-#endif
-
-	ret = snd_soc_add_component(component,
-				    afe->dai_drivers,
-				    afe->num_dai_drivers);
-	if (ret) {
-		dev_warn(dev, "err_dai_component\n");
-		goto err_pm_put;
-	}
-
 	ret = regmap_multi_reg_write(afe->regmap, mt8195_afe_reg_defaults,
 				     ARRAY_SIZE(mt8195_afe_reg_defaults));
 	if (ret)
@@ -3224,8 +3195,6 @@ err_pm_put:
 
 static void mt8195_afe_pcm_dev_remove(struct platform_device *pdev)
 {
-	snd_soc_unregister_component(&pdev->dev);
-
 	pm_runtime_disable(&pdev->dev);
 	if (!pm_runtime_status_suspended(&pdev->dev))
 		mt8195_afe_runtime_suspend(&pdev->dev);
diff --git a/sound/soc/mediatek/mt8195/mt8195-mt6359.c b/sound/soc/mediatek/mt8195/mt8195-mt6359.c
index 4feb9fb76967..53fd8a897b9d 100644
--- a/sound/soc/mediatek/mt8195/mt8195-mt6359.c
+++ b/sound/soc/mediatek/mt8195/mt8195-mt6359.c
@@ -934,12 +934,11 @@ SND_SOC_DAILINK_DEFS(ETDM1_IN_BE,
 
 SND_SOC_DAILINK_DEFS(ETDM2_IN_BE,
 		     DAILINK_COMP_ARRAY(COMP_CPU("ETDM2_IN")),
-		     DAILINK_COMP_ARRAY(COMP_DUMMY()),
+		     DAILINK_COMP_ARRAY(COMP_EMPTY()),
 		     DAILINK_COMP_ARRAY(COMP_EMPTY()));
 
 SND_SOC_DAILINK_DEFS(ETDM1_OUT_BE,
 		     DAILINK_COMP_ARRAY(COMP_CPU("ETDM1_OUT")),
-		     DAILINK_COMP_ARRAY(COMP_DUMMY()),
 		     DAILINK_COMP_ARRAY(COMP_EMPTY()));
 
 SND_SOC_DAILINK_DEFS(ETDM2_OUT_BE,
@@ -1237,8 +1236,6 @@ static struct snd_soc_dai_link mt8195_mt6359_dai_links[] = {
 			SND_SOC_DAIFMT_NB_NF |
 			SND_SOC_DAIFMT_CBS_CFS,
 		.dpcm_capture = 1,
-		.init = mt8195_rt5682_init,
-		.ops = &mt8195_rt5682_etdm_ops,
 		.be_hw_params_fixup = mt8195_etdm_hw_params_fixup,
 		SND_SOC_DAILINK_REG(ETDM2_IN_BE),
 	},
@@ -1249,7 +1246,6 @@ static struct snd_soc_dai_link mt8195_mt6359_dai_links[] = {
 			SND_SOC_DAIFMT_NB_NF |
 			SND_SOC_DAIFMT_CBS_CFS,
 		.dpcm_playback = 1,
-		.ops = &mt8195_rt5682_etdm_ops,
 		.be_hw_params_fixup = mt8195_etdm_hw_params_fixup,
 		SND_SOC_DAILINK_REG(ETDM1_OUT_BE),
 	},
@@ -1381,7 +1377,7 @@ static int mt8195_mt6359_dev_probe(struct platform_device *pdev)
 	struct snd_soc_dai_link *dai_link;
 	struct mtk_soc_card_data *soc_card_data;
 	struct mt8195_mt6359_priv *mach_priv;
-	struct device_node *platform_node, *adsp_node, *dp_node, *hdmi_node;
+	struct device_node *platform_node, *adsp_node, *codec_node, *dp_node, *hdmi_node;
 	struct mt8195_card_data *card_data;
 	int is5682s = 0;
 	int init6359 = 0;
@@ -1401,8 +1397,12 @@ static int mt8195_mt6359_dev_probe(struct platform_device *pdev)
 	if (!card->name)
 		card->name = card_data->name;
 
-	if (strstr(card->name, "_5682s"))
+	if (strstr(card->name, "_5682s")) {
+		codec_node = of_find_compatible_node(NULL, NULL, "realtek,rt5682s");
 		is5682s = 1;
+	} else
+		codec_node = of_find_compatible_node(NULL, NULL, "realtek,rt5682i");
+
 	soc_card_data = devm_kzalloc(&pdev->dev, sizeof(*card_data), GFP_KERNEL);
 	if (!soc_card_data)
 		return -ENOMEM;
@@ -1488,12 +1488,27 @@ static int mt8195_mt6359_dev_probe(struct platform_device *pdev)
 				dai_link->codecs->dai_name = "i2s-hifi";
 				dai_link->init = mt8195_hdmi_codec_init;
 			}
-		} else if (strcmp(dai_link->name, "ETDM1_OUT_BE") == 0 ||
-			   strcmp(dai_link->name, "ETDM2_IN_BE") == 0) {
-			dai_link->codecs->name =
-				is5682s ? RT5682S_DEV0_NAME : RT5682_DEV0_NAME;
-			dai_link->codecs->dai_name =
-				is5682s ? RT5682S_CODEC_DAI : RT5682_CODEC_DAI;
+		} else if (strcmp(dai_link->name, "ETDM1_OUT_BE") == 0) {
+			if (!codec_node) {
+				dev_err(&pdev->dev, "Codec not found!\n");
+			} else {
+				dai_link->codecs->of_node = codec_node;
+				dai_link->codecs->name = NULL;
+				dai_link->codecs->dai_name =
+					is5682s ? RT5682S_CODEC_DAI : RT5682_CODEC_DAI;
+				dai_link->init = mt8195_rt5682_init;
+				dai_link->ops = &mt8195_rt5682_etdm_ops;
+			}
+		} else if (strcmp(dai_link->name, "ETDM2_IN_BE") == 0) {
+			if (!codec_node) {
+				dev_err(&pdev->dev, "Codec not found!\n");
+			} else {
+				dai_link->codecs->of_node = codec_node;
+				dai_link->codecs->name = NULL;
+				dai_link->codecs->dai_name =
+					is5682s ? RT5682S_CODEC_DAI : RT5682_CODEC_DAI;
+				dai_link->ops = &mt8195_rt5682_etdm_ops;
+			}
 		} else if (strcmp(dai_link->name, "DL_SRC_BE") == 0 ||
 			   strcmp(dai_link->name, "UL_SRC1_BE") == 0 ||
 			   strcmp(dai_link->name, "UL_SRC2_BE") == 0) {
diff --git a/sound/soc/sof/ipc3-dtrace.c b/sound/soc/sof/ipc3-dtrace.c
index 93b189c2d2ee..0dca139322f3 100644
--- a/sound/soc/sof/ipc3-dtrace.c
+++ b/sound/soc/sof/ipc3-dtrace.c
@@ -137,7 +137,6 @@ static int trace_filter_parse(struct snd_sof_dev *sdev, char *string,
 			dev_err(sdev->dev,
 				"Parsing filter entry '%s' failed with %d\n",
 				entry, entry_len);
-			kfree(*out);
 			return -EINVAL;
 		}
 	}
@@ -209,13 +208,13 @@ static ssize_t dfsentry_trace_filter_write(struct file *file, const char __user
 		ret = ipc3_trace_update_filter(sdev, num_elems, elems);
 		if (ret < 0) {
 			dev_err(sdev->dev, "Filter update failed: %d\n", ret);
-			kfree(elems);
 			goto error;
 		}
 	}
 	ret = count;
 error:
 	kfree(string);
+	kfree(elems);
 	return ret;
 }
 
diff --git a/sound/soc/sof/ipc4-loader.c b/sound/soc/sof/ipc4-loader.c
index 3539b0a66e1b..c79479afa8d0 100644
--- a/sound/soc/sof/ipc4-loader.c
+++ b/sound/soc/sof/ipc4-loader.c
@@ -482,13 +482,10 @@ void sof_ipc4_update_cpc_from_manifest(struct snd_sof_dev *sdev,
 		msg = "No CPC match in the firmware file's manifest";
 
 no_cpc:
-	dev_warn(sdev->dev, "%s (UUID: %pUL): %s (ibs/obs: %u/%u)\n",
-		 fw_module->man4_module_entry.name,
-		 &fw_module->man4_module_entry.uuid, msg, basecfg->ibs,
-		 basecfg->obs);
-	dev_warn_once(sdev->dev, "Please try to update the firmware.\n");
-	dev_warn_once(sdev->dev, "If the issue persists, file a bug at\n");
-	dev_warn_once(sdev->dev, "https://github.com/thesofproject/sof/issues/\n");
+	dev_dbg(sdev->dev, "%s (UUID: %pUL): %s (ibs/obs: %u/%u)\n",
+		fw_module->man4_module_entry.name,
+		&fw_module->man4_module_entry.uuid, msg, basecfg->ibs,
+		basecfg->obs);
 }
 
 const struct sof_ipc_fw_loader_ops ipc4_loader_ops = {
diff --git a/sound/soc/sof/ipc4-pcm.c b/sound/soc/sof/ipc4-pcm.c
index 39039a647cca..85d3f390e4b2 100644
--- a/sound/soc/sof/ipc4-pcm.c
+++ b/sound/soc/sof/ipc4-pcm.c
@@ -768,10 +768,8 @@ static void sof_ipc4_build_time_info(struct snd_sof_dev *sdev, struct snd_sof_pc
 	info->llp_offset = offsetof(struct sof_ipc4_fw_registers, llp_evad_reading_slot) +
 					sdev->fw_info_box.offset;
 	sof_mailbox_read(sdev, info->llp_offset, &llp_slot, sizeof(llp_slot));
-	if (llp_slot.node_id != dai_copier->data.gtw_cfg.node_id) {
-		dev_info(sdev->dev, "no llp found, fall back to default HDA path");
+	if (llp_slot.node_id != dai_copier->data.gtw_cfg.node_id)
 		info->llp_offset = 0;
-	}
 }
 
 static int sof_ipc4_pcm_hw_params(struct snd_soc_component *component,
diff --git a/sound/usb/mixer_scarlett2.c b/sound/usb/mixer_scarlett2.c
index 1de3ddc50eb6..6de605a601e5 100644
--- a/sound/usb/mixer_scarlett2.c
+++ b/sound/usb/mixer_scarlett2.c
@@ -5361,9 +5361,9 @@ static int scarlett2_add_line_out_ctls(struct usb_mixer_interface *mixer)
 			if (private->vol_sw_hw_switch[index])
 				scarlett2_vol_ctl_set_writable(mixer, i, 0);
 
-			snprintf(s, sizeof(s),
-				 "Line Out %02d Volume Control Playback Enum",
-				 i + 1);
+			scnprintf(s, sizeof(s),
+				  "Line Out %02d Volume Control Playback Enum",
+				  i + 1);
 			err = scarlett2_add_new_ctl(mixer,
 						    &scarlett2_sw_hw_enum_ctl,
 						    i, 1, s,
@@ -5406,8 +5406,8 @@ static int scarlett2_add_line_in_ctls(struct usb_mixer_interface *mixer)
 
 	/* Add input level (line/inst) controls */
 	for (i = 0; i < info->level_input_count; i++) {
-		snprintf(s, sizeof(s), fmt, i + 1 + info->level_input_first,
-			 "Level", "Enum");
+		scnprintf(s, sizeof(s), fmt, i + 1 + info->level_input_first,
+			  "Level", "Enum");
 		err = scarlett2_add_new_ctl(mixer, &scarlett2_level_enum_ctl,
 					    i, 1, s, &private->level_ctls[i]);
 		if (err < 0)
@@ -5416,7 +5416,7 @@ static int scarlett2_add_line_in_ctls(struct usb_mixer_interface *mixer)
 
 	/* Add input pad controls */
 	for (i = 0; i < info->pad_input_count; i++) {
-		snprintf(s, sizeof(s), fmt, i + 1, "Pad", "Switch");
+		scnprintf(s, sizeof(s), fmt, i + 1, "Pad", "Switch");
 		err = scarlett2_add_new_ctl(mixer, &scarlett2_pad_ctl,
 					    i, 1, s, &private->pad_ctls[i]);
 		if (err < 0)
@@ -5425,8 +5425,8 @@ static int scarlett2_add_line_in_ctls(struct usb_mixer_interface *mixer)
 
 	/* Add input air controls */
 	for (i = 0; i < info->air_input_count; i++) {
-		snprintf(s, sizeof(s), fmt, i + 1 + info->air_input_first,
-			 "Air", info->air_option ? "Enum" : "Switch");
+		scnprintf(s, sizeof(s), fmt, i + 1 + info->air_input_first,
+			  "Air", info->air_option ? "Enum" : "Switch");
 		err = scarlett2_add_new_ctl(
 			mixer, &scarlett2_air_ctl[info->air_option],
 			i, 1, s, &private->air_ctls[i]);
@@ -5481,9 +5481,9 @@ static int scarlett2_add_line_in_ctls(struct usb_mixer_interface *mixer)
 
 		for (i = 0; i < info->gain_input_count; i++) {
 			if (i % 2) {
-				snprintf(s, sizeof(s),
-					 "Line In %d-%d Link Capture Switch",
-					 i, i + 1);
+				scnprintf(s, sizeof(s),
+					  "Line In %d-%d Link Capture Switch",
+					  i, i + 1);
 				err = scarlett2_add_new_ctl(
 					mixer, &scarlett2_input_link_ctl,
 					i / 2, 1, s,
@@ -5492,30 +5492,30 @@ static int scarlett2_add_line_in_ctls(struct usb_mixer_interface *mixer)
 					return err;
 			}
 
-			snprintf(s, sizeof(s), fmt, i + 1,
-				 "Gain", "Volume");
+			scnprintf(s, sizeof(s), fmt, i + 1,
+				  "Gain", "Volume");
 			err = scarlett2_add_new_ctl(
 				mixer, &scarlett2_input_gain_ctl,
 				i, 1, s, &private->input_gain_ctls[i]);
 			if (err < 0)
 				return err;
 
-			snprintf(s, sizeof(s), fmt, i + 1,
-				 "Autogain", "Switch");
+			scnprintf(s, sizeof(s), fmt, i + 1,
+				  "Autogain", "Switch");
 			err = scarlett2_add_new_ctl(
 				mixer, &scarlett2_autogain_switch_ctl,
 				i, 1, s, &private->autogain_ctls[i]);
 			if (err < 0)
 				return err;
 
-			snprintf(s, sizeof(s), fmt, i + 1,
-				 "Autogain Status", "Enum");
+			scnprintf(s, sizeof(s), fmt, i + 1,
+				  "Autogain Status", "Enum");
 			err = scarlett2_add_new_ctl(
 				mixer, &scarlett2_autogain_status_ctl,
 				i, 1, s, &private->autogain_status_ctls[i]);
 
-			snprintf(s, sizeof(s), fmt, i + 1,
-				 "Safe", "Switch");
+			scnprintf(s, sizeof(s), fmt, i + 1,
+				  "Safe", "Switch");
 			err = scarlett2_add_new_ctl(
 				mixer, &scarlett2_safe_ctl,
 				i, 1, s, &private->safe_ctls[i]);
@@ -5902,8 +5902,8 @@ static int scarlett2_add_direct_monitor_ctls(struct usb_mixer_interface *mixer)
 			for (k = 0; k < private->num_mix_in; k++, index++) {
 				char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN];
 
-				snprintf(name, sizeof(name), format,
-					 mix_type, 'A' + j, k + 1);
+				scnprintf(name, sizeof(name), format,
+					  mix_type, 'A' + j, k + 1);
 
 				err = scarlett2_add_new_ctl(
 					mixer, &scarlett2_monitor_mix_ctl,
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 934e2777a2db..64df118376df 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -32,6 +32,7 @@ FEATURE_TESTS_BASIC :=                  \
         backtrace                       \
         dwarf                           \
         dwarf_getlocations              \
+        dwarf_getcfi                    \
         eventfd                         \
         fortify-source                  \
         get_current_dir_name            \
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index dad79ede4e0a..37722e509eb9 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -7,6 +7,7 @@ FILES=                                          \
          test-bionic.bin                        \
          test-dwarf.bin                         \
          test-dwarf_getlocations.bin            \
+         test-dwarf_getcfi.bin                  \
          test-eventfd.bin                       \
          test-fortify-source.bin                \
          test-get_current_dir_name.bin          \
@@ -154,6 +155,9 @@ $(OUTPUT)test-dwarf.bin:
 $(OUTPUT)test-dwarf_getlocations.bin:
 	$(BUILD) $(DWARFLIBS)
 
+$(OUTPUT)test-dwarf_getcfi.bin:
+	$(BUILD) $(DWARFLIBS)
+
 $(OUTPUT)test-libelf-getphdrnum.bin:
 	$(BUILD) -lelf
 
diff --git a/tools/build/feature/test-dwarf_getcfi.c b/tools/build/feature/test-dwarf_getcfi.c
new file mode 100644
index 000000000000..50e7d7cb7bdf
--- /dev/null
+++ b/tools/build/feature/test-dwarf_getcfi.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <elfutils/libdw.h>
+
+int main(void)
+{
+	/* Feature probe: this only has to build and link against libdw. */
+	return !dwarf_getcfi(NULL);
+}
diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c
index eb6303ff446e..4cfcef9da3e4 100644
--- a/tools/build/feature/test-libopencsd.c
+++ b/tools/build/feature/test-libopencsd.c
@@ -4,9 +4,9 @@
 /*
  * Check OpenCSD library version is sufficient to provide required features
  */
-#define OCSD_MIN_VER ((1 << 16) | (1 << 8) | (1))
+#define OCSD_MIN_VER ((1 << 16) | (2 << 8) | (1))
 #if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER)
-#error "OpenCSD >= 1.1.1 is required"
+#error "OpenCSD >= 1.2.1 is required"
 #endif
 
 int main(void)
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 39c6a250dd1b..3a64499b0f5d 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -204,6 +204,8 @@ enum perf_branch_sample_type_shift {
 
 	PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT	= 18, /* save privilege mode */
 
+	PERF_SAMPLE_BRANCH_COUNTERS_SHIFT	= 19, /* save occurrences of events on a branch */
+
 	PERF_SAMPLE_BRANCH_MAX_SHIFT		/* non-ABI */
 };
 
@@ -235,6 +237,8 @@ enum perf_branch_sample_type {
 
 	PERF_SAMPLE_BRANCH_PRIV_SAVE	= 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
 
+	PERF_SAMPLE_BRANCH_COUNTERS	= 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT,
+
 	PERF_SAMPLE_BRANCH_MAX		= 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
 };
 
@@ -982,6 +986,12 @@ enum perf_event_type {
 	 *	{ u64                   nr;
 	 *	  { u64	hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX
 	 *        { u64 from, to, flags } lbr[nr];
+	 *        #
+	 *        # The format of the counters is decided by the
+	 *        # "branch_counter_nr" and "branch_counter_width",
+	 *        # which are defined in the ABI.
+	 *        #
+	 *        { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS
 	 *      } && PERF_SAMPLE_BRANCH_STACK
 	 *
 	 * 	{ u64			abi; # enum perf_sample_regs_abi
@@ -1427,6 +1437,9 @@ struct perf_branch_entry {
 		reserved:31;
 };
 
+/* Size of used info bits in struct perf_branch_entry */
+#define PERF_BRANCH_ENTRY_INFO_BITS_MAX		33
+
 union perf_sample_weight {
 	__u64		full;
 #if defined(__LITTLE_ENDIAN_BITFIELD)
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index 5cb0eeec2c8a..337fde770e45 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -16,6 +16,7 @@
 #include <sys/mount.h>
 
 #include "fs.h"
+#include "../io.h"
 #include "debug-internal.h"
 
 #define _STR(x) #x
@@ -344,53 +345,24 @@ int filename__read_ull(const char *filename, unsigned long long *value)
 	return filename__read_ull_base(filename, value, 0);
 }
 
-#define STRERR_BUFSIZE  128     /* For the buffer size of strerror_r */
-
 int filename__read_str(const char *filename, char **buf, size_t *sizep)
 {
-	size_t size = 0, alloc_size = 0;
-	void *bf = NULL, *nbf;
-	int fd, n, err = 0;
-	char sbuf[STRERR_BUFSIZE];
+	struct io io;
+	char bf[128];
+	int err;
 
-	fd = open(filename, O_RDONLY);
-	if (fd < 0)
+	io.fd = open(filename, O_RDONLY);
+	if (io.fd < 0)
 		return -errno;
-
-	do {
-		if (size == alloc_size) {
-			alloc_size += BUFSIZ;
-			nbf = realloc(bf, alloc_size);
-			if (!nbf) {
-				err = -ENOMEM;
-				break;
-			}
-
-			bf = nbf;
-		}
-
-		n = read(fd, bf + size, alloc_size - size);
-		if (n < 0) {
-			if (size) {
-				pr_warn("read failed %d: %s\n", errno,
-					strerror_r(errno, sbuf, sizeof(sbuf)));
-				err = 0;
-			} else
-				err = -errno;
-
-			break;
-		}
-
-		size += n;
-	} while (n > 0);
-
-	if (!err) {
-		*sizep = size;
-		*buf   = bf;
+	io__init(&io, io.fd, bf, sizeof(bf));
+	*buf = NULL;
+	err = io__getdelim(&io, buf, sizep, /*delim=*/-1);
+	if (err < 0) {
+		free(*buf);
+		*buf = NULL;
 	} else
-		free(bf);
-
-	close(fd);
+		err = 0;
+	close(io.fd);
 	return err;
 }
 
@@ -475,15 +447,22 @@ int sysfs__read_str(const char *entry, char **buf, size_t *sizep)
 
 int sysfs__read_bool(const char *entry, bool *value)
 {
-	char *buf;
-	size_t size;
-	int ret;
+	struct io io;
+	char bf[16];
+	int ret = 0;
+	char path[PATH_MAX];
+	const char *sysfs = sysfs__mountpoint();
+
+	if (!sysfs)
+		return -1;
 
-	ret = sysfs__read_str(entry, &buf, &size);
-	if (ret < 0)
-		return ret;
+	snprintf(path, sizeof(path), "%s/%s", sysfs, entry);
+	io.fd = open(path, O_RDONLY);
+	if (io.fd < 0)
+		return -errno;
 
-	switch (buf[0]) {
+	io__init(&io, io.fd, bf, sizeof(bf));
+	switch (io__get_char(&io)) {
 	case '1':
 	case 'y':
 	case 'Y':
@@ -497,8 +476,7 @@ int sysfs__read_bool(const char *entry, bool *value)
 	default:
 		ret = -1;
 	}
-
-	free(buf);
+	close(io.fd);
 
 	return ret;
 }
diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h
index a77b74c5fb65..84adf8102018 100644
--- a/tools/lib/api/io.h
+++ b/tools/lib/api/io.h
@@ -12,6 +12,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <linux/types.h>
 
 struct io {
 	/* File descriptor being read/ */
@@ -140,8 +141,8 @@ static inline int io__get_dec(struct io *io, __u64 *dec)
 	}
 }
 
-/* Read up to and including the first newline following the pattern of getline. */
-static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_len_out)
+/* Read up to and including the first delim. */
+static inline ssize_t io__getdelim(struct io *io, char **line_out, size_t *line_len_out, int delim)
 {
 	char buf[128];
 	int buf_pos = 0;
@@ -151,7 +152,7 @@ static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_l
 
 	/* TODO: reuse previously allocated memory. */
 	free(*line_out);
-	while (ch != '\n') {
+	while (ch != delim) {
 		ch = io__get_char(io);
 
 		if (ch < 0)
@@ -184,4 +185,9 @@ err_out:
 	return -ENOMEM;
 }
 
+static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_len_out)
+{
+	return io__getdelim(io, line_out, line_len_out, /*delim=*/'\n');
+}
+
 #endif /* __API_IO__ */
diff --git a/tools/lib/perf/Documentation/examples/sampling.c b/tools/lib/perf/Documentation/examples/sampling.c
index 8e1a926a9cfe..bc142f0664b5 100644
--- a/tools/lib/perf/Documentation/examples/sampling.c
+++ b/tools/lib/perf/Documentation/examples/sampling.c
@@ -39,7 +39,7 @@ int main(int argc, char **argv)
 
 	libperf_init(libperf_print);
 
-	cpus = perf_cpu_map__new(NULL);
+	cpus = perf_cpu_map__new_online_cpus();
 	if (!cpus) {
 		fprintf(stderr, "failed to create cpus\n");
 		return -1;
diff --git a/tools/lib/perf/Documentation/libperf-sampling.txt b/tools/lib/perf/Documentation/libperf-sampling.txt
index d6ca24f6ef78..2378980fab8a 100644
--- a/tools/lib/perf/Documentation/libperf-sampling.txt
+++ b/tools/lib/perf/Documentation/libperf-sampling.txt
@@ -97,7 +97,7 @@ In this case we will monitor all the available CPUs:
 
 [source,c]
 --
- 42         cpus = perf_cpu_map__new(NULL);
+ 42         cpus = perf_cpu_map__new_online_cpus();
  43         if (!cpus) {
  44                 fprintf(stderr, "failed to create cpus\n");
  45                 return -1;
diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt
index a8f1a237931b..fcfb9499ef9c 100644
--- a/tools/lib/perf/Documentation/libperf.txt
+++ b/tools/lib/perf/Documentation/libperf.txt
@@ -37,7 +37,7 @@ SYNOPSIS
 
   struct perf_cpu_map;
 
-  struct perf_cpu_map *perf_cpu_map__dummy_new(void);
+  struct perf_cpu_map *perf_cpu_map__new_any_cpu(void);
   struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
   struct perf_cpu_map *perf_cpu_map__read(FILE *file);
   struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
@@ -46,7 +46,7 @@ SYNOPSIS
   void perf_cpu_map__put(struct perf_cpu_map *map);
   int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
   int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
-  bool perf_cpu_map__empty(const struct perf_cpu_map *map);
+  bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map);
   int perf_cpu_map__max(struct perf_cpu_map *map);
   bool perf_cpu_map__has(const struct perf_cpu_map *map, int cpu);
 
diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index 2a5a29217374..4adcd7920d03 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -9,6 +9,7 @@
 #include <unistd.h>
 #include <ctype.h>
 #include <limits.h>
+#include "internal.h"
 
 void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus)
 {
@@ -27,7 +28,7 @@ struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus)
 	return result;
 }
 
-struct perf_cpu_map *perf_cpu_map__dummy_new(void)
+struct perf_cpu_map *perf_cpu_map__new_any_cpu(void)
 {
 	struct perf_cpu_map *cpus = perf_cpu_map__alloc(1);
 
@@ -66,15 +67,21 @@ void perf_cpu_map__put(struct perf_cpu_map *map)
 	}
 }
 
-static struct perf_cpu_map *cpu_map__default_new(void)
+static struct perf_cpu_map *cpu_map__new_sysconf(void)
 {
 	struct perf_cpu_map *cpus;
-	int nr_cpus;
+	int nr_cpus, nr_cpus_conf;
 
 	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
 	if (nr_cpus < 0)
 		return NULL;
 
+	/* Online and configured CPU counts differ: warn that the map is partial. */
+	nr_cpus_conf = sysconf(_SC_NPROCESSORS_CONF);
+	if (nr_cpus != nr_cpus_conf)
+		pr_warning("Number of online CPUs (%d) differs from the number configured (%d) the CPU map will only cover the first %d CPUs.\n",
+			nr_cpus, nr_cpus_conf, nr_cpus);
+
 	cpus = perf_cpu_map__alloc(nr_cpus);
 	if (cpus != NULL) {
 		int i;
@@ -86,9 +93,27 @@ static struct perf_cpu_map *cpu_map__default_new(void)
 	return cpus;
 }
 
-struct perf_cpu_map *perf_cpu_map__default_new(void)
+static struct perf_cpu_map *cpu_map__new_sysfs_online(void)
 {
-	return cpu_map__default_new();
+	/* Parse the sysfs online-CPU list; NULL when the file cannot be opened. */
+	FILE *fp = fopen("/sys/devices/system/cpu/online", "r");
+	struct perf_cpu_map *map;
+
+	if (!fp)
+		return NULL;
+	map = perf_cpu_map__read(fp);
+	fclose(fp);
+	return map;
+}
+
+/* Prefer the sysfs online list; fall back to sysconf() if that gives no map. */
+struct perf_cpu_map *perf_cpu_map__new_online_cpus(void)
+{
+	struct perf_cpu_map *map = cpu_map__new_sysfs_online();
+
+	if (!map)
+		map = cpu_map__new_sysconf();
+	return map;
 }
 
 
@@ -180,27 +205,11 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file)
 
 	if (nr_cpus > 0)
 		cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
-	else
-		cpus = cpu_map__default_new();
 out_free_tmp:
 	free(tmp_cpus);
 	return cpus;
 }
 
-static struct perf_cpu_map *cpu_map__read_all_cpu_map(void)
-{
-	struct perf_cpu_map *cpus = NULL;
-	FILE *onlnf;
-
-	onlnf = fopen("/sys/devices/system/cpu/online", "r");
-	if (!onlnf)
-		return cpu_map__default_new();
-
-	cpus = perf_cpu_map__read(onlnf);
-	fclose(onlnf);
-	return cpus;
-}
-
 struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
 {
 	struct perf_cpu_map *cpus = NULL;
@@ -211,7 +220,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
 	int max_entries = 0;
 
 	if (!cpu_list)
-		return cpu_map__read_all_cpu_map();
+		return perf_cpu_map__new_online_cpus();
 
 	/*
 	 * must handle the case of empty cpumap to cover
@@ -268,10 +277,12 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
 
 	if (nr_cpus > 0)
 		cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
-	else if (*cpu_list != '\0')
-		cpus = cpu_map__default_new();
-	else
-		cpus = perf_cpu_map__dummy_new();
+	else if (*cpu_list != '\0') {
+		pr_warning("Unexpected characters at end of cpu list ('%s'), using online CPUs.",
+			   cpu_list);
+		cpus = perf_cpu_map__new_online_cpus();
+	} else
+		cpus = perf_cpu_map__new_any_cpu();
 invalid:
 	free(tmp_cpus);
 out:
@@ -300,7 +311,7 @@ int perf_cpu_map__nr(const struct perf_cpu_map *cpus)
 	return cpus ? __perf_cpu_map__nr(cpus) : 1;
 }
 
-bool perf_cpu_map__empty(const struct perf_cpu_map *map)
+bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map)
 {
 	return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true;
 }
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index 3acbbccc1901..058e3ff10f9b 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -39,7 +39,7 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
 	if (evsel->system_wide) {
 		/* System wide: set the cpu map of the evsel to all online CPUs. */
 		perf_cpu_map__put(evsel->cpus);
-		evsel->cpus = perf_cpu_map__new(NULL);
+		evsel->cpus = perf_cpu_map__new_online_cpus();
 	} else if (evlist->has_user_cpus && evsel->is_pmu_core) {
 		/*
 		 * User requested CPUs on a core PMU, ensure the requested CPUs
@@ -619,7 +619,7 @@ static int perf_evlist__nr_mmaps(struct perf_evlist *evlist)
 
 	/* One for each CPU */
 	nr_mmaps = perf_cpu_map__nr(evlist->all_cpus);
-	if (perf_cpu_map__empty(evlist->all_cpus)) {
+	if (perf_cpu_map__has_any_cpu_or_is_empty(evlist->all_cpus)) {
 		/* Plus one for each thread */
 		nr_mmaps += perf_thread_map__nr(evlist->threads);
 		/* Minus the per-thread CPU (-1) */
@@ -653,7 +653,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
 	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
 		return -ENOMEM;
 
-	if (perf_cpu_map__empty(cpus))
+	if (perf_cpu_map__has_any_cpu_or_is_empty(cpus))
 		return mmap_per_thread(evlist, ops, mp);
 
 	return mmap_per_cpu(evlist, ops, mp);
diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c
index 8b51b008a81f..c07160953224 100644
--- a/tools/lib/perf/evsel.c
+++ b/tools/lib/perf/evsel.c
@@ -120,7 +120,7 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
 		static struct perf_cpu_map *empty_cpu_map;
 
 		if (empty_cpu_map == NULL) {
-			empty_cpu_map = perf_cpu_map__dummy_new();
+			empty_cpu_map = perf_cpu_map__new_any_cpu();
 			if (empty_cpu_map == NULL)
 				return -ENOMEM;
 		}
diff --git a/tools/lib/perf/include/internal/mmap.h b/tools/lib/perf/include/internal/mmap.h
index 5a062af8e9d8..5f08cab61ece 100644
--- a/tools/lib/perf/include/internal/mmap.h
+++ b/tools/lib/perf/include/internal/mmap.h
@@ -33,7 +33,8 @@ struct perf_mmap {
 	bool			 overwrite;
 	u64			 flush;
 	libperf_unmap_cb_t	 unmap_cb;
-	char			 event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
+	void			*event_copy;
+	size_t			 event_copy_sz;
 	struct perf_mmap	*next;
 };
 
diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index e38d859a384d..228c6c629b0c 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -19,10 +19,23 @@ struct perf_cache {
 struct perf_cpu_map;
 
 /**
- * perf_cpu_map__dummy_new - a map with a singular "any CPU"/dummy -1 value.
+ * perf_cpu_map__new_any_cpu - a map with a singular "any CPU"/dummy -1 value.
+ */
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_any_cpu(void);
+/**
+ * perf_cpu_map__new_online_cpus - a map read from
+ *                                 /sys/devices/system/cpu/online if
+ *                                 available. If reading wasn't possible a map
+ *                                 is created from the online processor count,
+ *                                 assuming the first 'n' processors are all
+ *                                 online.
+ */
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_online_cpus(void);
+/**
+ * perf_cpu_map__new - create a map from the given cpu_list such as "0-7". If
+ *                     cpu_list is NULL then the result of
+ *                     perf_cpu_map__new_online_cpus() is returned.
  */
-LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);
-LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void);
 LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
 LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file);
 LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
@@ -31,12 +44,23 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
 LIBPERF_API struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
 							 struct perf_cpu_map *other);
 LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__cpu - get the CPU value at the given index. Returns -1 if index
+ *                     is invalid.
+ */
 LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
+/**
+ * perf_cpu_map__nr - for an empty map returns 1, as perf_cpu_map__cpu returns a
+ *                    cpu of -1 for an invalid index, this makes an empty map
+ *                    look like it contains the "any CPU"/dummy value. Otherwise
+ *                    the result is the number of CPUs in the map plus one if the
+ *                    "any CPU"/dummy value is present.
+ */
 LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
 /**
- * perf_cpu_map__empty - is map either empty or the "any CPU"/dummy value.
+ * perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value.
  */
-LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map);
+LIBPERF_API bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map);
 LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map);
 LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu);
 LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs,
@@ -51,6 +75,12 @@ LIBPERF_API bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map);
 	     (idx) < perf_cpu_map__nr(cpus);			\
 	     (idx)++, (cpu) = perf_cpu_map__cpu(cpus, idx))
 
+#define perf_cpu_map__for_each_cpu_skip_any(_cpu, idx, cpus)	\
+	for ((idx) = 0, (_cpu) = perf_cpu_map__cpu(cpus, idx);	\
+	     (idx) < perf_cpu_map__nr(cpus);			\
+	     (idx)++, (_cpu) = perf_cpu_map__cpu(cpus, idx))	\
+		if ((_cpu).cpu != -1)
+
 #define perf_cpu_map__for_each_idx(idx, cpus)				\
 	for ((idx) = 0; (idx) < perf_cpu_map__nr(cpus); (idx)++)
 
diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
index 190b56ae923a..10b3f3722642 100644
--- a/tools/lib/perf/libperf.map
+++ b/tools/lib/perf/libperf.map
@@ -1,15 +1,15 @@
 LIBPERF_0.0.1 {
 	global:
 		libperf_init;
-		perf_cpu_map__dummy_new;
-		perf_cpu_map__default_new;
+		perf_cpu_map__new_any_cpu;
+		perf_cpu_map__new_online_cpus;
 		perf_cpu_map__get;
 		perf_cpu_map__put;
 		perf_cpu_map__new;
 		perf_cpu_map__read;
 		perf_cpu_map__nr;
 		perf_cpu_map__cpu;
-		perf_cpu_map__empty;
+		perf_cpu_map__has_any_cpu_or_is_empty;
 		perf_cpu_map__max;
 		perf_cpu_map__has;
 		perf_thread_map__new_array;
diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c
index 2184814b37dd..0c903c2372c9 100644
--- a/tools/lib/perf/mmap.c
+++ b/tools/lib/perf/mmap.c
@@ -19,6 +19,7 @@
 void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
 		     bool overwrite, libperf_unmap_cb_t unmap_cb)
 {
+	/* Assume fields were zero initialized. */
 	map->fd = -1;
 	map->overwrite = overwrite;
 	map->unmap_cb  = unmap_cb;
@@ -51,13 +52,18 @@ int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
 
 void perf_mmap__munmap(struct perf_mmap *map)
 {
-	if (map && map->base != NULL) {
+	if (!map)
+		return;
+
+	zfree(&map->event_copy);
+	map->event_copy_sz = 0;
+	if (map->base) {
 		munmap(map->base, perf_mmap__mmap_len(map));
 		map->base = NULL;
 		map->fd = -1;
 		refcount_set(&map->refcnt, 0);
 	}
-	if (map && map->unmap_cb)
+	if (map->unmap_cb)
 		map->unmap_cb(map);
 }
 
@@ -223,9 +229,17 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map,
 		 */
 		if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
 			unsigned int offset = *startp;
-			unsigned int len = min(sizeof(*event), size), cpy;
+			unsigned int len = size, cpy;
 			void *dst = map->event_copy;
 
+			if (size > map->event_copy_sz) {
+				dst = realloc(map->event_copy, size);
+				if (!dst)
+					return NULL;
+				map->event_copy = dst;
+				map->event_copy_sz = size;
+			}
+
 			do {
 				cpy = min(map->mask + 1 - (offset & map->mask), len);
 				memcpy(dst, &data[offset & map->mask], cpy);
diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c
index 87b0510a556f..c998b1dae863 100644
--- a/tools/lib/perf/tests/test-cpumap.c
+++ b/tools/lib/perf/tests/test-cpumap.c
@@ -21,7 +21,7 @@ int test_cpumap(int argc, char **argv)
 
 	libperf_init(libperf_print);
 
-	cpus = perf_cpu_map__dummy_new();
+	cpus = perf_cpu_map__new_any_cpu();
 	if (!cpus)
 		return -1;
 
@@ -29,7 +29,7 @@ int test_cpumap(int argc, char **argv)
 	perf_cpu_map__put(cpus);
 	perf_cpu_map__put(cpus);
 
-	cpus = perf_cpu_map__default_new();
+	cpus = perf_cpu_map__new_online_cpus();
 	if (!cpus)
 		return -1;
 
diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c
index ed616fc19b4f..10f70cb41ff1 100644
--- a/tools/lib/perf/tests/test-evlist.c
+++ b/tools/lib/perf/tests/test-evlist.c
@@ -46,7 +46,7 @@ static int test_stat_cpu(void)
 	};
 	int err, idx;
 
-	cpus = perf_cpu_map__new(NULL);
+	cpus = perf_cpu_map__new_online_cpus();
 	__T("failed to create cpus", cpus);
 
 	evlist = perf_evlist__new();
@@ -261,7 +261,7 @@ static int test_mmap_thread(void)
 	threads = perf_thread_map__new_dummy();
 	__T("failed to create threads", threads);
 
-	cpus = perf_cpu_map__dummy_new();
+	cpus = perf_cpu_map__new_any_cpu();
 	__T("failed to create cpus", cpus);
 
 	perf_thread_map__set_pid(threads, 0, pid);
@@ -350,7 +350,7 @@ static int test_mmap_cpus(void)
 
 	attr.config = id;
 
-	cpus = perf_cpu_map__new(NULL);
+	cpus = perf_cpu_map__new_online_cpus();
 	__T("failed to create cpus", cpus);
 
 	evlist = perf_evlist__new();
diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c
index a11fc51bfb68..545ec3150546 100644
--- a/tools/lib/perf/tests/test-evsel.c
+++ b/tools/lib/perf/tests/test-evsel.c
@@ -27,7 +27,7 @@ static int test_stat_cpu(void)
 	};
 	int err, idx;
 
-	cpus = perf_cpu_map__new(NULL);
+	cpus = perf_cpu_map__new_online_cpus();
 	__T("failed to create cpus", cpus);
 
 	evsel = perf_evsel__new(&attr);
diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c
index adfbae27dc36..8561b0f01a24 100644
--- a/tools/lib/subcmd/help.c
+++ b/tools/lib/subcmd/help.c
@@ -52,11 +52,21 @@ void uniq(struct cmdnames *cmds)
 	if (!cmds->cnt)
 		return;
 
-	for (i = j = 1; i < cmds->cnt; i++)
-		if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name))
-			cmds->names[j++] = cmds->names[i];
-
+	for (i = 1; i < cmds->cnt; i++) {
+		if (!strcmp(cmds->names[i]->name, cmds->names[i-1]->name))
+			zfree(&cmds->names[i - 1]);
+	}
+	for (i = 0, j = 0; i < cmds->cnt; i++) {
+		if (cmds->names[i]) {
+			if (i == j)
+				j++;
+			else
+				cmds->names[j++] = cmds->names[i];
+		}
+	}
 	cmds->cnt = j;
+	while (j < i)
+		cmds->names[j++] = NULL;
 }
 
 void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index f533e76fb480..f5b81d439387 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -39,6 +39,9 @@ trace/beauty/generated/
 pmu-events/pmu-events.c
 pmu-events/jevents
 pmu-events/metric_test.log
+tests/shell/*.shellcheck_log
+tests/shell/coresight/*.shellcheck_log
+tests/shell/lib/*.shellcheck_log
 feature/
 libapi/
 libbpf/
@@ -49,3 +52,4 @@ libtraceevent/
 libtraceevent_plugins/
 fixdep
 Documentation/doc.dep
+python_ext_build/
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
index a97f95825b14..19cc179be9a7 100644
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -25,6 +25,7 @@
 		q	quicker (less detailed) decoding
 		A	approximate IPC
 		Z	prefer to ignore timestamps (so-called "timeless" decoding)
+		T	use the timestamp trace as kernel time
 
 	The default is all events i.e. the same as --itrace=iybxwpe,
 	except for perf script where it is --itrace=ce
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index fe168e8165c8..b95524bea021 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -155,6 +155,17 @@ include::itrace.txt[]
 	stdio or stdio2 (Default: 0).  Note that this is about selection of
 	functions to display, not about lines within the function.
 
+--data-type[=TYPE_NAME]::
+	Display data type annotation instead of code.  It infers data type of
+	samples (if they are memory accessing instructions) using DWARF debug
+	information.  It can take an optional argument of data type name.  In
+	that case it'd show annotation for the type only, otherwise it'd show
+	all data types it finds.
+
+--type-stat::
+	Show stats for the data type annotation.
+
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index 0b4e79dbd3f6..379f9d7a8ab1 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -251,7 +251,8 @@ annotate.*::
 		addr2line binary to use for file names and line numbers.
 
 	annotate.objdump::
-		objdump binary to use for disassembly and annotations.
+		objdump binary to use for disassembly and annotations,
+		including in the 'perf test' command.
 
 	annotate.disassembler_style::
 		Use this to change the default disassembler style to some other value
@@ -722,7 +723,6 @@ session-<NAME>.*::
 		Defines new record session for daemon. The value is record's
 		command line without the 'record' keyword.
 
-
 SEE ALSO
 --------
 linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index d5f78e125efe..1b90575ee3c8 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -81,11 +81,13 @@ For Intel systems precise event sampling is implemented with PEBS
 which supports up to precise-level 2, and precise level 3 for
 some special cases
 
-On AMD systems it is implemented using IBS (up to precise-level 2).
-The precise modifier works with event types 0x76 (cpu-cycles, CPU
-clocks not halted) and 0xC1 (micro-ops retired). Both events map to
-IBS execution sampling (IBS op) with the IBS Op Counter Control bit
-(IbsOpCntCtl) set respectively (see the
+On AMD systems it is implemented using IBS OP (up to precise-level 2).
+Unlike Intel PEBS, which provides levels of precision, the AMD core PMU is
+inherently non-precise and IBS is inherently precise (i.e. ibs_op//,
+ibs_op//p, ibs_op//pp and ibs_op//ppp are all the same). The precise modifier
+works with event types 0x76 (cpu-cycles, CPU clocks not halted) and 0xC1
+(micro-ops retired). Both events map to IBS execution sampling (IBS op)
+with the IBS Op Counter Control bit (IbsOpCntCtl) set respectively (see the
 Core Complex (CCX) -> Processor x86 Core -> Instruction Based Sampling (IBS)
 section of the [AMD Processor Programming Reference (PPR)] relevant to the
 family, model and stepping of the processor being used).
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index 503abcba1438..f5938d616d75 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -119,7 +119,7 @@ INFO OPTIONS
 
 
 CONTENTION OPTIONS
---------------
+------------------
 
 -k::
 --key=<value>::
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 1889f66addf2..6015fdd08fb6 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -445,6 +445,10 @@ following filters are defined:
 		     4th-Gen Xeon+ server), the save branch type is unconditionally enabled
 		     when the taken branch stack sampling is enabled.
 	- priv: save privilege state during sampling in case binary is not available later
+	- counter: save occurrences of the event since the last branch entry. Currently, the
+		   feature is only supported on newer CPUs, e.g., Intel Sierra Forest and
+		   later platforms. An error is expected if it is used on an unsupported
+		   kernel or CPU.
 
 +
 The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index af068b4f1e5a..38f59ac064f7 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -118,6 +118,9 @@ OPTIONS
 	- retire_lat: On X86, this reports pipeline stall of this instruction compared
 	  to the previous instruction in cycles. And currently supported only on X86
 	- simd: Flags describing a SIMD operation. "e" for empty Arm SVE predicate. "p" for partial Arm SVE predicate
+	- type: Data type of sample memory access.
+	- typeoff: Offset in the data type of sample memory access.
+	- symoff: Offset in the symbol.
 
 	By default, comm, dso and symbol keys are used.
 	(i.e. --sort comm,dso,symbol)
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 8f789fa1242e..5af2e432b54f 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -422,7 +422,34 @@ See perf list output for the possible metrics and metricgroups.
 
 -A::
 --no-aggr::
-Do not aggregate counts across all monitored CPUs.
+--no-merge::
+Do not aggregate/merge counts across monitored CPUs or PMUs.
+
+When multiple events are created from a single event specification,
+stat will, by default, aggregate the event counts and show the result
+in a single row. This option disables that behavior and shows the
+individual events and counts.
+
+Multiple events are created from a single event specification when:
+
+1. PID monitoring isn't requested and the system has more than one
+   CPU. For example, a system with 8 SMT threads will have one event
+   opened on each thread and aggregation is performed across them.
+
+2. Prefix or glob wildcard matching is used for the PMU name. For
+   example, multiple memory controller PMUs may exist typically with a
+   suffix of _0, _1, etc. By default the event counts will all be
+   combined if the PMU is specified without the suffix such as
+   uncore_imc rather than uncore_imc_0.
+
+3. Aliases, which are listed immediately after the Kernel PMU events
+   by perf list, are used.
+
+--hybrid-merge::
+Merge core event counts from all core PMUs. In hybrid or big.LITTLE
+systems by default each core PMU will report its count
+separately. This option forces core PMU counts to be combined to give
+a behavior closer to having a single CPU type in the system.
 
 --topdown::
 Print top-down metrics supported by the CPU. This allows to determine
@@ -475,29 +502,6 @@ highlight 'tma_frontend_bound'. This metric may be drilled into with
 
 Error out if the input is higher than the supported max level.
 
---no-merge::
-Do not merge results from same PMUs.
-
-When multiple events are created from a single event specification,
-stat will, by default, aggregate the event counts and show the result
-in a single row. This option disables that behavior and shows
-the individual events and counts.
-
-Multiple events are created from a single event specification when:
-1. Prefix or glob matching is used for the PMU name.
-2. Aliases, which are listed immediately after the Kernel PMU events
-   by perf list, are used.
-
---hybrid-merge::
-Merge the hybrid event counts from all PMUs.
-
-For hybrid events, by default, the stat aggregates and reports the event
-counts per PMU. But sometimes, it's also useful to aggregate event counts
-from all PMUs. This option enables that behavior and reports the counts
-without PMUs.
-
-For non-hybrid events, it should be no effect.
-
 --smi-cost::
 Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
 
diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt
index ba3df49c169d..a7cf7bc2f968 100644
--- a/tools/perf/Documentation/perf.txt
+++ b/tools/perf/Documentation/perf.txt
@@ -64,6 +64,9 @@ OPTIONS
           perf-event-open  - Print perf_event_open() arguments and
                              return value
 
+--debug-file::
+	Write debug output to a specified file.
+
 DESCRIPTION
 -----------
 Performance counters for Linux are a new kernel-based subsystem
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index b3e6ed10f40c..aa55850fbc21 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -476,6 +476,11 @@ else
       else
         CFLAGS += -DHAVE_DWARF_GETLOCATIONS_SUPPORT
       endif # dwarf_getlocations
+      ifneq ($(feature-dwarf_getcfi), 1)
+        msg := $(warning Old libdw.h, finding variables at given 'perf probe' point will not work, install elfutils-devel/libdw-dev >= 0.142);
+      else
+        CFLAGS += -DHAVE_DWARF_CFI_SUPPORT
+      endif # dwarf_getcfi
     endif # Dwarf support
   endif # libelf support
 endif # NO_LIBELF
@@ -680,15 +685,15 @@ ifndef BUILD_BPF_SKEL
 endif
 
 ifeq ($(BUILD_BPF_SKEL),1)
-  ifeq ($(filter -DHAVE_LIBBPF_SUPPORT, $(CFLAGS)),)
-    dummy := $(warning Warning: Disabled BPF skeletons as libbpf is required)
-    BUILD_BPF_SKEL := 0
-  else ifeq ($(filter -DHAVE_LIBELF_SUPPORT, $(CFLAGS)),)
+  ifeq ($(filter -DHAVE_LIBELF_SUPPORT, $(CFLAGS)),)
     dummy := $(warning Warning: Disabled BPF skeletons as libelf is required by bpftool)
     BUILD_BPF_SKEL := 0
   else ifeq ($(filter -DHAVE_ZLIB_SUPPORT, $(CFLAGS)),)
     dummy := $(warning Warning: Disabled BPF skeletons as zlib is required by bpftool)
     BUILD_BPF_SKEL := 0
+  else ifeq ($(filter -DHAVE_LIBBPF_SUPPORT, $(CFLAGS)),)
+    dummy := $(warning Warning: Disabled BPF skeletons as libbpf is required)
+    BUILD_BPF_SKEL := 0
   else ifeq ($(call get-executable,$(CLANG)),)
     dummy := $(warning Warning: Disabled BPF skeletons as clang ($(CLANG)) is missing)
     BUILD_BPF_SKEL := 0
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 058c9aecf608..27e7c478880f 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -134,6 +134,8 @@ include ../scripts/utilities.mak
 #	x86 instruction decoder - new instructions test
 #
 # Define GEN_VMLINUX_H to generate vmlinux.h from the BTF.
+#
+# Define NO_SHELLCHECK if you do not want to run shellcheck during build
 
 # As per kernel Makefile, avoid funny character set dependencies
 unexport LC_ALL
@@ -227,8 +229,15 @@ else
   force_fixdep := $(config)
 endif
 
+# Runs shellcheck on perf test shell scripts
+ifeq ($(NO_SHELLCHECK),1)
+  SHELLCHECK :=
+else
+  SHELLCHECK := $(shell which shellcheck 2> /dev/null)
+endif
+
 export srctree OUTPUT RM CC CXX LD AR CFLAGS CXXFLAGS V BISON FLEX AWK
-export HOSTCC HOSTLD HOSTAR HOSTCFLAGS
+export HOSTCC HOSTLD HOSTAR HOSTCFLAGS SHELLCHECK
 
 include $(srctree)/tools/build/Makefile.include
 
@@ -1152,7 +1161,7 @@ bpf-skel-clean:
 
 clean:: $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(LIBPERF)-clean arm64-sysreg-defs-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean
 	$(call QUIET_CLEAN, core-objs)  $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-iostat $(LANG_BINDINGS)
-	$(Q)find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
+	$(Q)find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete -o -name '*.shellcheck_log' -delete
 	$(Q)$(RM) $(OUTPUT).config-detected
 	$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)$(LIBJVMTI).so
 	$(call QUIET_CLEAN, core-gen)   $(RM)  *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 2cf873d71dff..77e6663c1703 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -199,7 +199,7 @@ static int cs_etm_validate_config(struct auxtrace_record *itr,
 {
 	int i, err = -EINVAL;
 	struct perf_cpu_map *event_cpus = evsel->evlist->core.user_requested_cpus;
-	struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL);
+	struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
 
 	/* Set option of each CPU we have */
 	for (i = 0; i < cpu__max_cpu().cpu; i++) {
@@ -211,7 +211,7 @@ static int cs_etm_validate_config(struct auxtrace_record *itr,
 		 * program can run on any CPUs in this case, thus don't skip
 		 * validation.
 		 */
-		if (!perf_cpu_map__empty(event_cpus) &&
+		if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus) &&
 		    !perf_cpu_map__has(event_cpus, cpu))
 			continue;
 
@@ -435,7 +435,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
 	 * Also the case of per-cpu mmaps, need the contextID in order to be notified
 	 * when a context switch happened.
 	 */
-	if (!perf_cpu_map__empty(cpus)) {
+	if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
 		evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel,
 					   "timestamp", 1);
 		evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel,
@@ -461,7 +461,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
 	evsel->core.attr.sample_period = 1;
 
 	/* In per-cpu case, always need the time of mmap events etc */
-	if (!perf_cpu_map__empty(cpus))
+	if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus))
 		evsel__set_sample_bit(evsel, TIME);
 
 	err = cs_etm_validate_config(itr, cs_etm_evsel);
@@ -536,10 +536,10 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
 	int i;
 	int etmv3 = 0, etmv4 = 0, ete = 0;
 	struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus;
-	struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL);
+	struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
 
 	/* cpu map is not empty, we have specific CPUs to work with */
-	if (!perf_cpu_map__empty(event_cpus)) {
+	if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) {
 		for (i = 0; i < cpu__max_cpu().cpu; i++) {
 			struct perf_cpu cpu = { .cpu = i, };
 
@@ -802,7 +802,7 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
 	u64 nr_cpu, type;
 	struct perf_cpu_map *cpu_map;
 	struct perf_cpu_map *event_cpus = session->evlist->core.user_requested_cpus;
-	struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL);
+	struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
 	struct cs_etm_recording *ptr =
 			container_of(itr, struct cs_etm_recording, itr);
 	struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
@@ -814,7 +814,7 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
 		return -EINVAL;
 
 	/* If the cpu_map is empty all online CPUs are involved */
-	if (perf_cpu_map__empty(event_cpus)) {
+	if (perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) {
 		cpu_map = online_cpus;
 	} else {
 		/* Make sure all specified CPUs are online */
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index e3acc739bd00..51ccbfd3d246 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -232,7 +232,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
 	 * In the case of per-cpu mmaps, sample CPU for AUX event;
 	 * also enable the timestamp tracing for samples correlation.
 	 */
-	if (!perf_cpu_map__empty(cpus)) {
+	if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
 		evsel__set_sample_bit(arm_spe_evsel, CPU);
 		evsel__set_config_if_unset(arm_spe_pmu, arm_spe_evsel,
 					   "ts_enable", 1);
@@ -265,7 +265,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
 	tracking_evsel->core.attr.sample_period = 1;
 
 	/* In per-cpu case, always need the time of mmap events etc */
-	if (!perf_cpu_map__empty(cpus)) {
+	if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
 		evsel__set_sample_bit(tracking_evsel, TIME);
 		evsel__set_sample_bit(tracking_evsel, CPU);
 
diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c
index a2eef9ec5491..97037499152e 100644
--- a/tools/perf/arch/arm64/util/header.c
+++ b/tools/perf/arch/arm64/util/header.c
@@ -57,7 +57,7 @@ static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus)
 
 int get_cpuid(char *buf, size_t sz)
 {
-	struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);
+	struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus();
 	int ret;
 
 	if (!cpus)
diff --git a/tools/perf/arch/loongarch/annotate/instructions.c b/tools/perf/arch/loongarch/annotate/instructions.c
index 98e19c5366ac..21cc7e4149f7 100644
--- a/tools/perf/arch/loongarch/annotate/instructions.c
+++ b/tools/perf/arch/loongarch/annotate/instructions.c
@@ -61,10 +61,10 @@ static int loongarch_jump__parse(struct arch *arch, struct ins_operands *ops, st
 	const char *c = strchr(ops->raw, '#');
 	u64 start, end;
 
-	ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char);
-	ops->raw_func_start = strchr(ops->raw, '<');
+	ops->jump.raw_comment = strchr(ops->raw, arch->objdump.comment_char);
+	ops->jump.raw_func_start = strchr(ops->raw, '<');
 
-	if (ops->raw_func_start && c > ops->raw_func_start)
+	if (ops->jump.raw_func_start && c > ops->jump.raw_func_start)
 		c = NULL;
 
 	if (c++ != NULL)
diff --git a/tools/perf/arch/x86/tests/hybrid.c b/tools/perf/arch/x86/tests/hybrid.c
index eb152770f148..40f5d17fedab 100644
--- a/tools/perf/arch/x86/tests/hybrid.c
+++ b/tools/perf/arch/x86/tests/hybrid.c
@@ -47,7 +47,7 @@ static int test__hybrid_hw_group_event(struct evlist *evlist)
 	evsel = evsel__next(evsel);
 	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
 	TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW));
-	TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS));
+	TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_INSTRUCTIONS));
 	TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
 	return TEST_OK;
 }
@@ -102,7 +102,7 @@ static int test__hybrid_group_modifier1(struct evlist *evlist)
 	evsel = evsel__next(evsel);
 	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
 	TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW));
-	TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS));
+	TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_INSTRUCTIONS));
 	TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
 	TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
 	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
@@ -163,6 +163,24 @@ static int test__checkevent_pmu(struct evlist *evlist)
 	return TEST_OK;
 }
 
+static int test__hybrid_hw_group_event_2(struct evlist *evlist)
+{
+	struct evsel *evsel, *leader;
+
+	evsel = leader = evlist__first(evlist);
+	TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
+	TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW));
+	TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+	TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+
+	evsel = evsel__next(evsel);
+	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
+	TEST_ASSERT_VAL("wrong config", evsel->core.attr.config == 0x3c);
+	TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
+	return TEST_OK;
+}
+
 struct evlist_test {
 	const char *name;
 	bool (*valid)(void);
@@ -171,27 +189,27 @@ struct evlist_test {
 
 static const struct evlist_test test__hybrid_events[] = {
 	{
-		.name  = "cpu_core/cpu-cycles/",
+		.name  = "cpu_core/cycles/",
 		.check = test__hybrid_hw_event_with_pmu,
 		/* 0 */
 	},
 	{
-		.name  = "{cpu_core/cpu-cycles/,cpu_core/instructions/}",
+		.name  = "{cpu_core/cycles/,cpu_core/branches/}",
 		.check = test__hybrid_hw_group_event,
 		/* 1 */
 	},
 	{
-		.name  = "{cpu-clock,cpu_core/cpu-cycles/}",
+		.name  = "{cpu-clock,cpu_core/cycles/}",
 		.check = test__hybrid_sw_hw_group_event,
 		/* 2 */
 	},
 	{
-		.name  = "{cpu_core/cpu-cycles/,cpu-clock}",
+		.name  = "{cpu_core/cycles/,cpu-clock}",
 		.check = test__hybrid_hw_sw_group_event,
 		/* 3 */
 	},
 	{
-		.name  = "{cpu_core/cpu-cycles/k,cpu_core/instructions/u}",
+		.name  = "{cpu_core/cycles/k,cpu_core/branches/u}",
 		.check = test__hybrid_group_modifier1,
 		/* 4 */
 	},
@@ -215,6 +233,11 @@ static const struct evlist_test test__hybrid_events[] = {
 		.check = test__hybrid_cache_event,
 		/* 8 */
 	},
+	{
+		.name  = "{cpu_core/cycles/,cpu_core/cpu-cycles/}",
+		.check = test__hybrid_hw_group_event_2,
+		/* 9 */
+	},
 };
 
 static int test_event(const struct evlist_test *e)
diff --git a/tools/perf/arch/x86/util/dwarf-regs.c b/tools/perf/arch/x86/util/dwarf-regs.c
index 530934805710..399c4a0a29d8 100644
--- a/tools/perf/arch/x86/util/dwarf-regs.c
+++ b/tools/perf/arch/x86/util/dwarf-regs.c
@@ -113,3 +113,54 @@ int regs_query_register_offset(const char *name)
 			return roff->offset;
 	return -EINVAL;
 }
+
+/* Maps a register name (without the '%' prefix) to its DWARF register number. */
+struct dwarf_regs_idx {
+	const char *name;
+	int idx;
+};
+
+/*
+ * Sub-register aliases (e.g. eax/ax/al) share the DWARF number of their
+ * full-width register.  get_arch_regnum() returns the first entry whose
+ * name matches, so each name must appear exactly once.
+ */
+static const struct dwarf_regs_idx x86_regidx_table[] = {
+	{ "rax", 0 }, { "eax", 0 }, { "ax", 0 }, { "al", 0 },
+	{ "rdx", 1 }, { "edx", 1 }, { "dx", 1 }, { "dl", 1 },
+	{ "rcx", 2 }, { "ecx", 2 }, { "cx", 2 }, { "cl", 2 },
+	{ "rbx", 3 }, { "ebx", 3 }, { "bx", 3 }, { "bl", 3 },
+	{ "rsi", 4 }, { "esi", 4 }, { "si", 4 }, { "sil", 4 },
+	{ "rdi", 5 }, { "edi", 5 }, { "di", 5 }, { "dil", 5 },
+	{ "rbp", 6 }, { "ebp", 6 }, { "bp", 6 }, { "bpl", 6 },
+	{ "rsp", 7 }, { "esp", 7 }, { "sp", 7 }, { "spl", 7 },
+	{ "r8", 8 }, { "r8d", 8 }, { "r8w", 8 }, { "r8b", 8 },
+	{ "r9", 9 }, { "r9d", 9 }, { "r9w", 9 }, { "r9b", 9 },
+	{ "r10", 10 }, { "r10d", 10 }, { "r10w", 10 }, { "r10b", 10 },
+	{ "r11", 11 }, { "r11d", 11 }, { "r11w", 11 }, { "r11b", 11 },
+	{ "r12", 12 }, { "r12d", 12 }, { "r12w", 12 }, { "r12b", 12 },
+	{ "r13", 13 }, { "r13d", 13 }, { "r13w", 13 }, { "r13b", 13 },
+	{ "r14", 14 }, { "r14d", 14 }, { "r14w", 14 }, { "r14b", 14 },
+	{ "r15", 15 }, { "r15d", 15 }, { "r15w", 15 }, { "r15b", 15 },
+	{ "rip", DWARF_REG_PC },
+};
+
+/*
+ * Translate a '%'-prefixed register name such as "%rax" into its DWARF
+ * register number.
+ *
+ * Returns the idx of the matching x86_regidx_table entry, -EINVAL if @name
+ * does not start with '%', or -ENOENT if no entry matches.
+ */
+int get_arch_regnum(const char *name)
+{
+	unsigned int i;
+
+	if (*name != '%')
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(x86_regidx_table); i++)
+		if (!strcmp(x86_regidx_table[i].name, name + 1))
+			return x86_regidx_table[i].idx;
+	return -ENOENT;
+}
diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c
index 5741ffe47312..e65b7dbe27fb 100644
--- a/tools/perf/arch/x86/util/event.c
+++ b/tools/perf/arch/x86/util/event.c
@@ -14,66 +14,79 @@
 
 #if defined(__x86_64__)
 
-int perf_event__synthesize_extra_kmaps(struct perf_tool *tool,
-				       perf_event__handler_t process,
-				       struct machine *machine)
+struct perf_event__synthesize_extra_kmaps_cb_args {
+	struct perf_tool *tool;
+	perf_event__handler_t process;
+	struct machine *machine;
+	union perf_event *event;
+};
+
+static int perf_event__synthesize_extra_kmaps_cb(struct map *map, void *data)
 {
-	int rc = 0;
-	struct map_rb_node *pos;
-	struct maps *kmaps = machine__kernel_maps(machine);
-	union perf_event *event = zalloc(sizeof(event->mmap) +
-					 machine->id_hdr_size);
+	struct perf_event__synthesize_extra_kmaps_cb_args *args = data;
+	union perf_event *event = args->event;
+	struct kmap *kmap;
+	size_t size;
 
-	if (!event) {
-		pr_debug("Not enough memory synthesizing mmap event "
-			 "for extra kernel maps\n");
-		return -1;
-	}
+	if (!__map__is_extra_kernel_map(map))
+		return 0;
 
-	maps__for_each_entry(kmaps, pos) {
-		struct kmap *kmap;
-		size_t size;
-		struct map *map = pos->map;
+	kmap = map__kmap(map);
 
-		if (!__map__is_extra_kernel_map(map))
-			continue;
+	size = sizeof(event->mmap) - sizeof(event->mmap.filename) +
+		      PERF_ALIGN(strlen(kmap->name) + 1, sizeof(u64)) +
+		      args->machine->id_hdr_size;
 
-		kmap = map__kmap(map);
+	memset(event, 0, size);
 
-		size = sizeof(event->mmap) - sizeof(event->mmap.filename) +
-		       PERF_ALIGN(strlen(kmap->name) + 1, sizeof(u64)) +
-		       machine->id_hdr_size;
+	event->mmap.header.type = PERF_RECORD_MMAP;
 
-		memset(event, 0, size);
+	/*
+	 * kernel uses 0 for user space maps, see kernel/perf_event.c
+	 * __perf_event_mmap
+	 */
+	if (machine__is_host(args->machine))
+		event->header.misc = PERF_RECORD_MISC_KERNEL;
+	else
+		event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
 
-		event->mmap.header.type = PERF_RECORD_MMAP;
+	event->mmap.header.size = size;
 
-		/*
-		 * kernel uses 0 for user space maps, see kernel/perf_event.c
-		 * __perf_event_mmap
-		 */
-		if (machine__is_host(machine))
-			event->header.misc = PERF_RECORD_MISC_KERNEL;
-		else
-			event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
+	event->mmap.start = map__start(map);
+	event->mmap.len   = map__size(map);
+	event->mmap.pgoff = map__pgoff(map);
+	event->mmap.pid   = args->machine->pid;
 
-		event->mmap.header.size = size;
+	strlcpy(event->mmap.filename, kmap->name, PATH_MAX);
 
-		event->mmap.start = map__start(map);
-		event->mmap.len   = map__size(map);
-		event->mmap.pgoff = map__pgoff(map);
-		event->mmap.pid   = machine->pid;
+	if (perf_tool__process_synth_event(args->tool, event, args->machine, args->process) != 0)
+		return -1;
 
-		strlcpy(event->mmap.filename, kmap->name, PATH_MAX);
+	return 0;
+}
 
-		if (perf_tool__process_synth_event(tool, event, machine,
-						   process) != 0) {
-			rc = -1;
-			break;
-		}
+int perf_event__synthesize_extra_kmaps(struct perf_tool *tool,
+				       perf_event__handler_t process,
+				       struct machine *machine)
+{
+	int rc;
+	struct maps *kmaps = machine__kernel_maps(machine);
+	struct perf_event__synthesize_extra_kmaps_cb_args args = {
+		.tool = tool,
+		.process = process,
+		.machine = machine,
+		.event = zalloc(sizeof(args.event->mmap) + machine->id_hdr_size),
+	};
+
+	if (!args.event) {
+		pr_debug("Not enough memory synthesizing mmap event "
+			 "for extra kernel maps\n");
+		return -1;
 	}
 
-	free(event);
+	rc = maps__for_each_map(kmaps, perf_event__synthesize_extra_kmaps_cb, &args);
+
+	free(args.event);
 	return rc;
 }
 
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index d2c8cac11470..af8ae4647585 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -143,7 +143,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
 	if (!opts->full_auxtrace)
 		return 0;
 
-	if (opts->full_auxtrace && !perf_cpu_map__empty(cpus)) {
+	if (opts->full_auxtrace && !perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
 		pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n");
 		return -EINVAL;
 	}
@@ -224,7 +224,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
 		 * In the case of per-cpu mmaps, we need the CPU on the
 		 * AUX event.
 		 */
-		if (!perf_cpu_map__empty(cpus))
+		if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus))
 			evsel__set_sample_bit(intel_bts_evsel, CPU);
 	}
 
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index fa0c718b9e72..d199619df3ab 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -369,7 +369,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
 			ui__warning("Intel Processor Trace: TSC not available\n");
 	}
 
-	per_cpu_mmaps = !perf_cpu_map__empty(session->evlist->core.user_requested_cpus);
+	per_cpu_mmaps = !perf_cpu_map__has_any_cpu_or_is_empty(session->evlist->core.user_requested_cpus);
 
 	auxtrace_info->type = PERF_AUXTRACE_INTEL_PT;
 	auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type;
@@ -774,7 +774,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 	 * Per-cpu recording needs sched_switch events to distinguish different
 	 * threads.
 	 */
-	if (have_timing_info && !perf_cpu_map__empty(cpus) &&
+	if (have_timing_info && !perf_cpu_map__has_any_cpu_or_is_empty(cpus) &&
 	    !record_opts__no_switch_events(opts)) {
 		if (perf_can_record_switch_events()) {
 			bool cpu_wide = !target__none(&opts->target) &&
@@ -832,7 +832,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 		 * In the case of per-cpu mmaps, we need the CPU on the
 		 * AUX event.
 		 */
-		if (!perf_cpu_map__empty(cpus))
+		if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus))
 			evsel__set_sample_bit(intel_pt_evsel, CPU);
 	}
 
@@ -858,7 +858,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 			tracking_evsel->immediate = true;
 
 		/* In per-cpu case, always need the time of mmap events etc */
-		if (!perf_cpu_map__empty(cpus)) {
+		if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
 			evsel__set_sample_bit(tracking_evsel, TIME);
 			/* And the CPU for switch events */
 			evsel__set_sample_bit(tracking_evsel, CPU);
@@ -870,7 +870,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 	 * Warn the user when we do not have enough information to decode i.e.
 	 * per-cpu with no sched_switch (except workload-only).
 	 */
-	if (!ptr->have_sched_switch && !perf_cpu_map__empty(cpus) &&
+	if (!ptr->have_sched_switch && !perf_cpu_map__has_any_cpu_or_is_empty(cpus) &&
 	    !target__none(&opts->target) &&
 	    !intel_pt_evsel->core.attr.exclude_user)
 		ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");
diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c
index 6bfffe83dde9..d3db73dac66a 100644
--- a/tools/perf/bench/epoll-ctl.c
+++ b/tools/perf/bench/epoll-ctl.c
@@ -330,7 +330,7 @@ int bench_epoll_ctl(int argc, const char **argv)
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
 
-	cpu = perf_cpu_map__new(NULL);
+	cpu = perf_cpu_map__new_online_cpus();
 	if (!cpu)
 		goto errmem;
 
diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c
index cb5174b53940..06bb3187660a 100644
--- a/tools/perf/bench/epoll-wait.c
+++ b/tools/perf/bench/epoll-wait.c
@@ -444,7 +444,7 @@ int bench_epoll_wait(int argc, const char **argv)
 	act.sa_sigaction = toggle_done;
 	sigaction(SIGINT, &act, NULL);
 
-	cpu = perf_cpu_map__new(NULL);
+	cpu = perf_cpu_map__new_online_cpus();
 	if (!cpu)
 		goto errmem;
 
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index 2005a3fa3026..0c69d20efa32 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -138,7 +138,7 @@ int bench_futex_hash(int argc, const char **argv)
 		exit(EXIT_FAILURE);
 	}
 
-	cpu = perf_cpu_map__new(NULL);
+	cpu = perf_cpu_map__new_online_cpus();
 	if (!cpu)
 		goto errmem;
 
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 092cbd52db82..7a4973346180 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -172,7 +172,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
 	if (argc)
 		goto err;
 
-	cpu = perf_cpu_map__new(NULL);
+	cpu = perf_cpu_map__new_online_cpus();
 	if (!cpu)
 		err(EXIT_FAILURE, "calloc");
 
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index c0035990a33c..d9ad736c1a3e 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -174,7 +174,7 @@ int bench_futex_requeue(int argc, const char **argv)
 	if (argc)
 		goto err;
 
-	cpu = perf_cpu_map__new(NULL);
+	cpu = perf_cpu_map__new_online_cpus();
 	if (!cpu)
 		err(EXIT_FAILURE, "cpu_map__new");
 
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index 5ab0234d74e6..b66df553e561 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -264,7 +264,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
 			err(EXIT_FAILURE, "mlockall");
 	}
 
-	cpu = perf_cpu_map__new(NULL);
+	cpu = perf_cpu_map__new_online_cpus();
 	if (!cpu)
 		err(EXIT_FAILURE, "calloc");
 
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 18a5894af8bb..690fd6d3da13 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -149,7 +149,7 @@ int bench_futex_wake(int argc, const char **argv)
 		exit(EXIT_FAILURE);
 	}
 
-	cpu = perf_cpu_map__new(NULL);
+	cpu = perf_cpu_map__new_online_cpus();
 	if (!cpu)
 		err(EXIT_FAILURE, "calloc");
 
diff --git a/tools/perf/bench/sched-seccomp-notify.c b/tools/perf/bench/sched-seccomp-notify.c
index a01c40131493..269c1f4a6852 100644
--- a/tools/perf/bench/sched-seccomp-notify.c
+++ b/tools/perf/bench/sched-seccomp-notify.c
@@ -32,7 +32,7 @@ static bool sync_mode;
 static const struct option options[] = {
 	OPT_U64('l', "loop",	&loops,		"Specify number of loops"),
 	OPT_BOOLEAN('s', "sync-mode", &sync_mode,
-		    "Enable the synchronious mode for seccomp notifications"),
+		    "Enable the synchronous mode for seccomp notifications"),
 	OPT_END()
 };
 
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index aeeb801f1ed7..6c1cc797692d 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -20,6 +20,7 @@
 #include "util/evlist.h"
 #include "util/evsel.h"
 #include "util/annotate.h"
+#include "util/annotate-data.h"
 #include "util/event.h"
 #include <subcmd/parse-options.h>
 #include "util/parse-events.h"
@@ -45,7 +46,6 @@
 struct perf_annotate {
 	struct perf_tool tool;
 	struct perf_session *session;
-	struct annotation_options opts;
 #ifdef HAVE_SLANG_SUPPORT
 	bool	   use_tui;
 #endif
@@ -56,9 +56,13 @@ struct perf_annotate {
 	bool	   skip_missing;
 	bool	   has_br_stack;
 	bool	   group_set;
+	bool	   data_type;
+	bool	   type_stat;
+	bool	   insn_stat;
 	float	   min_percent;
 	const char *sym_hist_filter;
 	const char *cpu_list;
+	const char *target_data_type;
 	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
 };
 
@@ -94,6 +98,7 @@ static void process_basic_block(struct addr_map_symbol *start,
 	struct annotation *notes = sym ? symbol__annotation(sym) : NULL;
 	struct block_range_iter iter;
 	struct block_range *entry;
+	struct annotated_branch *branch;
 
 	/*
 	 * Sanity; NULL isn't executable and the CPU cannot execute backwards
@@ -105,6 +110,8 @@ static void process_basic_block(struct addr_map_symbol *start,
 	if (!block_range_iter__valid(&iter))
 		return;
 
+	branch = annotation__get_branch(notes);
+
 	/*
 	 * First block in range is a branch target.
 	 */
@@ -118,8 +125,8 @@ static void process_basic_block(struct addr_map_symbol *start,
 		entry->coverage++;
 		entry->sym = sym;
 
-		if (notes)
-			notes->max_coverage = max(notes->max_coverage, entry->coverage);
+		if (branch)
+			branch->max_coverage = max(branch->max_coverage, entry->coverage);
 
 	} while (block_range_iter__next(&iter));
 
@@ -315,9 +322,153 @@ static int hist_entry__tty_annotate(struct hist_entry *he,
 				    struct perf_annotate *ann)
 {
 	if (!ann->use_stdio2)
-		return symbol__tty_annotate(&he->ms, evsel, &ann->opts);
+		return symbol__tty_annotate(&he->ms, evsel);
+
+	return symbol__tty_annotate2(&he->ms, evsel);
+}
+
+static void print_annotated_data_header(struct hist_entry *he, struct evsel *evsel)
+{
+	struct dso *dso = map__dso(he->ms.map);
+	int nr_members = 1;
+	int nr_samples = he->stat.nr_events;
+
+	if (evsel__is_group_event(evsel)) {
+		struct hist_entry *pair;
+
+		list_for_each_entry(pair, &he->pairs.head, pairs.node)
+			nr_samples += pair->stat.nr_events;
+	}
+
+	printf("Annotate type: '%s' in %s (%d samples):\n",
+	       he->mem_type->self.type_name, dso->name, nr_samples);
+
+	if (evsel__is_group_event(evsel)) {
+		struct evsel *pos;
+		int i = 0;
+
+		for_each_group_evsel(pos, evsel)
+			printf(" event[%d] = %s\n", i++, pos->name);
+
+		nr_members = evsel->core.nr_members;
+	}
+
+	printf("============================================================================\n");
+	printf("%*s %10s %10s  %s\n", 11 * nr_members, "samples", "offset", "size", "field");
+}
+
+static void print_annotated_data_type(struct annotated_data_type *mem_type,
+				      struct annotated_member *member,
+				      struct evsel *evsel, int indent)
+{
+	struct annotated_member *child;
+	struct type_hist *h = mem_type->histograms[evsel->core.idx];
+	int i, nr_events = 1, samples = 0;
+
+	for (i = 0; i < member->size; i++)
+		samples += h->addr[member->offset + i].nr_samples;
+	printf(" %10d", samples);
 
-	return symbol__tty_annotate2(&he->ms, evsel, &ann->opts);
+	if (evsel__is_group_event(evsel)) {
+		struct evsel *pos;
+
+		for_each_group_member(pos, evsel) {
+			h = mem_type->histograms[pos->core.idx];
+
+			samples = 0;
+			for (i = 0; i < member->size; i++)
+				samples += h->addr[member->offset + i].nr_samples;
+			printf(" %10d", samples);
+		}
+		nr_events = evsel->core.nr_members;
+	}
+
+	printf(" %10d %10d  %*s%s\t%s",
+	       member->offset, member->size, indent, "", member->type_name,
+	       member->var_name ?: "");
+
+	if (!list_empty(&member->children))
+		printf(" {\n");
+
+	list_for_each_entry(child, &member->children, node)
+		print_annotated_data_type(mem_type, child, evsel, indent + 4);
+
+	if (!list_empty(&member->children))
+		printf("%*s}", 11 * nr_events + 24 + indent, "");
+	printf(";\n");
+}
+
+static void print_annotate_data_stat(struct annotated_data_stat *s)
+{
+#define PRINT_STAT(fld) if (s->fld) printf("%10d : %s\n", s->fld, #fld)
+
+	int bad = s->no_sym +
+			s->no_insn +
+			s->no_insn_ops +
+			s->no_mem_ops +
+			s->no_reg +
+			s->no_dbginfo +
+			s->no_cuinfo +
+			s->no_var +
+			s->no_typeinfo +
+			s->invalid_size +
+			s->bad_offset;
+	int ok = s->total - bad;
+
+	printf("Annotate data type stats:\n");
+	printf("total %d, ok %d (%.1f%%), bad %d (%.1f%%)\n",
+		s->total, ok, 100.0 * ok / (s->total ?: 1), bad, 100.0 * bad / (s->total ?: 1));
+	printf("-----------------------------------------------------------\n");
+	PRINT_STAT(no_sym);
+	PRINT_STAT(no_insn);
+	PRINT_STAT(no_insn_ops);
+	PRINT_STAT(no_mem_ops);
+	PRINT_STAT(no_reg);
+	PRINT_STAT(no_dbginfo);
+	PRINT_STAT(no_cuinfo);
+	PRINT_STAT(no_var);
+	PRINT_STAT(no_typeinfo);
+	PRINT_STAT(invalid_size);
+	PRINT_STAT(bad_offset);
+	printf("\n");
+
+#undef PRINT_STAT
+}
+
+static void print_annotate_item_stat(struct list_head *head, const char *title)
+{
+	struct annotated_item_stat *istat, *pos, *iter;
+	int total_good, total_bad, total;
+	int sum1, sum2;
+	LIST_HEAD(tmp);
+
+	/* sort the list by count */
+	list_splice_init(head, &tmp);
+	total_good = total_bad = 0;
+
+	list_for_each_entry_safe(istat, pos, &tmp, list) {
+		total_good += istat->good;
+		total_bad += istat->bad;
+		sum1 = istat->good + istat->bad;
+
+		list_for_each_entry(iter, head, list) {
+			sum2 = iter->good + iter->bad;
+			if (sum1 > sum2)
+				break;
+		}
+		list_move_tail(&istat->list, &iter->list);
+	}
+	total = total_good + total_bad;
+
+	printf("Annotate %s stats\n", title);
+	printf("total %d, ok %d (%.1f%%), bad %d (%.1f%%)\n\n", total,
+	       total_good, 100.0 * total_good / (total ?: 1),
+	       total_bad, 100.0 * total_bad / (total ?: 1));
+	printf("  %-10s: %5s %5s\n", "Name", "Good", "Bad");
+	printf("-----------------------------------------------------------\n");
+	list_for_each_entry(istat, head, list)
+		printf("  %-10s: %5d %5d\n", istat->name, istat->good, istat->bad);
+	printf("\n");
 }
 
 static void hists__find_annotations(struct hists *hists,
@@ -327,6 +478,11 @@ static void hists__find_annotations(struct hists *hists,
 	struct rb_node *nd = rb_first_cached(&hists->entries), *next;
 	int key = K_RIGHT;
 
+	if (ann->type_stat)
+		print_annotate_data_stat(&ann_data_stat);
+	if (ann->insn_stat)
+		print_annotate_item_stat(&ann_insn_stat, "Instruction");
+
 	while (nd) {
 		struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
 		struct annotation *notes;
@@ -359,11 +515,38 @@ find_next:
 			continue;
 		}
 
+		if (ann->data_type) {
+			/* skip unknown type */
+			if (he->mem_type->histograms == NULL)
+				goto find_next;
+
+			if (ann->target_data_type) {
+				const char *type_name = he->mem_type->self.type_name;
+
+				/* skip 'struct ' prefix in the type name */
+				if (strncmp(ann->target_data_type, "struct ", 7) &&
+				    !strncmp(type_name, "struct ", 7))
+					type_name += 7;
+
+				/* skip 'union ' prefix in the type name */
+				if (strncmp(ann->target_data_type, "union ", 6) &&
+				    !strncmp(type_name, "union ", 6))
+					type_name += 6;
+
+				if (strcmp(ann->target_data_type, type_name))
+					goto find_next;
+			}
+
+			print_annotated_data_header(he, evsel);
+			print_annotated_data_type(he->mem_type, &he->mem_type->self, evsel, 0);
+			printf("\n");
+			goto find_next;
+		}
+
 		if (use_browser == 2) {
 			int ret;
 			int (*annotate)(struct hist_entry *he,
 					struct evsel *evsel,
-					struct annotation_options *options,
 					struct hist_browser_timer *hbt);
 
 			annotate = dlsym(perf_gtk_handle,
@@ -373,14 +556,14 @@ find_next:
 				return;
 			}
 
-			ret = annotate(he, evsel, &ann->opts, NULL);
+			ret = annotate(he, evsel, NULL);
 			if (!ret || !ann->skip_missing)
 				return;
 
 			/* skip missing symbols */
 			nd = rb_next(nd);
 		} else if (use_browser == 1) {
-			key = hist_entry__tui_annotate(he, evsel, NULL, &ann->opts);
+			key = hist_entry__tui_annotate(he, evsel, NULL);
 
 			switch (key) {
 			case -1:
@@ -422,9 +605,9 @@ static int __cmd_annotate(struct perf_annotate *ann)
 			goto out;
 	}
 
-	if (!ann->opts.objdump_path) {
+	if (!annotate_opts.objdump_path) {
 		ret = perf_env__lookup_objdump(&session->header.env,
-					       &ann->opts.objdump_path);
+					       &annotate_opts.objdump_path);
 		if (ret)
 			goto out;
 	}
@@ -457,8 +640,20 @@ static int __cmd_annotate(struct perf_annotate *ann)
 			evsel__reset_sample_bit(pos, CALLCHAIN);
 			evsel__output_resort(pos, NULL);
 
-			if (symbol_conf.event_group && !evsel__is_group_leader(pos))
+			/*
+			 * An event group needs to display other events too.
+			 * Let's delay printing until other events are processed.
+			 */
+			if (symbol_conf.event_group) {
+				if (!evsel__is_group_leader(pos)) {
+					struct hists *leader_hists;
+
+					leader_hists = evsel__hists(evsel__leader(pos));
+					hists__match(leader_hists, hists);
+					hists__link(leader_hists, hists);
+				}
 				continue;
+			}
 
 			hists__find_annotations(hists, pos, ann);
 		}
@@ -469,6 +664,20 @@ static int __cmd_annotate(struct perf_annotate *ann)
 		goto out;
 	}
 
+	/* Display group events together */
+	evlist__for_each_entry(session->evlist, pos) {
+		struct hists *hists = evsel__hists(pos);
+		u32 nr_samples = hists->stats.nr_samples;
+
+		if (nr_samples == 0)
+			continue;
+
+		if (!symbol_conf.event_group || !evsel__is_group_leader(pos))
+			continue;
+
+		hists__find_annotations(hists, pos, ann);
+	}
+
 	if (use_browser == 2) {
 		void (*show_annotations)(void);
 
@@ -495,6 +704,17 @@ static int parse_percent_limit(const struct option *opt, const char *str,
 	return 0;
 }
 
+static int parse_data_type(const struct option *opt, const char *str, int unset)
+{
+	struct perf_annotate *ann = opt->value;
+
+	ann->data_type = !unset;
+	if (str)
+		ann->target_data_type = strdup(str);
+	/* a failed strdup() must not silently drop the requested type filter */
+	return (str && !ann->target_data_type) ? -ENOMEM : 0;
+}
+
 static const char * const annotate_usage[] = {
 	"perf annotate [<options>]",
 	NULL
@@ -558,9 +778,9 @@ int cmd_annotate(int argc, const char **argv)
 		   "file", "vmlinux pathname"),
 	OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
 		    "load module symbols - WARNING: use only with -k and LIVE kernel"),
-	OPT_BOOLEAN('l', "print-line", &annotate.opts.print_lines,
+	OPT_BOOLEAN('l', "print-line", &annotate_opts.print_lines,
 		    "print matching source lines (may be slow)"),
-	OPT_BOOLEAN('P', "full-paths", &annotate.opts.full_path,
+	OPT_BOOLEAN('P', "full-paths", &annotate_opts.full_path,
 		    "Don't shorten the displayed pathnames"),
 	OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing,
 		    "Skip symbols that cannot be annotated"),
@@ -571,15 +791,15 @@ int cmd_annotate(int argc, const char **argv)
 	OPT_CALLBACK(0, "symfs", NULL, "directory",
 		     "Look for files with symbols relative to this directory",
 		     symbol__config_symfs),
-	OPT_BOOLEAN(0, "source", &annotate.opts.annotate_src,
+	OPT_BOOLEAN(0, "source", &annotate_opts.annotate_src,
 		    "Interleave source code with assembly code (default)"),
-	OPT_BOOLEAN(0, "asm-raw", &annotate.opts.show_asm_raw,
+	OPT_BOOLEAN(0, "asm-raw", &annotate_opts.show_asm_raw,
 		    "Display raw encoding of assembly instructions (default)"),
 	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
 		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
-	OPT_STRING(0, "prefix", &annotate.opts.prefix, "prefix",
+	OPT_STRING(0, "prefix", &annotate_opts.prefix, "prefix",
 		    "Add prefix to source file path names in programs (with --prefix-strip)"),
-	OPT_STRING(0, "prefix-strip", &annotate.opts.prefix_strip, "N",
+	OPT_STRING(0, "prefix-strip", &annotate_opts.prefix_strip, "N",
 		    "Strip first N entries of source file path name in programs (with --prefix)"),
 	OPT_STRING(0, "objdump", &objdump_path, "path",
 		   "objdump binary to use for disassembly and annotations"),
@@ -598,7 +818,7 @@ int cmd_annotate(int argc, const char **argv)
 	OPT_CALLBACK_DEFAULT(0, "stdio-color", NULL, "mode",
 			     "'always' (default), 'never' or 'auto' only applicable to --stdio mode",
 			     stdio__config_color, "always"),
-	OPT_CALLBACK(0, "percent-type", &annotate.opts, "local-period",
+	OPT_CALLBACK(0, "percent-type", &annotate_opts, "local-period",
 		     "Set percent type local/global-period/hits",
 		     annotate_parse_percent_type),
 	OPT_CALLBACK(0, "percent-limit", &annotate, "percent",
@@ -606,7 +826,13 @@ int cmd_annotate(int argc, const char **argv)
 	OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
 			    "Instruction Tracing options\n" ITRACE_HELP,
 			    itrace_parse_synth_opts),
-
+	OPT_CALLBACK_OPTARG(0, "data-type", &annotate, NULL, "name",
+			    "Show data type annotate for the memory accesses",
+			    parse_data_type),
+	OPT_BOOLEAN(0, "type-stat", &annotate.type_stat,
+		    "Show stats for the data type annotation"),
+	OPT_BOOLEAN(0, "insn-stat", &annotate.insn_stat,
+		    "Show instruction stats for the data type annotation"),
 	OPT_END()
 	};
 	int ret;
@@ -614,13 +840,13 @@ int cmd_annotate(int argc, const char **argv)
 	set_option_flag(options, 0, "show-total-period", PARSE_OPT_EXCLUSIVE);
 	set_option_flag(options, 0, "show-nr-samples", PARSE_OPT_EXCLUSIVE);
 
-	annotation_options__init(&annotate.opts);
+	annotation_options__init();
 
 	ret = hists__init();
 	if (ret < 0)
 		return ret;
 
-	annotation_config__init(&annotate.opts);
+	annotation_config__init();
 
 	argc = parse_options(argc, argv, options, annotate_usage, 0);
 	if (argc) {
@@ -635,13 +861,13 @@ int cmd_annotate(int argc, const char **argv)
 	}
 
 	if (disassembler_style) {
-		annotate.opts.disassembler_style = strdup(disassembler_style);
-		if (!annotate.opts.disassembler_style)
+		annotate_opts.disassembler_style = strdup(disassembler_style);
+		if (!annotate_opts.disassembler_style)
 			return -ENOMEM;
 	}
 	if (objdump_path) {
-		annotate.opts.objdump_path = strdup(objdump_path);
-		if (!annotate.opts.objdump_path)
+		annotate_opts.objdump_path = strdup(objdump_path);
+		if (!annotate_opts.objdump_path)
 			return -ENOMEM;
 	}
 	if (addr2line_path) {
@@ -650,7 +876,7 @@ int cmd_annotate(int argc, const char **argv)
 			return -ENOMEM;
 	}
 
-	if (annotate_check_args(&annotate.opts) < 0)
+	if (annotate_check_args() < 0)
 		return -EINVAL;
 
 #ifdef HAVE_GTK2_SUPPORT
@@ -660,6 +886,13 @@ int cmd_annotate(int argc, const char **argv)
 	}
 #endif
 
+#ifndef HAVE_DWARF_GETLOCATIONS_SUPPORT
+	if (annotate.data_type) {
+		pr_err("Error: Data type profiling is disabled due to missing DWARF support\n");
+		return -ENOTSUP;
+	}
+#endif
+
 	ret = symbol__validate_sym_arguments();
 	if (ret)
 		return ret;
@@ -702,6 +935,14 @@ int cmd_annotate(int argc, const char **argv)
 		use_browser = 2;
 #endif
 
+	/* FIXME: only support stdio for now */
+	if (annotate.data_type) {
+		use_browser = 0;
+		annotate_opts.annotate_src = false;
+		symbol_conf.annotate_data_member = true;
+		symbol_conf.annotate_data_sample = true;
+	}
+
 	setup_browser(true);
 
 	/*
@@ -709,7 +950,10 @@ int cmd_annotate(int argc, const char **argv)
 	 * symbol, we do not care about the processes in annotate,
 	 * set sort order to avoid repeated output.
 	 */
-	sort_order = "dso,symbol";
+	if (annotate.data_type)
+		sort_order = "dso,type";
+	else
+		sort_order = "dso,symbol";
 
 	/*
 	 * Set SORT_MODE__BRANCH so that annotate display IPC/Cycle
@@ -731,7 +975,7 @@ out_delete:
 #ifndef NDEBUG
 	perf_session__delete(annotate.session);
 #endif
-	annotation_options__exit(&annotate.opts);
+	annotation_options__exit();
 
 	return ret;
 }
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index a4cf9de7a7b5..f78eea9e2153 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2320,7 +2320,7 @@ static int setup_nodes(struct perf_session *session)
 		nodes[node] = set;
 
 		/* empty node, skip */
-		if (perf_cpu_map__empty(map))
+		if (perf_cpu_map__has_any_cpu_or_is_empty(map))
 			continue;
 
 		perf_cpu_map__for_each_cpu(cpu, idx, map) {
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index ac2e6c75f912..eb30c8eca488 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -333,7 +333,7 @@ static int set_tracing_func_irqinfo(struct perf_ftrace *ftrace)
 
 static int reset_tracing_cpu(void)
 {
-	struct perf_cpu_map *cpumap = perf_cpu_map__new(NULL);
+	struct perf_cpu_map *cpumap = perf_cpu_map__new_online_cpus();
 	int ret;
 
 	ret = set_tracing_cpumask(cpumap);
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index c8cf2fdd9cff..eb3ef5c24b66 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -2265,6 +2265,12 @@ int cmd_inject(int argc, const char **argv)
 		"perf inject [<options>]",
 		NULL
 	};
+
+	if (!inject.itrace_synth_opts.set) {
+		/* Disable eager loading of kernel symbols that adds overhead to perf inject. */
+		symbol_conf.lazy_load_kernel_maps = true;
+	}
+
 #ifndef HAVE_JITDUMP
 	set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
 #endif
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index a3ff2f4edbaa..230461280e45 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -2285,8 +2285,10 @@ setup_args:
 		else
 			ev_name = strdup(contention_tracepoints[j].name);
 
-		if (!ev_name)
+		if (!ev_name) {
+			free(rec_argv);
 			return -ENOMEM;
+		}
 
 		rec_argv[i++] = "-e";
 		rec_argv[i++] = ev_name;
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index dcf288a4fb9a..91e6828c38cc 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -270,7 +270,7 @@ static int record__write(struct record *rec, struct mmap *map __maybe_unused,
 
 static int record__aio_enabled(struct record *rec);
 static int record__comp_enabled(struct record *rec);
-static size_t zstd_compress(struct perf_session *session, struct mmap *map,
+static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
 			    void *dst, size_t dst_size, void *src, size_t src_size);
 
 #ifdef HAVE_AIO_SUPPORT
@@ -405,9 +405,13 @@ static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size
 	 */
 
 	if (record__comp_enabled(aio->rec)) {
-		size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
-				     mmap__mmap_len(map) - aio->size,
-				     buf, size);
+		ssize_t compressed = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
+						   mmap__mmap_len(map) - aio->size,
+						   buf, size);
+		if (compressed < 0)
+			return (int)compressed;
+
+		size = compressed;
 	} else {
 		memcpy(aio->data + aio->size, buf, size);
 	}
@@ -633,7 +637,13 @@ static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
 	struct record *rec = to;
 
 	if (record__comp_enabled(rec)) {
-		size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size);
+		ssize_t compressed = zstd_compress(rec->session, map, map->data,
+						   mmap__mmap_len(map), bf, size);
+
+		if (compressed < 0)
+			return (int)compressed;
+
+		size = compressed;
 		bf   = map->data;
 	}
 
@@ -1350,7 +1360,7 @@ static int record__open(struct record *rec)
 	evlist__for_each_entry(evlist, pos) {
 try_again:
 		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
-			if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
+			if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) {
 				if (verbose > 0)
 					ui__warning("%s\n", msg);
 				goto try_again;
@@ -1527,10 +1537,10 @@ static size_t process_comp_header(void *record, size_t increment)
 	return size;
 }
 
-static size_t zstd_compress(struct perf_session *session, struct mmap *map,
+static ssize_t zstd_compress(struct perf_session *session, struct mmap *map,
 			    void *dst, size_t dst_size, void *src, size_t src_size)
 {
-	size_t compressed;
+	ssize_t compressed;
 	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
 	struct zstd_data *zstd_data = &session->zstd_data;
 
@@ -1539,6 +1549,8 @@ static size_t zstd_compress(struct perf_session *session, struct mmap *map,
 
 	compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
 						     max_record_size, process_comp_header);
+	if (compressed < 0)
+		return compressed;
 
 	if (map && map->file) {
 		thread->bytes_transferred += src_size;
@@ -1912,21 +1924,13 @@ static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
 static void record__read_lost_samples(struct record *rec)
 {
 	struct perf_session *session = rec->session;
-	struct perf_record_lost_samples *lost;
+	struct perf_record_lost_samples *lost = NULL;
 	struct evsel *evsel;
 
 	/* there was an error during record__open */
 	if (session->evlist == NULL)
 		return;
 
-	lost = zalloc(PERF_SAMPLE_MAX_SIZE);
-	if (lost == NULL) {
-		pr_debug("Memory allocation failed\n");
-		return;
-	}
-
-	lost->header.type = PERF_RECORD_LOST_SAMPLES;
-
 	evlist__for_each_entry(session->evlist, evsel) {
 		struct xyarray *xy = evsel->core.sample_id;
 		u64 lost_count;
@@ -1949,6 +1953,15 @@ static void record__read_lost_samples(struct record *rec)
 				}
 
 				if (count.lost) {
+					if (!lost) {
+						lost = zalloc(sizeof(*lost) +
+							      session->machines.host.id_hdr_size);
+						if (!lost) {
+							pr_debug("Memory allocation failed\n");
+							return;
+						}
+						lost->header.type = PERF_RECORD_LOST_SAMPLES;
+					}
 					__record__save_lost_samples(rec, evsel, lost,
 								    x, y, count.lost, 0);
 				}
@@ -1956,9 +1969,19 @@ static void record__read_lost_samples(struct record *rec)
 		}
 
 		lost_count = perf_bpf_filter__lost_count(evsel);
-		if (lost_count)
+		if (lost_count) {
+			if (!lost) {
+				lost = zalloc(sizeof(*lost) +
+					      session->machines.host.id_hdr_size);
+				if (!lost) {
+					pr_debug("Memory allocation failed\n");
+					return;
+				}
+				lost->header.type = PERF_RECORD_LOST_SAMPLES;
+			}
 			__record__save_lost_samples(rec, evsel, lost, 0, 0, lost_count,
 						    PERF_RECORD_MISC_LOST_SAMPLES_BPF);
+		}
 	}
 out:
 	free(lost);
@@ -2216,32 +2239,6 @@ static void hit_auxtrace_snapshot_trigger(struct record *rec)
 	}
 }
 
-static void record__uniquify_name(struct record *rec)
-{
-	struct evsel *pos;
-	struct evlist *evlist = rec->evlist;
-	char *new_name;
-	int ret;
-
-	if (perf_pmus__num_core_pmus() == 1)
-		return;
-
-	evlist__for_each_entry(evlist, pos) {
-		if (!evsel__is_hybrid(pos))
-			continue;
-
-		if (strchr(pos->name, '/'))
-			continue;
-
-		ret = asprintf(&new_name, "%s/%s/",
-			       pos->pmu_name, pos->name);
-		if (ret) {
-			free(pos->name);
-			pos->name = new_name;
-		}
-	}
-}
-
 static int record__terminate_thread(struct record_thread *thread_data)
 {
 	int err;
@@ -2475,7 +2472,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
 		rec->opts.sample_id = true;
 
-	record__uniquify_name(rec);
+	evlist__uniquify_name(rec->evlist);
 
 	/* Debug message used by test scripts */
 	pr_debug3("perf record opening and mmapping events\n");
@@ -3580,9 +3577,7 @@ static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cp
 	if (cpu_map__is_dummy(cpus))
 		return 0;
 
-	perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
-		if (cpu.cpu == -1)
-			continue;
+	perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) {
 		/* Return ENODEV is input cpu is greater than max cpu */
 		if ((unsigned long)cpu.cpu > mask->nbits)
 			return -ENODEV;
@@ -3989,6 +3984,8 @@ int cmd_record(int argc, const char **argv)
 # undef set_nobuild
 #endif
 
+	/* Disable eager loading of kernel symbols that adds overhead to perf record. */
+	symbol_conf.lazy_load_kernel_maps = true;
 	rec->opts.affinity = PERF_AFFINITY_SYS;
 
 	rec->evlist = evlist__new();
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 9cb1da2dc0c0..f2ed2b7e80a3 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -96,9 +96,9 @@ struct report {
 	bool			stitch_lbr;
 	bool			disable_order;
 	bool			skip_empty;
+	bool			data_type;
 	int			max_stack;
 	struct perf_read_values	show_threads_values;
-	struct annotation_options annotation_opts;
 	const char		*pretty_printing_style;
 	const char		*cpu_list;
 	const char		*symbol_filter_str;
@@ -171,7 +171,7 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter,
 	struct mem_info *mi;
 	struct branch_info *bi;
 
-	if (!ui__has_annotation() && !rep->symbol_ipc)
+	if (!ui__has_annotation() && !rep->symbol_ipc && !rep->data_type)
 		return 0;
 
 	if (sort__mode == SORT_MODE__BRANCH) {
@@ -541,8 +541,7 @@ static int evlist__tui_block_hists_browse(struct evlist *evlist, struct report *
 	evlist__for_each_entry(evlist, pos) {
 		ret = report__browse_block_hists(&rep->block_reports[i++].hist,
 						 rep->min_percent, pos,
-						 &rep->session->header.env,
-						 &rep->annotation_opts);
+						 &rep->session->header.env);
 		if (ret != 0)
 			return ret;
 	}
@@ -574,8 +573,7 @@ static int evlist__tty_browse_hists(struct evlist *evlist, struct report *rep, c
 
 		if (rep->total_cycles_mode) {
 			report__browse_block_hists(&rep->block_reports[i++].hist,
-						   rep->min_percent, pos,
-						   NULL, NULL);
+						   rep->min_percent, pos, NULL);
 			continue;
 		}
 
@@ -670,7 +668,7 @@ static int report__browse_hists(struct report *rep)
 		}
 
 		ret = evlist__tui_browse_hists(evlist, help, NULL, rep->min_percent,
-					       &session->header.env, true, &rep->annotation_opts);
+					       &session->header.env, true);
 		/*
 		 * Usually "ret" is the last pressed key, and we only
 		 * care if the key notifies us to switch data file.
@@ -745,7 +743,7 @@ static int hists__resort_cb(struct hist_entry *he, void *arg)
 	if (rep->symbol_ipc && sym && !sym->annotate2) {
 		struct evsel *evsel = hists_to_evsel(he->hists);
 
-		symbol__annotate2(&he->ms, evsel, &rep->annotation_opts, NULL);
+		symbol__annotate2(&he->ms, evsel, NULL);
 	}
 
 	return 0;
@@ -859,27 +857,47 @@ static struct task *tasks_list(struct task *task, struct machine *machine)
 	return tasks_list(parent_task, machine);
 }
 
-static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp)
+struct maps__fprintf_task_args {
+	int indent;
+	FILE *fp;
+	size_t printed;
+};
+
+static int maps__fprintf_task_cb(struct map *map, void *data)
 {
-	size_t printed = 0;
-	struct map_rb_node *rb_node;
+	struct maps__fprintf_task_args *args = data;
+	const struct dso *dso = map__dso(map);
+	u32 prot = map__prot(map);
+	int ret;
 
-	maps__for_each_entry(maps, rb_node) {
-		struct map *map = rb_node->map;
-		const struct dso *dso = map__dso(map);
-		u32 prot = map__prot(map);
+	ret = fprintf(args->fp,
+		"%*s  %" PRIx64 "-%" PRIx64 " %c%c%c%c %08" PRIx64 " %" PRIu64 " %s\n",
+		args->indent, "", map__start(map), map__end(map),
+		prot & PROT_READ ? 'r' : '-',
+		prot & PROT_WRITE ? 'w' : '-',
+		prot & PROT_EXEC ? 'x' : '-',
+		map__flags(map) ? 's' : 'p',
+		map__pgoff(map),
+		dso->id.ino, dso->name);
 
-		printed += fprintf(fp, "%*s  %" PRIx64 "-%" PRIx64 " %c%c%c%c %08" PRIx64 " %" PRIu64 " %s\n",
-				   indent, "", map__start(map), map__end(map),
-				   prot & PROT_READ ? 'r' : '-',
-				   prot & PROT_WRITE ? 'w' : '-',
-				   prot & PROT_EXEC ? 'x' : '-',
-				   map__flags(map) ? 's' : 'p',
-				   map__pgoff(map),
-				   dso->id.ino, dso->name);
-	}
+	if (ret < 0)
+		return ret;
+
+	args->printed += ret;
+	return 0;
+}
+
+static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp)
+{
+	struct maps__fprintf_task_args args = {
+		.indent = indent,
+		.fp = fp,
+		.printed = 0,
+	};
 
-	return printed;
+	maps__for_each_map(maps, maps__fprintf_task_cb, &args);
+
+	return args.printed;
 }
 
 static void task__print_level(struct task *task, FILE *fp, int level)
@@ -1341,15 +1359,15 @@ int cmd_report(int argc, const char **argv)
 		   "list of cpus to profile"),
 	OPT_BOOLEAN('I', "show-info", &report.show_full_info,
 		    "Display extended information about perf.data file"),
-	OPT_BOOLEAN(0, "source", &report.annotation_opts.annotate_src,
+	OPT_BOOLEAN(0, "source", &annotate_opts.annotate_src,
 		    "Interleave source code with assembly code (default)"),
-	OPT_BOOLEAN(0, "asm-raw", &report.annotation_opts.show_asm_raw,
+	OPT_BOOLEAN(0, "asm-raw", &annotate_opts.show_asm_raw,
 		    "Display raw encoding of assembly instructions (default)"),
 	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
 		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
-	OPT_STRING(0, "prefix", &report.annotation_opts.prefix, "prefix",
+	OPT_STRING(0, "prefix", &annotate_opts.prefix, "prefix",
 		    "Add prefix to source file path names in programs (with --prefix-strip)"),
-	OPT_STRING(0, "prefix-strip", &report.annotation_opts.prefix_strip, "N",
+	OPT_STRING(0, "prefix-strip", &annotate_opts.prefix_strip, "N",
 		    "Strip first N entries of source file path name in programs (with --prefix)"),
 	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
 		    "Show a column with the sum of periods"),
@@ -1401,7 +1419,7 @@ int cmd_report(int argc, const char **argv)
 		   "Time span of interest (start,stop)"),
 	OPT_BOOLEAN(0, "inline", &symbol_conf.inline_name,
 		    "Show inline function"),
-	OPT_CALLBACK(0, "percent-type", &report.annotation_opts, "local-period",
+	OPT_CALLBACK(0, "percent-type", &annotate_opts, "local-period",
 		     "Set percent type local/global-period/hits",
 		     annotate_parse_percent_type),
 	OPT_BOOLEAN(0, "ns", &symbol_conf.nanosecs, "Show times in nanosecs"),
@@ -1426,7 +1444,14 @@ int cmd_report(int argc, const char **argv)
 	if (ret < 0)
 		goto exit;
 
-	annotation_options__init(&report.annotation_opts);
+	/*
+	 * tasks_mode requires access to exited threads to list those that are in
+	 * the data file. Off-cpu events are synthesized after other events and
+	 * reference exited threads.
+	 */
+	symbol_conf.keep_exited_threads = true;
+
+	annotation_options__init();
 
 	ret = perf_config(report__config, &report);
 	if (ret)
@@ -1445,13 +1470,13 @@ int cmd_report(int argc, const char **argv)
 	}
 
 	if (disassembler_style) {
-		report.annotation_opts.disassembler_style = strdup(disassembler_style);
-		if (!report.annotation_opts.disassembler_style)
+		annotate_opts.disassembler_style = strdup(disassembler_style);
+		if (!annotate_opts.disassembler_style)
 			return -ENOMEM;
 	}
 	if (objdump_path) {
-		report.annotation_opts.objdump_path = strdup(objdump_path);
-		if (!report.annotation_opts.objdump_path)
+		annotate_opts.objdump_path = strdup(objdump_path);
+		if (!annotate_opts.objdump_path)
 			return -ENOMEM;
 	}
 	if (addr2line_path) {
@@ -1460,7 +1485,7 @@ int cmd_report(int argc, const char **argv)
 			return -ENOMEM;
 	}
 
-	if (annotate_check_args(&report.annotation_opts) < 0) {
+	if (annotate_check_args() < 0) {
 		ret = -EINVAL;
 		goto exit;
 	}
@@ -1615,6 +1640,16 @@ repeat:
 			sort_order = NULL;
 	}
 
+	if (sort_order && strstr(sort_order, "type")) {
+		report.data_type = true;
+		annotate_opts.annotate_src = false;
+
+#ifndef HAVE_DWARF_GETLOCATIONS_SUPPORT
+		pr_err("Error: Data type profiling is disabled due to missing DWARF support\n");
+		goto error;
+#endif
+	}
+
 	if (strcmp(input_name, "-") != 0)
 		setup_browser(true);
 	else
@@ -1673,7 +1708,7 @@ repeat:
 	 * so don't allocate extra space that won't be used in the stdio
 	 * implementation.
 	 */
-	if (ui__has_annotation() || report.symbol_ipc ||
+	if (ui__has_annotation() || report.symbol_ipc || report.data_type ||
 	    report.total_cycles_mode) {
 		ret = symbol__annotation_init();
 		if (ret < 0)
@@ -1692,7 +1727,7 @@ repeat:
 			 */
 			symbol_conf.priv_size += sizeof(u32);
 		}
-		annotation_config__init(&report.annotation_opts);
+		annotation_config__init();
 	}
 
 	if (symbol__init(&session->header.env) < 0)
@@ -1746,7 +1781,7 @@ error:
 	zstd_fini(&(session->zstd_data));
 	perf_session__delete(session);
 exit:
-	annotation_options__exit(&report.annotation_opts);
+	annotation_options__exit();
 	free(sort_order_help);
 	free(field_order_help);
 	return ret;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a3af805a1d57..5fe9abc6a524 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -653,7 +653,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter)
 		if ((evsel__leader(counter) != counter) ||
 		    !(counter->core.leader->nr_members > 1))
 			return COUNTER_SKIP;
-	} else if (evsel__fallback(counter, errno, msg, sizeof(msg))) {
+	} else if (evsel__fallback(counter, &target, errno, msg, sizeof(msg))) {
 		if (verbose > 0)
 			ui__warning("%s\n", msg);
 		return COUNTER_RETRY;
@@ -1204,8 +1204,9 @@ static struct option stat_options[] = {
 	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
 		    "list of cpus to monitor in system-wide"),
 	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
-		    "disable CPU count aggregation", AGGR_NONE),
-	OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"),
+		    "disable aggregation across CPUs or PMUs", AGGR_NONE),
+	OPT_SET_UINT(0, "no-merge", &stat_config.aggr_mode,
+		    "disable aggregation the same as -A or --no-aggr", AGGR_NONE),
 	OPT_BOOLEAN(0, "hybrid-merge", &stat_config.hybrid_merge,
 		    "Merge identical named hybrid events"),
 	OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
@@ -1255,7 +1256,7 @@ static struct option stat_options[] = {
 	OPT_BOOLEAN(0, "metric-no-merge", &stat_config.metric_no_merge,
 		       "don't try to share events between metrics in a group"),
 	OPT_BOOLEAN(0, "metric-no-threshold", &stat_config.metric_no_threshold,
-		       "don't try to share events between metrics in a group  "),
+		       "disable adding events for the metric threshold calculation"),
 	OPT_BOOLEAN(0, "topdown", &topdown_run,
 			"measure top-down statistics"),
 	OPT_UINTEGER(0, "td-level", &stat_config.topdown_level,
@@ -1316,7 +1317,7 @@ static int cpu__get_cache_id_from_map(struct perf_cpu cpu, char *map)
 	 * be the first online CPU in the cache domain else use the
 	 * first online CPU of the cache domain as the ID.
 	 */
-	if (perf_cpu_map__empty(cpu_map))
+	if (perf_cpu_map__has_any_cpu_or_is_empty(cpu_map))
 		id = cpu.cpu;
 	else
 		id = perf_cpu_map__cpu(cpu_map, 0).cpu;
@@ -1622,7 +1623,7 @@ static int perf_stat_init_aggr_mode(void)
 	 * taking the highest cpu number to be the size of
 	 * the aggregation translate cpumap.
 	 */
-	if (!perf_cpu_map__empty(evsel_list->core.user_requested_cpus))
+	if (!perf_cpu_map__has_any_cpu_or_is_empty(evsel_list->core.user_requested_cpus))
 		nr = perf_cpu_map__max(evsel_list->core.user_requested_cpus).cpu;
 	else
 		nr = 0;
@@ -2289,7 +2290,7 @@ int process_stat_config_event(struct perf_session *session,
 
 	perf_event__read_stat_config(&stat_config, &event->stat_config);
 
-	if (perf_cpu_map__empty(st->cpus)) {
+	if (perf_cpu_map__has_any_cpu_or_is_empty(st->cpus)) {
 		if (st->aggr_mode != AGGR_UNSET)
 			pr_warning("warning: processing task data, aggregation mode not set\n");
 	} else if (st->aggr_mode != AGGR_UNSET) {
@@ -2695,15 +2696,19 @@ int cmd_stat(int argc, const char **argv)
 	 */
 	if (metrics) {
 		const char *pmu = parse_events_option_args.pmu_filter ?: "all";
+		int ret = metricgroup__parse_groups(evsel_list, pmu, metrics,
+						stat_config.metric_no_group,
+						stat_config.metric_no_merge,
+						stat_config.metric_no_threshold,
+						stat_config.user_requested_cpu_list,
+						stat_config.system_wide,
+						&stat_config.metric_events);
 
-		metricgroup__parse_groups(evsel_list, pmu, metrics,
-					stat_config.metric_no_group,
-					stat_config.metric_no_merge,
-					stat_config.metric_no_threshold,
-					stat_config.user_requested_cpu_list,
-					stat_config.system_wide,
-					&stat_config.metric_events);
 		zfree(&metrics);
+		if (ret) {
+			status = ret;
+			goto out;
+		}
 	}
 
 	if (add_default_attributes())
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index ea8c7eca5eee..baf1ab083436 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -147,7 +147,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
 		return err;
 	}
 
-	err = symbol__annotate(&he->ms, evsel, &top->annotation_opts, NULL);
+	err = symbol__annotate(&he->ms, evsel, NULL);
 	if (err == 0) {
 		top->sym_filter_entry = he;
 	} else {
@@ -261,9 +261,9 @@ static void perf_top__show_details(struct perf_top *top)
 		goto out_unlock;
 
 	printf("Showing %s for %s\n", evsel__name(top->sym_evsel), symbol->name);
-	printf("  Events  Pcnt (>=%d%%)\n", top->annotation_opts.min_pcnt);
+	printf("  Events  Pcnt (>=%d%%)\n", annotate_opts.min_pcnt);
 
-	more = symbol__annotate_printf(&he->ms, top->sym_evsel, &top->annotation_opts);
+	more = symbol__annotate_printf(&he->ms, top->sym_evsel);
 
 	if (top->evlist->enabled) {
 		if (top->zero)
@@ -450,7 +450,7 @@ static void perf_top__print_mapped_keys(struct perf_top *top)
 
 	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", top->count_filter);
 
-	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", top->annotation_opts.min_pcnt);
+	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", annotate_opts.min_pcnt);
 	fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
 	fprintf(stdout, "\t[S]     stop annotation.\n");
 
@@ -553,7 +553,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c)
 			prompt_integer(&top->count_filter, "Enter display event count filter");
 			break;
 		case 'F':
-			prompt_percent(&top->annotation_opts.min_pcnt,
+			prompt_percent(&annotate_opts.min_pcnt,
 				       "Enter details display event filter (percent)");
 			break;
 		case 'K':
@@ -646,8 +646,7 @@ repeat:
 	}
 
 	ret = evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent,
-				       &top->session->header.env, !top->record_opts.overwrite,
-				       &top->annotation_opts);
+				       &top->session->header.env, !top->record_opts.overwrite);
 	if (ret == K_RELOAD) {
 		top->zero = true;
 		goto repeat;
@@ -1027,8 +1026,8 @@ static int perf_top__start_counters(struct perf_top *top)
 
 	evlist__for_each_entry(evlist, counter) {
 try_again:
-		if (evsel__open(counter, top->evlist->core.user_requested_cpus,
-				     top->evlist->core.threads) < 0) {
+		if (evsel__open(counter, counter->core.cpus,
+				counter->core.threads) < 0) {
 
 			/*
 			 * Specially handle overwrite fall back.
@@ -1044,7 +1043,7 @@ try_again:
 			    perf_top_overwrite_fallback(top, counter))
 				goto try_again;
 
-			if (evsel__fallback(counter, errno, msg, sizeof(msg))) {
+			if (evsel__fallback(counter, &opts->target, errno, msg, sizeof(msg))) {
 				if (verbose > 0)
 					ui__warning("%s\n", msg);
 				goto try_again;
@@ -1241,9 +1240,9 @@ static int __cmd_top(struct perf_top *top)
 	pthread_t thread, thread_process;
 	int ret;
 
-	if (!top->annotation_opts.objdump_path) {
+	if (!annotate_opts.objdump_path) {
 		ret = perf_env__lookup_objdump(&top->session->header.env,
-					       &top->annotation_opts.objdump_path);
+					       &annotate_opts.objdump_path);
 		if (ret)
 			return ret;
 	}
@@ -1299,6 +1298,7 @@ static int __cmd_top(struct perf_top *top)
 		}
 	}
 
+	evlist__uniquify_name(top->evlist);
 	ret = perf_top__start_counters(top);
 	if (ret)
 		return ret;
@@ -1536,9 +1536,9 @@ int cmd_top(int argc, const char **argv)
 		   "only consider symbols in these comms"),
 	OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
 		   "only consider these symbols"),
-	OPT_BOOLEAN(0, "source", &top.annotation_opts.annotate_src,
+	OPT_BOOLEAN(0, "source", &annotate_opts.annotate_src,
 		    "Interleave source code with assembly code (default)"),
-	OPT_BOOLEAN(0, "asm-raw", &top.annotation_opts.show_asm_raw,
+	OPT_BOOLEAN(0, "asm-raw", &annotate_opts.show_asm_raw,
 		    "Display raw encoding of assembly instructions (default)"),
 	OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
 		    "Enable kernel symbol demangling"),
@@ -1549,9 +1549,9 @@ int cmd_top(int argc, const char **argv)
 		   "addr2line binary to use for line numbers"),
 	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
 		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
-	OPT_STRING(0, "prefix", &top.annotation_opts.prefix, "prefix",
+	OPT_STRING(0, "prefix", &annotate_opts.prefix, "prefix",
 		    "Add prefix to source file path names in programs (with --prefix-strip)"),
-	OPT_STRING(0, "prefix-strip", &top.annotation_opts.prefix_strip, "N",
+	OPT_STRING(0, "prefix-strip", &annotate_opts.prefix_strip, "N",
 		    "Strip first N entries of source file path name in programs (with --prefix)"),
 	OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"),
 	OPT_CALLBACK(0, "percent-limit", &top, "percent",
@@ -1609,10 +1609,10 @@ int cmd_top(int argc, const char **argv)
 	if (status < 0)
 		return status;
 
-	annotation_options__init(&top.annotation_opts);
+	annotation_options__init();
 
-	top.annotation_opts.min_pcnt = 5;
-	top.annotation_opts.context  = 4;
+	annotate_opts.min_pcnt = 5;
+	annotate_opts.context  = 4;
 
 	top.evlist = evlist__new();
 	if (top.evlist == NULL)
@@ -1642,13 +1642,13 @@ int cmd_top(int argc, const char **argv)
 		usage_with_options(top_usage, options);
 
 	if (disassembler_style) {
-		top.annotation_opts.disassembler_style = strdup(disassembler_style);
-		if (!top.annotation_opts.disassembler_style)
+		annotate_opts.disassembler_style = strdup(disassembler_style);
+		if (!annotate_opts.disassembler_style)
 			return -ENOMEM;
 	}
 	if (objdump_path) {
-		top.annotation_opts.objdump_path = strdup(objdump_path);
-		if (!top.annotation_opts.objdump_path)
+		annotate_opts.objdump_path = strdup(objdump_path);
+		if (!annotate_opts.objdump_path)
 			return -ENOMEM;
 	}
 	if (addr2line_path) {
@@ -1661,7 +1661,7 @@ int cmd_top(int argc, const char **argv)
 	if (status)
 		goto out_delete_evlist;
 
-	if (annotate_check_args(&top.annotation_opts) < 0)
+	if (annotate_check_args() < 0)
 		goto out_delete_evlist;
 
 	if (!top.evlist->core.nr_entries) {
@@ -1787,7 +1787,7 @@ int cmd_top(int argc, const char **argv)
 	if (status < 0)
 		goto out_delete_evlist;
 
-	annotation_config__init(&top.annotation_opts);
+	annotation_config__init();
 
 	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
 	status = symbol__init(NULL);
@@ -1840,7 +1840,7 @@ int cmd_top(int argc, const char **argv)
 out_delete_evlist:
 	evlist__delete(top.evlist);
 	perf_session__delete(top.session);
-	annotation_options__exit(&top.annotation_opts);
+	annotation_options__exit();
 
 	return status;
 }
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index e541d0e2777a..109b8e64fe69 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2470,9 +2470,8 @@ static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sam
 static const char *errno_to_name(struct evsel *evsel, int err)
 {
 	struct perf_env *env = evsel__env(evsel);
-	const char *arch_name = perf_env__arch(env);
 
-	return arch_syscalls__strerrno(arch_name, err);
+	return perf_env__arch_strerrno(env, err);
 }
 
 static int trace__sys_exit(struct trace *trace, struct evsel *evsel,
@@ -4264,12 +4263,11 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
 			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
 
 			if (trace->errno_summary && stats->nr_failures) {
-				const char *arch_name = perf_env__arch(trace->host->env);
 				int e;
 
 				for (e = 0; e < stats->max_errno; ++e) {
 					if (stats->errnos[e] != 0)
-						fprintf(fp, "\t\t\t\t%s: %d\n", arch_syscalls__strerrno(arch_name, e + 1), stats->errnos[e]);
+						fprintf(fp, "\t\t\t\t%s: %d\n", perf_env__arch_strerrno(trace->host->env, e + 1), stats->errnos[e]);
 				}
 			}
 		}
diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh
index 133f0eddbcc4..f94795794b36 100644..100755
--- a/tools/perf/perf-archive.sh
+++ b/tools/perf/perf-archive.sh
@@ -4,8 +4,73 @@
 # Arnaldo Carvalho de Melo <acme@redhat.com>
 
 PERF_DATA=perf.data
-if [ $# -ne 0 ] ; then
-	PERF_DATA=$1
+PERF_SYMBOLS=perf.symbols
+PERF_ALL=perf.all
+ALL=0
+UNPACK=0
+
+while [ $# -gt 0 ] ; do					# "$1" is quoted so empty or space-containing arguments do not break the tests
+	if [ "$1" == "--all" ]; then
+		ALL=1
+		shift
+	elif [ "$1" == "--unpack" ]; then
+		UNPACK=1
+		shift
+	else
+		PERF_DATA="$1"
+		UNPACK_TAR="$1"
+		shift
+	fi
+done
+
+if [ $UNPACK -eq 1 ]; then
+	if [ ! -z "$UNPACK_TAR" ]; then					# tar given as an argument
+		if [ ! -e "$UNPACK_TAR" ]; then
+			echo "Provided file $UNPACK_TAR does not exist"
+			exit 1
+		fi
+		TARGET="$UNPACK_TAR"
+	else																# search for perf tar in the current directory
+		TARGET=`find . -regex "\./perf.*\.tar\.bz2"`
+		TARGET_NUM=`echo -n "$TARGET" | grep -c '^'`
+
+		if [ -z "$TARGET" -o $TARGET_NUM -gt 1 ]; then
+			echo -e "Error: $TARGET_NUM files found for unpacking:\n$TARGET"
+			echo "Provide the requested file as an argument"
+			exit 1
+		else
+			echo "Found target file for unpacking: $TARGET"
+		fi
+	fi
+
+	if [[ "$TARGET" =~ (\./)?$PERF_ALL.*.tar.bz2 ]]; then				# perf tar generated by --all option
+		TAR_CONTENTS=`tar tvf "$TARGET" | tr -s " " | cut -d " " -f 6`
+		VALID_TAR=`echo "$TAR_CONTENTS" | grep "$PERF_SYMBOLS.tar.bz2" | wc -l`		# check if it contains a sub-tar perf.symbols
+		if [ $VALID_TAR -ne 1 ]; then
+			echo "Error: $TARGET file is not valid (contains zero or multiple sub-tar files with debug symbols)"
+			exit 1
+		fi
+
+		INTERSECT=`comm -12 <(ls | sort) <(echo "$TAR_CONTENTS" | sort) | tr "\n" " "`	# check for overwriting (comm needs sorted input)
+		if [ ! -z "$INTERSECT" ]; then										# prompt if file(s) already exist in the current directory
+			echo "File(s) ${INTERSECT::-1} already exist in the current directory."
+			while true; do
+				read -p 'Do you wish to overwrite them? ' yn || exit 1	# stop on EOF instead of looping forever
+				case $yn in
+					[Yy]* ) break;;
+					[Nn]* ) exit 1;;
+					* ) echo "Please answer yes or no.";;
+				esac
+			done
+		fi
+
+		# unzip the perf.data file in the current working directory and debug symbols in ~/.debug directory
+		tar xvf "$TARGET" && tar xvf "$PERF_SYMBOLS.tar.bz2" -C ~/.debug || exit 1
+
+	else																# perf tar generated by perf archive (contains only debug symbols)
+		tar xvf "$TARGET" -C ~/.debug || exit 1							# report failure instead of falling through to exit 0
+	fi
+	exit 0
 fi
 
 #
@@ -39,9 +104,18 @@ while read build_id ; do
 	echo ${filename#$PERF_BUILDID_LINKDIR} >> $MANIFEST
 done
 
-tar cjf $PERF_DATA.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST
-rm $MANIFEST $BUILDIDS || true
+if [ $ALL -eq 1 ]; then						# pack perf.data file together with tar containing debug symbols (paths quoted: PERF_DATA is user-supplied)
+	HOSTNAME=$(hostname)
+	DATE=$(date '+%Y%m%d-%H%M%S')
+	tar cjf "$PERF_SYMBOLS.tar.bz2" -C "$PERF_BUILDID_DIR" -T "$MANIFEST"
+	tar cjf "$PERF_ALL-$HOSTNAME-$DATE.tar.bz2" "$PERF_DATA" "$PERF_SYMBOLS.tar.bz2"
+	rm "$PERF_SYMBOLS.tar.bz2" "$MANIFEST" "$BUILDIDS" || true
+else										# pack only the debug symbols
+	tar cjf "$PERF_DATA.tar.bz2" -C "$PERF_BUILDID_DIR" -T "$MANIFEST"
+	rm "$MANIFEST" "$BUILDIDS" || true
+fi
+
 echo -e "Now please run:\n"
-echo -e "$ tar xvf $PERF_DATA.tar.bz2 -C ~/.debug\n"
-echo "wherever you need to run 'perf report' on."
+echo -e "$ perf archive --unpack\n"
+echo "or unpack the tar manually wherever you need to run 'perf report' on."
 exit 0
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index d3fc8090413c..921bee0a6437 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -39,6 +39,7 @@
 #include <linux/zalloc.h>
 
 static int use_pager = -1;
+static FILE *debug_fp = NULL;
 
 struct cmd_struct {
 	const char *cmd;
@@ -162,6 +163,19 @@ static void commit_pager_choice(void)
 	}
 }
 
+/* Open @path for writing and pass it to debug_set_file(); 0 on success, -1 on error. */
+static int set_debug_file(const char *path)
+{
+	debug_fp = fopen(path, "w");
+	if (!debug_fp) {
+		fprintf(stderr, "Open debug file '%s' failed: %s\n",
+			path, strerror(errno));
+		return -1;
+	}
+	debug_set_file(debug_fp);
+	return 0;
+}
+
 struct option options[] = {
 	OPT_ARGUMENT("help", "help"),
 	OPT_ARGUMENT("version", "version"),
@@ -174,6 +188,7 @@ struct option options[] = {
 	OPT_ARGUMENT("list-cmds", "list-cmds"),
 	OPT_ARGUMENT("list-opts", "list-opts"),
 	OPT_ARGUMENT("debug", "debug"),
+	OPT_ARGUMENT("debug-file", "debug-file"),
 	OPT_END()
 };
 
@@ -287,6 +302,18 @@ static int handle_options(const char ***argv, int *argc, int *envchanged)
 
 			(*argv)++;
 			(*argc)--;
+		} else if (!strcmp(cmd, "--debug-file")) {
+			if (*argc < 2) {
+				fprintf(stderr, "No path given for --debug-file.\n");
+				usage(perf_usage_string);
+			}
+
+			if (set_debug_file((*argv)[1]))
+				usage(perf_usage_string);
+
+			(*argv)++;
+			(*argc)--;
+
 		} else {
 			fprintf(stderr, "Unknown option: %s\n", cmd);
 			usage(perf_usage_string);
@@ -547,5 +574,8 @@ int main(int argc, const char **argv)
 	fprintf(stderr, "Failed to run command '%s': %s\n",
 		cmd, str_error_r(errno, sbuf, sizeof(sbuf)));
 out:
+	if (debug_fp)
+		fclose(debug_fp);
+
 	return 1;
 }
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json
index 88b23b85e33c..879ff21e0b17 100644
--- a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json
@@ -110,7 +110,7 @@
     {
         "PublicDescription": "Flushes due to memory hazards",
         "EventCode": "0x121",
-        "EventName": "BPU_FLUSH_MEM_FAULT",
+        "EventName": "GPC_FLUSH_MEM_FAULT",
         "BriefDescription": "Flushes due to memory hazards"
     },
     {
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/branch.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/branch.json
new file mode 100644
index 000000000000..a632755fc086
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/branch.json
@@ -0,0 +1,125 @@
+[
+    {
+        "ArchStdEvent": "BR_IMMED_SPEC"
+    },
+    {
+        "ArchStdEvent": "BR_RETURN_SPEC"
+    },
+    {
+        "ArchStdEvent": "BR_INDIRECT_SPEC"
+    },
+    {
+        "ArchStdEvent": "BR_MIS_PRED"
+    },
+    {
+        "ArchStdEvent": "BR_PRED"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, branch not taken",
+        "EventCode": "0x8107",
+        "EventName": "BR_SKIP_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, branch not taken"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, immediate branch taken",
+        "EventCode": "0x8108",
+        "EventName": "BR_IMMED_TAKEN_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, immediate branch taken"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, indirect branch excluding return retired",
+        "EventCode": "0x810c",
+        "EventName": "BR_INDNR_TAKEN_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, indirect branch excluding return retired"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, predicted immediate branch",
+        "EventCode": "0x8110",
+        "EventName": "BR_IMMED_PRED_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, predicted immediate branch"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, mispredicted immediate branch",
+        "EventCode": "0x8111",
+        "EventName": "BR_IMMED_MIS_PRED_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, mispredicted immediate branch"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, predicted indirect branch",
+        "EventCode": "0x8112",
+        "EventName": "BR_IND_PRED_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, predicted indirect branch"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, mispredicted indirect branch",
+        "EventCode": "0x8113",
+        "EventName": "BR_IND_MIS_PRED_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, mispredicted indirect branch"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, predicted procedure return",
+        "EventCode": "0x8114",
+        "EventName": "BR_RETURN_PRED_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, predicted procedure return"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, mispredicted procedure return",
+        "EventCode": "0x8115",
+        "EventName": "BR_RETURN_MIS_PRED_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, mispredicted procedure return"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, predicted indirect branch excluding return",
+        "EventCode": "0x8116",
+        "EventName": "BR_INDNR_PRED_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, predicted indirect branch excluding return"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, mispredicted indirect branch excluding return",
+        "EventCode": "0x8117",
+        "EventName": "BR_INDNR_MIS_PRED_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, mispredicted indirect branch excluding return"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, predicted branch, taken",
+        "EventCode": "0x8118",
+        "EventName": "BR_TAKEN_PRED_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, predicted branch, taken"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, mispredicted branch, taken",
+        "EventCode": "0x8119",
+        "EventName": "BR_TAKEN_MIS_PRED_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, mispredicted branch, taken"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, predicted branch, not taken",
+        "EventCode": "0x811a",
+        "EventName": "BR_SKIP_PRED_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, predicted branch, not taken"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, mispredicted branch, not taken",
+        "EventCode": "0x811b",
+        "EventName": "BR_SKIP_MIS_PRED_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, mispredicted branch, not taken"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, predicted branch",
+        "EventCode": "0x811c",
+        "EventName": "BR_PRED_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, predicted branch"
+    },
+    {
+        "PublicDescription": "Instruction architecturally executed, indirect branch",
+        "EventCode": "0x811d",
+        "EventName": "BR_IND_RETIRED",
+        "BriefDescription": "Instruction architecturally executed, indirect branch"
+    },
+    {
+        "PublicDescription": "Branch Record captured.",
+        "EventCode": "0x811f",
+        "EventName": "BRB_FILTRATE",
+        "BriefDescription": "Branch Record captured."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/bus.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/bus.json
new file mode 100644
index 000000000000..2aeb9907831d
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/bus.json
@@ -0,0 +1,20 @@
+[
+    {
+        "ArchStdEvent": "CPU_CYCLES"
+    },
+    {
+        "ArchStdEvent": "BUS_CYCLES"
+    },
+    {
+        "ArchStdEvent": "BUS_ACCESS_RD"
+    },
+    {
+        "ArchStdEvent": "BUS_ACCESS_WR"
+    },
+    {
+        "ArchStdEvent": "BUS_ACCESS"
+    },
+    {
+        "ArchStdEvent": "CNT_CYCLES"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/cache.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/cache.json
new file mode 100644
index 000000000000..c50d8e930b05
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/cache.json
@@ -0,0 +1,206 @@
+[
+    {
+        "ArchStdEvent": "L1D_CACHE_RD"
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_WR"
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_RD"
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_INVAL"
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_REFILL_RD"
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_REFILL_WR"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_RD"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WR"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_REFILL_RD"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_REFILL_WR"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WB_VICTIM"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WB_CLEAN"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_INVAL"
+    },
+    {
+        "ArchStdEvent": "L1I_CACHE_REFILL"
+    },
+    {
+        "ArchStdEvent": "L1I_TLB_REFILL"
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL"
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE"
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_REFILL"
+    },
+    {
+        "ArchStdEvent": "L1I_CACHE"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_REFILL"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WB"
+    },
+    {
+        "ArchStdEvent": "L1D_TLB"
+    },
+    {
+        "ArchStdEvent": "L1I_TLB"
+    },
+    {
+        "ArchStdEvent": "L2D_TLB_REFILL"
+    },
+    {
+        "ArchStdEvent": "L2I_TLB_REFILL"
+    },
+    {
+        "ArchStdEvent": "L2D_TLB"
+    },
+    {
+        "ArchStdEvent": "L2I_TLB"
+    },
+    {
+        "ArchStdEvent": "DTLB_WALK"
+    },
+    {
+        "ArchStdEvent": "ITLB_WALK"
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_WR"
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_LMISS_RD"
+    },
+    {
+        "ArchStdEvent": "L1I_CACHE_LMISS"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_LMISS_RD"
+    },
+    {
+        "PublicDescription": "Level 1 data or unified cache demand access",
+        "EventCode": "0x8140",
+        "EventName": "L1D_CACHE_RW",
+        "BriefDescription": "Level 1 data or unified cache demand access"
+    },
+    {
+        "PublicDescription": "Level 1 data or unified cache preload or prefetch",
+        "EventCode": "0x8142",
+        "EventName": "L1D_CACHE_PRFM",
+        "BriefDescription": "Level 1 data or unified cache preload or prefetch"
+    },
+    {
+        "PublicDescription": "Level 1 data or unified cache refill, preload or prefetch",
+        "EventCode": "0x8146",
+        "EventName": "L1D_CACHE_REFILL_PRFM",
+        "BriefDescription": "Level 1 data or unified cache refill, preload or prefetch"
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_RD"
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_WR"
+    },
+    {
+        "ArchStdEvent": "L2D_TLB_REFILL_RD"
+    },
+    {
+        "ArchStdEvent": "L2D_TLB_REFILL_WR"
+    },
+    {
+        "ArchStdEvent": "L2D_TLB_RD"
+    },
+    {
+        "ArchStdEvent": "L2D_TLB_WR"
+    },
+    {
+        "PublicDescription": "L1D TLB miss",
+        "EventCode": "0xD600",
+        "EventName": "L1D_TLB_MISS",
+        "BriefDescription": "L1D TLB miss"
+    },
+    {
+        "PublicDescription": "Level 1 prefetcher, load prefetch requests generated",
+        "EventCode": "0xd606",
+        "EventName": "L1_PREFETCH_LD_GEN",
+        "BriefDescription": "Level 1 prefetcher, load prefetch requests generated"
+    },
+    {
+        "PublicDescription": "Level 1 prefetcher, load prefetch fills into the level 1 cache",
+        "EventCode": "0xd607",
+        "EventName": "L1_PREFETCH_LD_FILL",
+        "BriefDescription": "Level 1 prefetcher, load prefetch fills into the level 1 cache"
+    },
+    {
+        "PublicDescription": "Level 1 prefetcher, load prefetch to level 2 generated",
+        "EventCode": "0xd608",
+        "EventName": "L1_PREFETCH_L2_REQ",
+        "BriefDescription": "Level 1 prefetcher, load prefetch to level 2 generated"
+    },
+    {
+        "PublicDescription": "L1 prefetcher, distance was reset",
+        "EventCode": "0xd609",
+        "EventName": "L1_PREFETCH_DIST_RST",
+        "BriefDescription": "L1 prefetcher, distance was reset"
+    },
+    {
+        "PublicDescription": "L1 prefetcher, distance was increased",
+        "EventCode": "0xd60a",
+        "EventName": "L1_PREFETCH_DIST_INC",
+        "BriefDescription": "L1 prefetcher, distance was increased"
+    },
+    {
+        "PublicDescription": "Level 1 prefetcher, table entry is trained",
+        "EventCode": "0xd60b",
+        "EventName": "L1_PREFETCH_ENTRY_TRAINED",
+        "BriefDescription": "Level 1 prefetcher, table entry is trained"
+    },
+    {
+        "PublicDescription": "L1 data cache refill - Read or Write",
+        "EventCode": "0xd60e",
+        "EventName": "L1D_CACHE_REFILL_RW",
+        "BriefDescription": "L1 data cache refill - Read or Write"
+    },
+    {
+        "PublicDescription": "Level 2 cache refill from instruction-side miss, including IMMU refills",
+        "EventCode": "0xD701",
+        "EventName": "L2C_INST_REFILL",
+        "BriefDescription": "Level 2 cache refill from instruction-side miss, including IMMU refills"
+    },
+    {
+        "PublicDescription": "Level 2 cache refill from data-side miss, including DMMU refills",
+        "EventCode": "0xD702",
+        "EventName": "L2C_DATA_REFILL",
+        "BriefDescription": "Level 2 cache refill from data-side miss, including DMMU refills"
+    },
+    {
+        "PublicDescription": "Level 2 cache prefetcher, load prefetch requests generated",
+        "EventCode": "0xD703",
+        "EventName": "L2_PREFETCH_REQ",
+        "BriefDescription": "Level 2 cache prefetcher, load prefetch requests generated"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/core-imp-def.json
new file mode 100644
index 000000000000..eb5a2208d260
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/core-imp-def.json
@@ -0,0 +1,464 @@
+[
+    {
+        "PublicDescription": "Level 2 prefetch requests, refilled to L2 cache",
+        "EventCode": "0x10A",
+        "EventName": "L2_PREFETCH_REFILL",
+        "BriefDescription": "Level 2 prefetch requests, refilled to L2 cache"
+    },
+    {
+        "PublicDescription": "Level 2 prefetch requests, late",
+        "EventCode": "0x10B",
+        "EventName": "L2_PREFETCH_UPGRADE",
+        "BriefDescription": "Level 2 prefetch requests, late"
+    },
+    {
+        "PublicDescription": "Predictable branch speculatively executed that hit any level of BTB",
+        "EventCode": "0x110",
+        "EventName": "BPU_HIT_BTB",
+        "BriefDescription": "Predictable branch speculatively executed that hit any level of BTB"
+    },
+    {
+        "PublicDescription": "Predictable conditional branch speculatively executed that hit any level of BTB",
+        "EventCode": "0x111",
+        "EventName": "BPU_CONDITIONAL_BRANCH_HIT_BTB",
+        "BriefDescription": "Predictable conditional branch speculatively executed that hit any level of BTB"
+    },
+    {
+        "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor",
+        "EventCode": "0x112",
+        "EventName": "BPU_HIT_INDIRECT_PREDICTOR",
+        "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor"
+    },
+    {
+        "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor",
+        "EventCode": "0x113",
+        "EventName": "BPU_HIT_RSB",
+        "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor"
+    },
+    {
+        "PublicDescription": "Predictable unconditional branch speculatively executed that did not hit any level of BTB",
+        "EventCode": "0x114",
+        "EventName": "BPU_UNCONDITIONAL_BRANCH_MISS_BTB",
+        "BriefDescription": "Predictable unconditional branch speculatively executed that did not hit any level of BTB"
+    },
+    {
+        "PublicDescription": "Predictable branch speculatively executed, unpredicted",
+        "EventCode": "0x115",
+        "EventName": "BPU_BRANCH_NO_HIT",
+        "BriefDescription": "Predictable branch speculatively executed, unpredicted"
+    },
+    {
+        "PublicDescription": "Predictable branch speculatively executed that hit any level of BTB that mispredict",
+        "EventCode": "0x116",
+        "EventName": "BPU_HIT_BTB_AND_MISPREDICT",
+        "BriefDescription": "Predictable branch speculatively executed that hit any level of BTB that mispredict"
+    },
+    {
+        "PublicDescription": "Predictable conditional branch speculatively executed that hit any level of BTB that (direction) mispredict",
+        "EventCode": "0x117",
+        "EventName": "BPU_CONDITIONAL_BRANCH_HIT_BTB_AND_MISPREDICT",
+        "BriefDescription": "Predictable conditional branch speculatively executed that hit any level of BTB that (direction) mispredict"
+    },
+    {
+        "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor that mispredict",
+        "EventCode": "0x118",
+        "EventName": "BPU_INDIRECT_BRANCH_HIT_BTB_AND_MISPREDICT",
+        "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor that mispredict"
+    },
+    {
+        "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor that mispredict",
+        "EventCode": "0x119",
+        "EventName": "BPU_HIT_RSB_AND_MISPREDICT",
+        "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor that mispredict"
+    },
+    {
+        "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the overflow/underflow return predictor that mispredict",
+        "EventCode": "0x11a",
+        "EventName": "BPU_MISS_RSB_AND_MISPREDICT",
+        "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the overflow/underflow return predictor that mispredict"
+    },
+    {
+        "PublicDescription": "Predictable branch speculatively executed, unpredicted, that mispredict",
+        "EventCode": "0x11b",
+        "EventName": "BPU_NO_PREDICTION_MISPREDICT",
+        "BriefDescription": "Predictable branch speculatively executed, unpredicted, that mispredict"
+    },
+    {
+        "PublicDescription": "Predictable branch update the BTB region buffer entry",
+        "EventCode": "0x11c",
+        "EventName": "BPU_BTB_UPDATE",
+        "BriefDescription": "Predictable branch update the BTB region buffer entry"
+    },
+    {
+        "PublicDescription": "Count predict pipe stalls due to speculative return address predictor full",
+        "EventCode": "0x11d",
+        "EventName": "BPU_RSB_FULL_STALL",
+        "BriefDescription": "Count predict pipe stalls due to speculative return address predictor full"
+    },
+    {
+        "PublicDescription": "Macro-ops speculatively decoded",
+        "EventCode": "0x11f",
+        "EventName": "ICF_INST_SPEC_DECODE",
+        "BriefDescription": "Macro-ops speculatively decoded"
+    },
+    {
+        "PublicDescription": "Flushes",
+        "EventCode": "0x120",
+        "EventName": "GPC_FLUSH",
+        "BriefDescription": "Flushes"
+    },
+    {
+        "PublicDescription": "Flushes due to memory hazards",
+        "EventCode": "0x121",
+        "EventName": "GPC_FLUSH_MEM_FAULT",
+        "BriefDescription": "Flushes due to memory hazards"
+    },
+    {
+        "PublicDescription": "ETM extout bit 0",
+        "EventCode": "0x141",
+        "EventName": "MSC_ETM_EXTOUT0",
+        "BriefDescription": "ETM extout bit 0"
+    },
+    {
+        "PublicDescription": "ETM extout bit 1",
+        "EventCode": "0x142",
+        "EventName": "MSC_ETM_EXTOUT1",
+        "BriefDescription": "ETM extout bit 1"
+    },
+    {
+        "PublicDescription": "ETM extout bit 2",
+        "EventCode": "0x143",
+        "EventName": "MSC_ETM_EXTOUT2",
+        "BriefDescription": "ETM extout bit 2"
+    },
+    {
+        "PublicDescription": "ETM extout bit 3",
+        "EventCode": "0x144",
+        "EventName": "MSC_ETM_EXTOUT3",
+        "BriefDescription": "ETM extout bit 3"
+    },
+    {
+        "PublicDescription": "Bus request sn",
+        "EventCode": "0x156",
+        "EventName": "L2C_SNOOP",
+        "BriefDescription": "Bus request sn"
+    },
+    {
+        "PublicDescription": "L2 TXDAT LCRD blocked",
+        "EventCode": "0x169",
+        "EventName": "L2C_DAT_CRD_STALL",
+        "BriefDescription": "L2 TXDAT LCRD blocked"
+    },
+    {
+        "PublicDescription": "L2 TXRSP LCRD blocked",
+        "EventCode": "0x16a",
+        "EventName": "L2C_RSP_CRD_STALL",
+        "BriefDescription": "L2 TXRSP LCRD blocked"
+    },
+    {
+        "PublicDescription": "L2 TXREQ LCRD blocked",
+        "EventCode": "0x16b",
+        "EventName": "L2C_REQ_CRD_STALL",
+        "BriefDescription": "L2 TXREQ LCRD blocked"
+    },
+    {
+        "PublicDescription": "Early mispredict",
+        "EventCode": "0xD100",
+        "EventName": "ICF_EARLY_MIS_PRED",
+        "BriefDescription": "Early mispredict"
+    },
+    {
+        "PublicDescription": "FEQ full cycles",
+        "EventCode": "0xD101",
+        "EventName": "ICF_FEQ_FULL",
+        "BriefDescription": "FEQ full cycles"
+    },
+    {
+        "PublicDescription": "Instruction FIFO Full",
+        "EventCode": "0xD102",
+        "EventName": "ICF_INST_FIFO_FULL",
+        "BriefDescription": "Instruction FIFO Full"
+    },
+    {
+        "PublicDescription": "L1I TLB miss",
+        "EventCode": "0xD103",
+        "EventName": "L1I_TLB_MISS",
+        "BriefDescription": "L1I TLB miss"
+    },
+    {
+        "PublicDescription": "ICF sent 0 instructions to IDR this cycle",
+        "EventCode": "0xD104",
+        "EventName": "ICF_STALL",
+        "BriefDescription": "ICF sent 0 instructions to IDR this cycle"
+    },
+    {
+        "PublicDescription": "PC FIFO Full",
+        "EventCode": "0xD105",
+        "EventName": "ICF_PC_FIFO_FULL",
+        "BriefDescription": "PC FIFO Full"
+    },
+    {
+        "PublicDescription": "Stall due to BOB ID",
+        "EventCode": "0xD200",
+        "EventName": "IDR_STALL_BOB_ID",
+        "BriefDescription": "Stall due to BOB ID"
+    },
+    {
+        "PublicDescription": "Dispatch stall due to LOB entries",
+        "EventCode": "0xD201",
+        "EventName": "IDR_STALL_LOB_ID",
+        "BriefDescription": "Dispatch stall due to LOB entries"
+    },
+    {
+        "PublicDescription": "Dispatch stall due to SOB entries",
+        "EventCode": "0xD202",
+        "EventName": "IDR_STALL_SOB_ID",
+        "BriefDescription": "Dispatch stall due to SOB entries"
+    },
+    {
+        "PublicDescription": "Dispatch stall due to IXU scheduler entries",
+        "EventCode": "0xD203",
+        "EventName": "IDR_STALL_IXU_SCHED",
+        "BriefDescription": "Dispatch stall due to IXU scheduler entries"
+    },
+    {
+        "PublicDescription": "Dispatch stall due to FSU scheduler entries",
+        "EventCode": "0xD204",
+        "EventName": "IDR_STALL_FSU_SCHED",
+        "BriefDescription": "Dispatch stall due to FSU scheduler entries"
+    },
+    {
+        "PublicDescription": "Dispatch stall due to ROB entries",
+        "EventCode": "0xD205",
+        "EventName": "IDR_STALL_ROB_ID",
+        "BriefDescription": "Dispatch stall due to ROB entries"
+    },
+    {
+        "PublicDescription": "Dispatch stall due to flush",
+        "EventCode": "0xD206",
+        "EventName": "IDR_STALL_FLUSH",
+        "BriefDescription": "Dispatch stall due to flush"
+    },
+    {
+        "PublicDescription": "Dispatch stall due to WFI",
+        "EventCode": "0xD207",
+        "EventName": "IDR_STALL_WFI",
+        "BriefDescription": "Dispatch stall due to WFI"
+    },
+    {
+        "PublicDescription": "Number of SWOB drains triggered by timeout",
+        "EventCode": "0xD208",
+        "EventName": "IDR_STALL_SWOB_TIMEOUT",
+        "BriefDescription": "Number of SWOB drains triggered by timeout"
+    },
+    {
+        "PublicDescription": "Number of SWOB drains triggered by system register or special-purpose register read-after-write or specific special-purpose register writes that cause SWOB drain",
+        "EventCode": "0xD209",
+        "EventName": "IDR_STALL_SWOB_RAW",
+        "BriefDescription": "Number of SWOB drains triggered by system register or special-purpose register read-after-write or specific special-purpose register writes that cause SWOB drain"
+    },
+    {
+        "PublicDescription": "Number of SWOB drains triggered by system register write when SWOB full",
+        "EventCode": "0xD20A",
+        "EventName": "IDR_STALL_SWOB_FULL",
+        "BriefDescription": "Number of SWOB drains triggered by system register write when SWOB full"
+    },
+    {
+        "PublicDescription": "Dispatch stall due to L1 instruction cache miss",
+        "EventCode": "0xD20B",
+        "EventName": "STALL_FRONTEND_CACHE",
+        "BriefDescription": "Dispatch stall due to L1 instruction cache miss"
+    },
+    {
+        "PublicDescription": "Dispatch stall due to L1 data cache miss",
+        "EventCode": "0xD20D",
+        "EventName": "STALL_BACKEND_CACHE",
+        "BriefDescription": "Dispatch stall due to L1 data cache miss"
+    },
+    {
+        "PublicDescription": "Dispatch stall due to lack of any core resource",
+        "EventCode": "0xD20F",
+        "EventName": "STALL_BACKEND_RESOURCE",
+        "BriefDescription": "Dispatch stall due to lack of any core resource"
+    },
+    {
+        "PublicDescription": "Instructions issued by the scheduler",
+        "EventCode": "0xD300",
+        "EventName": "IXU_NUM_UOPS_ISSUED",
+        "BriefDescription": "Instructions issued by the scheduler"
+    },
+    {
+        "PublicDescription": "Any uop issued was canceled for any reason",
+        "EventCode": "0xD301",
+        "EventName": "IXU_ISSUE_CANCEL",
+        "BriefDescription": "Any uop issued was canceled for any reason"
+    },
+    {
+        "PublicDescription": "A load wakeup to the scheduler has been canceled",
+        "EventCode": "0xD302",
+        "EventName": "IXU_LOAD_CANCEL",
+        "BriefDescription": "A load wakeup to the scheduler has been canceled"
+    },
+    {
+        "PublicDescription": "The scheduler had to cancel one slow Uop due to resource conflict",
+        "EventCode": "0xD303",
+        "EventName": "IXU_SLOW_CANCEL",
+        "BriefDescription": "The scheduler had to cancel one slow Uop due to resource conflict"
+    },
+    {
+        "PublicDescription": "Uops issued by the scheduler on IXA",
+        "EventCode": "0xD304",
+        "EventName": "IXU_IXA_ISSUED",
+        "BriefDescription": "Uops issued by the scheduler on IXA"
+    },
+    {
+        "PublicDescription": "Uops issued by the scheduler on IXA Par 0",
+        "EventCode": "0xD305",
+        "EventName": "IXU_IXA_PAR0_ISSUED",
+        "BriefDescription": "Uops issued by the scheduler on IXA Par 0"
+    },
+    {
+        "PublicDescription": "Uops issued by the scheduler on IXA Par 1",
+        "EventCode": "0xD306",
+        "EventName": "IXU_IXA_PAR1_ISSUED",
+        "BriefDescription": "Uops issued by the scheduler on IXA Par 1"
+    },
+    {
+        "PublicDescription": "Uops issued by the scheduler on IXB",
+        "EventCode": "0xD307",
+        "EventName": "IXU_IXB_ISSUED",
+        "BriefDescription": "Uops issued by the scheduler on IXB"
+    },
+    {
+        "PublicDescription": "Uops issued by the scheduler on IXB Par 0",
+        "EventCode": "0xD308",
+        "EventName": "IXU_IXB_PAR0_ISSUED",
+        "BriefDescription": "Uops issued by the scheduler on IXB Par 0"
+    },
+    {
+        "PublicDescription": "Uops issued by the scheduler on IXB Par 1",
+        "EventCode": "0xD309",
+        "EventName": "IXU_IXB_PAR1_ISSUED",
+        "BriefDescription": "Uops issued by the scheduler on IXB Par 1"
+    },
+    {
+        "PublicDescription": "Uops issued by the scheduler on IXC",
+        "EventCode": "0xD30A",
+        "EventName": "IXU_IXC_ISSUED",
+        "BriefDescription": "Uops issued by the scheduler on IXC"
+    },
+    {
+        "PublicDescription": "Uops issued by the scheduler on IXC Par 0",
+        "EventCode": "0xD30B",
+        "EventName": "IXU_IXC_PAR0_ISSUED",
+        "BriefDescription": "Uops issued by the scheduler on IXC Par 0"
+    },
+    {
+        "PublicDescription": "Uops issued by the scheduler on IXC Par 1",
+        "EventCode": "0xD30C",
+        "EventName": "IXU_IXC_PAR1_ISSUED",
+        "BriefDescription": "Uops issued by the scheduler on IXC Par 1"
+    },
+    {
+        "PublicDescription": "Uops issued by the scheduler on IXD",
+        "EventCode": "0xD30D",
+        "EventName": "IXU_IXD_ISSUED",
+        "BriefDescription": "Uops issued by the scheduler on IXD"
+    },
+    {
+        "PublicDescription": "Uops issued by the scheduler on IXD Par 0",
+        "EventCode": "0xD30E",
+        "EventName": "IXU_IXD_PAR0_ISSUED",
+        "BriefDescription": "Uops issued by the scheduler on IXD Par 0"
+    },
+    {
+        "PublicDescription": "Uops issued by the scheduler on IXD Par 1",
+        "EventCode": "0xD30F",
+        "EventName": "IXU_IXD_PAR1_ISSUED",
+        "BriefDescription": "Uops issued by the scheduler on IXD Par 1"
+    },
+    {
+        "PublicDescription": "Uops issued by the FSU scheduler",
+        "EventCode": "0xD400",
+        "EventName": "FSU_ISSUED",
+        "BriefDescription": "Uops issued by the FSU scheduler"
+    },
+    {
+        "PublicDescription": "Uops issued by the scheduler on FSX",
+        "EventCode": "0xD401",
+        "EventName": "FSU_FSX_ISSUED",
+        "BriefDescription": "Uops issued by the scheduler on FSX"
+    },
+    {
+        "PublicDescription": "Uops issued by the scheduler on FSY",
+        "EventCode": "0xD402",
+        "EventName": "FSU_FSY_ISSUED",
+        "BriefDescription": "Uops issued by the scheduler on FSY"
+    },
+    {
+        "PublicDescription": "Uops issued by the scheduler on FSZ",
+        "EventCode": "0xD403",
+        "EventName": "FSU_FSZ_ISSUED",
+        "BriefDescription": "Uops issued by the scheduler on FSZ"
+    },
+    {
+        "PublicDescription": "Uops canceled (load cancels)",
+        "EventCode": "0xD404",
+        "EventName": "FSU_CANCEL",
+        "BriefDescription": "Uops canceled (load cancels)"
+    },
+    {
+        "PublicDescription": "Count scheduler stalls due to divide/sqrt",
+        "EventCode": "0xD405",
+        "EventName": "FSU_DIV_SQRT_STALL",
+        "BriefDescription": "Count scheduler stalls due to divide/sqrt"
+    },
+    {
+        "PublicDescription": "Number of SWOB drains",
+        "EventCode": "0xD500",
+        "EventName": "GPC_SWOB_DRAIN",
+        "BriefDescription": "Number of SWOB drains"
+    },
+    {
+        "PublicDescription": "GPC detected a Breakpoint instruction match",
+        "EventCode": "0xD501",
+        "EventName": "BREAKPOINT_MATCH",
+        "BriefDescription": "GPC detected a Breakpoint instruction match"
+    },
+    {
+        "PublicDescription": "Core progress monitor triggered",
+        "EventCode": "0xd502",
+        "EventName": "GPC_CPM_TRIGGER",
+        "BriefDescription": "Core progress monitor triggered"
+    },
+    {
+        "PublicDescription": "Fill buffer full",
+        "EventCode": "0xD601",
+        "EventName": "OFB_FULL",
+        "BriefDescription": "Fill buffer full"
+    },
+    {
+        "PublicDescription": "Load satisfied from store forwarded data",
+        "EventCode": "0xD605",
+        "EventName": "LD_FROM_ST_FWD",
+        "BriefDescription": "Load satisfied from store forwarded data"
+    },
+    {
+        "PublicDescription": "Store retirement pipe stall",
+        "EventCode": "0xD60C",
+        "EventName": "LSU_ST_RETIRE_STALL",
+        "BriefDescription": "Store retirement pipe stall"
+    },
+    {
+        "PublicDescription": "LSU detected a Watchpoint data match",
+        "EventCode": "0xD60D",
+        "EventName": "WATCHPOINT_MATCH",
+        "BriefDescription": "LSU detected a Watchpoint data match"
+    },
+    {
+        "PublicDescription": "Counts cycles that MSC is telling GPC to stall commit due to ETM ISTALL feature",
+        "EventCode": "0xda00",
+        "EventName": "MSC_ETM_COMMIT_STALL",
+        "BriefDescription": "Counts cycles that MSC is telling GPC to stall commit due to ETM ISTALL feature"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/exception.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/exception.json
new file mode 100644
index 000000000000..bd59ba7b74e4
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/exception.json
@@ -0,0 +1,47 @@
+[
+    {
+        "ArchStdEvent": "EXC_UNDEF"
+    },
+    {
+        "ArchStdEvent": "EXC_SVC"
+    },
+    {
+        "ArchStdEvent": "EXC_PABORT"
+    },
+    {
+        "ArchStdEvent": "EXC_DABORT"
+    },
+    {
+        "ArchStdEvent": "EXC_IRQ"
+    },
+    {
+        "ArchStdEvent": "EXC_FIQ"
+    },
+    {
+        "ArchStdEvent": "EXC_HVC"
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_PABORT"
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_DABORT"
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_OTHER"
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_IRQ"
+    },
+    {
+        "ArchStdEvent": "EXC_TRAP_FIQ"
+    },
+    {
+        "ArchStdEvent": "EXC_TAKEN"
+    },
+    {
+        "ArchStdEvent": "EXC_RETURN"
+    },
+    {
+        "ArchStdEvent": "EXC_SMC"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/instruction.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/instruction.json
new file mode 100644
index 000000000000..a6a20f541e33
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/instruction.json
@@ -0,0 +1,128 @@
+[
+    {
+        "ArchStdEvent": "SW_INCR"
+    },
+    {
+        "ArchStdEvent": "ST_RETIRED"
+    },
+    {
+        "ArchStdEvent": "LD_SPEC"
+    },
+    {
+        "ArchStdEvent": "ST_SPEC"
+    },
+    {
+        "ArchStdEvent": "LDST_SPEC"
+    },
+    {
+        "ArchStdEvent": "DP_SPEC"
+    },
+    {
+        "ArchStdEvent": "ASE_SPEC"
+    },
+    {
+        "ArchStdEvent": "VFP_SPEC"
+    },
+    {
+        "ArchStdEvent": "PC_WRITE_SPEC"
+    },
+    {
+        "ArchStdEvent": "BR_IMMED_RETIRED"
+    },
+    {
+        "ArchStdEvent": "BR_RETURN_RETIRED"
+    },
+    {
+        "ArchStdEvent": "CRYPTO_SPEC"
+    },
+    {
+        "ArchStdEvent": "ISB_SPEC"
+    },
+    {
+        "ArchStdEvent": "DSB_SPEC"
+    },
+    {
+        "ArchStdEvent": "DMB_SPEC"
+    },
+    {
+        "ArchStdEvent": "RC_LD_SPEC"
+    },
+    {
+        "ArchStdEvent": "RC_ST_SPEC"
+    },
+    {
+        "ArchStdEvent": "INST_RETIRED"
+    },
+    {
+        "ArchStdEvent": "CID_WRITE_RETIRED"
+    },
+    {
+        "ArchStdEvent": "PC_WRITE_RETIRED"
+    },
+    {
+        "ArchStdEvent": "INST_SPEC"
+    },
+    {
+        "ArchStdEvent": "TTBR_WRITE_RETIRED"
+    },
+    {
+        "ArchStdEvent": "BR_RETIRED"
+    },
+    {
+        "ArchStdEvent": "BR_MIS_PRED_RETIRED"
+    },
+    {
+        "ArchStdEvent": "OP_RETIRED"
+    },
+    {
+        "ArchStdEvent": "OP_SPEC"
+    },
+    {
+        "PublicDescription": "Operation speculatively executed - ASE Scalar",
+        "EventCode": "0xd210",
+        "EventName": "ASE_SCALAR_SPEC",
+        "BriefDescription": "Operation speculatively executed - ASE Scalar"
+    },
+    {
+        "PublicDescription": "Operation speculatively executed - ASE Vector",
+        "EventCode": "0xd211",
+        "EventName": "ASE_VECTOR_SPEC",
+        "BriefDescription": "Operation speculatively executed - ASE Vector"
+    },
+    {
+        "PublicDescription": "Barrier speculatively executed, CSDB",
+        "EventCode": "0x7f",
+        "EventName": "CSDB_SPEC",
+        "BriefDescription": "Barrier speculatively executed, CSDB"
+    },
+    {
+        "PublicDescription": "Prefetch sent to L2.",
+        "EventCode": "0xd106",
+        "EventName": "ICF_PREFETCH_DISPATCH",
+        "BriefDescription": "Prefetch sent to L2."
+    },
+    {
+        "PublicDescription": "Prefetch response received but was dropped since we don't support inflight upgrades.",
+        "EventCode": "0xd107",
+        "EventName": "ICF_PREFETCH_DROPPED_NO_UPGRADE",
+        "BriefDescription": "Prefetch response received but was dropped since we don't support inflight upgrades."
+    },
+    {
+        "PublicDescription": "Prefetch request missed TLB.",
+        "EventCode": "0xd108",
+        "EventName": "ICF_PREFETCH_DROPPED_TLB_MISS",
+        "BriefDescription": "Prefetch request missed TLB."
+    },
+    {
+        "PublicDescription": "Prefetch request dropped since duplicate was found in TLB.",
+        "EventCode": "0xd109",
+        "EventName": "ICF_PREFETCH_DROPPED_DUPLICATE",
+        "BriefDescription": "Prefetch request dropped since duplicate was found in TLB."
+    },
+    {
+        "PublicDescription": "Prefetch request dropped since it was found in cache.",
+        "EventCode": "0xd10a",
+        "EventName": "ICF_PREFETCH_DROPPED_CACHE_HIT",
+        "BriefDescription": "Prefetch request dropped since it was found in cache."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/intrinsic.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/intrinsic.json
new file mode 100644
index 000000000000..7ecffb989ae0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/intrinsic.json
@@ -0,0 +1,14 @@
+[
+    {
+        "ArchStdEvent": "LDREX_SPEC"
+    },
+    {
+        "ArchStdEvent": "STREX_PASS_SPEC"
+    },
+    {
+        "ArchStdEvent": "STREX_FAIL_SPEC"
+    },
+    {
+        "ArchStdEvent": "STREX_SPEC"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/memory.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/memory.json
new file mode 100644
index 000000000000..a211d94aacde
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/memory.json
@@ -0,0 +1,41 @@
+[
+    {
+        "ArchStdEvent": "LD_RETIRED"
+    },
+    {
+        "ArchStdEvent": "MEM_ACCESS_RD"
+    },
+    {
+        "ArchStdEvent": "MEM_ACCESS_WR"
+    },
+    {
+        "ArchStdEvent": "LD_ALIGN_LAT"
+    },
+    {
+        "ArchStdEvent": "ST_ALIGN_LAT"
+    },
+    {
+        "ArchStdEvent": "MEM_ACCESS"
+    },
+    {
+        "ArchStdEvent": "MEMORY_ERROR"
+    },
+    {
+        "ArchStdEvent": "LDST_ALIGN_LAT"
+    },
+    {
+        "ArchStdEvent": "MEM_ACCESS_CHECKED"
+    },
+    {
+        "ArchStdEvent": "MEM_ACCESS_CHECKED_RD"
+    },
+    {
+        "ArchStdEvent": "MEM_ACCESS_CHECKED_WR"
+    },
+    {
+        "PublicDescription": "Flushes due to memory hazards",
+        "EventCode": "0x121",
+        "EventName": "BPU_FLUSH_MEM_FAULT",
+        "BriefDescription": "Flushes due to memory hazards"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json
new file mode 100644
index 000000000000..c5d1d22bd034
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json
@@ -0,0 +1,442 @@
+[
+    {
+        "MetricName": "branch_miss_pred_rate",
+        "MetricExpr": "BR_MIS_PRED / BR_PRED",
+        "BriefDescription": "Branch predictor misprediction rate. May not count branches that are never resolved because they are in the misprediction shadow of an earlier branch",
+        "MetricGroup": "branch",
+        "ScaleUnit": "100%"
+    },
+    {
+        "MetricName": "bus_utilization",
+        "MetricExpr": "BUS_ACCESS / (BUS_CYCLES * 1)",
+        "BriefDescription": "Core-to-uncore bus utilization",
+        "MetricGroup": "Bus",
+        "ScaleUnit": "100percent of bus cycles"
+    },
+    {
+        "MetricName": "l1d_cache_miss_ratio",
+        "MetricExpr": "L1D_CACHE_REFILL / L1D_CACHE",
+        "BriefDescription": "This metric measures the ratio of level 1 data cache accesses missed to the total number of level 1 data cache accesses. This gives an indication of the effectiveness of the level 1 data cache.",
+        "MetricGroup": "Miss_Ratio;L1D_Cache_Effectiveness",
+        "ScaleUnit": "1per cache access"
+    },
+    {
+        "MetricName": "l1i_cache_miss_ratio",
+        "MetricExpr": "L1I_CACHE_REFILL / L1I_CACHE",
+        "BriefDescription": "This metric measures the ratio of level 1 instruction cache accesses missed to the total number of level 1 instruction cache accesses. This gives an indication of the effectiveness of the level 1 instruction cache.",
+        "MetricGroup": "Miss_Ratio;L1I_Cache_Effectiveness",
+        "ScaleUnit": "1per cache access"
+    },
+    {
+        "MetricName": "l1d_cache_read_miss_rate",
+        "MetricExpr": "L1D_CACHE_LMISS_RD / L1D_CACHE_RD",
+        "BriefDescription": "L1D cache read miss rate",
+        "MetricGroup": "Cache",
+        "ScaleUnit": "1per cache read access"
+    },
+    {
+        "MetricName": "l2_cache_miss_ratio",
+        "MetricExpr": "L2D_CACHE_REFILL / L2D_CACHE",
+        "BriefDescription": "This metric measures the ratio of level 2 cache accesses missed to the total number of level 2 cache accesses. This gives an indication of the effectiveness of the level 2 cache, which is a unified cache that stores both data and instruction. Note that cache accesses in this cache are either data memory access or instruction fetch as this is a unified cache.",
+        "MetricGroup": "Miss_Ratio;L2_Cache_Effectiveness",
+        "ScaleUnit": "1per cache access"
+    },
+    {
+        "MetricName": "l1i_cache_read_miss_rate",
+        "MetricExpr": "L1I_CACHE_LMISS / L1I_CACHE",
+        "BriefDescription": "L1I cache read miss rate",
+        "MetricGroup": "Cache",
+        "ScaleUnit": "1per cache access"
+    },
+    {
+        "MetricName": "l2d_cache_read_miss_rate",
+        "MetricExpr": "L2D_CACHE_LMISS_RD / L2D_CACHE_RD",
+        "BriefDescription": "L2 cache read miss rate",
+        "MetricGroup": "Cache",
+        "ScaleUnit": "1per cache read access"
+    },
+    {
+        "MetricName": "l1d_cache_miss_mpki",
+        "MetricExpr": "(L1D_CACHE_LMISS_RD * 1e3) / INST_RETIRED",
+        "BriefDescription": "Misses per thousand instructions (data)",
+        "MetricGroup": "Cache",
+        "ScaleUnit": "1MPKI"
+    },
+    {
+        "MetricName": "l1i_cache_miss_mpki",
+        "MetricExpr": "(L1I_CACHE_LMISS * 1e3) / INST_RETIRED",
+        "BriefDescription": "Misses per thousand instructions (instruction)",
+        "MetricGroup": "Cache",
+        "ScaleUnit": "1MPKI"
+    },
+    {
+        "MetricName": "simd_percentage",
+        "MetricExpr": "ASE_SPEC / INST_SPEC",
+        "BriefDescription": "This metric measures advanced SIMD operations as a percentage of total operations speculatively executed.",
+        "MetricGroup": "Operation_Mix",
+        "ScaleUnit": "100percent of operations"
+    },
+    {
+        "MetricName": "crypto_percentage",
+        "MetricExpr": "CRYPTO_SPEC / INST_SPEC",
+        "BriefDescription": "This metric measures crypto operations as a percentage of operations speculatively executed.",
+        "MetricGroup": "Operation_Mix",
+        "ScaleUnit": "100percent of operations"
+    },
+    {
+        "MetricName": "gflops",
+        "MetricExpr": "VFP_SPEC / (duration_time * 1e9)",
+        "BriefDescription": "Giga-floating point operations per second",
+        "MetricGroup": "InstructionMix"
+    },
+    {
+        "MetricName": "integer_dp_percentage",
+        "MetricExpr": "DP_SPEC / INST_SPEC",
+        "BriefDescription": "This metric measures scalar integer operations as a percentage of operations speculatively executed.",
+        "MetricGroup": "Operation_Mix",
+        "ScaleUnit": "100percent of operations"
+    },
+    {
+        "MetricName": "ipc",
+        "MetricExpr": "INST_RETIRED / CPU_CYCLES",
+        "BriefDescription": "This metric measures the number of instructions retired per cycle.",
+        "MetricGroup": "General",
+        "ScaleUnit": "1per cycle"
+    },
+    {
+        "MetricName": "load_percentage",
+        "MetricExpr": "LD_SPEC / INST_SPEC",
+        "BriefDescription": "This metric measures load operations as a percentage of operations speculatively executed.",
+        "MetricGroup": "Operation_Mix",
+        "ScaleUnit": "100percent of operations"
+    },
+    {
+        "MetricName": "load_store_spec_rate",
+        "MetricExpr": "LDST_SPEC / INST_SPEC",
+        "BriefDescription": "The rate of load or store instructions speculatively executed to overall instructions speculatively executed",
+        "MetricGroup": "Operation_Mix",
+        "ScaleUnit": "100percent of operations"
+    },
+    {
+        "MetricName": "retired_mips",
+        "MetricExpr": "INST_RETIRED / (duration_time * 1e6)",
+        "BriefDescription": "Millions of instructions per second",
+        "MetricGroup": "InstructionMix"
+    },
+    {
+        "MetricName": "spec_utilization_mips",
+        "MetricExpr": "INST_SPEC / (duration_time * 1e6)",
+        "BriefDescription": "Millions of instructions per second",
+        "MetricGroup": "PEutilization"
+    },
+    {
+        "MetricName": "pc_write_spec_rate",
+        "MetricExpr": "PC_WRITE_SPEC / INST_SPEC",
+        "BriefDescription": "The rate of software change of the PC speculatively executed to overall instructions speculatively executed",
+        "MetricGroup": "Operation_Mix",
+        "ScaleUnit": "100percent of operations"
+    },
+    {
+        "MetricName": "store_percentage",
+        "MetricExpr": "ST_SPEC / INST_SPEC",
+        "BriefDescription": "This metric measures store operations as a percentage of operations speculatively executed.",
+        "MetricGroup": "Operation_Mix",
+        "ScaleUnit": "100percent of operations"
+    },
+    {
+        "MetricName": "scalar_fp_percentage",
+        "MetricExpr": "VFP_SPEC / INST_SPEC",
+        "BriefDescription": "This metric measures scalar floating point operations as a percentage of operations speculatively executed.",
+        "MetricGroup": "Operation_Mix",
+        "ScaleUnit": "100percent of operations"
+    },
+    {
+        "MetricName": "retired_rate",
+        "MetricExpr": "OP_RETIRED / OP_SPEC",
+        "BriefDescription": "Of all the micro-operations issued, what percentage are retired(committed)",
+        "MetricGroup": "General",
+        "ScaleUnit": "100%"
+    },
+    {
+        "MetricName": "wasted",
+        "MetricExpr": "1 - (OP_RETIRED / (CPU_CYCLES * #slots))",
+        "BriefDescription": "Of all the micro-operations issued, what proportion are lost",
+        "MetricGroup": "General",
+        "ScaleUnit": "100%"
+    },
+    {
+        "MetricName": "wasted_rate",
+        "MetricExpr": "1 - OP_RETIRED / OP_SPEC",
+        "BriefDescription": "Of all the micro-operations issued, what percentage are not retired(committed)",
+        "MetricGroup": "General",
+        "ScaleUnit": "100%"
+    },
+    {
+        "MetricName": "stall_backend_cache_rate",
+        "MetricExpr": "STALL_BACKEND_CACHE / CPU_CYCLES",
+        "BriefDescription": "Proportion of cycles stalled and no operations issued to backend and cache miss",
+        "MetricGroup": "Stall",
+        "ScaleUnit": "100percent of cycles"
+    },
+    {
+        "MetricName": "stall_backend_resource_rate",
+        "MetricExpr": "STALL_BACKEND_RESOURCE / CPU_CYCLES",
+        "BriefDescription": "Proportion of cycles stalled and no operations issued to backend and resource full",
+        "MetricGroup": "Stall",
+        "ScaleUnit": "100percent of cycles"
+    },
+    {
+        "MetricName": "stall_backend_tlb_rate",
+        "MetricExpr": "STALL_BACKEND_TLB / CPU_CYCLES",
+        "BriefDescription": "Proportion of cycles stalled and no operations issued to backend and TLB miss",
+        "MetricGroup": "Stall",
+        "ScaleUnit": "100percent of cycles"
+    },
+    {
+        "MetricName": "stall_frontend_cache_rate",
+        "MetricExpr": "STALL_FRONTEND_CACHE / CPU_CYCLES",
+        "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and cache miss",
+        "MetricGroup": "Stall",
+        "ScaleUnit": "100percent of cycles"
+    },
+    {
+        "MetricName": "stall_frontend_tlb_rate",
+        "MetricExpr": "STALL_FRONTEND_TLB / CPU_CYCLES",
+        "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and TLB miss",
+        "MetricGroup": "Stall",
+        "ScaleUnit": "100percent of cycles"
+    },
+    {
+        "MetricName": "dtlb_walk_ratio",
+        "MetricExpr": "DTLB_WALK / L1D_TLB",
+        "BriefDescription": "This metric measures the ratio of data TLB Walks to the total number of data TLB accesses. This gives an indication of the effectiveness of the data TLB accesses.",
+        "MetricGroup": "Miss_Ratio;DTLB_Effectiveness",
+        "ScaleUnit": "1per TLB access"
+    },
+    {
+        "MetricName": "itlb_walk_ratio",
+        "MetricExpr": "ITLB_WALK / L1I_TLB",
+        "BriefDescription": "This metric measures the ratio of instruction TLB Walks to the total number of instruction TLB accesses. This gives an indication of the effectiveness of the instruction TLB accesses.",
+        "MetricGroup": "Miss_Ratio;ITLB_Effectiveness",
+        "ScaleUnit": "1per TLB access"
+    },
+    {
+        "ArchStdEvent": "backend_bound"
+    },
+    {
+        "ArchStdEvent": "frontend_bound",
+        "MetricExpr": "100 - (retired_fraction + slots_lost_misspeculation_fraction + backend_bound)"
+    },
+    {
+        "MetricName": "slots_lost_misspeculation_fraction",
+        "MetricExpr": "(OP_SPEC - OP_RETIRED) / (CPU_CYCLES * #slots)",
+        "BriefDescription": "Fraction of slots lost due to misspeculation",
+        "DefaultMetricgroupName": "TopdownL1",
+        "MetricGroup": "Default;TopdownL1",
+        "ScaleUnit": "100percent of slots"
+    },
+    {
+        "MetricName": "retired_fraction",
+        "MetricExpr": "OP_RETIRED / (CPU_CYCLES * #slots)",
+        "BriefDescription": "Fraction of slots retiring, useful work",
+        "DefaultMetricgroupName": "TopdownL1",
+        "MetricGroup": "Default;TopdownL1",
+        "ScaleUnit": "100percent of slots"
+    },
+    {
+        "MetricName": "backend_core",
+        "MetricExpr": "(backend_bound / 100) - backend_memory",
+        "BriefDescription": "Fraction of slots the CPU was stalled due to backend non-memory subsystem issues",
+        "MetricGroup": "TopdownL2",
+        "ScaleUnit": "100%"
+    },
+    {
+        "MetricName": "backend_memory",
+        "MetricExpr": "(STALL_BACKEND_TLB + STALL_BACKEND_CACHE) / CPU_CYCLES",
+        "BriefDescription": "Fraction of slots the CPU was stalled due to backend memory subsystem issues (cache/tlb miss)",
+        "MetricGroup": "TopdownL2",
+        "ScaleUnit": "100%"
+    },
+    {
+        "MetricName": "branch_mispredict",
+        "MetricExpr": "(BR_MIS_PRED_RETIRED / GPC_FLUSH) * slots_lost_misspeculation_fraction",
+        "BriefDescription": "Fraction of slots lost due to branch misprediction",
+        "MetricGroup": "TopdownL2",
+        "ScaleUnit": "1percent of slots"
+    },
+    {
+        "MetricName": "frontend_bandwidth",
+        "MetricExpr": "frontend_bound - frontend_latency",
+        "BriefDescription": "Fraction of slots the CPU did not dispatch at full bandwidth - able to dispatch partial slots only (1, 2, or 3 uops)",
+        "MetricGroup": "TopdownL2",
+        "ScaleUnit": "1percent of slots"
+    },
+    {
+        "MetricName": "frontend_latency",
+        "MetricExpr": "(STALL_FRONTEND - ((STALL_SLOT_FRONTEND - ((frontend_bound / 100) * CPU_CYCLES * #slots)) / #slots)) / CPU_CYCLES",
+        "BriefDescription": "Fraction of slots the CPU was stalled due to frontend latency issues (cache/tlb miss); nothing to dispatch",
+        "MetricGroup": "TopdownL2",
+        "ScaleUnit": "100percent of slots"
+    },
+    {
+        "MetricName": "other_miss_pred",
+        "MetricExpr": "slots_lost_misspeculation_fraction - branch_mispredict",
+        "BriefDescription": "Fraction of slots lost due to other/non-branch misprediction misspeculation",
+        "MetricGroup": "TopdownL2",
+        "ScaleUnit": "1percent of slots"
+    },
+    {
+        "MetricName": "pipe_utilization",
+        "MetricExpr": "100 * ((IXU_NUM_UOPS_ISSUED + FSU_ISSUED) / (CPU_CYCLES * 6))",
+        "BriefDescription": "Fraction of execute slots utilized",
+        "MetricGroup": "TopdownL2",
+        "ScaleUnit": "1percent of slots"
+    },
+    {
+        "MetricName": "d_cache_l2_miss_rate",
+        "MetricExpr": "STALL_BACKEND_MEM / CPU_CYCLES",
+        "BriefDescription": "Fraction of cycles the CPU was stalled due to data L2 cache miss",
+        "MetricGroup": "TopdownL3",
+        "ScaleUnit": "100percent of cycles"
+    },
+    {
+        "MetricName": "d_cache_miss_rate",
+        "MetricExpr": "STALL_BACKEND_CACHE / CPU_CYCLES",
+        "BriefDescription": "Fraction of cycles the CPU was stalled due to data cache miss",
+        "MetricGroup": "TopdownL3",
+        "ScaleUnit": "100percent of cycles"
+    },
+    {
+        "MetricName": "d_tlb_miss_rate",
+        "MetricExpr": "STALL_BACKEND_TLB / CPU_CYCLES",
+        "BriefDescription": "Fraction of cycles the CPU was stalled due to data TLB miss",
+        "MetricGroup": "TopdownL3",
+        "ScaleUnit": "100percent of cycles"
+    },
+    {
+        "MetricName": "fsu_pipe_utilization",
+        "MetricExpr": "FSU_ISSUED / (CPU_CYCLES * 2)",
+        "BriefDescription": "Fraction of FSU execute slots utilized",
+        "MetricGroup": "TopdownL3",
+        "ScaleUnit": "100percent of slots"
+    },
+    {
+        "MetricName": "i_cache_miss_rate",
+        "MetricExpr": "STALL_FRONTEND_CACHE / CPU_CYCLES",
+        "BriefDescription": "Fraction of cycles the CPU was stalled due to instruction cache miss",
+        "MetricGroup": "TopdownL3",
+        "ScaleUnit": "100percent of cycles"
+    },
+    {
+        "MetricName": "i_tlb_miss_rate",
+        "MetricExpr": "STALL_FRONTEND_TLB / CPU_CYCLES",
+        "BriefDescription": "Fraction of cycles the CPU was stalled due to instruction TLB miss",
+        "MetricGroup": "TopdownL3",
+        "ScaleUnit": "100percent of cycles"
+    },
+    {
+        "MetricName": "ixu_pipe_utilization",
+        "MetricExpr": "IXU_NUM_UOPS_ISSUED / (CPU_CYCLES * #slots)",
+        "BriefDescription": "Fraction of IXU execute slots utilized",
+        "MetricGroup": "TopdownL3",
+        "ScaleUnit": "100percent of slots"
+    },
+    {
+        "MetricName": "stall_recovery_rate",
+        "MetricExpr": "IDR_STALL_FLUSH / CPU_CYCLES",
+        "BriefDescription": "Fraction of cycles the CPU was stalled due to flush recovery",
+        "MetricGroup": "TopdownL3",
+        "ScaleUnit": "100percent of cycles"
+    },
+    {
+        "MetricName": "stall_fsu_sched_rate",
+        "MetricExpr": "IDR_STALL_FSU_SCHED / CPU_CYCLES",
+        "BriefDescription": "Fraction of cycles the CPU was stalled and FSU was full",
+        "MetricGroup": "TopdownL4",
+        "ScaleUnit": "100percent of cycles"
+    },
+    {
+        "MetricName": "stall_ixu_sched_rate",
+        "MetricExpr": "IDR_STALL_IXU_SCHED / CPU_CYCLES",
+        "BriefDescription": "Fraction of cycles the CPU was stalled and IXU was full",
+        "MetricGroup": "TopdownL4",
+        "ScaleUnit": "100percent of cycles"
+    },
+    {
+        "MetricName": "stall_lob_id_rate",
+        "MetricExpr": "IDR_STALL_LOB_ID / CPU_CYCLES",
+        "BriefDescription": "Fraction of cycles the CPU was stalled and LOB was full",
+        "MetricGroup": "TopdownL4",
+        "ScaleUnit": "100percent of cycles"
+    },
+    {
+        "MetricName": "stall_rob_id_rate",
+        "MetricExpr": "IDR_STALL_ROB_ID / CPU_CYCLES",
+        "BriefDescription": "Fraction of cycles the CPU was stalled and ROB was full",
+        "MetricGroup": "TopdownL4",
+        "ScaleUnit": "100percent of cycles"
+    },
+    {
+        "MetricName": "stall_sob_id_rate",
+        "MetricExpr": "IDR_STALL_SOB_ID / CPU_CYCLES",
+        "BriefDescription": "Fraction of cycles the CPU was stalled and SOB was full",
+        "MetricGroup": "TopdownL4",
+        "ScaleUnit": "100percent of cycles"
+    },
+    {
+        "MetricName": "l1d_cache_access_demand",
+        "MetricExpr": "L1D_CACHE_RW / L1D_CACHE",
+        "BriefDescription": "L1D cache access - demand",
+        "MetricGroup": "Cache",
+        "ScaleUnit": "100percent of cache accesses"
+    },
+    {
+        "MetricName": "l1d_cache_access_prefetces",
+        "MetricExpr": "L1D_CACHE_PRFM / L1D_CACHE",
+        "BriefDescription": "L1D cache access - prefetch",
+        "MetricGroup": "Cache",
+        "ScaleUnit": "100percent of cache accesses"
+    },
+    {
+        "MetricName": "l1d_cache_demand_misses",
+        "MetricExpr": "L1D_CACHE_REFILL_RW / L1D_CACHE",
+        "BriefDescription": "L1D cache demand misses",
+        "MetricGroup": "Cache",
+        "ScaleUnit": "100percent of cache accesses"
+    },
+    {
+        "MetricName": "l1d_cache_demand_misses_read",
+        "MetricExpr": "L1D_CACHE_REFILL_RD / L1D_CACHE",
+        "BriefDescription": "L1D cache demand misses - read",
+        "MetricGroup": "Cache",
+        "ScaleUnit": "100percent of cache accesses"
+    },
+    {
+        "MetricName": "l1d_cache_demand_misses_write",
+        "MetricExpr": "L1D_CACHE_REFILL_WR / L1D_CACHE",
+        "BriefDescription": "L1D cache demand misses - write",
+        "MetricGroup": "Cache",
+        "ScaleUnit": "100percent of cache accesses"
+    },
+    {
+        "MetricName": "l1d_cache_prefetch_misses",
+        "MetricExpr": "L1D_CACHE_REFILL_PRFM / L1D_CACHE",
+        "BriefDescription": "L1D cache prefetch misses",
+        "MetricGroup": "Cache",
+        "ScaleUnit": "100percent of cache accesses"
+    },
+    {
+        "MetricName": "ase_scalar_mix",
+        "MetricExpr": "ASE_SCALAR_SPEC / OP_SPEC",
+        "BriefDescription": "Proportion of advanced SIMD data processing operations (excluding DP_SPEC/LD_SPEC) scalar operations",
+        "MetricGroup": "Instructions",
+        "ScaleUnit": "100percent of operations"
+    },
+    {
+        "MetricName": "ase_vector_mix",
+        "MetricExpr": "ASE_VECTOR_SPEC / OP_SPEC",
+        "BriefDescription": "Proportion of advanced SIMD data processing operations (excluding DP_SPEC/LD_SPEC) vector operations",
+        "MetricGroup": "Instructions",
+        "ScaleUnit": "100percent of operations"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/mmu.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/mmu.json
new file mode 100644
index 000000000000..66d83b680651
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/mmu.json
@@ -0,0 +1,170 @@
+[
+    {
+        "PublicDescription": "Level 2 data translation buffer allocation",
+        "EventCode": "0xD800",
+        "EventName": "MMU_D_OTB_ALLOC",
+        "BriefDescription": "Level 2 data translation buffer allocation"
+    },
+    {
+        "PublicDescription": "Data TLB translation cache hit on S1L2 walk cache entry",
+        "EventCode": "0xd801",
+        "EventName": "MMU_D_TRANS_CACHE_HIT_S1L2_WALK",
+        "BriefDescription": "Data TLB translation cache hit on S1L2 walk cache entry"
+    },
+    {
+        "PublicDescription": "Data TLB translation cache hit on S1L1 walk cache entry",
+        "EventCode": "0xd802",
+        "EventName": "MMU_D_TRANS_CACHE_HIT_S1L1_WALK",
+        "BriefDescription": "Data TLB translation cache hit on S1L1 walk cache entry"
+    },
+    {
+        "PublicDescription": "Data TLB translation cache hit on S1L0 walk cache entry",
+        "EventCode": "0xd803",
+        "EventName": "MMU_D_TRANS_CACHE_HIT_S1L0_WALK",
+        "BriefDescription": "Data TLB translation cache hit on S1L0 walk cache entry"
+    },
+    {
+        "PublicDescription": "Data TLB translation cache hit on S2L2 walk cache entry",
+        "EventCode": "0xd804",
+        "EventName": "MMU_D_TRANS_CACHE_HIT_S2L2_WALK",
+        "BriefDescription": "Data TLB translation cache hit on S2L2 walk cache entry"
+    },
+    {
+        "PublicDescription": "Data TLB translation cache hit on S2L1 walk cache entry",
+        "EventCode": "0xd805",
+        "EventName": "MMU_D_TRANS_CACHE_HIT_S2L1_WALK",
+        "BriefDescription": "Data TLB translation cache hit on S2L1 walk cache entry"
+    },
+    {
+        "PublicDescription": "Data TLB translation cache hit on S2L0 walk cache entry",
+        "EventCode": "0xd806",
+        "EventName": "MMU_D_TRANS_CACHE_HIT_S2L0_WALK",
+        "BriefDescription": "Data TLB translation cache hit on S2L0 walk cache entry"
+    },
+    {
+        "PublicDescription": "Data-side S1 page walk cache lookup",
+        "EventCode": "0xd807",
+        "EventName": "MMU_D_S1_WALK_CACHE_LOOKUP",
+        "BriefDescription": "Data-side S1 page walk cache lookup"
+    },
+    {
+        "PublicDescription": "Data-side S1 page walk cache refill",
+        "EventCode": "0xd808",
+        "EventName": "MMU_D_S1_WALK_CACHE_REFILL",
+        "BriefDescription": "Data-side S1 page walk cache refill"
+    },
+    {
+        "PublicDescription": "Data-side S2 page walk cache lookup",
+        "EventCode": "0xd809",
+        "EventName": "MMU_D_S2_WALK_CACHE_LOOKUP",
+        "BriefDescription": "Data-side S2 page walk cache lookup"
+    },
+    {
+        "PublicDescription": "Data-side S2 page walk cache refill",
+        "EventCode": "0xd80a",
+        "EventName": "MMU_D_S2_WALK_CACHE_REFILL",
+        "BriefDescription": "Data-side S2 page walk cache refill"
+    },
+    {
+        "PublicDescription": "Data-side S1 table walk fault",
+        "EventCode": "0xD80B",
+        "EventName": "MMU_D_S1_WALK_FAULT",
+        "BriefDescription": "Data-side S1 table walk fault"
+    },
+    {
+        "PublicDescription": "Data-side S2 table walk fault",
+        "EventCode": "0xD80C",
+        "EventName": "MMU_D_S2_WALK_FAULT",
+        "BriefDescription": "Data-side S2 table walk fault"
+    },
+    {
+        "PublicDescription": "Data-side table walk steps or descriptor fetches",
+        "EventCode": "0xD80D",
+        "EventName": "MMU_D_WALK_STEPS",
+        "BriefDescription": "Data-side table walk steps or descriptor fetches"
+    },
+    {
+        "PublicDescription": "Level 2 instruction translation buffer allocation",
+        "EventCode": "0xD900",
+        "EventName": "MMU_I_OTB_ALLOC",
+        "BriefDescription": "Level 2 instruction translation buffer allocation"
+    },
+    {
+        "PublicDescription": "Instruction TLB translation cache hit on S1L2 walk cache entry",
+        "EventCode": "0xd901",
+        "EventName": "MMU_I_TRANS_CACHE_HIT_S1L2_WALK",
+        "BriefDescription": "Instruction TLB translation cache hit on S1L2 walk cache entry"
+    },
+    {
+        "PublicDescription": "Instruction TLB translation cache hit on S1L1 walk cache entry",
+        "EventCode": "0xd902",
+        "EventName": "MMU_I_TRANS_CACHE_HIT_S1L1_WALK",
+        "BriefDescription": "Instruction TLB translation cache hit on S1L1 walk cache entry"
+    },
+    {
+        "PublicDescription": "Instruction TLB translation cache hit on S1L0 walk cache entry",
+        "EventCode": "0xd903",
+        "EventName": "MMU_I_TRANS_CACHE_HIT_S1L0_WALK",
+        "BriefDescription": "Instruction TLB translation cache hit on S1L0 walk cache entry"
+    },
+    {
+        "PublicDescription": "Instruction TLB translation cache hit on S2L2 walk cache entry",
+        "EventCode": "0xd904",
+        "EventName": "MMU_I_TRANS_CACHE_HIT_S2L2_WALK",
+        "BriefDescription": "Instruction TLB translation cache hit on S2L2 walk cache entry"
+    },
+    {
+        "PublicDescription": "Instruction TLB translation cache hit on S2L1 walk cache entry",
+        "EventCode": "0xd905",
+        "EventName": "MMU_I_TRANS_CACHE_HIT_S2L1_WALK",
+        "BriefDescription": "Instruction TLB translation cache hit on S2L1 walk cache entry"
+    },
+    {
+        "PublicDescription": "Instruction TLB translation cache hit on S2L0 walk cache entry",
+        "EventCode": "0xd906",
+        "EventName": "MMU_I_TRANS_CACHE_HIT_S2L0_WALK",
+        "BriefDescription": "Instruction TLB translation cache hit on S2L0 walk cache entry"
+    },
+    {
+        "PublicDescription": "Instruction-side S1 page walk cache lookup",
+        "EventCode": "0xd907",
+        "EventName": "MMU_I_S1_WALK_CACHE_LOOKUP",
+        "BriefDescription": "Instruction-side S1 page walk cache lookup"
+    },
+    {
+        "PublicDescription": "Instruction-side S1 page walk cache refill",
+        "EventCode": "0xd908",
+        "EventName": "MMU_I_S1_WALK_CACHE_REFILL",
+        "BriefDescription": "Instruction-side S1 page walk cache refill"
+    },
+    {
+        "PublicDescription": "Instruction-side S2 page walk cache lookup",
+        "EventCode": "0xd909",
+        "EventName": "MMU_I_S2_WALK_CACHE_LOOKUP",
+        "BriefDescription": "Instruction-side S2 page walk cache lookup"
+    },
+    {
+        "PublicDescription": "Instruction-side S2 page walk cache refill",
+        "EventCode": "0xd90a",
+        "EventName": "MMU_I_S2_WALK_CACHE_REFILL",
+        "BriefDescription": "Instruction-side S2 page walk cache refill"
+    },
+    {
+        "PublicDescription": "Instruction-side S1 table walk fault",
+        "EventCode": "0xD90B",
+        "EventName": "MMU_I_S1_WALK_FAULT",
+        "BriefDescription": "Instruction-side S1 table walk fault"
+    },
+    {
+        "PublicDescription": "Instruction-side S2 table walk fault",
+        "EventCode": "0xD90C",
+        "EventName": "MMU_I_S2_WALK_FAULT",
+        "BriefDescription": "Instruction-side S2 table walk fault"
+    },
+    {
+        "PublicDescription": "Instruction-side table walk steps or descriptor fetches",
+        "EventCode": "0xD90D",
+        "EventName": "MMU_I_WALK_STEPS",
+        "BriefDescription": "Instruction-side table walk steps or descriptor fetches"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/pipeline.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/pipeline.json
new file mode 100644
index 000000000000..2fb2d1f183fc
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/pipeline.json
@@ -0,0 +1,41 @@
+[
+    {
+        "ArchStdEvent": "STALL_FRONTEND",
+        "Errata": "Errata AC03_CPU_29",
+        "BriefDescription": "Impacted by errata, use metrics instead -"
+    },
+    {
+        "ArchStdEvent": "STALL_BACKEND"
+    },
+    {
+        "ArchStdEvent": "STALL",
+        "Errata": "Errata AC03_CPU_29",
+        "BriefDescription": "Impacted by errata, use metrics instead -"
+    },
+    {
+        "ArchStdEvent": "STALL_SLOT_BACKEND"
+    },
+    {
+        "ArchStdEvent": "STALL_SLOT_FRONTEND",
+        "Errata": "Errata AC03_CPU_29",
+        "BriefDescription": "Impacted by errata, use metrics instead -"
+    },
+    {
+        "ArchStdEvent": "STALL_SLOT"
+    },
+    {
+        "ArchStdEvent": "STALL_BACKEND_MEM"
+    },
+    {
+        "PublicDescription": "Frontend stall cycles, TLB",
+        "EventCode": "0x815c",
+        "EventName": "STALL_FRONTEND_TLB",
+        "BriefDescription": "Frontend stall cycles, TLB"
+    },
+    {
+        "PublicDescription": "Backend stall cycles, TLB",
+        "EventCode": "0x8167",
+        "EventName": "STALL_BACKEND_TLB",
+        "BriefDescription": "Backend stall cycles, TLB"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/spe.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/spe.json
new file mode 100644
index 000000000000..20f2165c85fe
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/spe.json
@@ -0,0 +1,14 @@
+[
+    {
+        "ArchStdEvent": "SAMPLE_POP"
+    },
+    {
+        "ArchStdEvent": "SAMPLE_FEED"
+    },
+    {
+        "ArchStdEvent": "SAMPLE_FILTRATE"
+    },
+    {
+        "ArchStdEvent": "SAMPLE_COLLISION"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cmn/sys/cmn.json b/tools/perf/pmu-events/arch/arm64/arm/cmn/sys/cmn.json
index 428605c37d10..5ec157c39f0d 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/cmn/sys/cmn.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/cmn/sys/cmn.json
@@ -107,7 +107,7 @@
 		"EventName": "hnf_qos_hh_retry",
 		"EventidCode": "0xe",
 		"NodeType": "0x5",
-		"BriefDescription": "Counts number of times a HighHigh priority request is protocolretried at the HN‑F.",
+		"BriefDescription": "Counts number of times a HighHigh priority request is protocolretried at the HN-F.",
 		"Unit": "arm_cmn",
 		"Compat": "(434|436|43c|43a).*"
 	},
diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv
index 5b58db5032c1..f4d1ca4d1493 100644
--- a/tools/perf/pmu-events/arch/arm64/mapfile.csv
+++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv
@@ -42,3 +42,4 @@
 0x00000000480fd010,v1,hisilicon/hip08,core
 0x00000000500f0000,v1,ampere/emag,core
 0x00000000c00fac30,v1,ampere/ampereone,core
+0x00000000c00fac40,v1,ampere/ampereonex,core
diff --git a/tools/perf/pmu-events/arch/powerpc/mapfile.csv b/tools/perf/pmu-events/arch/powerpc/mapfile.csv
index f4908af7ad66..599a588dbeb4 100644
--- a/tools/perf/pmu-events/arch/powerpc/mapfile.csv
+++ b/tools/perf/pmu-events/arch/powerpc/mapfile.csv
@@ -11,8 +11,7 @@
 #
 # Multiple PVRs could map to a single JSON file.
 #
-
-# Power8 entries
 0x004[bcd][[:xdigit:]]{4},1,power8,core
+0x0066[[:xdigit:]]{4},1,power8,core
 0x004e[[:xdigit:]]{4},1,power9,core
 0x0080[[:xdigit:]]{4},1,power10,core
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/datasource.json b/tools/perf/pmu-events/arch/powerpc/power10/datasource.json
index 6b0356f2d301..0eeaaf1a95b8 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/datasource.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/datasource.json
@@ -100,6 +100,11 @@
     "BriefDescription": "The processor's instruction cache was reloaded from a source beyond the local core's L2 due to a demand miss."
   },
   {
+    "EventCode": "0x0003C0000000C040",
+    "EventName": "PM_DATA_FROM_L2MISS_DSRC",
+    "BriefDescription": "The processor's L1 data cache was reloaded from a source beyond the local core's L2 due to a demand miss."
+  },
+  {
     "EventCode": "0x000380000010C040",
     "EventName": "PM_INST_FROM_L2MISS_ALL",
     "BriefDescription": "The processor's instruction cache was reloaded from a source beyond the local core's L2 due to a demand miss or prefetch reload."
@@ -161,10 +166,15 @@
   },
   {
     "EventCode": "0x000780000000C040",
-    "EventName": "PM_INST_FROM_L3MISS",
+    "EventName": "PM_INST_FROM_L3MISS_DSRC",
     "BriefDescription": "The processor's instruction cache was reloaded from beyond the local core's L3 due to a demand miss."
   },
   {
+    "EventCode": "0x0007C0000000C040",
+    "EventName": "PM_DATA_FROM_L3MISS_DSRC",
+    "BriefDescription": "The processor's L1 data cache was reloaded from beyond the local core's L3 due to a demand miss."
+  },
+  {
     "EventCode": "0x000780000010C040",
     "EventName": "PM_INST_FROM_L3MISS_ALL",
     "BriefDescription": "The processor's instruction cache was reloaded from beyond the local core's L3 due to a demand miss or prefetch reload."
@@ -981,7 +991,7 @@
   },
   {
     "EventCode": "0x0003C0000000C142",
-    "EventName": "PM_MRK_DATA_FROM_L2MISS",
+    "EventName": "PM_MRK_DATA_FROM_L2MISS_DSRC",
     "BriefDescription": "The processor's L1 data cache was reloaded from a source beyond the local core's L2 due to a demand miss for a marked instruction."
   },
   {
@@ -1046,12 +1056,12 @@
   },
   {
     "EventCode": "0x000780000000C142",
-    "EventName": "PM_MRK_INST_FROM_L3MISS",
+    "EventName": "PM_MRK_INST_FROM_L3MISS_DSRC",
     "BriefDescription": "The processor's instruction cache was reloaded from beyond the local core's L3 due to a demand miss for a marked instruction."
   },
   {
     "EventCode": "0x0007C0000000C142",
-    "EventName": "PM_MRK_DATA_FROM_L3MISS",
+    "EventName": "PM_MRK_DATA_FROM_L3MISS_DSRC",
     "BriefDescription": "The processor's L1 data cache was reloaded from beyond the local core's L3 due to a demand miss for a marked instruction."
   },
   {
diff --git a/tools/perf/pmu-events/arch/riscv/mapfile.csv b/tools/perf/pmu-events/arch/riscv/mapfile.csv
index c61b3d6ef616..cfc449b19810 100644
--- a/tools/perf/pmu-events/arch/riscv/mapfile.csv
+++ b/tools/perf/pmu-events/arch/riscv/mapfile.csv
@@ -15,3 +15,5 @@
 #
 #MVENDORID-MARCHID-MIMPID,Version,Filename,EventType
 0x489-0x8000000000000007-0x[[:xdigit:]]+,v1,sifive/u74,core
+0x5b7-0x0-0x0,v1,thead/c900-legacy,core
+0x67e-0x80000000db0000[89]0-0x[[:xdigit:]]+,v1,starfive/dubhe-80,core
diff --git a/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/common.json b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/common.json
new file mode 100644
index 000000000000..fbffcacb2ace
--- /dev/null
+++ b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/common.json
@@ -0,0 +1,172 @@
+[
+  {
+    "EventName": "ACCESS_MMU_STLB",
+    "EventCode": "0x1",
+    "BriefDescription": "access MMU STLB"
+  },
+  {
+    "EventName": "MISS_MMU_STLB",
+    "EventCode": "0x2",
+    "BriefDescription": "miss MMU STLB"
+  },
+  {
+    "EventName": "ACCESS_MMU_PTE_C",
+    "EventCode": "0x3",
+    "BriefDescription": "access MMU PTE-Cache"
+  },
+  {
+    "EventName": "MISS_MMU_PTE_C",
+    "EventCode": "0x4",
+    "BriefDescription": "miss MMU PTE-Cache"
+  },
+  {
+    "EventName": "ROB_FLUSH",
+    "EventCode": "0x5",
+    "BriefDescription": "ROB flush (all kinds of exceptions)"
+  },
+  {
+    "EventName": "BTB_PREDICTION_MISS",
+    "EventCode": "0x6",
+    "BriefDescription": "BTB prediction miss"
+  },
+  {
+    "EventName": "ITLB_MISS",
+    "EventCode": "0x7",
+    "BriefDescription": "ITLB miss"
+  },
+  {
+    "EventName": "SYNC_DEL_FETCH_G",
+    "EventCode": "0x8",
+    "BriefDescription": "SYNC delivery a fetch-group"
+  },
+  {
+    "EventName": "ICACHE_MISS",
+    "EventCode": "0x9",
+    "BriefDescription": "ICache miss"
+  },
+  {
+    "EventName": "BPU_BR_RETIRE",
+    "EventCode": "0xA",
+    "BriefDescription": "condition branch instruction retire"
+  },
+  {
+    "EventName": "BPU_BR_MISS",
+    "EventCode": "0xB",
+    "BriefDescription": "condition branch instruction miss"
+  },
+  {
+    "EventName": "RET_INS_RETIRE",
+    "EventCode": "0xC",
+    "BriefDescription": "return instruction retire"
+  },
+  {
+    "EventName": "RET_INS_MISS",
+    "EventCode": "0xD",
+    "BriefDescription": "return instruction miss"
+  },
+  {
+    "EventName": "INDIRECT_JR_MISS",
+    "EventCode": "0xE",
+    "BriefDescription": "indirect JR instruction miss (include without target)"
+  },
+  {
+    "EventName": "IBUF_VAL_ID_NORDY",
+    "EventCode": "0xF",
+    "BriefDescription": "IBUF valid while ID not ready"
+  },
+  {
+    "EventName": "IBUF_NOVAL_ID_RDY",
+    "EventCode": "0x10",
+    "BriefDescription": "IBUF not valid while ID ready"
+  },
+  {
+    "EventName": "REN_INT_PHY_REG_NORDY",
+    "EventCode": "0x11",
+    "BriefDescription": "REN integer physical register file is not ready"
+  },
+  {
+    "EventName": "REN_FP_PHY_REG_NORDY",
+    "EventCode": "0x12",
+    "BriefDescription": "REN floating point physical register file is not ready"
+  },
+  {
+    "EventName": "REN_CP_NORDY",
+    "EventCode": "0x13",
+    "BriefDescription": "REN checkpoint is not ready"
+  },
+  {
+    "EventName": "DEC_VAL_ROB_NORDY",
+    "EventCode": "0x14",
+    "BriefDescription": "DEC is valid and ROB is not ready"
+  },
+  {
+    "EventName": "OOD_FLUSH_LS_DEP",
+    "EventCode": "0x15",
+    "BriefDescription": "out of order flush due to load/store dependency"
+  },
+  {
+    "EventName": "BRU_RET_IJR_INS",
+    "EventCode": "0x16",
+    "BriefDescription": "BRU retire an IJR instruction"
+  },
+  {
+    "EventName": "ACCESS_DTLB",
+    "EventCode": "0x17",
+    "BriefDescription": "access DTLB"
+  },
+  {
+    "EventName": "MISS_DTLB",
+    "EventCode": "0x18",
+    "BriefDescription": "miss DTLB"
+  },
+  {
+    "EventName": "LOAD_INS_DCACHE",
+    "EventCode": "0x19",
+    "BriefDescription": "load instruction access DCache"
+  },
+  {
+    "EventName": "LOAD_INS_MISS_DCACHE",
+    "EventCode": "0x1A",
+    "BriefDescription": "load instruction miss DCache"
+  },
+  {
+    "EventName": "STORE_INS_DCACHE",
+    "EventCode": "0x1B",
+    "BriefDescription": "store/amo instruction access DCache"
+  },
+  {
+    "EventName": "STORE_INS_MISS_DCACHE",
+    "EventCode": "0x1C",
+    "BriefDescription": "store/amo instruction miss DCache"
+  },
+  {
+    "EventName": "LOAD_SCACHE",
+    "EventCode": "0x1D",
+    "BriefDescription": "load access SCache"
+  },
+  {
+    "EventName": "STORE_SCACHE",
+    "EventCode": "0x1E",
+    "BriefDescription": "store access SCache"
+  },
+  {
+    "EventName": "LOAD_MISS_SCACHE",
+    "EventCode": "0x1F",
+    "BriefDescription": "load miss SCache"
+  },
+  {
+    "EventName": "STORE_MISS_SCACHE",
+    "EventCode": "0x20",
+    "BriefDescription": "store miss SCache"
+  },
+  {
+    "EventName": "L2C_PF_REQ",
+    "EventCode": "0x21",
+    "BriefDescription": "L2C data-prefetcher request"
+  },
+  {
+    "EventName": "L2C_PF_HIT",
+    "EventCode": "0x22",
+    "BriefDescription": "L2C data-prefetcher hit"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json
new file mode 100644
index 000000000000..9b4a032186a7
--- /dev/null
+++ b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json
@@ -0,0 +1,68 @@
+[
+  {
+    "ArchStdEvent": "FW_MISALIGNED_LOAD"
+  },
+  {
+    "ArchStdEvent": "FW_MISALIGNED_STORE"
+  },
+  {
+    "ArchStdEvent": "FW_ACCESS_LOAD"
+  },
+  {
+    "ArchStdEvent": "FW_ACCESS_STORE"
+  },
+  {
+    "ArchStdEvent": "FW_ILLEGAL_INSN"
+  },
+  {
+    "ArchStdEvent": "FW_SET_TIMER"
+  },
+  {
+    "ArchStdEvent": "FW_IPI_SENT"
+  },
+  {
+    "ArchStdEvent": "FW_IPI_RECEIVED"
+  },
+  {
+    "ArchStdEvent": "FW_FENCE_I_SENT"
+  },
+  {
+    "ArchStdEvent": "FW_FENCE_I_RECEIVED"
+  },
+  {
+    "ArchStdEvent": "FW_SFENCE_VMA_SENT"
+  },
+  {
+    "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED"
+  },
+  {
+    "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT"
+  },
+  {
+    "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED"
+  },
+  {
+    "ArchStdEvent": "FW_HFENCE_GVMA_SENT"
+  },
+  {
+    "ArchStdEvent": "FW_HFENCE_GVMA_RECEIVED"
+  },
+  {
+    "ArchStdEvent": "FW_HFENCE_GVMA_VMID_SENT"
+  },
+  {
+    "ArchStdEvent": "FW_HFENCE_GVMA_VMID_RECEIVED"
+  },
+  {
+    "ArchStdEvent": "FW_HFENCE_VVMA_SENT"
+  },
+  {
+    "ArchStdEvent": "FW_HFENCE_VVMA_RECEIVED"
+  },
+  {
+    "ArchStdEvent": "FW_HFENCE_VVMA_ASID_SENT"
+  },
+  {
+    "ArchStdEvent": "FW_HFENCE_VVMA_ASID_RECEIVED"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/cache.json b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/cache.json
new file mode 100644
index 000000000000..2b142348d635
--- /dev/null
+++ b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/cache.json
@@ -0,0 +1,67 @@
+[
+  {
+    "EventName": "L1_ICACHE_ACCESS",
+    "EventCode": "0x00000001",
+    "BriefDescription": "L1 instruction cache access"
+  },
+  {
+    "EventName": "L1_ICACHE_MISS",
+    "EventCode": "0x00000002",
+    "BriefDescription": "L1 instruction cache miss"
+  },
+  {
+    "EventName": "ITLB_MISS",
+    "EventCode": "0x00000003",
+    "BriefDescription": "I-UTLB miss"
+  },
+  {
+    "EventName": "DTLB_MISS",
+    "EventCode": "0x00000004",
+    "BriefDescription": "D-UTLB miss"
+  },
+  {
+    "EventName": "JTLB_MISS",
+    "EventCode": "0x00000005",
+    "BriefDescription": "JTLB miss"
+  },
+  {
+    "EventName": "L1_DCACHE_READ_ACCESS",
+    "EventCode": "0x0000000c",
+    "BriefDescription": "L1 data cache read access"
+  },
+  {
+    "EventName": "L1_DCACHE_READ_MISS",
+    "EventCode": "0x0000000d",
+    "BriefDescription": "L1 data cache read miss"
+  },
+  {
+    "EventName": "L1_DCACHE_WRITE_ACCESS",
+    "EventCode": "0x0000000e",
+    "BriefDescription": "L1 data cache write access"
+  },
+  {
+    "EventName": "L1_DCACHE_WRITE_MISS",
+    "EventCode": "0x0000000f",
+    "BriefDescription": "L1 data cache write miss"
+  },
+  {
+    "EventName": "LL_CACHE_READ_ACCESS",
+    "EventCode": "0x00000010",
+    "BriefDescription": "LL Cache read access"
+  },
+  {
+    "EventName": "LL_CACHE_READ_MISS",
+    "EventCode": "0x00000011",
+    "BriefDescription": "LL Cache read miss"
+  },
+  {
+    "EventName": "LL_CACHE_WRITE_ACCESS",
+    "EventCode": "0x00000012",
+    "BriefDescription": "LL Cache write access"
+  },
+  {
+    "EventName": "LL_CACHE_WRITE_MISS",
+    "EventCode": "0x00000013",
+    "BriefDescription": "LL Cache write miss"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json
new file mode 100644
index 000000000000..9b4a032186a7
--- /dev/null
+++ b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json
@@ -0,0 +1,68 @@
+[
+  {
+    "ArchStdEvent": "FW_MISALIGNED_LOAD"
+  },
+  {
+    "ArchStdEvent": "FW_MISALIGNED_STORE"
+  },
+  {
+    "ArchStdEvent": "FW_ACCESS_LOAD"
+  },
+  {
+    "ArchStdEvent": "FW_ACCESS_STORE"
+  },
+  {
+    "ArchStdEvent": "FW_ILLEGAL_INSN"
+  },
+  {
+    "ArchStdEvent": "FW_SET_TIMER"
+  },
+  {
+    "ArchStdEvent": "FW_IPI_SENT"
+  },
+  {
+    "ArchStdEvent": "FW_IPI_RECEIVED"
+  },
+  {
+    "ArchStdEvent": "FW_FENCE_I_SENT"
+  },
+  {
+    "ArchStdEvent": "FW_FENCE_I_RECEIVED"
+  },
+  {
+    "ArchStdEvent": "FW_SFENCE_VMA_SENT"
+  },
+  {
+    "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED"
+  },
+  {
+    "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT"
+  },
+  {
+    "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED"
+  },
+  {
+    "ArchStdEvent": "FW_HFENCE_GVMA_SENT"
+  },
+  {
+    "ArchStdEvent": "FW_HFENCE_GVMA_RECEIVED"
+  },
+  {
+    "ArchStdEvent": "FW_HFENCE_GVMA_VMID_SENT"
+  },
+  {
+    "ArchStdEvent": "FW_HFENCE_GVMA_VMID_RECEIVED"
+  },
+  {
+    "ArchStdEvent": "FW_HFENCE_VVMA_SENT"
+  },
+  {
+    "ArchStdEvent": "FW_HFENCE_VVMA_RECEIVED"
+  },
+  {
+    "ArchStdEvent": "FW_HFENCE_VVMA_ASID_SENT"
+  },
+  {
+    "ArchStdEvent": "FW_HFENCE_VVMA_ASID_RECEIVED"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/instruction.json b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/instruction.json
new file mode 100644
index 000000000000..c822b5373333
--- /dev/null
+++ b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/instruction.json
@@ -0,0 +1,72 @@
+[
+  {
+    "EventName": "INST_BRANCH_MISPREDICT",
+    "EventCode": "0x00000006",
+    "BriefDescription": "Mispredicted branch instructions"
+  },
+  {
+    "EventName": "INST_BRANCH",
+    "EventCode": "0x00000007",
+    "BriefDescription": "Retired branch instructions"
+  },
+  {
+    "EventName": "INST_JMP_MISPREDICT",
+    "EventCode": "0x00000008",
+    "BriefDescription": "Indirect branch mispredict"
+  },
+  {
+    "EventName": "INST_JMP",
+    "EventCode": "0x00000009",
+    "BriefDescription": "Retired jmp instructions"
+  },
+  {
+    "EventName": "INST_STORE",
+    "EventCode": "0x0000000b",
+    "BriefDescription": "Retired store instructions"
+  },
+  {
+    "EventName": "INST_ALU",
+    "EventCode": "0x0000001d",
+    "BriefDescription": "Retired ALU instructions"
+  },
+  {
+    "EventName": "INST_LDST",
+    "EventCode": "0x0000001e",
+    "BriefDescription": "Retired Load/Store instructions"
+  },
+  {
+    "EventName": "INST_VECTOR",
+    "EventCode": "0x0000001f",
+    "BriefDescription": "Retired Vector instructions"
+  },
+  {
+    "EventName": "INST_CSR",
+    "EventCode": "0x00000020",
+    "BriefDescription": "Retired CSR instructions"
+  },
+  {
+    "EventName": "INST_SYNC",
+    "EventCode": "0x00000021",
+    "BriefDescription": "Retired sync instructions (AMO/LR/SC instructions)"
+  },
+  {
+    "EventName": "INST_UNALIGNED_ACCESS",
+    "EventCode": "0x00000022",
+    "BriefDescription": "Retired Store/Load instructions with unaligned memory access"
+  },
+  {
+    "EventName": "INST_ECALL",
+    "EventCode": "0x00000025",
+    "BriefDescription": "Retired ecall instructions"
+  },
+  {
+    "EventName": "INST_LONG_JP",
+    "EventCode": "0x00000026",
+    "BriefDescription": "Retired long jump instructions"
+  },
+  {
+    "EventName": "INST_FP",
+    "EventCode": "0x0000002a",
+    "BriefDescription": "Retired FPU instructions"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/microarch.json b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/microarch.json
new file mode 100644
index 000000000000..0ab6f288af91
--- /dev/null
+++ b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/microarch.json
@@ -0,0 +1,80 @@
+[
+  {
+    "EventName": "LSU_SPEC_FAIL",
+    "EventCode": "0x0000000a",
+    "BriefDescription": "LSU speculation fail"
+  },
+  {
+    "EventName": "IDU_RF_PIPE_FAIL",
+    "EventCode": "0x00000014",
+    "BriefDescription": "Instruction decode unit launch pipeline failed in RF state"
+  },
+  {
+    "EventName": "IDU_RF_REG_FAIL",
+    "EventCode": "0x00000015",
+    "BriefDescription": "Instruction decode unit launch register file fail in RF state"
+  },
+  {
+    "EventName": "IDU_RF_INSTRUCTION",
+    "EventCode": "0x00000016",
+    "BriefDescription": "retired instruction count of Instruction decode unit in RF (Register File) stage"
+  },
+  {
+    "EventName": "LSU_4K_STALL",
+    "EventCode": "0x00000017",
+    "BriefDescription": "LSU stall times for long distance data access (Over 4K)",
+    "PublicDescription": "This stall occurs when translating a virtual address with a page offset over 4k"
+  },
+  {
+    "EventName": "LSU_OTHER_STALL",
+    "EventCode": "0x00000018",
+    "BriefDescription": "LSU stall times for other reasons (except the 4k stall)"
+  },
+  {
+    "EventName": "LSU_SQ_OTHER_DIS",
+    "EventCode": "0x00000019",
+    "BriefDescription": "LSU store queue discard others"
+  },
+  {
+    "EventName": "LSU_SQ_DATA_DISCARD",
+    "EventCode": "0x0000001a",
+    "BriefDescription": "LSU store queue discard data (uops)"
+  },
+  {
+    "EventName": "BRANCH_DIRECTION_MISPREDICTION",
+    "EventCode": "0x0000001b",
+    "BriefDescription": "Branch misprediction in BTB"
+  },
+  {
+    "EventName": "BRANCH_DIRECTION_PREDICTION",
+    "EventCode": "0x0000001c",
+    "BriefDescription": "All branch prediction in BTB",
+    "PublicDescription": "This event includes both successful and failed predictions in BTB"
+  },
+  {
+    "EventName": "INTERRUPT_ACK_COUNT",
+    "EventCode": "0x00000023",
+    "BriefDescription": "acknowledged interrupt count"
+  },
+  {
+    "EventName": "INTERRUPT_OFF_CYCLE",
+    "EventCode": "0x00000024",
+    "BriefDescription": "PLIC arbitration time when the interrupt is not responded",
+    "PublicDescription": "The arbitration time is recorded while meeting any of the following:\n- CPU is M-mode and MIE == 0\n- CPU is S-mode and delegation and SIE == 0\n"
+  },
+  {
+    "EventName": "IFU_STALLED_CYCLE",
+    "EventCode": "0x00000027",
+    "BriefDescription": "Number of stall cycles of the instruction fetch unit (IFU)."
+  },
+  {
+    "EventName": "IDU_STALLED_CYCLE",
+    "EventCode": "0x00000028",
+    "BriefDescription": "hpcp_backend_stall Number of stall cycles of the instruction decoding unit (IDU) and next-level pipeline unit."
+  },
+  {
+    "EventName": "SYNC_STALL",
+    "EventCode": "0x00000029",
+    "BriefDescription": "Sync instruction stall cycle fence/fence.i/sync/sfence"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
index 3388b58b8f1a..35124a4ddcb2 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
@@ -70,12 +70,6 @@
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "Uncore frequency per die [GHZ]",
-        "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9",
-        "MetricGroup": "SoC",
-        "MetricName": "UNCORE_FREQ"
-    },
-    {
         "BriefDescription": "Percentage of cycles spent in System Management Interrupts.",
         "MetricExpr": "((msr@aperf@ - cycles) / msr@aperf@ if msr@smi@ > 0 else 0)",
         "MetricGroup": "smi",
@@ -810,6 +804,13 @@
         "Unit": "cpu_atom"
     },
     {
+        "BriefDescription": "Uncore frequency per die [GHZ]",
+        "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9",
+        "MetricGroup": "SoC",
+        "MetricName": "UNCORE_FREQ",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
         "MetricExpr": "(cpu_core@UOPS_DISPATCHED.PORT_0@ + cpu_core@UOPS_DISPATCHED.PORT_1@ + cpu_core@UOPS_DISPATCHED.PORT_5_11@ + cpu_core@UOPS_DISPATCHED.PORT_6@) / (5 * tma_info_core_core_clks)",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
@@ -1838,7 +1839,7 @@
     },
     {
         "BriefDescription": "Average number of parallel data read requests to external memory",
-        "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / cpu_core@UNC_ARB_DAT_OCCUPANCY.RD\\,cmask\\=1@",
+        "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / UNC_ARB_DAT_OCCUPANCY.RD@cmask\\=1@",
         "MetricGroup": "Mem;MemoryBW;SoC",
         "MetricName": "tma_info_system_mem_parallel_reads",
         "PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
diff --git a/tools/perf/pmu-events/arch/x86/amdzen4/memory-controller.json b/tools/perf/pmu-events/arch/x86/amdzen4/memory-controller.json
new file mode 100644
index 000000000000..55263e5e4f69
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen4/memory-controller.json
@@ -0,0 +1,101 @@
+[
+  {
+    "EventName": "umc_mem_clk",
+    "PublicDescription": "Number of memory clock cycles.",
+    "EventCode": "0x00",
+    "PerPkg": "1",
+    "Unit": "UMCPMC"
+  },
+  {
+    "EventName": "umc_act_cmd.all",
+    "PublicDescription": "Number of ACTIVATE commands sent.",
+    "EventCode": "0x05",
+    "PerPkg": "1",
+    "Unit": "UMCPMC"
+  },
+  {
+    "EventName": "umc_act_cmd.rd",
+    "PublicDescription": "Number of ACTIVATE commands sent for reads.",
+    "EventCode": "0x05",
+    "RdWrMask": "0x1",
+    "PerPkg": "1",
+    "Unit": "UMCPMC"
+  },
+  {
+    "EventName": "umc_act_cmd.wr",
+    "PublicDescription": "Number of ACTIVATE commands sent for writes.",
+    "EventCode": "0x05",
+    "RdWrMask": "0x2",
+    "PerPkg": "1",
+    "Unit": "UMCPMC"
+  },
+  {
+    "EventName": "umc_pchg_cmd.all",
+    "PublicDescription": "Number of PRECHARGE commands sent.",
+    "EventCode": "0x06",
+    "PerPkg": "1",
+    "Unit": "UMCPMC"
+  },
+  {
+    "EventName": "umc_pchg_cmd.rd",
+    "PublicDescription": "Number of PRECHARGE commands sent for reads.",
+    "EventCode": "0x06",
+    "RdWrMask": "0x1",
+    "PerPkg": "1",
+    "Unit": "UMCPMC"
+  },
+  {
+    "EventName": "umc_pchg_cmd.wr",
+    "PublicDescription": "Number of PRECHARGE commands sent for writes.",
+    "EventCode": "0x06",
+    "RdWrMask": "0x2",
+    "PerPkg": "1",
+    "Unit": "UMCPMC"
+  },
+  {
+    "EventName": "umc_cas_cmd.all",
+    "PublicDescription": "Number of CAS commands sent.",
+    "EventCode": "0x0a",
+    "PerPkg": "1",
+    "Unit": "UMCPMC"
+  },
+  {
+    "EventName": "umc_cas_cmd.rd",
+    "PublicDescription": "Number of CAS commands sent for reads.",
+    "EventCode": "0x0a",
+    "RdWrMask": "0x1",
+    "PerPkg": "1",
+    "Unit": "UMCPMC"
+  },
+  {
+    "EventName": "umc_cas_cmd.wr",
+    "PublicDescription": "Number of CAS commands sent for writes.",
+    "EventCode": "0x0a",
+    "RdWrMask": "0x2",
+    "PerPkg": "1",
+    "Unit": "UMCPMC"
+  },
+  {
+    "EventName": "umc_data_slot_clks.all",
+    "PublicDescription": "Number of clocks used by the data bus.",
+    "EventCode": "0x14",
+    "PerPkg": "1",
+    "Unit": "UMCPMC"
+  },
+  {
+    "EventName": "umc_data_slot_clks.rd",
+    "PublicDescription": "Number of clocks used by the data bus for reads.",
+    "EventCode": "0x14",
+    "RdWrMask": "0x1",
+    "PerPkg": "1",
+    "Unit": "UMCPMC"
+  },
+  {
+    "EventName": "umc_data_slot_clks.wr",
+    "PublicDescription": "Number of clocks used by the data bus for writes.",
+    "EventCode": "0x14",
+    "RdWrMask": "0x2",
+    "PerPkg": "1",
+    "Unit": "UMCPMC"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen4/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen4/recommended.json
index 5e6a793acf7b..96e06401c6cb 100644
--- a/tools/perf/pmu-events/arch/x86/amdzen4/recommended.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen4/recommended.json
@@ -330,5 +330,89 @@
     "MetricGroup": "data_fabric",
     "PerPkg": "1",
     "ScaleUnit": "6.103515625e-5MiB"
+  },
+  {
+    "MetricName": "umc_data_bus_utilization",
+    "BriefDescription": "Memory controller data bus utilization.",
+    "MetricExpr": "d_ratio(umc_data_slot_clks.all / 2, umc_mem_clk)",
+    "MetricGroup": "memory_controller",
+    "PerPkg": "1",
+    "ScaleUnit": "100%"
+  },
+  {
+    "MetricName": "umc_cas_cmd_rate",
+    "BriefDescription": "Memory controller CAS command rate.",
+    "MetricExpr": "d_ratio(umc_cas_cmd.all * 1000, umc_mem_clk)",
+    "MetricGroup": "memory_controller",
+    "PerPkg": "1"
+  },
+  {
+    "MetricName": "umc_cas_cmd_read_ratio",
+    "BriefDescription": "Ratio of memory controller CAS commands for reads.",
+    "MetricExpr": "d_ratio(umc_cas_cmd.rd, umc_cas_cmd.all)",
+    "MetricGroup": "memory_controller",
+    "PerPkg": "1",
+    "ScaleUnit": "100%"
+  },
+  {
+    "MetricName": "umc_cas_cmd_write_ratio",
+    "BriefDescription": "Ratio of memory controller CAS commands for writes.",
+    "MetricExpr": "d_ratio(umc_cas_cmd.wr, umc_cas_cmd.all)",
+    "MetricGroup": "memory_controller",
+    "PerPkg": "1",
+    "ScaleUnit": "100%"
+  },
+  {
+    "MetricName": "umc_mem_read_bandwidth",
+    "BriefDescription": "Estimated memory read bandwidth.",
+    "MetricExpr": "(umc_cas_cmd.rd * 64) / 1e6 / duration_time",
+    "MetricGroup": "memory_controller",
+    "PerPkg": "1",
+    "ScaleUnit": "1MB/s"
+  },
+  {
+    "MetricName": "umc_mem_write_bandwidth",
+    "BriefDescription": "Estimated memory write bandwidth.",
+    "MetricExpr": "(umc_cas_cmd.wr * 64) / 1e6 / duration_time",
+    "MetricGroup": "memory_controller",
+    "PerPkg": "1",
+    "ScaleUnit": "1MB/s"
+  },
+  {
+    "MetricName": "umc_mem_bandwidth",
+    "BriefDescription": "Estimated combined memory bandwidth.",
+    "MetricExpr": "(umc_cas_cmd.all * 64) / 1e6 / duration_time",
+    "MetricGroup": "memory_controller",
+    "PerPkg": "1",
+    "ScaleUnit": "1MB/s"
+  },
+  {
+    "MetricName": "umc_cas_cmd_read_ratio",
+    "BriefDescription": "Ratio of memory controller CAS commands for reads.",
+    "MetricExpr": "d_ratio(umc_cas_cmd.rd, umc_cas_cmd.all)",
+    "MetricGroup": "memory_controller",
+    "PerPkg": "1",
+    "ScaleUnit": "100%"
+  },
+  {
+    "MetricName": "umc_cas_cmd_rate",
+    "BriefDescription": "Memory controller CAS command rate.",
+    "MetricExpr": "d_ratio(umc_cas_cmd.all * 1000, umc_mem_clk)",
+    "MetricGroup": "memory_controller",
+    "PerPkg": "1"
+  },
+  {
+    "MetricName": "umc_activate_cmd_rate",
+    "BriefDescription": "Memory controller ACTIVATE command rate.",
+    "MetricExpr": "d_ratio(umc_act_cmd.all * 1000, umc_mem_clk)",
+    "MetricGroup": "memory_controller",
+    "PerPkg": "1"
+  },
+  {
+    "MetricName": "umc_precharge_cmd_rate",
+    "BriefDescription": "Memory controller PRECHARGE command rate.",
+    "MetricExpr": "d_ratio(umc_pchg_cmd.all * 1000, umc_mem_clk)",
+    "MetricGroup": "memory_controller",
+    "PerPkg": "1"
   }
 ]
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
index 84c132af3dfa..8bc6c0707856 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
@@ -1863,6 +1863,12 @@
         "ScaleUnit": "1GHz"
     },
     {
+        "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)",
+        "MetricExpr": "UNC_UPI_RxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time",
+        "MetricName": "upi_data_receive_bw",
+        "ScaleUnit": "1MB/s"
+    },
+    {
         "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)",
         "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time",
         "MetricName": "upi_data_transmit_bw",
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json
index 4a9d211e9d4f..1bdefaf96287 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json
@@ -23,27 +23,48 @@
         "UMask": "0x10"
     },
     {
-        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0",
+        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0 [This event is alias to FP_ARITH_DISPATCHED.V0]",
         "EventCode": "0xb3",
         "EventName": "FP_ARITH_DISPATCHED.PORT_0",
         "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1",
+        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1 [This event is alias to FP_ARITH_DISPATCHED.V1]",
         "EventCode": "0xb3",
         "EventName": "FP_ARITH_DISPATCHED.PORT_1",
         "SampleAfterValue": "2000003",
         "UMask": "0x2"
     },
     {
-        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5",
+        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5 [This event is alias to FP_ARITH_DISPATCHED.V2]",
         "EventCode": "0xb3",
         "EventName": "FP_ARITH_DISPATCHED.PORT_5",
         "SampleAfterValue": "2000003",
         "UMask": "0x4"
     },
     {
+        "BriefDescription": "FP_ARITH_DISPATCHED.V0 [This event is alias to FP_ARITH_DISPATCHED.PORT_0]",
+        "EventCode": "0xb3",
+        "EventName": "FP_ARITH_DISPATCHED.V0",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "FP_ARITH_DISPATCHED.V1 [This event is alias to FP_ARITH_DISPATCHED.PORT_1]",
+        "EventCode": "0xb3",
+        "EventName": "FP_ARITH_DISPATCHED.V1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "FP_ARITH_DISPATCHED.V2 [This event is alias to FP_ARITH_DISPATCHED.PORT_5]",
+        "EventCode": "0xb3",
+        "EventName": "FP_ARITH_DISPATCHED.V2",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
         "BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 2 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "EventCode": "0xc7",
         "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json
index 6dcf3b763af4..1f8200fb8964 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json
@@ -1,21 +1,5 @@
 [
     {
-        "BriefDescription": "AMX retired arithmetic BF16 operations.",
-        "EventCode": "0xce",
-        "EventName": "AMX_OPS_RETIRED.BF16",
-        "PublicDescription": "Number of AMX-based retired arithmetic bfloat16 (BF16) floating-point operations. Counts TDPBF16PS FP instructions. SW to use operation multiplier of 4",
-        "SampleAfterValue": "1000003",
-        "UMask": "0x2"
-    },
-    {
-        "BriefDescription": "AMX retired arithmetic integer 8-bit operations.",
-        "EventCode": "0xce",
-        "EventName": "AMX_OPS_RETIRED.INT8",
-        "PublicDescription": "Number of AMX-based retired arithmetic integer operations of 8-bit width source operands. Counts TDPB[SS,UU,US,SU]D instructions. SW should use operation multiplier of 8.",
-        "SampleAfterValue": "1000003",
-        "UMask": "0x1"
-    },
-    {
         "BriefDescription": "This event is deprecated. Refer to new event ARITH.DIV_ACTIVE",
         "CounterMask": "1",
         "Deprecated": "1",
@@ -505,7 +489,7 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "INT_MISC.UNKNOWN_BRANCH_CYCLES",
+        "BriefDescription": "Bubble cycles of BAClear (Unknown Branch).",
         "EventCode": "0xad",
         "EventName": "INT_MISC.UNKNOWN_BRANCH_CYCLES",
         "MSRIndex": "0x3F7",
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-interconnect.json
index 09d840c7da4c..65d088556bae 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-interconnect.json
@@ -4825,11 +4825,11 @@
         "Unit": "M3UPI"
     },
     {
-        "BriefDescription": "Number of allocations into the CRS Egress  used to queue up requests destined to the mesh (AD Bouncable)",
+        "BriefDescription": "Number of allocations into the CRS Egress  used to queue up requests destined to the mesh (AD Bounceable)",
         "EventCode": "0x47",
         "EventName": "UNC_MDF_CRS_TxR_INSERTS.AD_BNC",
         "PerPkg": "1",
-        "PublicDescription": "AD Bouncable : Number of allocations into the CRS Egress",
+        "PublicDescription": "AD Bounceable : Number of allocations into the CRS Egress",
         "UMask": "0x1",
         "Unit": "MDF"
     },
@@ -4861,11 +4861,11 @@
         "Unit": "MDF"
     },
     {
-        "BriefDescription": "Number of allocations into the CRS Egress  used to queue up requests destined to the mesh (BL Bouncable)",
+        "BriefDescription": "Number of allocations into the CRS Egress  used to queue up requests destined to the mesh (BL Bounceable)",
         "EventCode": "0x47",
         "EventName": "UNC_MDF_CRS_TxR_INSERTS.BL_BNC",
         "PerPkg": "1",
-        "PublicDescription": "BL Bouncable : Number of allocations into the CRS Egress",
+        "PublicDescription": "BL Bounceable : Number of allocations into the CRS Egress",
         "UMask": "0x4",
         "Unit": "MDF"
     },
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json
index 557080b74ee5..0761980c34a0 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-io.json
@@ -1186,6 +1186,36 @@
         "Unit": "IIO"
     },
     {
+        "BriefDescription": ": IOTLB Hits to a 1G Page",
+        "EventCode": "0x40",
+        "EventName": "UNC_IIO_IOMMU0.1G_HITS",
+        "PerPkg": "1",
+        "PortMask": "0x0000",
+        "PublicDescription": ": IOTLB Hits to a 1G Page : Counts if a transaction to a 1G page, on its first lookup, hits the IOTLB.",
+        "UMask": "0x10",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": ": IOTLB Hits to a 2M Page",
+        "EventCode": "0x40",
+        "EventName": "UNC_IIO_IOMMU0.2M_HITS",
+        "PerPkg": "1",
+        "PortMask": "0x0000",
+        "PublicDescription": ": IOTLB Hits to a 2M Page : Counts if a transaction to a 2M page, on its first lookup, hits the IOTLB.",
+        "UMask": "0x8",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": ": IOTLB Hits to a 4K Page",
+        "EventCode": "0x40",
+        "EventName": "UNC_IIO_IOMMU0.4K_HITS",
+        "PerPkg": "1",
+        "PortMask": "0x0000",
+        "PublicDescription": ": IOTLB Hits to a 4K Page : Counts if a transaction to a 4K page, on its first lookup, hits the IOTLB.",
+        "UMask": "0x4",
+        "Unit": "IIO"
+    },
+    {
         "BriefDescription": ": Context cache hits",
         "EventCode": "0x40",
         "EventName": "UNC_IIO_IOMMU0.CTXT_CACHE_HITS",
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
index e98602c66707..71d78a7841ea 100644
--- a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
@@ -1847,6 +1847,12 @@
         "ScaleUnit": "1GHz"
     },
     {
+        "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)",
+        "MetricExpr": "UNC_UPI_RxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time",
+        "MetricName": "upi_data_receive_bw",
+        "ScaleUnit": "1MB/s"
+    },
+    {
         "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)",
         "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time",
         "MetricName": "upi_data_transmit_bw",
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/other.json b/tools/perf/pmu-events/arch/x86/icelakex/other.json
index 63d5faf2fc43..11810daaf150 100644
--- a/tools/perf/pmu-events/arch/x86/icelakex/other.json
+++ b/tools/perf/pmu-events/arch/x86/icelakex/other.json
@@ -19,7 +19,7 @@
         "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.",
         "EventCode": "0x28",
         "EventName": "CORE_POWER.LVL2_TURBO_LICENSE",
-        "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchtecture).  This includes high current AVX 512-bit instructions.",
+        "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchitecture).  This includes high current AVX 512-bit instructions.",
         "SampleAfterValue": "200003",
         "UMask": "0x20"
     },
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json b/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json
index 176e5ef2a24a..45ee6bceba7f 100644
--- a/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/icelakex/pipeline.json
@@ -519,7 +519,7 @@
         "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
         "EventCode": "0x5e",
         "EventName": "RS_EVENTS.EMPTY_CYCLES",
-        "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into stravation periods (e.g. branch mispredictions or i-cache misses)",
+        "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses)",
         "SampleAfterValue": "1000003",
         "UMask": "0x1"
     },
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json
index f87ea3f66d1b..a066a009c511 100644
--- a/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json
@@ -38,7 +38,7 @@
         "EventCode": "0x10",
         "EventName": "UNC_I_COHERENT_OPS.CLFLUSH",
         "PerPkg": "1",
-        "PublicDescription": "Coherent Ops : CLFlush : Counts the number of coherency related operations servied by the IRP",
+        "PublicDescription": "Coherent Ops : CLFlush : Counts the number of coherency related operations serviced by the IRP",
         "UMask": "0x80",
         "Unit": "IRP"
     },
@@ -65,7 +65,7 @@
         "EventCode": "0x10",
         "EventName": "UNC_I_COHERENT_OPS.WBMTOI",
         "PerPkg": "1",
-        "PublicDescription": "Coherent Ops : WbMtoI : Counts the number of coherency related operations servied by the IRP",
+        "PublicDescription": "Coherent Ops : WbMtoI : Counts the number of coherency related operations serviced by the IRP",
         "UMask": "0x40",
         "Unit": "IRP"
     },
@@ -454,7 +454,7 @@
         "EventCode": "0x11",
         "EventName": "UNC_I_TRANSACTIONS.WRITES",
         "PerPkg": "1",
-        "PublicDescription": "Inbound Transaction Count : Writes : Counts the number of Inbound transactions from the IRP to the Uncore.  This can be filtered based on request type in addition to the source queue.  Note the special filtering equation.  We do OR-reduction on the request type.  If the SOURCE bit is set, then we also do AND qualification based on the source portID. : Trackes only write requests.  Each write request should have a prefetch, so there is no need to explicitly track these requests.  For writes that are tickled and have to retry, the counter will be incremented for each retry.",
+        "PublicDescription": "Inbound Transaction Count : Writes : Counts the number of Inbound transactions from the IRP to the Uncore.  This can be filtered based on request type in addition to the source queue.  Note the special filtering equation.  We do OR-reduction on the request type.  If the SOURCE bit is set, then we also do AND qualification based on the source portID. : Tracks only write requests.  Each write request should have a prefetch, so there is no need to explicitly track these requests.  For writes that are tickled and have to retry, the counter will be incremented for each retry.",
         "UMask": "0x2",
         "Unit": "IRP"
     },
diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv
index e571683f59f3..4d1deed4437a 100644
--- a/tools/perf/pmu-events/arch/x86/mapfile.csv
+++ b/tools/perf/pmu-events/arch/x86/mapfile.csv
@@ -7,7 +7,7 @@ GenuineIntel-6-56,v11,broadwellde,core
 GenuineIntel-6-4F,v22,broadwellx,core
 GenuineIntel-6-55-[56789ABCDEF],v1.20,cascadelakex,core
 GenuineIntel-6-9[6C],v1.04,elkhartlake,core
-GenuineIntel-6-CF,v1.01,emeraldrapids,core
+GenuineIntel-6-CF,v1.02,emeraldrapids,core
 GenuineIntel-6-5[CF],v13,goldmont,core
 GenuineIntel-6-7A,v1.01,goldmontplus,core
 GenuineIntel-6-B6,v1.00,grandridge,core
@@ -15,7 +15,7 @@ GenuineIntel-6-A[DE],v1.01,graniterapids,core
 GenuineIntel-6-(3C|45|46),v33,haswell,core
 GenuineIntel-6-3F,v28,haswellx,core
 GenuineIntel-6-7[DE],v1.19,icelake,core
-GenuineIntel-6-6[AC],v1.21,icelakex,core
+GenuineIntel-6-6[AC],v1.23,icelakex,core
 GenuineIntel-6-3A,v24,ivybridge,core
 GenuineIntel-6-3E,v24,ivytown,core
 GenuineIntel-6-2D,v24,jaketown,core
@@ -26,7 +26,7 @@ GenuineIntel-6-1[AEF],v4,nehalemep,core
 GenuineIntel-6-2E,v4,nehalemex,core
 GenuineIntel-6-A7,v1.01,rocketlake,core
 GenuineIntel-6-2A,v19,sandybridge,core
-GenuineIntel-6-8F,v1.16,sapphirerapids,core
+GenuineIntel-6-8F,v1.17,sapphirerapids,core
 GenuineIntel-6-AF,v1.00,sierraforest,core
 GenuineIntel-6-(37|4A|4C|4D|5A),v15,silvermont,core
 GenuineIntel-6-(4E|5E|8E|9E|A5|A6),v57,skylake,core
diff --git a/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json b/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json
index 0c880e415669..27433fc15ede 100644
--- a/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json
@@ -985,7 +985,7 @@
     },
     {
         "BriefDescription": "Average number of parallel data read requests to external memory",
-        "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / cpu@UNC_ARB_DAT_OCCUPANCY.RD\\,cmask\\=1@",
+        "MetricExpr": "UNC_ARB_DAT_OCCUPANCY.RD / UNC_ARB_DAT_OCCUPANCY.RD@cmask\\=1@",
         "MetricGroup": "Mem;MemoryBW;SoC",
         "MetricName": "tma_info_system_mem_parallel_reads",
         "PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/floating-point.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/floating-point.json
index 4a9d211e9d4f..1bdefaf96287 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/floating-point.json
@@ -23,27 +23,48 @@
         "UMask": "0x10"
     },
     {
-        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0",
+        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0 [This event is alias to FP_ARITH_DISPATCHED.V0]",
         "EventCode": "0xb3",
         "EventName": "FP_ARITH_DISPATCHED.PORT_0",
         "SampleAfterValue": "2000003",
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1",
+        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1 [This event is alias to FP_ARITH_DISPATCHED.V1]",
         "EventCode": "0xb3",
         "EventName": "FP_ARITH_DISPATCHED.PORT_1",
         "SampleAfterValue": "2000003",
         "UMask": "0x2"
     },
     {
-        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5",
+        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5 [This event is alias to FP_ARITH_DISPATCHED.V2]",
         "EventCode": "0xb3",
         "EventName": "FP_ARITH_DISPATCHED.PORT_5",
         "SampleAfterValue": "2000003",
         "UMask": "0x4"
     },
     {
+        "BriefDescription": "FP_ARITH_DISPATCHED.V0 [This event is alias to FP_ARITH_DISPATCHED.PORT_0]",
+        "EventCode": "0xb3",
+        "EventName": "FP_ARITH_DISPATCHED.V0",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "FP_ARITH_DISPATCHED.V1 [This event is alias to FP_ARITH_DISPATCHED.PORT_1]",
+        "EventCode": "0xb3",
+        "EventName": "FP_ARITH_DISPATCHED.V1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "FP_ARITH_DISPATCHED.V2 [This event is alias to FP_ARITH_DISPATCHED.PORT_5]",
+        "EventCode": "0xb3",
+        "EventName": "FP_ARITH_DISPATCHED.V2",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
         "BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 2 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "EventCode": "0xc7",
         "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json
index 6dcf3b763af4..2cfe814d2015 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json
@@ -505,7 +505,7 @@
         "UMask": "0x1"
     },
     {
-        "BriefDescription": "INT_MISC.UNKNOWN_BRANCH_CYCLES",
+        "BriefDescription": "Bubble cycles of BAClear (Unknown Branch).",
         "EventCode": "0xad",
         "EventName": "INT_MISC.UNKNOWN_BRANCH_CYCLES",
         "MSRIndex": "0x3F7",
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
index 06c6d67cb76b..e31a4aac9f20 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
@@ -1965,6 +1965,12 @@
         "ScaleUnit": "1GHz"
     },
     {
+        "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)",
+        "MetricExpr": "UNC_UPI_RxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time",
+        "MetricName": "upi_data_receive_bw",
+        "ScaleUnit": "1MB/s"
+    },
+    {
         "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)",
         "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time",
         "MetricName": "upi_data_transmit_bw",
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json
index 09d840c7da4c..65d088556bae 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json
@@ -4825,11 +4825,11 @@
         "Unit": "M3UPI"
     },
     {
-        "BriefDescription": "Number of allocations into the CRS Egress  used to queue up requests destined to the mesh (AD Bouncable)",
+        "BriefDescription": "Number of allocations into the CRS Egress  used to queue up requests destined to the mesh (AD Bounceable)",
         "EventCode": "0x47",
         "EventName": "UNC_MDF_CRS_TxR_INSERTS.AD_BNC",
         "PerPkg": "1",
-        "PublicDescription": "AD Bouncable : Number of allocations into the CRS Egress",
+        "PublicDescription": "AD Bounceable : Number of allocations into the CRS Egress",
         "UMask": "0x1",
         "Unit": "MDF"
     },
@@ -4861,11 +4861,11 @@
         "Unit": "MDF"
     },
     {
-        "BriefDescription": "Number of allocations into the CRS Egress  used to queue up requests destined to the mesh (BL Bouncable)",
+        "BriefDescription": "Number of allocations into the CRS Egress  used to queue up requests destined to the mesh (BL Bounceable)",
         "EventCode": "0x47",
         "EventName": "UNC_MDF_CRS_TxR_INSERTS.BL_BNC",
         "PerPkg": "1",
-        "PublicDescription": "BL Bouncable : Number of allocations into the CRS Egress",
+        "PublicDescription": "BL Bounceable : Number of allocations into the CRS Egress",
         "UMask": "0x4",
         "Unit": "MDF"
     },
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json
index 8b5f54fed103..03596db87710 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-io.json
@@ -1250,6 +1250,36 @@
         "Unit": "IIO"
     },
     {
+        "BriefDescription": ": IOTLB Hits to a 1G Page",
+        "EventCode": "0x40",
+        "EventName": "UNC_IIO_IOMMU0.1G_HITS",
+        "PerPkg": "1",
+        "PortMask": "0x0000",
+        "PublicDescription": ": IOTLB Hits to a 1G Page : Counts if a transaction to a 1G page, on its first lookup, hits the IOTLB.",
+        "UMask": "0x10",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": ": IOTLB Hits to a 2M Page",
+        "EventCode": "0x40",
+        "EventName": "UNC_IIO_IOMMU0.2M_HITS",
+        "PerPkg": "1",
+        "PortMask": "0x0000",
+        "PublicDescription": ": IOTLB Hits to a 2M Page : Counts if a transaction to a 2M page, on its first lookup, hits the IOTLB.",
+        "UMask": "0x8",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": ": IOTLB Hits to a 4K Page",
+        "EventCode": "0x40",
+        "EventName": "UNC_IIO_IOMMU0.4K_HITS",
+        "PerPkg": "1",
+        "PortMask": "0x0000",
+        "PublicDescription": ": IOTLB Hits to a 4K Page : Counts if a transaction to a 4K page, on its first lookup, hits the IOTLB.",
+        "UMask": "0x4",
+        "Unit": "IIO"
+    },
+    {
         "BriefDescription": ": Context cache hits",
         "EventCode": "0x40",
         "EventName": "UNC_IIO_IOMMU0.CTXT_CACHE_HITS",
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
index 4a8f8eeb7525..ec3aa5ef00a3 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
@@ -1807,6 +1807,12 @@
         "ScaleUnit": "1GHz"
     },
     {
+        "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)",
+        "MetricExpr": "UNC_UPI_RxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time",
+        "MetricName": "upi_data_receive_bw",
+        "ScaleUnit": "1MB/s"
+    },
+    {
         "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)",
         "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time",
         "MetricName": "upi_data_transmit_bw",
diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
index 3c091ab75305..53ab050c8fa4 100755
--- a/tools/perf/pmu-events/jevents.py
+++ b/tools/perf/pmu-events/jevents.py
@@ -83,7 +83,7 @@ def c_len(s: str) -> int:
   """Return the length of s a C string
 
   This doesn't handle all escape characters properly. It first assumes
-  all \ are for escaping, it then adjusts as it will have over counted
+  all \\ are for escaping, it then adjusts as it will have over counted
   \\. The code uses \000 rather than \0 as a terminator as an adjacent
   number would be folded into a string of \0 (ie. "\0" + "5" doesn't
   equal a terminator followed by the number 5 but the escape of
@@ -286,6 +286,7 @@ class JsonEvent:
           'imx8_ddr': 'imx8_ddr',
           'L3PMC': 'amd_l3',
           'DFPMC': 'amd_df',
+          'UMCPMC': 'amd_umc',
           'cpu_core': 'cpu_core',
           'cpu_atom': 'cpu_atom',
           'ali_drw': 'ali_drw',
@@ -354,6 +355,7 @@ class JsonEvent:
         ('SampleAfterValue', 'period='),
         ('UMask', 'umask='),
         ('NodeType', 'type='),
+        ('RdWrMask', 'rdwrmask='),
     ]
     for key, value in event_fields:
       if key in jd and jd[key] != '0':
diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py
index d59ff53f1d94..d973c2baed1c 100755
--- a/tools/perf/scripts/python/arm-cs-trace-disasm.py
+++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py
@@ -45,8 +45,8 @@ parser = OptionParser(option_list=option_list)
 # Initialize global dicts and regular expression
 disasm_cache = dict()
 cpu_data = dict()
-disasm_re = re.compile("^\s*([0-9a-fA-F]+):")
-disasm_func_re = re.compile("^\s*([0-9a-fA-F]+)\s.*:")
+disasm_re = re.compile(r"^\s*([0-9a-fA-F]+):")
+disasm_func_re = re.compile(r"^\s*([0-9a-fA-F]+)\s.*:")
 cache_size = 64*1024
 
 glb_source_file_name	= None
@@ -188,6 +188,17 @@ def process_event(param_dict):
 	dso_end = get_optional(param_dict, "dso_map_end")
 	symbol = get_optional(param_dict, "symbol")
 
+	cpu = sample["cpu"]
+	ip = sample["ip"]
+	addr = sample["addr"]
+
+	# Initialize CPU data if it's empty, and return immediately
+	# if this is the first tracing event for this CPU.
+	if (cpu_data.get(str(cpu) + 'addr') == None):
+		cpu_data[str(cpu) + 'addr'] = addr
+		return
+
+
 	if (options.verbose == True):
 		print("Event type: %s" % name)
 		print_sample(sample)
@@ -209,16 +220,6 @@ def process_event(param_dict):
 	if (name[0:8] != "branches"):
 		return
 
-	cpu = sample["cpu"]
-	ip = sample["ip"]
-	addr = sample["addr"]
-
-	# Initialize CPU data if it's empty, and directly return back
-	# if this is the first tracing event for this CPU.
-	if (cpu_data.get(str(cpu) + 'addr') == None):
-		cpu_data[str(cpu) + 'addr'] = addr
-		return
-
 	# The format for packet is:
 	#
 	#		  +------------+------------+------------+
@@ -258,8 +259,9 @@ def process_event(param_dict):
 
 	if (options.objdump_name != None):
 		# It doesn't need to decrease virtual memory offset for disassembly
-		# for kernel dso, so in this case we set vm_start to zero.
-		if (dso == "[kernel.kallsyms]"):
+		# for kernel dso and executable file dso, so in this case we set
+		# vm_start to zero.
+		if (dso == "[kernel.kallsyms]" or dso_start == 0x400000):
 			dso_vm_start = 0
 		else:
 			dso_vm_start = int(dso_start)
diff --git a/tools/perf/scripts/python/compaction-times.py b/tools/perf/scripts/python/compaction-times.py
index 2560a042dc6f..9401f7c14747 100644
--- a/tools/perf/scripts/python/compaction-times.py
+++ b/tools/perf/scripts/python/compaction-times.py
@@ -260,7 +260,7 @@ def pr_help():
 
 comm_re = None
 pid_re = None
-pid_regex = "^(\d*)-(\d*)$|^(\d*)$"
+pid_regex = r"^(\d*)-(\d*)$|^(\d*)$"
 
 opt_proc = popt.DISP_DFL
 opt_disp = topt.DISP_ALL
diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index 13f2d8a81610..121cf61ba1b3 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -677,8 +677,8 @@ class CallGraphModelBase(TreeModel):
 			#   sqlite supports GLOB (text only) which uses * and ? and is case sensitive
 			if not self.glb.dbref.is_sqlite3:
 				# Escape % and _
-				s = value.replace("%", "\%")
-				s = s.replace("_", "\_")
+				s = value.replace("%", "\\%")
+				s = s.replace("_", "\\_")
 				# Translate * and ? into SQL LIKE pattern characters % and _
 				trans = string.maketrans("*?", "%_")
 				match = " LIKE '" + str(s).translate(trans) + "'"
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 2b45ffa462a6..53ba9c3e20e0 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -77,3 +77,17 @@ CFLAGS_python-use.o   += -DPYTHONPATH="BUILD_STR($(OUTPUT)python)" -DPYTHON="BUI
 CFLAGS_dwarf-unwind.o += -fno-optimize-sibling-calls
 
 perf-y += workloads/
+
+ifdef SHELLCHECK
+  SHELL_TESTS := $(shell find tests/shell -executable -type f -name '*.sh')
+  TEST_LOGS := $(SHELL_TESTS:tests/shell/%=shell/%.shellcheck_log)
+else
+  SHELL_TESTS :=
+  TEST_LOGS :=
+endif
+
+$(OUTPUT)%.shellcheck_log: %
+	$(call rule_mkdir)
+	$(Q)$(call echo-cmd,test)shellcheck -a -S warning "$<" > $@ || (cat $@ && rm $@ && false)
+
+perf-y += $(TEST_LOGS)
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index 61186d0d1cfa..97e1bdd6ec0e 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -188,7 +188,7 @@ static int test__attr(struct test_suite *test __maybe_unused, int subtest __mayb
 	if (perf_pmus__num_core_pmus() > 1) {
 		/*
 		 * TODO: Attribute tests hard code the PMU type. If there are >1
-		 * core PMU then each PMU will have a different type whic
+		 * core PMU then each PMU will have a different type which
 		 * requires additional support.
 		 */
 		pr_debug("Skip test on hybrid systems");
diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record
index 27c21271a16c..b44e4e6e4443 100644
--- a/tools/perf/tests/attr/base-record
+++ b/tools/perf/tests/attr/base-record
@@ -6,7 +6,7 @@ flags=0|8
 cpu=*
 type=0|1
 size=136
-config=0
+config=0|1
 sample_period=*
 sample_type=263
 read_format=0|4|20
diff --git a/tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64 b/tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64
index fbb065842880..bed765450ca9 100644
--- a/tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64
+++ b/tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64
@@ -6,4 +6,4 @@ args    = --no-bpf-event --user-regs=vg kill >/dev/null 2>&1
 ret     = 129
 test_ret = true
 arch    = aarch64
-auxv    = auxv["AT_HWCAP"] & 0x200000 == 0
+auxv    = auxv["AT_HWCAP"] & 0x400000 == 0
diff --git a/tools/perf/tests/attr/test-record-user-regs-sve-aarch64 b/tools/perf/tests/attr/test-record-user-regs-sve-aarch64
index c598c803221d..a65113cd7311 100644
--- a/tools/perf/tests/attr/test-record-user-regs-sve-aarch64
+++ b/tools/perf/tests/attr/test-record-user-regs-sve-aarch64
@@ -6,7 +6,7 @@ args    = --no-bpf-event --user-regs=vg kill >/dev/null 2>&1
 ret     = 1
 test_ret = true
 arch    = aarch64
-auxv    = auxv["AT_HWCAP"] & 0x200000 == 0x200000
+auxv    = auxv["AT_HWCAP"] & 0x400000 == 0x400000
 kernel_since = 6.1
 
 [event:base-record]
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index cb6f1dd00dc4..4a5973f9bb9b 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -14,6 +14,7 @@
 #include <sys/wait.h>
 #include <sys/stat.h>
 #include "builtin.h"
+#include "config.h"
 #include "hist.h"
 #include "intlist.h"
 #include "tests.h"
@@ -32,6 +33,7 @@
 
 static bool dont_fork;
 const char *dso_to_test;
+const char *test_objdump_path = "objdump";
 
 /*
  * List of architecture specific tests. Not a weak symbol as the array length is
@@ -60,8 +62,6 @@ static struct test_suite *generic_tests[] = {
 	&suite__pmu,
 	&suite__pmu_events,
 	&suite__dso_data,
-	&suite__dso_data_cache,
-	&suite__dso_data_reopen,
 	&suite__perf_evsel__roundtrip_name_test,
 #ifdef HAVE_LIBTRACEEVENT
 	&suite__perf_evsel__tp_sched_test,
@@ -513,6 +513,15 @@ static int run_workload(const char *work, int argc, const char **argv)
 	return -1;
 }
 
+static int perf_test__config(const char *var, const char *value,
+			     void *data __maybe_unused)
+{
+	if (!strcmp(var, "annotate.objdump"))
+		test_objdump_path = value;
+
+	return 0;
+}
+
 int cmd_test(int argc, const char **argv)
 {
 	const char *test_usage[] = {
@@ -529,6 +538,8 @@ int cmd_test(int argc, const char **argv)
 		    "Do not fork for testcase"),
 	OPT_STRING('w', "workload", &workload, "work", "workload to run for testing"),
 	OPT_STRING(0, "dso", &dso_to_test, "dso", "dso to test"),
+	OPT_STRING(0, "objdump", &test_objdump_path, "path",
+		   "objdump binary to use for disassembly and annotations"),
 	OPT_END()
 	};
 	const char * const test_subcommands[] = { "list", NULL };
@@ -538,6 +549,8 @@ int cmd_test(int argc, const char **argv)
         if (ret < 0)
                 return ret;
 
+	perf_config(perf_test__config, NULL);
+
 	/* Unbuffered output */
 	setvbuf(stdout, NULL, _IONBF, 0);
 
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 3af81012014e..7a3a7bbbec71 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -185,7 +185,7 @@ static int read_via_objdump(const char *filename, u64 addr, void *buf,
 	int ret;
 
 	fmt = "%s -z -d --start-address=0x%"PRIx64" --stop-address=0x%"PRIx64" %s";
-	ret = snprintf(cmd, sizeof(cmd), fmt, "objdump", addr, addr + len,
+	ret = snprintf(cmd, sizeof(cmd), fmt, test_objdump_path, addr, addr + len,
 		       filename);
 	if (ret <= 0 || (size_t)ret >= sizeof(cmd))
 		return -1;
@@ -511,38 +511,6 @@ static void fs_something(void)
 	}
 }
 
-#ifdef __s390x__
-#include "header.h" // for get_cpuid()
-#endif
-
-static const char *do_determine_event(bool excl_kernel)
-{
-	const char *event = excl_kernel ? "cycles:u" : "cycles";
-
-#ifdef __s390x__
-	char cpuid[128], model[16], model_c[16], cpum_cf_v[16];
-	unsigned int family;
-	int ret, cpum_cf_a;
-
-	if (get_cpuid(cpuid, sizeof(cpuid)))
-		goto out_clocks;
-	ret = sscanf(cpuid, "%*[^,],%u,%[^,],%[^,],%[^,],%x", &family, model_c,
-		     model, cpum_cf_v, &cpum_cf_a);
-	if (ret != 5)		 /* Not available */
-		goto out_clocks;
-	if (excl_kernel && (cpum_cf_a & 4))
-		return event;
-	if (!excl_kernel && (cpum_cf_a & 2))
-		return event;
-
-	/* Fall through: missing authorization */
-out_clocks:
-	event = excl_kernel ? "cpu-clock:u" : "cpu-clock";
-
-#endif
-	return event;
-}
-
 static void do_something(void)
 {
 	fs_something();
@@ -583,8 +551,10 @@ static int do_test_code_reading(bool try_kcore)
 	int err = -1, ret;
 	pid_t pid;
 	struct map *map;
-	bool have_vmlinux, have_kcore, excl_kernel = false;
+	bool have_vmlinux, have_kcore;
 	struct dso *dso;
+	const char *events[] = { "cycles", "cycles:u", "cpu-clock", "cpu-clock:u", NULL };
+	int evidx = 0;
 
 	pid = getpid();
 
@@ -618,7 +588,7 @@ static int do_test_code_reading(bool try_kcore)
 
 	/* No point getting kernel events if there is no kernel object */
 	if (!have_vmlinux && !have_kcore)
-		excl_kernel = true;
+		evidx++;
 
 	threads = thread_map__new_by_tid(pid);
 	if (!threads) {
@@ -640,13 +610,13 @@ static int do_test_code_reading(bool try_kcore)
 		goto out_put;
 	}
 
-	cpus = perf_cpu_map__new(NULL);
+	cpus = perf_cpu_map__new_online_cpus();
 	if (!cpus) {
 		pr_debug("perf_cpu_map__new failed\n");
 		goto out_put;
 	}
 
-	while (1) {
+	while (events[evidx]) {
 		const char *str;
 
 		evlist = evlist__new();
@@ -657,7 +627,7 @@ static int do_test_code_reading(bool try_kcore)
 
 		perf_evlist__set_maps(&evlist->core, cpus, threads);
 
-		str = do_determine_event(excl_kernel);
+		str = events[evidx];
 		pr_debug("Parsing event '%s'\n", str);
 		ret = parse_event(evlist, str);
 		if (ret < 0) {
@@ -675,32 +645,32 @@ static int do_test_code_reading(bool try_kcore)
 
 		ret = evlist__open(evlist);
 		if (ret < 0) {
-			if (!excl_kernel) {
-				excl_kernel = true;
-				/*
-				 * Both cpus and threads are now owned by evlist
-				 * and will be freed by following perf_evlist__set_maps
-				 * call. Getting reference to keep them alive.
-				 */
-				perf_cpu_map__get(cpus);
-				perf_thread_map__get(threads);
-				perf_evlist__set_maps(&evlist->core, NULL, NULL);
-				evlist__delete(evlist);
-				evlist = NULL;
-				continue;
-			}
+			evidx++;
 
-			if (verbose > 0) {
+			if (events[evidx] == NULL && verbose > 0) {
 				char errbuf[512];
 				evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
 				pr_debug("perf_evlist__open() failed!\n%s\n", errbuf);
 			}
 
-			goto out_put;
+			/*
+			 * Both cpus and threads are now owned by evlist
+			 * and will be freed by following perf_evlist__set_maps
+			 * call. Getting reference to keep them alive.
+			 */
+			perf_cpu_map__get(cpus);
+			perf_thread_map__get(threads);
+			perf_evlist__set_maps(&evlist->core, NULL, NULL);
+			evlist__delete(evlist);
+			evlist = NULL;
+			continue;
 		}
 		break;
 	}
 
+	if (events[evidx] == NULL)
+		goto out_put;
+
 	ret = evlist__mmap(evlist, UINT_MAX);
 	if (ret < 0) {
 		pr_debug("evlist__mmap failed\n");
@@ -721,7 +691,7 @@ static int do_test_code_reading(bool try_kcore)
 		err = TEST_CODE_READING_NO_KERNEL_OBJ;
 	else if (!have_vmlinux && !try_kcore)
 		err = TEST_CODE_READING_NO_VMLINUX;
-	else if (excl_kernel)
+	else if (strstr(events[evidx], ":u"))
 		err = TEST_CODE_READING_NO_ACCESS;
 	else
 		err = TEST_CODE_READING_OK;
diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
index 7730fc2ab40b..bd8e396f3e57 100644
--- a/tools/perf/tests/cpumap.c
+++ b/tools/perf/tests/cpumap.c
@@ -213,7 +213,7 @@ static int test__cpu_map_intersect(struct test_suite *test __maybe_unused,
 
 static int test__cpu_map_equal(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
-	struct perf_cpu_map *any = perf_cpu_map__dummy_new();
+	struct perf_cpu_map *any = perf_cpu_map__new_any_cpu();
 	struct perf_cpu_map *one = perf_cpu_map__new("1");
 	struct perf_cpu_map *two = perf_cpu_map__new("2");
 	struct perf_cpu_map *empty = perf_cpu_map__intersect(one, two);
diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c
index 3419a4ab5590..2d67422c1222 100644
--- a/tools/perf/tests/dso-data.c
+++ b/tools/perf/tests/dso-data.c
@@ -394,6 +394,15 @@ static int test__dso_data_reopen(struct test_suite *test __maybe_unused, int sub
 	return 0;
 }
 
-DEFINE_SUITE("DSO data read", dso_data);
-DEFINE_SUITE("DSO data cache", dso_data_cache);
-DEFINE_SUITE("DSO data reopen", dso_data_reopen);
+
+static struct test_case tests__dso_data[] = {
+	TEST_CASE("read", dso_data),
+	TEST_CASE("cache", dso_data_cache),
+	TEST_CASE("reopen", dso_data_reopen),
+	{	.name = NULL, }
+};
+
+struct test_suite suite__dso_data = {
+	.desc = "DSO data tests",
+	.test_cases = tests__dso_data,
+};
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
index 8f4f9b632e1e..5a3b2bed07f3 100644
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -81,7 +81,7 @@ static int test__keep_tracking(struct test_suite *test __maybe_unused, int subte
 	threads = thread_map__new(-1, getpid(), UINT_MAX);
 	CHECK_NOT_NULL__(threads);
 
-	cpus = perf_cpu_map__new(NULL);
+	cpus = perf_cpu_map__new_online_cpus();
 	CHECK_NOT_NULL__(cpus);
 
 	evlist = evlist__new();
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index d9945ed25bc5..8a4da7eb637a 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -183,7 +183,7 @@ run += make_install_prefix_slash
 # run += make_install_pdf
 run += make_minimal
 
-old_libbpf := $(shell echo '\#include <bpf/libbpf.h>' | $(CC) -E -dM -x c -| egrep -q "define[[:space:]]+LIBBPF_MAJOR_VERSION[[:space:]]+0{1}")
+old_libbpf := $(shell echo '\#include <bpf/libbpf.h>' | $(CC) -E -dM -x c -| grep -q -E "define[[:space:]]+LIBBPF_MAJOR_VERSION[[:space:]]+0{1}")
 
 ifneq ($(old_libbpf),)
 run += make_libbpf_dynamic
diff --git a/tools/perf/tests/maps.c b/tools/perf/tests/maps.c
index 5bb1123a91a7..bb3fbfe5a73e 100644
--- a/tools/perf/tests/maps.c
+++ b/tools/perf/tests/maps.c
@@ -14,44 +14,59 @@ struct map_def {
 	u64 end;
 };
 
+struct check_maps_cb_args {
+	struct map_def *merged;
+	unsigned int i;
+};
+
+static int check_maps_cb(struct map *map, void *data)
+{
+	struct check_maps_cb_args *args = data;
+	struct map_def *merged = &args->merged[args->i];
+
+	if (map__start(map) != merged->start ||
+	    map__end(map) != merged->end ||
+	    strcmp(map__dso(map)->name, merged->name) ||
+	    refcount_read(map__refcnt(map)) != 1) {
+		return 1;
+	}
+	args->i++;
+	return 0;
+}
+
+static int failed_cb(struct map *map, void *data __maybe_unused)
+{
+	pr_debug("\tstart: %" PRIu64 " end: %" PRIu64 " name: '%s' refcnt: %d\n",
+		map__start(map),
+		map__end(map),
+		map__dso(map)->name,
+		refcount_read(map__refcnt(map)));
+
+	return 0;
+}
+
 static int check_maps(struct map_def *merged, unsigned int size, struct maps *maps)
 {
-	struct map_rb_node *rb_node;
-	unsigned int i = 0;
 	bool failed = false;
 
 	if (maps__nr_maps(maps) != size) {
 		pr_debug("Expected %d maps, got %d", size, maps__nr_maps(maps));
 		failed = true;
 	} else {
-		maps__for_each_entry(maps, rb_node) {
-			struct map *map = rb_node->map;
-
-			if (map__start(map) != merged[i].start ||
-			    map__end(map) != merged[i].end ||
-			    strcmp(map__dso(map)->name, merged[i].name) ||
-			    refcount_read(map__refcnt(map)) != 1) {
-				failed = true;
-			}
-			i++;
-		}
+		struct check_maps_cb_args args = {
+			.merged = merged,
+			.i = 0,
+		};
+		failed = maps__for_each_map(maps, check_maps_cb, &args);
 	}
 	if (failed) {
 		pr_debug("Expected:\n");
-		for (i = 0; i < size; i++) {
+		for (unsigned int i = 0; i < size; i++) {
 			pr_debug("\tstart: %" PRIu64 " end: %" PRIu64 " name: '%s' refcnt: 1\n",
 				merged[i].start, merged[i].end, merged[i].name);
 		}
 		pr_debug("Got:\n");
-		maps__for_each_entry(maps, rb_node) {
-			struct map *map = rb_node->map;
-
-			pr_debug("\tstart: %" PRIu64 " end: %" PRIu64 " name: '%s' refcnt: %d\n",
-				map__start(map),
-				map__end(map),
-				map__dso(map)->name,
-				refcount_read(map__refcnt(map)));
-		}
+		maps__for_each_map(maps, failed_cb, NULL);
 	}
 	return failed ? TEST_FAIL : TEST_OK;
 }
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 886a13a77a16..012c8ae439fd 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -52,7 +52,7 @@ static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest
 		return -1;
 	}
 
-	cpus = perf_cpu_map__new(NULL);
+	cpus = perf_cpu_map__new_online_cpus();
 	if (cpus == NULL) {
 		pr_debug("perf_cpu_map__new\n");
 		goto out_free_threads;
diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c
index f3275be83a33..fb114118c876 100644
--- a/tools/perf/tests/openat-syscall-all-cpus.c
+++ b/tools/perf/tests/openat-syscall-all-cpus.c
@@ -37,7 +37,7 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb
 		return -1;
 	}
 
-	cpus = perf_cpu_map__new(NULL);
+	cpus = perf_cpu_map__new_online_cpus();
 	if (cpus == NULL) {
 		pr_debug("perf_cpu_map__new\n");
 		goto out_thread_map_delete;
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index f78be21a5999..fbdf710d5eea 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -162,6 +162,22 @@ static int test__checkevent_numeric(struct evlist *evlist)
 	return TEST_OK;
 }
 
+
+static int assert_hw(struct perf_evsel *evsel, enum perf_hw_id id, const char *name)
+{
+	struct perf_pmu *pmu;
+
+	if (evsel->attr.type == PERF_TYPE_HARDWARE) {
+		TEST_ASSERT_VAL("wrong config", test_perf_config(evsel, id));
+		return 0;
+	}
+	pmu = perf_pmus__find_by_type(evsel->attr.type);
+
+	TEST_ASSERT_VAL("unexpected PMU type", pmu);
+	TEST_ASSERT_VAL("PMU missing event", perf_pmu__have_event(pmu, name));
+	return 0;
+}
+
 static int test__checkevent_symbolic_name(struct evlist *evlist)
 {
 	struct perf_evsel *evsel;
@@ -169,10 +185,12 @@ static int test__checkevent_symbolic_name(struct evlist *evlist)
 	TEST_ASSERT_VAL("wrong number of entries", 0 != evlist->core.nr_entries);
 
 	perf_evlist__for_each_evsel(&evlist->core, evsel) {
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
-		TEST_ASSERT_VAL("wrong config",
-				test_perf_config(evsel, PERF_COUNT_HW_INSTRUCTIONS));
+		int ret = assert_hw(evsel, PERF_COUNT_HW_INSTRUCTIONS, "instructions");
+
+		if (ret)
+			return ret;
 	}
+
 	return TEST_OK;
 }
 
@@ -183,8 +201,10 @@ static int test__checkevent_symbolic_name_config(struct evlist *evlist)
 	TEST_ASSERT_VAL("wrong number of entries", 0 != evlist->core.nr_entries);
 
 	perf_evlist__for_each_evsel(&evlist->core, evsel) {
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
-		TEST_ASSERT_VAL("wrong config", test_perf_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+		int ret = assert_hw(evsel, PERF_COUNT_HW_CPU_CYCLES, "cycles");
+
+		if (ret)
+			return ret;
 		/*
 		 * The period value gets configured within evlist__config,
 		 * while this test executes only parse events method.
@@ -861,10 +881,14 @@ static int test__group1(struct evlist *evlist)
 			evlist__nr_groups(evlist) == num_core_entries());
 
 	for (int i = 0; i < num_core_entries(); i++) {
+		int ret;
+
 		/* instructions:k */
 		evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
 		TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
 		TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
@@ -878,8 +902,10 @@ static int test__group1(struct evlist *evlist)
 
 		/* cycles:upp */
 		evsel = evsel__next(evsel);
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
 		TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
 		TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
@@ -907,6 +933,8 @@ static int test__group2(struct evlist *evlist)
 	TEST_ASSERT_VAL("wrong number of groups", 1 == evlist__nr_groups(evlist));
 
 	evlist__for_each_entry(evlist, evsel) {
+		int ret;
+
 		if (evsel->core.attr.type == PERF_TYPE_SOFTWARE) {
 			/* faults + :ku modifier */
 			leader = evsel;
@@ -939,8 +967,10 @@ static int test__group2(struct evlist *evlist)
 			continue;
 		}
 		/* cycles:k */
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
 		TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
 		TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
@@ -957,6 +987,7 @@ static int test__group2(struct evlist *evlist)
 static int test__group3(struct evlist *evlist __maybe_unused)
 {
 	struct evsel *evsel, *group1_leader = NULL, *group2_leader = NULL;
+	int ret;
 
 	TEST_ASSERT_VAL("wrong number of entries",
 			evlist->core.nr_entries == (3 * perf_pmus__num_core_pmus() + 2));
@@ -1045,8 +1076,10 @@ static int test__group3(struct evlist *evlist __maybe_unused)
 			continue;
 		}
 		/* instructions:u */
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
 		TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
 		TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
@@ -1070,10 +1103,14 @@ static int test__group4(struct evlist *evlist __maybe_unused)
 			num_core_entries() == evlist__nr_groups(evlist));
 
 	for (int i = 0; i < num_core_entries(); i++) {
+		int ret;
+
 		/* cycles:u + p */
 		evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
 		TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
 		TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
@@ -1089,8 +1126,10 @@ static int test__group4(struct evlist *evlist __maybe_unused)
 
 		/* instructions:kp + p */
 		evsel = evsel__next(evsel);
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
 		TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
 		TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
@@ -1108,6 +1147,7 @@ static int test__group4(struct evlist *evlist __maybe_unused)
 static int test__group5(struct evlist *evlist __maybe_unused)
 {
 	struct evsel *evsel = NULL, *leader;
+	int ret;
 
 	TEST_ASSERT_VAL("wrong number of entries",
 			evlist->core.nr_entries == (5 * num_core_entries()));
@@ -1117,8 +1157,10 @@ static int test__group5(struct evlist *evlist __maybe_unused)
 	for (int i = 0; i < num_core_entries(); i++) {
 		/* cycles + G */
 		evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
 		TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
 		TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
@@ -1133,8 +1175,10 @@ static int test__group5(struct evlist *evlist __maybe_unused)
 
 		/* instructions + G */
 		evsel = evsel__next(evsel);
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
 		TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
 		TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
@@ -1148,8 +1192,10 @@ static int test__group5(struct evlist *evlist __maybe_unused)
 	for (int i = 0; i < num_core_entries(); i++) {
 		/* cycles:G */
 		evsel = leader = evsel__next(evsel);
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
 		TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
 		TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
@@ -1164,8 +1210,10 @@ static int test__group5(struct evlist *evlist __maybe_unused)
 
 		/* instructions:G */
 		evsel = evsel__next(evsel);
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
 		TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
 		TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
@@ -1178,8 +1226,10 @@ static int test__group5(struct evlist *evlist __maybe_unused)
 	for (int i = 0; i < num_core_entries(); i++) {
 		/* cycles */
 		evsel = evsel__next(evsel);
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
 		TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
 		TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
@@ -1201,10 +1251,14 @@ static int test__group_gh1(struct evlist *evlist)
 			evlist__nr_groups(evlist) == num_core_entries());
 
 	for (int i = 0; i < num_core_entries(); i++) {
+		int ret;
+
 		/* cycles + :H group modifier */
 		evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
 		TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
 		TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
@@ -1218,8 +1272,10 @@ static int test__group_gh1(struct evlist *evlist)
 
 		/* cache-misses:G + :H group modifier */
 		evsel = evsel__next(evsel);
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
 		TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
 		TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
@@ -1242,10 +1298,14 @@ static int test__group_gh2(struct evlist *evlist)
 			evlist__nr_groups(evlist) == num_core_entries());
 
 	for (int i = 0; i < num_core_entries(); i++) {
+		int ret;
+
 		/* cycles + :G group modifier */
 		evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
 		TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
 		TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
@@ -1259,8 +1319,10 @@ static int test__group_gh2(struct evlist *evlist)
 
 		/* cache-misses:H + :G group modifier */
 		evsel = evsel__next(evsel);
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
 		TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
 		TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
@@ -1283,10 +1345,14 @@ static int test__group_gh3(struct evlist *evlist)
 			evlist__nr_groups(evlist) == num_core_entries());
 
 	for (int i = 0; i < num_core_entries(); i++) {
+		int ret;
+
 		/* cycles:G + :u group modifier */
 		evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
 		TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
 		TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
@@ -1300,8 +1366,10 @@ static int test__group_gh3(struct evlist *evlist)
 
 		/* cache-misses:H + :u group modifier */
 		evsel = evsel__next(evsel);
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
 		TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
 		TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
@@ -1324,10 +1392,14 @@ static int test__group_gh4(struct evlist *evlist)
 			evlist__nr_groups(evlist) == num_core_entries());
 
 	for (int i = 0; i < num_core_entries(); i++) {
+		int ret;
+
 		/* cycles:G + :uG group modifier */
 		evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
 		TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
 		TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
@@ -1341,8 +1413,10 @@ static int test__group_gh4(struct evlist *evlist)
 
 		/* cache-misses:H + :uG group modifier */
 		evsel = evsel__next(evsel);
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
 		TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
 		TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
@@ -1363,10 +1437,14 @@ static int test__leader_sample1(struct evlist *evlist)
 			evlist->core.nr_entries == (3 * num_core_entries()));
 
 	for (int i = 0; i < num_core_entries(); i++) {
+		int ret;
+
 		/* cycles - sampling group leader */
 		evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
 		TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
 		TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
@@ -1379,8 +1457,10 @@ static int test__leader_sample1(struct evlist *evlist)
 
 		/* cache-misses - not sampling */
 		evsel = evsel__next(evsel);
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
 		TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
 		TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
@@ -1392,8 +1472,10 @@ static int test__leader_sample1(struct evlist *evlist)
 
 		/* branch-misses - not sampling */
 		evsel = evsel__next(evsel);
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
 		TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
 		TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
@@ -1415,10 +1497,14 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused)
 			evlist->core.nr_entries == (2 * num_core_entries()));
 
 	for (int i = 0; i < num_core_entries(); i++) {
+		int ret;
+
 		/* instructions - sampling group leader */
 		evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
 		TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
 		TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
@@ -1431,8 +1517,10 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused)
 
 		/* branch-misses - not sampling */
 		evsel = evsel__next(evsel);
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
 		TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
 		TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
@@ -1472,10 +1560,14 @@ static int test__pinned_group(struct evlist *evlist)
 			evlist->core.nr_entries == (3 * num_core_entries()));
 
 	for (int i = 0; i < num_core_entries(); i++) {
+		int ret;
+
 		/* cycles - group leader */
 		evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
 		TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
 		/* TODO: The group modifier is not copied to the split group leader. */
@@ -1484,13 +1576,18 @@ static int test__pinned_group(struct evlist *evlist)
 
 		/* cache-misses - can not be pinned, but will go on with the leader */
 		evsel = evsel__next(evsel);
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
 
 		/* branch-misses - ditto */
 		evsel = evsel__next(evsel);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
 	}
 	return TEST_OK;
@@ -1517,10 +1614,14 @@ static int test__exclusive_group(struct evlist *evlist)
 			evlist->core.nr_entries == 3 * num_core_entries());
 
 	for (int i = 0; i < num_core_entries(); i++) {
+		int ret;
+
 		/* cycles - group leader */
 		evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel));
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
 		TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader));
 		/* TODO: The group modifier is not copied to the split group leader. */
@@ -1529,13 +1630,18 @@ static int test__exclusive_group(struct evlist *evlist)
 
 		/* cache-misses - can not be pinned, but will go on with the leader */
 		evsel = evsel__next(evsel);
-		TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive);
 
 		/* branch-misses - ditto */
 		evsel = evsel__next(evsel);
-		TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES));
+		ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses");
+		if (ret)
+			return ret;
+
 		TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive);
 	}
 	return TEST_OK;
@@ -1677,9 +1783,11 @@ static int test__checkevent_raw_pmu(struct evlist *evlist)
 static int test__sym_event_slash(struct evlist *evlist)
 {
 	struct evsel *evsel = evlist__first(evlist);
+	int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
+
+	if (ret)
+		return ret;
 
-	TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE);
-	TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
 	TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
 	return TEST_OK;
 }
@@ -1687,9 +1795,11 @@ static int test__sym_event_slash(struct evlist *evlist)
 static int test__sym_event_dc(struct evlist *evlist)
 {
 	struct evsel *evsel = evlist__first(evlist);
+	int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
+
+	if (ret)
+		return ret;
 
-	TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE);
-	TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
 	TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
 	return TEST_OK;
 }
@@ -1697,9 +1807,11 @@ static int test__sym_event_dc(struct evlist *evlist)
 static int test__term_equal_term(struct evlist *evlist)
 {
 	struct evsel *evsel = evlist__first(evlist);
+	int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
+
+	if (ret)
+		return ret;
 
-	TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE);
-	TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
 	TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "name") == 0);
 	return TEST_OK;
 }
@@ -1707,9 +1819,11 @@ static int test__term_equal_term(struct evlist *evlist)
 static int test__term_equal_legacy(struct evlist *evlist)
 {
 	struct evsel *evsel = evlist__first(evlist);
+	int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles");
+
+	if (ret)
+		return ret;
 
-	TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE);
-	TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES));
 	TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "l1d") == 0);
 	return TEST_OK;
 }
@@ -2549,7 +2663,7 @@ static int test__pmu_events(struct test_suite *test __maybe_unused, int subtest
 			if (strchr(ent->d_name, '.'))
 				continue;
 
-			/* exclude parametrized ones (name contains '?') */
+			/* exclude parameterized ones (name contains '?') */
 			n = snprintf(pmu_event, sizeof(pmu_event), "%s%s", path, ent->d_name);
 			if (n >= PATH_MAX) {
 				pr_err("pmu event name crossed PATH_MAX(%d) size\n", PATH_MAX);
@@ -2578,7 +2692,7 @@ static int test__pmu_events(struct test_suite *test __maybe_unused, int subtest
 			fclose(file);
 
 			if (is_event_parameterized == 1) {
-				pr_debug("skipping parametrized PMU event: %s which contains ?\n", pmu_event);
+				pr_debug("skipping parameterized PMU event: %s which contains ?\n", pmu_event);
 				continue;
 			}
 
diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
index efcd71c2738a..bbe2ddeb9b74 100644
--- a/tools/perf/tests/perf-time-to-tsc.c
+++ b/tools/perf/tests/perf-time-to-tsc.c
@@ -93,7 +93,7 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su
 	threads = thread_map__new(-1, getpid(), UINT_MAX);
 	CHECK_NOT_NULL__(threads);
 
-	cpus = perf_cpu_map__new(NULL);
+	cpus = perf_cpu_map__new_online_cpus();
 	CHECK_NOT_NULL__(cpus);
 
 	evlist = evlist__new();
diff --git a/tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c b/tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c
index a7e169d1bf64..5f886cd09e6b 100644
--- a/tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c
+++ b/tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c
@@ -42,7 +42,6 @@ static pthread_t new_thr(void *(*fn) (void *arg), void *arg)
 int main(int argc, char **argv)
 {
 	unsigned long i, len, size, thr;
-	pthread_t threads[256];
 	struct args args[256];
 	long long v;
 
diff --git a/tools/perf/tests/shell/coresight/thread_loop/thread_loop.c b/tools/perf/tests/shell/coresight/thread_loop/thread_loop.c
index c0158fac7d0b..e05a559253ca 100644
--- a/tools/perf/tests/shell/coresight/thread_loop/thread_loop.c
+++ b/tools/perf/tests/shell/coresight/thread_loop/thread_loop.c
@@ -57,7 +57,6 @@ static pthread_t new_thr(void *(*fn) (void *arg), void *arg)
 int main(int argc, char **argv)
 {
 	unsigned int i, len, thr;
-	pthread_t threads[256];
 	struct args args[256];
 
 	if (argc < 3) {
diff --git a/tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c b/tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c
index 8f6d384208ed..0fc7bf1a25af 100644
--- a/tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c
+++ b/tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c
@@ -51,7 +51,6 @@ static pthread_t new_thr(void *(*fn) (void *arg), void *arg)
 int main(int argc, char **argv)
 {
 	unsigned int i, thr;
-	pthread_t threads[256];
 	struct args args[256];
 
 	if (argc < 2) {
diff --git a/tools/perf/tests/shell/diff.sh b/tools/perf/tests/shell/diff.sh
new file mode 100755
index 000000000000..14b87af88703
--- /dev/null
+++ b/tools/perf/tests/shell/diff.sh
@@ -0,0 +1,108 @@
+#!/bin/sh
+# perf diff tests
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+err=0
+perfdata1=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
+perfdata2=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
+perfdata3=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
+testprog="perf test -w thloop"
+
+shelldir=$(dirname "$0")
+# shellcheck source=lib/perf_has_symbol.sh
+. "${shelldir}"/lib/perf_has_symbol.sh
+
+testsym="test_loop"
+
+skip_test_missing_symbol ${testsym}
+
+cleanup() {
+  rm -rf "${perfdata1}"
+  rm -rf "${perfdata1}".old
+  rm -rf "${perfdata2}"
+  rm -rf "${perfdata2}".old
+  rm -rf "${perfdata3}"
+  rm -rf "${perfdata3}".old
+
+  trap - EXIT TERM INT
+}
+
+trap_cleanup() {
+  cleanup
+  exit 1
+}
+trap trap_cleanup EXIT TERM INT
+
+make_data() { # record ${testprog} into file $1; stdout carries only the status: 0 or 1
+  file="$1"
+  if ! perf record -o "${file}" ${testprog} 2> /dev/null
+  then
+    echo "Workload record [Failed record]" >&2 # stdout is captured by the callers
+    echo 1
+    return
+  fi
+  if ! perf report -i "${file}" -q | grep -q "${testsym}"
+  then
+    echo "Workload record [Failed missing output]" >&2
+    echo 1
+    return
+  fi
+  echo 0
+}
+
+test_two_files() { # diff two recordings; sets err=1 on failure, never resets it
+  echo "Basic two file diff test"
+  if [ "$(make_data "${perfdata1}")" != 0 ] # quoted: output may be more than one word
+  then
+    err=1
+    return
+  fi
+  if [ "$(make_data "${perfdata2}")" != 0 ]
+  then
+    err=1
+    return
+  fi
+
+  if ! perf diff "${perfdata1}" "${perfdata2}" | grep -q "${testsym}"
+  then
+    echo "Basic two file diff test [Failed diff]"
+    err=1
+    return
+  fi
+  echo "Basic two file diff test [Success]"
+}
+
+test_three_files() { # diff three recordings; sets err=1 on failure, never resets it
+  echo "Basic three file diff test"
+  if [ "$(make_data "${perfdata1}")" != 0 ] # do not assign to err: keep an earlier failure
+  then
+    err=1
+    return
+  fi
+  if [ "$(make_data "${perfdata2}")" != 0 ]
+  then
+    err=1
+    return
+  fi
+  if [ "$(make_data "${perfdata3}")" != 0 ]
+  then
+    err=1
+    return
+  fi
+
+  if ! perf diff "${perfdata1}" "${perfdata2}" "${perfdata3}" | grep -q "${testsym}"
+  then
+    echo "Basic three file diff test [Failed diff]"
+    err=1
+    return
+  fi
+  echo "Basic three file diff test [Success]"
+}
+
+test_two_files
+test_three_files
+
+cleanup
+exit $err
diff --git a/tools/perf/tests/shell/lib/perf_has_symbol.sh b/tools/perf/tests/shell/lib/perf_has_symbol.sh
new file mode 100644
index 000000000000..5d59c32ae3e7
--- /dev/null
+++ b/tools/perf/tests/shell/lib/perf_has_symbol.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+perf_has_symbol() # $1: symbol name; returns 0 if found, 1 otherwise
+{
+	if perf test -vv "Symbols" 2>&1 | grep "[[:space:]]$1$"; then # line must end in whitespace + $1
+		echo "perf does have symbol '$1'"
+		return 0
+	fi
+	echo "perf does not have symbol '$1'"
+	return 1
+}
+
+skip_test_missing_symbol() # $1: symbol name; exits the calling script when perf lacks it
+{
+	if ! perf_has_symbol "$1" ; then
+		echo "perf is missing symbols - skipping test"
+		exit 2 # same status the other shell tests use when they skip
+	fi
+	return 0
+}
diff --git a/tools/perf/tests/shell/lib/setup_python.sh b/tools/perf/tests/shell/lib/setup_python.sh
new file mode 100644
index 000000000000..c2fce1793538
--- /dev/null
+++ b/tools/perf/tests/shell/lib/setup_python.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+if [ "x$PYTHON" = "x" ]
+then
+  python3 --version >/dev/null 2>&1 && PYTHON=python3
+fi
+if [ "x$PYTHON" = "x" ]
+then
+  python --version >/dev/null 2>&1 && PYTHON=python
+fi
+if [ "x$PYTHON" = "x" ]
+then
+  echo Skipping test, python not detected please set environment variable PYTHON.
+  exit 2
+fi
diff --git a/tools/perf/tests/shell/list.sh b/tools/perf/tests/shell/list.sh
new file mode 100755
index 000000000000..22b004f2b23e
--- /dev/null
+++ b/tools/perf/tests/shell/list.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+# perf list tests
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+err=0
+
+shelldir=$(dirname "$0")
+# shellcheck source=lib/setup_python.sh
+. "${shelldir}"/lib/setup_python.sh
+
+test_list_json() {
+  echo "Json output test"
+  perf list -j | $PYTHON -m json.tool
+  echo "Json output test [Success]"
+}
+
+test_list_json
+exit $err
diff --git a/tools/perf/tests/shell/pipe_test.sh b/tools/perf/tests/shell/pipe_test.sh
index 8dd115dd35a7..a78d35d2cff0 100755
--- a/tools/perf/tests/shell/pipe_test.sh
+++ b/tools/perf/tests/shell/pipe_test.sh
@@ -2,10 +2,17 @@
 # perf pipe recording and injection test
 # SPDX-License-Identifier: GPL-2.0
 
+shelldir=$(dirname "$0")
+# shellcheck source=lib/perf_has_symbol.sh
+. "${shelldir}"/lib/perf_has_symbol.sh
+
+sym="noploop"
+
+skip_test_missing_symbol ${sym}
+
 data=$(mktemp /tmp/perf.data.XXXXXX)
 prog="perf test -w noploop"
 task="perf"
-sym="noploop"
 
 if ! perf record -e task-clock:u -o - ${prog} | perf report -i - --task | grep ${task}; then
 	echo "cannot find the test file in the perf report"
diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
index eebeea6bdc76..72c65570db37 100755
--- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
+++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
@@ -45,7 +45,10 @@ trace_libc_inet_pton_backtrace() {
 		;;
 	ppc64|ppc64le)
 		eventattr='max-stack=4'
-		echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected
+		# Add gaih_inet to expected backtrace only if it is part of libc.
+		if nm $libc | grep -F -q gaih_inet.; then
+			echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected
+		fi
 		echo "getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected
 		echo ".*(\+0x[[:xdigit:]]+|\[unknown\])[[:space:]]\(.*/bin/ping.*\)$" >> $expected
 		;;
diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
index 29443b8e8876..3d1a7759a7b2 100755
--- a/tools/perf/tests/shell/record.sh
+++ b/tools/perf/tests/shell/record.sh
@@ -8,10 +8,19 @@ shelldir=$(dirname "$0")
 # shellcheck source=lib/waiting.sh
 . "${shelldir}"/lib/waiting.sh
 
+# shellcheck source=lib/perf_has_symbol.sh
+. "${shelldir}"/lib/perf_has_symbol.sh
+
+testsym="test_loop"
+
+skip_test_missing_symbol ${testsym}
+
 err=0
 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
 testprog="perf test -w thloop"
-testsym="test_loop"
+cpu_pmu_dir="/sys/bus/event_source/devices/cpu*"
+br_cntr_file="/caps/branch_counter_nr"
+br_cntr_output="branch stack counters"
 
 cleanup() {
   rm -rf "${perfdata}"
@@ -155,10 +164,37 @@ test_workload() {
   echo "Basic target workload test [Success]"
 }
 
+test_branch_counter() { # record with -j any,counter, then look for "$br_cntr_output" in the dump
+  echo "Basic branch counter test"
+  # Check if the branch counter feature is supported
+  for dir in $cpu_pmu_dir
+  do
+    if [ ! -e "$dir$br_cntr_file" ]
+    then
+      echo "branch counter feature not supported on all core PMUs ($dir) [Skipped]"
+      return
+    fi
+  done
+  if ! perf record -o "${perfdata}" -j any,counter ${testprog} 2> /dev/null
+  then
+    echo "Basic branch counter test [Failed record]"
+    err=1
+    return
+  fi
+  if ! perf report -i "${perfdata}" -D -q | grep -q "$br_cntr_output"
+  then
+    echo "Basic branch counter test [Failed missing output]"
+    err=1
+    return
+  fi
+  echo "Basic branch counter test [Success]"
+}
+
 test_per_thread
 test_register_capture
 test_system_wide
 test_workload
+test_branch_counter
 
 cleanup
 exit $err
diff --git a/tools/perf/tests/shell/record_offcpu.sh b/tools/perf/tests/shell/record_offcpu.sh
index a1ef8f0d2b5c..67c925f3a15a 100755
--- a/tools/perf/tests/shell/record_offcpu.sh
+++ b/tools/perf/tests/shell/record_offcpu.sh
@@ -77,9 +77,9 @@ test_offcpu_child() {
     err=1
     return
   fi
-  # each process waits for read and write, so it should be more than 800 events
+  # each process waits at least for poll, so it should be more than 400 events
   if ! perf report -i ${perfdata} -s comm -q -n -t ';' --percent-limit=90 | \
-    awk -F ";" '{ if (NF > 3 && int($3) < 800) exit 1; }'
+    awk -F ";" '{ if (NF > 3 && int($3) < 400) exit 1; }'
   then
     echo "Child task off-cpu test [Failed invalid output]"
     err=1
diff --git a/tools/perf/tests/shell/script.sh b/tools/perf/tests/shell/script.sh
new file mode 100755
index 000000000000..5ae7bd0031a8
--- /dev/null
+++ b/tools/perf/tests/shell/script.sh
@@ -0,0 +1,66 @@
+#!/bin/sh
+# perf script tests
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+temp_dir=$(mktemp -d /tmp/perf-test-script.XXXXXXXXXX)
+
+perfdatafile="${temp_dir}/perf.data"
+db_test="${temp_dir}/db_test.py"
+
+err=0
+
+cleanup() # remove the temporary directory and drop the exit/signal traps
+{
+	trap - EXIT TERM INT
+	sane=$(echo "${temp_dir}" | cut -b 1-21) # first 21 bytes of the temp dir path
+	if [ "${sane}" = "/tmp/perf-test-script" ] ; then # delete only under the expected prefix
+		echo "--- Cleaning up ---"
+		rm -f "${temp_dir}/"*
+		rmdir "${temp_dir}"
+	fi
+}
+
+trap_cleanup()
+{
+	cleanup
+	exit 1
+}
+
+trap trap_cleanup EXIT TERM INT
+
+
+test_db()
+{
+	echo "DB test"
+	# Check if python script is supported. grep -c exits 1 when it counts 0
+	# lines, so "|| true" keeps 'set -e' from aborting before the SKIP path.
+	libpython=$(perf version --build-options | grep python | grep -cv OFF) || true
+	if [ "${libpython}" != "1" ] ; then
+		echo "SKIP: python scripting is not supported"
+		err=2
+		return
+	fi
+
+	cat << "_end_of_file_" > "${db_test}"
+perf_db_export_mode = True
+perf_db_export_calls = False
+perf_db_export_callchains = True
+
+def sample_table(*args):
+    print(f'sample_table({args})')
+
+def call_path_table(*args):
+    print(f'call_path_table({args})')
+_end_of_file_
+	perf record -g -o "${perfdatafile}" true
+	perf script -i "${perfdatafile}" -s "${db_test}"
+	echo "DB test [Success]"
+}
+
+test_db
+
+cleanup
+
+exit $err
diff --git a/tools/perf/tests/shell/stat+json_output.sh b/tools/perf/tests/shell/stat+json_output.sh
index 196e22672c50..3bc900533a5d 100755
--- a/tools/perf/tests/shell/stat+json_output.sh
+++ b/tools/perf/tests/shell/stat+json_output.sh
@@ -8,20 +8,10 @@ set -e
 
 skip_test=0
 
+shelldir=$(dirname "$0")
+# shellcheck source=lib/setup_python.sh
+. "${shelldir}"/lib/setup_python.sh
 pythonchecker=$(dirname $0)/lib/perf_json_output_lint.py
-if [ "x$PYTHON" == "x" ]
-then
-	if which python3 > /dev/null
-	then
-		PYTHON=python3
-	elif which python > /dev/null
-	then
-		PYTHON=python
-	else
-		echo Skipping test, python not detected please set environment variable PYTHON.
-		exit 2
-	fi
-fi
 
 stat_output=$(mktemp /tmp/__perf_test.stat_output.json.XXXXX)
 
diff --git a/tools/perf/tests/shell/stat_all_pmu.sh b/tools/perf/tests/shell/stat_all_pmu.sh
index c77955419173..d2a3506e0d19 100755
--- a/tools/perf/tests/shell/stat_all_pmu.sh
+++ b/tools/perf/tests/shell/stat_all_pmu.sh
@@ -4,7 +4,7 @@
 
 set -e
 
-# Test all PMU events; however exclude parametrized ones (name contains '?')
+# Test all PMU events; however exclude parameterized ones (name contains '?')
 for p in $(perf list --raw-dump pmu | sed 's/[[:graph:]]\+?[[:graph:]]\+[[:space:]]//g'); do
   echo "Testing $p"
   result=$(perf stat -e "$p" true 2>&1)
diff --git a/tools/perf/tests/shell/stat_metrics_values.sh b/tools/perf/tests/shell/stat_metrics_values.sh
index ad94c936de7e..7ca172599aa6 100755
--- a/tools/perf/tests/shell/stat_metrics_values.sh
+++ b/tools/perf/tests/shell/stat_metrics_values.sh
@@ -1,16 +1,10 @@
 #!/bin/bash
 # perf metrics value validation
 # SPDX-License-Identifier: GPL-2.0
-if [ "x$PYTHON" == "x" ]
-then
-	if which python3 > /dev/null
-	then
-		PYTHON=python3
-	else
-		echo Skipping test, python3 not detected please set environment variable PYTHON.
-		exit 2
-	fi
-fi
+
+shelldir=$(dirname "$0")
+# shellcheck source=lib/setup_python.sh
+. "${shelldir}"/lib/setup_python.sh
 
 grep -q GenuineIntel /proc/cpuinfo || { echo Skipping non-Intel; exit 2; }
 
diff --git a/tools/perf/tests/shell/test_arm_callgraph_fp.sh b/tools/perf/tests/shell/test_arm_callgraph_fp.sh
index 66dfdfdad553..e342e6c8aa50 100755
--- a/tools/perf/tests/shell/test_arm_callgraph_fp.sh
+++ b/tools/perf/tests/shell/test_arm_callgraph_fp.sh
@@ -2,8 +2,14 @@
 # Check Arm64 callgraphs are complete in fp mode
 # SPDX-License-Identifier: GPL-2.0
 
+shelldir=$(dirname "$0")
+# shellcheck source=lib/perf_has_symbol.sh
+. "${shelldir}"/lib/perf_has_symbol.sh
+
 lscpu | grep -q "aarch64" || exit 2
 
+skip_test_missing_symbol leafloop
+
 PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
 TEST_PROGRAM="perf test -w leafloop"
 
diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh
index 09908d71c994..5f14d0cb013f 100755
--- a/tools/perf/tests/shell/test_brstack.sh
+++ b/tools/perf/tests/shell/test_brstack.sh
@@ -4,6 +4,10 @@
 # SPDX-License-Identifier: GPL-2.0
 # German Gomez <german.gomez@arm.com>, 2022
 
+shelldir=$(dirname "$0")
+# shellcheck source=lib/perf_has_symbol.sh
+. "${shelldir}"/lib/perf_has_symbol.sh
+
 # skip the test if the hardware doesn't support branch stack sampling
 # and if the architecture doesn't support filter types: any,save_type,u
 if ! perf record -o- --no-buildid --branch-filter any,save_type,u -- true > /dev/null 2>&1 ; then
@@ -11,6 +15,8 @@ if ! perf record -o- --no-buildid --branch-filter any,save_type,u -- true > /dev
 	exit 2
 fi
 
+skip_test_missing_symbol brstack_bench
+
 TMPDIR=$(mktemp -d /tmp/__perf_test.program.XXXXX)
 TESTPROG="perf test -w brstack"
 
diff --git a/tools/perf/tests/shell/test_data_symbol.sh b/tools/perf/tests/shell/test_data_symbol.sh
index 69bb6fe86c50..3dfa91832aa8 100755
--- a/tools/perf/tests/shell/test_data_symbol.sh
+++ b/tools/perf/tests/shell/test_data_symbol.sh
@@ -4,6 +4,13 @@
 # SPDX-License-Identifier: GPL-2.0
 # Leo Yan <leo.yan@linaro.org>, 2022
 
+shelldir=$(dirname "$0")
+# shellcheck source=lib/waiting.sh
+. "${shelldir}"/lib/waiting.sh
+
+# shellcheck source=lib/perf_has_symbol.sh
+. "${shelldir}"/lib/perf_has_symbol.sh
+
 skip_if_no_mem_event() {
 	perf mem record -e list 2>&1 | grep -E -q 'available' && return 0
 	return 2
@@ -11,8 +18,11 @@ skip_if_no_mem_event() {
 
 skip_if_no_mem_event || exit 2
 
+skip_test_missing_symbol buf1
+
 TEST_PROGRAM="perf test -w datasym"
 PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
+ERR_FILE=$(mktemp /tmp/__perf_test.stderr.XXXXX)
 
 check_result() {
 	# The memory report format is as below:
@@ -50,13 +60,15 @@ echo "Recording workload..."
 # specific CPU and test in per-CPU mode.
 is_amd=$(grep -E -c 'vendor_id.*AuthenticAMD' /proc/cpuinfo)
 if (($is_amd >= 1)); then
-	perf mem record -o ${PERF_DATA} -C 0 -- taskset -c 0 $TEST_PROGRAM &
+	perf mem record -vvv -o ${PERF_DATA} -C 0 -- taskset -c 0 $TEST_PROGRAM 2>"${ERR_FILE}" &
 else
-	perf mem record --all-user -o ${PERF_DATA} -- $TEST_PROGRAM &
+	perf mem record -vvv --all-user -o ${PERF_DATA} -- $TEST_PROGRAM 2>"${ERR_FILE}" &
 fi
 
 PERFPID=$!
 
+wait_for_perf_to_start ${PERFPID} "${ERR_FILE}"
+
 sleep 1
 
 kill $PERFPID
diff --git a/tools/perf/tests/shell/test_perf_data_converter_json.sh b/tools/perf/tests/shell/test_perf_data_converter_json.sh
index 6ded58f98f55..c4f1b59d116f 100755
--- a/tools/perf/tests/shell/test_perf_data_converter_json.sh
+++ b/tools/perf/tests/shell/test_perf_data_converter_json.sh
@@ -6,16 +6,9 @@ set -e
 
 err=0
 
-if [ "$PYTHON" = "" ] ; then
-	if which python3 > /dev/null ; then
-		PYTHON=python3
-	elif which python > /dev/null ; then
-		PYTHON=python
-	else
-		echo Skipping test, python not detected please set environment variable PYTHON.
-		exit 2
-	fi
-fi
+shelldir=$(dirname "$0")
+# shellcheck source=lib/setup_python.sh
+. "${shelldir}"/lib/setup_python.sh
 
 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
 result=$(mktemp /tmp/__perf_test.output.json.XXXXX)
diff --git a/tools/perf/tests/sigtrap.c b/tools/perf/tests/sigtrap.c
index 1de7478ec189..e6fd934b027a 100644
--- a/tools/perf/tests/sigtrap.c
+++ b/tools/perf/tests/sigtrap.c
@@ -57,36 +57,79 @@ static struct perf_event_attr make_event_attr(void)
 #ifdef HAVE_BPF_SKEL
 #include <bpf/btf.h>
 
-static bool attr_has_sigtrap(void)
+static struct btf *btf;
+
+static bool btf__available(void)
 {
-	bool ret = false;
-	struct btf *btf;
-	const struct btf_type *t;
+	if (btf == NULL)
+		btf = btf__load_vmlinux_btf();
+
+	return btf != NULL;
+}
+
+static void btf__exit(void)
+{
+	btf__free(btf);
+	btf = NULL;
+}
+
+static const struct btf_member *__btf_type__find_member_by_name(int type_id, const char *member_name)
+{
+	const struct btf_type *t = btf__type_by_id(btf, type_id);
 	const struct btf_member *m;
-	const char *name;
-	int i, id;
+	int i;
+
+	for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
+		const char *current_member_name = btf__name_by_offset(btf, m->name_off);
+		if (!strcmp(current_member_name, member_name))
+			return m;
+	}
 
-	btf = btf__load_vmlinux_btf();
-	if (btf == NULL) {
+	return NULL;
+}
+
+static bool attr_has_sigtrap(void)
+{
+	int id;
+
+	if (!btf__available()) {
 		/* should be an old kernel */
 		return false;
 	}
 
 	id = btf__find_by_name_kind(btf, "perf_event_attr", BTF_KIND_STRUCT);
 	if (id < 0)
-		goto out;
+		return false;
 
-	t = btf__type_by_id(btf, id);
-	for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
-		name = btf__name_by_offset(btf, m->name_off);
-		if (!strcmp(name, "sigtrap")) {
-			ret = true;
-			break;
-		}
-	}
-out:
-	btf__free(btf);
-	return ret;
+	return __btf_type__find_member_by_name(id, "sigtrap") != NULL;
+}
+
+static bool kernel_with_sleepable_spinlocks(void)
+{
+	const struct btf_member *member;
+	const struct btf_type *type;
+	const char *type_name;
+	int id;
+
+	if (!btf__available())	// no vmlinux BTF could be loaded
+		return false;
+
+	id = btf__find_by_name_kind(btf, "spinlock", BTF_KIND_STRUCT);
+	if (id < 0)
+		return false;
+
+	// Only RT has a "lock" member for "struct spinlock"
+	member = __btf_type__find_member_by_name(id, "lock");
+	if (member == NULL)
+		return false;
+
+	// But check its type as well: it must be a struct named "rt_mutex_base"
+	type = btf__type_by_id(btf, member->type);
+	if (!type || !btf_is_struct(type))
+		return false;
+
+	type_name = btf__name_by_offset(btf, type->name_off);
+	return type_name && !strcmp(type_name, "rt_mutex_base");
 }
 #else  /* !HAVE_BPF_SKEL */
 static bool attr_has_sigtrap(void)
@@ -109,6 +152,15 @@ static bool attr_has_sigtrap(void)
 
 	return ret;
 }
+
+static bool kernel_with_sleepable_spinlocks(void)
+{
+	return false;
+}
+
+static void btf__exit(void)
+{
+}
 #endif  /* HAVE_BPF_SKEL */
 
 static void
@@ -147,7 +199,7 @@ static int run_test_threads(pthread_t *threads, pthread_barrier_t *barrier)
 
 static int run_stress_test(int fd, pthread_t *threads, pthread_barrier_t *barrier)
 {
-	int ret;
+	int ret, expected_sigtraps;
 
 	ctx.iterate_on = 3000;
 
@@ -156,7 +208,16 @@ static int run_stress_test(int fd, pthread_t *threads, pthread_barrier_t *barrie
 	ret = run_test_threads(threads, barrier);
 	TEST_ASSERT_EQUAL("disable failed", ioctl(fd, PERF_EVENT_IOC_DISABLE, 0), 0);
 
-	TEST_ASSERT_EQUAL("unexpected sigtraps", ctx.signal_count, NUM_THREADS * ctx.iterate_on);
+	expected_sigtraps = NUM_THREADS * ctx.iterate_on;
+
+	if (ctx.signal_count < expected_sigtraps && kernel_with_sleepable_spinlocks()) {
+		pr_debug("Expected %d sigtraps, got %d, running on a kernel with sleepable spinlocks.\n",
+			 expected_sigtraps, ctx.signal_count);
+		pr_debug("See https://lore.kernel.org/all/e368f2c848d77fbc8d259f44e2055fe469c219cf.camel@gmx.de/\n");
+		return TEST_SKIP;
+	} else
+		TEST_ASSERT_EQUAL("unexpected sigtraps", ctx.signal_count, expected_sigtraps);
+
 	TEST_ASSERT_EQUAL("missing signals or incorrectly delivered", ctx.tids_want_signal, 0);
 	TEST_ASSERT_VAL("unexpected si_addr", ctx.first_siginfo.si_addr == &ctx.iterate_on);
 #if 0 /* FIXME: enable when libc's signal.h has si_perf_{type,data} */
@@ -221,6 +282,7 @@ out_restore_sigaction:
 	sigaction(SIGTRAP, &oldact, NULL);
 out:
 	pthread_barrier_destroy(&barrier);
+	btf__exit();
 	return ret;
 }
 
diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c
index 4d7493fa0105..290716783ac6 100644
--- a/tools/perf/tests/sw-clock.c
+++ b/tools/perf/tests/sw-clock.c
@@ -62,7 +62,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
 	}
 	evlist__add(evlist, evsel);
 
-	cpus = perf_cpu_map__dummy_new();
+	cpus = perf_cpu_map__new_any_cpu();
 	threads = thread_map__new_by_tid(getpid());
 	if (!cpus || !threads) {
 		err = -ENOMEM;
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index e52b031bedc5..5cab17a1942e 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -351,7 +351,7 @@ static int test__switch_tracking(struct test_suite *test __maybe_unused, int sub
 		goto out_err;
 	}
 
-	cpus = perf_cpu_map__new(NULL);
+	cpus = perf_cpu_map__new_online_cpus();
 	if (!cpus) {
 		pr_debug("perf_cpu_map__new failed!\n");
 		goto out_err;
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index 968dddde6dda..d33d0952025c 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -70,7 +70,7 @@ static int test__task_exit(struct test_suite *test __maybe_unused, int subtest _
 	 * evlist__prepare_workload we'll fill in the only thread
 	 * we're monitoring, the one forked there.
 	 */
-	cpus = perf_cpu_map__dummy_new();
+	cpus = perf_cpu_map__new_any_cpu();
 	threads = thread_map__new_by_tid(-1);
 	if (!cpus || !threads) {
 		err = -ENOMEM;
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index b394f3ac2d66..dad3d7414142 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -207,5 +207,6 @@ DECLARE_WORKLOAD(brstack);
 DECLARE_WORKLOAD(datasym);
 
 extern const char *dso_to_test;
+extern const char *test_objdump_path;
 
 #endif /* TESTS_H */
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 9dee63734e66..2a842f53fbb5 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -215,7 +215,7 @@ static int test__session_topology(struct test_suite *test __maybe_unused, int su
 	if (session_write_header(path))
 		goto free_path;
 
-	map = perf_cpu_map__new(NULL);
+	map = perf_cpu_map__new_online_cpus();
 	if (map == NULL) {
 		pr_debug("failed to get system cpumap\n");
 		goto free_path;
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index 1078a93b01aa..822f893e67d5 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -112,18 +112,92 @@ static bool is_ignored_symbol(const char *name, char type)
 	return false;
 }
 
+struct test__vmlinux_matches_kallsyms_cb_args {
+	struct machine kallsyms;	/* machine loaded from /proc/kallsyms */
+	struct map *vmlinux_map;	/* kernel map of the vmlinux machine */
+	bool header_printed;		/* set once a callback has printed its "WARN:" header */
+};
+
+static int test__vmlinux_matches_kallsyms_cb1(struct map *map, void *data)
+{
+	struct test__vmlinux_matches_kallsyms_cb_args *args = data;
+	struct dso *dso = map__dso(map);
+	/*
+	 * If it is the kernel, kallsyms is always "[kernel.kallsyms]", while
+	 * the kernel will have the path for the vmlinux file being used, so use
+	 * the short name, less descriptive but the same ("[kernel]") in both
+	 * cases.
+	 */
+	struct map *pair = maps__find_by_name(args->kallsyms.kmaps,
+					(dso->kernel ? dso->short_name : dso->name));
+
+	if (pair)
+		map__set_priv(pair, 1);
+	else {
+		if (!args->header_printed) {
+			pr_info("WARN: Maps only in vmlinux:\n");
+			args->header_printed = true;
+		}
+		map__fprintf(map, stderr);
+	}
+	return 0;
+}
+
+/* Report a vmlinux map whose start address matches a kallsyms map not yet paired. */
+static int test__vmlinux_matches_kallsyms_cb2(struct map *map, void *data)
+{
+	struct test__vmlinux_matches_kallsyms_cb_args *args = data;
+	u64 mem_start = map__unmap_ip(args->vmlinux_map, map__start(map));
+	u64 mem_end = map__unmap_ip(args->vmlinux_map, map__end(map));
+	struct map *pair = maps__find(args->kallsyms.kmaps, mem_start);
+
+	if (pair == NULL || map__priv(pair))
+		return 0;
+
+	if (map__start(pair) == mem_start) {
+		struct dso *dso = map__dso(map);
+
+		if (!args->header_printed) {
+			pr_info("WARN: Maps in vmlinux with a different name in kallsyms:\n");
+			args->header_printed = true;
+		}
+
+		pr_info("WARN: %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as",
+			map__start(map), map__end(map), map__pgoff(map), dso->name);
+		if (mem_end != map__end(pair))
+			pr_info(":\nWARN: *%" PRIx64 "-%" PRIx64 " %" PRIx64,
+				map__start(pair), map__end(pair), map__pgoff(pair));
+		pr_info(" %s\n", map__dso(pair)->name);	/* kallsyms-side name */
+		map__set_priv(pair, 1);
+	}
+	return 0;
+}
+
+static int test__vmlinux_matches_kallsyms_cb3(struct map *map, void *data)
+{
+	struct test__vmlinux_matches_kallsyms_cb_args *cb_args = data;
+
+	if (map__priv(map))	/* priv already set: nothing to report */
+		return 0;
+	if (!cb_args->header_printed) {
+		pr_info("WARN: Maps only in kallsyms:\n");
+		cb_args->header_printed = true;
+	}
+	map__fprintf(map, stderr);
+	return 0;
+}
+
 static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused,
 					int subtest __maybe_unused)
 {
 	int err = TEST_FAIL;
 	struct rb_node *nd;
 	struct symbol *sym;
-	struct map *kallsyms_map, *vmlinux_map;
-	struct map_rb_node *rb_node;
-	struct machine kallsyms, vmlinux;
+	struct map *kallsyms_map;
+	struct machine vmlinux;
 	struct maps *maps;
 	u64 mem_start, mem_end;
-	bool header_printed;
+	struct test__vmlinux_matches_kallsyms_cb_args args;
 
 	/*
 	 * Step 1:
@@ -131,7 +205,7 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused
 	 * Init the machines that will hold kernel, modules obtained from
 	 * both vmlinux + .ko files and from /proc/kallsyms split by modules.
 	 */
-	machine__init(&kallsyms, "", HOST_KERNEL_ID);
+	machine__init(&args.kallsyms, "", HOST_KERNEL_ID);
 	machine__init(&vmlinux, "", HOST_KERNEL_ID);
 
 	maps = machine__kernel_maps(&vmlinux);
@@ -143,7 +217,7 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused
 	 * load /proc/kallsyms. Also create the modules maps from /proc/modules
 	 * and find the .ko files that match them in /lib/modules/`uname -r`/.
 	 */
-	if (machine__create_kernel_maps(&kallsyms) < 0) {
+	if (machine__create_kernel_maps(&args.kallsyms) < 0) {
 		pr_debug("machine__create_kernel_maps failed");
 		err = TEST_SKIP;
 		goto out;
@@ -160,7 +234,7 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused
 	 * be compacted against the list of modules found in the "vmlinux"
 	 * code and with the one got from /proc/modules from the "kallsyms" code.
 	 */
-	if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms") <= 0) {
+	if (machine__load_kallsyms(&args.kallsyms, "/proc/kallsyms") <= 0) {
 		pr_debug("machine__load_kallsyms failed");
 		err = TEST_SKIP;
 		goto out;
@@ -174,7 +248,7 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused
 	 * to see if the running kernel was relocated by checking if it has the
 	 * same value in the vmlinux file we load.
 	 */
-	kallsyms_map = machine__kernel_map(&kallsyms);
+	kallsyms_map = machine__kernel_map(&args.kallsyms);
 
 	/*
 	 * Step 5:
@@ -186,7 +260,7 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused
 		goto out;
 	}
 
-	vmlinux_map = machine__kernel_map(&vmlinux);
+	args.vmlinux_map = machine__kernel_map(&vmlinux);
 
 	/*
 	 * Step 6:
@@ -213,7 +287,7 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused
 	 * in the kallsyms dso. For the ones that are in both, check its names and
 	 * end addresses too.
 	 */
-	map__for_each_symbol(vmlinux_map, sym, nd) {
+	map__for_each_symbol(args.vmlinux_map, sym, nd) {
 		struct symbol *pair, *first_pair;
 
 		sym  = rb_entry(nd, struct symbol, rb_node);
@@ -221,10 +295,10 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused
 		if (sym->start == sym->end)
 			continue;
 
-		mem_start = map__unmap_ip(vmlinux_map, sym->start);
-		mem_end = map__unmap_ip(vmlinux_map, sym->end);
+		mem_start = map__unmap_ip(args.vmlinux_map, sym->start);
+		mem_end = map__unmap_ip(args.vmlinux_map, sym->end);
 
-		first_pair = machine__find_kernel_symbol(&kallsyms, mem_start, NULL);
+		first_pair = machine__find_kernel_symbol(&args.kallsyms, mem_start, NULL);
 		pair = first_pair;
 
 		if (pair && UM(pair->start) == mem_start) {
@@ -253,7 +327,8 @@ next_pair:
 				 */
 				continue;
 			} else {
-				pair = machine__find_kernel_symbol_by_name(&kallsyms, sym->name, NULL);
+				pair = machine__find_kernel_symbol_by_name(&args.kallsyms,
+									   sym->name, NULL);
 				if (pair) {
 					if (UM(pair->start) == mem_start)
 						goto next_pair;
@@ -267,7 +342,7 @@ next_pair:
 
 				continue;
 			}
-		} else if (mem_start == map__end(kallsyms.vmlinux_map)) {
+		} else if (mem_start == map__end(args.kallsyms.vmlinux_map)) {
 			/*
 			 * Ignore aliases to _etext, i.e. to the end of the kernel text area,
 			 * such as __indirect_thunk_end.
@@ -289,78 +364,18 @@ next_pair:
 	if (verbose <= 0)
 		goto out;
 
-	header_printed = false;
-
-	maps__for_each_entry(maps, rb_node) {
-		struct map *map = rb_node->map;
-		struct dso *dso = map__dso(map);
-		/*
-		 * If it is the kernel, kallsyms is always "[kernel.kallsyms]", while
-		 * the kernel will have the path for the vmlinux file being used,
-		 * so use the short name, less descriptive but the same ("[kernel]" in
-		 * both cases.
-		 */
-		struct map *pair = maps__find_by_name(kallsyms.kmaps, (dso->kernel ?
-								dso->short_name :
-								dso->name));
-		if (pair) {
-			map__set_priv(pair, 1);
-		} else {
-			if (!header_printed) {
-				pr_info("WARN: Maps only in vmlinux:\n");
-				header_printed = true;
-			}
-			map__fprintf(map, stderr);
-		}
-	}
-
-	header_printed = false;
-
-	maps__for_each_entry(maps, rb_node) {
-		struct map *pair, *map = rb_node->map;
-
-		mem_start = map__unmap_ip(vmlinux_map, map__start(map));
-		mem_end = map__unmap_ip(vmlinux_map, map__end(map));
+	args.header_printed = false;
+	maps__for_each_map(maps, test__vmlinux_matches_kallsyms_cb1, &args);
 
-		pair = maps__find(kallsyms.kmaps, mem_start);
-		if (pair == NULL || map__priv(pair))
-			continue;
-
-		if (map__start(pair) == mem_start) {
-			struct dso *dso = map__dso(map);
-
-			if (!header_printed) {
-				pr_info("WARN: Maps in vmlinux with a different name in kallsyms:\n");
-				header_printed = true;
-			}
-
-			pr_info("WARN: %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as",
-				map__start(map), map__end(map), map__pgoff(map), dso->name);
-			if (mem_end != map__end(pair))
-				pr_info(":\nWARN: *%" PRIx64 "-%" PRIx64 " %" PRIx64,
-					map__start(pair), map__end(pair), map__pgoff(pair));
-			pr_info(" %s\n", dso->name);
-			map__set_priv(pair, 1);
-		}
-	}
-
-	header_printed = false;
-
-	maps = machine__kernel_maps(&kallsyms);
+	args.header_printed = false;
+	maps__for_each_map(maps, test__vmlinux_matches_kallsyms_cb2, &args);
 
-	maps__for_each_entry(maps, rb_node) {
-		struct map *map = rb_node->map;
+	args.header_printed = false;
+	maps = machine__kernel_maps(&args.kallsyms);
+	maps__for_each_map(maps, test__vmlinux_matches_kallsyms_cb3, &args);
 
-		if (!map__priv(map)) {
-			if (!header_printed) {
-				pr_info("WARN: Maps only in kallsyms:\n");
-				header_printed = true;
-			}
-			map__fprintf(map, stderr);
-		}
-	}
 out:
-	machine__exit(&kallsyms);
+	machine__exit(&args.kallsyms);
 	machine__exit(&vmlinux);
 	return err;
 }
diff --git a/tools/perf/tests/workloads/thloop.c b/tools/perf/tests/workloads/thloop.c
index af05269c2eb8..457b29f91c3e 100644
--- a/tools/perf/tests/workloads/thloop.c
+++ b/tools/perf/tests/workloads/thloop.c
@@ -7,7 +7,6 @@
 #include "../tests.h"
 
 static volatile sig_atomic_t done;
-static volatile unsigned count;
 
 /* We want to check this symbol in perf report */
 noinline void test_loop(void);
@@ -19,8 +18,7 @@ static void sighandler(int sig __maybe_unused)
 
 noinline void test_loop(void)
 {
-	while (!done)
-		__atomic_fetch_add(&count, 1, __ATOMIC_RELAXED);
+	while (!done);
 }
 
 static void *thfunc(void *arg)
diff --git a/tools/perf/trace/beauty/arch_errno_names.sh b/tools/perf/trace/beauty/arch_errno_names.sh
index cc09dcaa891e..7df4bf5b55a3 100755
--- a/tools/perf/trace/beauty/arch_errno_names.sh
+++ b/tools/perf/trace/beauty/arch_errno_names.sh
@@ -57,13 +57,13 @@ create_arch_errno_table_func()
 	archlist="$1"
 	default="$2"
 
-	printf 'const char *arch_syscalls__strerrno(const char *arch, int err)\n'
+	printf 'arch_syscalls__strerrno_t *arch_syscalls__strerrno_function(const char *arch)\n'
 	printf '{\n'
 	for arch in $archlist; do
 		printf '\tif (!strcmp(arch, "%s"))\n' $(arch_string "$arch")
-		printf '\t\treturn errno_to_name__%s(err);\n' $(arch_string "$arch")
+		printf '\t\treturn errno_to_name__%s;\n' $(arch_string "$arch")
 	done
-	printf '\treturn errno_to_name__%s(err);\n' $(arch_string "$default")
+	printf '\treturn errno_to_name__%s;\n' $(arch_string "$default")
 	printf '}\n'
 }
 
@@ -76,7 +76,9 @@ EoHEADER
 
 # Create list of architectures that have a specific errno.h.
 archlist=""
-for arch in $(find $toolsdir/arch -maxdepth 1 -mindepth 1 -type d -printf "%f\n" | sort -r); do
+for f in $toolsdir/arch/*/include/uapi/asm/errno.h; do
+	d=${f%/include/uapi/asm/errno.h}
+	arch="${d##*/}"
 	test -f $toolsdir/arch/$arch/include/uapi/asm/errno.h && archlist="$archlist $arch"
 done
 
diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h
index 788e8f6bd90e..9feb794f5c6e 100644
--- a/tools/perf/trace/beauty/beauty.h
+++ b/tools/perf/trace/beauty/beauty.h
@@ -251,6 +251,4 @@ size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool sh
 void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
 				    size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg));
 
-const char *arch_syscalls__strerrno(const char *arch, int err);
-
 #endif /* _PERF_TRACE_BEAUTY_H */
diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh
index 8059342ca412..9455d9672f14 100755
--- a/tools/perf/trace/beauty/prctl_option.sh
+++ b/tools/perf/trace/beauty/prctl_option.sh
@@ -4,9 +4,9 @@
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
 printf "static const char *prctl_options[] = {\n"
-regex='^#define[[:space:]]{1}PR_(\w+)[[:space:]]*([[:xdigit:]]+)([[:space:]]*\/.*)?$'
+regex='^#define[[:space:]]{1}PR_(\w+)[[:space:]]*([[:xdigit:]]+)([[:space:]]*/.*)?$'
 grep -E $regex ${header_dir}/prctl.h | grep -v PR_SET_PTRACER | \
-	sed -r "s/$regex/\2 \1/g"	| \
+	sed -E "s%$regex%\2 \1%g"	| \
 	sort -n | xargs printf "\t[%s] = \"%s\",\n"
 printf "};\n"
 
diff --git a/tools/perf/trace/beauty/socket.sh b/tools/perf/trace/beauty/socket.sh
index 8bc7ba62203e..670c6db298ae 100755
--- a/tools/perf/trace/beauty/socket.sh
+++ b/tools/perf/trace/beauty/socket.sh
@@ -18,10 +18,10 @@ grep -E $ipproto_regex ${uapi_header_dir}/in.h | \
 printf "};\n\n"
 
 printf "static const char *socket_level[] = {\n"
-socket_level_regex='^#define[[:space:]]+SOL_(\w+)[[:space:]]+([[:digit:]]+)([[:space:]]+\/.*)?'
+socket_level_regex='^#define[[:space:]]+SOL_(\w+)[[:space:]]+([[:digit:]]+)([[:space:]]+/.*)?'
 
 grep -E $socket_level_regex ${beauty_header_dir}/socket.h | \
-	sed -r "s/$socket_level_regex/\2 \1/g"	| \
+	sed -E "s%$socket_level_regex%\2 \1%g"	| \
 	sort -n | xargs printf "\t[%s] = \"%s\",\n"
 printf "};\n\n"
 
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index ccdb2cd11fbf..ec5e21932876 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -27,7 +27,6 @@ struct annotate_browser {
 	struct rb_node		   *curr_hot;
 	struct annotation_line	   *selection;
 	struct arch		   *arch;
-	struct annotation_options  *opts;
 	bool			    searching_backwards;
 	char			    search_bf[128];
 };
@@ -38,11 +37,10 @@ static inline struct annotation *browser__annotation(struct ui_browser *browser)
 	return symbol__annotation(ms->sym);
 }
 
-static bool disasm_line__filter(struct ui_browser *browser, void *entry)
+static bool disasm_line__filter(struct ui_browser *browser __maybe_unused, void *entry)
 {
-	struct annotation *notes = browser__annotation(browser);
 	struct annotation_line *al = list_entry(entry, struct annotation_line, node);
-	return annotation_line__filter(al, notes);
+	return annotation_line__filter(al);
 }
 
 static int ui_browser__jumps_percent_color(struct ui_browser *browser, int nr, bool current)
@@ -97,7 +95,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
 	struct annotation_write_ops ops = {
 		.first_line		 = row == 0,
 		.current_entry		 = is_current_entry,
-		.change_color		 = (!notes->options->hide_src_code &&
+		.change_color		 = (!annotate_opts.hide_src_code &&
 					    (!is_current_entry ||
 					     (browser->use_navkeypressed &&
 					      !browser->navkeypressed))),
@@ -114,7 +112,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
 	if (!browser->navkeypressed)
 		ops.width += 1;
 
-	annotation_line__write(al, notes, &ops, ab->opts);
+	annotation_line__write(al, notes, &ops);
 
 	if (ops.current_entry)
 		ab->selection = al;
@@ -128,7 +126,7 @@ static int is_fused(struct annotate_browser *ab, struct disasm_line *cursor)
 
 	while (pos && pos->al.offset == -1) {
 		pos = list_prev_entry(pos, al.node);
-		if (!ab->opts->hide_src_code)
+		if (!annotate_opts.hide_src_code)
 			diff++;
 	}
 
@@ -188,14 +186,14 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
 	 *  name right after the '<' token and probably treating this like a
 	 *  'call' instruction.
 	 */
-	target = notes->offsets[cursor->ops.target.offset];
+	target = notes->src->offsets[cursor->ops.target.offset];
 	if (target == NULL) {
 		ui_helpline__printf("WARN: jump target inconsistency, press 'o', notes->offsets[%#x] = NULL\n",
 				    cursor->ops.target.offset);
 		return;
 	}
 
-	if (notes->options->hide_src_code) {
+	if (annotate_opts.hide_src_code) {
 		from = cursor->al.idx_asm;
 		to = target->idx_asm;
 	} else {
@@ -224,7 +222,7 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser)
 	int ret = ui_browser__list_head_refresh(browser);
 	int pcnt_width = annotation__pcnt_width(notes);
 
-	if (notes->options->jump_arrows)
+	if (annotate_opts.jump_arrows)
 		annotate_browser__draw_current_jump(browser);
 
 	ui_browser__set_color(browser, HE_COLORSET_NORMAL);
@@ -258,7 +256,7 @@ static void disasm_rb_tree__insert(struct annotate_browser *browser,
 		parent = *p;
 		l = rb_entry(parent, struct annotation_line, rb_node);
 
-		if (disasm__cmp(al, l, browser->opts->percent_type) < 0)
+		if (disasm__cmp(al, l, annotate_opts.percent_type) < 0)
 			p = &(*p)->rb_left;
 		else
 			p = &(*p)->rb_right;
@@ -270,7 +268,6 @@ static void disasm_rb_tree__insert(struct annotate_browser *browser,
 static void annotate_browser__set_top(struct annotate_browser *browser,
 				      struct annotation_line *pos, u32 idx)
 {
-	struct annotation *notes = browser__annotation(&browser->b);
 	unsigned back;
 
 	ui_browser__refresh_dimensions(&browser->b);
@@ -280,7 +277,7 @@ static void annotate_browser__set_top(struct annotate_browser *browser,
 	while (browser->b.top_idx != 0 && back != 0) {
 		pos = list_entry(pos->node.prev, struct annotation_line, node);
 
-		if (annotation_line__filter(pos, notes))
+		if (annotation_line__filter(pos))
 			continue;
 
 		--browser->b.top_idx;
@@ -294,11 +291,10 @@ static void annotate_browser__set_top(struct annotate_browser *browser,
 static void annotate_browser__set_rb_top(struct annotate_browser *browser,
 					 struct rb_node *nd)
 {
-	struct annotation *notes = browser__annotation(&browser->b);
 	struct annotation_line * pos = rb_entry(nd, struct annotation_line, rb_node);
 	u32 idx = pos->idx;
 
-	if (notes->options->hide_src_code)
+	if (annotate_opts.hide_src_code)
 		idx = pos->idx_asm;
 	annotate_browser__set_top(browser, pos, idx);
 	browser->curr_hot = nd;
@@ -331,13 +327,13 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
 			double percent;
 
 			percent = annotation_data__percent(&pos->al.data[i],
-							   browser->opts->percent_type);
+							   annotate_opts.percent_type);
 
 			if (max_percent < percent)
 				max_percent = percent;
 		}
 
-		if (max_percent < 0.01 && pos->al.ipc == 0) {
+		if (max_percent < 0.01 && (!pos->al.cycles || pos->al.cycles->ipc == 0)) {
 			RB_CLEAR_NODE(&pos->al.rb_node);
 			continue;
 		}
@@ -380,12 +376,12 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser)
 	browser->b.seek(&browser->b, offset, SEEK_CUR);
 	al = list_entry(browser->b.top, struct annotation_line, node);
 
-	if (notes->options->hide_src_code) {
+	if (annotate_opts.hide_src_code) {
 		if (al->idx_asm < offset)
 			offset = al->idx;
 
-		browser->b.nr_entries = notes->nr_entries;
-		notes->options->hide_src_code = false;
+		browser->b.nr_entries = notes->src->nr_entries;
+		annotate_opts.hide_src_code = false;
 		browser->b.seek(&browser->b, -offset, SEEK_CUR);
 		browser->b.top_idx = al->idx - offset;
 		browser->b.index = al->idx;
@@ -402,8 +398,8 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser)
 		if (al->idx_asm < offset)
 			offset = al->idx_asm;
 
-		browser->b.nr_entries = notes->nr_asm_entries;
-		notes->options->hide_src_code = true;
+		browser->b.nr_entries = notes->src->nr_asm_entries;
+		annotate_opts.hide_src_code = true;
 		browser->b.seek(&browser->b, -offset, SEEK_CUR);
 		browser->b.top_idx = al->idx_asm - offset;
 		browser->b.index = al->idx_asm;
@@ -435,7 +431,7 @@ static void ui_browser__init_asm_mode(struct ui_browser *browser)
 {
 	struct annotation *notes = browser__annotation(browser);
 	ui_browser__reset_index(browser);
-	browser->nr_entries = notes->nr_asm_entries;
+	browser->nr_entries = notes->src->nr_asm_entries;
 }
 
 static int sym_title(struct symbol *sym, struct map *map, char *title,
@@ -483,8 +479,8 @@ static bool annotate_browser__callq(struct annotate_browser *browser,
 	target_ms.map = ms->map;
 	target_ms.sym = dl->ops.target.sym;
 	annotation__unlock(notes);
-	symbol__tui_annotate(&target_ms, evsel, hbt, browser->opts);
-	sym_title(ms->sym, ms->map, title, sizeof(title), browser->opts->percent_type);
+	symbol__tui_annotate(&target_ms, evsel, hbt);
+	sym_title(ms->sym, ms->map, title, sizeof(title), annotate_opts.percent_type);
 	ui_browser__show_title(&browser->b, title);
 	return true;
 }
@@ -500,7 +496,7 @@ struct disasm_line *annotate_browser__find_offset(struct annotate_browser *brows
 	list_for_each_entry(pos, &notes->src->source, al.node) {
 		if (pos->al.offset == offset)
 			return pos;
-		if (!annotation_line__filter(&pos->al, notes))
+		if (!annotation_line__filter(&pos->al))
 			++*idx;
 	}
 
@@ -544,7 +540,7 @@ struct annotation_line *annotate_browser__find_string(struct annotate_browser *b
 
 	*idx = browser->b.index;
 	list_for_each_entry_continue(al, &notes->src->source, node) {
-		if (annotation_line__filter(al, notes))
+		if (annotation_line__filter(al))
 			continue;
 
 		++*idx;
@@ -581,7 +577,7 @@ struct annotation_line *annotate_browser__find_string_reverse(struct annotate_br
 
 	*idx = browser->b.index;
 	list_for_each_entry_continue_reverse(al, &notes->src->source, node) {
-		if (annotation_line__filter(al, notes))
+		if (annotation_line__filter(al))
 			continue;
 
 		--*idx;
@@ -659,7 +655,6 @@ bool annotate_browser__continue_search_reverse(struct annotate_browser *browser,
 
 static int annotate_browser__show(struct ui_browser *browser, char *title, const char *help)
 {
-	struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
 	struct map_symbol *ms = browser->priv;
 	struct symbol *sym = ms->sym;
 	char symbol_dso[SYM_TITLE_MAX_SIZE];
@@ -667,7 +662,7 @@ static int annotate_browser__show(struct ui_browser *browser, char *title, const
 	if (ui_browser__show(browser, title, help) < 0)
 		return -1;
 
-	sym_title(sym, ms->map, symbol_dso, sizeof(symbol_dso), ab->opts->percent_type);
+	sym_title(sym, ms->map, symbol_dso, sizeof(symbol_dso), annotate_opts.percent_type);
 
 	ui_browser__gotorc_title(browser, 0, 0);
 	ui_browser__set_color(browser, HE_COLORSET_ROOT);
@@ -809,7 +804,7 @@ static int annotate_browser__run(struct annotate_browser *browser,
 			annotate_browser__show(&browser->b, title, help);
 			continue;
 		case 'k':
-			notes->options->show_linenr = !notes->options->show_linenr;
+			annotate_opts.show_linenr = !annotate_opts.show_linenr;
 			continue;
 		case 'l':
 			annotate_browser__show_full_location (&browser->b);
@@ -822,18 +817,18 @@ static int annotate_browser__run(struct annotate_browser *browser,
 				ui_helpline__puts(help);
 			continue;
 		case 'o':
-			notes->options->use_offset = !notes->options->use_offset;
+			annotate_opts.use_offset = !annotate_opts.use_offset;
 			annotation__update_column_widths(notes);
 			continue;
 		case 'O':
-			if (++notes->options->offset_level > ANNOTATION__MAX_OFFSET_LEVEL)
-				notes->options->offset_level = ANNOTATION__MIN_OFFSET_LEVEL;
+			if (++annotate_opts.offset_level > ANNOTATION__MAX_OFFSET_LEVEL)
+				annotate_opts.offset_level = ANNOTATION__MIN_OFFSET_LEVEL;
 			continue;
 		case 'j':
-			notes->options->jump_arrows = !notes->options->jump_arrows;
+			annotate_opts.jump_arrows = !annotate_opts.jump_arrows;
 			continue;
 		case 'J':
-			notes->options->show_nr_jumps = !notes->options->show_nr_jumps;
+			annotate_opts.show_nr_jumps = !annotate_opts.show_nr_jumps;
 			annotation__update_column_widths(notes);
 			continue;
 		case '/':
@@ -860,7 +855,7 @@ show_help:
 					   browser->b.height,
 					   browser->b.index,
 					   browser->b.top_idx,
-					   notes->nr_asm_entries);
+					   notes->src->nr_asm_entries);
 		}
 			continue;
 		case K_ENTER:
@@ -884,7 +879,7 @@ show_sup_ins:
 			continue;
 		}
 		case 'P':
-			map_symbol__annotation_dump(ms, evsel, browser->opts);
+			map_symbol__annotation_dump(ms, evsel);
 			continue;
 		case 't':
 			if (symbol_conf.show_total_period) {
@@ -897,15 +892,15 @@ show_sup_ins:
 			annotation__update_column_widths(notes);
 			continue;
 		case 'c':
-			if (notes->options->show_minmax_cycle)
-				notes->options->show_minmax_cycle = false;
+			if (annotate_opts.show_minmax_cycle)
+				annotate_opts.show_minmax_cycle = false;
 			else
-				notes->options->show_minmax_cycle = true;
+				annotate_opts.show_minmax_cycle = true;
 			annotation__update_column_widths(notes);
 			continue;
 		case 'p':
 		case 'b':
-			switch_percent_type(browser->opts, key == 'b');
+			switch_percent_type(&annotate_opts, key == 'b');
 			hists__scnprintf_title(hists, title, sizeof(title));
 			annotate_browser__show(&browser->b, title, help);
 			continue;
@@ -932,26 +927,24 @@ out:
 }
 
 int map_symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
-			     struct hist_browser_timer *hbt,
-			     struct annotation_options *opts)
+			     struct hist_browser_timer *hbt)
 {
-	return symbol__tui_annotate(ms, evsel, hbt, opts);
+	return symbol__tui_annotate(ms, evsel, hbt);
 }
 
 int hist_entry__tui_annotate(struct hist_entry *he, struct evsel *evsel,
-			     struct hist_browser_timer *hbt,
-			     struct annotation_options *opts)
+			     struct hist_browser_timer *hbt)
 {
 	/* reset abort key so that it can get Ctrl-C as a key */
 	SLang_reset_tty();
 	SLang_init_tty(0, 0, 0);
+	SLtty_set_suspend_state(true);
 
-	return map_symbol__tui_annotate(&he->ms, evsel, hbt, opts);
+	return map_symbol__tui_annotate(&he->ms, evsel, hbt);
 }
 
 int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
-			 struct hist_browser_timer *hbt,
-			 struct annotation_options *opts)
+			 struct hist_browser_timer *hbt)
 {
 	struct symbol *sym = ms->sym;
 	struct annotation *notes = symbol__annotation(sym);
@@ -965,7 +958,6 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
 			.priv	 = ms,
 			.use_navkeypressed = true,
 		},
-		.opts = opts,
 	};
 	struct dso *dso;
 	int ret = -1, err;
@@ -979,7 +971,7 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
 		return -1;
 
 	if (not_annotated) {
-		err = symbol__annotate2(ms, evsel, opts, &browser.arch);
+		err = symbol__annotate2(ms, evsel, &browser.arch);
 		if (err) {
 			char msg[BUFSIZ];
 			dso->annotate_warned = true;
@@ -991,12 +983,12 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
 
 	ui_helpline__push("Press ESC to exit");
 
-	browser.b.width = notes->max_line_len;
-	browser.b.nr_entries = notes->nr_entries;
+	browser.b.width = notes->src->max_line_len;
+	browser.b.nr_entries = notes->src->nr_entries;
 	browser.b.entries = &notes->src->source,
 	browser.b.width += 18; /* Percentage */
 
-	if (notes->options->hide_src_code)
+	if (annotate_opts.hide_src_code)
 		ui_browser__init_asm_mode(&browser.b);
 
 	ret = annotate_browser__run(&browser, evsel, hbt);
@@ -1006,6 +998,6 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
 
 out_free_offsets:
 	if(not_annotated)
-		zfree(&notes->offsets);
+		zfree(&notes->src->offsets);
 	return ret;
 }
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index f4812b226818..0c02b3a8e121 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -2250,8 +2250,7 @@ struct hist_browser *hist_browser__new(struct hists *hists)
 static struct hist_browser *
 perf_evsel_browser__new(struct evsel *evsel,
 			struct hist_browser_timer *hbt,
-			struct perf_env *env,
-			struct annotation_options *annotation_opts)
+			struct perf_env *env)
 {
 	struct hist_browser *browser = hist_browser__new(evsel__hists(evsel));
 
@@ -2259,7 +2258,6 @@ perf_evsel_browser__new(struct evsel *evsel,
 		browser->hbt   = hbt;
 		browser->env   = env;
 		browser->title = hists_browser__scnprintf_title;
-		browser->annotation_opts = annotation_opts;
 	}
 	return browser;
 }
@@ -2432,8 +2430,8 @@ do_annotate(struct hist_browser *browser, struct popup_action *act)
 	struct hist_entry *he;
 	int err;
 
-	if (!browser->annotation_opts->objdump_path &&
-	    perf_env__lookup_objdump(browser->env, &browser->annotation_opts->objdump_path))
+	if (!annotate_opts.objdump_path &&
+	    perf_env__lookup_objdump(browser->env, &annotate_opts.objdump_path))
 		return 0;
 
 	notes = symbol__annotation(act->ms.sym);
@@ -2445,8 +2443,7 @@ do_annotate(struct hist_browser *browser, struct popup_action *act)
 	else
 		evsel = hists_to_evsel(browser->hists);
 
-	err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt,
-				       browser->annotation_opts);
+	err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt);
 	he = hist_browser__selected_entry(browser);
 	/*
 	 * offer option to annotate the other branch source or target
@@ -2943,11 +2940,10 @@ next:
 
 static int evsel__hists_browse(struct evsel *evsel, int nr_events, const char *helpline,
 			       bool left_exits, struct hist_browser_timer *hbt, float min_pcnt,
-			       struct perf_env *env, bool warn_lost_event,
-			       struct annotation_options *annotation_opts)
+			       struct perf_env *env, bool warn_lost_event)
 {
 	struct hists *hists = evsel__hists(evsel);
-	struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env, annotation_opts);
+	struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env);
 	struct branch_info *bi = NULL;
 #define MAX_OPTIONS  16
 	char *options[MAX_OPTIONS];
@@ -3004,6 +3000,7 @@ static int evsel__hists_browse(struct evsel *evsel, int nr_events, const char *h
 	/* reset abort key so that it can get Ctrl-C as a key */
 	SLang_reset_tty();
 	SLang_init_tty(0, 0, 0);
+	SLtty_set_suspend_state(true);
 
 	if (min_pcnt)
 		browser->min_pcnt = min_pcnt;
@@ -3398,7 +3395,6 @@ out:
 struct evsel_menu {
 	struct ui_browser b;
 	struct evsel *selection;
-	struct annotation_options *annotation_opts;
 	bool lost_events, lost_events_warned;
 	float min_pcnt;
 	struct perf_env *env;
@@ -3499,8 +3495,7 @@ browse_hists:
 				hbt->timer(hbt->arg);
 			key = evsel__hists_browse(pos, nr_events, help, true, hbt,
 						  menu->min_pcnt, menu->env,
-						  warn_lost_event,
-						  menu->annotation_opts);
+						  warn_lost_event);
 			ui_browser__show_title(&menu->b, title);
 			switch (key) {
 			case K_TAB:
@@ -3557,7 +3552,7 @@ static bool filter_group_entries(struct ui_browser *browser __maybe_unused,
 
 static int __evlist__tui_browse_hists(struct evlist *evlist, int nr_entries, const char *help,
 				      struct hist_browser_timer *hbt, float min_pcnt, struct perf_env *env,
-				      bool warn_lost_event, struct annotation_options *annotation_opts)
+				      bool warn_lost_event)
 {
 	struct evsel *pos;
 	struct evsel_menu menu = {
@@ -3572,7 +3567,6 @@ static int __evlist__tui_browse_hists(struct evlist *evlist, int nr_entries, con
 		},
 		.min_pcnt = min_pcnt,
 		.env = env,
-		.annotation_opts = annotation_opts,
 	};
 
 	ui_helpline__push("Press ESC to exit");
@@ -3607,8 +3601,7 @@ static bool evlist__single_entry(struct evlist *evlist)
 }
 
 int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt,
-			     float min_pcnt, struct perf_env *env, bool warn_lost_event,
-			     struct annotation_options *annotation_opts)
+			     float min_pcnt, struct perf_env *env, bool warn_lost_event)
 {
 	int nr_entries = evlist->core.nr_entries;
 
@@ -3617,7 +3610,7 @@ single_entry: {
 		struct evsel *first = evlist__first(evlist);
 
 		return evsel__hists_browse(first, nr_entries, help, false, hbt, min_pcnt,
-					   env, warn_lost_event, annotation_opts);
+					   env, warn_lost_event);
 	}
 	}
 
@@ -3635,7 +3628,7 @@ single_entry: {
 	}
 
 	return __evlist__tui_browse_hists(evlist, nr_entries, help, hbt, min_pcnt, env,
-					  warn_lost_event, annotation_opts);
+					  warn_lost_event);
 }
 
 static int block_hists_browser__title(struct hist_browser *browser, char *bf,
@@ -3654,8 +3647,7 @@ static int block_hists_browser__title(struct hist_browser *browser, char *bf,
 }
 
 int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel,
-			   float min_percent, struct perf_env *env,
-			   struct annotation_options *annotation_opts)
+			   float min_percent, struct perf_env *env)
 {
 	struct hists *hists = &bh->block_hists;
 	struct hist_browser *browser;
@@ -3672,11 +3664,11 @@ int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel,
 	browser->title = block_hists_browser__title;
 	browser->min_pcnt = min_percent;
 	browser->env = env;
-	browser->annotation_opts = annotation_opts;
 
 	/* reset abort key so that it can get Ctrl-C as a key */
 	SLang_reset_tty();
 	SLang_init_tty(0, 0, 0);
+	SLtty_set_suspend_state(true);
 
 	memset(&action, 0, sizeof(action));
 
diff --git a/tools/perf/ui/browsers/hists.h b/tools/perf/ui/browsers/hists.h
index 1e938d9ffa5e..de46f6c56b0e 100644
--- a/tools/perf/ui/browsers/hists.h
+++ b/tools/perf/ui/browsers/hists.h
@@ -4,7 +4,6 @@
 
 #include "ui/browser.h"
 
-struct annotation_options;
 struct evsel;
 
 struct hist_browser {
@@ -15,7 +14,6 @@ struct hist_browser {
 	struct hist_browser_timer *hbt;
 	struct pstack	    *pstack;
 	struct perf_env	    *env;
-	struct annotation_options *annotation_opts;
 	struct evsel	    *block_evsel;
 	int		     print_seq;
 	bool		     show_dso;
diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c
index 47d2c7a8cbe1..50d45054ed6c 100644
--- a/tools/perf/ui/browsers/scripts.c
+++ b/tools/perf/ui/browsers/scripts.c
@@ -166,6 +166,7 @@ void run_script(char *cmd)
 	printf("\033[c\033[H\033[J");
 	fflush(stdout);
 	SLang_init_tty(0, 0, 0);
+	SLtty_set_suspend_state(true);
 	SLsmg_refresh();
 }
 
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index 2effac77ca8c..394861245fd3 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -162,7 +162,6 @@ static int perf_gtk__annotate_symbol(GtkWidget *window, struct map_symbol *ms,
 }
 
 static int symbol__gtk_annotate(struct map_symbol *ms, struct evsel *evsel,
-				struct annotation_options *options,
 				struct hist_browser_timer *hbt)
 {
 	struct dso *dso = map__dso(ms->map);
@@ -176,7 +175,7 @@ static int symbol__gtk_annotate(struct map_symbol *ms, struct evsel *evsel,
 	if (dso->annotate_warned)
 		return -1;
 
-	err = symbol__annotate(ms, evsel, options, NULL);
+	err = symbol__annotate(ms, evsel, NULL);
 	if (err) {
 		char msg[BUFSIZ];
 		dso->annotate_warned = true;
@@ -244,10 +243,9 @@ static int symbol__gtk_annotate(struct map_symbol *ms, struct evsel *evsel,
 
 int hist_entry__gtk_annotate(struct hist_entry *he,
 			     struct evsel *evsel,
-			     struct annotation_options *options,
 			     struct hist_browser_timer *hbt)
 {
-	return symbol__gtk_annotate(&he->ms, evsel, options, hbt);
+	return symbol__gtk_annotate(&he->ms, evsel, hbt);
 }
 
 void perf_gtk__show_annotations(void)
diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h
index 1e84dceb5267..a2b497f03fd6 100644
--- a/tools/perf/ui/gtk/gtk.h
+++ b/tools/perf/ui/gtk/gtk.h
@@ -56,13 +56,11 @@ struct evsel;
 struct evlist;
 struct hist_entry;
 struct hist_browser_timer;
-struct annotation_options;
 
 int evlist__gtk_browse_hists(struct evlist *evlist, const char *help,
 			     struct hist_browser_timer *hbt, float min_pcnt);
 int hist_entry__gtk_annotate(struct hist_entry *he,
 			     struct evsel *evsel,
-			     struct annotation_options *options,
 			     struct hist_browser_timer *hbt);
 void perf_gtk__show_annotations(void);
 
diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c
index 605d9e175ea7..16c6eff4d241 100644
--- a/tools/perf/ui/tui/setup.c
+++ b/tools/perf/ui/tui/setup.c
@@ -2,12 +2,14 @@
 #include <signal.h>
 #include <stdbool.h>
 #include <stdlib.h>
+#include <termios.h>
 #include <unistd.h>
 #include <linux/kernel.h>
 #ifdef HAVE_BACKTRACE_SUPPORT
 #include <execinfo.h>
 #endif
 
+#include "../../util/color.h"
 #include "../../util/debug.h"
 #include "../browser.h"
 #include "../helpline.h"
@@ -121,6 +123,23 @@ static void ui__signal(int sig)
 	exit(0);
 }
 
+static void ui__sigcont(int sig)
+{
+	static struct termios tty;
+
+	if (sig == SIGTSTP) {
+		while (tcgetattr(SLang_TT_Read_FD, &tty) == -1 && errno == EINTR)
+			;
+		while (write(SLang_TT_Read_FD, PERF_COLOR_RESET, sizeof(PERF_COLOR_RESET) - 1) == -1 && errno == EINTR)
+			;
+		raise(SIGSTOP);
+	} else {
+		while (tcsetattr(SLang_TT_Read_FD, TCSADRAIN, &tty) == -1 && errno == EINTR)
+			;
+		raise(SIGWINCH);
+	}
+}
+
 int ui__init(void)
 {
 	int err;
@@ -135,6 +154,7 @@ int ui__init(void)
 	err = SLang_init_tty(-1, 0, 0);
 	if (err < 0)
 		goto out;
+	SLtty_set_suspend_state(true);
 
 	err = SLkp_init();
 	if (err < 0) {
@@ -149,6 +169,8 @@ int ui__init(void)
 	signal(SIGINT, ui__signal);
 	signal(SIGQUIT, ui__signal);
 	signal(SIGTERM, ui__signal);
+	signal(SIGTSTP, ui__sigcont);
+	signal(SIGCONT, ui__sigcont);
 
 	perf_error__register(&perf_tui_eops);
 
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 988473bf907a..8027f450fa3e 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -195,6 +195,8 @@ endif
 perf-$(CONFIG_DWARF) += probe-finder.o
 perf-$(CONFIG_DWARF) += dwarf-aux.o
 perf-$(CONFIG_DWARF) += dwarf-regs.o
+perf-$(CONFIG_DWARF) += debuginfo.o
+perf-$(CONFIG_DWARF) += annotate-data.o
 
 perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
 perf-$(CONFIG_LOCAL_LIBUNWIND)    += unwind-libunwind-local.o
diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
new file mode 100644
index 000000000000..f22b4f18271c
--- /dev/null
+++ b/tools/perf/util/annotate-data.c
@@ -0,0 +1,405 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Convert sample address to data type using DWARF debug info.
+ *
+ * Written by Namhyung Kim <namhyung@kernel.org>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "annotate-data.h"
+#include "debuginfo.h"
+#include "debug.h"
+#include "dso.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "map.h"
+#include "map_symbol.h"
+#include "strbuf.h"
+#include "symbol.h"
+#include "symbol_conf.h"
+
+/*
+ * Compare type name and size to maintain them in a tree.
+ * I'm not sure if DWARF would have information of a single type in many
+ * different places (compilation units).  If not, it could compare the
+ * offset of the type entry in the .debug_info section.
+ */
+static int data_type_cmp(const void *_key, const struct rb_node *node)
+{
+	const struct annotated_data_type *key = _key;
+	struct annotated_data_type *type;
+
+	type = rb_entry(node, struct annotated_data_type, node);
+
+	if (key->self.size != type->self.size)
+		return key->self.size - type->self.size;
+	return strcmp(key->self.type_name, type->self.type_name);
+}
+
+static bool data_type_less(struct rb_node *node_a, const struct rb_node *node_b)
+{
+	struct annotated_data_type *a, *b;
+
+	a = rb_entry(node_a, struct annotated_data_type, node);
+	b = rb_entry(node_b, struct annotated_data_type, node);
+
+	if (a->self.size != b->self.size)
+		return a->self.size < b->self.size;
+	return strcmp(a->self.type_name, b->self.type_name) < 0;
+}
+
+/* Recursively add new members for struct/union */
+static int __add_member_cb(Dwarf_Die *die, void *arg)
+{
+	struct annotated_member *parent = arg;
+	struct annotated_member *member;
+	Dwarf_Die member_type, die_mem;
+	Dwarf_Word size, loc;
+	Dwarf_Attribute attr;
+	struct strbuf sb;
+	int tag;
+
+	if (dwarf_tag(die) != DW_TAG_member)
+		return DIE_FIND_CB_SIBLING;
+
+	member = zalloc(sizeof(*member));
+	if (member == NULL)
+		return DIE_FIND_CB_END;
+
+	strbuf_init(&sb, 32);
+	die_get_typename(die, &sb);
+
+	die_get_real_type(die, &member_type);
+	if (dwarf_aggregate_size(&member_type, &size) < 0)
+		size = 0;
+
+	if (!dwarf_attr_integrate(die, DW_AT_data_member_location, &attr))
+		loc = 0;
+	else
+		dwarf_formudata(&attr, &loc);
+
+	member->type_name = strbuf_detach(&sb, NULL);
+	/* member->var_name can be NULL */
+	if (dwarf_diename(die))
+		member->var_name = strdup(dwarf_diename(die));
+	member->size = size;
+	member->offset = loc + parent->offset;
+	INIT_LIST_HEAD(&member->children);
+	list_add_tail(&member->node, &parent->children);
+
+	tag = dwarf_tag(&member_type);
+	switch (tag) {
+	case DW_TAG_structure_type:
+	case DW_TAG_union_type:
+		die_find_child(&member_type, __add_member_cb, member, &die_mem);
+		break;
+	default:
+		break;
+	}
+	return DIE_FIND_CB_SIBLING;
+}
+
+static void add_member_types(struct annotated_data_type *parent, Dwarf_Die *type)
+{
+	Dwarf_Die die_mem;
+
+	die_find_child(type, __add_member_cb, &parent->self, &die_mem);
+}
+
+static void delete_members(struct annotated_member *member)
+{
+	struct annotated_member *child, *tmp;
+
+	list_for_each_entry_safe(child, tmp, &member->children, node) {
+		list_del(&child->node);
+		delete_members(child);
+		free(child->type_name);
+		free(child->var_name);
+		free(child);
+	}
+}
+
+static struct annotated_data_type *dso__findnew_data_type(struct dso *dso,
+							  Dwarf_Die *type_die)
+{
+	struct annotated_data_type *result = NULL;
+	struct annotated_data_type key;
+	struct rb_node *node;
+	struct strbuf sb;
+	char *type_name;
+	Dwarf_Word size = 0;	/* defined even if the size lookup below fails */
+
+	strbuf_init(&sb, 32);
+	if (die_get_typename_from_type(type_die, &sb) < 0)
+		strbuf_add(&sb, "(unknown type)", 14);
+	type_name = strbuf_detach(&sb, NULL);
+	dwarf_aggregate_size(type_die, &size);
+
+	/* Check existing nodes in dso->data_types tree */
+	key.self.type_name = type_name;
+	key.self.size = size;
+	node = rb_find(&key, &dso->data_types, data_type_cmp);
+	if (node) {
+		result = rb_entry(node, struct annotated_data_type, node);
+		free(type_name);
+		return result;
+	}
+
+	/* If not, add a new one */
+	result = zalloc(sizeof(*result));
+	if (result == NULL) {
+		free(type_name);
+		return NULL;
+	}
+
+	result->self.type_name = type_name;
+	result->self.size = size;
+	INIT_LIST_HEAD(&result->self.children);
+
+	if (symbol_conf.annotate_data_member)
+		add_member_types(result, type_die);
+
+	rb_add(&result->node, &dso->data_types, data_type_less);
+	return result;
+}
+
+static bool find_cu_die(struct debuginfo *di, u64 pc, Dwarf_Die *cu_die)
+{
+	Dwarf_Off off, next_off;
+	size_t header_size;
+
+	if (dwarf_addrdie(di->dbg, pc, cu_die) != NULL)
+		return true;
+
+	/*
+	 * Some kernels don't have full aranges and contain only a few
+	 * aranges entries.  Fall back to iterating over all CU entries in
+	 * .debug_info in case the address is missing from aranges.
+	 */
+	off = 0;
+	while (dwarf_nextcu(di->dbg, off, &next_off, &header_size,
+			    NULL, NULL, NULL) == 0) {
+		if (dwarf_offdie(di->dbg, off + header_size, cu_die) &&
+		    dwarf_haspc(cu_die, pc))
+			return true;
+
+		off = next_off;
+	}
+	return false;
+}
+
+/* The type info will be saved in @type_die */
+static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset)
+{
+	Dwarf_Word size;
+
+	/* Get the type of the variable */
+	if (die_get_real_type(var_die, type_die) == NULL) {
+		pr_debug("variable has no type\n");
+		ann_data_stat.no_typeinfo++;
+		return -1;
+	}
+
+	/*
+	 * It expects a pointer type for a memory access.
+	 * Convert to a real type it points to.
+	 */
+	if (dwarf_tag(type_die) != DW_TAG_pointer_type ||
+	    die_get_real_type(type_die, type_die) == NULL) {
+		pr_debug("no pointer or no type\n");
+		ann_data_stat.no_typeinfo++;
+		return -1;
+	}
+
+	/* Get the size of the actual type */
+	if (dwarf_aggregate_size(type_die, &size) < 0) {
+		pr_debug("type size is unknown\n");
+		ann_data_stat.invalid_size++;
+		return -1;
+	}
+
+	/* Minimal sanity check */
+	if ((unsigned)offset >= size) {
+		pr_debug("offset: %d is bigger than size: %" PRIu64 "\n", offset, size);
+		ann_data_stat.bad_offset++;
+		return -1;
+	}
+
+	return 0;
+}
+
+/* The result will be saved in @type_die */
+static int find_data_type_die(struct debuginfo *di, u64 pc,
+			      int reg, int offset, Dwarf_Die *type_die)
+{
+	Dwarf_Die cu_die, var_die;
+	Dwarf_Die *scopes = NULL;
+	int ret = -1;
+	int i, nr_scopes;
+
+	/* Get a compile_unit for this address */
+	if (!find_cu_die(di, pc, &cu_die)) {
+		pr_debug("cannot find CU for address %" PRIx64 "\n", pc);
+		ann_data_stat.no_cuinfo++;
+		return -1;
+	}
+
+	/* Get a list of nested scopes - i.e. (inlined) functions and blocks. */
+	nr_scopes = die_get_scopes(&cu_die, pc, &scopes);
+
+	/* Search from the inner-most scope to the outer */
+	for (i = nr_scopes - 1; i >= 0; i--) {
+		/* Look up variables/parameters in this scope */
+		if (!die_find_variable_by_reg(&scopes[i], pc, reg, &var_die))
+			continue;
+
+		/* Found a variable, see if it's correct */
+		ret = check_variable(&var_die, type_die, offset);
+		goto out;
+	}
+	if (ret < 0)
+		ann_data_stat.no_var++;
+
+out:
+	free(scopes);
+	return ret;
+}
+
+/**
+ * find_data_type - Return a data type at the location
+ * @ms: map and symbol at the location
+ * @ip: instruction address of the memory access
+ * @reg: register that holds the base address
+ * @offset: offset from the base address
+ *
+ * This function searches the debug information of the binary to get the data
+ * type it accesses.  The exact location is expressed by (ip, reg, offset).
+ * It returns %NULL if not found.
+ */
+struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip,
+					   int reg, int offset)
+{
+	struct annotated_data_type *result = NULL;
+	struct dso *dso = map__dso(ms->map);
+	struct debuginfo *di;
+	Dwarf_Die type_die;
+	u64 pc;
+
+	di = debuginfo__new(dso->long_name);
+	if (di == NULL) {
+		pr_debug("cannot get the debug info\n");
+		return NULL;
+	}
+
+	/*
+	 * IP is a relative instruction address from the start of the map, as
+	 * it can be randomized/relocated, it needs to translate to PC which is
+	 * a file address for DWARF processing.
+	 */
+	pc = map__rip_2objdump(ms->map, ip);
+	if (find_data_type_die(di, pc, reg, offset, &type_die) < 0)
+		goto out;
+
+	result = dso__findnew_data_type(dso, &type_die);
+
+out:
+	debuginfo__delete(di);
+	return result;
+}
+
+static int alloc_data_type_histograms(struct annotated_data_type *adt, int nr_entries)
+{
+	int i;
+	size_t sz = sizeof(struct type_hist);
+
+	sz += sizeof(struct type_hist_entry) * adt->self.size;
+
+	/* Allocate a table of pointers for each event */
+	adt->histograms = calloc(nr_entries, sizeof(*adt->histograms));
+	if (adt->histograms == NULL)
+		return -ENOMEM;
+
+	/*
+	 * Each histogram is allocated for the whole size of the type.
+	 * TODO: Probably we can move the histogram to members.
+	 */
+	for (i = 0; i < nr_entries; i++) {
+		adt->histograms[i] = zalloc(sz);
+		if (adt->histograms[i] == NULL)
+			goto err;
+	}
+	adt->nr_histograms = nr_entries; /* set the count only for a complete table */
+	return 0;
+
+err:
+	while (--i >= 0)
+		free(adt->histograms[i]);
+	zfree(&adt->histograms); /* leave NULL so no later path sees freed memory */
+	return -ENOMEM;
+}
+
+static void delete_data_type_histograms(struct annotated_data_type *adt)
+{
+	for (int i = 0; i < adt->nr_histograms; i++)
+		free(adt->histograms[i]);
+	free(adt->histograms);
+}
+
+void annotated_data_type__tree_delete(struct rb_root *root)
+{
+	struct annotated_data_type *pos;
+
+	while (!RB_EMPTY_ROOT(root)) {
+		struct rb_node *node = rb_first(root);
+
+		rb_erase(node, root);
+		pos = rb_entry(node, struct annotated_data_type, node);
+		delete_members(&pos->self);
+		delete_data_type_histograms(pos);
+		free(pos->self.type_name);
+		free(pos);
+	}
+}
+
+/**
+ * annotated_data_type__update_samples - Update histogram
+ * @adt: Data type to update
+ * @evsel: Event to update
+ * @offset: Offset in the type
+ * @nr_samples: Number of samples at this offset
+ * @period: Event count at this offset
+ *
+ * This function updates the type histogram at @offset for @evsel.  Samples
+ * are aggregated before calling this function so it can be called with more
+ * than one sample at a certain offset.
+ */
+int annotated_data_type__update_samples(struct annotated_data_type *adt,
+					struct evsel *evsel, int offset,
+					int nr_samples, u64 period)
+{
+	struct type_hist *h;
+
+	if (adt == NULL)
+		return 0;
+
+	if (adt->histograms == NULL) {
+		int nr = evsel->evlist->core.nr_entries;
+
+		if (alloc_data_type_histograms(adt, nr) < 0)
+			return -1;
+	}
+
+	if (offset < 0 || offset >= adt->self.size)
+		return -1;
+
+	h = adt->histograms[evsel->core.idx];
+
+	h->nr_samples += nr_samples;
+	h->addr[offset].nr_samples += nr_samples;
+	h->period += period;
+	h->addr[offset].period += period;
+	return 0;
+}
diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h
new file mode 100644
index 000000000000..8e73096c01d1
--- /dev/null
+++ b/tools/perf/util/annotate-data.h
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_ANNOTATE_DATA_H
+#define _PERF_ANNOTATE_DATA_H
+
+#include <errno.h>
+#include <linux/compiler.h>
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+struct evsel;
+struct map_symbol;
+
+/**
+ * struct annotated_member - Type of member field
+ * @node: List entry in the parent list
+ * @children: List head for child nodes
+ * @type_name: Name of the member type
+ * @var_name: Name of the member variable
+ * @offset: Offset from the outer data type
+ * @size: Size of the member field
+ *
+ * This represents a member type in a data type.
+ */
+struct annotated_member {
+	struct list_head node;
+	struct list_head children;
+	char *type_name;
+	char *var_name;
+	int offset;
+	int size;
+};
+
+/**
+ * struct type_hist_entry - Histogram entry per offset
+ * @nr_samples: Number of samples
+ * @period: Count of event
+ */
+struct type_hist_entry {
+	int nr_samples;
+	u64 period;
+};
+
+/**
+ * struct type_hist - Type histogram for each event
+ * @nr_samples: Total number of samples in this data type
+ * @period: Total count of the event in this data type
+ * @addr: Array of histogram entries, indexed by offset in the type
+ */
+struct type_hist {
+	u64			nr_samples;
+	u64			period;
+	struct type_hist_entry	addr[];
+};
+
+/**
+ * struct annotated_data_type - Data type to profile
+ * @node: RB-tree node for dso->data_types
+ * @self: Actual type information
+ * @nr_histograms: Number of histogram entries
+ * @histograms: An array of pointers to histograms
+ *
+ * This represents a data type accessed by samples in the profile data.
+ */
+struct annotated_data_type {
+	struct rb_node node;
+	struct annotated_member self;
+	int nr_histograms;
+	struct type_hist **histograms;
+};
+
+extern struct annotated_data_type unknown_type;
+
+/**
+ * struct annotated_data_stat - Debug statistics
+ * @total: Total number of entry
+ * @no_sym: No symbol or map found
+ * @no_insn: Failed to get disasm line
+ * @no_insn_ops: The instruction has no operands
+ * @no_mem_ops: The instruction has no memory operands
+ * @no_reg: Failed to extract a register from the operand
+ * @no_dbginfo: The binary has no debug information
+ * @no_cuinfo: Failed to find a compile_unit
+ * @no_var: Failed to find a matching variable
+ * @no_typeinfo: Failed to get a type info for the variable
+ * @invalid_size: Failed to get a size info of the type
+ * @bad_offset: The access offset is out of the type
+ */
+struct annotated_data_stat {
+	int total;
+	int no_sym;
+	int no_insn;
+	int no_insn_ops;
+	int no_mem_ops;
+	int no_reg;
+	int no_dbginfo;
+	int no_cuinfo;
+	int no_var;
+	int no_typeinfo;
+	int invalid_size;
+	int bad_offset;
+};
+extern struct annotated_data_stat ann_data_stat;
+
+#ifdef HAVE_DWARF_SUPPORT
+
+/* Returns data type at the location (ip, reg, offset) */
+struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip,
+					   int reg, int offset);
+
+/* Update type access histogram at the given offset */
+int annotated_data_type__update_samples(struct annotated_data_type *adt,
+					struct evsel *evsel, int offset,
+					int nr_samples, u64 period);
+
+/* Release all data type information in the tree */
+void annotated_data_type__tree_delete(struct rb_root *root);
+
+#else /* HAVE_DWARF_SUPPORT */
+
+static inline struct annotated_data_type *
+find_data_type(struct map_symbol *ms __maybe_unused, u64 ip __maybe_unused,
+	       int reg __maybe_unused, int offset __maybe_unused)
+{
+	return NULL;
+}
+
+static inline int
+annotated_data_type__update_samples(struct annotated_data_type *adt __maybe_unused,
+				    struct evsel *evsel __maybe_unused,
+				    int offset __maybe_unused,
+				    int nr_samples __maybe_unused,
+				    u64 period __maybe_unused)
+{
+	return -1;
+}
+
+static inline void annotated_data_type__tree_delete(struct rb_root *root __maybe_unused)
+{
+}
+
+#endif /* HAVE_DWARF_SUPPORT */
+
+#endif /* _PERF_ANNOTATE_DATA_H */
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 82956adf9963..9b70ab110ce7 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -25,12 +25,14 @@
 #include "units.h"
 #include "debug.h"
 #include "annotate.h"
+#include "annotate-data.h"
 #include "evsel.h"
 #include "evlist.h"
 #include "bpf-event.h"
 #include "bpf-utils.h"
 #include "block-range.h"
 #include "string2.h"
+#include "dwarf-regs.h"
 #include "util/event.h"
 #include "util/sharded_mutex.h"
 #include "arch/common.h"
@@ -57,6 +59,9 @@
 
 #include <linux/ctype.h>
 
+/* global annotation options */
+struct annotation_options annotate_opts;
+
 static regex_t	 file_lineno;
 
 static struct ins_ops *ins__find(struct arch *arch, const char *name);
@@ -85,6 +90,8 @@ struct arch {
 	struct		{
 		char comment_char;
 		char skip_functions_char;
+		char register_char;
+		char memory_ref_char;
 	} objdump;
 };
 
@@ -96,6 +103,10 @@ static struct ins_ops nop_ops;
 static struct ins_ops lock_ops;
 static struct ins_ops ret_ops;
 
+/* Data type collection debug statistics */
+struct annotated_data_stat ann_data_stat;
+LIST_HEAD(ann_insn_stat);
+
 static int arch__grow_instructions(struct arch *arch)
 {
 	struct ins *new_instructions;
@@ -188,6 +199,8 @@ static struct arch architectures[] = {
 		.insn_suffix = "bwlq",
 		.objdump =  {
 			.comment_char = '#',
+			.register_char = '%',
+			.memory_ref_char = '(',
 		},
 	},
 	{
@@ -340,10 +353,10 @@ bool ins__is_call(const struct ins *ins)
  */
 static inline const char *validate_comma(const char *c, struct ins_operands *ops)
 {
-	if (ops->raw_comment && c > ops->raw_comment)
+	if (ops->jump.raw_comment && c > ops->jump.raw_comment)
 		return NULL;
 
-	if (ops->raw_func_start && c > ops->raw_func_start)
+	if (ops->jump.raw_func_start && c > ops->jump.raw_func_start)
 		return NULL;
 
 	return c;
@@ -359,8 +372,8 @@ static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_s
 	const char *c = strchr(ops->raw, ',');
 	u64 start, end;
 
-	ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char);
-	ops->raw_func_start = strchr(ops->raw, '<');
+	ops->jump.raw_comment = strchr(ops->raw, arch->objdump.comment_char);
+	ops->jump.raw_func_start = strchr(ops->raw, '<');
 
 	c = validate_comma(c, ops);
 
@@ -462,7 +475,16 @@ static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
 			 ops->target.offset);
 }
 
+static void jump__delete(struct ins_operands *ops __maybe_unused)
+{
+	/*
+	 * The ops->jump.raw_comment and ops->jump.raw_func_start belong to the
+	 * raw string, don't free them.
+	 */
+}
+
 static struct ins_ops jump_ops = {
+	.free	   = jump__delete,
 	.parse	   = jump__parse,
 	.scnprintf = jump__scnprintf,
 };
@@ -557,6 +579,34 @@ static struct ins_ops lock_ops = {
 	.scnprintf = lock__scnprintf,
 };
 
+/*
+ * Check if the operand has more than one register like x86 SIB addressing:
+ *   0x1234(%rax, %rbx, 8)
+ *
+ * But it doesn't care about segment selectors like %gs:0x5678(%rcx), so just
+ * check the input string after 'memory_ref_char' if it exists.
+ */
+static bool check_multi_regs(struct arch *arch, const char *op)
+{
+	int count = 0;
+
+	if (arch->objdump.register_char == 0)
+		return false;
+
+	if (arch->objdump.memory_ref_char) {
+		op = strchr(op, arch->objdump.memory_ref_char);
+		if (op == NULL)
+			return false;
+	}
+
+	while ((op = strchr(op, arch->objdump.register_char)) != NULL) {
+		count++;
+		op++;
+	}
+
+	return count > 1;
+}
+
 static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused)
 {
 	char *s = strchr(ops->raw, ','), *target, *comment, prev;
@@ -584,6 +634,8 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_sy
 	if (ops->source.raw == NULL)
 		return -1;
 
+	ops->source.multi_regs = check_multi_regs(arch, ops->source.raw);
+
 	target = skip_spaces(++s);
 	comment = strchr(s, arch->objdump.comment_char);
 
@@ -604,6 +656,8 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_sy
 	if (ops->target.raw == NULL)
 		goto out_free_source;
 
+	ops->target.multi_regs = check_multi_regs(arch, ops->target.raw);
+
 	if (comment == NULL)
 		return 0;
 
@@ -795,6 +849,11 @@ static struct arch *arch__find(const char *name)
 	return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp);
 }
 
+bool arch__is(struct arch *arch, const char *name)
+{
+	return !strcmp(arch->name, name);
+}
+
 static struct annotated_source *annotated_source__new(void)
 {
 	struct annotated_source *src = zalloc(sizeof(*src));
@@ -810,7 +869,6 @@ static __maybe_unused void annotated_source__delete(struct annotated_source *src
 	if (src == NULL)
 		return;
 	zfree(&src->histograms);
-	zfree(&src->cycles_hist);
 	free(src);
 }
 
@@ -845,18 +903,6 @@ static int annotated_source__alloc_histograms(struct annotated_source *src,
 	return src->histograms ? 0 : -1;
 }
 
-/* The cycles histogram is lazily allocated. */
-static int symbol__alloc_hist_cycles(struct symbol *sym)
-{
-	struct annotation *notes = symbol__annotation(sym);
-	const size_t size = symbol__size(sym);
-
-	notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist));
-	if (notes->src->cycles_hist == NULL)
-		return -1;
-	return 0;
-}
-
 void symbol__annotate_zero_histograms(struct symbol *sym)
 {
 	struct annotation *notes = symbol__annotation(sym);
@@ -865,9 +911,10 @@ void symbol__annotate_zero_histograms(struct symbol *sym)
 	if (notes->src != NULL) {
 		memset(notes->src->histograms, 0,
 		       notes->src->nr_histograms * notes->src->sizeof_sym_hist);
-		if (notes->src->cycles_hist)
-			memset(notes->src->cycles_hist, 0,
-				symbol__size(sym) * sizeof(struct cyc_hist));
+	}
+	if (notes->branch && notes->branch->cycles_hist) {
+		memset(notes->branch->cycles_hist, 0,
+		       symbol__size(sym) * sizeof(struct cyc_hist));
 	}
 	annotation__unlock(notes);
 }
@@ -958,23 +1005,33 @@ static int __symbol__inc_addr_samples(struct map_symbol *ms,
 	return 0;
 }
 
+struct annotated_branch *annotation__get_branch(struct annotation *notes)
+{
+	if (notes == NULL)
+		return NULL;
+
+	if (notes->branch == NULL)
+		notes->branch = zalloc(sizeof(*notes->branch));
+
+	return notes->branch;
+}
+
 static struct cyc_hist *symbol__cycles_hist(struct symbol *sym)
 {
 	struct annotation *notes = symbol__annotation(sym);
+	struct annotated_branch *branch;
 
-	if (notes->src == NULL) {
-		notes->src = annotated_source__new();
-		if (notes->src == NULL)
-			return NULL;
-		goto alloc_cycles_hist;
-	}
+	branch = annotation__get_branch(notes);
+	if (branch == NULL)
+		return NULL;
+
+	if (branch->cycles_hist == NULL) {
+		const size_t size = symbol__size(sym);
 
-	if (!notes->src->cycles_hist) {
-alloc_cycles_hist:
-		symbol__alloc_hist_cycles(sym);
+		branch->cycles_hist = calloc(size, sizeof(struct cyc_hist));
 	}
 
-	return notes->src->cycles_hist;
+	return branch->cycles_hist;
 }
 
 struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists)
@@ -1077,12 +1134,20 @@ static unsigned annotation__count_insn(struct annotation *notes, u64 start, u64
 	u64 offset;
 
 	for (offset = start; offset <= end; offset++) {
-		if (notes->offsets[offset])
+		if (notes->src->offsets[offset])
 			n_insn++;
 	}
 	return n_insn;
 }
 
+static void annotated_branch__delete(struct annotated_branch *branch)
+{
+	if (branch) {
+		zfree(&branch->cycles_hist);
+		free(branch);
+	}
+}
+
 static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 end, struct cyc_hist *ch)
 {
 	unsigned n_insn;
@@ -1091,6 +1156,7 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64
 
 	n_insn = annotation__count_insn(notes, start, end);
 	if (n_insn && ch->num && ch->cycles) {
+		struct annotated_branch *branch;
 		float ipc = n_insn / ((double)ch->cycles / (double)ch->num);
 
 		/* Hide data when there are too many overlaps. */
@@ -1098,54 +1164,76 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64
 			return;
 
 		for (offset = start; offset <= end; offset++) {
-			struct annotation_line *al = notes->offsets[offset];
+			struct annotation_line *al = notes->src->offsets[offset];
 
-			if (al && al->ipc == 0.0) {
-				al->ipc = ipc;
+			if (al && al->cycles && al->cycles->ipc == 0.0) {
+				al->cycles->ipc = ipc;
 				cover_insn++;
 			}
 		}
 
-		if (cover_insn) {
-			notes->hit_cycles += ch->cycles;
-			notes->hit_insn += n_insn * ch->num;
-			notes->cover_insn += cover_insn;
+		branch = annotation__get_branch(notes);
+		if (cover_insn && branch) {
+			branch->hit_cycles += ch->cycles;
+			branch->hit_insn += n_insn * ch->num;
+			branch->cover_insn += cover_insn;
 		}
 	}
 }
 
-void annotation__compute_ipc(struct annotation *notes, size_t size)
+static int annotation__compute_ipc(struct annotation *notes, size_t size)
 {
+	int err = 0;
 	s64 offset;
 
-	if (!notes->src || !notes->src->cycles_hist)
-		return;
+	if (!notes->branch || !notes->branch->cycles_hist)
+		return 0;
 
-	notes->total_insn = annotation__count_insn(notes, 0, size - 1);
-	notes->hit_cycles = 0;
-	notes->hit_insn = 0;
-	notes->cover_insn = 0;
+	notes->branch->total_insn = annotation__count_insn(notes, 0, size - 1);
+	notes->branch->hit_cycles = 0;
+	notes->branch->hit_insn = 0;
+	notes->branch->cover_insn = 0;
 
 	annotation__lock(notes);
 	for (offset = size - 1; offset >= 0; --offset) {
 		struct cyc_hist *ch;
 
-		ch = &notes->src->cycles_hist[offset];
+		ch = &notes->branch->cycles_hist[offset];
 		if (ch && ch->cycles) {
 			struct annotation_line *al;
 
+			al = notes->src->offsets[offset];
+			if (al && al->cycles == NULL) {
+				al->cycles = zalloc(sizeof(*al->cycles));
+				if (al->cycles == NULL) {
+					err = ENOMEM;
+					break;
+				}
+			}
 			if (ch->have_start)
 				annotation__count_and_fill(notes, ch->start, offset, ch);
-			al = notes->offsets[offset];
 			if (al && ch->num_aggr) {
-				al->cycles = ch->cycles_aggr / ch->num_aggr;
-				al->cycles_max = ch->cycles_max;
-				al->cycles_min = ch->cycles_min;
+				al->cycles->avg = ch->cycles_aggr / ch->num_aggr;
+				al->cycles->max = ch->cycles_max;
+				al->cycles->min = ch->cycles_min;
+			}
+		}
+	}
+
+	if (err) {
+		while (++offset < (s64)size) {
+			struct cyc_hist *ch = &notes->branch->cycles_hist[offset];
+
+			if (ch && ch->cycles) {
+				struct annotation_line *al = notes->src->offsets[offset];
+				if (al)
+					zfree(&al->cycles);
 			}
-			notes->have_cycles = true;
 		}
 	}
+
 	annotation__unlock(notes);
+	return err;	/* 0 on success, ENOMEM if a per-line cycles record could not be allocated */
 }
 
 int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample,
@@ -1225,6 +1313,7 @@ static void annotation_line__exit(struct annotation_line *al)
 {
 	zfree_srcline(&al->path);
 	zfree(&al->line);
+	zfree(&al->cycles);
 }
 
 static size_t disasm_line_size(int nr)
@@ -1299,6 +1388,7 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r
 void annotation__exit(struct annotation *notes)
 {
 	annotated_source__delete(notes->src);
+	annotated_branch__delete(notes->branch);
 }
 
 static struct sharded_mutex *sharded_mutex;
@@ -1817,7 +1907,6 @@ static int symbol__disassemble_bpf(struct symbol *sym,
 				   struct annotate_args *args)
 {
 	struct annotation *notes = symbol__annotation(sym);
-	struct annotation_options *opts = args->options;
 	struct bpf_prog_linfo *prog_linfo = NULL;
 	struct bpf_prog_info_node *info_node;
 	int len = sym->end - sym->start;
@@ -1927,7 +2016,7 @@ static int symbol__disassemble_bpf(struct symbol *sym,
 		prev_buf_size = buf_size;
 		fflush(s);
 
-		if (!opts->hide_src_code && srcline) {
+		if (!annotate_opts.hide_src_code && srcline) {
 			args->offset = -1;
 			args->line = strdup(srcline);
 			args->line_nr = 0;
@@ -2050,7 +2139,7 @@ static char *expand_tabs(char *line, char **storage, size_t *storage_len)
 
 static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
 {
-	struct annotation_options *opts = args->options;
+	struct annotation_options *opts = &annotate_opts;
 	struct map *map = args->ms.map;
 	struct dso *dso = map__dso(map);
 	char *command;
@@ -2113,12 +2202,13 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
 	err = asprintf(&command,
 		 "%s %s%s --start-address=0x%016" PRIx64
 		 " --stop-address=0x%016" PRIx64
-		 " -l -d %s %s %s %c%s%c %s%s -C \"$1\"",
+		 " %s -d %s %s %s %c%s%c %s%s -C \"$1\"",
 		 opts->objdump_path ?: "objdump",
 		 opts->disassembler_style ? "-M " : "",
 		 opts->disassembler_style ?: "",
 		 map__rip_2objdump(map, sym->start),
 		 map__rip_2objdump(map, sym->end),
+		 opts->show_linenr ? "-l" : "",
 		 opts->show_asm_raw ? "" : "--no-show-raw-insn",
 		 opts->annotate_src ? "-S" : "",
 		 opts->prefix ? "--prefix " : "",
@@ -2299,15 +2389,8 @@ void symbol__calc_percent(struct symbol *sym, struct evsel *evsel)
 	annotation__calc_percent(notes, evsel, symbol__size(sym));
 }
 
-int symbol__annotate(struct map_symbol *ms, struct evsel *evsel,
-		     struct annotation_options *options, struct arch **parch)
+static int evsel__get_arch(struct evsel *evsel, struct arch **parch)
 {
-	struct symbol *sym = ms->sym;
-	struct annotation *notes = symbol__annotation(sym);
-	struct annotate_args args = {
-		.evsel		= evsel,
-		.options	= options,
-	};
 	struct perf_env *env = evsel__env(evsel);
 	const char *arch_name = perf_env__arch(env);
 	struct arch *arch;
@@ -2316,25 +2399,45 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel,
 	if (!arch_name)
 		return errno;
 
-	args.arch = arch = arch__find(arch_name);
+	*parch = arch = arch__find(arch_name);
 	if (arch == NULL) {
 		pr_err("%s: unsupported arch %s\n", __func__, arch_name);
 		return ENOTSUP;
 	}
 
-	if (parch)
-		*parch = arch;
-
 	if (arch->init) {
 		err = arch->init(arch, env ? env->cpuid : NULL);
 		if (err) {
-			pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name);
+			pr_err("%s: failed to initialize %s arch priv area\n",
+			       __func__, arch->name);
 			return err;
 		}
 	}
+	return 0;
+}
 
+int symbol__annotate(struct map_symbol *ms, struct evsel *evsel,
+		     struct arch **parch)
+{
+	struct symbol *sym = ms->sym;
+	struct annotation *notes = symbol__annotation(sym);
+	struct annotate_args args = {
+		.evsel		= evsel,
+		.options	= &annotate_opts,
+	};
+	struct arch *arch = NULL;
+	int err;
+
+	err = evsel__get_arch(evsel, &arch);
+	if (err)	/* failures are reported as positive codes, e.g. ENOTSUP */
+		return err;
+
+	if (parch)
+		*parch = arch;
+
+	args.arch = arch;
 	args.ms = *ms;
-	if (notes->options && notes->options->full_addr)
+	if (annotate_opts.full_addr)
 		notes->start = map__objdump_2mem(ms->map, ms->sym->start);
 	else
 		notes->start = map__rip_2objdump(ms->map, ms->sym->start);
@@ -2342,12 +2445,12 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel,
 	return symbol__disassemble(sym, &args);
 }
 
-static void insert_source_line(struct rb_root *root, struct annotation_line *al,
-			       struct annotation_options *opts)
+static void insert_source_line(struct rb_root *root, struct annotation_line *al)
 {
 	struct annotation_line *iter;
 	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent = NULL;
+	unsigned int percent_type = annotate_opts.percent_type;
 	int i, ret;
 
 	while (*p != NULL) {
@@ -2358,7 +2461,7 @@ static void insert_source_line(struct rb_root *root, struct annotation_line *al,
 		if (ret == 0) {
 			for (i = 0; i < al->data_nr; i++) {
 				iter->data[i].percent_sum += annotation_data__percent(&al->data[i],
-										      opts->percent_type);
+										      percent_type);
 			}
 			return;
 		}
@@ -2371,7 +2474,7 @@ static void insert_source_line(struct rb_root *root, struct annotation_line *al,
 
 	for (i = 0; i < al->data_nr; i++) {
 		al->data[i].percent_sum = annotation_data__percent(&al->data[i],
-								   opts->percent_type);
+								   percent_type);
 	}
 
 	rb_link_node(&al->rb_node, parent, p);
@@ -2493,8 +2596,7 @@ static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start)
 	return 0;
 }
 
-int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel,
-			    struct annotation_options *opts)
+int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel)
 {
 	struct map *map = ms->map;
 	struct symbol *sym = ms->sym;
@@ -2505,6 +2607,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel,
 	struct annotation *notes = symbol__annotation(sym);
 	struct sym_hist *h = annotation__histogram(notes, evsel->core.idx);
 	struct annotation_line *pos, *queue = NULL;
+	struct annotation_options *opts = &annotate_opts;
 	u64 start = map__rip_2objdump(map, sym->start);
 	int printed = 2, queue_len = 0, addr_fmt_width;
 	int more = 0;
@@ -2633,8 +2736,7 @@ static void FILE__write_graph(void *fp, int graph)
 	fputs(s, fp);
 }
 
-static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp,
-				     struct annotation_options *opts)
+static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp)
 {
 	struct annotation *notes = symbol__annotation(sym);
 	struct annotation_write_ops wops = {
@@ -2649,9 +2751,9 @@ static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp,
 	struct annotation_line *al;
 
 	list_for_each_entry(al, &notes->src->source, node) {
-		if (annotation_line__filter(al, notes))
+		if (annotation_line__filter(al))
 			continue;
-		annotation_line__write(al, notes, &wops, opts);
+		annotation_line__write(al, notes, &wops);
 		fputc('\n', fp);
 		wops.first_line = false;
 	}
@@ -2659,8 +2761,7 @@ static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp,
 	return 0;
 }
 
-int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel,
-				struct annotation_options *opts)
+int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel)
 {
 	const char *ev_name = evsel__name(evsel);
 	char buf[1024];
@@ -2682,7 +2783,7 @@ int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel,
 
 	fprintf(fp, "%s() %s\nEvent: %s\n\n",
 		ms->sym->name, map__dso(ms->map)->long_name, ev_name);
-	symbol__annotate_fprintf2(ms->sym, fp, opts);
+	symbol__annotate_fprintf2(ms->sym, fp);
 
 	fclose(fp);
 	err = 0;
@@ -2769,7 +2870,7 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym)
 		return;
 
 	for (offset = 0; offset < size; ++offset) {
-		struct annotation_line *al = notes->offsets[offset];
+		struct annotation_line *al = notes->src->offsets[offset];
 		struct disasm_line *dl;
 
 		dl = disasm_line(al);
@@ -2777,7 +2878,7 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym)
 		if (!disasm_line__is_valid_local_jump(dl, sym))
 			continue;
 
-		al = notes->offsets[dl->ops.target.offset];
+		al = notes->src->offsets[dl->ops.target.offset];
 
 		/*
 		 * FIXME: Oops, no jump target? Buggy disassembler? Or do we
@@ -2794,19 +2895,20 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym)
 void annotation__set_offsets(struct annotation *notes, s64 size)
 {
 	struct annotation_line *al;
+	struct annotated_source *src = notes->src;
 
-	notes->max_line_len = 0;
-	notes->nr_entries = 0;
-	notes->nr_asm_entries = 0;
+	src->max_line_len = 0;
+	src->nr_entries = 0;
+	src->nr_asm_entries = 0;
 
-	list_for_each_entry(al, &notes->src->source, node) {
+	list_for_each_entry(al, &src->source, node) {
 		size_t line_len = strlen(al->line);
 
-		if (notes->max_line_len < line_len)
-			notes->max_line_len = line_len;
-		al->idx = notes->nr_entries++;
+		if (src->max_line_len < line_len)
+			src->max_line_len = line_len;
+		al->idx = src->nr_entries++;
 		if (al->offset != -1) {
-			al->idx_asm = notes->nr_asm_entries++;
+			al->idx_asm = src->nr_asm_entries++;
 			/*
 			 * FIXME: short term bandaid to cope with assembly
 			 * routines that comes with labels in the same column
@@ -2815,7 +2917,7 @@ void annotation__set_offsets(struct annotation *notes, s64 size)
 			 * E.g. copy_user_generic_unrolled
  			 */
 			if (al->offset < size)
-				notes->offsets[al->offset] = al;
+				notes->src->offsets[al->offset] = al;
 		} else
 			al->idx_asm = -1;
 	}
@@ -2858,24 +2960,24 @@ void annotation__init_column_widths(struct annotation *notes, struct symbol *sym
 
 void annotation__update_column_widths(struct annotation *notes)
 {
-	if (notes->options->use_offset)
+	if (annotate_opts.use_offset)
 		notes->widths.target = notes->widths.min_addr;
-	else if (notes->options->full_addr)
+	else if (annotate_opts.full_addr)
 		notes->widths.target = BITS_PER_LONG / 4;
 	else
 		notes->widths.target = notes->widths.max_addr;
 
 	notes->widths.addr = notes->widths.target;
 
-	if (notes->options->show_nr_jumps)
+	if (annotate_opts.show_nr_jumps)
 		notes->widths.addr += notes->widths.jumps + 1;
 }
 
 void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *ms)
 {
-	notes->options->full_addr = !notes->options->full_addr;
+	annotate_opts.full_addr = !annotate_opts.full_addr;
 
-	if (notes->options->full_addr)
+	if (annotate_opts.full_addr)
 		notes->start = map__objdump_2mem(ms->map, ms->sym->start);
 	else
 		notes->start = map__rip_2objdump(ms->map, ms->sym->start);
@@ -2884,8 +2986,7 @@ void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *m
 }
 
 static void annotation__calc_lines(struct annotation *notes, struct map *map,
-				   struct rb_root *root,
-				   struct annotation_options *opts)
+				   struct rb_root *root)
 {
 	struct annotation_line *al;
 	struct rb_root tmp_root = RB_ROOT;
@@ -2898,7 +2999,7 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map,
 			double percent;
 
 			percent = annotation_data__percent(&al->data[i],
-							   opts->percent_type);
+							   annotate_opts.percent_type);
 
 			if (percent > percent_max)
 				percent_max = percent;
@@ -2909,22 +3010,20 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map,
 
 		al->path = get_srcline(map__dso(map), notes->start + al->offset, NULL,
 				       false, true, notes->start + al->offset);
-		insert_source_line(&tmp_root, al, opts);
+		insert_source_line(&tmp_root, al);
 	}
 
 	resort_source_line(root, &tmp_root);
 }
 
-static void symbol__calc_lines(struct map_symbol *ms, struct rb_root *root,
-			       struct annotation_options *opts)
+static void symbol__calc_lines(struct map_symbol *ms, struct rb_root *root)
 {
 	struct annotation *notes = symbol__annotation(ms->sym);
 
-	annotation__calc_lines(notes, ms->map, root, opts);
+	annotation__calc_lines(notes, ms->map, root);
 }
 
-int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel,
-			  struct annotation_options *opts)
+int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel)
 {
 	struct dso *dso = map__dso(ms->map);
 	struct symbol *sym = ms->sym;
@@ -2933,7 +3032,7 @@ int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel,
 	char buf[1024];
 	int err;
 
-	err = symbol__annotate2(ms, evsel, opts, NULL);
+	err = symbol__annotate2(ms, evsel, NULL);
 	if (err) {
 		char msg[BUFSIZ];
 
@@ -2943,31 +3042,31 @@ int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel,
 		return -1;
 	}
 
-	if (opts->print_lines) {
-		srcline_full_filename = opts->full_path;
-		symbol__calc_lines(ms, &source_line, opts);
+	if (annotate_opts.print_lines) {
+		srcline_full_filename = annotate_opts.full_path;
+		symbol__calc_lines(ms, &source_line);
 		print_summary(&source_line, dso->long_name);
 	}
 
 	hists__scnprintf_title(hists, buf, sizeof(buf));
 	fprintf(stdout, "%s, [percent: %s]\n%s() %s\n",
-		buf, percent_type_str(opts->percent_type), sym->name, dso->long_name);
-	symbol__annotate_fprintf2(sym, stdout, opts);
+		buf, percent_type_str(annotate_opts.percent_type), sym->name,
+		dso->long_name);
+	symbol__annotate_fprintf2(sym, stdout);
 
 	annotated_source__purge(symbol__annotation(sym)->src);
 
 	return 0;
 }
 
-int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel,
-			 struct annotation_options *opts)
+int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel)
 {
 	struct dso *dso = map__dso(ms->map);
 	struct symbol *sym = ms->sym;
 	struct rb_root source_line = RB_ROOT;
 	int err;
 
-	err = symbol__annotate(ms, evsel, opts, NULL);
+	err = symbol__annotate(ms, evsel, NULL);
 	if (err) {
 		char msg[BUFSIZ];
 
@@ -2979,13 +3078,13 @@ int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel,
 
 	symbol__calc_percent(sym, evsel);
 
-	if (opts->print_lines) {
-		srcline_full_filename = opts->full_path;
-		symbol__calc_lines(ms, &source_line, opts);
+	if (annotate_opts.print_lines) {
+		srcline_full_filename = annotate_opts.full_path;
+		symbol__calc_lines(ms, &source_line);
 		print_summary(&source_line, dso->long_name);
 	}
 
-	symbol__annotate_printf(ms, evsel, opts);
+	symbol__annotate_printf(ms, evsel);
 
 	annotated_source__purge(symbol__annotation(sym)->src);
 
@@ -3046,19 +3145,20 @@ call_like:
 		obj__printf(obj, "  ");
 	}
 
-	disasm_line__scnprintf(dl, bf, size, !notes->options->use_offset, notes->widths.max_ins_name);
+	disasm_line__scnprintf(dl, bf, size, !annotate_opts.use_offset, notes->widths.max_ins_name);
 }
 
 static void ipc_coverage_string(char *bf, int size, struct annotation *notes)
 {
 	double ipc = 0.0, coverage = 0.0;
+	struct annotated_branch *branch = annotation__get_branch(notes);
 
-	if (notes->hit_cycles)
-		ipc = notes->hit_insn / ((double)notes->hit_cycles);
+	if (branch && branch->hit_cycles)
+		ipc = branch->hit_insn / ((double)branch->hit_cycles);
 
-	if (notes->total_insn) {
-		coverage = notes->cover_insn * 100.0 /
-			((double)notes->total_insn);
+	if (branch && branch->total_insn) {
+		coverage = branch->cover_insn * 100.0 /
+			((double)branch->total_insn);
 	}
 
 	scnprintf(bf, size, "(Average IPC: %.2f, IPC Coverage: %.1f%%)",
@@ -3083,8 +3183,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
 	int printed;
 
 	if (first_line && (al->offset == -1 || percent_max == 0.0)) {
-		if (notes->have_cycles) {
-			if (al->ipc == 0.0 && al->cycles == 0)
+		if (notes->branch && al->cycles) {
+			if (al->cycles->ipc == 0.0 && al->cycles->avg == 0)
 				show_title = true;
 		} else
 			show_title = true;
@@ -3120,18 +3220,18 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
 		}
 	}
 
-	if (notes->have_cycles) {
-		if (al->ipc)
-			obj__printf(obj, "%*.2f ", ANNOTATION__IPC_WIDTH - 1, al->ipc);
+	if (notes->branch) {
+		if (al->cycles && al->cycles->ipc)
+			obj__printf(obj, "%*.2f ", ANNOTATION__IPC_WIDTH - 1, al->cycles->ipc);
 		else if (!show_title)
 			obj__printf(obj, "%*s", ANNOTATION__IPC_WIDTH, " ");
 		else
 			obj__printf(obj, "%*s ", ANNOTATION__IPC_WIDTH - 1, "IPC");
 
-		if (!notes->options->show_minmax_cycle) {
-			if (al->cycles)
+		if (!annotate_opts.show_minmax_cycle) {
+			if (al->cycles && al->cycles->avg)
 				obj__printf(obj, "%*" PRIu64 " ",
-					   ANNOTATION__CYCLES_WIDTH - 1, al->cycles);
+					   ANNOTATION__CYCLES_WIDTH - 1, al->cycles->avg);
 			else if (!show_title)
 				obj__printf(obj, "%*s",
 					    ANNOTATION__CYCLES_WIDTH, " ");
@@ -3145,8 +3245,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
 
 				scnprintf(str, sizeof(str),
 					"%" PRIu64 "(%" PRIu64 "/%" PRIu64 ")",
-					al->cycles, al->cycles_min,
-					al->cycles_max);
+					al->cycles->avg, al->cycles->min,
+					al->cycles->max);
 
 				obj__printf(obj, "%*s ",
 					    ANNOTATION__MINMAX_CYCLES_WIDTH - 1,
@@ -3172,7 +3272,7 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
 	if (!*al->line)
 		obj__printf(obj, "%-*s", width - pcnt_width - cycles_width, " ");
 	else if (al->offset == -1) {
-		if (al->line_nr && notes->options->show_linenr)
+		if (al->line_nr && annotate_opts.show_linenr)
 			printed = scnprintf(bf, sizeof(bf), "%-*d ", notes->widths.addr + 1, al->line_nr);
 		else
 			printed = scnprintf(bf, sizeof(bf), "%-*s  ", notes->widths.addr, " ");
@@ -3182,15 +3282,15 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
 		u64 addr = al->offset;
 		int color = -1;
 
-		if (!notes->options->use_offset)
+		if (!annotate_opts.use_offset)
 			addr += notes->start;
 
-		if (!notes->options->use_offset) {
+		if (!annotate_opts.use_offset) {
 			printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr);
 		} else {
 			if (al->jump_sources &&
-			    notes->options->offset_level >= ANNOTATION__OFFSET_JUMP_TARGETS) {
-				if (notes->options->show_nr_jumps) {
+			    annotate_opts.offset_level >= ANNOTATION__OFFSET_JUMP_TARGETS) {
+				if (annotate_opts.show_nr_jumps) {
 					int prev;
 					printed = scnprintf(bf, sizeof(bf), "%*d ",
 							    notes->widths.jumps,
@@ -3204,9 +3304,9 @@ print_addr:
 				printed = scnprintf(bf, sizeof(bf), "%*" PRIx64 ": ",
 						    notes->widths.target, addr);
 			} else if (ins__is_call(&disasm_line(al)->ins) &&
-				   notes->options->offset_level >= ANNOTATION__OFFSET_CALL) {
+				   annotate_opts.offset_level >= ANNOTATION__OFFSET_CALL) {
 				goto print_addr;
-			} else if (notes->options->offset_level == ANNOTATION__MAX_OFFSET_LEVEL) {
+			} else if (annotate_opts.offset_level == ANNOTATION__MAX_OFFSET_LEVEL) {
 				goto print_addr;
 			} else {
 				printed = scnprintf(bf, sizeof(bf), "%-*s  ",
@@ -3228,43 +3328,44 @@ print_addr:
 }
 
 void annotation_line__write(struct annotation_line *al, struct annotation *notes,
-			    struct annotation_write_ops *wops,
-			    struct annotation_options *opts)
+			    struct annotation_write_ops *wops)
 {
 	__annotation_line__write(al, notes, wops->first_line, wops->current_entry,
 				 wops->change_color, wops->width, wops->obj,
-				 opts->percent_type,
+				 annotate_opts.percent_type,
 				 wops->set_color, wops->set_percent_color,
 				 wops->set_jumps_percent_color, wops->printf,
 				 wops->write_graph);
 }
 
 int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel,
-		      struct annotation_options *options, struct arch **parch)
+		      struct arch **parch)
 {
 	struct symbol *sym = ms->sym;
 	struct annotation *notes = symbol__annotation(sym);
 	size_t size = symbol__size(sym);
 	int nr_pcnt = 1, err;
 
-	notes->offsets = zalloc(size * sizeof(struct annotation_line *));
-	if (notes->offsets == NULL)
+	notes->src->offsets = zalloc(size * sizeof(struct annotation_line *));
+	if (notes->src->offsets == NULL)
 		return ENOMEM;
 
 	if (evsel__is_group_event(evsel))
 		nr_pcnt = evsel->core.nr_members;
 
-	err = symbol__annotate(ms, evsel, options, parch);
+	err = symbol__annotate(ms, evsel, parch);
 	if (err)
 		goto out_free_offsets;
 
-	notes->options = options;
-
 	symbol__calc_percent(sym, evsel);
 
 	annotation__set_offsets(notes, size);
 	annotation__mark_jump_targets(notes, sym);
-	annotation__compute_ipc(notes, size);
+
+	err = annotation__compute_ipc(notes, size);
+	if (err)
+		goto out_free_offsets;
+
 	annotation__init_column_widths(notes, sym);
 	notes->nr_events = nr_pcnt;
 
@@ -3274,7 +3375,7 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel,
 	return 0;
 
 out_free_offsets:
-	zfree(&notes->offsets);
+	zfree(&notes->src->offsets);
 	return err;
 }
 
@@ -3337,8 +3438,10 @@ static int annotation__config(const char *var, const char *value, void *data)
 	return 0;
 }
 
-void annotation_options__init(struct annotation_options *opt)
+void annotation_options__init(void)
 {
+	struct annotation_options *opt = &annotate_opts;
+
 	memset(opt, 0, sizeof(*opt));
 
 	/* Default values. */
@@ -3349,16 +3452,15 @@ void annotation_options__init(struct annotation_options *opt)
 	opt->percent_type = PERCENT_PERIOD_LOCAL;
 }
 
-
-void annotation_options__exit(struct annotation_options *opt)
+void annotation_options__exit(void)
 {
-	zfree(&opt->disassembler_style);
-	zfree(&opt->objdump_path);
+	zfree(&annotate_opts.disassembler_style);
+	zfree(&annotate_opts.objdump_path);
 }
 
-void annotation_config__init(struct annotation_options *opt)
+void annotation_config__init(void)
 {
-	perf_config(annotation__config, opt);
+	perf_config(annotation__config, &annotate_opts);
 }
 
 static unsigned int parse_percent_type(char *str1, char *str2)
@@ -3382,10 +3484,9 @@ static unsigned int parse_percent_type(char *str1, char *str2)
 	return type;
 }
 
-int annotate_parse_percent_type(const struct option *opt, const char *_str,
+int annotate_parse_percent_type(const struct option *opt __maybe_unused, const char *_str,
 				int unset __maybe_unused)
 {
-	struct annotation_options *opts = opt->value;
 	unsigned int type;
 	char *str1, *str2;
 	int err = -1;
@@ -3404,7 +3505,7 @@ int annotate_parse_percent_type(const struct option *opt, const char *_str,
 	if (type == (unsigned int) -1)
 		type = parse_percent_type(str2, str1);
 	if (type != (unsigned int) -1) {
-		opts->percent_type = type;
+		annotate_opts.percent_type = type;
 		err = 0;
 	}
 
@@ -3413,11 +3514,267 @@ out:
 	return err;
 }
 
-int annotate_check_args(struct annotation_options *args)
+int annotate_check_args(void)
 {
+	struct annotation_options *args = &annotate_opts;
+
 	if (args->prefix_strip && !args->prefix) {
 		pr_err("--prefix-strip requires --prefix\n");
 		return -1;
 	}
 	return 0;
 }
+
+/*
+ * Get register number and access offset from the given operand string.
+ * It assumes AT&T x86 asm format like OFFSET(REG), so other architectures
+ * may need a different parser.  Fills @op_loc->reg and @op_loc->offset
+ * when it returns 0; returns -1 when no register can be extracted.
+ */
+static int extract_reg_offset(struct arch *arch, const char *str,
+			      struct annotated_op_loc *op_loc)
+{
+	char *p;
+	char *regname;
+
+	if (arch->objdump.register_char == 0)
+		return -1;
+
+	/*
+	 * It should start from offset, but it's possible to skip 0
+	 * in the asm.  So 0(%rax) should be same as (%rax).
+	 *
+	 * However, it can also start with a segment select register like
+	 * %gs:0x18(%rbx).  In that case the segment part is skipped.
+	 */
+	if (*str == arch->objdump.register_char) {
+		while (*str && !isdigit(*str) &&
+		       *str != arch->objdump.memory_ref_char)
+			str++;
+	}
+
+	op_loc->offset = strtol(str, &p, 0);
+
+	p = strchr(p, arch->objdump.register_char);
+	if (p == NULL)
+		return -1;
+
+	regname = strdup(p);
+	if (regname == NULL)
+		return -1;
+
+	op_loc->reg = get_dwarf_regnum(regname, 0);
+	free(regname);
+	return 0;
+}
+
+/**
+ * annotate_get_insn_location - Get location of instruction
+ * @arch: the architecture info
+ * @dl: the target instruction
+ * @loc: a buffer to save the data
+ *
+ * Fill @loc with the register and offset of the source and target operands
+ * and whether each one accesses a memory location.  The offset field is
+ * meaningful only when the corresponding mem_ref flag is set; reg defaults
+ * to -1.  Returns 0 on success, -1 when no operands are available.
+ *
+ * Some examples on x86:
+ *
+ *   mov  (%rax), %rcx   # src_reg = rax, src_mem = 1, src_offset = 0
+ *                       # dst_reg = rcx, dst_mem = 0
+ *
+ *   mov  0x18, %r8      # src_reg = -1, dst_reg = r8
+ */
+int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl,
+			       struct annotated_insn_loc *loc)
+{
+	struct ins_operands *ops;
+	struct annotated_op_loc *op_loc;
+	int i;
+
+	if (!strcmp(dl->ins.name, "lock"))
+		ops = dl->ops.locked.ops;
+	else
+		ops = &dl->ops;
+
+	if (ops == NULL)
+		return -1;
+
+	memset(loc, 0, sizeof(*loc));
+
+	for_each_insn_op_loc(loc, i, op_loc) {
+		const char *insn_str = ops->source.raw;
+
+		if (i == INSN_OP_TARGET)
+			insn_str = ops->target.raw;
+
+		/* Invalidate the register by default */
+		op_loc->reg = -1;
+
+		if (insn_str == NULL)
+			continue;
+		/* NOTE(review): if memory_ref_char is 0, strchr() matches the NUL */
+		if (strchr(insn_str, arch->objdump.memory_ref_char)) {
+			op_loc->mem_ref = true;
+			extract_reg_offset(arch, insn_str, op_loc);
+		} else {
+			char *s = strdup(insn_str);
+
+			if (s) {
+				op_loc->reg = get_dwarf_regnum(s, 0);
+				free(s);
+			}
+		}
+	}
+
+	return 0;
+}
+
+static void symbol__ensure_annotate(struct map_symbol *ms, struct evsel *evsel)
+{
+	struct disasm_line *dl, *tmp_dl;
+	struct annotation *notes;
+
+	notes = symbol__annotation(ms->sym);
+	if (!list_empty(&notes->src->source))
+		return;
+	/* NOTE(review): only negative errors bail out here - confirm intended */
+	if (symbol__annotate(ms, evsel, NULL) < 0)
+		return;
+
+	/* remove non-insn disasm lines (offset == -1) for simplicity */
+	list_for_each_entry_safe(dl, tmp_dl, &notes->src->source, al.node) {
+		if (dl->al.offset == -1) {
+			list_del(&dl->al.node);
+			free(dl);
+		}
+	}
+}
+
+static struct disasm_line *find_disasm_line(struct symbol *sym, u64 ip)
+{
+	struct disasm_line *dl;
+	struct annotation *notes;
+
+	notes = symbol__annotation(sym);
+	/* linear scan for the line located at @ip (sym->start + offset) */
+	list_for_each_entry(dl, &notes->src->source, al.node) {
+		if (sym->start + dl->al.offset == ip)
+			return dl;
+	}
+	return NULL;
+}
+
+static struct annotated_item_stat *annotate_data_stat(struct list_head *head,
+						      const char *name)
+{
+	struct annotated_item_stat *istat;
+
+	list_for_each_entry(istat, head, list) {
+		if (!strcmp(istat->name, name))
+			return istat;
+	}
+	/* not found: append a new entry named @name (NULL if allocation fails) */
+	istat = zalloc(sizeof(*istat));
+	if (istat == NULL)
+		return NULL;
+
+	istat->name = strdup(name);
+	if (istat->name == NULL) {
+		free(istat);
+		return NULL;
+	}
+
+	list_add_tail(&istat->list, head);
+	return istat;
+}
+
+/**
+ * hist_entry__get_data_type - find data type for given hist entry
+ * @he: hist entry
+ *
+ * Annotates the instruction at @he->ip, extracts register and offset info
+ * from its first memory operand and passes them to find_data_type().
+ * Returns that lookup's result after recording the operand offset in
+ * @he->mem_type_off, or NULL when no such operand can be obtained.
+ */
+struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he)
+{
+	struct map_symbol *ms = &he->ms;
+	struct evsel *evsel = hists_to_evsel(he->hists);
+	struct arch *arch = NULL;
+	struct disasm_line *dl;
+	struct annotated_insn_loc loc;
+	struct annotated_op_loc *op_loc;
+	struct annotated_data_type *mem_type;
+	struct annotated_item_stat *istat;
+	u64 ip = he->ip;
+	int i;
+
+	ann_data_stat.total++;
+
+	if (ms->map == NULL || ms->sym == NULL) {
+		ann_data_stat.no_sym++;
+		return NULL;
+	}
+
+	if (!symbol_conf.init_annotation) {
+		ann_data_stat.no_sym++;
+		return NULL;
+	}
+	/* evsel__get_arch() returns positive errno values, so test for non-zero */
+	if (evsel__get_arch(evsel, &arch) || arch == NULL) {
+		ann_data_stat.no_insn++;
+		return NULL;
+	}
+
+	/* Make sure it runs objdump to get disasm of the function */
+	symbol__ensure_annotate(ms, evsel);
+
+	/*
+	 * Get a disasm to extract the location from the insn.
+	 * This is too slow...
+	 */
+	dl = find_disasm_line(ms->sym, ip);
+	if (dl == NULL) {
+		ann_data_stat.no_insn++;
+		return NULL;
+	}
+
+	istat = annotate_data_stat(&ann_insn_stat, dl->ins.name);
+	if (istat == NULL) {
+		ann_data_stat.no_insn++;
+		return NULL;
+	}
+
+	if (annotate_get_insn_location(arch, dl, &loc) < 0) {
+		ann_data_stat.no_insn_ops++;
+		istat->bad++;
+		return NULL;
+	}
+
+	for_each_insn_op_loc(&loc, i, op_loc) {
+		if (!op_loc->mem_ref)
+			continue;
+
+		mem_type = find_data_type(ms, ip, op_loc->reg, op_loc->offset);
+		if (mem_type)
+			istat->good++;
+		else
+			istat->bad++;
+
+		if (symbol_conf.annotate_data_sample) {
+			annotated_data_type__update_samples(mem_type, evsel,
+							    op_loc->offset,
+							    he->stat.nr_events,
+							    he->stat.period);
+		}
+		he->mem_type_off = op_loc->offset;
+		return mem_type;
+	}
+
+	ann_data_stat.no_mem_ops++;
+	istat->bad++;
+	return NULL;
+}
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 962780559176..dba50762c6e8 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -23,6 +23,7 @@ struct option;
 struct perf_sample;
 struct evsel;
 struct symbol;
+struct annotated_data_type;
 
 struct ins {
 	const char     *name;
@@ -31,8 +32,6 @@ struct ins {
 
 struct ins_operands {
 	char	*raw;
-	char	*raw_comment;
-	char	*raw_func_start;
 	struct {
 		char	*raw;
 		char	*name;
@@ -41,22 +40,30 @@ struct ins_operands {
 		s64	offset;
 		bool	offset_avail;
 		bool	outside;
+		bool	multi_regs;
 	} target;
 	union {
 		struct {
 			char	*raw;
 			char	*name;
 			u64	addr;
+			bool	multi_regs;
 		} source;
 		struct {
 			struct ins	    ins;
 			struct ins_operands *ops;
 		} locked;
+		struct {
+			char	*raw_comment;
+			char	*raw_func_start;
+		} jump;
 	};
 };
 
 struct arch;
 
+bool arch__is(struct arch *arch, const char *name);
+
 struct ins_ops {
 	void (*free)(struct ins_operands *ops);
 	int (*parse)(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms);
@@ -101,6 +108,8 @@ struct annotation_options {
 	unsigned int percent_type;
 };
 
+extern struct annotation_options annotate_opts;
+
 enum {
 	ANNOTATION__OFFSET_JUMP_TARGETS = 1,
 	ANNOTATION__OFFSET_CALL,
@@ -130,6 +139,13 @@ struct annotation_data {
 	struct sym_hist_entry	 he;
 };
 
+struct cycles_info {
+	float			 ipc;	/* shown in the "IPC" column */
+	u64			 avg;	/* shown in the cycles column */
+	u64			 max;	/* shown with avg/min if show_minmax_cycle */
+	u64			 min;	/* shown with avg/max if show_minmax_cycle */
+};
+
 struct annotation_line {
 	struct list_head	 node;
 	struct rb_node		 rb_node;
@@ -137,12 +153,9 @@ struct annotation_line {
 	char			*line;
 	int			 line_nr;
 	char			*fileloc;
-	int			 jump_sources;
-	float			 ipc;
-	u64			 cycles;
-	u64			 cycles_max;
-	u64			 cycles_min;
 	char			*path;
+	struct cycles_info	*cycles;
+	int			 jump_sources;
 	u32			 idx;
 	int			 idx_asm;
 	int			 data_nr;
@@ -214,8 +227,7 @@ struct annotation_write_ops {
 };
 
 void annotation_line__write(struct annotation_line *al, struct annotation *notes,
-			    struct annotation_write_ops *ops,
-			    struct annotation_options *opts);
+			    struct annotation_write_ops *ops);
 
 int __annotation__scnprintf_samples_period(struct annotation *notes,
 					   char *bf, size_t size,
@@ -264,27 +276,29 @@ struct cyc_hist {
  * returns.
  */
 struct annotated_source {
-	struct list_head   source;
-	int    		   nr_histograms;
-	size_t		   sizeof_sym_hist;
-	struct cyc_hist	   *cycles_hist;
-	struct sym_hist	   *histograms;
+	struct list_head	source;
+	size_t			sizeof_sym_hist;
+	struct sym_hist		*histograms;
+	struct annotation_line	**offsets;
+	int    			nr_histograms;
+	int			nr_entries;
+	int			nr_asm_entries;
+	u16			max_line_len;
 };
 
-struct LOCKABLE annotation {
-	u64			max_coverage;
-	u64			start;
+struct annotated_branch {
 	u64			hit_cycles;
 	u64			hit_insn;
 	unsigned int		total_insn;
 	unsigned int		cover_insn;
-	struct annotation_options *options;
-	struct annotation_line	**offsets;
+	struct cyc_hist		*cycles_hist;
+	u64			max_coverage;
+};
+
+struct LOCKABLE annotation {
+	u64			start;
 	int			nr_events;
 	int			max_jump_sources;
-	int			nr_entries;
-	int			nr_asm_entries;
-	u16			max_line_len;
 	struct {
 		u8		addr;
 		u8		jumps;
@@ -293,8 +307,8 @@ struct LOCKABLE annotation {
 		u8		max_addr;
 		u8		max_ins_name;
 	} widths;
-	bool			have_cycles;
 	struct annotated_source *src;
+	struct annotated_branch *branch;
 };
 
 static inline void annotation__init(struct annotation *notes __maybe_unused)
@@ -308,10 +322,10 @@ bool annotation__trylock(struct annotation *notes) EXCLUSIVE_TRYLOCK_FUNCTION(tr
 
 static inline int annotation__cycles_width(struct annotation *notes)
 {
-	if (notes->have_cycles && notes->options->show_minmax_cycle)
+	if (notes->branch && annotate_opts.show_minmax_cycle)
 		return ANNOTATION__IPC_WIDTH + ANNOTATION__MINMAX_CYCLES_WIDTH;
 
-	return notes->have_cycles ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0;
+	return notes->branch ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0;
 }
 
 static inline int annotation__pcnt_width(struct annotation *notes)
@@ -319,13 +333,12 @@ static inline int annotation__pcnt_width(struct annotation *notes)
 	return (symbol_conf.show_total_period ? 12 : 7) * notes->nr_events;
 }
 
-static inline bool annotation_line__filter(struct annotation_line *al, struct annotation *notes)
+static inline bool annotation_line__filter(struct annotation_line *al)
 {
-	return notes->options->hide_src_code && al->offset == -1;
+	return annotate_opts.hide_src_code && al->offset == -1;
 }
 
 void annotation__set_offsets(struct annotation *notes, s64 size);
-void annotation__compute_ipc(struct annotation *notes, size_t size);
 void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym);
 void annotation__update_column_widths(struct annotation *notes);
 void annotation__init_column_widths(struct annotation *notes, struct symbol *sym);
@@ -349,6 +362,8 @@ static inline struct annotation *symbol__annotation(struct symbol *sym)
 int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample,
 				 struct evsel *evsel);
 
+struct annotated_branch *annotation__get_branch(struct annotation *notes);
+
 int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
 				    struct addr_map_symbol *start,
 				    unsigned cycles);
@@ -361,11 +376,9 @@ void symbol__annotate_zero_histograms(struct symbol *sym);
 
 int symbol__annotate(struct map_symbol *ms,
 		     struct evsel *evsel,
-		     struct annotation_options *options,
 		     struct arch **parch);
 int symbol__annotate2(struct map_symbol *ms,
 		      struct evsel *evsel,
-		      struct annotation_options *options,
 		      struct arch **parch);
 
 enum symbol_disassemble_errno {
@@ -392,43 +405,86 @@ enum symbol_disassemble_errno {
 
 int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen);
 
-int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel,
-			    struct annotation_options *options);
+int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel);
 void symbol__annotate_zero_histogram(struct symbol *sym, int evidx);
 void symbol__annotate_decay_histogram(struct symbol *sym, int evidx);
 void annotated_source__purge(struct annotated_source *as);
 
-int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel,
-				struct annotation_options *opts);
+int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel);
 
 bool ui__has_annotation(void);
 
-int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel, struct annotation_options *opts);
+int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel);
 
-int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, struct annotation_options *opts);
+int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel);
 
 #ifdef HAVE_SLANG_SUPPORT
 int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
-			 struct hist_browser_timer *hbt,
-			 struct annotation_options *opts);
+			 struct hist_browser_timer *hbt);
 #else
 static inline int symbol__tui_annotate(struct map_symbol *ms __maybe_unused,
 				struct evsel *evsel  __maybe_unused,
-				struct hist_browser_timer *hbt __maybe_unused,
-				struct annotation_options *opts __maybe_unused)
+				struct hist_browser_timer *hbt __maybe_unused)
 {
 	return 0;
 }
 #endif
 
-void annotation_options__init(struct annotation_options *opt);
-void annotation_options__exit(struct annotation_options *opt);
+void annotation_options__init(void);
+void annotation_options__exit(void);
 
-void annotation_config__init(struct annotation_options *opt);
+void annotation_config__init(void);
 
 int annotate_parse_percent_type(const struct option *opt, const char *_str,
 				int unset);
 
-int annotate_check_args(struct annotation_options *args);
+int annotate_check_args(void);
+
+/**
+ * struct annotated_op_loc - Location info of instruction operand
+ * @reg: Register in the operand
+ * @offset: Memory access offset in the operand
+ * @mem_ref: Whether the operand accesses memory
+ */
+struct annotated_op_loc {
+	int reg;
+	int offset;
+	bool mem_ref;
+};
+
+enum annotated_insn_ops {
+	INSN_OP_SOURCE = 0,
+	INSN_OP_TARGET = 1,
+
+	INSN_OP_MAX,
+};
+
+/**
+ * struct annotated_insn_loc - Location info of instruction
+ * @ops: Array of location info for source and target operands
+ */
+struct annotated_insn_loc {
+	struct annotated_op_loc ops[INSN_OP_MAX];
+};
+
+#define for_each_insn_op_loc(insn_loc, i, op_loc)			\
+	for (i = INSN_OP_SOURCE, op_loc = &(insn_loc)->ops[i];		\
+	     i < INSN_OP_MAX;						\
+	     i++, op_loc++)
+
+/* Get detailed location info in the instruction */
+int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl,
+			       struct annotated_insn_loc *loc);
+
+/* Returns a data type from the sample instruction (if any) */
+struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he);
+
+struct annotated_item_stat {
+	struct list_head list;
+	char *name;
+	int good;
+	int bad;
+};
+extern struct list_head ann_insn_stat;
 
 #endif	/* __PERF_ANNOTATE_H */
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index a0368202a746..3684e6009b63 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -174,7 +174,7 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
 				   struct evlist *evlist,
 				   struct evsel *evsel, int idx)
 {
-	bool per_cpu = !perf_cpu_map__empty(evlist->core.user_requested_cpus);
+	bool per_cpu = !perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus);
 
 	mp->mmap_needed = evsel->needs_auxtrace_mmap;
 
@@ -648,7 +648,7 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
 
 static int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx)
 {
-	bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.user_requested_cpus);
+	bool per_cpu_mmaps = !perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus);
 
 	if (per_cpu_mmaps) {
 		struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx);
@@ -1638,6 +1638,9 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts,
 		case 'Z':
 			synth_opts->timeless_decoding = true;
 			break;
+		case 'T':
+			synth_opts->use_timestamp = true;
+			break;
 		case ' ':
 		case ',':
 			break;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 29eb82dff574..55702215a82d 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -99,6 +99,7 @@ enum itrace_period_type {
  * @remote_access: whether to synthesize remote access events
  * @mem: whether to synthesize memory events
  * @timeless_decoding: prefer "timeless" decoding i.e. ignore timestamps
+ * @use_timestamp: use the timestamp trace as kernel time
  * @vm_time_correlation: perform VM Time Correlation
  * @vm_tm_corr_dry_run: VM Time Correlation dry-run
  * @vm_tm_corr_args:  VM Time Correlation implementation-specific arguments
@@ -146,6 +147,7 @@ struct itrace_synth_opts {
 	bool			remote_access;
 	bool			mem;
 	bool			timeless_decoding;
+	bool			use_timestamp;
 	bool			vm_time_correlation;
 	bool			vm_tm_corr_dry_run;
 	char			*vm_tm_corr_args;
@@ -678,6 +680,7 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
 "				q:			quicker (less detailed) decoding\n" \
 "				A:			approximate IPC\n" \
 "				Z:			prefer to ignore timestamps (so-called \"timeless\" decoding)\n" \
+"				T:			use the timestamp trace as kernel time\n" \
 "				PERIOD[ns|us|ms|i|t]:   specify period to sample stream\n" \
 "				concatenate multiple options. Default is iybxwpe or cewp\n"
 
diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c
index 591fc1edd385..dec910989701 100644
--- a/tools/perf/util/block-info.c
+++ b/tools/perf/util/block-info.c
@@ -129,9 +129,9 @@ int block_info__process_sym(struct hist_entry *he, struct block_hist *bh,
 	al.sym = he->ms.sym;
 
 	notes = symbol__annotation(he->ms.sym);
-	if (!notes || !notes->src || !notes->src->cycles_hist)
+	if (!notes || !notes->branch || !notes->branch->cycles_hist)
 		return 0;
-	ch = notes->src->cycles_hist;
+	ch = notes->branch->cycles_hist;
 	for (unsigned int i = 0; i < symbol__size(he->ms.sym); i++) {
 		if (ch[i].num_aggr) {
 			struct block_info *bi;
@@ -464,8 +464,7 @@ void block_info__free_report(struct block_report *reps, int nr_reps)
 }
 
 int report__browse_block_hists(struct block_hist *bh, float min_percent,
-			       struct evsel *evsel, struct perf_env *env,
-			       struct annotation_options *annotation_opts)
+			       struct evsel *evsel, struct perf_env *env)
 {
 	int ret;
 
@@ -477,8 +476,7 @@ int report__browse_block_hists(struct block_hist *bh, float min_percent,
 		return 0;
 	case 1:
 		symbol_conf.report_individual_block = true;
-		ret = block_hists_tui_browse(bh, evsel, min_percent,
-					     env, annotation_opts);
+		ret = block_hists_tui_browse(bh, evsel, min_percent, env);
 		return ret;
 	default:
 		return -1;
diff --git a/tools/perf/util/block-info.h b/tools/perf/util/block-info.h
index 42e9dcc4cf0a..96f53e89795e 100644
--- a/tools/perf/util/block-info.h
+++ b/tools/perf/util/block-info.h
@@ -78,8 +78,7 @@ struct block_report *block_info__create_report(struct evlist *evlist,
 void block_info__free_report(struct block_report *reps, int nr_reps);
 
 int report__browse_block_hists(struct block_hist *bh, float min_percent,
-			       struct evsel *evsel, struct perf_env *env,
-			       struct annotation_options *annotation_opts);
+			       struct evsel *evsel, struct perf_env *env);
 
 float block_info__total_cycles_percent(struct hist_entry *he);
 
diff --git a/tools/perf/util/block-range.c b/tools/perf/util/block-range.c
index 680e92774d0c..15c42196c24c 100644
--- a/tools/perf/util/block-range.c
+++ b/tools/perf/util/block-range.c
@@ -311,6 +311,7 @@ done:
 double block_range__coverage(struct block_range *br)
 {
 	struct symbol *sym;
+	struct annotated_branch *branch;
 
 	if (!br) {
 		if (block_ranges.blocks)
@@ -323,5 +324,9 @@ double block_range__coverage(struct block_range *br)
 	if (!sym)
 		return -1;
 
-	return (double)br->coverage / symbol__annotation(sym)->max_coverage;
+	branch = symbol__annotation(sym)->branch;
+	if (!branch)
+		return -1;
+
+	return (double)br->coverage / branch->max_coverage;
 }
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index 38fcf3ba5749..3573e0b7ef3e 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -386,6 +386,9 @@ int perf_event__synthesize_bpf_events(struct perf_session *session,
 	int err;
 	int fd;
 
+	if (opts->no_bpf_event)
+		return 0;
+
 	event = malloc(sizeof(event->bpf) + KSYM_NAME_LEN + machine->id_hdr_size);
 	if (!event)
 		return -1;
@@ -542,9 +545,9 @@ int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env)
 	return evlist__add_sb_event(evlist, &attr, bpf_event__sb_cb, env);
 }
 
-void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info,
-				    struct perf_env *env,
-				    FILE *fp)
+void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info,
+				      struct perf_env *env,
+				      FILE *fp)
 {
 	__u32 *prog_lens = (__u32 *)(uintptr_t)(info->jited_func_lens);
 	__u64 *prog_addrs = (__u64 *)(uintptr_t)(info->jited_ksyms);
@@ -560,7 +563,7 @@ void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info,
 	if (info->btf_id) {
 		struct btf_node *node;
 
-		node = perf_env__find_btf(env, info->btf_id);
+		node = __perf_env__find_btf(env, info->btf_id);
 		if (node)
 			btf = btf__new((__u8 *)(node->data),
 				       node->data_size);
diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
index 1bcbd4fb6c66..e2f0420905f5 100644
--- a/tools/perf/util/bpf-event.h
+++ b/tools/perf/util/bpf-event.h
@@ -33,9 +33,9 @@ struct btf_node {
 int machine__process_bpf(struct machine *machine, union perf_event *event,
 			 struct perf_sample *sample);
 int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env);
-void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info,
-				    struct perf_env *env,
-				    FILE *fp);
+void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info,
+				      struct perf_env *env,
+				      FILE *fp);
 #else
 static inline int machine__process_bpf(struct machine *machine __maybe_unused,
 				       union perf_event *event __maybe_unused,
@@ -50,9 +50,9 @@ static inline int evlist__add_bpf_sb_event(struct evlist *evlist __maybe_unused,
 	return 0;
 }
 
-static inline void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info __maybe_unused,
-						  struct perf_env *env __maybe_unused,
-						  FILE *fp __maybe_unused)
+static inline void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info __maybe_unused,
+						    struct perf_env *env __maybe_unused,
+						    FILE *fp __maybe_unused)
 {
 
 }
diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
index 7f9b0e46e008..7a8af60e0f51 100644
--- a/tools/perf/util/bpf_counter.c
+++ b/tools/perf/util/bpf_counter.c
@@ -455,7 +455,7 @@ static int bperf__load(struct evsel *evsel, struct target *target)
 		return -1;
 
 	if (!all_cpu_map) {
-		all_cpu_map = perf_cpu_map__new(NULL);
+		all_cpu_map = perf_cpu_map__new_online_cpus();
 		if (!all_cpu_map)
 			return -1;
 	}
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index f1716c089c99..31ff19afc20c 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -318,7 +318,7 @@ int lock_contention_read(struct lock_contention *con)
 	}
 
 	/* make sure it loads the kernel map */
-	map__load(maps__first(machine->kmaps)->map);
+	maps__load_first(machine->kmaps);
 
 	prev_key = NULL;
 	while (!bpf_map_get_next_key(fd, prev_key, &key)) {
diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h
index 0cd3369af2a4..b29109cd3609 100644
--- a/tools/perf/util/compress.h
+++ b/tools/perf/util/compress.h
@@ -3,6 +3,8 @@
 #define PERF_COMPRESS_H
 
 #include <stdbool.h>
+#include <stddef.h>
+#include <sys/types.h>
 #ifdef HAVE_ZSTD_SUPPORT
 #include <zstd.h>
 #endif
@@ -21,6 +23,7 @@ struct zstd_data {
 #ifdef HAVE_ZSTD_SUPPORT
 	ZSTD_CStream	*cstream;
 	ZSTD_DStream	*dstream;
+	int comp_level;
 #endif
 };
 
@@ -29,7 +32,7 @@ struct zstd_data {
 int zstd_init(struct zstd_data *data, int level);
 int zstd_fini(struct zstd_data *data);
 
-size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size,
+ssize_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size,
 				       void *src, size_t src_size, size_t max_record_size,
 				       size_t process_header(void *record, size_t increment));
 
@@ -48,7 +51,7 @@ static inline int zstd_fini(struct zstd_data *data __maybe_unused)
 }
 
 static inline
-size_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused,
+ssize_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused,
 				       void *dst __maybe_unused, size_t dst_size __maybe_unused,
 				       void *src __maybe_unused, size_t src_size __maybe_unused,
 				       size_t max_record_size __maybe_unused,
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 0e090e8bc334..0581ee0fa5f2 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -672,7 +672,7 @@ struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */
 	static struct perf_cpu_map *online;
 
 	if (!online)
-		online = perf_cpu_map__new(NULL); /* from /sys/devices/system/cpu/online */
+		online = perf_cpu_map__new_online_cpus(); /* from /sys/devices/system/cpu/online */
 
 	return online;
 }
diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c
index 81cfc85f4668..8bbeb2dc76fd 100644
--- a/tools/perf/util/cputopo.c
+++ b/tools/perf/util/cputopo.c
@@ -267,7 +267,7 @@ struct cpu_topology *cpu_topology__new(void)
 	ncpus = cpu__max_present_cpu().cpu;
 
 	/* build online CPU map */
-	map = perf_cpu_map__new(NULL);
+	map = perf_cpu_map__new_online_cpus();
 	if (map == NULL) {
 		pr_debug("failed to get system cpumap\n");
 		return NULL;
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index a9873d14c632..d65d7485886c 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -3346,12 +3346,27 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
 	etm->metadata = metadata;
 	etm->auxtrace_type = auxtrace_info->type;
 
-	/* Use virtual timestamps if all ETMs report ts_source = 1 */
-	etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
+	if (etm->synth_opts.use_timestamp)
+		/*
+		 * Prior to Armv8.4, Arm CPUs don't support the FEAT_TRF feature,
+		 * therefore the decoder cannot know whether the timestamp trace
+		 * is the same as the kernel time.
+		 *
+		 * A user who knows the working platform can specify the itrace
+		 * option 'T' to tell the decoder to forcibly use the traced
+		 * timestamp as the kernel time.
+		 */
+		etm->has_virtual_ts = true;
+	else
+		/* Use virtual timestamps if all ETMs report ts_source = 1 */
+		etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
 
 	if (!etm->has_virtual_ts)
 		ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
-			    "The time field of the samples will not be set accurately.\n\n");
+			    "The time field of the samples will not be set accurately.\n"
+			    "For Arm CPUs prior to Armv8.4 or without support FEAT_TRF,\n"
+			    "you can specify the itrace option 'T' for timestamp decoding\n"
+			    "if the Coresight timestamp on the platform is same with the kernel time.\n\n");
 
 	etm->auxtrace.process_event = cs_etm__process_event;
 	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index b9fb71ab7a73..106429155c2e 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -253,8 +253,8 @@ static struct call_path *call_path_from_sample(struct db_export *dbe,
 		 */
 		addr_location__init(&al);
 		al.sym = node->ms.sym;
-		al.map = node->ms.map;
-		al.maps = thread__maps(thread);
+		al.map = map__get(node->ms.map);
+		al.maps = maps__get(thread__maps(thread));
 		al.addr = node->ip;
 
 		if (al.map && !al.sym)
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 88378c4c5dd9..e282b4ceb4d2 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -38,12 +38,21 @@ bool dump_trace = false, quiet = false;
 int debug_ordered_events;
 static int redirect_to_stderr;
 int debug_data_convert;
-static FILE *debug_file;
+static FILE *_debug_file;
 bool debug_display_time;
 
+FILE *debug_file(void)
+{
+	if (!_debug_file) {
+		debug_set_file(stderr);	/* set first: the warning itself prints via debug_file() */
+		pr_warning_once("debug_file not set");
+	}
+	return _debug_file;
+}
+
 void debug_set_file(FILE *file)
 {
-	debug_file = file;
+	_debug_file = file;
 }
 
 void debug_set_display_time(bool set)
@@ -78,8 +87,8 @@ int veprintf(int level, int var, const char *fmt, va_list args)
 		if (use_browser >= 1 && !redirect_to_stderr) {
 			ui_helpline__vshow(fmt, args);
 		} else {
-			ret = fprintf_time(debug_file);
-			ret += vfprintf(debug_file, fmt, args);
+			ret = fprintf_time(debug_file());
+			ret += vfprintf(debug_file(), fmt, args);
 		}
 	}
 
@@ -107,9 +116,8 @@ static int veprintf_time(u64 t, const char *fmt, va_list args)
 	nsecs -= secs  * NSEC_PER_SEC;
 	usecs  = nsecs / NSEC_PER_USEC;
 
-	ret = fprintf(stderr, "[%13" PRIu64 ".%06" PRIu64 "] ",
-		      secs, usecs);
-	ret += vfprintf(stderr, fmt, args);
+	ret = fprintf(debug_file(), "[%13" PRIu64 ".%06" PRIu64 "] ", secs, usecs);
+	ret += vfprintf(debug_file(), fmt, args);
 	return ret;
 }
 
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index f99468a7f681..de8870980d44 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -77,6 +77,7 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __printf(4, 5)
 int veprintf(int level, int var, const char *fmt, va_list args);
 
 int perf_debug_option(const char *str);
+FILE *debug_file(void);
 void debug_set_file(FILE *file);
 void debug_set_display_time(bool set);
 void perf_debug_setup(void);
diff --git a/tools/perf/util/debuginfo.c b/tools/perf/util/debuginfo.c
new file mode 100644
index 000000000000..19acf4775d35
--- /dev/null
+++ b/tools/perf/util/debuginfo.c
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * DWARF debug information handling code.  Copied from probe-finder.c.
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/zalloc.h>
+
+#include "build-id.h"
+#include "dso.h"
+#include "debug.h"
+#include "debuginfo.h"
+#include "symbol.h"
+
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+#include <elfutils/debuginfod.h>
+#endif
+
+/* Dwarf FL wrappers */
+static char *debuginfo_path;	/* Currently dummy */
+
+static const Dwfl_Callbacks offline_callbacks = {
+	.find_debuginfo = dwfl_standard_find_debuginfo,
+	.debuginfo_path = &debuginfo_path,
+
+	.section_address = dwfl_offline_section_address,
+
+	/* We use this table for core files too.  */
+	.find_elf = dwfl_build_id_find_elf,
+};
+
+/* Get a Dwarf from offline image; expects *dbg zeroed, returns 0 or a negative value */
+static int debuginfo__init_offline_dwarf(struct debuginfo *dbg,
+					 const char *path)
+{
+	GElf_Addr dummy;
+	int fd;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return fd;
+
+	dbg->dwfl = dwfl_begin(&offline_callbacks);
+	if (!dbg->dwfl)
+		goto error;
+
+	dwfl_report_begin(dbg->dwfl);
+	dbg->mod = dwfl_report_offline(dbg->dwfl, "", "", fd);
+	if (!dbg->mod)
+		goto error;
+
+	dbg->dbg = dwfl_module_getdwarf(dbg->mod, &dbg->bias);
+	if (!dbg->dbg)
+		goto error;
+
+	dwfl_module_build_id(dbg->mod, &dbg->build_id, &dummy);
+
+	dwfl_report_end(dbg->dwfl, NULL, NULL);
+
+	return 0;
+error:
+	if (!dbg->mod)	/* libdwfl owns fd only once the module was reported */
+		close(fd);
+	if (dbg->dwfl)
+		dwfl_end(dbg->dwfl);
+	memset(dbg, 0, sizeof(*dbg));
+
+	return -ENOENT;
+}
+
+static struct debuginfo *__debuginfo__new(const char *path)
+{
+	struct debuginfo *dbg = zalloc(sizeof(*dbg));
+	if (!dbg)
+		return NULL;
+
+	if (debuginfo__init_offline_dwarf(dbg, path) < 0)
+		zfree(&dbg);
+	if (dbg)
+		pr_debug("Open Debuginfo file: %s\n", path);
+	return dbg;
+}
+
+enum dso_binary_type distro_dwarf_types[] = {
+	DSO_BINARY_TYPE__FEDORA_DEBUGINFO,
+	DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
+	DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
+	DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
+	DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO,
+	DSO_BINARY_TYPE__NOT_FOUND,
+};
+
+struct debuginfo *debuginfo__new(const char *path)
+{
+	enum dso_binary_type *type;
+	char buf[PATH_MAX], nil = '\0';
+	struct dso *dso;
+	struct debuginfo *dinfo = NULL;
+	struct build_id bid;
+
+	/* Try to open distro debuginfo files */
+	dso = dso__new(path);
+	if (!dso)
+		goto out;
+
+	/* Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO */
+	if (is_regular_file(path) && filename__read_build_id(path, &bid) > 0)
+		dso__set_build_id(dso, &bid);
+
+	for (type = distro_dwarf_types;
+	     !dinfo && *type != DSO_BINARY_TYPE__NOT_FOUND;
+	     type++) {
+		if (dso__read_binary_type_filename(dso, *type, &nil,
+						   buf, PATH_MAX) < 0)
+			continue;
+		dinfo = __debuginfo__new(buf);
+	}
+	dso__put(dso);
+
+out:
+	/* if failed to open all distro debuginfo, open given binary */
+	return dinfo ? : __debuginfo__new(path);
+}
+
+void debuginfo__delete(struct debuginfo *dbg)
+{
+	if (dbg) {
+		if (dbg->dwfl)
+			dwfl_end(dbg->dwfl);
+		free(dbg);
+	}
+}
+
+/* For the kernel module, we need a special code to get a DIE */
+int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs,
+				bool adjust_offset)
+{
+	int n, i;
+	Elf32_Word shndx;
+	Elf_Scn *scn;
+	Elf *elf;
+	GElf_Shdr mem, *shdr;
+	const char *p;
+
+	elf = dwfl_module_getelf(dbg->mod, &dbg->bias);
+	if (!elf)
+		return -EINVAL;
+
+	/* Get the number of relocations */
+	n = dwfl_module_relocations(dbg->mod);
+	if (n < 0)
+		return -ENOENT;
+	/* Search the relocation related .text section (name may be NULL on error) */
+	for (i = 0; i < n; i++) {
+		p = dwfl_module_relocation_info(dbg->mod, i, &shndx);
+		if (p && strcmp(p, ".text") == 0) {
+			/* OK, get the section header */
+			scn = elf_getscn(elf, shndx);
+			if (!scn)
+				return -ENOENT;
+			shdr = gelf_getshdr(scn, &mem);
+			if (!shdr)
+				return -ENOENT;
+			*offs = shdr->sh_addr;
+			if (adjust_offset)
+				*offs -= shdr->sh_offset;
+		}
+	}
+	return 0;
+}
+
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+int get_source_from_debuginfod(const char *raw_path,
+			       const char *sbuild_id, char **new_path)
+{
+	debuginfod_client *c = debuginfod_begin();
+	const char *p = raw_path;
+	int fd;
+
+	if (!c)
+		return -ENOMEM;
+
+	fd = debuginfod_find_source(c, (const unsigned char *)sbuild_id,
+				0, p, new_path);
+	pr_debug("Search %s from debuginfod -> %d\n", p, fd);
+	if (fd >= 0)
+		close(fd);
+	debuginfod_end(c);
+	if (fd < 0) {
+		pr_debug("Failed to find %s in debuginfod (%s)\n",
+			raw_path, sbuild_id);
+		return -ENOENT;
+	}
+	pr_debug("Got a source %s\n", *new_path);
+
+	return 0;
+}
+#endif /* HAVE_DEBUGINFOD_SUPPORT */
diff --git a/tools/perf/util/debuginfo.h b/tools/perf/util/debuginfo.h
new file mode 100644
index 000000000000..4d65b8c605fc
--- /dev/null
+++ b/tools/perf/util/debuginfo.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _PERF_DEBUGINFO_H
+#define _PERF_DEBUGINFO_H
+
+#include <errno.h>
+#include <linux/compiler.h>
+
+#ifdef HAVE_DWARF_SUPPORT
+
+#include "dwarf-aux.h"
+
+/* debug information structure */
+struct debuginfo {
+	Dwarf		*dbg;
+	Dwfl_Module	*mod;
+	Dwfl		*dwfl;
+	Dwarf_Addr	bias;
+	const unsigned char	*build_id;
+};
+
+/* This also tries to open distro debuginfo */
+struct debuginfo *debuginfo__new(const char *path);
+void debuginfo__delete(struct debuginfo *dbg);
+
+int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs,
+			       bool adjust_offset);
+
+#else /* HAVE_DWARF_SUPPORT */
+
+/* dummy debug information structure */
+struct debuginfo {
+};
+
+static inline struct debuginfo *debuginfo__new(const char *path __maybe_unused)
+{
+	return NULL;
+}
+
+static inline void debuginfo__delete(struct debuginfo *dbg __maybe_unused)
+{
+}
+
+static inline int debuginfo__get_text_offset(struct debuginfo *dbg __maybe_unused,
+					     Dwarf_Addr *offs __maybe_unused,
+					     bool adjust_offset __maybe_unused)
+{
+	return -EINVAL;
+}
+
+#endif /* HAVE_DWARF_SUPPORT */
+
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+int get_source_from_debuginfod(const char *raw_path, const char *sbuild_id,
+			       char **new_path);
+#else /* HAVE_DEBUGINFOD_SUPPORT */
+static inline int get_source_from_debuginfod(const char *raw_path __maybe_unused,
+					     const char *sbuild_id __maybe_unused,
+					     char **new_path __maybe_unused)
+{
+	return -ENOTSUP;
+}
+#endif /* HAVE_DEBUGINFOD_SUPPORT */
+
+#endif /* _PERF_DEBUGINFO_H */
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 1f629b6fb7cf..22fd5fa806ed 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -31,6 +31,7 @@
 #include "debug.h"
 #include "string2.h"
 #include "vdso.h"
+#include "annotate-data.h"
 
 static const char * const debuglink_paths[] = {
 	"%.0s%s",
@@ -1327,6 +1328,7 @@ struct dso *dso__new_id(const char *name, struct dso_id *id)
 		dso->data.cache = RB_ROOT;
 		dso->inlined_nodes = RB_ROOT_CACHED;
 		dso->srclines = RB_ROOT_CACHED;
+		dso->data_types = RB_ROOT;
 		dso->data.fd = -1;
 		dso->data.status = DSO_DATA_STATUS_UNKNOWN;
 		dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND;
@@ -1370,6 +1372,8 @@ void dso__delete(struct dso *dso)
 	symbols__delete(&dso->symbols);
 	dso->symbol_names_len = 0;
 	zfree(&dso->symbol_names);
+	annotated_data_type__tree_delete(&dso->data_types);
+
 	if (dso->short_name_allocated) {
 		zfree((char **)&dso->short_name);
 		dso->short_name_allocated = false;
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 3759de8c2267..ce9f3849a773 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -154,6 +154,8 @@ struct dso {
 	size_t		 symbol_names_len;
 	struct rb_root_cached inlined_nodes;
 	struct rb_root_cached srclines;
+	struct rb_root	data_types;
+
 	struct {
 		u64		addr;
 		struct symbol	*symbol;
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index 2941d88f2199..7aa5fee0da19 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -1051,32 +1051,28 @@ Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
 }
 
 /**
- * die_get_typename - Get the name of given variable DIE
- * @vr_die: a variable DIE
+ * die_get_typename_from_type - Get the name of given type DIE
+ * @type_die: a type DIE
  * @buf: a strbuf for result type name
  *
- * Get the name of @vr_die and stores it to @buf. Return 0 if succeeded.
+ * Get the name of @type_die and stores it to @buf. Return 0 if succeeded.
  * and Return -ENOENT if failed to find type name.
  * Note that the result will stores typedef name if possible, and stores
  * "*(function_type)" if the type is a function pointer.
  */
-int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf)
+int die_get_typename_from_type(Dwarf_Die *type_die, struct strbuf *buf)
 {
-	Dwarf_Die type;
 	int tag, ret;
 	const char *tmp = "";
 
-	if (__die_get_real_type(vr_die, &type) == NULL)
-		return -ENOENT;
-
-	tag = dwarf_tag(&type);
+	tag = dwarf_tag(type_die);
 	if (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type)
 		tmp = "*";
 	else if (tag == DW_TAG_subroutine_type) {
 		/* Function pointer */
 		return strbuf_add(buf, "(function_type)", 15);
 	} else {
-		const char *name = dwarf_diename(&type);
+		const char *name = dwarf_diename(type_die);
 
 		if (tag == DW_TAG_union_type)
 			tmp = "union ";
@@ -1089,8 +1085,35 @@ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf)
 		/* Write a base name */
 		return strbuf_addf(buf, "%s%s", tmp, name ?: "");
 	}
-	ret = die_get_typename(&type, buf);
-	return ret ? ret : strbuf_addstr(buf, tmp);
+	ret = die_get_typename(type_die, buf);
+	if (ret < 0) {
+		/* void pointer has no type attribute */
+		if (tag == DW_TAG_pointer_type && ret == -ENOENT)
+			return strbuf_addf(buf, "void*");
+
+		return ret;
+	}
+	return strbuf_addstr(buf, tmp);
+}
+
+/**
+ * die_get_typename - Get the name of given variable DIE
+ * @vr_die: a variable DIE
+ * @buf: a strbuf for result type name
+ *
+ * Get the name of @vr_die and stores it to @buf. Return 0 if succeeded.
+ * and Return -ENOENT if failed to find type name.
+ * Note that the result will stores typedef name if possible, and stores
+ * "*(function_type)" if the type is a function pointer.
+ */
+int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf)
+{
+	Dwarf_Die type;
+
+	if (__die_get_real_type(vr_die, &type) == NULL)
+		return -ENOENT;
+
+	return die_get_typename_from_type(&type, buf);
 }
 
 /**
@@ -1238,12 +1261,151 @@ int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf)
 out:
 	return ret;
 }
-#else
-int die_get_var_range(Dwarf_Die *sp_die __maybe_unused,
-		      Dwarf_Die *vr_die __maybe_unused,
-		      struct strbuf *buf __maybe_unused)
+
+/* Internal parameters for __die_find_var_reg_cb() */
+struct find_var_data {
+	/* Target instruction address */
+	Dwarf_Addr pc;
+	/* Target memory address (for global data) */
+	Dwarf_Addr addr;
+	/* Target register */
+	unsigned reg;
+	/* Access offset, set for global data */
+	int offset;
+};
+
+/* Max number of registers DW_OP_regN supports */
+#define DWARF_OP_DIRECT_REGS  32
+
+/* Only checks direct child DIEs in the given scope. */
+static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg)
+{
+	struct find_var_data *data = arg;
+	int tag = dwarf_tag(die_mem);
+	ptrdiff_t off = 0;
+	Dwarf_Attribute attr;
+	Dwarf_Addr base, start, end;
+	Dwarf_Op *ops;
+	size_t nops;
+
+	if (tag != DW_TAG_variable && tag != DW_TAG_formal_parameter)
+		return DIE_FIND_CB_SIBLING;
+
+	if (dwarf_attr(die_mem, DW_AT_location, &attr) == NULL)
+		return DIE_FIND_CB_SIBLING;
+
+	while ((off = dwarf_getlocations(&attr, off, &base, &start, &end, &ops, &nops)) > 0) {
+		/* Assuming a sorted list; a range covers [start, end), end is exclusive */
+		if (end <= data->pc)
+			continue;
+		if (start > data->pc)
+			break;
+
+		/* Only match with a simple case; check nops before reading ops */
+		if (data->reg < DWARF_OP_DIRECT_REGS) {
+			if (nops == 1 && ops->atom == (DW_OP_reg0 + data->reg))
+				return DIE_FIND_CB_END;
+		} else {
+			if (nops == 1 && ops->atom == DW_OP_regx &&
+			    ops->number == data->reg)
+				return DIE_FIND_CB_END;
+		}
+	}
+	return DIE_FIND_CB_SIBLING;
+}
+
+/**
+ * die_find_variable_by_reg - Find a variable saved in a register
+ * @sc_die: a scope DIE
+ * @pc: the program address to find
+ * @reg: the register number to find
+ * @die_mem: a buffer to save the resulting DIE
+ *
+ * Find the variable DIE accessed by the given register.
+ */
+Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die, Dwarf_Addr pc, int reg,
+				    Dwarf_Die *die_mem)
+{
+	struct find_var_data data = {
+		.pc = pc,
+		.reg = reg,
+	};
+	return die_find_child(sc_die, __die_find_var_reg_cb, &data, die_mem);
+}
+
+/* Only checks direct child DIEs in the given scope; empty expressions are skipped */
+static int __die_find_var_addr_cb(Dwarf_Die *die_mem, void *arg)
+{
+	struct find_var_data *data = arg;
+	int tag = dwarf_tag(die_mem);
+	ptrdiff_t off = 0;
+	Dwarf_Attribute attr;
+	Dwarf_Addr base, start, end;
+	Dwarf_Word size;
+	Dwarf_Die type_die;
+	Dwarf_Op *ops;
+	size_t nops;
+
+	if (tag != DW_TAG_variable)
+		return DIE_FIND_CB_SIBLING;
+
+	if (dwarf_attr(die_mem, DW_AT_location, &attr) == NULL)
+		return DIE_FIND_CB_SIBLING;
+
+	while ((off = dwarf_getlocations(&attr, off, &base, &start, &end, &ops, &nops)) > 0) {
+		if (nops == 0 || ops->atom != DW_OP_addr)
+			continue;
+
+		if (data->addr < ops->number)
+			continue;
+
+		if (data->addr == ops->number) {
+			/* Update offset relative to the start of the variable */
+			data->offset = 0;
+			return DIE_FIND_CB_END;
+		}
+
+		if (die_get_real_type(die_mem, &type_die) == NULL)
+			continue;
+
+		if (dwarf_aggregate_size(&type_die, &size) < 0)
+			continue;
+
+		if (data->addr >= ops->number + size)
+			continue;
+
+		/* Update offset relative to the start of the variable */
+		data->offset = data->addr - ops->number;
+		return DIE_FIND_CB_END;
+	}
+	return DIE_FIND_CB_SIBLING;
+}
+
+/**
+ * die_find_variable_by_addr - Find variable located at given address
+ * @sc_die: a scope DIE
+ * @pc: the program address to find
+ * @addr: the data address to find
+ * @die_mem: a buffer to save the resulting DIE
+ * @offset: the offset in the resulting type
+ *
+ * Find the variable DIE located at the given address (in PC-relative mode).
+ * This is usually for global variables.
+ */
+Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die, Dwarf_Addr pc,
+				     Dwarf_Addr addr, Dwarf_Die *die_mem,
+				     int *offset)
 {
-	return -ENOTSUP;
+	struct find_var_data data = {
+		.pc = pc,
+		.addr = addr,
+	};
+	Dwarf_Die *result;
+
+	result = die_find_child(sc_die, __die_find_var_addr_cb, &data, die_mem);
+	if (result)
+		*offset = data.offset;
+	return result;
 }
 #endif
 
@@ -1425,3 +1587,56 @@ void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die,
 
 	*entrypc = postprologue_addr;
 }
+
+/* State shared between die_get_scopes() and __die_find_scope_cb() */
+struct find_scope_data {
+	/* Target instruction address [input] */
+	Dwarf_Addr pc;
+	/* Number of entries stored in scopes [output] */
+	int nr;
+	/* realloc()ed array of scopes found, 0 for the outermost one. [output] */
+	Dwarf_Die *scopes;
+};
+
+/* die_find_child() callback: append each DIE covering data->pc to data->scopes */
+static int __die_find_scope_cb(Dwarf_Die *die_mem, void *arg)
+{
+	struct find_scope_data *data = arg;
+	Dwarf_Die *grown;
+
+	if (!dwarf_haspc(die_mem, data->pc))
+		return DIE_FIND_CB_SIBLING;
+
+	/* Grow by one slot; on failure the entries collected so far are kept */
+	grown = realloc(data->scopes, (data->nr + 1) * sizeof(*grown));
+	if (grown == NULL)
+		return DIE_FIND_CB_END;
+
+	grown[data->nr++] = *die_mem;
+	data->scopes = grown;
+	return DIE_FIND_CB_CHILD;
+}
+
+/**
+ * die_get_scopes - Return a list of scopes including the address
+ * @cu_die: a compile unit DIE
+ * @pc: the address to find
+ * @scopes: set to the heap-allocated array of scope DIEs, or NULL if none
+ *
+ * This function does the same as the dwarf_getscopes() but doesn't follow
+ * the origins of inlined functions.  It returns the number of scopes saved
+ * in the @scopes argument.  The outer scope will be saved first (index 0) and
+ * the last one is the innermost scope at the @pc.
+ */
+int die_get_scopes(Dwarf_Die *cu_die, Dwarf_Addr pc, Dwarf_Die **scopes)
+{
+	struct find_scope_data data = {
+		.pc = pc,
+	};
+	Dwarf_Die die_mem;
+
+	die_find_child(cu_die, __die_find_scope_cb, &data, &die_mem);
+
+	*scopes = data.scopes;
+	return data.nr;
+}
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
index 7ec8bc1083bb..4e64caac6df8 100644
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h
@@ -116,12 +116,14 @@ Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
 Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
 			   Dwarf_Die *die_mem);
 
+/* Get the name of given type DIE */
+int die_get_typename_from_type(Dwarf_Die *type_die, struct strbuf *buf);
+
 /* Get the name of given variable DIE */
 int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf);
 
 /* Get the name and type of given variable DIE, stored as "type\tname" */
 int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf);
-int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf);
 
 /* Check if target program is compiled with optimization */
 bool die_is_optimized_target(Dwarf_Die *cu_die);
@@ -130,4 +132,49 @@ bool die_is_optimized_target(Dwarf_Die *cu_die);
 void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die,
 		       Dwarf_Addr *entrypc);
 
-#endif
+/* Get the list of including scopes */
+int die_get_scopes(Dwarf_Die *cu_die, Dwarf_Addr pc, Dwarf_Die **scopes);
+
+#ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT
+
+/* Get byte offset range of given variable DIE */
+int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf);
+
+/* Find a variable saved in the 'reg' at given address */
+Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die, Dwarf_Addr pc, int reg,
+				    Dwarf_Die *die_mem);
+
+/* Find a (global) variable located in the 'addr' */
+Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die, Dwarf_Addr pc,
+				     Dwarf_Addr addr, Dwarf_Die *die_mem,
+				     int *offset);
+
+#else /*  HAVE_DWARF_GETLOCATIONS_SUPPORT */
+
+static inline int die_get_var_range(Dwarf_Die *sp_die __maybe_unused,
+				    Dwarf_Die *vr_die __maybe_unused,
+				    struct strbuf *buf __maybe_unused)
+{
+	return -ENOTSUP;
+}
+
+static inline Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die __maybe_unused,
+						  Dwarf_Addr pc __maybe_unused,
+						  int reg __maybe_unused,
+						  Dwarf_Die *die_mem __maybe_unused)
+{
+	return NULL;
+}
+
+static inline Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die __maybe_unused,
+						   Dwarf_Addr pc __maybe_unused,
+						   Dwarf_Addr addr __maybe_unused,
+						   Dwarf_Die *die_mem __maybe_unused,
+						   int *offset __maybe_unused)
+{
+	return NULL;
+}
+
+#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */
+
+#endif /* _DWARF_AUX_H */
diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c
index 69cfaa5953bf..5b7f86c0063f 100644
--- a/tools/perf/util/dwarf-regs.c
+++ b/tools/perf/util/dwarf-regs.c
@@ -5,9 +5,12 @@
  * Written by: Masami Hiramatsu <mhiramat@kernel.org>
  */
 
+#include <stdlib.h>
+#include <string.h>
 #include <debug.h>
 #include <dwarf-regs.h>
 #include <elf.h>
+#include <errno.h>
 #include <linux/kernel.h>
 
 #ifndef EM_AARCH64
@@ -68,3 +71,34 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine)
 	}
 	return NULL;
 }
+
+__weak int get_arch_regnum(const char *name __maybe_unused)
+{
+	return -ENOTSUP;
+}
+
+/* Return DWARF register number from architecture register name */
+int get_dwarf_regnum(const char *name, unsigned int machine)
+{
+	char *copy = strdup(name), *end;
+	int regnum = -1;
+
+	if (copy == NULL)
+		return -EINVAL;
+
+	/* Keep only the part before the first space, comma or ')' */
+	end = strpbrk(copy, " ,)");
+	if (end != NULL)
+		*end = '\0';
+
+	switch (machine) {
+	case EM_NONE:	/* Generic arch - use host arch */
+		regnum = get_arch_regnum(copy);
+		break;
+	default:
+		pr_err("ELF MACHINE %x is not supported.\n", machine);
+		break;
+	}
+	free(copy);
+	return regnum;
+}
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 44140b7f596a..a459374d0a1a 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -3,6 +3,7 @@
 #include "debug.h"
 #include "env.h"
 #include "util/header.h"
+#include "linux/compiler.h"
 #include <linux/ctype.h>
 #include <linux/zalloc.h>
 #include "cgroup.h"
@@ -12,6 +13,7 @@
 #include <string.h>
 #include "pmus.h"
 #include "strbuf.h"
+#include "trace/beauty/beauty.h"
 
 struct perf_env perf_env;
 
@@ -23,12 +25,18 @@ struct perf_env perf_env;
 void perf_env__insert_bpf_prog_info(struct perf_env *env,
 				    struct bpf_prog_info_node *info_node)
 {
+	down_write(&env->bpf_progs.lock);
+	__perf_env__insert_bpf_prog_info(env, info_node);
+	up_write(&env->bpf_progs.lock);
+}
+
+void __perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node)
+{
 	__u32 prog_id = info_node->info_linear->info.id;
 	struct bpf_prog_info_node *node;
 	struct rb_node *parent = NULL;
 	struct rb_node **p;
 
-	down_write(&env->bpf_progs.lock);
 	p = &env->bpf_progs.infos.rb_node;
 
 	while (*p != NULL) {
@@ -40,15 +48,13 @@ void perf_env__insert_bpf_prog_info(struct perf_env *env,
 			p = &(*p)->rb_right;
 		} else {
 			pr_debug("duplicated bpf prog info %u\n", prog_id);
-			goto out;
+			return;
 		}
 	}
 
 	rb_link_node(&info_node->rb_node, parent, p);
 	rb_insert_color(&info_node->rb_node, &env->bpf_progs.infos);
 	env->bpf_progs.infos_cnt++;
-out:
-	up_write(&env->bpf_progs.lock);
 }
 
 struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
@@ -78,13 +84,21 @@ out:
 
 bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node)
 {
+	bool ret;
+
+	down_write(&env->bpf_progs.lock);
+	ret = __perf_env__insert_btf(env, btf_node);
+	up_write(&env->bpf_progs.lock);
+	return ret;
+}
+
+bool __perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node)
+{
 	struct rb_node *parent = NULL;
 	__u32 btf_id = btf_node->id;
 	struct btf_node *node;
 	struct rb_node **p;
-	bool ret = true;
 
-	down_write(&env->bpf_progs.lock);
 	p = &env->bpf_progs.btfs.rb_node;
 
 	while (*p != NULL) {
@@ -96,25 +110,31 @@ bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node)
 			p = &(*p)->rb_right;
 		} else {
 			pr_debug("duplicated btf %u\n", btf_id);
-			ret = false;
-			goto out;
+			return false;
 		}
 	}
 
 	rb_link_node(&btf_node->rb_node, parent, p);
 	rb_insert_color(&btf_node->rb_node, &env->bpf_progs.btfs);
 	env->bpf_progs.btfs_cnt++;
-out:
-	up_write(&env->bpf_progs.lock);
-	return ret;
+	return true;
 }
 
 struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id)
 {
+	struct btf_node *res;
+
+	down_read(&env->bpf_progs.lock);
+	res = __perf_env__find_btf(env, btf_id);
+	up_read(&env->bpf_progs.lock);
+	return res;
+}
+
+struct btf_node *__perf_env__find_btf(struct perf_env *env, __u32 btf_id)
+{
 	struct btf_node *node = NULL;
 	struct rb_node *n;
 
-	down_read(&env->bpf_progs.lock);
 	n = env->bpf_progs.btfs.rb_node;
 
 	while (n) {
@@ -124,13 +144,9 @@ struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id)
 		else if (btf_id > node->id)
 			n = n->rb_right;
 		else
-			goto out;
+			return node;
 	}
-	node = NULL;
-
-out:
-	up_read(&env->bpf_progs.lock);
-	return node;
+	return NULL;
 }
 
 /* purge data in bpf_progs.infos tree */
@@ -453,6 +469,18 @@ const char *perf_env__arch(struct perf_env *env)
 	return normalize_arch(arch_name);
 }
 
+const char *perf_env__arch_strerrno(struct perf_env *env __maybe_unused, int err __maybe_unused)
+{
+#if defined(HAVE_SYSCALL_TABLE_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
+	if (env->arch_strerrno == NULL)
+		env->arch_strerrno = arch_syscalls__strerrno_function(perf_env__arch(env));
+
+	return env->arch_strerrno ? env->arch_strerrno(err) : "no arch specific strerrno function";
+#else
+	return "!(HAVE_SYSCALL_TABLE_SUPPORT && HAVE_LIBTRACEEVENT)";
+#endif
+}
+
 const char *perf_env__cpuid(struct perf_env *env)
 {
 	int status;
@@ -531,6 +559,24 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu)
 	return cpu.cpu >= 0 && cpu.cpu < env->nr_numa_map ? env->numa_map[cpu.cpu] : -1;
 }
 
+/* Look for @pmu_name among the NUL-separated "<number>:<name>" PMU mappings */
+bool perf_env__has_pmu_mapping(struct perf_env *env, const char *pmu_name)
+{
+	char *entry = env->pmu_mappings, *sep;
+
+	for (int idx = 0; idx < env->nr_pmu_mappings; idx++) {
+		if (strtoul(entry, &sep, 0) == ULONG_MAX || *sep != ':')
+			return false;
+
+		entry = sep + 1;
+		if (!strcmp(entry, pmu_name))
+			return true;
+
+		entry += strlen(entry) + 1;
+	}
+	return false;
+}
+
 char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name,
 			     const char *cap)
 {
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 4566c51f2fd9..7c527e65c186 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -46,10 +46,17 @@ struct hybrid_node {
 struct pmu_caps {
 	int		nr_caps;
 	unsigned int    max_branches;
+	unsigned int	br_cntr_nr;
+	unsigned int	br_cntr_width;
+
 	char            **caps;
 	char            *pmu_name;
 };
 
+typedef const char *(arch_syscalls__strerrno_t)(int err);
+
+arch_syscalls__strerrno_t *arch_syscalls__strerrno_function(const char *arch);
+
 struct perf_env {
 	char			*hostname;
 	char			*os_release;
@@ -62,6 +69,8 @@ struct perf_env {
 	unsigned long long	total_mem;
 	unsigned int		msr_pmu_type;
 	unsigned int		max_branches;
+	unsigned int		br_cntr_nr;
+	unsigned int		br_cntr_width;
 	int			kernel_is_64_bit;
 
 	int			nr_cmdline;
@@ -130,6 +139,7 @@ struct perf_env {
 		 */
 		bool	enabled;
 	} clock;
+	arch_syscalls__strerrno_t *arch_strerrno;
 };
 
 enum perf_compress_type {
@@ -159,19 +169,26 @@ int perf_env__read_cpu_topology_map(struct perf_env *env);
 void cpu_cache_level__free(struct cpu_cache_level *cache);
 
 const char *perf_env__arch(struct perf_env *env);
+const char *perf_env__arch_strerrno(struct perf_env *env, int err);
 const char *perf_env__cpuid(struct perf_env *env);
 const char *perf_env__raw_arch(struct perf_env *env);
 int perf_env__nr_cpus_avail(struct perf_env *env);
 
 void perf_env__init(struct perf_env *env);
+void __perf_env__insert_bpf_prog_info(struct perf_env *env,
+				      struct bpf_prog_info_node *info_node);
 void perf_env__insert_bpf_prog_info(struct perf_env *env,
 				    struct bpf_prog_info_node *info_node);
 struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
 							__u32 prog_id);
 bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
+bool __perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
 struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id);
+struct btf_node *__perf_env__find_btf(struct perf_env *env, __u32 btf_id);
 
 int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu);
 char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name,
 			     const char *cap);
+
+bool perf_env__has_pmu_mapping(struct perf_env *env, const char *pmu_name);
 #endif /* __PERF_ENV_H */
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 923c0fb15122..68f45e9e63b6 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -617,13 +617,13 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
 	if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) {
 		al->level = 'k';
 		maps = machine__kernel_maps(machine);
-		load_map = true;
+		load_map = !symbol_conf.lazy_load_kernel_maps;
 	} else if (cpumode == PERF_RECORD_MISC_USER && perf_host) {
 		al->level = '.';
 	} else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) {
 		al->level = 'g';
 		maps = machine__kernel_maps(machine);
-		load_map = true;
+		load_map = !symbol_conf.lazy_load_kernel_maps;
 	} else if (cpumode == PERF_RECORD_MISC_GUEST_USER && perf_guest) {
 		al->level = 'u';
 	} else {
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index e36da58522ef..95f25e9fb994 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1056,7 +1056,7 @@ int evlist__create_maps(struct evlist *evlist, struct target *target)
 		return -1;
 
 	if (target__uses_dummy_map(target))
-		cpus = perf_cpu_map__dummy_new();
+		cpus = perf_cpu_map__new_any_cpu();
 	else
 		cpus = perf_cpu_map__new(target->cpu_list);
 
@@ -1352,7 +1352,7 @@ static int evlist__create_syswide_maps(struct evlist *evlist)
 	 * error, and we may not want to do that fallback to a
 	 * default cpu identity map :-\
 	 */
-	cpus = perf_cpu_map__new(NULL);
+	cpus = perf_cpu_map__new_online_cpus();
 	if (!cpus)
 		goto out;
 
@@ -2518,3 +2518,33 @@ void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_lis
 	}
 	perf_cpu_map__put(user_requested_cpus);
 }
+
+/* Rename hybrid events to "pmu/event/modifiers"; no-op with a single core PMU. */
+void evlist__uniquify_name(struct evlist *evlist)
+{
+	char *new_name, empty_attributes[2] = ":", *attributes;
+	struct evsel *pos;
+
+	if (perf_pmus__num_core_pmus() == 1)
+		return;
+
+	evlist__for_each_entry(evlist, pos) {
+		/* Skip non-hybrid events and names that already contain a '/'. */
+		if (!evsel__is_hybrid(pos) || strchr(pos->name, '/'))
+			continue;
+
+		attributes = strchr(pos->name, ':');
+		if (attributes)
+			*attributes = '\0';
+		else
+			attributes = empty_attributes;
+
+		/* asprintf() returns -1 on failure and leaves new_name undefined. */
+		if (asprintf(&new_name, "%s/%s/%s", pos->pmu_name, pos->name, attributes + 1) >= 0) {
+			free(pos->name);
+			pos->name = new_name;
+		} else {
+			*attributes = ':';
+		}
+	}
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 98e7ddb2bd30..cb91dc9117a2 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -442,5 +442,6 @@ struct evsel *evlist__find_evsel(struct evlist *evlist, int idx);
 int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf);
 void evlist__check_mem_load_aux(struct evlist *evlist);
 void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list);
+void evlist__uniquify_name(struct evlist *evlist);
 
 #endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 72a5dfc38d38..6d7c9c58a9bc 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1801,7 +1801,7 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
 
 	if (cpus == NULL) {
 		if (empty_cpu_map == NULL) {
-			empty_cpu_map = perf_cpu_map__dummy_new();
+			empty_cpu_map = perf_cpu_map__new_any_cpu();
 			if (empty_cpu_map == NULL)
 				return -ENOMEM;
 		}
@@ -1832,6 +1832,8 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
 
 static void evsel__disable_missing_features(struct evsel *evsel)
 {
+	if (perf_missing_features.branch_counters)
+		evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_COUNTERS;
 	if (perf_missing_features.read_lost)
 		evsel->core.attr.read_format &= ~PERF_FORMAT_LOST;
 	if (perf_missing_features.weight_struct) {
@@ -1885,7 +1887,12 @@ bool evsel__detect_missing_features(struct evsel *evsel)
 	 * Must probe features in the order they were added to the
 	 * perf_event_attr interface.
 	 */
-	if (!perf_missing_features.read_lost &&
+	if (!perf_missing_features.branch_counters &&
+	    (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) {
+		perf_missing_features.branch_counters = true;
+		pr_debug2("switching off branch counters support\n");
+		return true;
+	} else if (!perf_missing_features.read_lost &&
 	    (evsel->core.attr.read_format & PERF_FORMAT_LOST)) {
 		perf_missing_features.read_lost = true;
 		pr_debug2("switching off PERF_FORMAT_LOST support\n");
@@ -2318,6 +2325,22 @@ u64 evsel__bitfield_swap_branch_flags(u64 value)
 	return new_val;
 }
 
+/* Branch counters are group-only: look at all events sharing @evsel's leader */
+static inline bool evsel__has_branch_counters(const struct evsel *evsel)
+{
+	struct evsel *pos, *leader = evsel__leader(evsel);
+
+	if (leader == NULL || evsel->evlist == NULL)
+		return false;
+
+	evlist__for_each_entry(evsel->evlist, pos) {
+		if (evsel__leader(pos) == leader &&
+		    (pos->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS))
+			return true;
+	}
+	return false;
+}
+
 int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
 			struct perf_sample *data)
 {
@@ -2551,6 +2574,16 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
 
 		OVERFLOW_CHECK(array, sz, max_size);
 		array = (void *)array + sz;
+
+		if (evsel__has_branch_counters(evsel)) {
+			OVERFLOW_CHECK_u64(array);
+
+			data->branch_stack_cntr = (u64 *)array;
+			sz = data->branch_stack->nr * sizeof(u64);
+
+			OVERFLOW_CHECK(array, sz, max_size);
+			array = (void *)array + sz;
+		}
 	}
 
 	if (type & PERF_SAMPLE_REGS_USER) {
@@ -2820,7 +2853,8 @@ u64 evsel__intval_common(struct evsel *evsel, struct perf_sample *sample, const
 
 #endif
 
-bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize)
+bool evsel__fallback(struct evsel *evsel, struct target *target, int err,
+		     char *msg, size_t msgsize)
 {
 	int paranoid;
 
@@ -2828,18 +2862,19 @@ bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize)
 	    evsel->core.attr.type   == PERF_TYPE_HARDWARE &&
 	    evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES) {
 		/*
-		 * If it's cycles then fall back to hrtimer based
-		 * cpu-clock-tick sw counter, which is always available even if
-		 * no PMU support.
+		 * If it's cycles then fall back to hrtimer based cpu-clock sw
+		 * counter, which is always available even if no PMU support.
 		 *
 		 * PPC returns ENXIO until 2.6.37 (behavior changed with commit
 		 * b0a873e).
 		 */
-		scnprintf(msg, msgsize, "%s",
-"The cycles event is not supported, trying to fall back to cpu-clock-ticks");
-
 		evsel->core.attr.type   = PERF_TYPE_SOFTWARE;
-		evsel->core.attr.config = PERF_COUNT_SW_CPU_CLOCK;
+		evsel->core.attr.config = target__has_cpu(target)
+			? PERF_COUNT_SW_CPU_CLOCK
+			: PERF_COUNT_SW_TASK_CLOCK;
+		scnprintf(msg, msgsize,
+			"The cycles event is not supported, trying to fall back to %s",
+			target__has_cpu(target) ? "cpu-clock" : "task-clock");
 
 		zfree(&evsel->name);
 		return true;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index d791316a1792..efbb6e848287 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -191,6 +191,7 @@ struct perf_missing_features {
 	bool code_page_size;
 	bool weight_struct;
 	bool read_lost;
+	bool branch_counters;
 };
 
 extern struct perf_missing_features perf_missing_features;
@@ -459,7 +460,8 @@ static inline bool evsel__is_clock(const struct evsel *evsel)
 	       evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK);
 }
 
-bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize);
+bool evsel__fallback(struct evsel *evsel, struct target *target, int err,
+		     char *msg, size_t msgsize);
 int evsel__open_strerror(struct evsel *evsel, struct target *target,
 			 int err, char *msg, size_t size);
 
diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c
index fefc72066c4e..ac17a3cb59dc 100644
--- a/tools/perf/util/genelf.c
+++ b/tools/perf/util/genelf.c
@@ -293,9 +293,9 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym,
 	 */
 	phdr = elf_newphdr(e, 1);
 	phdr[0].p_type = PT_LOAD;
-	phdr[0].p_offset = 0;
-	phdr[0].p_vaddr = 0;
-	phdr[0].p_paddr = 0;
+	phdr[0].p_offset = GEN_ELF_TEXT_OFFSET;
+	phdr[0].p_vaddr = GEN_ELF_TEXT_OFFSET;
+	phdr[0].p_paddr = GEN_ELF_TEXT_OFFSET;
 	phdr[0].p_filesz = csize;
 	phdr[0].p_memsz = csize;
 	phdr[0].p_flags = PF_X | PF_R;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index e86b9439ffee..3fe28edc3d01 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1444,7 +1444,9 @@ static int build_mem_topology(struct memory_node **nodesp, u64 *cntp)
 			nodes = new_nodes;
 			size += 4;
 		}
-		ret = memory_node__read(&nodes[cnt++], idx);
+		ret = memory_node__read(&nodes[cnt], idx);
+		if (!ret)
+			cnt += 1;
 	}
 out:
 	closedir(dir);
@@ -1847,8 +1849,8 @@ static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp)
 		node = rb_entry(next, struct bpf_prog_info_node, rb_node);
 		next = rb_next(&node->rb_node);
 
-		bpf_event__print_bpf_prog_info(&node->info_linear->info,
-					       env, fp);
+		__bpf_event__print_bpf_prog_info(&node->info_linear->info,
+						 env, fp);
 	}
 
 	up_read(&env->bpf_progs.lock);
@@ -2145,6 +2147,14 @@ static void print_pmu_caps(struct feat_fd *ff, FILE *fp)
 		__print_pmu_caps(fp, pmu_caps->nr_caps, pmu_caps->caps,
 				 pmu_caps->pmu_name);
 	}
+
+	if (strcmp(perf_env__arch(&ff->ph->env), "x86") == 0 &&
+	    perf_env__has_pmu_mapping(&ff->ph->env, "ibs_op")) {
+		char *max_precise = perf_env__find_pmu_cap(&ff->ph->env, "cpu", "max_precise");
+
+		if (max_precise != NULL && atoi(max_precise) == 0)
+			fprintf(fp, "# AMD systems uses ibs_op// PMU for some precise events, e.g.: cycles:p, see the 'perf list' man page for further details.\n");
+	}
 }
 
 static void print_pmu_mappings(struct feat_fd *ff, FILE *fp)
@@ -3178,7 +3188,7 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused)
 		/* after reading from file, translate offset to address */
 		bpil_offs_to_addr(info_linear);
 		info_node->info_linear = info_linear;
-		perf_env__insert_bpf_prog_info(env, info_node);
+		__perf_env__insert_bpf_prog_info(env, info_node);
 	}
 
 	up_write(&env->bpf_progs.lock);
@@ -3225,7 +3235,7 @@ static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused)
 		if (__do_read(ff, node->data, data_size))
 			goto out;
 
-		perf_env__insert_btf(env, node);
+		__perf_env__insert_btf(env, node);
 		node = NULL;
 	}
 
@@ -3259,7 +3269,9 @@ static int process_compressed(struct feat_fd *ff,
 }
 
 static int __process_pmu_caps(struct feat_fd *ff, int *nr_caps,
-			      char ***caps, unsigned int *max_branches)
+			      char ***caps, unsigned int *max_branches,
+			      unsigned int *br_cntr_nr,
+			      unsigned int *br_cntr_width)
 {
 	char *name, *value, *ptr;
 	u32 nr_pmu_caps, i;
@@ -3294,6 +3306,12 @@ static int __process_pmu_caps(struct feat_fd *ff, int *nr_caps,
 		if (!strcmp(name, "branches"))
 			*max_branches = atoi(value);
 
+		if (!strcmp(name, "branch_counter_nr"))
+			*br_cntr_nr = atoi(value);
+
+		if (!strcmp(name, "branch_counter_width"))
+			*br_cntr_width = atoi(value);
+
 		free(value);
 		free(name);
 	}
@@ -3318,7 +3336,9 @@ static int process_cpu_pmu_caps(struct feat_fd *ff,
 {
 	int ret = __process_pmu_caps(ff, &ff->ph->env.nr_cpu_pmu_caps,
 				     &ff->ph->env.cpu_pmu_caps,
-				     &ff->ph->env.max_branches);
+				     &ff->ph->env.max_branches,
+				     &ff->ph->env.br_cntr_nr,
+				     &ff->ph->env.br_cntr_width);
 
 	if (!ret && !ff->ph->env.cpu_pmu_caps)
 		pr_debug("cpu pmu capabilities not available\n");
@@ -3347,7 +3367,9 @@ static int process_pmu_caps(struct feat_fd *ff, void *data __maybe_unused)
 	for (i = 0; i < nr_pmu; i++) {
 		ret = __process_pmu_caps(ff, &pmu_caps[i].nr_caps,
 					 &pmu_caps[i].caps,
-					 &pmu_caps[i].max_branches);
+					 &pmu_caps[i].max_branches,
+					 &pmu_caps[i].br_cntr_nr,
+					 &pmu_caps[i].br_cntr_width);
 		if (ret)
 			goto err;
 
@@ -4369,9 +4391,10 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp)
 		ret += fprintf(fp, "... ");
 
 		map = cpu_map__new_data(&ev->cpus.cpus);
-		if (map)
+		if (map) {
 			ret += cpu_map__fprintf(map, fp);
-		else
+			perf_cpu_map__put(map);
+		} else
 			ret += fprintf(fp, "failed to get cpus\n");
 		break;
 	default:
diff --git a/tools/perf/util/hisi-ptt.c b/tools/perf/util/hisi-ptt.c
index 43bd1ca62d58..52d0ce302ca0 100644
--- a/tools/perf/util/hisi-ptt.c
+++ b/tools/perf/util/hisi-ptt.c
@@ -123,6 +123,7 @@ static int hisi_ptt_process_auxtrace_event(struct perf_session *session,
 	if (dump_trace)
 		hisi_ptt_dump_event(ptt, data, size);
 
+	free(data);
 	return 0;
 }
 
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index afc9f1c7f4dc..4a0aea0c9e00 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -82,6 +82,9 @@ enum hist_column {
 	HISTC_ADDR_TO,
 	HISTC_ADDR,
 	HISTC_SIMD,
+	HISTC_TYPE,
+	HISTC_TYPE_OFFSET,
+	HISTC_SYMBOL_OFFSET,
 	HISTC_NR_COLS, /* Last entry */
 };
 
@@ -457,7 +460,6 @@ struct hist_browser_timer {
 	int refresh;
 };
 
-struct annotation_options;
 struct res_sample;
 
 enum rstype {
@@ -473,16 +475,13 @@ struct block_hist;
 void attr_to_script(char *buf, struct perf_event_attr *attr);
 
 int map_symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
-			     struct hist_browser_timer *hbt,
-			     struct annotation_options *annotation_opts);
+			     struct hist_browser_timer *hbt);
 
 int hist_entry__tui_annotate(struct hist_entry *he, struct evsel *evsel,
-			     struct hist_browser_timer *hbt,
-			     struct annotation_options *annotation_opts);
+			     struct hist_browser_timer *hbt);
 
 int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt,
-			     float min_pcnt, struct perf_env *env, bool warn_lost_event,
-			     struct annotation_options *annotation_options);
+			     float min_pcnt, struct perf_env *env, bool warn_lost_event);
 
 int script_browse(const char *script_opt, struct evsel *evsel);
 
@@ -492,8 +491,7 @@ int res_sample_browse(struct res_sample *res_samples, int num_res,
 void res_sample_init(void);
 
 int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel,
-			   float min_percent, struct perf_env *env,
-			   struct annotation_options *annotation_opts);
+			   float min_percent, struct perf_env *env);
 #else
 static inline
 int evlist__tui_browse_hists(struct evlist *evlist __maybe_unused,
@@ -501,23 +499,20 @@ int evlist__tui_browse_hists(struct evlist *evlist __maybe_unused,
 			     struct hist_browser_timer *hbt __maybe_unused,
 			     float min_pcnt __maybe_unused,
 			     struct perf_env *env __maybe_unused,
-			     bool warn_lost_event __maybe_unused,
-			     struct annotation_options *annotation_options __maybe_unused)
+			     bool warn_lost_event __maybe_unused)
 {
 	return 0;
 }
 static inline int map_symbol__tui_annotate(struct map_symbol *ms __maybe_unused,
 					   struct evsel *evsel __maybe_unused,
-					   struct hist_browser_timer *hbt __maybe_unused,
-					   struct annotation_options *annotation_options __maybe_unused)
+					   struct hist_browser_timer *hbt __maybe_unused)
 {
 	return 0;
 }
 
 static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused,
 					   struct evsel *evsel __maybe_unused,
-					   struct hist_browser_timer *hbt __maybe_unused,
-					   struct annotation_options *annotation_opts __maybe_unused)
+					   struct hist_browser_timer *hbt __maybe_unused)
 {
 	return 0;
 }
@@ -541,8 +536,7 @@ static inline void res_sample_init(void) {}
 static inline int block_hists_tui_browse(struct block_hist *bh __maybe_unused,
 					 struct evsel *evsel __maybe_unused,
 					 float min_percent __maybe_unused,
-					 struct perf_env *env __maybe_unused,
-					 struct annotation_options *annotation_opts __maybe_unused)
+					 struct perf_env *env __maybe_unused)
 {
 	return 0;
 }
diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h
index 7d99a084e82d..01fb25a1150a 100644
--- a/tools/perf/util/include/dwarf-regs.h
+++ b/tools/perf/util/include/dwarf-regs.h
@@ -2,6 +2,9 @@
 #ifndef _PERF_DWARF_REGS_H_
 #define _PERF_DWARF_REGS_H_
 
+#define DWARF_REG_PC  0xd3af9c /* random number */
+#define DWARF_REG_FB  0xd3affb /* random number */
+
 #ifdef HAVE_DWARF_SUPPORT
 const char *get_arch_regstr(unsigned int n);
 /*
@@ -10,6 +13,22 @@ const char *get_arch_regstr(unsigned int n);
  * machine: ELF machine signature (EM_*)
  */
 const char *get_dwarf_regstr(unsigned int n, unsigned int machine);
+
+int get_arch_regnum(const char *name);
+/*
+ * get_dwarf_regnum - Returns DWARF regnum from register name
+ * name: architecture register name
+ * machine: ELF machine signature (EM_*)
+ */
+int get_dwarf_regnum(const char *name, unsigned int machine);
+
+#else /* HAVE_DWARF_SUPPORT */
+
+static inline int get_dwarf_regnum(const char *name __maybe_unused,
+				   unsigned int machine __maybe_unused)
+{
+	return -1;
+}
 #endif
 
 #ifdef HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 90c750150b19..b397a769006f 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -453,7 +453,7 @@ static struct thread *findnew_guest_code(struct machine *machine,
 	 * Guest code can be found in hypervisor process at the same address
 	 * so copy host maps.
 	 */
-	err = maps__clone(thread, thread__maps(host_thread));
+	err = maps__copy_from(thread__maps(thread), thread__maps(host_thread));
 	thread__put(host_thread);
 	if (err)
 		goto out_err;
@@ -1285,33 +1285,46 @@ static u64 find_entry_trampoline(struct dso *dso)
 #define X86_64_CPU_ENTRY_AREA_SIZE	0x2c000
 #define X86_64_ENTRY_TRAMPOLINE		0x6000
 
+struct machine__map_x86_64_entry_trampolines_args {
+	struct maps *kmaps;
+	bool found;
+};
+
+static int machine__map_x86_64_entry_trampolines_cb(struct map *map, void *data)
+{
+	struct machine__map_x86_64_entry_trampolines_args *args = data;
+	struct map *dest_map;
+	struct kmap *kmap = __map__kmap(map);
+
+	if (!kmap || !is_entry_trampoline(kmap->name))
+		return 0;
+
+	dest_map = maps__find(args->kmaps, map__pgoff(map));
+	if (dest_map != map)
+		map__set_pgoff(map, map__map_ip(dest_map, map__pgoff(map)));
+
+	args->found = true;
+	return 0;
+}
+
 /* Map x86_64 PTI entry trampolines */
 int machine__map_x86_64_entry_trampolines(struct machine *machine,
 					  struct dso *kernel)
 {
-	struct maps *kmaps = machine__kernel_maps(machine);
+	struct machine__map_x86_64_entry_trampolines_args args = {
+		.kmaps = machine__kernel_maps(machine),
+		.found = false,
+	};
 	int nr_cpus_avail, cpu;
-	bool found = false;
-	struct map_rb_node *rb_node;
 	u64 pgoff;
 
 	/*
 	 * In the vmlinux case, pgoff is a virtual address which must now be
 	 * mapped to a vmlinux offset.
 	 */
-	maps__for_each_entry(kmaps, rb_node) {
-		struct map *dest_map, *map = rb_node->map;
-		struct kmap *kmap = __map__kmap(map);
-
-		if (!kmap || !is_entry_trampoline(kmap->name))
-			continue;
+	maps__for_each_map(args.kmaps, machine__map_x86_64_entry_trampolines_cb, &args);
 
-		dest_map = maps__find(kmaps, map__pgoff(map));
-		if (dest_map != map)
-			map__set_pgoff(map, map__map_ip(dest_map, map__pgoff(map)));
-		found = true;
-	}
-	if (found || machine->trampolines_mapped)
+	if (args.found || machine->trampolines_mapped)
 		return 0;
 
 	pgoff = find_entry_trampoline(kernel);
@@ -1359,8 +1372,7 @@ __machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
 	if (machine->vmlinux_map == NULL)
 		return -ENOMEM;
 
-	map__set_map_ip(machine->vmlinux_map, identity__map_ip);
-	map__set_unmap_ip(machine->vmlinux_map, identity__map_ip);
+	map__set_mapping_type(machine->vmlinux_map, MAPPING_TYPE__IDENTITY);
 	return maps__insert(machine__kernel_maps(machine), machine->vmlinux_map);
 }
 
@@ -1750,12 +1762,11 @@ int machine__create_kernel_maps(struct machine *machine)
 
 	if (end == ~0ULL) {
 		/* update end address of the kernel map using adjacent module address */
-		struct map_rb_node *rb_node = maps__find_node(machine__kernel_maps(machine),
-							machine__kernel_map(machine));
-		struct map_rb_node *next = map_rb_node__next(rb_node);
+		struct map *next = maps__find_next_entry(machine__kernel_maps(machine),
+							 machine__kernel_map(machine));
 
 		if (next)
-			machine__set_kernel_mmap(machine, start, map__start(next->map));
+			machine__set_kernel_mmap(machine, start, map__start(next));
 	}
 
 out_put:
@@ -2157,9 +2168,13 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event
 	if (dump_trace)
 		perf_event__fprintf_task(event, stdout);
 
-	if (thread != NULL)
-		thread__put(thread);
-
+	if (thread != NULL) {
+		if (symbol_conf.keep_exited_threads)
+			thread__set_exited(thread, /*exited=*/true);
+		else
+			machine__remove_thread(machine, thread);
+	}
+	thread__put(thread);
 	return 0;
 }
 
@@ -3395,16 +3410,8 @@ int machine__for_each_dso(struct machine *machine, machine__dso_t fn, void *priv
 int machine__for_each_kernel_map(struct machine *machine, machine__map_t fn, void *priv)
 {
 	struct maps *maps = machine__kernel_maps(machine);
-	struct map_rb_node *pos;
-	int err = 0;
 
-	maps__for_each_entry(maps, pos) {
-		err = fn(pos->map, priv);
-		if (err != 0) {
-			break;
-		}
-	}
-	return err;
+	return maps__for_each_map(maps, fn, priv);
 }
 
 bool machine__is_lock_function(struct machine *machine, u64 addr)
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index f64b83004421..54c67cb7ecef 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -109,8 +109,7 @@ void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso)
 	map__set_pgoff(map, pgoff);
 	map__set_reloc(map, 0);
 	map__set_dso(map, dso__get(dso));
-	map__set_map_ip(map, map__dso_map_ip);
-	map__set_unmap_ip(map, map__dso_unmap_ip);
+	map__set_mapping_type(map, MAPPING_TYPE__DSO);
 	map__set_erange_warned(map, false);
 	refcount_set(map__refcnt(map), 1);
 }
@@ -172,7 +171,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
 		map__init(result, start, start + len, pgoff, dso);
 
 		if (anon || no_dso) {
-			map->map_ip = map->unmap_ip = identity__map_ip;
+			map->mapping_type = MAPPING_TYPE__IDENTITY;
 
 			/*
 			 * Set memory without DSO as loaded. All map__find_*
@@ -630,18 +629,3 @@ struct maps *map__kmaps(struct map *map)
 	}
 	return kmap->kmaps;
 }
-
-u64 map__dso_map_ip(const struct map *map, u64 ip)
-{
-	return ip - map__start(map) + map__pgoff(map);
-}
-
-u64 map__dso_unmap_ip(const struct map *map, u64 ip)
-{
-	return ip + map__start(map) - map__pgoff(map);
-}
-
-u64 identity__map_ip(const struct map *map __maybe_unused, u64 ip)
-{
-	return ip;
-}
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 1b53d53adc86..49756716cb13 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -16,23 +16,25 @@ struct dso;
 struct maps;
 struct machine;
 
+enum mapping_type {
+	/* map__map_ip/map__unmap_ip are given as offsets in the DSO. */
+	MAPPING_TYPE__DSO,
+	/* map__map_ip/map__unmap_ip are just the given ip value. */
+	MAPPING_TYPE__IDENTITY,
+};
+
 DECLARE_RC_STRUCT(map) {
 	u64			start;
 	u64			end;
-	bool			erange_warned:1;
-	bool			priv:1;
-	u32			prot;
 	u64			pgoff;
 	u64			reloc;
-
-	/* ip -> dso rip */
-	u64			(*map_ip)(const struct map *, u64);
-	/* dso rip -> ip */
-	u64			(*unmap_ip)(const struct map *, u64);
-
 	struct dso		*dso;
 	refcount_t		refcnt;
+	u32			prot;
 	u32			flags;
+	enum mapping_type	mapping_type:8;
+	bool			erange_warned;
+	bool			priv;
 };
 
 struct kmap;
@@ -41,38 +43,11 @@ struct kmap *__map__kmap(struct map *map);
 struct kmap *map__kmap(struct map *map);
 struct maps *map__kmaps(struct map *map);
 
-/* ip -> dso rip */
-u64 map__dso_map_ip(const struct map *map, u64 ip);
-/* dso rip -> ip */
-u64 map__dso_unmap_ip(const struct map *map, u64 ip);
-/* Returns ip */
-u64 identity__map_ip(const struct map *map __maybe_unused, u64 ip);
-
 static inline struct dso *map__dso(const struct map *map)
 {
 	return RC_CHK_ACCESS(map)->dso;
 }
 
-static inline u64 map__map_ip(const struct map *map, u64 ip)
-{
-	return RC_CHK_ACCESS(map)->map_ip(map, ip);
-}
-
-static inline u64 map__unmap_ip(const struct map *map, u64 ip)
-{
-	return RC_CHK_ACCESS(map)->unmap_ip(map, ip);
-}
-
-static inline void *map__map_ip_ptr(struct map *map)
-{
-	return RC_CHK_ACCESS(map)->map_ip;
-}
-
-static inline void* map__unmap_ip_ptr(struct map *map)
-{
-	return RC_CHK_ACCESS(map)->unmap_ip;
-}
-
 static inline u64 map__start(const struct map *map)
 {
 	return RC_CHK_ACCESS(map)->start;
@@ -123,6 +98,34 @@ static inline size_t map__size(const struct map *map)
 	return map__end(map) - map__start(map);
 }
 
+/* ip -> dso rip */
+static inline u64 map__dso_map_ip(const struct map *map, u64 ip)
+{
+	return ip - map__start(map) + map__pgoff(map);
+}
+
+/* dso rip -> ip */
+static inline u64 map__dso_unmap_ip(const struct map *map, u64 rip)
+{
+	return rip + map__start(map) - map__pgoff(map);
+}
+
+static inline u64 map__map_ip(const struct map *map, u64 ip_or_rip)
+{
+	if ((RC_CHK_ACCESS(map)->mapping_type) == MAPPING_TYPE__DSO)
+		return map__dso_map_ip(map, ip_or_rip);
+	else
+		return ip_or_rip;
+}
+
+static inline u64 map__unmap_ip(const struct map *map, u64 ip_or_rip)
+{
+	if ((RC_CHK_ACCESS(map)->mapping_type) == MAPPING_TYPE__DSO)
+		return map__dso_unmap_ip(map, ip_or_rip);
+	else
+		return ip_or_rip;
+}
+
 /* rip/ip <-> addr suitable for passing to `objdump --start-address=` */
 u64 map__rip_2objdump(struct map *map, u64 rip);
 
@@ -294,13 +297,13 @@ static inline void map__set_dso(struct map *map, struct dso *dso)
 	RC_CHK_ACCESS(map)->dso = dso;
 }
 
-static inline void map__set_map_ip(struct map *map, u64 (*map_ip)(const struct map *map, u64 ip))
+static inline void map__set_mapping_type(struct map *map, enum mapping_type type)
 {
-	RC_CHK_ACCESS(map)->map_ip = map_ip;
+	RC_CHK_ACCESS(map)->mapping_type = type;
 }
 
-static inline void map__set_unmap_ip(struct map *map, u64 (*unmap_ip)(const struct map *map, u64 rip))
+static inline enum mapping_type map__mapping_type(struct map *map)
 {
-	RC_CHK_ACCESS(map)->unmap_ip = unmap_ip;
+	return RC_CHK_ACCESS(map)->mapping_type;
 }
 #endif /* __PERF_MAP_H */
diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c
index 233438c95b53..0334fc18d9c6 100644
--- a/tools/perf/util/maps.c
+++ b/tools/perf/util/maps.c
@@ -10,6 +10,68 @@
 #include "ui/ui.h"
 #include "unwind.h"
 
+struct map_rb_node {
+	struct rb_node rb_node;
+	struct map *map;
+};
+
+#define maps__for_each_entry(maps, map) \
+	for (map = maps__first(maps); map; map = map_rb_node__next(map))
+
+#define maps__for_each_entry_safe(maps, map, next) \
+	for (map = maps__first(maps), next = map_rb_node__next(map); map; \
+	     map = next, next = map_rb_node__next(map))
+
+static struct rb_root *maps__entries(struct maps *maps)
+{
+	return &RC_CHK_ACCESS(maps)->entries;
+}
+
+static struct rw_semaphore *maps__lock(struct maps *maps)
+{
+	return &RC_CHK_ACCESS(maps)->lock;
+}
+
+static struct map **maps__maps_by_name(struct maps *maps)
+{
+	return RC_CHK_ACCESS(maps)->maps_by_name;
+}
+
+static struct map_rb_node *maps__first(struct maps *maps)
+{
+	struct rb_node *first = rb_first(maps__entries(maps));
+
+	if (first)
+		return rb_entry(first, struct map_rb_node, rb_node);
+	return NULL;
+}
+
+static struct map_rb_node *map_rb_node__next(struct map_rb_node *node)
+{
+	struct rb_node *next;
+
+	if (!node)
+		return NULL;
+
+	next = rb_next(&node->rb_node);
+
+	if (!next)
+		return NULL;
+
+	return rb_entry(next, struct map_rb_node, rb_node);
+}
+
+static struct map_rb_node *maps__find_node(struct maps *maps, struct map *map)
+{
+	struct map_rb_node *rb_node;
+
+	maps__for_each_entry(maps, rb_node) {
+		if (rb_node->RC_CHK_ACCESS(map) == RC_CHK_ACCESS(map))
+			return rb_node;
+	}
+	return NULL;
+}
+
 static void maps__init(struct maps *maps, struct machine *machine)
 {
 	refcount_set(maps__refcnt(maps), 1);
@@ -196,6 +258,41 @@ void maps__put(struct maps *maps)
 		RC_CHK_PUT(maps);
 }
 
+int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data), void *data)
+{
+	struct map_rb_node *pos;
+	int ret = 0;
+
+	down_read(maps__lock(maps));
+	maps__for_each_entry(maps, pos)	{
+		ret = cb(pos->map, data);
+		if (ret)
+			break;
+	}
+	up_read(maps__lock(maps));
+	return ret;
+}
+
+void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data)
+{
+	struct map_rb_node *pos, *next;
+	unsigned int start_nr_maps;
+
+	down_write(maps__lock(maps));
+
+	start_nr_maps = maps__nr_maps(maps);
+	maps__for_each_entry_safe(maps, pos, next)	{
+		if (cb(pos->map, data)) {
+			__maps__remove(maps, pos);
+			--RC_CHK_ACCESS(maps)->nr_maps;
+		}
+	}
+	if (maps__maps_by_name(maps) && start_nr_maps != maps__nr_maps(maps))
+		__maps__free_maps_by_name(maps);
+
+	up_write(maps__lock(maps));
+}
+
 struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp)
 {
 	struct map *map = maps__find(maps, addr);
@@ -210,31 +307,40 @@ struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp)
 	return NULL;
 }
 
-struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp)
-{
+struct maps__find_symbol_by_name_args {
+	struct map **mapp;
+	const char *name;
 	struct symbol *sym;
-	struct map_rb_node *pos;
+};
 
-	down_read(maps__lock(maps));
+static int maps__find_symbol_by_name_cb(struct map *map, void *data)
+{
+	struct maps__find_symbol_by_name_args *args = data;
 
-	maps__for_each_entry(maps, pos) {
-		sym = map__find_symbol_by_name(pos->map, name);
+	args->sym = map__find_symbol_by_name(map, args->name);
+	if (!args->sym)
+		return 0;
 
-		if (sym == NULL)
-			continue;
-		if (!map__contains_symbol(pos->map, sym)) {
-			sym = NULL;
-			continue;
-		}
-		if (mapp != NULL)
-			*mapp = pos->map;
-		goto out;
+	if (!map__contains_symbol(map, args->sym)) {
+		args->sym = NULL;
+		return 0;
 	}
 
-	sym = NULL;
-out:
-	up_read(maps__lock(maps));
-	return sym;
+	if (args->mapp != NULL)
+		*args->mapp = map__get(map);
+	return 1;
+}
+
+struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp)
+{
+	struct maps__find_symbol_by_name_args args = {
+		.mapp = mapp,
+		.name = name,
+		.sym = NULL,
+	};
+
+	maps__for_each_map(maps, maps__find_symbol_by_name_cb, &args);
+	return args.sym;
 }
 
 int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams)
@@ -253,41 +359,46 @@ int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams)
 	return ams->ms.sym ? 0 : -1;
 }
 
-size_t maps__fprintf(struct maps *maps, FILE *fp)
-{
-	size_t printed = 0;
-	struct map_rb_node *pos;
+struct maps__fprintf_args {
+	FILE *fp;
+	size_t printed;
+};
 
-	down_read(maps__lock(maps));
+static int maps__fprintf_cb(struct map *map, void *data)
+{
+	struct maps__fprintf_args *args = data;
 
-	maps__for_each_entry(maps, pos) {
-		printed += fprintf(fp, "Map:");
-		printed += map__fprintf(pos->map, fp);
-		if (verbose > 2) {
-			printed += dso__fprintf(map__dso(pos->map), fp);
-			printed += fprintf(fp, "--\n");
-		}
+	args->printed += fprintf(args->fp, "Map:");
+	args->printed += map__fprintf(map, args->fp);
+	if (verbose > 2) {
+		args->printed += dso__fprintf(map__dso(map), args->fp);
+		args->printed += fprintf(args->fp, "--\n");
 	}
+	return 0;
+}
 
-	up_read(maps__lock(maps));
+size_t maps__fprintf(struct maps *maps, FILE *fp)
+{
+	struct maps__fprintf_args args = {
+		.fp = fp,
+		.printed = 0,
+	};
+
+	maps__for_each_map(maps, maps__fprintf_cb, &args);
 
-	return printed;
+	return args.printed;
 }
 
-int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp)
+/*
+ * Find first map where end > map->start.
+ * Same as find_vma() in kernel.
+ */
+static struct rb_node *first_ending_after(struct maps *maps, const struct map *map)
 {
 	struct rb_root *root;
 	struct rb_node *next, *first;
-	int err = 0;
-
-	down_write(maps__lock(maps));
 
 	root = maps__entries(maps);
-
-	/*
-	 * Find first map where end > map->start.
-	 * Same as find_vma() in kernel.
-	 */
 	next = root->rb_node;
 	first = NULL;
 	while (next) {
@@ -301,8 +412,23 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp)
 		} else
 			next = next->rb_right;
 	}
+	return first;
+}
 
-	next = first;
+/*
+ * Adds new to maps, if new overlaps existing entries then the existing maps are
+ * adjusted or removed so that new fits without overlapping any entries.
+ */
+int maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
+{
+
+	struct rb_node *next;
+	int err = 0;
+	FILE *fp = debug_file();
+
+	down_write(maps__lock(maps));
+
+	next = first_ending_after(maps, new);
 	while (next && !err) {
 		struct map_rb_node *pos = rb_entry(next, struct map_rb_node, rb_node);
 		next = rb_next(&pos->rb_node);
@@ -311,27 +437,27 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp)
 		 * Stop if current map starts after map->end.
 		 * Maps are ordered by start: next will not overlap for sure.
 		 */
-		if (map__start(pos->map) >= map__end(map))
+		if (map__start(pos->map) >= map__end(new))
 			break;
 
 		if (verbose >= 2) {
 
 			if (use_browser) {
 				pr_debug("overlapping maps in %s (disable tui for more info)\n",
-					 map__dso(map)->name);
+					 map__dso(new)->name);
 			} else {
-				fputs("overlapping maps:\n", fp);
-				map__fprintf(map, fp);
+				pr_debug("overlapping maps:\n");
+				map__fprintf(new, fp);
 				map__fprintf(pos->map, fp);
 			}
 		}
 
-		rb_erase_init(&pos->rb_node, root);
+		rb_erase_init(&pos->rb_node, maps__entries(maps));
 		/*
 		 * Now check if we need to create new maps for areas not
 		 * overlapped by the new map:
 		 */
-		if (map__start(map) > map__start(pos->map)) {
+		if (map__start(new) > map__start(pos->map)) {
 			struct map *before = map__clone(pos->map);
 
 			if (before == NULL) {
@@ -339,7 +465,7 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp)
 				goto put_map;
 			}
 
-			map__set_end(before, map__start(map));
+			map__set_end(before, map__start(new));
 			err = __maps__insert(maps, before);
 			if (err) {
 				map__put(before);
@@ -351,7 +477,7 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp)
 			map__put(before);
 		}
 
-		if (map__end(map) < map__end(pos->map)) {
+		if (map__end(new) < map__end(pos->map)) {
 			struct map *after = map__clone(pos->map);
 
 			if (after == NULL) {
@@ -359,10 +485,10 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp)
 				goto put_map;
 			}
 
-			map__set_start(after, map__end(map));
-			map__add_pgoff(after, map__end(map) - map__start(pos->map));
-			assert(map__map_ip(pos->map, map__end(map)) ==
-				map__map_ip(after, map__end(map)));
+			map__set_start(after, map__end(new));
+			map__add_pgoff(after, map__end(new) - map__start(pos->map));
+			assert(map__map_ip(pos->map, map__end(new)) ==
+				map__map_ip(after, map__end(new)));
 			err = __maps__insert(maps, after);
 			if (err) {
 				map__put(after);
@@ -376,16 +502,14 @@ put_map:
 		map__put(pos->map);
 		free(pos);
 	}
+	/* Add the map. */
+	err = __maps__insert(maps, new);
 	up_write(maps__lock(maps));
 	return err;
 }
 
-/*
- * XXX This should not really _copy_ te maps, but refcount them.
- */
-int maps__clone(struct thread *thread, struct maps *parent)
+int maps__copy_from(struct maps *maps, struct maps *parent)
 {
-	struct maps *maps = thread__maps(thread);
 	int err;
 	struct map_rb_node *rb_node;
 
@@ -416,17 +540,6 @@ out_unlock:
 	return err;
 }
 
-struct map_rb_node *maps__find_node(struct maps *maps, struct map *map)
-{
-	struct map_rb_node *rb_node;
-
-	maps__for_each_entry(maps, rb_node) {
-		if (rb_node->RC_CHK_ACCESS(map) == RC_CHK_ACCESS(map))
-			return rb_node;
-	}
-	return NULL;
-}
-
 struct map *maps__find(struct maps *maps, u64 ip)
 {
 	struct rb_node *p;
@@ -452,26 +565,275 @@ out:
 	return m ? m->map : NULL;
 }
 
-struct map_rb_node *maps__first(struct maps *maps)
+static int map__strcmp(const void *a, const void *b)
 {
-	struct rb_node *first = rb_first(maps__entries(maps));
+	const struct map *map_a = *(const struct map **)a;
+	const struct map *map_b = *(const struct map **)b;
+	const struct dso *dso_a = map__dso(map_a);
+	const struct dso *dso_b = map__dso(map_b);
+	int ret = strcmp(dso_a->short_name, dso_b->short_name);
 
-	if (first)
-		return rb_entry(first, struct map_rb_node, rb_node);
-	return NULL;
+	if (ret == 0 && map_a != map_b) {
+		/*
+		 * Tie-break by start, then address; u64 subtraction cast to int can flip sign.
+		 */
+		if (map__start(map_a) != map__start(map_b))
+			ret = map__start(map_a) < map__start(map_b) ? -1 : 1;
+		else
+			ret = (intptr_t)map_a < (intptr_t)map_b ? -1 : 1;
+	}
+
+	return ret;
 }
 
-struct map_rb_node *map_rb_node__next(struct map_rb_node *node)
+static int map__strcmp_name(const void *name, const void *b)
 {
-	struct rb_node *next;
+	const struct dso *dso = map__dso(*(const struct map **)b);
 
-	if (!node)
-		return NULL;
+	return strcmp(name, dso->short_name);
+}
 
-	next = rb_next(&node->rb_node);
+void __maps__sort_by_name(struct maps *maps)
+{
+	qsort(maps__maps_by_name(maps), maps__nr_maps(maps), sizeof(struct map *), map__strcmp);
+}
 
-	if (!next)
+static int map__groups__sort_by_name_from_rbtree(struct maps *maps)
+{
+	struct map_rb_node *rb_node;
+	struct map **maps_by_name = realloc(maps__maps_by_name(maps),
+					    maps__nr_maps(maps) * sizeof(struct map *));
+	int i = 0;
+
+	if (maps_by_name == NULL)
+		return -1;
+
+	up_read(maps__lock(maps));
+	down_write(maps__lock(maps));
+
+	RC_CHK_ACCESS(maps)->maps_by_name = maps_by_name;
+	RC_CHK_ACCESS(maps)->nr_maps_allocated = maps__nr_maps(maps);
+
+	maps__for_each_entry(maps, rb_node)
+		maps_by_name[i++] = map__get(rb_node->map);
+
+	__maps__sort_by_name(maps);
+
+	up_write(maps__lock(maps));
+	down_read(maps__lock(maps));
+
+	return 0;
+}
+
+static struct map *__maps__find_by_name(struct maps *maps, const char *name)
+{
+	struct map **mapp;
+
+	if (maps__maps_by_name(maps) == NULL &&
+	    map__groups__sort_by_name_from_rbtree(maps))
 		return NULL;
 
-	return rb_entry(next, struct map_rb_node, rb_node);
+	mapp = bsearch(name, maps__maps_by_name(maps), maps__nr_maps(maps),
+		       sizeof(*mapp), map__strcmp_name);
+	if (mapp)
+		return *mapp;
+	return NULL;
+}
+
+struct map *maps__find_by_name(struct maps *maps, const char *name)
+{
+	struct map_rb_node *rb_node;
+	struct map *map;
+
+	down_read(maps__lock(maps));
+
+
+	if (RC_CHK_ACCESS(maps)->last_search_by_name) {
+		const struct dso *dso = map__dso(RC_CHK_ACCESS(maps)->last_search_by_name);
+
+		if (strcmp(dso->short_name, name) == 0) {
+			map = RC_CHK_ACCESS(maps)->last_search_by_name;
+			goto out_unlock;
+		}
+	}
+	/*
+	 * If we have maps->maps_by_name, then the name isn't in the rbtree,
+	 * as maps->maps_by_name mirrors the rbtree when lookups by name are
+	 * made.
+	 */
+	map = __maps__find_by_name(maps, name);
+	if (map || maps__maps_by_name(maps) != NULL)
+		goto out_unlock;
+
+	/* Fallback to traversing the rbtree... */
+	maps__for_each_entry(maps, rb_node) {
+		struct dso *dso;
+
+		map = rb_node->map;
+		dso = map__dso(map);
+		if (strcmp(dso->short_name, name) == 0) {
+			RC_CHK_ACCESS(maps)->last_search_by_name = map;
+			goto out_unlock;
+		}
+	}
+	map = NULL;
+
+out_unlock:
+	up_read(maps__lock(maps));
+	return map;
+}
+
+struct map *maps__find_next_entry(struct maps *maps, struct map *map)
+{
+	struct map_rb_node *rb_node = maps__find_node(maps, map);
+	struct map_rb_node *next = map_rb_node__next(rb_node);
+
+	if (next)
+		return next->map;
+
+	return NULL;
+}
+
+void maps__fixup_end(struct maps *maps)
+{
+	struct map_rb_node *prev = NULL, *curr;
+
+	down_write(maps__lock(maps));
+	/* Clamp a missing or overlapping end to the start of the following map. */
+	maps__for_each_entry(maps, curr) {
+		if (prev && (!map__end(prev->map) || map__end(prev->map) > map__start(curr->map)))
+			map__set_end(prev->map, map__start(curr->map));
+
+		prev = curr;
+	}
+
+	/*
+	 * We still haven't the actual symbols, so guess the last map's final
+	 * address. curr is NULL once the walk ends, so prev is the last map.
+	 */
+	if (prev && !map__end(prev->map))
+		map__set_end(prev->map, ~0ULL);
+
+	up_write(maps__lock(maps));
+}
+
+/*
+ * Merges map into maps by splitting the new map within the existing map
+ * regions.
+ */
+int maps__merge_in(struct maps *kmaps, struct map *new_map)
+{
+	struct map_rb_node *rb_node;
+	struct rb_node *first;
+	bool overlaps;
+	LIST_HEAD(merged);
+	int err = 0;
+
+	down_read(maps__lock(kmaps));
+	first = first_ending_after(kmaps, new_map);
+	rb_node = first ? rb_entry(first, struct map_rb_node, rb_node) : NULL;
+	overlaps = rb_node && map__start(rb_node->map) < map__end(new_map);
+	up_read(maps__lock(kmaps));
+
+	if (!overlaps)
+		return maps__insert(kmaps, new_map);
+
+	maps__for_each_entry(kmaps, rb_node) {
+		struct map *old_map = rb_node->map;
+
+		/* no overlap with this one */
+		if (map__end(new_map) < map__start(old_map) ||
+		    map__start(new_map) >= map__end(old_map))
+			continue;
+
+		if (map__start(new_map) < map__start(old_map)) {
+			/*
+			 * |new......
+			 *       |old....
+			 */
+			if (map__end(new_map) < map__end(old_map)) {
+				/*
+				 * |new......|     -> |new..|
+				 *       |old....| ->       |old....|
+				 */
+				map__set_end(new_map, map__start(old_map));
+			} else {
+				/*
+				 * |new.............| -> |new..|       |new..|
+				 *       |old....|    ->       |old....|
+				 */
+				struct map_list_node *m = map_list_node__new();
+
+				if (!m) {
+					err = -ENOMEM;
+					goto out;
+				}
+
+				m->map = map__clone(new_map);
+				if (!m->map) {
+					free(m);
+					err = -ENOMEM;
+					goto out;
+				}
+
+				map__set_end(m->map, map__start(old_map));
+				list_add_tail(&m->node, &merged);
+				map__add_pgoff(new_map, map__end(old_map) - map__start(new_map));
+				map__set_start(new_map, map__end(old_map));
+			}
+		} else {
+			/*
+			 *      |new......
+			 * |old....
+			 */
+			if (map__end(new_map) < map__end(old_map)) {
+				/*
+				 *      |new..|   -> x
+				 * |old.........| -> |old.........|
+				 */
+				map__put(new_map);
+				new_map = NULL;
+				break;
+			} else {
+				/*
+				 *      |new......| ->         |new...|
+				 * |old....|        -> |old....|
+				 */
+				map__add_pgoff(new_map, map__end(old_map) - map__start(new_map));
+				map__set_start(new_map, map__end(old_map));
+			}
+		}
+	}
+
+out:
+	while (!list_empty(&merged)) {
+		struct map_list_node *old_node;
+
+		old_node = list_entry(merged.next, struct map_list_node, node);
+		list_del_init(&old_node->node);
+		if (!err)
+			err = maps__insert(kmaps, old_node->map);
+		map__put(old_node->map);
+		free(old_node);
+	}
+
+	if (new_map) {
+		if (!err)
+			err = maps__insert(kmaps, new_map);
+		map__put(new_map);
+	}
+	return err;
+}
+
+void maps__load_first(struct maps *maps)
+{
+	struct map_rb_node *first;
+
+	down_read(maps__lock(maps));
+
+	first = maps__first(maps);
+	if (first)
+		map__load(first->map);
+
+	up_read(maps__lock(maps));
 }
diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h
index 83144e0645ed..d836d04c9402 100644
--- a/tools/perf/util/maps.h
+++ b/tools/perf/util/maps.h
@@ -14,24 +14,18 @@ struct ref_reloc_sym;
 struct machine;
 struct map;
 struct maps;
-struct thread;
 
-struct map_rb_node {
-	struct rb_node rb_node;
+struct map_list_node {
+	struct list_head node;
 	struct map *map;
 };
 
-struct map_rb_node *maps__first(struct maps *maps);
-struct map_rb_node *map_rb_node__next(struct map_rb_node *node);
-struct map_rb_node *maps__find_node(struct maps *maps, struct map *map);
-struct map *maps__find(struct maps *maps, u64 addr);
-
-#define maps__for_each_entry(maps, map) \
-	for (map = maps__first(maps); map; map = map_rb_node__next(map))
+static inline struct map_list_node *map_list_node__new(void)
+{
+	return malloc(sizeof(struct map_list_node));
+}
 
-#define maps__for_each_entry_safe(maps, map, next) \
-	for (map = maps__first(maps), next = map_rb_node__next(map); map; \
-	     map = next, next = map_rb_node__next(map))
+struct map *maps__find(struct maps *maps, u64 addr);
 
 DECLARE_RC_STRUCT(maps) {
 	struct rb_root      entries;
@@ -58,7 +52,7 @@ struct kmap {
 
 struct maps *maps__new(struct machine *machine);
 bool maps__empty(struct maps *maps);
-int maps__clone(struct thread *thread, struct maps *parent);
+int maps__copy_from(struct maps *maps, struct maps *parent);
 
 struct maps *maps__get(struct maps *maps);
 void maps__put(struct maps *maps);
@@ -71,26 +65,16 @@ static inline void __maps__zput(struct maps **map)
 
 #define maps__zput(map) __maps__zput(&map)
 
-static inline struct rb_root *maps__entries(struct maps *maps)
-{
-	return &RC_CHK_ACCESS(maps)->entries;
-}
+/* Iterate over map calling cb for each entry. */
+int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data), void *data);
+/* Iterate over map removing an entry if cb returns true. */
+void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data);
 
 static inline struct machine *maps__machine(struct maps *maps)
 {
 	return RC_CHK_ACCESS(maps)->machine;
 }
 
-static inline struct rw_semaphore *maps__lock(struct maps *maps)
-{
-	return &RC_CHK_ACCESS(maps)->lock;
-}
-
-static inline struct map **maps__maps_by_name(struct maps *maps)
-{
-	return RC_CHK_ACCESS(maps)->maps_by_name;
-}
-
 static inline unsigned int maps__nr_maps(const struct maps *maps)
 {
 	return RC_CHK_ACCESS(maps)->nr_maps;
@@ -125,12 +109,18 @@ struct addr_map_symbol;
 
 int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams);
 
-int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp);
+int maps__fixup_overlap_and_insert(struct maps *maps, struct map *new);
 
 struct map *maps__find_by_name(struct maps *maps, const char *name);
 
+struct map *maps__find_next_entry(struct maps *maps, struct map *map);
+
 int maps__merge_in(struct maps *kmaps, struct map *new_map);
 
 void __maps__sort_by_name(struct maps *maps);
 
+void maps__fixup_end(struct maps *maps);
+
+void maps__load_first(struct maps *maps);
+
 #endif // __PERF_MAPS_H
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 954b235e12e5..3a2e3687878c 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -100,11 +100,14 @@ int perf_mem_events__parse(const char *str)
 	return -1;
 }
 
-static bool perf_mem_event__supported(const char *mnt, char *sysfs_name)
+static bool perf_mem_event__supported(const char *mnt, struct perf_pmu *pmu,
+				      struct perf_mem_event *e)
 {
+	char sysfs_name[100];
 	char path[PATH_MAX];
 	struct stat st;
 
+	scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, pmu->name);
 	scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, sysfs_name);
 	return !stat(path, &st);
 }
@@ -120,7 +123,6 @@ int perf_mem_events__init(void)
 
 	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
 		struct perf_mem_event *e = perf_mem_events__ptr(j);
-		char sysfs_name[100];
 		struct perf_pmu *pmu = NULL;
 
 		/*
@@ -136,12 +138,12 @@ int perf_mem_events__init(void)
 		 * of core PMU.
 		 */
 		while ((pmu = perf_pmus__scan(pmu)) != NULL) {
-			scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, pmu->name);
-			e->supported |= perf_mem_event__supported(mnt, sysfs_name);
+			e->supported |= perf_mem_event__supported(mnt, pmu, e);
+			if (e->supported) {
+				found = true;
+				break;
+			}
 		}
-
-		if (e->supported)
-			found = true;
 	}
 
 	return found ? 0 : -ENOENT;
@@ -167,13 +169,10 @@ static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e,
 						    int idx)
 {
 	const char *mnt = sysfs__mount();
-	char sysfs_name[100];
 	struct perf_pmu *pmu = NULL;
 
 	while ((pmu = perf_pmus__scan(pmu)) != NULL) {
-		scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name,
-			  pmu->name);
-		if (!perf_mem_event__supported(mnt, sysfs_name)) {
+		if (!perf_mem_event__supported(mnt, pmu, e)) {
 			pr_err("failed: event '%s' not supported\n",
 			       perf_mem_events__name(idx, pmu->name));
 		}
@@ -183,6 +182,7 @@ static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e,
 int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
 				 char **rec_tmp, int *tmp_nr)
 {
+	const char *mnt = sysfs__mount();
 	int i = *argv_nr, k = 0;
 	struct perf_mem_event *e;
 
@@ -211,6 +211,9 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
 			while ((pmu = perf_pmus__scan(pmu)) != NULL) {
 				const char *s = perf_mem_events__name(j, pmu->name);
 
+				if (!perf_mem_event__supported(mnt, pmu, e))
+					continue;
+
 				rec_argv[i++] = "-e";
 				if (s) {
 					char *copy = strdup(s);
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 49093b21ee2d..122ee198a86e 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -295,15 +295,14 @@ int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu
 
 	map->core.flush = mp->flush;
 
-	map->comp_level = mp->comp_level;
 #ifndef PYTHON_PERF
-	if (zstd_init(&map->zstd_data, map->comp_level)) {
+	if (zstd_init(&map->zstd_data, mp->comp_level)) {
 		pr_debug2("failed to init mmap compressor, error %d\n", errno);
 		return -1;
 	}
 #endif
 
-	if (map->comp_level && !perf_mmap__aio_enabled(map)) {
+	if (mp->comp_level && !perf_mmap__aio_enabled(map)) {
 		map->data = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE,
 				 MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
 		if (map->data == MAP_FAILED) {
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index f944c3cd5efa..0df6e1621c7e 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -39,7 +39,6 @@ struct mmap {
 #endif
 	struct mmap_cpu_mask	affinity_mask;
 	void		*data;
-	int		comp_level;
 	struct perf_data_file *file;
 	struct zstd_data      zstd_data;
 };
diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c
index fd67d204d720..f7f7aff3d85a 100644
--- a/tools/perf/util/parse-branch-options.c
+++ b/tools/perf/util/parse-branch-options.c
@@ -36,6 +36,7 @@ static const struct branch_mode branch_modes[] = {
 	BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK),
 	BRANCH_OPT("hw_index", PERF_SAMPLE_BRANCH_HW_INDEX),
 	BRANCH_OPT("priv", PERF_SAMPLE_BRANCH_PRIV_SAVE),
+	BRANCH_OPT("counter", PERF_SAMPLE_BRANCH_COUNTERS),
 	BRANCH_END
 };
 
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index aa2f5c6fc7fc..66eabcea4242 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -976,7 +976,7 @@ static int config_term_pmu(struct perf_event_attr *attr,
 			   struct parse_events_error *err)
 {
 	if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE) {
-		const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type);
+		struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type);
 
 		if (!pmu) {
 			char *err_str;
@@ -986,15 +986,23 @@ static int config_term_pmu(struct perf_event_attr *attr,
 							   err_str, /*help=*/NULL);
 			return -EINVAL;
 		}
-		if (perf_pmu__supports_legacy_cache(pmu)) {
+		/*
+		 * Rewrite the PMU event to a legacy cache one unless the PMU
+		 * doesn't support legacy cache events or the event is present
+		 * within the PMU.
+		 */
+		if (perf_pmu__supports_legacy_cache(pmu) &&
+		    !perf_pmu__have_event(pmu, term->config)) {
 			attr->type = PERF_TYPE_HW_CACHE;
 			return parse_events__decode_legacy_cache(term->config, pmu->type,
 								 &attr->config);
-		} else
+		} else {
 			term->type_term = PARSE_EVENTS__TERM_TYPE_USER;
+			term->no_value = true;
+		}
 	}
 	if (term->type_term == PARSE_EVENTS__TERM_TYPE_HARDWARE) {
-		const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type);
+		struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type);
 
 		if (!pmu) {
 			char *err_str;
@@ -1004,10 +1012,19 @@ static int config_term_pmu(struct perf_event_attr *attr,
 							   err_str, /*help=*/NULL);
 			return -EINVAL;
 		}
-		attr->type = PERF_TYPE_HARDWARE;
-		attr->config = term->val.num;
-		if (perf_pmus__supports_extended_type())
-			attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT;
+		/*
+		 * If the PMU has a sysfs or json event prefer it over
+		 * legacy. ARM requires this.
+		 */
+		if (perf_pmu__have_event(pmu, term->config)) {
+			term->type_term = PARSE_EVENTS__TERM_TYPE_USER;
+			term->no_value = true;
+		} else {
+			attr->type = PERF_TYPE_HARDWARE;
+			attr->config = term->val.num;
+			if (perf_pmus__supports_extended_type())
+				attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT;
+		}
 		return 0;
 	}
 	if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER ||
@@ -1381,6 +1398,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
 	YYLTYPE *loc = loc_;
 	LIST_HEAD(config_terms);
 	struct parse_events_terms parsed_terms;
+	bool alias_rewrote_terms = false;
 
 	pmu = parse_state->fake_pmu ?: perf_pmus__find(name);
 
@@ -1433,7 +1451,15 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
 		return evsel ? 0 : -ENOMEM;
 	}
 
-	if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, &parsed_terms, &info, err)) {
+	/* Configure attr/terms with a known PMU, this will set hardcoded terms. */
+	if (config_attr(&attr, &parsed_terms, parse_state->error, config_term_pmu)) {
+		parse_events_terms__exit(&parsed_terms);
+		return -EINVAL;
+	}
+
+	/* Look for event names in the terms and rewrite into format based terms. */
+	if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, &parsed_terms,
+							    &info, &alias_rewrote_terms, err)) {
 		parse_events_terms__exit(&parsed_terms);
 		return -EINVAL;
 	}
@@ -1447,11 +1473,9 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
 		strbuf_release(&sb);
 	}
 
-	/*
-	 * Configure hardcoded terms first, no need to check
-	 * return value when called with fail == 0 ;)
-	 */
-	if (config_attr(&attr, &parsed_terms, parse_state->error, config_term_pmu)) {
+	/* Configure attr/terms again if an alias was expanded. */
+	if (alias_rewrote_terms &&
+	    config_attr(&attr, &parsed_terms, parse_state->error, config_term_pmu)) {
 		parse_events_terms__exit(&parsed_terms);
 		return -EINVAL;
 	}
diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c
index e1e2d701599c..1de3b69cdf4a 100644
--- a/tools/perf/util/perf_api_probe.c
+++ b/tools/perf/util/perf_api_probe.c
@@ -64,7 +64,7 @@ static bool perf_probe_api(setup_probe_fn_t fn)
 	struct perf_cpu cpu;
 	int ret, i = 0;
 
-	cpus = perf_cpu_map__new(NULL);
+	cpus = perf_cpu_map__new_online_cpus();
 	if (!cpus)
 		return false;
 	cpu = perf_cpu_map__cpu(cpus, 0);
@@ -140,7 +140,7 @@ bool perf_can_record_cpu_wide(void)
 	struct perf_cpu cpu;
 	int fd;
 
-	cpus = perf_cpu_map__new(NULL);
+	cpus = perf_cpu_map__new_online_cpus();
 	if (!cpus)
 		return false;
 
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index 2247991451f3..8f04d3b7f3ec 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -55,6 +55,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value)
 		bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
 		bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
 		bit_name(TYPE_SAVE), bit_name(HW_INDEX), bit_name(PRIV_SAVE),
+		bit_name(COUNTERS),
 		{ .name = NULL, }
 	};
 #undef bit_name
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index d3c9aa4326be..3c9609944a2f 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1494,12 +1494,14 @@ static int check_info_data(struct perf_pmu *pmu,
  * defined for the alias
  */
 int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms,
-			  struct perf_pmu_info *info, struct parse_events_error *err)
+			  struct perf_pmu_info *info, bool *rewrote_terms,
+			  struct parse_events_error *err)
 {
 	struct parse_events_term *term, *h;
 	struct perf_pmu_alias *alias;
 	int ret;
 
+	*rewrote_terms = false;
 	info->per_pkg = false;
 
 	/*
@@ -1521,7 +1523,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_
 						NULL);
 			return ret;
 		}
-
+		*rewrote_terms = true;
 		ret = check_info_data(pmu, alias, info, err, term->err_term);
 		if (ret)
 			return ret;
@@ -1615,6 +1617,8 @@ bool perf_pmu__auto_merge_stats(const struct perf_pmu *pmu)
 
 bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name)
 {
+	if (!name)
+		return false;
 	if (perf_pmu__find_alias(pmu, name, /*load=*/ true) != NULL)
 		return true;
 	if (pmu->cpu_aliases_added || !pmu->events_table)
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index d2895d415f08..424c3fee0949 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -201,7 +201,8 @@ int perf_pmu__config_terms(const struct perf_pmu *pmu,
 __u64 perf_pmu__format_bits(struct perf_pmu *pmu, const char *name);
 int perf_pmu__format_type(struct perf_pmu *pmu, const char *name);
 int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms,
-			  struct perf_pmu_info *info, struct parse_events_error *err);
+			  struct perf_pmu_info *info, bool *rewrote_terms,
+			  struct parse_events_error *err);
 int perf_pmu__find_event(struct perf_pmu *pmu, const char *event, void *state, pmu_event_callback cb);
 
 int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_load);
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 1a5b7fa459b2..a1a796043691 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -149,10 +149,32 @@ static int kernel_get_symbol_address_by_name(const char *name, u64 *addr,
 	return 0;
 }
 
+struct kernel_get_module_map_cb_args {
+	const char *module;	/* module name to match, given without the brackets */
+	struct map *result;	/* map__get() reference to the match; NULL until found */
+};
+
+static int kernel_get_module_map_cb(struct map *map, void *data)
+{
+	struct kernel_get_module_map_cb_args *args = data;
+	struct dso *dso = map__dso(map);
+	/* short_name is "[module]": only the text between the brackets counts */
+	int inner_len = dso->short_name_len - 2;
+
+	if (strncmp(dso->short_name + 1, args->module, inner_len) != 0 ||
+	    args->module[inner_len] != '\0')
+		return 0;	/* other name, or short_name is only a prefix of module */
+
+	args->result = map__get(map);
+	return 1;
+}
+
 static struct map *kernel_get_module_map(const char *module)
 {
-	struct maps *maps = machine__kernel_maps(host_machine);
-	struct map_rb_node *pos;
+	struct kernel_get_module_map_cb_args args = {
+		.module = module,
+		.result = NULL,
+	};
 
 	/* A file path -- this is an offline module */
 	if (module && strchr(module, '/'))
@@ -164,19 +186,9 @@ static struct map *kernel_get_module_map(const char *module)
 		return map__get(map);
 	}
 
-	maps__for_each_entry(maps, pos) {
-		/* short_name is "[module]" */
-		struct dso *dso = map__dso(pos->map);
-		const char *short_name = dso->short_name;
-		u16 short_name_len =  dso->short_name_len;
+	maps__for_each_map(machine__kernel_maps(host_machine), kernel_get_module_map_cb, &args);
 
-		if (strncmp(short_name + 1, module,
-			    short_name_len - 2) == 0 &&
-		    module[short_name_len - 2] == '\0') {
-			return map__get(pos->map);
-		}
-	}
-	return NULL;
+	return args.result;
 }
 
 struct map *get_target_map(const char *target, struct nsinfo *nsi, bool user)
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index f171360b0ef4..c8923375e30d 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -23,6 +23,7 @@
 #include "event.h"
 #include "dso.h"
 #include "debug.h"
+#include "debuginfo.h"
 #include "intlist.h"
 #include "strbuf.h"
 #include "strlist.h"
@@ -31,128 +32,9 @@
 #include "probe-file.h"
 #include "string2.h"
 
-#ifdef HAVE_DEBUGINFOD_SUPPORT
-#include <elfutils/debuginfod.h>
-#endif
-
 /* Kprobe tracer basic type is up to u64 */
 #define MAX_BASIC_TYPE_BITS	64
 
-/* Dwarf FL wrappers */
-static char *debuginfo_path;	/* Currently dummy */
-
-static const Dwfl_Callbacks offline_callbacks = {
-	.find_debuginfo = dwfl_standard_find_debuginfo,
-	.debuginfo_path = &debuginfo_path,
-
-	.section_address = dwfl_offline_section_address,
-
-	/* We use this table for core files too.  */
-	.find_elf = dwfl_build_id_find_elf,
-};
-
-/* Get a Dwarf from offline image */
-static int debuginfo__init_offline_dwarf(struct debuginfo *dbg,
-					 const char *path)
-{
-	GElf_Addr dummy;
-	int fd;
-
-	fd = open(path, O_RDONLY);
-	if (fd < 0)
-		return fd;
-
-	dbg->dwfl = dwfl_begin(&offline_callbacks);
-	if (!dbg->dwfl)
-		goto error;
-
-	dwfl_report_begin(dbg->dwfl);
-	dbg->mod = dwfl_report_offline(dbg->dwfl, "", "", fd);
-	if (!dbg->mod)
-		goto error;
-
-	dbg->dbg = dwfl_module_getdwarf(dbg->mod, &dbg->bias);
-	if (!dbg->dbg)
-		goto error;
-
-	dwfl_module_build_id(dbg->mod, &dbg->build_id, &dummy);
-
-	dwfl_report_end(dbg->dwfl, NULL, NULL);
-
-	return 0;
-error:
-	if (dbg->dwfl)
-		dwfl_end(dbg->dwfl);
-	else
-		close(fd);
-	memset(dbg, 0, sizeof(*dbg));
-
-	return -ENOENT;
-}
-
-static struct debuginfo *__debuginfo__new(const char *path)
-{
-	struct debuginfo *dbg = zalloc(sizeof(*dbg));
-	if (!dbg)
-		return NULL;
-
-	if (debuginfo__init_offline_dwarf(dbg, path) < 0)
-		zfree(&dbg);
-	if (dbg)
-		pr_debug("Open Debuginfo file: %s\n", path);
-	return dbg;
-}
-
-enum dso_binary_type distro_dwarf_types[] = {
-	DSO_BINARY_TYPE__FEDORA_DEBUGINFO,
-	DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
-	DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
-	DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
-	DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO,
-	DSO_BINARY_TYPE__NOT_FOUND,
-};
-
-struct debuginfo *debuginfo__new(const char *path)
-{
-	enum dso_binary_type *type;
-	char buf[PATH_MAX], nil = '\0';
-	struct dso *dso;
-	struct debuginfo *dinfo = NULL;
-	struct build_id bid;
-
-	/* Try to open distro debuginfo files */
-	dso = dso__new(path);
-	if (!dso)
-		goto out;
-
-	/* Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO */
-	if (is_regular_file(path) && filename__read_build_id(path, &bid) > 0)
-		dso__set_build_id(dso, &bid);
-
-	for (type = distro_dwarf_types;
-	     !dinfo && *type != DSO_BINARY_TYPE__NOT_FOUND;
-	     type++) {
-		if (dso__read_binary_type_filename(dso, *type, &nil,
-						   buf, PATH_MAX) < 0)
-			continue;
-		dinfo = __debuginfo__new(buf);
-	}
-	dso__put(dso);
-
-out:
-	/* if failed to open all distro debuginfo, open given binary */
-	return dinfo ? : __debuginfo__new(path);
-}
-
-void debuginfo__delete(struct debuginfo *dbg)
-{
-	if (dbg) {
-		if (dbg->dwfl)
-			dwfl_end(dbg->dwfl);
-		free(dbg);
-	}
-}
-
 /*
  * Probe finder related functions
  */
@@ -722,7 +604,7 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
 	ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1);
 	if (ret <= 0 || nops == 0) {
 		pf->fb_ops = NULL;
-#if _ELFUTILS_PREREQ(0, 142)
+#ifdef HAVE_DWARF_CFI_SUPPORT
 	} else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa &&
 		   (pf->cfi_eh != NULL || pf->cfi_dbg != NULL)) {
 		if ((dwarf_cfi_addrframe(pf->cfi_eh, pf->addr, &frame) != 0 &&
@@ -733,7 +615,7 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
 			free(frame);
 			return -ENOENT;
 		}
-#endif
+#endif /* HAVE_DWARF_CFI_SUPPORT */
 	}
 
 	/* Call finder's callback handler */
@@ -1258,7 +1140,7 @@ static int debuginfo__find_probes(struct debuginfo *dbg,
 
 	pf->machine = ehdr.e_machine;
 
-#if _ELFUTILS_PREREQ(0, 142)
+#ifdef HAVE_DWARF_CFI_SUPPORT
 	do {
 		GElf_Shdr shdr;
 
@@ -1268,7 +1150,7 @@ static int debuginfo__find_probes(struct debuginfo *dbg,
 
 		pf->cfi_dbg = dwarf_getcfi(dbg->dbg);
 	} while (0);
-#endif
+#endif /* HAVE_DWARF_CFI_SUPPORT */
 
 	ret = debuginfo__find_probe_location(dbg, pf);
 	return ret;
@@ -1677,44 +1559,6 @@ int debuginfo__find_available_vars_at(struct debuginfo *dbg,
 	return (ret < 0) ? ret : af.nvls;
 }
 
-/* For the kernel module, we need a special code to get a DIE */
-int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs,
-				bool adjust_offset)
-{
-	int n, i;
-	Elf32_Word shndx;
-	Elf_Scn *scn;
-	Elf *elf;
-	GElf_Shdr mem, *shdr;
-	const char *p;
-
-	elf = dwfl_module_getelf(dbg->mod, &dbg->bias);
-	if (!elf)
-		return -EINVAL;
-
-	/* Get the number of relocations */
-	n = dwfl_module_relocations(dbg->mod);
-	if (n < 0)
-		return -ENOENT;
-	/* Search the relocation related .text section */
-	for (i = 0; i < n; i++) {
-		p = dwfl_module_relocation_info(dbg->mod, i, &shndx);
-		if (strcmp(p, ".text") == 0) {
-			/* OK, get the section header */
-			scn = elf_getscn(elf, shndx);
-			if (!scn)
-				return -ENOENT;
-			shdr = gelf_getshdr(scn, &mem);
-			if (!shdr)
-				return -ENOENT;
-			*offs = shdr->sh_addr;
-			if (adjust_offset)
-				*offs -= shdr->sh_offset;
-		}
-	}
-	return 0;
-}
-
 /* Reverse search */
 int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr,
 				struct perf_probe_point *ppt)
@@ -2009,41 +1853,6 @@ found:
 	return (ret < 0) ? ret : lf.found;
 }
 
-#ifdef HAVE_DEBUGINFOD_SUPPORT
-/* debuginfod doesn't require the comp_dir but buildid is required */
-static int get_source_from_debuginfod(const char *raw_path,
-				const char *sbuild_id, char **new_path)
-{
-	debuginfod_client *c = debuginfod_begin();
-	const char *p = raw_path;
-	int fd;
-
-	if (!c)
-		return -ENOMEM;
-
-	fd = debuginfod_find_source(c, (const unsigned char *)sbuild_id,
-				0, p, new_path);
-	pr_debug("Search %s from debuginfod -> %d\n", p, fd);
-	if (fd >= 0)
-		close(fd);
-	debuginfod_end(c);
-	if (fd < 0) {
-		pr_debug("Failed to find %s in debuginfod (%s)\n",
-			raw_path, sbuild_id);
-		return -ENOENT;
-	}
-	pr_debug("Got a source %s\n", *new_path);
-
-	return 0;
-}
-#else
-static inline int get_source_from_debuginfod(const char *raw_path __maybe_unused,
-				const char *sbuild_id __maybe_unused,
-				char **new_path __maybe_unused)
-{
-	return -ENOTSUP;
-}
-#endif
 /*
  * Find a src file from a DWARF tag path. Prepend optional source path prefix
  * and chop off leading directories that do not exist. Result is passed back as
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index 8bc1c80d3c1c..3add5ff516e1 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -24,21 +24,7 @@ static inline int is_c_varname(const char *name)
 #ifdef HAVE_DWARF_SUPPORT
 
 #include "dwarf-aux.h"
-
-/* TODO: export debuginfo data structure even if no dwarf support */
-
-/* debug information structure */
-struct debuginfo {
-	Dwarf		*dbg;
-	Dwfl_Module	*mod;
-	Dwfl		*dwfl;
-	Dwarf_Addr	bias;
-	const unsigned char	*build_id;
-};
-
-/* This also tries to open distro debuginfo */
-struct debuginfo *debuginfo__new(const char *path);
-void debuginfo__delete(struct debuginfo *dbg);
+#include "debuginfo.h"
 
 /* Find probe_trace_events specified by perf_probe_event from debuginfo */
 int debuginfo__find_trace_events(struct debuginfo *dbg,
@@ -49,9 +35,6 @@ int debuginfo__find_trace_events(struct debuginfo *dbg,
 int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr,
 				struct perf_probe_point *ppt);
 
-int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs,
-			       bool adjust_offset);
-
 /* Find a line range */
 int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr);
 
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 9eb5c6a08999..87e817b3cf7e 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -237,8 +237,8 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str)
 
 	evsel = evlist__last(temp_evlist);
 
-	if (!evlist || perf_cpu_map__empty(evlist->core.user_requested_cpus)) {
-		struct perf_cpu_map *cpus = perf_cpu_map__new(NULL);
+	if (!evlist || perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus)) {
+		struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus();
 
 		if (cpus)
 			cpu =  perf_cpu_map__cpu(cpus, 0);
diff --git a/tools/perf/util/s390-cpumcf-kernel.h b/tools/perf/util/s390-cpumcf-kernel.h
index f55ca07f3ca1..74b36644e384 100644
--- a/tools/perf/util/s390-cpumcf-kernel.h
+++ b/tools/perf/util/s390-cpumcf-kernel.h
@@ -12,6 +12,8 @@
 #define	S390_CPUMCF_DIAG_DEF	0xfeef	/* Counter diagnostic entry ID */
 #define	PERF_EVENT_CPUM_CF_DIAG	0xBC000	/* Event: Counter sets */
 #define PERF_EVENT_CPUM_SF_DIAG	0xBD000 /* Event: Combined-sampling */
+#define PERF_EVENT_PAI_CRYPTO_ALL	0x1000 /* Event: CRYPTO_ALL */
+#define PERF_EVENT_PAI_NNPA_ALL	0x1800 /* Event: NNPA_ALL */
 
 struct cf_ctrset_entry {	/* CPU-M CF counter set entry (8 byte) */
 	unsigned int def:16;	/* 0-15  Data Entry Format */
diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c
index 115b16edb451..53383e97ec9d 100644
--- a/tools/perf/util/s390-sample-raw.c
+++ b/tools/perf/util/s390-sample-raw.c
@@ -51,8 +51,6 @@ static bool s390_cpumcfdg_testctr(struct perf_sample *sample)
 	struct cf_trailer_entry *te;
 	struct cf_ctrset_entry *cep, ce;
 
-	if (!len)
-		return false;
 	while (offset < len) {
 		cep = (struct cf_ctrset_entry *)(buf + offset);
 		ce.def = be16_to_cpu(cep->def);
@@ -125,6 +123,9 @@ static int get_counterset_start(int setnr)
 		return 128;
 	case CPUMF_CTR_SET_MT_DIAG:		/* Diagnostic counter set */
 		return 448;
+	case PERF_EVENT_PAI_NNPA_ALL:		/* PAI NNPA counter set */
+	case PERF_EVENT_PAI_CRYPTO_ALL:		/* PAI CRYPTO counter set */
+		return setnr;
 	default:
 		return -1;
 	}
@@ -212,27 +213,120 @@ static void s390_cpumcfdg_dump(struct perf_pmu *pmu, struct perf_sample *sample)
 	}
 }
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpacked"
+#pragma GCC diagnostic ignored "-Wattributes"
+/*
+ * Layout of one entry in PAI_CRYPTO/PAI_NNPA raw data.
+ */
+struct pai_data {		/* Event number and value */
+	u16 event_nr;		/* byte swapped with be16_to_cpu() when dumped */
+	u64 event_val;		/* byte swapped with be64_to_cpu() when dumped */
+} __packed;
+
+#pragma GCC diagnostic pop
+
+/*
+ * Test for valid raw data. At least one PAI event should be in the raw
+ * data section.
+ */
+static bool s390_pai_all_test(struct perf_sample *sample)
+{
+	/* Smallest valid payload: one event number plus one counter value. */
+	const size_t min_len = 0xa;
+
+	/* Shorter raw data cannot hold a complete PAI entry. */
+	return sample->raw_size >= min_len;
+}
+
+static void s390_pai_all_dump(struct evsel *evsel, struct perf_sample *sample)
+{
+	size_t len = sample->raw_size, offset = 0;
+	unsigned char *p = sample->raw_data;
+	const char *color = PERF_COLOR_BLUE;
+	struct pai_data pai_data;
+	char *ev_name;
+
+	while (offset < len) {	/* prints one "Counter:..." line per entry */
+		memcpy(&pai_data.event_nr, p, sizeof(pai_data.event_nr));
+		pai_data.event_nr = be16_to_cpu(pai_data.event_nr);
+		p += sizeof(pai_data.event_nr);
+		offset += sizeof(pai_data.event_nr);
+		/* The 64-bit value follows the 16-bit event number directly. */
+		memcpy(&pai_data.event_val, p, sizeof(pai_data.event_val));
+		pai_data.event_val = be64_to_cpu(pai_data.event_val);
+		p += sizeof(pai_data.event_val);
+		offset += sizeof(pai_data.event_val);
+		/* u64 is not 'unsigned long' on 32-bit hosts: print it via %llx. */
+		ev_name = get_counter_name(evsel->core.attr.config,
+					   pai_data.event_nr, evsel->pmu);
+		color_fprintf(stdout, color, "\tCounter:%03d %s Value:%#018llx\n",
+			      pai_data.event_nr, ev_name ?: "<unknown>",
+			      (unsigned long long)pai_data.event_val);
+		free(ev_name);
+		/* Stop once less than one complete 0xa byte entry remains. */
+		if (offset + 0xa > len)
+			break;
+	}
+	color_fprintf(stdout, color, "\n");
+}
+
 /* S390 specific trace event function. Check for PERF_RECORD_SAMPLE events
- * and if the event was triggered by a counter set diagnostic event display
- * its raw data.
+ * and if the event was triggered by a
+ * - counter set diagnostic event
+ * - processor activity assist (PAI) crypto counter event
+ * - processor activity assist (PAI) neural network processor assist (NNPA)
+ *   counter event
+ * display its raw data.
  * The function is only invoked when the dump flag -D is set.
+ *
+ * Function evlist__s390_sample_raw() is defined as call back after it has
+ * been verified that the perf.data file was created on s390 platform.
  */
-void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample)
+void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event,
+			     struct perf_sample *sample)
 {
+	const char *pai_name;
 	struct evsel *evsel;
 
 	if (event->header.type != PERF_RECORD_SAMPLE)
 		return;
 
 	evsel = evlist__event2evsel(evlist, event);
-	if (evsel == NULL ||
-	    evsel->core.attr.config != PERF_EVENT_CPUM_CF_DIAG)
+	if (!evsel)
+		return;
+
+	/* Check for raw data in sample */
+	if (!sample->raw_size || !sample->raw_data)
 		return;
 
 	/* Display raw data on screen */
-	if (!s390_cpumcfdg_testctr(sample)) {
-		pr_err("Invalid counter set data encountered\n");
+	if (evsel->core.attr.config == PERF_EVENT_CPUM_CF_DIAG) {
+		if (!evsel->pmu)
+			evsel->pmu = perf_pmus__find("cpum_cf");
+		if (!s390_cpumcfdg_testctr(sample))
+			pr_err("Invalid counter set data encountered\n");
+		else
+			s390_cpumcfdg_dump(evsel->pmu, sample);
+		return;
+	}
+
+	switch (evsel->core.attr.config) {
+	case PERF_EVENT_PAI_NNPA_ALL:
+		pai_name = "NNPA_ALL";
+		break;
+	case PERF_EVENT_PAI_CRYPTO_ALL:
+		pai_name = "CRYPTO_ALL";
+		break;
+	default:
 		return;
 	}
-	s390_cpumcfdg_dump(evsel->pmu, sample);
+
+	if (!s390_pai_all_test(sample)) {
+		pr_err("Invalid %s raw data encountered\n", pai_name);
+	} else {
+		if (!evsel->pmu)
+			evsel->pmu = perf_pmus__find_by_type(evsel->core.attr.type);
+		s390_pai_all_dump(evsel, sample);
+	}
 }
diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
index c92ad0f51ecd..70b2c3135555 100644
--- a/tools/perf/util/sample.h
+++ b/tools/perf/util/sample.h
@@ -113,6 +113,7 @@ struct perf_sample {
 	void *raw_data;
 	struct ip_callchain *callchain;
 	struct branch_stack *branch_stack;
+	u64 *branch_stack_cntr;
 	struct regs_dump  user_regs;
 	struct regs_dump  intr_regs;
 	struct stack_dump user_stack;
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 603091317bed..b072ac5d3bc2 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -490,6 +490,9 @@ static int perl_start_script(const char *script, int argc, const char **argv,
 	scripting_context->session = session;
 
 	command_line = malloc((argc + 2) * sizeof(const char *));
+	if (!command_line)
+		return -ENOMEM;
+
 	command_line[0] = "";
 	command_line[1] = script;
 	for (i = 2; i < argc + 2; i++)
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 94312741443a..860e1837ba96 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -353,6 +353,8 @@ static PyObject *get_field_numeric_entry(struct tep_event *event,
 
 	if (is_array) {
 		list = PyList_New(field->arraylen);
+		if (!list)
+			Py_FatalError("couldn't create Python list");
 		item_size = field->size / field->arraylen;
 		n_items = field->arraylen;
 	} else {
@@ -754,7 +756,7 @@ static void regs_map(struct regs_dump *regs, uint64_t mask, const char *arch, ch
 	}
 }
 
-static void set_regs_in_dict(PyObject *dict,
+static int set_regs_in_dict(PyObject *dict,
 			     struct perf_sample *sample,
 			     struct evsel *evsel)
 {
@@ -770,6 +772,8 @@ static void set_regs_in_dict(PyObject *dict,
 	 */
 	int size = __sw_hweight64(attr->sample_regs_intr) * 28;
 	char *bf = malloc(size);
+	if (!bf)
+		return -1;
 
 	regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, size);
 
@@ -781,6 +785,8 @@ static void set_regs_in_dict(PyObject *dict,
 	pydict_set_item_string_decref(dict, "uregs",
 			_PyUnicode_FromString(bf));
 	free(bf);
+
+	return 0;
 }
 
 static void set_sym_in_dict(PyObject *dict, struct addr_location *al,
@@ -920,7 +926,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
 			PyLong_FromUnsignedLongLong(sample->cyc_cnt));
 	}
 
-	set_regs_in_dict(dict, sample, evsel);
+	if (set_regs_in_dict(dict, sample, evsel))
+		Py_FatalError("Failed to setting regs in dict");
 
 	return dict;
 }
@@ -1918,12 +1925,18 @@ static int python_start_script(const char *script, int argc, const char **argv,
 	scripting_context->session = session;
 #if PY_MAJOR_VERSION < 3
 	command_line = malloc((argc + 1) * sizeof(const char *));
+	if (!command_line)
+		return -1;
+
 	command_line[0] = script;
 	for (i = 1; i < argc + 1; i++)
 		command_line[i] = argv[i - 1];
 	PyImport_AppendInittab(name, initperf_trace_context);
 #else
 	command_line = malloc((argc + 1) * sizeof(wchar_t *));
+	if (!command_line)
+		return -1;
+
 	command_line[0] = Py_DecodeLocale(script, NULL);
 	for (i = 1; i < argc + 1; i++)
 		command_line[i] = Py_DecodeLocale(argv[i - 1], NULL);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 1e9aa8ed15b6..199d3e8df315 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -115,6 +115,11 @@ static int perf_session__open(struct perf_session *session, int repipe_fd)
 		return -1;
 	}
 
+	if (perf_header__has_feat(&session->header, HEADER_AUXTRACE)) {
+		/* Auxiliary events may reference exited threads, hold onto dead ones. */
+		symbol_conf.keep_exited_threads = true;
+	}
+
 	if (perf_data__is_pipe(data))
 		return 0;
 
@@ -1150,9 +1155,13 @@ static void callchain__printf(struct evsel *evsel,
 		       i, callchain->ips[i]);
 }
 
-static void branch_stack__printf(struct perf_sample *sample, bool callstack)
+static void branch_stack__printf(struct perf_sample *sample,
+				 struct evsel *evsel)
 {
 	struct branch_entry *entries = perf_sample__branch_entries(sample);
+	bool callstack = evsel__has_branch_callstack(evsel);
+	u64 *branch_stack_cntr = sample->branch_stack_cntr;
+	struct perf_env *env = evsel__env(evsel);
 	uint64_t i;
 
 	if (!callstack) {
@@ -1194,6 +1203,13 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
 			}
 		}
 	}
+
+	if (branch_stack_cntr) {
+		printf("... branch stack counters: nr:%" PRIu64 " (counter width: %u max counter nr:%u)\n",
+			sample->branch_stack->nr, env->br_cntr_width, env->br_cntr_nr);
+		for (i = 0; i < sample->branch_stack->nr; i++)
+			printf("..... %2"PRIu64": %016" PRIx64 "\n", i, branch_stack_cntr[i]);
+	}
 }
 
 static void regs_dump__printf(u64 mask, u64 *regs, const char *arch)
@@ -1355,7 +1371,7 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
 		callchain__printf(evsel, sample);
 
 	if (evsel__has_br_stack(evsel))
-		branch_stack__printf(sample, evsel__has_branch_callstack(evsel));
+		branch_stack__printf(sample, evsel);
 
 	if (sample_type & PERF_SAMPLE_REGS_USER)
 		regs_user__printf(sample, arch);
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 80e4f6132740..30254eb63709 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -24,6 +24,7 @@
 #include "strbuf.h"
 #include "mem-events.h"
 #include "annotate.h"
+#include "annotate-data.h"
 #include "event.h"
 #include "time-utils.h"
 #include "cgroup.h"
@@ -418,6 +419,52 @@ struct sort_entry sort_sym = {
 	.se_width_idx	= HISTC_SYMBOL,
 };
 
+/* --sort symoff */
+
+static int64_t
+sort__symoff_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	/* The symbol decides first ... */
+	int64_t sym_order = sort__sym_cmp(left, right);
+
+	/* ... entries of the same symbol are then told apart by their ip. */
+	if (sym_order == 0)
+		return left->ip - right->ip;
+	return sym_order;
+}
+
+static int64_t
+sort__symoff_sort(struct hist_entry *left, struct hist_entry *right)
+{
+	/* Primary key: whatever order sort__sym_sort() gives the symbols. */
+	int64_t sym_order = sort__sym_sort(left, right);
+
+	/* Secondary key: the ip of the two entries. */
+	if (sym_order == 0)
+		return left->ip - right->ip;
+	return sym_order;
+}
+
+static int
+hist_entry__symoff_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width)
+{
+	struct symbol *sym = he->ms.sym;
+
+	/* Resolved samples print as "symbol+offset", others as the raw ip. */
+	if (sym != NULL)
+		return repsep_snprintf(bf, size, "[%c] %s+0x%llx", he->level, sym->name, he->ip - sym->start);
+	return repsep_snprintf(bf, size, "[%c] %-#.*llx", he->level, width - 4, he->ip);
+}
+
+struct sort_entry sort_sym_offset = {
+	.se_header	= "Symbol Offset",
+	.se_cmp		= sort__symoff_cmp,	/* by symbol, then by ip */
+	.se_sort	= sort__symoff_sort,	/* by symbol, then by ip */
+	.se_snprintf	= hist_entry__symoff_snprintf,	/* "symbol+0xoff" or raw ip */
+	.se_filter	= hist_entry__sym_filter,
+	.se_width_idx	= HISTC_SYMBOL_OFFSET,
+};
+
 /* --sort srcline */
 
 char *hist_entry__srcline(struct hist_entry *he)
@@ -583,21 +630,21 @@ static int hist_entry__sym_ipc_snprintf(struct hist_entry *he, char *bf,
 {
 
 	struct symbol *sym = he->ms.sym;
-	struct annotation *notes;
+	struct annotated_branch *branch;
 	double ipc = 0.0, coverage = 0.0;
 	char tmp[64];
 
 	if (!sym)
 		return repsep_snprintf(bf, size, "%-*s", width, "-");
 
-	notes = symbol__annotation(sym);
+	branch = symbol__annotation(sym)->branch;
 
-	if (notes->hit_cycles)
-		ipc = notes->hit_insn / ((double)notes->hit_cycles);
+	if (branch && branch->hit_cycles)
+		ipc = branch->hit_insn / ((double)branch->hit_cycles);
 
-	if (notes->total_insn) {
-		coverage = notes->cover_insn * 100.0 /
-			((double)notes->total_insn);
+	if (branch && branch->total_insn) {
+		coverage = branch->cover_insn * 100.0 /
+			((double)branch->total_insn);
 	}
 
 	snprintf(tmp, sizeof(tmp), "%-5.2f [%5.1f%%]", ipc, coverage);
@@ -2094,7 +2141,7 @@ struct sort_entry sort_dso_size = {
 	.se_width_idx	= HISTC_DSO_SIZE,
 };
 
-/* --sort dso_size */
+/* --sort addr */
 
 static int64_t
 sort__addr_cmp(struct hist_entry *left, struct hist_entry *right)
@@ -2131,6 +2178,152 @@ struct sort_entry sort_addr = {
 	.se_width_idx	= HISTC_ADDR,
 };
 
+/* --sort type */
+
+struct annotated_data_type unknown_type = {	/* fallback set by sort__type_init() */
+	.self = {
+		.type_name = (char *)"(unknown)",
+		.children = LIST_HEAD_INIT(unknown_type.self.children),
+	},
+};
+
+/* se_cmp callback: entries are compared with the --sort addr comparator. */
+static int64_t sort__type_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return sort__addr_cmp(left, right);
+}
+
+/* Resolve he->mem_type once; unresolved entries share &unknown_type. */
+static void sort__type_init(struct hist_entry *he)
+{
+	if (he->mem_type != NULL)
+		return;
+	he->mem_type = hist_entry__get_data_type(he);
+	if (he->mem_type)
+		return;
+	he->mem_type = &unknown_type;
+	he->mem_type_off = 0;
+}
+
+/*
+ * Compare two entries by the name of their data type.
+ * sort__type_init() returns early once he->mem_type is set, so it can be
+ * called unconditionally to cover entries that were not initialized yet.
+ */
+static int64_t
+sort__type_collapse(struct hist_entry *left, struct hist_entry *right)
+{
+	const char *left_name, *right_name;
+
+	sort__type_init(left);
+	sort__type_init(right);
+
+	left_name = left->mem_type->self.type_name;
+	right_name = right->mem_type->self.type_name;
+
+	return strcmp(left_name, right_name);
+}
+
+/* se_sort callback: same comparison as sort__type_collapse(). */
+static int64_t sort__type_sort(struct hist_entry *left, struct hist_entry *right)
+{
+	return sort__type_collapse(left, right);
+}
+
+static int hist_entry__type_snprintf(struct hist_entry *he, char *bf,
+				     size_t size, unsigned int width)
+{	/* prints the type name left-justified, padded to @width */
+	return repsep_snprintf(bf, size, "%-*s", width, he->mem_type->self.type_name);
+}
+
+struct sort_entry sort_type = {
+	.se_header	= "Data Type",
+	.se_cmp		= sort__type_cmp,	/* wraps sort__addr_cmp() */
+	.se_collapse	= sort__type_collapse,	/* strcmp() on type names */
+	.se_sort	= sort__type_sort,	/* wraps sort__type_collapse() */
+	.se_init	= sort__type_init,	/* fills he->mem_type once */
+	.se_snprintf	= hist_entry__type_snprintf,
+	.se_width_idx	= HISTC_TYPE,
+};
+
+/* --sort typeoff */
+
+/*
+ * Order entries by data type name, then by mem_type_off.
+ *
+ * sort__type_init() returns early once he->mem_type is set, so calling it
+ * unconditionally only resolves the entries that are still uninitialized.
+ */
+static int64_t
+sort__typeoff_sort(struct hist_entry *left, struct hist_entry *right)
+{
+	int64_t name_order;
+
+	sort__type_init(left);
+	sort__type_init(right);
+
+	name_order = strcmp(left->mem_type->self.type_name,
+			    right->mem_type->self.type_name);
+	if (name_order != 0)
+		return name_order;
+
+	/* Same type name: fall back to the offset difference */
+	return left->mem_type_off - right->mem_type_off;
+}
+
+/*
+ * Write into @buf the dotted path of the (nested) members of @m that
+ * contain @offset; @first suppresses the leading '.' separator.
+ */
+static void fill_member_name(char *buf, size_t sz, struct annotated_member *m,
+			     int offset, bool first)
+{
+	struct annotated_member *child;
+
+	list_for_each_entry(child, &m->children, node) {
+		int len = 0;
+
+		if (offset < child->offset || offset >= child->offset + child->size)
+			continue;
+		/* Anonymous struct/union members have no name to print */
+		if (child->var_name) {
+			len = scnprintf(buf, sz, "%s%s",
+					first ? "" : ".", child->var_name);
+			first = false;
+		}
+		fill_member_name(buf + len, sz - len, child, offset, first);
+		return;
+	}
+}
+
+/* Print "<type name> <signed offset> (<member path>)" for @he. */
+static int hist_entry__typeoff_snprintf(struct hist_entry *he, char *bf,
+				     size_t size, unsigned int width __maybe_unused)
+{
+	struct annotated_data_type *type = he->mem_type;
+	char member[4096];
+
+	member[0] = '\0';
+	if (!list_empty(&type->self.children))
+		fill_member_name(member, sizeof(member), &type->self, he->mem_type_off, true);
+	else
+		snprintf(member, sizeof(member), "no field");
+	member[sizeof(member) - 1] = '\0';
+
+	return repsep_snprintf(bf, size, "%s %+d (%s)", type->self.type_name,
+			       he->mem_type_off, member);
+}
+
+struct sort_entry sort_type_offset = {
+	.se_header	= "Data Type Offset",
+	.se_cmp		= sort__type_cmp,	/* shared with sort_type */
+	.se_collapse	= sort__typeoff_sort,	/* type name, then offset */
+	.se_sort	= sort__typeoff_sort,	/* same comparator as se_collapse */
+	.se_init	= sort__type_init,	/* shared with sort_type */
+	.se_snprintf	= hist_entry__typeoff_snprintf,
+	.se_width_idx	= HISTC_TYPE_OFFSET,
+};
+
 
 struct sort_dimension {
 	const char		*name;
@@ -2185,7 +2378,10 @@ static struct sort_dimension common_sort_dimensions[] = {
 	DIM(SORT_ADDR, "addr", sort_addr),
 	DIM(SORT_LOCAL_RETIRE_LAT, "local_retire_lat", sort_local_p_stage_cyc),
 	DIM(SORT_GLOBAL_RETIRE_LAT, "retire_lat", sort_global_p_stage_cyc),
-	DIM(SORT_SIMD, "simd", sort_simd)
+	DIM(SORT_SIMD, "simd", sort_simd),
+	DIM(SORT_ANNOTATE_DATA_TYPE, "type", sort_type),
+	DIM(SORT_ANNOTATE_DATA_TYPE_OFFSET, "typeoff", sort_type_offset),
+	DIM(SORT_SYM_OFFSET, "symoff", sort_sym_offset),
 };
 
 #undef DIM
@@ -3205,6 +3401,8 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
 			list->thread = 1;
 		} else if (sd->entry == &sort_comm) {
 			list->comm = 1;
+		} else if (sd->entry == &sort_type_offset) {
+			symbol_conf.annotate_data_member = true;
 		}
 
 		return __sort_dimension__add(sd, list, level);
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index ecfb7f1359d5..6f6b4189a389 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -15,6 +15,7 @@
 
 struct option;
 struct thread;
+struct annotated_data_type;
 
 extern regex_t parent_regex;
 extern const char *sort_order;
@@ -34,6 +35,7 @@ extern struct sort_entry sort_dso_to;
 extern struct sort_entry sort_sym_from;
 extern struct sort_entry sort_sym_to;
 extern struct sort_entry sort_srcline;
+extern struct sort_entry sort_type;
 extern const char default_mem_sort_order[];
 extern bool chk_double_cl;
 
@@ -111,6 +113,7 @@ struct hist_entry {
 	u64			p_stage_cyc;
 	u8			cpumode;
 	u8			depth;
+	int			mem_type_off;
 	struct simd_flags	simd_flags;
 
 	/* We are added by hists__add_dummy_entry. */
@@ -154,6 +157,7 @@ struct hist_entry {
 	struct perf_hpp_list	*hpp_list;
 	struct hist_entry	*parent_he;
 	struct hist_entry_ops	*ops;
+	struct annotated_data_type *mem_type;
 	union {
 		/* this is for hierarchical entry structure */
 		struct {
@@ -243,6 +247,9 @@ enum sort_type {
 	SORT_LOCAL_RETIRE_LAT,
 	SORT_GLOBAL_RETIRE_LAT,
 	SORT_SIMD,
+	SORT_ANNOTATE_DATA_TYPE,
+	SORT_ANNOTATE_DATA_TYPE_OFFSET,
+	SORT_SYM_OFFSET,
 
 	/* branch stack specific sort keys */
 	__SORT_BRANCH_STACK,
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index afe6db8e7bf4..8c61f8627ebc 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -898,7 +898,7 @@ static bool hybrid_uniquify(struct evsel *evsel, struct perf_stat_config *config
 
 static void uniquify_counter(struct perf_stat_config *config, struct evsel *counter)
 {
-	if (config->no_merge || hybrid_uniquify(counter, config))
+	if (config->aggr_mode == AGGR_NONE || hybrid_uniquify(counter, config))
 		uniquify_event_name(counter);
 }
 
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 1c5c3eeba4cf..e31426167852 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -264,7 +264,7 @@ static void print_ll_miss(struct perf_stat_config *config,
 	static const double color_ratios[3] = {20.0, 10.0, 5.0};
 
 	print_ratio(config, evsel, aggr_idx, misses, out, STAT_LL_CACHE, color_ratios,
-		    "of all L1-icache accesses");
+		    "of all LL-cache accesses");
 }
 
 static void print_dtlb_miss(struct perf_stat_config *config,
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index ec3506042217..b0bcf92f0f9c 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -315,7 +315,7 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals,
 	if (!counter->per_pkg)
 		return 0;
 
-	if (perf_cpu_map__empty(cpus))
+	if (perf_cpu_map__has_any_cpu_or_is_empty(cpus))
 		return 0;
 
 	if (!mask) {
@@ -592,7 +592,7 @@ void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *ev
 {
 	struct evsel *evsel;
 
-	if (config->no_merge)
+	if (config->aggr_mode == AGGR_NONE)
 		return;
 
 	evlist__for_each_entry(evlist, evsel)
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 325d0fad1842..4357ba114822 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -76,7 +76,6 @@ struct perf_stat_config {
 	bool			 null_run;
 	bool			 ru_display;
 	bool			 big_num;
-	bool			 no_merge;
 	bool			 hybrid_merge;
 	bool			 walltime_run_table;
 	bool			 all_kernel;
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 9e7eeaf616b8..4b934ed3bfd1 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -1392,8 +1392,7 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
 			map__set_start(map, shdr->sh_addr + ref_reloc(kmap));
 			map__set_end(map, map__start(map) + shdr->sh_size);
 			map__set_pgoff(map, shdr->sh_offset);
-			map__set_map_ip(map, map__dso_map_ip);
-			map__set_unmap_ip(map, map__dso_unmap_ip);
+			map__set_mapping_type(map, MAPPING_TYPE__DSO);
 			/* Ensure maps are correctly ordered */
 			if (kmaps) {
 				int err;
@@ -1455,8 +1454,7 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
 			map__set_end(curr_map, map__start(curr_map) + shdr->sh_size);
 			map__set_pgoff(curr_map, shdr->sh_offset);
 		} else {
-			map__set_map_ip(curr_map, identity__map_ip);
-			map__set_unmap_ip(curr_map, identity__map_ip);
+			map__set_mapping_type(curr_map, MAPPING_TYPE__IDENTITY);
 		}
 		curr_dso->symtab_type = dso->symtab_type;
 		if (maps__insert(kmaps, curr_map))
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
index a81a14769bd1..1da8b713509c 100644
--- a/tools/perf/util/symbol-minimal.c
+++ b/tools/perf/util/symbol-minimal.c
@@ -159,9 +159,10 @@ int filename__read_build_id(const char *filename, struct build_id *bid)
 				goto out_free;
 
 			ret = read_build_id(buf, buf_size, bid, need_swap);
-			if (ret == 0)
+			if (ret == 0) {
 				ret = bid->size;
-			break;
+				break;
+			}
 		}
 	} else {
 		Elf64_Ehdr ehdr;
@@ -210,9 +211,10 @@ int filename__read_build_id(const char *filename, struct build_id *bid)
 				goto out_free;
 
 			ret = read_build_id(buf, buf_size, bid, need_swap);
-			if (ret == 0)
+			if (ret == 0) {
 				ret = bid->size;
-			break;
+				break;
+			}
 		}
 	}
 out_free:
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 82cc74b9358e..be212ba157dc 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -48,11 +48,6 @@ static bool symbol__is_idle(const char *name);
 int vmlinux_path__nr_entries;
 char **vmlinux_path;
 
-struct map_list_node {
-	struct list_head node;
-	struct map *map;
-};
-
 struct symbol_conf symbol_conf = {
 	.nanosecs		= false,
 	.use_modules		= true,
@@ -90,11 +85,6 @@ static enum dso_binary_type binary_type_symtab[] = {
 
 #define DSO_BINARY_TYPE__SYMTAB_CNT ARRAY_SIZE(binary_type_symtab)
 
-static struct map_list_node *map_list_node__new(void)
-{
-	return malloc(sizeof(struct map_list_node));
-}
-
 static bool symbol_type__filter(char symbol_type)
 {
 	symbol_type = toupper(symbol_type);
@@ -270,29 +260,6 @@ void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms)
 		curr->end = roundup(curr->start, 4096) + 4096;
 }
 
-void maps__fixup_end(struct maps *maps)
-{
-	struct map_rb_node *prev = NULL, *curr;
-
-	down_write(maps__lock(maps));
-
-	maps__for_each_entry(maps, curr) {
-		if (prev != NULL && !map__end(prev->map))
-			map__set_end(prev->map, map__start(curr->map));
-
-		prev = curr;
-	}
-
-	/*
-	 * We still haven't the actual symbols, so guess the
-	 * last map final address.
-	 */
-	if (curr && !map__end(curr->map))
-		map__set_end(curr->map, ~0ULL);
-
-	up_write(maps__lock(maps));
-}
-
 struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *name)
 {
 	size_t namelen = strlen(name) + 1;
@@ -956,8 +923,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta,
 				return -1;
 			}
 
-			map__set_map_ip(curr_map, identity__map_ip);
-			map__set_unmap_ip(curr_map, identity__map_ip);
+			map__set_mapping_type(curr_map, MAPPING_TYPE__IDENTITY);
 			if (maps__insert(kmaps, curr_map)) {
 				dso__put(ndso);
 				return -1;
@@ -1148,33 +1114,35 @@ out_delete_from:
 	return ret;
 }
 
+/* Per-map callback: -EINVAL when a module map has no match in @data. */
+static int do_validate_kcore_modules_cb(struct map *old_map, void *data)
+{
+	struct rb_root *modules = data;
+	struct module_info *mi;
+
+	/* Maps that are not kernel modules are not validated */
+	if (!__map__is_kmodule(old_map))
+		return 0;
+
+	/* Module must be in memory at the same address */
+	mi = find_module(map__dso(old_map)->short_name, modules);
+	if (mi && mi->start == map__start(old_map))
+		return 0;
+
+	return -EINVAL;
+}
+
 static int do_validate_kcore_modules(const char *filename, struct maps *kmaps)
 {
 	struct rb_root modules = RB_ROOT;
-	struct map_rb_node *old_node;
 	int err;
 
 	err = read_proc_modules(filename, &modules);
 	if (err)
 		return err;
 
-	maps__for_each_entry(kmaps, old_node) {
-		struct map *old_map = old_node->map;
-		struct module_info *mi;
-		struct dso *dso;
+	err = maps__for_each_map(kmaps, do_validate_kcore_modules_cb, &modules);
 
-		if (!__map__is_kmodule(old_map)) {
-			continue;
-		}
-		dso = map__dso(old_map);
-		/* Module must be in memory at the same address */
-		mi = find_module(dso->short_name, &modules);
-		if (!mi || mi->start != map__start(old_map)) {
-			err = -EINVAL;
-			goto out;
-		}
-	}
-out:
 	delete_modules(&modules);
 	return err;
 }
@@ -1271,101 +1239,15 @@ static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data)
 	return 0;
 }
 
-/*
- * Merges map into maps by splitting the new map within the existing map
- * regions.
- */
-int maps__merge_in(struct maps *kmaps, struct map *new_map)
+static bool remove_old_maps(struct map *map, void *data)
 {
-	struct map_rb_node *rb_node;
-	LIST_HEAD(merged);
-	int err = 0;
-
-	maps__for_each_entry(kmaps, rb_node) {
-		struct map *old_map = rb_node->map;
-
-		/* no overload with this one */
-		if (map__end(new_map) < map__start(old_map) ||
-		    map__start(new_map) >= map__end(old_map))
-			continue;
-
-		if (map__start(new_map) < map__start(old_map)) {
-			/*
-			 * |new......
-			 *       |old....
-			 */
-			if (map__end(new_map) < map__end(old_map)) {
-				/*
-				 * |new......|     -> |new..|
-				 *       |old....| ->       |old....|
-				 */
-				map__set_end(new_map, map__start(old_map));
-			} else {
-				/*
-				 * |new.............| -> |new..|       |new..|
-				 *       |old....|    ->       |old....|
-				 */
-				struct map_list_node *m = map_list_node__new();
-
-				if (!m) {
-					err = -ENOMEM;
-					goto out;
-				}
-
-				m->map = map__clone(new_map);
-				if (!m->map) {
-					free(m);
-					err = -ENOMEM;
-					goto out;
-				}
-
-				map__set_end(m->map, map__start(old_map));
-				list_add_tail(&m->node, &merged);
-				map__add_pgoff(new_map, map__end(old_map) - map__start(new_map));
-				map__set_start(new_map, map__end(old_map));
-			}
-		} else {
-			/*
-			 *      |new......
-			 * |old....
-			 */
-			if (map__end(new_map) < map__end(old_map)) {
-				/*
-				 *      |new..|   -> x
-				 * |old.........| -> |old.........|
-				 */
-				map__put(new_map);
-				new_map = NULL;
-				break;
-			} else {
-				/*
-				 *      |new......| ->         |new...|
-				 * |old....|        -> |old....|
-				 */
-				map__add_pgoff(new_map, map__end(old_map) - map__start(new_map));
-				map__set_start(new_map, map__end(old_map));
-			}
-		}
-	}
-
-out:
-	while (!list_empty(&merged)) {
-		struct map_list_node *old_node;
-
-		old_node = list_entry(merged.next, struct map_list_node, node);
-		list_del_init(&old_node->node);
-		if (!err)
-			err = maps__insert(kmaps, old_node->map);
-		map__put(old_node->map);
-		free(old_node);
-	}
+	const struct map *map_to_save = data;
 
-	if (new_map) {
-		if (!err)
-			err = maps__insert(kmaps, new_map);
-		map__put(new_map);
-	}
-	return err;
+	/*
+	 * We need to preserve eBPF maps even if they are covered by kcore,
+	 * because we need to access eBPF dso for source data.
+	 */
+	return !RC_CHK_EQUAL(map, map_to_save) && !__map__is_bpf_prog(map);
 }
 
 static int dso__load_kcore(struct dso *dso, struct map *map,
@@ -1374,7 +1256,6 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
 	struct maps *kmaps = map__kmaps(map);
 	struct kcore_mapfn_data md;
 	struct map *replacement_map = NULL;
-	struct map_rb_node *old_node, *next;
 	struct machine *machine;
 	bool is_64_bit;
 	int err, fd;
@@ -1421,17 +1302,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
 	}
 
 	/* Remove old maps */
-	maps__for_each_entry_safe(kmaps, old_node, next) {
-		struct map *old_map = old_node->map;
-
-		/*
-		 * We need to preserve eBPF maps even if they are
-		 * covered by kcore, because we need to access
-		 * eBPF dso for source data.
-		 */
-		if (old_map != map && !__map__is_bpf_prog(old_map))
-			maps__remove(kmaps, old_map);
-	}
+	maps__remove_maps(kmaps, remove_old_maps, map);
 	machine->trampolines_mapped = false;
 
 	/* Find the kernel map using the '_stext' symbol */
@@ -1475,8 +1346,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
 			map__set_start(map, map__start(new_map));
 			map__set_end(map, map__end(new_map));
 			map__set_pgoff(map, map__pgoff(new_map));
-			map__set_map_ip(map, map__map_ip_ptr(new_map));
-			map__set_unmap_ip(map, map__unmap_ip_ptr(new_map));
+			map__set_mapping_type(map, map__mapping_type(new_map));
 			/* Ensure maps are correctly ordered */
 			map_ref = map__get(map);
 			maps__remove(kmaps, map_ref);
@@ -2067,124 +1937,6 @@ out:
 	return ret;
 }
 
-static int map__strcmp(const void *a, const void *b)
-{
-	const struct map *map_a = *(const struct map **)a;
-	const struct map *map_b = *(const struct map **)b;
-	const struct dso *dso_a = map__dso(map_a);
-	const struct dso *dso_b = map__dso(map_b);
-	int ret = strcmp(dso_a->short_name, dso_b->short_name);
-
-	if (ret == 0 && map_a != map_b) {
-		/*
-		 * Ensure distinct but name equal maps have an order in part to
-		 * aid reference counting.
-		 */
-		ret = (int)map__start(map_a) - (int)map__start(map_b);
-		if (ret == 0)
-			ret = (int)((intptr_t)map_a - (intptr_t)map_b);
-	}
-
-	return ret;
-}
-
-static int map__strcmp_name(const void *name, const void *b)
-{
-	const struct dso *dso = map__dso(*(const struct map **)b);
-
-	return strcmp(name, dso->short_name);
-}
-
-void __maps__sort_by_name(struct maps *maps)
-{
-	qsort(maps__maps_by_name(maps), maps__nr_maps(maps), sizeof(struct map *), map__strcmp);
-}
-
-static int map__groups__sort_by_name_from_rbtree(struct maps *maps)
-{
-	struct map_rb_node *rb_node;
-	struct map **maps_by_name = realloc(maps__maps_by_name(maps),
-					    maps__nr_maps(maps) * sizeof(struct map *));
-	int i = 0;
-
-	if (maps_by_name == NULL)
-		return -1;
-
-	up_read(maps__lock(maps));
-	down_write(maps__lock(maps));
-
-	RC_CHK_ACCESS(maps)->maps_by_name = maps_by_name;
-	RC_CHK_ACCESS(maps)->nr_maps_allocated = maps__nr_maps(maps);
-
-	maps__for_each_entry(maps, rb_node)
-		maps_by_name[i++] = map__get(rb_node->map);
-
-	__maps__sort_by_name(maps);
-
-	up_write(maps__lock(maps));
-	down_read(maps__lock(maps));
-
-	return 0;
-}
-
-static struct map *__maps__find_by_name(struct maps *maps, const char *name)
-{
-	struct map **mapp;
-
-	if (maps__maps_by_name(maps) == NULL &&
-	    map__groups__sort_by_name_from_rbtree(maps))
-		return NULL;
-
-	mapp = bsearch(name, maps__maps_by_name(maps), maps__nr_maps(maps),
-		       sizeof(*mapp), map__strcmp_name);
-	if (mapp)
-		return *mapp;
-	return NULL;
-}
-
-struct map *maps__find_by_name(struct maps *maps, const char *name)
-{
-	struct map_rb_node *rb_node;
-	struct map *map;
-
-	down_read(maps__lock(maps));
-
-
-	if (RC_CHK_ACCESS(maps)->last_search_by_name) {
-		const struct dso *dso = map__dso(RC_CHK_ACCESS(maps)->last_search_by_name);
-
-		if (strcmp(dso->short_name, name) == 0) {
-			map = RC_CHK_ACCESS(maps)->last_search_by_name;
-			goto out_unlock;
-		}
-	}
-	/*
-	 * If we have maps->maps_by_name, then the name isn't in the rbtree,
-	 * as maps->maps_by_name mirrors the rbtree when lookups by name are
-	 * made.
-	 */
-	map = __maps__find_by_name(maps, name);
-	if (map || maps__maps_by_name(maps) != NULL)
-		goto out_unlock;
-
-	/* Fallback to traversing the rbtree... */
-	maps__for_each_entry(maps, rb_node) {
-		struct dso *dso;
-
-		map = rb_node->map;
-		dso = map__dso(map);
-		if (strcmp(dso->short_name, name) == 0) {
-			RC_CHK_ACCESS(maps)->last_search_by_name = map;
-			goto out_unlock;
-		}
-	}
-	map = NULL;
-
-out_unlock:
-	up_read(maps__lock(maps));
-	return map;
-}
-
 int dso__load_vmlinux(struct dso *dso, struct map *map,
 		      const char *vmlinux, bool vmlinux_allocated)
 {
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index af87c46b3f89..071837ddce2a 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -189,7 +189,6 @@ void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym,
 void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym);
 void symbols__fixup_duplicate(struct rb_root_cached *symbols);
 void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms);
-void maps__fixup_end(struct maps *maps);
 
 typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data);
 int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data,
diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
index 0b589570d1d0..c114bbceef40 100644
--- a/tools/perf/util/symbol_conf.h
+++ b/tools/perf/util/symbol_conf.h
@@ -42,7 +42,11 @@ struct symbol_conf {
 			inline_name,
 			disable_add2line_warn,
 			buildid_mmap2,
-			guest_code;
+			guest_code,
+			lazy_load_kernel_maps,
+			keep_exited_threads,
+			annotate_data_member,
+			annotate_data_sample;
 	const char	*vmlinux_name,
 			*kallsyms_name,
 			*source_prefix,
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index a0579c7d7b9e..3712186353fb 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -665,18 +665,74 @@ int perf_event__synthesize_cgroups(struct perf_tool *tool __maybe_unused,
 }
 #endif
 
+struct perf_event__synthesize_modules_maps_cb_args {
+	struct perf_tool *tool;
+	perf_event__handler_t process;
+	struct machine *machine;
+	union perf_event *event;	/* buffer rewritten by the callback for each module map */
+};
+
+/* Per-map callback: synthesize one MMAP or MMAP2 event for a module map. */
+static int perf_event__synthesize_modules_maps_cb(struct map *map, void *data)
+{
+	struct perf_event__synthesize_modules_maps_cb_args *args = data;
+	struct machine *machine = args->machine;
+	union perf_event *event = args->event;
+	struct dso *dso;
+	size_t size;
+
+	if (!__map__is_kmodule(map))
+		return 0;
+
+	dso = map__dso(map);
+	/* Name length including the NUL, aligned to sizeof(u64) */
+	size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64));
+
+	if (!symbol_conf.buildid_mmap2) {
+		event->mmap.header.type = PERF_RECORD_MMAP;
+		event->mmap.header.size = (sizeof(event->mmap) -
+					(sizeof(event->mmap.filename) - size));
+		memset(event->mmap.filename + size, 0, machine->id_hdr_size);
+		event->mmap.header.size += machine->id_hdr_size;
+		event->mmap.start = map__start(map);
+		event->mmap.len   = map__size(map);
+		event->mmap.pid   = machine->pid;
+
+		memcpy(event->mmap.filename, dso->long_name, dso->long_name_len + 1);
+	} else {
+		event->mmap2.header.type = PERF_RECORD_MMAP2;
+		event->mmap2.header.size = (sizeof(event->mmap2) -
+					(sizeof(event->mmap2.filename) - size));
+		memset(event->mmap2.filename + size, 0, machine->id_hdr_size);
+		event->mmap2.header.size += machine->id_hdr_size;
+		event->mmap2.start = map__start(map);
+		event->mmap2.len   = map__size(map);
+		event->mmap2.pid   = machine->pid;
+
+		memcpy(event->mmap2.filename, dso->long_name, dso->long_name_len + 1);
+
+		perf_record_mmap2__read_build_id(&event->mmap2, machine, false);
+	}
+
+	return perf_tool__process_synth_event(args->tool, event, machine, args->process) ? -1 : 0;
+}
+
 int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process,
 				   struct machine *machine)
 {
-	int rc = 0;
-	struct map_rb_node *pos;
+	int rc;
 	struct maps *maps = machine__kernel_maps(machine);
-	union perf_event *event;
-	size_t size = symbol_conf.buildid_mmap2 ?
-			sizeof(event->mmap2) : sizeof(event->mmap);
+	struct perf_event__synthesize_modules_maps_cb_args args = {
+		.tool = tool,
+		.process = process,
+		.machine = machine,
+	};
+	size_t size = symbol_conf.buildid_mmap2
+		? sizeof(args.event->mmap2)
+		: sizeof(args.event->mmap);
 
-	event = zalloc(size + machine->id_hdr_size);
-	if (event == NULL) {
+	args.event = zalloc(size + machine->id_hdr_size);
+	if (args.event == NULL) {
 		pr_debug("Not enough memory synthesizing mmap event "
 			 "for kernel modules\n");
 		return -1;
@@ -687,53 +743,13 @@ int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t
 	 * __perf_event_mmap
 	 */
 	if (machine__is_host(machine))
-		event->header.misc = PERF_RECORD_MISC_KERNEL;
+		args.event->header.misc = PERF_RECORD_MISC_KERNEL;
 	else
-		event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
-
-	maps__for_each_entry(maps, pos) {
-		struct map *map = pos->map;
-		struct dso *dso;
+		args.event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
 
-		if (!__map__is_kmodule(map))
-			continue;
+	rc = maps__for_each_map(maps, perf_event__synthesize_modules_maps_cb, &args);
 
-		dso = map__dso(map);
-		if (symbol_conf.buildid_mmap2) {
-			size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64));
-			event->mmap2.header.type = PERF_RECORD_MMAP2;
-			event->mmap2.header.size = (sizeof(event->mmap2) -
-						(sizeof(event->mmap2.filename) - size));
-			memset(event->mmap2.filename + size, 0, machine->id_hdr_size);
-			event->mmap2.header.size += machine->id_hdr_size;
-			event->mmap2.start = map__start(map);
-			event->mmap2.len   = map__size(map);
-			event->mmap2.pid   = machine->pid;
-
-			memcpy(event->mmap2.filename, dso->long_name, dso->long_name_len + 1);
-
-			perf_record_mmap2__read_build_id(&event->mmap2, machine, false);
-		} else {
-			size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64));
-			event->mmap.header.type = PERF_RECORD_MMAP;
-			event->mmap.header.size = (sizeof(event->mmap) -
-						(sizeof(event->mmap.filename) - size));
-			memset(event->mmap.filename + size, 0, machine->id_hdr_size);
-			event->mmap.header.size += machine->id_hdr_size;
-			event->mmap.start = map__start(map);
-			event->mmap.len   = map__size(map);
-			event->mmap.pid   = machine->pid;
-
-			memcpy(event->mmap.filename, dso->long_name, dso->long_name_len + 1);
-		}
-
-		if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
-			rc = -1;
-			break;
-		}
-	}
-
-	free(event);
+	free(args.event);
 	return rc;
 }
 
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index fe5e6991ae4b..89c47a5098e2 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -345,38 +345,36 @@ int thread__insert_map(struct thread *thread, struct map *map)
 	if (ret)
 		return ret;
 
-	maps__fixup_overlappings(thread__maps(thread), map, stderr);
-	return maps__insert(thread__maps(thread), map);
+	return maps__fixup_overlap_and_insert(thread__maps(thread), map);
 }
 
-static int __thread__prepare_access(struct thread *thread)
+struct thread__prepare_access_maps_cb_args {
+	int err;		/* result of the last unwind__prepare_access() call */
+	struct maps *maps;	/* passed through to unwind__prepare_access() */
+};
+
+static int thread__prepare_access_maps_cb(struct map *map, void *data)
 {
 	bool initialized = false;
-	int err = 0;
-	struct maps *maps = thread__maps(thread);
-	struct map_rb_node *rb_node;
-
-	down_read(maps__lock(maps));
-
-	maps__for_each_entry(maps, rb_node) {
-		err = unwind__prepare_access(thread__maps(thread), rb_node->map, &initialized);
-		if (err || initialized)
-			break;
-	}
+	struct thread__prepare_access_maps_cb_args *args = data;
 
-	up_read(maps__lock(maps));
+	args->err = unwind__prepare_access(args->maps, map, &initialized);
 
-	return err;
+	return (args->err || initialized) ? 1 : 0;
 }
 
 static int thread__prepare_access(struct thread *thread)
 {
-	int err = 0;
+	struct thread__prepare_access_maps_cb_args args = {
+		.err = 0,
+	};
 
-	if (dwarf_callchain_users)
-		err = __thread__prepare_access(thread);
+	if (dwarf_callchain_users) {
+		args.maps = thread__maps(thread);
+		maps__for_each_map(thread__maps(thread), thread__prepare_access_maps_cb, &args);
+	}
 
-	return err;
+	return args.err;
 }
 
 static int thread__clone_maps(struct thread *thread, struct thread *parent, bool do_maps_clone)
@@ -385,14 +383,14 @@ static int thread__clone_maps(struct thread *thread, struct thread *parent, bool
 	if (thread__pid(thread) == thread__pid(parent))
 		return thread__prepare_access(thread);
 
-	if (thread__maps(thread) == thread__maps(parent)) {
+	if (RC_CHK_EQUAL(thread__maps(thread), thread__maps(parent))) {
 		pr_debug("broken map groups on thread %d/%d parent %d/%d\n",
 			 thread__pid(thread), thread__tid(thread),
 			 thread__pid(parent), thread__tid(parent));
 		return 0;
 	}
 	/* But this one is new process, copy maps. */
-	return do_maps_clone ? maps__clone(thread, thread__maps(parent)) : 0;
+	return do_maps_clone ? maps__copy_from(thread__maps(thread), thread__maps(parent)) : 0;
 }
 
 int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone)
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index e79225a0ea46..0df775b5c110 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -36,13 +36,22 @@ struct thread_rb_node {
 };
 
 DECLARE_RC_STRUCT(thread) {
+	/** @maps: mmaps associated with this thread. */
 	struct maps		*maps;
 	pid_t			pid_; /* Not all tools update this */
+	/** @tid: thread ID number unique to a machine. */
 	pid_t			tid;
+	/** @ppid: parent process of the process this thread belongs to. */
 	pid_t			ppid;
 	int			cpu;
 	int			guest_cpu; /* For QEMU thread */
 	refcount_t		refcnt;
+	/**
+	 * @exited: Has the thread had an exit event. Such threads are usually
+	 * removed from the machine's threads but some events/tools require
+	 * access to dead threads.
+	 */
+	bool			exited;
 	bool			comm_set;
 	int			comm_len;
 	struct list_head	namespaces_list;
@@ -189,6 +198,11 @@ static inline refcount_t *thread__refcnt(struct thread *thread)
 	return &RC_CHK_ACCESS(thread)->refcnt;
 }
 
+static inline void thread__set_exited(struct thread *thread, bool exited)
+{
+	RC_CHK_ACCESS(thread)->exited = exited;	/* see @exited in struct thread */
+}
+
 static inline bool thread__comm_set(const struct thread *thread)
 {
 	return RC_CHK_ACCESS(thread)->comm_set;
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
index be7157de0451..4db3d1bd686c 100644
--- a/tools/perf/util/top.c
+++ b/tools/perf/util/top.c
@@ -28,6 +28,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
 	struct record_opts *opts = &top->record_opts;
 	struct target *target = &opts->target;
 	size_t ret = 0;
+	int nr_cpus;
 
 	if (top->samples) {
 		samples_per_sec = top->samples / top->delay_secs;
@@ -93,19 +94,17 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
 	else
 		ret += SNPRINTF(bf + ret, size - ret, " (all");
 
+	nr_cpus = perf_cpu_map__nr(top->evlist->core.user_requested_cpus);
 	if (target->cpu_list)
 		ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)",
-				perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1
-				? "s" : "",
+				nr_cpus > 1 ? "s" : "",
 				target->cpu_list);
 	else {
 		if (target->tid)
 			ret += SNPRINTF(bf + ret, size - ret, ")");
 		else
 			ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)",
-					perf_cpu_map__nr(top->evlist->core.user_requested_cpus),
-					perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1
-					? "s" : "");
+					nr_cpus, nr_cpus > 1 ? "s" : "");
 	}
 
 	perf_top__reset_sample_counters(top);
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index a8b0d79bd96c..4c5588dbb131 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -21,7 +21,6 @@ struct perf_top {
 	struct perf_tool   tool;
 	struct evlist *evlist, *sb_evlist;
 	struct record_opts record_opts;
-	struct annotation_options annotation_opts;
 	struct evswitch	   evswitch;
 	/*
 	 * Symbols will be added here in perf_event__process_sample and will
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index 8554db3fc0d7..6013335a8dae 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -46,6 +46,7 @@ static int __report_module(struct addr_location *al, u64 ip,
 {
 	Dwfl_Module *mod;
 	struct dso *dso = NULL;
+	Dwarf_Addr base;
 	/*
 	 * Some callers will use al->sym, so we can't just use the
 	 * cheaper thread__find_map() here.
@@ -58,13 +59,25 @@ static int __report_module(struct addr_location *al, u64 ip,
 	if (!dso)
 		return 0;
 
+	/*
+	 * The generated JIT DSO files only map the code segment, without
+	 * ELF headers.  Since JIT code is typically packed into a single
+	 * memory segment, calculating the base address using pgoff can
+	 * land in the code of a different DSO.  So just use map->start
+	 * directly to pick the correct one.
+	 */
+	if (!strncmp(dso->long_name, "/tmp/jitted-", 12))
+		base = map__start(al->map);
+	else
+		base = map__start(al->map) - map__pgoff(al->map);
+
 	mod = dwfl_addrmodule(ui->dwfl, ip);
 	if (mod) {
 		Dwarf_Addr s;
 
 		dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL);
-		if (s != map__start(al->map) - map__pgoff(al->map))
-			mod = 0;
+		if (s != base)
+			mod = NULL;
 	}
 
 	if (!mod) {
@@ -72,14 +85,14 @@ static int __report_module(struct addr_location *al, u64 ip,
 
 		__symbol__join_symfs(filename, sizeof(filename), dso->long_name);
 		mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1,
-				      map__start(al->map) - map__pgoff(al->map), false);
+				      base, false);
 	}
 	if (!mod) {
 		char filename[PATH_MAX];
 
 		if (dso__build_id_filename(dso, filename, sizeof(filename), false))
 			mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1,
-					      map__start(al->map) - map__pgoff(al->map), false);
+					      base, false);
 	}
 
 	if (mod) {
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index c0641882fd2f..dac536e28360 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -302,12 +302,31 @@ static int unwind_spec_ehframe(struct dso *dso, struct machine *machine,
 	return 0;
 }
 
+struct read_unwind_spec_eh_frame_maps_cb_args {
+	struct dso *dso;
+	u64 base_addr;
+};
+
+static int read_unwind_spec_eh_frame_maps_cb(struct map *map, void *data)
+{
+
+	struct read_unwind_spec_eh_frame_maps_cb_args *args = data;
+
+	if (map__dso(map) == args->dso && map__start(map) - map__pgoff(map) < args->base_addr)
+		args->base_addr = map__start(map) - map__pgoff(map);
+
+	return 0;
+}
+
+
 static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui,
 				     u64 *table_data, u64 *segbase,
 				     u64 *fde_count)
 {
-	struct map_rb_node *map_node;
-	u64 base_addr = UINT64_MAX;
+	struct read_unwind_spec_eh_frame_maps_cb_args args = {
+		.dso = dso,
+		.base_addr = UINT64_MAX,
+	};
 	int ret, fd;
 
 	if (dso->data.eh_frame_hdr_offset == 0) {
@@ -325,16 +344,11 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui,
 			return -EINVAL;
 	}
 
-	maps__for_each_entry(thread__maps(ui->thread), map_node) {
-		struct map *map = map_node->map;
-		u64 start = map__start(map);
+	maps__for_each_map(thread__maps(ui->thread), read_unwind_spec_eh_frame_maps_cb, &args);
 
-		if (map__dso(map) == dso && start < base_addr)
-			base_addr = start;
-	}
-	base_addr -= dso->data.elf_base_addr;
+	args.base_addr -= dso->data.elf_base_addr;
 	/* Address of .eh_frame_hdr */
-	*segbase = base_addr + dso->data.eh_frame_hdr_addr;
+	*segbase = args.base_addr + dso->data.eh_frame_hdr_addr;
 	ret = unwind_spec_ehframe(dso, ui->machine, dso->data.eh_frame_hdr_offset,
 				   table_data, fde_count);
 	if (ret)
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
index ae3eee69b659..df8963796187 100644
--- a/tools/perf/util/vdso.c
+++ b/tools/perf/util/vdso.c
@@ -140,23 +140,34 @@ static struct dso *__machine__addnew_vdso(struct machine *machine, const char *s
 	return dso;
 }
 
+struct machine__thread_dso_type_maps_cb_args {
+	struct machine *machine;
+	enum dso_type dso_type;
+};
+
+static int machine__thread_dso_type_maps_cb(struct map *map, void *data)
+{
+	struct machine__thread_dso_type_maps_cb_args *args = data;
+	struct dso *dso = map__dso(map);
+
+	if (!dso || dso->long_name[0] != '/')
+		return 0;
+
+	args->dso_type = dso__type(dso, args->machine);
+	return (args->dso_type != DSO__TYPE_UNKNOWN) ? 1 : 0;
+}
+
 static enum dso_type machine__thread_dso_type(struct machine *machine,
 					      struct thread *thread)
 {
-	enum dso_type dso_type = DSO__TYPE_UNKNOWN;
-	struct map_rb_node *rb_node;
-
-	maps__for_each_entry(thread__maps(thread), rb_node) {
-		struct dso *dso = map__dso(rb_node->map);
+	struct machine__thread_dso_type_maps_cb_args args = {
+		.machine = machine,
+		.dso_type = DSO__TYPE_UNKNOWN,
+	};
 
-		if (!dso || dso->long_name[0] != '/')
-			continue;
-		dso_type = dso__type(dso, machine);
-		if (dso_type != DSO__TYPE_UNKNOWN)
-			break;
-	}
+	maps__for_each_map(thread__maps(thread), machine__thread_dso_type_maps_cb, &args);
 
-	return dso_type;
+	return args.dso_type;
 }
 
 #if BITS_PER_LONG == 64
diff --git a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c
index 48dd2b018c47..57027e0ac7b6 100644
--- a/tools/perf/util/zstd.c
+++ b/tools/perf/util/zstd.c
@@ -7,35 +7,9 @@
 
 int zstd_init(struct zstd_data *data, int level)
 {
-	size_t ret;
-
-	data->dstream = ZSTD_createDStream();
-	if (data->dstream == NULL) {
-		pr_err("Couldn't create decompression stream.\n");
-		return -1;
-	}
-
-	ret = ZSTD_initDStream(data->dstream);
-	if (ZSTD_isError(ret)) {
-		pr_err("Failed to initialize decompression stream: %s\n", ZSTD_getErrorName(ret));
-		return -1;
-	}
-
-	if (!level)
-		return 0;
-
-	data->cstream = ZSTD_createCStream();
-	if (data->cstream == NULL) {
-		pr_err("Couldn't create compression stream.\n");
-		return -1;
-	}
-
-	ret = ZSTD_initCStream(data->cstream, level);
-	if (ZSTD_isError(ret)) {
-		pr_err("Failed to initialize compression stream: %s\n", ZSTD_getErrorName(ret));
-		return -1;
-	}
-
+	data->comp_level = level;
+	data->dstream = NULL;
+	data->cstream = NULL;
 	return 0;
 }
 
@@ -54,7 +28,7 @@ int zstd_fini(struct zstd_data *data)
 	return 0;
 }
 
-size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size,
+ssize_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size,
 				       void *src, size_t src_size, size_t max_record_size,
 				       size_t process_header(void *record, size_t increment))
 {
@@ -63,6 +37,21 @@ size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t
 	ZSTD_outBuffer output;
 	void *record;
 
+	if (!data->cstream) {
+		data->cstream = ZSTD_createCStream();
+		if (data->cstream == NULL) {
+			pr_err("Couldn't create compression stream.\n");
+			return -1;
+		}
+
+		ret = ZSTD_initCStream(data->cstream, data->comp_level);
+		if (ZSTD_isError(ret)) {
+			pr_err("Failed to initialize compression stream: %s\n",
+				ZSTD_getErrorName(ret));
+			return -1;
+		}
+	}
+
 	while (input.pos < input.size) {
 		record = dst;
 		size = process_header(record, 0);
@@ -96,6 +85,20 @@ size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size
 	ZSTD_inBuffer input = { src, src_size, 0 };
 	ZSTD_outBuffer output = { dst, dst_size, 0 };
 
+	if (!data->dstream) {
+		data->dstream = ZSTD_createDStream();
+		if (data->dstream == NULL) {
+			pr_err("Couldn't create decompression stream.\n");
+			return 0;
+		}
+
+		ret = ZSTD_initDStream(data->dstream);
+		if (ZSTD_isError(ret)) {
+			pr_err("Failed to initialize decompression stream: %s\n",
+				ZSTD_getErrorName(ret));
+			return 0;
+		}
+	}
 	while (input.pos < input.size) {
 		ret = ZSTD_decompressStream(data->dstream, &output, &input);
 		if (ZSTD_isError(ret)) {
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
index c54d1697f439..d508486cc0bd 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
@@ -162,7 +162,7 @@ prio_arp()
 	local mode=$1
 
 	for primary_reselect in 0 1 2; do
-		prio_test "mode active-backup arp_interval 100 arp_ip_target ${g_ip4} primary eth1 primary_reselect $primary_reselect"
+		prio_test "mode $mode arp_interval 100 arp_ip_target ${g_ip4} primary eth1 primary_reselect $primary_reselect"
 		log_test "prio" "$mode arp_ip_target primary_reselect $primary_reselect"
 	done
 }
@@ -178,7 +178,7 @@ prio_ns()
 	fi
 
 	for primary_reselect in 0 1 2; do
-		prio_test "mode active-backup arp_interval 100 ns_ip6_target ${g_ip6} primary eth1 primary_reselect $primary_reselect"
+		prio_test "mode $mode arp_interval 100 ns_ip6_target ${g_ip6} primary eth1 primary_reselect $primary_reselect"
 		log_test "prio" "$mode ns_ip6_target primary_reselect $primary_reselect"
 	done
 }
@@ -194,9 +194,9 @@ prio()
 
 	for mode in $modes; do
 		prio_miimon $mode
-		prio_arp $mode
-		prio_ns $mode
 	done
+	prio_arp "active-backup"
+	prio_ns "active-backup"
 }
 
 arp_validate_test()
diff --git a/tools/testing/selftests/drivers/net/bonding/settings b/tools/testing/selftests/drivers/net/bonding/settings
index 6091b45d226b..79b65bdf05db 100644
--- a/tools/testing/selftests/drivers/net/bonding/settings
+++ b/tools/testing/selftests/drivers/net/bonding/settings
@@ -1 +1 @@
-timeout=120
+timeout=1200
diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
index 4855ef597a15..f98435c502f6 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
@@ -270,6 +270,7 @@ for port in 0 1; do
 	echo 1 > $NSIM_DEV_SYS/new_port
     fi
     NSIM_NETDEV=`get_netdev_name old_netdevs`
+    ifconfig $NSIM_NETDEV up
 
     msg="new NIC device created"
     exp0=( 0 0 0 0 )
@@ -431,6 +432,7 @@ for port in 0 1; do
     fi
 
     echo $port > $NSIM_DEV_SYS/new_port
+    NSIM_NETDEV=`get_netdev_name old_netdevs`
     ifconfig $NSIM_NETDEV up
 
     overflow_table0 "overflow NIC table"
@@ -488,6 +490,7 @@ for port in 0 1; do
     fi
 
     echo $port > $NSIM_DEV_SYS/new_port
+    NSIM_NETDEV=`get_netdev_name old_netdevs`
     ifconfig $NSIM_NETDEV up
 
     overflow_table0 "overflow NIC table"
@@ -544,6 +547,7 @@ for port in 0 1; do
     fi
 
     echo $port > $NSIM_DEV_SYS/new_port
+    NSIM_NETDEV=`get_netdev_name old_netdevs`
     ifconfig $NSIM_NETDEV up
 
     overflow_table0 "destroy NIC"
@@ -573,6 +577,7 @@ for port in 0 1; do
     fi
 
     echo $port > $NSIM_DEV_SYS/new_port
+    NSIM_NETDEV=`get_netdev_name old_netdevs`
     ifconfig $NSIM_NETDEV up
 
     msg="create VxLANs v6"
@@ -633,6 +638,7 @@ for port in 0 1; do
     fi
 
     echo $port > $NSIM_DEV_SYS/new_port
+    NSIM_NETDEV=`get_netdev_name old_netdevs`
     ifconfig $NSIM_NETDEV up
 
     echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error
@@ -688,6 +694,7 @@ for port in 0 1; do
     fi
 
     echo $port > $NSIM_DEV_SYS/new_port
+    NSIM_NETDEV=`get_netdev_name old_netdevs`
     ifconfig $NSIM_NETDEV up
 
     msg="create VxLANs v6"
@@ -747,6 +754,7 @@ for port in 0 1; do
     fi
 
     echo $port > $NSIM_DEV_SYS/new_port
+    NSIM_NETDEV=`get_netdev_name old_netdevs`
     ifconfig $NSIM_NETDEV up
 
     msg="create VxLANs v6"
@@ -877,6 +885,7 @@ msg="re-add a port"
 
 echo 2 > $NSIM_DEV_SYS/del_port
 echo 2 > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV=`get_netdev_name old_netdevs`
 check_tables
 
 msg="replace VxLAN in overflow table"
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 8da562a9ae87..19ff75051660 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -1,5 +1,6 @@
 CONFIG_USER_NS=y
 CONFIG_NET_NS=y
+CONFIG_BONDING=m
 CONFIG_BPF_SYSCALL=y
 CONFIG_TEST_BPF=m
 CONFIG_NUMA=y
@@ -14,9 +15,13 @@ CONFIG_VETH=y
 CONFIG_NET_IPVTI=y
 CONFIG_IPV6_VTI=y
 CONFIG_DUMMY=y
+CONFIG_BRIDGE_VLAN_FILTERING=y
 CONFIG_BRIDGE=y
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_VLAN_8021Q=y
 CONFIG_IFB=y
+CONFIG_INET_DIAG=y
+CONFIG_IP_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NETFILTER_ADVANCED=y
 CONFIG_NF_CONNTRACK=m
@@ -25,15 +30,36 @@ CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP6_NF_NAT=m
 CONFIG_IP_NF_NAT=m
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_L2TP_ETH=m
+CONFIG_L2TP_IP=m
+CONFIG_L2TP=m
+CONFIG_L2TP_V3=y
+CONFIG_MACSEC=m
+CONFIG_MACVLAN=y
+CONFIG_MACVTAP=y
+CONFIG_MPLS=y
+CONFIG_MPTCP=y
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_IPV6=y
 CONFIG_NF_TABLES_IPV4=y
 CONFIG_NFT_NAT=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_U32=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_SCH_FQ_CODEL=m
+CONFIG_NET_SCH_HTB=m
 CONFIG_NET_SCH_FQ=m
 CONFIG_NET_SCH_ETF=m
 CONFIG_NET_SCH_NETEM=y
+CONFIG_PSAMPLE=m
+CONFIG_TCP_MD5SIG=y
 CONFIG_TEST_BLACKHOLE_DEV=m
 CONFIG_KALLSYMS=y
+CONFIG_TLS=m
 CONFIG_TRACEPOINTS=y
 CONFIG_NET_DROP_MONITOR=m
 CONFIG_NETDEVSIM=m
@@ -48,7 +74,9 @@ CONFIG_BAREUDP=m
 CONFIG_IPV6_IOAM6_LWTUNNEL=y
 CONFIG_CRYPTO_SM4_GENERIC=y
 CONFIG_AMT=m
+CONFIG_TUN=y
 CONFIG_VXLAN=m
 CONFIG_IP_SCTP=m
 CONFIG_NETFILTER_XT_MATCH_POLICY=m
 CONFIG_CRYPTO_ARIA=y
+CONFIG_XFRM_INTERFACE=m
diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh
index a26c5624429f..4287a8529890 100755
--- a/tools/testing/selftests/net/rps_default_mask.sh
+++ b/tools/testing/selftests/net/rps_default_mask.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
 readonly ksft_skip=4
@@ -33,6 +33,10 @@ chk_rps() {
 
 	rps_mask=$($cmd /sys/class/net/$dev_name/queues/rx-0/rps_cpus)
 	printf "%-60s" "$msg"
+
+	# In case there are more than 32 CPUs we need to remove commas from masks
+	rps_mask=${rps_mask//,}
+	expected_rps_mask=${expected_rps_mask//,}
 	if [ $rps_mask -eq $expected_rps_mask ]; then
 		echo "[ ok ]"
 	else
diff --git a/tools/testing/selftests/net/so_incoming_cpu.c b/tools/testing/selftests/net/so_incoming_cpu.c
index a14818164102..e9fa14e10732 100644
--- a/tools/testing/selftests/net/so_incoming_cpu.c
+++ b/tools/testing/selftests/net/so_incoming_cpu.c
@@ -3,19 +3,16 @@
 #define _GNU_SOURCE
 #include <sched.h>
 
+#include <fcntl.h>
+
 #include <netinet/in.h>
 #include <sys/socket.h>
 #include <sys/sysinfo.h>
 
 #include "../kselftest_harness.h"
 
-#define CLIENT_PER_SERVER	32 /* More sockets, more reliable */
-#define NR_SERVER		self->nproc
-#define NR_CLIENT		(CLIENT_PER_SERVER * NR_SERVER)
-
 FIXTURE(so_incoming_cpu)
 {
-	int nproc;
 	int *servers;
 	union {
 		struct sockaddr addr;
@@ -56,12 +53,47 @@ FIXTURE_VARIANT_ADD(so_incoming_cpu, after_all_listen)
 	.when_to_set = AFTER_ALL_LISTEN,
 };
 
+static void write_sysctl(struct __test_metadata *_metadata,
+			 char *filename, char *string)
+{
+	int fd, len, ret;
+
+	fd = open(filename, O_WRONLY);
+	ASSERT_NE(fd, -1);
+
+	len = strlen(string);
+	ret = write(fd, string, len);
+	ASSERT_EQ(ret, len);
+}
+
+static void setup_netns(struct __test_metadata *_metadata)
+{
+	ASSERT_EQ(unshare(CLONE_NEWNET), 0);
+	ASSERT_EQ(system("ip link set lo up"), 0);
+
+	write_sysctl(_metadata, "/proc/sys/net/ipv4/ip_local_port_range", "10000 60001");
+	write_sysctl(_metadata, "/proc/sys/net/ipv4/tcp_tw_reuse", "0");
+}
+
+#define NR_PORT				(60001 - 10000 - 1)
+#define NR_CLIENT_PER_SERVER_DEFAULT	32
+static int nr_client_per_server, nr_server, nr_client;
+
 FIXTURE_SETUP(so_incoming_cpu)
 {
-	self->nproc = get_nprocs();
-	ASSERT_LE(2, self->nproc);
+	setup_netns(_metadata);
+
+	nr_server = get_nprocs();
+	ASSERT_LE(2, nr_server);
+
+	if (NR_CLIENT_PER_SERVER_DEFAULT * nr_server < NR_PORT)
+		nr_client_per_server = NR_CLIENT_PER_SERVER_DEFAULT;
+	else
+		nr_client_per_server = NR_PORT / nr_server;
+
+	nr_client = nr_client_per_server * nr_server;
 
-	self->servers = malloc(sizeof(int) * NR_SERVER);
+	self->servers = malloc(sizeof(int) * nr_server);
 	ASSERT_NE(self->servers, NULL);
 
 	self->in_addr.sin_family = AF_INET;
@@ -74,7 +106,7 @@ FIXTURE_TEARDOWN(so_incoming_cpu)
 {
 	int i;
 
-	for (i = 0; i < NR_SERVER; i++)
+	for (i = 0; i < nr_server; i++)
 		close(self->servers[i]);
 
 	free(self->servers);
@@ -110,10 +142,10 @@ int create_server(struct __test_metadata *_metadata,
 	if (variant->when_to_set == BEFORE_LISTEN)
 		set_so_incoming_cpu(_metadata, fd, cpu);
 
-	/* We don't use CLIENT_PER_SERVER here not to block
+	/* We don't use nr_client_per_server here not to block
 	 * this test at connect() if SO_INCOMING_CPU is broken.
 	 */
-	ret = listen(fd, NR_CLIENT);
+	ret = listen(fd, nr_client);
 	ASSERT_EQ(ret, 0);
 
 	if (variant->when_to_set == AFTER_LISTEN)
@@ -128,7 +160,7 @@ void create_servers(struct __test_metadata *_metadata,
 {
 	int i, ret;
 
-	for (i = 0; i < NR_SERVER; i++) {
+	for (i = 0; i < nr_server; i++) {
 		self->servers[i] = create_server(_metadata, self, variant, i);
 
 		if (i == 0) {
@@ -138,7 +170,7 @@ void create_servers(struct __test_metadata *_metadata,
 	}
 
 	if (variant->when_to_set == AFTER_ALL_LISTEN) {
-		for (i = 0; i < NR_SERVER; i++)
+		for (i = 0; i < nr_server; i++)
 			set_so_incoming_cpu(_metadata, self->servers[i], i);
 	}
 }
@@ -149,7 +181,7 @@ void create_clients(struct __test_metadata *_metadata,
 	cpu_set_t cpu_set;
 	int i, j, fd, ret;
 
-	for (i = 0; i < NR_SERVER; i++) {
+	for (i = 0; i < nr_server; i++) {
 		CPU_ZERO(&cpu_set);
 
 		CPU_SET(i, &cpu_set);
@@ -162,7 +194,7 @@ void create_clients(struct __test_metadata *_metadata,
 		ret = sched_setaffinity(0, sizeof(cpu_set), &cpu_set);
 		ASSERT_EQ(ret, 0);
 
-		for (j = 0; j < CLIENT_PER_SERVER; j++) {
+		for (j = 0; j < nr_client_per_server; j++) {
 			fd  = socket(AF_INET, SOCK_STREAM, 0);
 			ASSERT_NE(fd, -1);
 
@@ -180,8 +212,8 @@ void verify_incoming_cpu(struct __test_metadata *_metadata,
 	int i, j, fd, cpu, ret, total = 0;
 	socklen_t len = sizeof(int);
 
-	for (i = 0; i < NR_SERVER; i++) {
-		for (j = 0; j < CLIENT_PER_SERVER; j++) {
+	for (i = 0; i < nr_server; i++) {
+		for (j = 0; j < nr_client_per_server; j++) {
 			/* If we see -EAGAIN here, SO_INCOMING_CPU is broken */
 			fd = accept(self->servers[i], &self->addr, &self->addrlen);
 			ASSERT_NE(fd, -1);
@@ -195,7 +227,7 @@ void verify_incoming_cpu(struct __test_metadata *_metadata,
 		}
 	}
 
-	ASSERT_EQ(total, NR_CLIENT);
+	ASSERT_EQ(total, nr_client);
 	TH_LOG("SO_INCOMING_CPU is very likely to be "
 	       "working correctly with %d sockets.", total);
 }
diff --git a/tools/testing/selftests/riscv/hwprobe/cbo.c b/tools/testing/selftests/riscv/hwprobe/cbo.c
index 50a2cc8aef38..c537d52fafc5 100644
--- a/tools/testing/selftests/riscv/hwprobe/cbo.c
+++ b/tools/testing/selftests/riscv/hwprobe/cbo.c
@@ -36,16 +36,14 @@ static void sigill_handler(int sig, siginfo_t *info, void *context)
 	regs[0] += 4;
 }
 
-static void cbo_insn(char *base, int fn)
-{
-	uint32_t insn = MK_CBO(fn);
-
-	asm volatile(
-	"mv	a0, %0\n"
-	"li	a1, %1\n"
-	".4byte	%2\n"
-	: : "r" (base), "i" (fn), "i" (insn) : "a0", "a1", "memory");
-}
+#define cbo_insn(base, fn)							\
+({										\
+	asm volatile(								\
+	"mv	a0, %0\n"							\
+	"li	a1, %1\n"							\
+	".4byte	%2\n"								\
+	: : "r" (base), "i" (fn), "i" (MK_CBO(fn)) : "a0", "a1", "memory");	\
+})
 
 static void cbo_inval(char *base) { cbo_insn(base, 0); }
 static void cbo_clean(char *base) { cbo_insn(base, 1); }
@@ -97,7 +95,7 @@ static void test_zicboz(void *arg)
 	block_size = pair.value;
 	ksft_test_result(rc == 0 && pair.key == RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE &&
 			 is_power_of_2(block_size), "Zicboz block size\n");
-	ksft_print_msg("Zicboz block size: %ld\n", block_size);
+	ksft_print_msg("Zicboz block size: %llu\n", block_size);
 
 	illegal_insn = false;
 	cbo_zero(&mem[block_size]);
@@ -121,7 +119,7 @@ static void test_zicboz(void *arg)
 		for (j = 0; j < block_size; ++j) {
 			if (mem[i * block_size + j] != expected) {
 				ksft_test_result_fail("cbo.zero check\n");
-				ksft_print_msg("cbo.zero check: mem[%d] != 0x%x\n",
+				ksft_print_msg("cbo.zero check: mem[%llu] != 0x%x\n",
 					       i * block_size + j, expected);
 				return;
 			}
@@ -201,7 +199,7 @@ int main(int argc, char **argv)
 	pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
 	rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&cpus, 0);
 	if (rc < 0)
-		ksft_exit_fail_msg("hwprobe() failed with %d\n", rc);
+		ksft_exit_fail_msg("hwprobe() failed with %ld\n", rc);
 	assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0);
 
 	if (pair.value & RISCV_HWPROBE_EXT_ZICBOZ) {
diff --git a/tools/testing/selftests/riscv/hwprobe/hwprobe.c b/tools/testing/selftests/riscv/hwprobe/hwprobe.c
index d53e0889b59e..fd73c87804f3 100644
--- a/tools/testing/selftests/riscv/hwprobe/hwprobe.c
+++ b/tools/testing/selftests/riscv/hwprobe/hwprobe.c
@@ -29,7 +29,7 @@ int main(int argc, char **argv)
 		/* Fail if the kernel claims not to recognize a base key. */
 		if ((i < 4) && (pairs[i].key != i))
 			ksft_exit_fail_msg("Failed to recognize base key: key != i, "
-					   "key=%ld, i=%ld\n", pairs[i].key, i);
+					   "key=%lld, i=%ld\n", pairs[i].key, i);
 
 		if (pairs[i].key != RISCV_HWPROBE_KEY_BASE_BEHAVIOR)
 			continue;
@@ -37,7 +37,7 @@ int main(int argc, char **argv)
 		if (pairs[i].value & RISCV_HWPROBE_BASE_BEHAVIOR_IMA)
 			continue;
 
-		ksft_exit_fail_msg("Unexpected pair: (%ld, %ld)\n", pairs[i].key, pairs[i].value);
+		ksft_exit_fail_msg("Unexpected pair: (%lld, %llu)\n", pairs[i].key, pairs[i].value);
 	}
 
 	out = riscv_hwprobe(pairs, 8, 0, 0, 0);
diff --git a/tools/testing/selftests/riscv/mm/mmap_test.h b/tools/testing/selftests/riscv/mm/mmap_test.h
index 9b8434f62f57..2e0db9c5be6c 100644
--- a/tools/testing/selftests/riscv/mm/mmap_test.h
+++ b/tools/testing/selftests/riscv/mm/mmap_test.h
@@ -18,6 +18,8 @@ struct addresses {
 	int *on_56_addr;
 };
 
+// Only works on 64 bit
+#if __riscv_xlen == 64
 static inline void do_mmaps(struct addresses *mmap_addresses)
 {
 	/*
@@ -50,6 +52,7 @@ static inline void do_mmaps(struct addresses *mmap_addresses)
 	mmap_addresses->on_56_addr =
 		mmap(on_56_bits, 5 * sizeof(int), prot, flags, 0, 0);
 }
+#endif /* __riscv_xlen == 64 */
 
 static inline int memory_layout(void)
 {
diff --git a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c
index 66764edb0d52..1dd94197da30 100644
--- a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c
+++ b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c
@@ -27,7 +27,7 @@ int main(void)
 
 	datap = malloc(MAX_VSIZE);
 	if (!datap) {
-		ksft_test_result_fail("fail to allocate memory for size = %lu\n", MAX_VSIZE);
+		ksft_test_result_fail("fail to allocate memory for size = %d\n", MAX_VSIZE);
 		exit(-1);
 	}
 
diff --git a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
index 2c0d2b1126c1..1f9969bed235 100644
--- a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
+++ b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
@@ -1,4 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/wait.h>
+
 #define THIS_PROGRAM "./vstate_exec_nolibc"
 
 int main(int argc, char **argv)
diff --git a/tools/testing/selftests/riscv/vector/vstate_prctl.c b/tools/testing/selftests/riscv/vector/vstate_prctl.c
index 8dcd399ef7fc..27668fb3b6d0 100644
--- a/tools/testing/selftests/riscv/vector/vstate_prctl.c
+++ b/tools/testing/selftests/riscv/vector/vstate_prctl.c
@@ -60,7 +60,7 @@ int test_and_compare_child(long provided, long expected, int inherit)
 	}
 	rc = launch_test(inherit);
 	if (rc != expected) {
-		ksft_test_result_fail("Test failed, check %d != %d\n", rc,
+		ksft_test_result_fail("Test failed, check %d != %ld\n", rc,
 				      expected);
 		return -2;
 	}
@@ -79,7 +79,7 @@ int main(void)
 	pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
 	rc = riscv_hwprobe(&pair, 1, 0, NULL, 0);
 	if (rc < 0) {
-		ksft_test_result_fail("hwprobe() failed with %d\n", rc);
+		ksft_test_result_fail("hwprobe() failed with %ld\n", rc);
 		return -1;
 	}
 
diff --git a/usr/gen_init_cpio.c b/usr/gen_init_cpio.c
index 61230532fef1..edcdb8abfa31 100644
--- a/usr/gen_init_cpio.c
+++ b/usr/gen_init_cpio.c
@@ -27,6 +27,7 @@
 static unsigned int offset;
 static unsigned int ino = 721;
 static time_t default_mtime;
+static bool do_file_mtime;
 static bool do_csum = false;
 
 struct file_handler {
@@ -329,6 +330,7 @@ static int cpio_mkfile(const char *name, const char *location,
 	int file;
 	int retval;
 	int rc = -1;
+	time_t mtime;
 	int namesize;
 	unsigned int i;
 	uint32_t csum = 0;
@@ -347,16 +349,21 @@ static int cpio_mkfile(const char *name, const char *location,
 		goto error;
 	}
 
-	if (buf.st_mtime > 0xffffffff) {
-		fprintf(stderr, "%s: Timestamp exceeds maximum cpio timestamp, clipping.\n",
-			location);
-		buf.st_mtime = 0xffffffff;
-	}
+	if (do_file_mtime) {
+		mtime = default_mtime;
+	} else {
+		mtime = buf.st_mtime;
+		if (mtime > 0xffffffff) {
+			fprintf(stderr, "%s: Timestamp exceeds maximum cpio timestamp, clipping.\n",
+					location);
+			mtime = 0xffffffff;
+		}
 
-	if (buf.st_mtime < 0) {
-		fprintf(stderr, "%s: Timestamp negative, clipping.\n",
-			location);
-		buf.st_mtime = 0;
+		if (mtime < 0) {
+			fprintf(stderr, "%s: Timestamp negative, clipping.\n",
+					location);
+			mtime = 0;
+		}
 	}
 
 	if (buf.st_size > 0xffffffff) {
@@ -387,7 +394,7 @@ static int cpio_mkfile(const char *name, const char *location,
 			(long) uid,		/* uid */
 			(long) gid,		/* gid */
 			nlinks,			/* nlink */
-			(long) buf.st_mtime,	/* mtime */
+			(long) mtime,		/* mtime */
 			size,			/* filesize */
 			3,			/* major */
 			1,			/* minor */
@@ -536,8 +543,9 @@ static void usage(const char *prog)
 		"file /sbin/kinit /usr/src/klibc/kinit/kinit 0755 0 0\n"
 		"\n"
 		"<timestamp> is time in seconds since Epoch that will be used\n"
-		"as mtime for symlinks, special files and directories. The default\n"
-		"is to use the current time for these entries.\n"
+		"as mtime for symlinks, directories, regular and special files.\n"
+		"The default is to use the current time for all files, but\n"
+		"preserve modification time for regular files.\n"
 		"-c: calculate and store 32-bit checksums for file data.\n",
 		prog);
 }
@@ -594,6 +602,7 @@ int main (int argc, char *argv[])
 				usage(argv[0]);
 				exit(1);
 			}
+			do_file_mtime = true;
 			break;
 		case 'c':
 			do_csum = true;