diff options
Diffstat (limited to 'tools')
128 files changed, 7225 insertions, 1438 deletions
diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 8a48cc2536f5..57c669d2aa90 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -11,11 +11,16 @@ #include <string.h> #include <errno.h> #include <endian.h> +#include <assert.h> #include <linux/bootconfig.h> #define pr_err(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) +/* Bootconfig footer is [size][csum][BOOTCONFIG_MAGIC]. */ +#define BOOTCONFIG_FOOTER_SIZE \ + (sizeof(uint32_t) * 2 + BOOTCONFIG_MAGIC_LEN) + static int xbc_show_value(struct xbc_node *node, bool semicolon) { const char *val, *eol; @@ -185,7 +190,7 @@ static int load_xbc_from_initrd(int fd, char **buf) if (ret < 0) return -errno; - if (stat.st_size < 8 + BOOTCONFIG_MAGIC_LEN) + if (stat.st_size < BOOTCONFIG_FOOTER_SIZE) return 0; if (lseek(fd, -BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0) @@ -198,7 +203,7 @@ static int load_xbc_from_initrd(int fd, char **buf) if (memcmp(magic, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN) != 0) return 0; - if (lseek(fd, -(8 + BOOTCONFIG_MAGIC_LEN), SEEK_END) < 0) + if (lseek(fd, -BOOTCONFIG_FOOTER_SIZE, SEEK_END) < 0) return pr_errno("Failed to lseek for size", -errno); if (read(fd, &size, sizeof(uint32_t)) < 0) @@ -210,12 +215,12 @@ static int load_xbc_from_initrd(int fd, char **buf) csum = le32toh(csum); /* Wrong size error */ - if (stat.st_size < size + 8 + BOOTCONFIG_MAGIC_LEN) { + if (stat.st_size < size + BOOTCONFIG_FOOTER_SIZE) { pr_err("bootconfig size is too big\n"); return -E2BIG; } - if (lseek(fd, stat.st_size - (size + 8 + BOOTCONFIG_MAGIC_LEN), + if (lseek(fd, stat.st_size - (size + BOOTCONFIG_FOOTER_SIZE), SEEK_SET) < 0) return pr_errno("Failed to lseek", -errno); @@ -346,7 +351,7 @@ static int delete_xbc(const char *path) ret = fstat(fd, &stat); if (!ret) ret = ftruncate(fd, stat.st_size - - size - 8 - BOOTCONFIG_MAGIC_LEN); + - size - BOOTCONFIG_FOOTER_SIZE); if (ret) ret = -errno; } /* Ignore if there is no boot config in initrd */ @@ -359,7 +364,12 @@ static int delete_xbc(const 
char *path) static int apply_xbc(const char *path, const char *xbc_path) { - char *buf, *data, *p; + struct { + uint32_t size; + uint32_t csum; + char magic[BOOTCONFIG_MAGIC_LEN]; + } footer; + char *buf, *data; size_t total_size; struct stat stat; const char *msg; @@ -376,8 +386,7 @@ static int apply_xbc(const char *path, const char *xbc_path) csum = xbc_calc_checksum(buf, size); /* Backup the bootconfig data */ - data = calloc(size + BOOTCONFIG_ALIGN + - sizeof(uint32_t) + sizeof(uint32_t) + BOOTCONFIG_MAGIC_LEN, 1); + data = calloc(size + BOOTCONFIG_ALIGN + BOOTCONFIG_FOOTER_SIZE, 1); if (!data) return -ENOMEM; memcpy(data, buf, size); @@ -425,22 +434,18 @@ static int apply_xbc(const char *path, const char *xbc_path) } /* To align up the total size to BOOTCONFIG_ALIGN, get padding size */ - total_size = stat.st_size + size + sizeof(uint32_t) * 2 + BOOTCONFIG_MAGIC_LEN; + total_size = stat.st_size + size + BOOTCONFIG_FOOTER_SIZE; pad = ((total_size + BOOTCONFIG_ALIGN - 1) & (~BOOTCONFIG_ALIGN_MASK)) - total_size; size += pad; /* Add a footer */ - p = data + size; - *(uint32_t *)p = htole32(size); - p += sizeof(uint32_t); - - *(uint32_t *)p = htole32(csum); - p += sizeof(uint32_t); - - memcpy(p, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN); - p += BOOTCONFIG_MAGIC_LEN; + footer.size = htole32(size); + footer.csum = htole32(csum); + memcpy(footer.magic, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN); + static_assert(sizeof(footer) == BOOTCONFIG_FOOTER_SIZE); + memcpy(data + size, &footer, BOOTCONFIG_FOOTER_SIZE); - total_size = p - data; + total_size = size + BOOTCONFIG_FOOTER_SIZE; ret = write(fd, data, total_size); if (ret < total_size) { diff --git a/tools/bootconfig/scripts/ftrace.sh b/tools/bootconfig/scripts/ftrace.sh index 186eed923041..cc5250c64699 100644 --- a/tools/bootconfig/scripts/ftrace.sh +++ b/tools/bootconfig/scripts/ftrace.sh @@ -1,3 +1,4 @@ +#!/bin/sh # SPDX-License-Identifier: GPL-2.0-only clear_trace() { # reset trace output diff --git 
a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh index a2c484c243f5..7594659af1e1 100755 --- a/tools/bootconfig/test-bootconfig.sh +++ b/tools/bootconfig/test-bootconfig.sh @@ -27,16 +27,16 @@ NO=1 xpass() { # pass test command echo "test case $NO ($*)... " - if ! ($@ && echo "\t\t[OK]"); then - echo "\t\t[NG]"; NG=$((NG + 1)) + if ! ($@ && printf "\t\t[OK]\n"); then + printf "\t\t[NG]\n"; NG=$((NG + 1)) fi NO=$((NO + 1)) } xfail() { # fail test command echo "test case $NO ($*)... " - if ! (! $@ && echo "\t\t[OK]"); then - echo "\t\t[NG]"; NG=$((NG + 1)) + if ! (! $@ && printf "\t\t[OK]\n"); then + printf "\t\t[NG]\n"; NG=$((NG + 1)) fi NO=$((NO + 1)) } @@ -48,13 +48,13 @@ echo "Delete command should success without bootconfig" xpass $BOOTCONF -d $INITRD dd if=/dev/zero of=$INITRD bs=4096 count=1 -echo "key = value;" > $TEMPCONF -bconf_size=$(stat -c %s $TEMPCONF) -initrd_size=$(stat -c %s $INITRD) +printf "key = value;" > $TEMPCONF +bconf_size=$(wc -c < $TEMPCONF) +initrd_size=$(wc -c < $INITRD) echo "Apply command test" xpass $BOOTCONF -a $TEMPCONF $INITRD -new_size=$(stat -c %s $INITRD) +new_size=$(wc -c < $INITRD) echo "Show command test" xpass $BOOTCONF $INITRD @@ -69,13 +69,13 @@ echo "Apply command repeat test" xpass $BOOTCONF -a $TEMPCONF $INITRD echo "File size check" -xpass test $new_size -eq $(stat -c %s $INITRD) +xpass test $new_size -eq $(wc -c < $INITRD) echo "Delete command check" xpass $BOOTCONF -d $INITRD echo "File size check" -new_size=$(stat -c %s $INITRD) +new_size=$(wc -c < $INITRD) xpass test $new_size -eq $initrd_size echo "No error messge while applying" @@ -97,19 +97,20 @@ BEGIN { ' > $TEMPCONF xpass $BOOTCONF -a $TEMPCONF $INITRD -echo "badnode" >> $TEMPCONF +printf "badnode\n" >> $TEMPCONF xfail $BOOTCONF -a $TEMPCONF $INITRD echo "Max filesize check" # Max size is 32767 (including terminal byte) -echo -n "data = \"" > $TEMPCONF +printf "data = \"" > $TEMPCONF dd if=/dev/urandom bs=768 count=32 | base64 -w0 >> 
$TEMPCONF -echo "\"" >> $TEMPCONF +printf "\"\n" >> $TEMPCONF xfail $BOOTCONF -a $TEMPCONF $INITRD -truncate -s 32764 $TEMPCONF -echo "\"" >> $TEMPCONF # add 2 bytes + terminal ('\"\n\0') +dd if=$TEMPCONF of=$OUTFILE bs=1 count=32764 +cp $OUTFILE $TEMPCONF +printf "\"\n" >> $TEMPCONF # add 2 bytes + terminal ('\"\n\0') xpass $BOOTCONF -a $TEMPCONF $INITRD echo "Adding same-key values" @@ -139,7 +140,7 @@ xfail grep -q "baz" $OUTFILE xpass grep -q "qux" $OUTFILE echo "Double/single quotes test" -echo "key = '\"string\"';" > $TEMPCONF +printf "key = '\"string\"';" > $TEMPCONF $BOOTCONF -a $TEMPCONF $INITRD $BOOTCONF $INITRD > $TEMPCONF cat $TEMPCONF @@ -167,8 +168,8 @@ echo > $INITRD xpass $BOOTCONF -a $TEMPCONF $INITRD $BOOTCONF $INITRD > $OUTFILE -xfail grep -q val[[:space:]] $OUTFILE -xpass grep -q val2[[:space:]] $OUTFILE +xfail grep -q 'val[[:space:]]' $OUTFILE +xpass grep -q 'val2[[:space:]]' $OUTFILE echo "=== expected failure cases ===" for i in samples/bad-* ; do diff --git a/tools/cgroup/memcg_slabinfo.py b/tools/cgroup/memcg_slabinfo.py index 270c28a0d098..6bf4bde77903 100644 --- a/tools/cgroup/memcg_slabinfo.py +++ b/tools/cgroup/memcg_slabinfo.py @@ -146,11 +146,11 @@ def detect_kernel_config(): def for_each_slab(prog): - PGSlab = ~prog.constant('PG_slab') + slabtype = prog.constant('PGTY_slab') for page in for_each_page(prog): try: - if page.page_type.value_() == PGSlab: + if (page.page_type.value_() >> 24) == slabtype: yield cast('struct slab *', page) except FaultError: pass diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index d00b85cb168c..7415a3863891 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -618,6 +618,7 @@ struct kvm_ioeventfd { #define KVM_X86_DISABLE_EXITS_HLT (1 << 1) #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) #define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3) +#define KVM_X86_DISABLE_EXITS_APERFMPERF (1 << 4) /* for KVM_ENABLE_CAP */ struct kvm_enable_cap { diff --git 
a/tools/mm/show_page_info.py b/tools/mm/show_page_info.py new file mode 100644 index 000000000000..c46d8ea283d7 --- /dev/null +++ b/tools/mm/show_page_info.py @@ -0,0 +1,169 @@ +#!/usr/bin/env drgn +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2025 Ye Liu <liuye@kylinos.cn> + +import argparse +import sys +from drgn import Object, FaultError, PlatformFlags, cast +from drgn.helpers.linux import find_task, follow_page, page_size +from drgn.helpers.linux.mm import ( + decode_page_flags, page_to_pfn, page_to_phys, page_to_virt, vma_find, + PageSlab, PageCompound, PageHead, PageTail, compound_head, compound_order, compound_nr +) +from drgn.helpers.linux.cgroup import cgroup_name, cgroup_path + +DESC = """ +This is a drgn script to show the page state. +For more info on drgn, visit https://github.com/osandov/drgn. +""" + +def format_page_data(page): + """ + Format raw page data into a readable hex dump with "RAW:" prefix. + + :param page: drgn.Object instance representing the page. + :return: Formatted string of memory contents. 
+ """ + try: + address = page.value_() + size = prog.type("struct page").size + + if prog.platform.flags & PlatformFlags.IS_64_BIT: + word_size = 8 + else: + word_size = 4 + num_words = size // word_size + + values = [] + for i in range(num_words): + word_address = address + i * word_size + word = prog.read_word(word_address) + values.append(f"{word:0{word_size * 2}x}") + + lines = [f"RAW: {' '.join(values[i:i + 4])}" for i in range(0, len(values), 4)] + + return "\n".join(lines) + + except FaultError as e: + return f"Error reading memory: {e}" + except Exception as e: + return f"Unexpected error: {e}" + +def get_memcg_info(page): + """Retrieve memory cgroup information for a page.""" + try: + MEMCG_DATA_OBJEXTS = prog.constant("MEMCG_DATA_OBJEXTS").value_() + MEMCG_DATA_KMEM = prog.constant("MEMCG_DATA_KMEM").value_() + mask = prog.constant('__NR_MEMCG_DATA_FLAGS').value_() - 1 + memcg_data = page.memcg_data.read_() + if memcg_data & MEMCG_DATA_OBJEXTS: + slabobj_ext = cast("struct slabobj_ext *", memcg_data & ~mask) + memcg = slabobj_ext.objcg.memcg.value_() + elif memcg_data & MEMCG_DATA_KMEM: + objcg = cast("struct obj_cgroup *", memcg_data & ~mask) + memcg = objcg.memcg.value_() + else: + memcg = cast("struct mem_cgroup *", memcg_data & ~mask) + + if memcg.value_() == 0: + return "none", "/sys/fs/cgroup/memory/" + cgrp = memcg.css.cgroup + return cgroup_name(cgrp).decode(), f"/sys/fs/cgroup/memory{cgroup_path(cgrp).decode()}" + except FaultError as e: + return "unknown", f"Error retrieving memcg info: {e}" + except Exception as e: + return "unknown", f"Unexpected error: {e}" + +def show_page_state(page, addr, mm, pid, task): + """Display detailed information about a page.""" + try: + print(f'PID: {pid} Comm: {task.comm.string_().decode()} mm: {hex(mm)}') + try: + print(format_page_data(page)) + except FaultError as e: + print(f"Error reading page data: {e}") + fields = { + "Page Address": hex(page.value_()), + "Page Flags": decode_page_flags(page), + "Page 
Size": prog["PAGE_SIZE"].value_(), + "Page PFN": hex(page_to_pfn(page).value_()), + "Page Physical": hex(page_to_phys(page).value_()), + "Page Virtual": hex(page_to_virt(page).value_()), + "Page Refcount": page._refcount.counter.value_(), + "Page Mapcount": page._mapcount.counter.value_(), + "Page Index": hex(page.__folio_index.value_()), + "Page Memcg Data": hex(page.memcg_data.value_()), + } + + memcg_name, memcg_path = get_memcg_info(page) + fields["Memcg Name"] = memcg_name + fields["Memcg Path"] = memcg_path + fields["Page Mapping"] = hex(page.mapping.value_()) + fields["Page Anon/File"] = "Anon" if page.mapping.value_() & 0x1 else "File" + + try: + vma = vma_find(mm, addr) + fields["Page VMA"] = hex(vma.value_()) + fields["VMA Start"] = hex(vma.vm_start.value_()) + fields["VMA End"] = hex(vma.vm_end.value_()) + except FaultError as e: + fields["Page VMA"] = "Unavailable" + fields["VMA Start"] = "Unavailable" + fields["VMA End"] = "Unavailable" + print(f"Error retrieving VMA information: {e}") + + # Calculate the maximum field name length for alignment + max_field_len = max(len(field) for field in fields) + + # Print aligned fields + for field, value in fields.items(): + print(f"{field}:".ljust(max_field_len + 2) + f"{value}") + + # Additional information about the page + if PageSlab(page): + print("This page belongs to the slab allocator.") + + if PageCompound(page): + print("This page is part of a compound page.") + if PageHead(page): + print("This page is the head page of a compound page.") + if PageTail(page): + print("This page is the tail page of a compound page.") + print(f"{'Head Page:'.ljust(max_field_len + 2)}{hex(compound_head(page).value_())}") + print(f"{'Compound Order:'.ljust(max_field_len + 2)}{compound_order(page).value_()}") + print(f"{'Number of Pages:'.ljust(max_field_len + 2)}{compound_nr(page).value_()}") + else: + print("This page is not part of a compound page.") + except FaultError as e: + print(f"Error accessing page state: {e}") + 
except Exception as e: + print(f"Unexpected error: {e}") + +def main(): + """Main function to parse arguments and display page state.""" + parser = argparse.ArgumentParser(description=DESC, formatter_class=argparse.RawTextHelpFormatter) + parser.add_argument('pid', metavar='PID', type=int, help='Target process ID (PID)') + parser.add_argument('vaddr', metavar='VADDR', type=str, help='Target virtual address in hexadecimal format (e.g., 0x7fff1234abcd)') + args = parser.parse_args() + + try: + vaddr = int(args.vaddr, 16) + except ValueError: + sys.exit(f"Error: Invalid virtual address format: {args.vaddr}") + + try: + task = find_task(args.pid) + mm = task.mm + page = follow_page(mm, vaddr) + + if page: + show_page_state(page, vaddr, mm, args.pid, task) + else: + sys.exit(f"Address {hex(vaddr)} is not mapped.") + except FaultError as e: + sys.exit(f"Error accessing task or memory: {e}") + except Exception as e: + sys.exit(f"Unexpected error: {e}") + +if __name__ == "__main__": + main() diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index eacfe3b0a8d1..6a922d046b8e 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -38,6 +38,7 @@ NORETURN(mpt_halt_firmware) NORETURN(mwait_play_dead) NORETURN(nmi_panic_self_stop) NORETURN(panic) +NORETURN(vpanic) NORETURN(panic_smp_self_stop) NORETURN(rest_init) NORETURN(rewind_stack_and_make_dead) diff --git a/tools/perf/arch/riscv/util/kvm-stat.c b/tools/perf/arch/riscv/util/kvm-stat.c index 491aef449d1a..3ea7acb5e159 100644 --- a/tools/perf/arch/riscv/util/kvm-stat.c +++ b/tools/perf/arch/riscv/util/kvm-stat.c @@ -9,10 +9,10 @@ #include <memory.h> #include "../../../util/evsel.h" #include "../../../util/kvm-stat.h" -#include "riscv_exception_types.h" +#include "riscv_trap_types.h" #include "debug.h" -define_exit_reasons_table(riscv_exit_reasons, kvm_riscv_exception_class); +define_exit_reasons_table(riscv_exit_reasons, kvm_riscv_trap_class); const char *vcpu_id_str = "id"; const char 
*kvm_exit_reason = "scause"; @@ -30,7 +30,7 @@ static void event_get_key(struct evsel *evsel, struct event_key *key) { key->info = 0; - key->key = evsel__intval(evsel, sample, kvm_exit_reason); + key->key = evsel__intval(evsel, sample, kvm_exit_reason) & ~CAUSE_IRQ_FLAG; key->exit_reasons = riscv_exit_reasons; } diff --git a/tools/perf/arch/riscv/util/riscv_exception_types.h b/tools/perf/arch/riscv/util/riscv_exception_types.h deleted file mode 100644 index c49b8fa5e847..000000000000 --- a/tools/perf/arch/riscv/util/riscv_exception_types.h +++ /dev/null @@ -1,35 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#ifndef ARCH_PERF_RISCV_EXCEPTION_TYPES_H -#define ARCH_PERF_RISCV_EXCEPTION_TYPES_H - -#define EXC_INST_MISALIGNED 0 -#define EXC_INST_ACCESS 1 -#define EXC_INST_ILLEGAL 2 -#define EXC_BREAKPOINT 3 -#define EXC_LOAD_MISALIGNED 4 -#define EXC_LOAD_ACCESS 5 -#define EXC_STORE_MISALIGNED 6 -#define EXC_STORE_ACCESS 7 -#define EXC_SYSCALL 8 -#define EXC_HYPERVISOR_SYSCALL 9 -#define EXC_SUPERVISOR_SYSCALL 10 -#define EXC_INST_PAGE_FAULT 12 -#define EXC_LOAD_PAGE_FAULT 13 -#define EXC_STORE_PAGE_FAULT 15 -#define EXC_INST_GUEST_PAGE_FAULT 20 -#define EXC_LOAD_GUEST_PAGE_FAULT 21 -#define EXC_VIRTUAL_INST_FAULT 22 -#define EXC_STORE_GUEST_PAGE_FAULT 23 - -#define EXC(x) {EXC_##x, #x } - -#define kvm_riscv_exception_class \ - EXC(INST_MISALIGNED), EXC(INST_ACCESS), EXC(INST_ILLEGAL), \ - EXC(BREAKPOINT), EXC(LOAD_MISALIGNED), EXC(LOAD_ACCESS), \ - EXC(STORE_MISALIGNED), EXC(STORE_ACCESS), EXC(SYSCALL), \ - EXC(HYPERVISOR_SYSCALL), EXC(SUPERVISOR_SYSCALL), \ - EXC(INST_PAGE_FAULT), EXC(LOAD_PAGE_FAULT), EXC(STORE_PAGE_FAULT), \ - EXC(INST_GUEST_PAGE_FAULT), EXC(LOAD_GUEST_PAGE_FAULT), \ - EXC(VIRTUAL_INST_FAULT), EXC(STORE_GUEST_PAGE_FAULT) - -#endif /* ARCH_PERF_RISCV_EXCEPTION_TYPES_H */ diff --git a/tools/perf/arch/riscv/util/riscv_trap_types.h b/tools/perf/arch/riscv/util/riscv_trap_types.h new file mode 100644 index 000000000000..6cc71eb01fca --- /dev/null +++ 
b/tools/perf/arch/riscv/util/riscv_trap_types.h @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef ARCH_PERF_RISCV_TRAP_TYPES_H +#define ARCH_PERF_RISCV_TRAP_TYPES_H + +/* Exception cause high bit - is an interrupt if set */ +#define CAUSE_IRQ_FLAG (_AC(1, UL) << (__riscv_xlen - 1)) + +/* Interrupt causes (minus the high bit) */ +#define IRQ_S_SOFT 1 +#define IRQ_VS_SOFT 2 +#define IRQ_M_SOFT 3 +#define IRQ_S_TIMER 5 +#define IRQ_VS_TIMER 6 +#define IRQ_M_TIMER 7 +#define IRQ_S_EXT 9 +#define IRQ_VS_EXT 10 +#define IRQ_M_EXT 11 +#define IRQ_S_GEXT 12 +#define IRQ_PMU_OVF 13 + +/* Exception causes */ +#define EXC_INST_MISALIGNED 0 +#define EXC_INST_ACCESS 1 +#define EXC_INST_ILLEGAL 2 +#define EXC_BREAKPOINT 3 +#define EXC_LOAD_MISALIGNED 4 +#define EXC_LOAD_ACCESS 5 +#define EXC_STORE_MISALIGNED 6 +#define EXC_STORE_ACCESS 7 +#define EXC_SYSCALL 8 +#define EXC_HYPERVISOR_SYSCALL 9 +#define EXC_SUPERVISOR_SYSCALL 10 +#define EXC_INST_PAGE_FAULT 12 +#define EXC_LOAD_PAGE_FAULT 13 +#define EXC_STORE_PAGE_FAULT 15 +#define EXC_INST_GUEST_PAGE_FAULT 20 +#define EXC_LOAD_GUEST_PAGE_FAULT 21 +#define EXC_VIRTUAL_INST_FAULT 22 +#define EXC_STORE_GUEST_PAGE_FAULT 23 + +#define TRAP(x) { x, #x } + +#define kvm_riscv_trap_class \ + TRAP(IRQ_S_SOFT), TRAP(IRQ_VS_SOFT), TRAP(IRQ_M_SOFT), \ + TRAP(IRQ_S_TIMER), TRAP(IRQ_VS_TIMER), TRAP(IRQ_M_TIMER), \ + TRAP(IRQ_S_EXT), TRAP(IRQ_VS_EXT), TRAP(IRQ_M_EXT), \ + TRAP(IRQ_S_GEXT), TRAP(IRQ_PMU_OVF), \ + TRAP(EXC_INST_MISALIGNED), TRAP(EXC_INST_ACCESS), TRAP(EXC_INST_ILLEGAL), \ + TRAP(EXC_BREAKPOINT), TRAP(EXC_LOAD_MISALIGNED), TRAP(EXC_LOAD_ACCESS), \ + TRAP(EXC_STORE_MISALIGNED), TRAP(EXC_STORE_ACCESS), TRAP(EXC_SYSCALL), \ + TRAP(EXC_HYPERVISOR_SYSCALL), TRAP(EXC_SUPERVISOR_SYSCALL), \ + TRAP(EXC_INST_PAGE_FAULT), TRAP(EXC_LOAD_PAGE_FAULT), \ + TRAP(EXC_STORE_PAGE_FAULT), TRAP(EXC_INST_GUEST_PAGE_FAULT), \ + TRAP(EXC_LOAD_GUEST_PAGE_FAULT), TRAP(EXC_VIRTUAL_INST_FAULT), \ + TRAP(EXC_STORE_GUEST_PAGE_FAULT) + +#endif /* 
ARCH_PERF_RISCV_TRAP_TYPES_H */ diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c index c3cd9a17d48e..69d877501cb7 100644 --- a/tools/sched_ext/scx_qmap.bpf.c +++ b/tools/sched_ext/scx_qmap.bpf.c @@ -615,6 +615,26 @@ void BPF_STRUCT_OPS(qmap_dump_task, struct scx_dump_ctx *dctx, struct task_struc taskc->force_local, taskc->core_sched_seq); } +s32 BPF_STRUCT_OPS(qmap_cgroup_init, struct cgroup *cgrp, struct scx_cgroup_init_args *args) +{ + bpf_printk("CGRP INIT %llu weight=%u period=%lu quota=%ld burst=%lu", + cgrp->kn->id, args->weight, args->bw_period_us, + args->bw_quota_us, args->bw_burst_us); + return 0; +} + +void BPF_STRUCT_OPS(qmap_cgroup_set_weight, struct cgroup *cgrp, u32 weight) +{ + bpf_printk("CGRP SET %llu weight=%u", cgrp->kn->id, weight); +} + +void BPF_STRUCT_OPS(qmap_cgroup_set_bandwidth, struct cgroup *cgrp, + u64 period_us, u64 quota_us, u64 burst_us) +{ + bpf_printk("CGRP SET %llu period=%lu quota=%ld burst=%lu", cgrp->kn->id, + period_us, quota_us, burst_us); +} + /* * Print out the online and possible CPU map using bpf_printk() as a * demonstration of using the cpumask kfuncs and ops.cpu_on/offline(). 
@@ -840,6 +860,9 @@ SCX_OPS_DEFINE(qmap_ops, .dump = (void *)qmap_dump, .dump_cpu = (void *)qmap_dump_cpu, .dump_task = (void *)qmap_dump_task, + .cgroup_init = (void *)qmap_cgroup_init, + .cgroup_set_weight = (void *)qmap_cgroup_set_weight, + .cgroup_set_bandwidth = (void *)qmap_cgroup_set_bandwidth, .cpu_online = (void *)qmap_cpu_online, .cpu_offline = (void *)qmap_cpu_offline, .init = (void *)qmap_init, diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index a5f7fdd0c1fb..001c4df9f7df 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -21,6 +21,8 @@ my %opt; my %repeat_tests; my %repeats; my %evals; +my @command_vars; +my %command_tmp_vars; #default opts my %default = ( @@ -216,6 +218,7 @@ my $patchcheck_type; my $patchcheck_start; my $patchcheck_cherry; my $patchcheck_end; +my $patchcheck_skip; my $build_time; my $install_time; @@ -380,6 +383,7 @@ my %option_map = ( "PATCHCHECK_START" => \$patchcheck_start, "PATCHCHECK_CHERRY" => \$patchcheck_cherry, "PATCHCHECK_END" => \$patchcheck_end, + "PATCHCHECK_SKIP" => \$patchcheck_skip, ); # Options may be used by other options, record them. 
@@ -900,14 +904,22 @@ sub set_eval { } sub set_variable { - my ($lvalue, $rvalue) = @_; + my ($lvalue, $rvalue, $command) = @_; + # Command line variables override all others + if (defined($command_tmp_vars{$lvalue})) { + return; + } if ($rvalue =~ /^\s*$/) { delete $variable{$lvalue}; } else { $rvalue = process_variables($rvalue); $variable{$lvalue} = $rvalue; } + + if (defined($command)) { + $command_tmp_vars{$lvalue} = 1; + } } sub process_compare { @@ -1286,6 +1298,19 @@ sub read_config { $test_case = __read_config $config, \$test_num; + foreach my $val (@command_vars) { + chomp $val; + my %command_overrides; + if ($val =~ m/^\s*([A-Z_\[\]\d]+)\s*=\s*(.*?)\s*$/) { + my $lvalue = $1; + my $rvalue = $2; + + set_value($lvalue, $rvalue, 1, \%command_overrides, "COMMAND LINE"); + } else { + die "Invalid option definition '$val'\n"; + } + } + # make sure we have all mandatory configs get_mandatory_configs; @@ -1371,7 +1396,10 @@ sub __eval_option { # If a variable contains itself, use the default var if (($var eq $name) && defined($opt{$var})) { $o = $opt{$var}; - $retval = "$retval$o"; + # Only append if the default doesn't contain itself + if ($o !~ m/\$\{$var\}/) { + $retval = "$retval$o"; + } } elsif (defined($opt{$o})) { $o = $opt{$o}; $retval = "$retval$o"; @@ -3511,11 +3539,37 @@ sub patchcheck { @list = reverse @list; } + my %skip_list; + my $will_skip = 0; + + if (defined($patchcheck_skip)) { + foreach my $s (split /\s+/, $patchcheck_skip) { + $s = `git log --pretty=oneline $s~1..$s`; + $s =~ s/^(\S+).*/$1/; + chomp $s; + $skip_list{$s} = 1; + $will_skip++; + } + } + doprint("Going to test the following commits:\n"); foreach my $l (@list) { + my $sha1 = $l; + $sha1 =~ s/^([[:xdigit:]]+).*/$1/; + next if (defined($skip_list{$sha1})); doprint "$l\n"; } + if ($will_skip) { + doprint("\nSkipping the following commits:\n"); + foreach my $l (@list) { + my $sha1 = $l; + $sha1 =~ s/^([[:xdigit:]]+).*/$1/; + next if (!defined($skip_list{$sha1})); + doprint "$l\n"; + 
} + } + my $save_clean = $noclean; my %ignored_warnings; @@ -3530,6 +3584,11 @@ sub patchcheck { my $sha1 = $item; $sha1 =~ s/^([[:xdigit:]]+).*/$1/; + if (defined($skip_list{$sha1})) { + doprint "\nSkipping \"$item\"\n\n"; + next; + } + doprint "\nProcessing commit \"$item\"\n\n"; run_command "git checkout $sha1" or @@ -4242,8 +4301,55 @@ sub cancel_test { die "\nCaught Sig Int, test interrupted: $!\n" } -$#ARGV < 1 or die "ktest.pl version: $VERSION\n usage: ktest.pl [config-file]\n"; +sub die_usage { + die << "EOF" +ktest.pl version: $VERSION + usage: ktest.pl [options] [config-file] + [options]: + -D value: Where value can act as an option override. + -D BUILD_NOCLEAN=1 + Sets global BUILD_NOCLEAN to 1 + -D TEST_TYPE[2]=build + Sets TEST_TYPE of test 2 to "build" + + It can also override all temp variables. + -D USE_TEMP_DIR:=1 + Will override all variables that use + "USE_TEMP_DIR=" + +EOF +; +} + +while ( $#ARGV >= 0 ) { + if ( $ARGV[0] eq "-D" ) { + shift; + die_usage if ($#ARGV < 1); + my $val = shift; + + if ($val =~ m/(.*?):=(.*)$/) { + set_variable($1, $2, 1); + } else { + $command_vars[$#command_vars + 1] = $val; + } + + } elsif ( $ARGV[0] =~ m/^-D(.*)/) { + my $val = $1; + shift; + + if ($val =~ m/(.*?):=(.*)$/) { + set_variable($1, $2, 1); + } else { + $command_vars[$#command_vars + 1] = $val; + } + } elsif ( $ARGV[0] eq "-h" ) { + die_usage; + } else { + last; + } +} +$#ARGV < 1 or die_usage; if ($#ARGV == 0) { $ktest_config = $ARGV[0]; if (! 
-f $ktest_config) { @@ -4466,6 +4572,10 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { doprint "RUNNING TEST $i of $opt{NUM_TESTS}$name with option $test_type $run_type$installme\n\n"; + # Always show which build directory and output directory is being used + doprint "BUILD_DIR=$builddir\n"; + doprint "OUTPUT_DIR=$outputdir\n\n"; + if (defined($pre_test)) { my $ret = run_command $pre_test; if (!$ret && defined($pre_test_die) && diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf index f43477a9b857..9c4c449a8f3e 100644 --- a/tools/testing/ktest/sample.conf +++ b/tools/testing/ktest/sample.conf @@ -1017,6 +1017,8 @@ # Note, PATCHCHECK_CHERRY requires PATCHCHECK_END to be defined. # (default 0) # +# PATCHCHECK_SKIP is an optional list of shas to skip testing +# # PATCHCHECK_TYPE is required and is the type of test to run: # build, boot, test. # diff --git a/tools/testing/nvdimm/pmem-dax.c b/tools/testing/nvdimm/pmem-dax.c index c1ec099a3b1d..05e763aab104 100644 --- a/tools/testing/nvdimm/pmem-dax.c +++ b/tools/testing/nvdimm/pmem-dax.c @@ -10,7 +10,7 @@ long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, long nr_pages, enum dax_access_mode mode, void **kaddr, - pfn_t *pfn) + unsigned long *pfn) { resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset; @@ -29,7 +29,7 @@ long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, *kaddr = pmem->virt_addr + offset; page = vmalloc_to_page(pmem->virt_addr + offset); if (pfn) - *pfn = page_to_pfn_t(page); + *pfn = page_to_pfn(page); pr_debug_ratelimited("%s: pmem: %p pgoff: %#lx pfn: %#lx\n", __func__, pmem, pgoff, page_to_pfn(page)); @@ -39,7 +39,7 @@ long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, if (kaddr) *kaddr = pmem->virt_addr + offset; if (pfn) - *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags); + *pfn = PHYS_PFN(pmem->phys_addr + offset); /* * If badblocks are present, limit known good range to the diff --git 
a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index e4313726fae3..f7e7bfe9bb85 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -8,7 +8,6 @@ #include <linux/ioport.h> #include <linux/module.h> #include <linux/types.h> -#include <linux/pfn_t.h> #include <linux/acpi.h> #include <linux/io.h> #include <linux/mm.h> @@ -135,16 +134,6 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) } EXPORT_SYMBOL_GPL(__wrap_devm_memremap_pages); -pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags) -{ - struct nfit_test_resource *nfit_res = get_nfit_res(addr); - - if (nfit_res) - flags &= ~PFN_MAP; - return phys_to_pfn_t(addr, flags); -} -EXPORT_SYMBOL(__wrap_phys_to_pfn_t); - void *__wrap_memremap(resource_size_t offset, size_t size, unsigned long flags) { diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h index b00583d1eace..b9047fb8ea4a 100644 --- a/tools/testing/nvdimm/test/nfit_test.h +++ b/tools/testing/nvdimm/test/nfit_test.h @@ -212,7 +212,6 @@ void __iomem *__wrap_devm_ioremap(struct device *dev, void *__wrap_devm_memremap(struct device *dev, resource_size_t offset, size_t size, unsigned long flags); void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); -pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags); void *__wrap_memremap(resource_size_t offset, size_t size, unsigned long flags); void __wrap_devm_memunmap(struct device *dev, void *addr); diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index 2c0b38301253..172700fb7784 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -35062,7 +35062,7 @@ void run_check_rcu_slowread(struct maple_tree *mt, struct rcu_test_struct *vals) int i; void *(*function)(void *); - pthread_t readers[20]; + pthread_t readers[30]; unsigned int index = vals->index; mt_set_in_rcu(mt); @@ 
-35080,14 +35080,14 @@ void run_check_rcu_slowread(struct maple_tree *mt, struct rcu_test_struct *vals) } } - usleep(5); /* small yield to ensure all threads are at least started. */ + usleep(3); /* small yield to ensure all threads are at least started. */ while (index <= vals->last) { mtree_store(mt, index, (index % 2 ? vals->entry2 : vals->entry3), GFP_KERNEL); index++; - usleep(5); + usleep(2); } while (i--) @@ -35098,6 +35098,7 @@ void run_check_rcu_slowread(struct maple_tree *mt, struct rcu_test_struct *vals) MT_BUG_ON(mt, !vals->seen_entry3); MT_BUG_ON(mt, !vals->seen_both); } + static noinline void __init check_rcu_simulated(struct maple_tree *mt) { unsigned long i, nr_entries = 1000; @@ -35668,6 +35669,18 @@ static noinline void __init check_prealloc(struct maple_tree *mt) allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated != 0); + + /* Chaining multiple preallocations */ + mt_set_in_rcu(mt); + mas_set_range(&mas, 800, 805); /* Slot store, should be 0 allocations */ + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); + allocated = mas_allocated(&mas); + MT_BUG_ON(mt, allocated != 0); + mas.last = 809; /* Node store */ + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); + allocated = mas_allocated(&mas); + MT_BUG_ON(mt, allocated != 1); + mas_store_prealloc(&mas, ptr); } /* End of preallocation testing */ diff --git a/tools/testing/selftests/cgroup/lib/cgroup_util.c b/tools/testing/selftests/cgroup/lib/cgroup_util.c index 8832f3d1cb61..0e89fcff4d05 100644 --- a/tools/testing/selftests/cgroup/lib/cgroup_util.c +++ b/tools/testing/selftests/cgroup/lib/cgroup_util.c @@ -19,6 +19,8 @@ #include "cgroup_util.h" #include "../../clone3/clone3_selftests.h" +bool cg_test_v1_named; + /* Returns read len on success, or -errno on failure. 
*/ ssize_t read_text(const char *path, char *buf, size_t max_len) { @@ -361,7 +363,7 @@ int cg_enter_current(const char *cgroup) int cg_enter_current_thread(const char *cgroup) { - return cg_write(cgroup, "cgroup.threads", "0"); + return cg_write(cgroup, CG_THREADS_FILE, "0"); } int cg_run(const char *cgroup, diff --git a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h index adb2bc193183..c69cab66254b 100644 --- a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h +++ b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h @@ -13,6 +13,10 @@ #define TEST_UID 65534 /* usually nobody, any !root is fine */ +#define CG_THREADS_FILE (!cg_test_v1_named ? "cgroup.threads" : "tasks") +#define CG_NAMED_NAME "selftest" +#define CG_PATH_FORMAT (!cg_test_v1_named ? "0::%s" : (":name=" CG_NAMED_NAME ":%s")) + /* * Checks if two given values differ by less than err% of their sum. */ @@ -65,3 +69,4 @@ extern int dirfd_open_opath(const char *dir); extern int cg_prepare_for_wait(const char *cgroup); extern int memcg_prepare_for_wait(const char *cgroup); extern int cg_wait_for(int fd); +extern bool cg_test_v1_named; diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index a5672a91d273..a360e2eb2eef 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -5,6 +5,8 @@ #include <linux/sched.h> #include <sys/types.h> #include <sys/mman.h> +#include <sys/mount.h> +#include <sys/stat.h> #include <sys/wait.h> #include <unistd.h> #include <fcntl.h> @@ -19,6 +21,9 @@ #include "cgroup_util.h" static bool nsdelegate; +#ifndef CLONE_NEWCGROUP +#define CLONE_NEWCGROUP 0 +#endif static int touch_anon(char *buf, size_t size) { @@ -148,6 +153,9 @@ static int test_cgcore_populated(const char *root) int cgroup_fd = -EBADF; pid_t pid; + if (cg_test_v1_named) + return KSFT_SKIP; + cg_test_a = cg_name(root, "cg_test_a"); 
cg_test_b = cg_name(root, "cg_test_a/cg_test_b"); cg_test_c = cg_name(root, "cg_test_a/cg_test_b/cg_test_c"); @@ -277,6 +285,9 @@ static int test_cgcore_invalid_domain(const char *root) int ret = KSFT_FAIL; char *grandparent = NULL, *parent = NULL, *child = NULL; + if (cg_test_v1_named) + return KSFT_SKIP; + grandparent = cg_name(root, "cg_test_grandparent"); parent = cg_name(root, "cg_test_grandparent/cg_test_parent"); child = cg_name(root, "cg_test_grandparent/cg_test_parent/cg_test_child"); @@ -339,6 +350,9 @@ static int test_cgcore_parent_becomes_threaded(const char *root) int ret = KSFT_FAIL; char *parent = NULL, *child = NULL; + if (cg_test_v1_named) + return KSFT_SKIP; + parent = cg_name(root, "cg_test_parent"); child = cg_name(root, "cg_test_parent/cg_test_child"); if (!parent || !child) @@ -378,7 +392,8 @@ static int test_cgcore_no_internal_process_constraint_on_threads(const char *roo int ret = KSFT_FAIL; char *parent = NULL, *child = NULL; - if (cg_read_strstr(root, "cgroup.controllers", "cpu") || + if (cg_test_v1_named || + cg_read_strstr(root, "cgroup.controllers", "cpu") || cg_write(root, "cgroup.subtree_control", "+cpu")) { ret = KSFT_SKIP; goto cleanup; @@ -430,6 +445,9 @@ static int test_cgcore_top_down_constraint_enable(const char *root) int ret = KSFT_FAIL; char *parent = NULL, *child = NULL; + if (cg_test_v1_named) + return KSFT_SKIP; + parent = cg_name(root, "cg_test_parent"); child = cg_name(root, "cg_test_parent/cg_test_child"); if (!parent || !child) @@ -465,6 +483,9 @@ static int test_cgcore_top_down_constraint_disable(const char *root) int ret = KSFT_FAIL; char *parent = NULL, *child = NULL; + if (cg_test_v1_named) + return KSFT_SKIP; + parent = cg_name(root, "cg_test_parent"); child = cg_name(root, "cg_test_parent/cg_test_child"); if (!parent || !child) @@ -506,6 +527,9 @@ static int test_cgcore_internal_process_constraint(const char *root) int ret = KSFT_FAIL; char *parent = NULL, *child = NULL; + if (cg_test_v1_named) + return 
KSFT_SKIP; + parent = cg_name(root, "cg_test_parent"); child = cg_name(root, "cg_test_parent/cg_test_child"); if (!parent || !child) @@ -573,7 +597,7 @@ static int test_cgcore_proc_migration(const char *root) } cg_enter_current(dst); - if (cg_read_lc(dst, "cgroup.threads") != n_threads + 1) + if (cg_read_lc(dst, CG_THREADS_FILE) != n_threads + 1) goto cleanup; ret = KSFT_PASS; @@ -605,7 +629,7 @@ static void *migrating_thread_fn(void *arg) char lines[3][PATH_MAX]; for (g = 1; g < 3; ++g) - snprintf(lines[g], sizeof(lines[g]), "0::%s", grps[g] + strlen(grps[0])); + snprintf(lines[g], sizeof(lines[g]), CG_PATH_FORMAT, grps[g] + strlen(grps[0])); for (i = 0; i < n_iterations; ++i) { cg_enter_current_thread(grps[(i % 2) + 1]); @@ -642,10 +666,12 @@ static int test_cgcore_thread_migration(const char *root) if (cg_create(grps[2])) goto cleanup; - if (cg_write(grps[1], "cgroup.type", "threaded")) - goto cleanup; - if (cg_write(grps[2], "cgroup.type", "threaded")) - goto cleanup; + if (!cg_test_v1_named) { + if (cg_write(grps[1], "cgroup.type", "threaded")) + goto cleanup; + if (cg_write(grps[2], "cgroup.type", "threaded")) + goto cleanup; + } if (cg_enter_current(grps[1])) goto cleanup; @@ -659,7 +685,7 @@ static int test_cgcore_thread_migration(const char *root) if (retval) goto cleanup; - snprintf(line, sizeof(line), "0::%s", grps[1] + strlen(grps[0])); + snprintf(line, sizeof(line), CG_PATH_FORMAT, grps[1] + strlen(grps[0])); if (proc_read_strstr(0, 1, "cgroup", line)) goto cleanup; @@ -842,6 +868,38 @@ cleanup: return ret; } +static int setup_named_v1_root(char *root, size_t len, const char *name) +{ + char options[PATH_MAX]; + int r; + + r = snprintf(root, len, "/mnt/cg_selftest"); + if (r < 0) + return r; + + r = snprintf(options, sizeof(options), "none,name=%s", name); + if (r < 0) + return r; + + r = mkdir(root, 0755); + if (r < 0 && errno != EEXIST) + return r; + + r = mount("none", root, "cgroup", 0, options); + if (r < 0) + return r; + + return 0; +} + +static 
void cleanup_named_v1_root(char *root) +{ + if (!cg_test_v1_named) + return; + umount(root); + rmdir(root); +} + #define T(x) { x, #x } struct corecg_test { int (*fn)(const char *root); @@ -867,13 +925,18 @@ int main(int argc, char *argv[]) char root[PATH_MAX]; int i, ret = EXIT_SUCCESS; - if (cg_find_unified_root(root, sizeof(root), &nsdelegate)) - ksft_exit_skip("cgroup v2 isn't mounted\n"); + if (cg_find_unified_root(root, sizeof(root), &nsdelegate)) { + if (setup_named_v1_root(root, sizeof(root), CG_NAMED_NAME)) + ksft_exit_skip("cgroup v2 isn't mounted and could not setup named v1 hierarchy\n"); + cg_test_v1_named = true; + goto post_v2_setup; + } if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) if (cg_write(root, "cgroup.subtree_control", "+memory")) ksft_exit_skip("Failed to set memory controller\n"); +post_v2_setup: for (i = 0; i < ARRAY_SIZE(tests); i++) { switch (tests[i].fn(root)) { case KSFT_PASS: @@ -889,5 +952,6 @@ int main(int argc, char *argv[]) } } + cleanup_named_v1_root(root); return ret; } diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c index a2b50af8e9ee..2a60e6c41940 100644 --- a/tools/testing/selftests/cgroup/test_cpu.c +++ b/tools/testing/selftests/cgroup/test_cpu.c @@ -2,6 +2,7 @@ #define _GNU_SOURCE #include <linux/limits.h> +#include <sys/param.h> #include <sys/sysinfo.h> #include <sys/wait.h> #include <errno.h> @@ -645,10 +646,16 @@ test_cpucg_nested_weight_underprovisioned(const char *root) static int test_cpucg_max(const char *root) { int ret = KSFT_FAIL; - long usage_usec, user_usec; - long usage_seconds = 1; - long expected_usage_usec = usage_seconds * USEC_PER_SEC; + long quota_usec = 1000; + long default_period_usec = 100000; /* cpu.max's default period */ + long duration_seconds = 1; + + long duration_usec = duration_seconds * USEC_PER_SEC; + long usage_usec, n_periods, remainder_usec, expected_usage_usec; char *cpucg; + char quota_buf[32]; + + snprintf(quota_buf, 
sizeof(quota_buf), "%ld", quota_usec); cpucg = cg_name(root, "cpucg_test"); if (!cpucg) @@ -657,13 +664,13 @@ static int test_cpucg_max(const char *root) if (cg_create(cpucg)) goto cleanup; - if (cg_write(cpucg, "cpu.max", "1000")) + if (cg_write(cpucg, "cpu.max", quota_buf)) goto cleanup; struct cpu_hog_func_param param = { .nprocs = 1, .ts = { - .tv_sec = usage_seconds, + .tv_sec = duration_seconds, .tv_nsec = 0, }, .clock_type = CPU_HOG_CLOCK_WALL, @@ -672,14 +679,19 @@ static int test_cpucg_max(const char *root) goto cleanup; usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec"); - user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec"); - if (user_usec <= 0) + if (usage_usec <= 0) goto cleanup; - if (user_usec >= expected_usage_usec) - goto cleanup; + /* + * The following calculation applies only since + * the cpu hog is set to run as per wall-clock time + */ + n_periods = duration_usec / default_period_usec; + remainder_usec = duration_usec - n_periods * default_period_usec; + expected_usage_usec + = n_periods * quota_usec + MIN(remainder_usec, quota_usec); - if (values_close(usage_usec, expected_usage_usec, 95)) + if (!values_close(usage_usec, expected_usage_usec, 10)) goto cleanup; ret = KSFT_PASS; @@ -698,10 +710,16 @@ cleanup: static int test_cpucg_max_nested(const char *root) { int ret = KSFT_FAIL; - long usage_usec, user_usec; - long usage_seconds = 1; - long expected_usage_usec = usage_seconds * USEC_PER_SEC; + long quota_usec = 1000; + long default_period_usec = 100000; /* cpu.max's default period */ + long duration_seconds = 1; + + long duration_usec = duration_seconds * USEC_PER_SEC; + long usage_usec, n_periods, remainder_usec, expected_usage_usec; char *parent, *child; + char quota_buf[32]; + + snprintf(quota_buf, sizeof(quota_buf), "%ld", quota_usec); parent = cg_name(root, "cpucg_parent"); child = cg_name(parent, "cpucg_child"); @@ -717,13 +735,13 @@ static int test_cpucg_max_nested(const char *root) if (cg_create(child)) goto 
cleanup; - if (cg_write(parent, "cpu.max", "1000")) + if (cg_write(parent, "cpu.max", quota_buf)) goto cleanup; struct cpu_hog_func_param param = { .nprocs = 1, .ts = { - .tv_sec = usage_seconds, + .tv_sec = duration_seconds, .tv_nsec = 0, }, .clock_type = CPU_HOG_CLOCK_WALL, @@ -732,14 +750,19 @@ static int test_cpucg_max_nested(const char *root) goto cleanup; usage_usec = cg_read_key_long(child, "cpu.stat", "usage_usec"); - user_usec = cg_read_key_long(child, "cpu.stat", "user_usec"); - if (user_usec <= 0) + if (usage_usec <= 0) goto cleanup; - if (user_usec >= expected_usage_usec) - goto cleanup; + /* + * The following calculation applies only since + * the cpu hog is set to run as per wall-clock time + */ + n_periods = duration_usec / default_period_usec; + remainder_usec = duration_usec - n_periods * default_period_usec; + expected_usage_usec + = n_periods * quota_usec + MIN(remainder_usec, quota_usec); - if (values_close(usage_usec, expected_usage_usec, 95)) + if (!values_close(usage_usec, expected_usage_usec, 10)) goto cleanup; ret = KSFT_PASS; diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c index 96693d8772be..63b3c9aad399 100644 --- a/tools/testing/selftests/cgroup/test_kmem.c +++ b/tools/testing/selftests/cgroup/test_kmem.c @@ -308,6 +308,7 @@ static int test_kmem_dead_cgroups(const char *root) char *parent; long dead; int i; + int max_time = 20; parent = cg_name(root, "kmem_dead_cgroups_test"); if (!parent) @@ -322,7 +323,7 @@ static int test_kmem_dead_cgroups(const char *root) if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30)) goto cleanup; - for (i = 0; i < 5; i++) { + for (i = 0; i < max_time; i++) { dead = cg_read_key_long(parent, "cgroup.stat", "nr_dying_descendants "); if (dead == 0) { @@ -334,6 +335,8 @@ static int test_kmem_dead_cgroups(const char *root) * let's wait a bit and repeat. 
*/ sleep(1); + if (i > 5) + printf("Waiting time longer than 5s; wait: %ds (dead: %ld)\n", i, dead); } cleanup: diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c index 40de679248b8..e1f578ca2841 100644 --- a/tools/testing/selftests/cgroup/test_zswap.c +++ b/tools/testing/selftests/cgroup/test_zswap.c @@ -338,7 +338,7 @@ static int test_zswap_writeback_one(const char *cgroup, bool wb) return -1; if (wb != !!zswpwb_after) { - ksft_print_msg("zswpwb_after is %ld while wb is %s", + ksft_print_msg("zswpwb_after is %ld while wb is %s\n", zswpwb_after, wb ? "enabled" : "disabled"); return -1; } diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index ff21524be458..5b230deb19e8 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -7,6 +7,7 @@ TEST_FILES = _damon_sysfs.py # functionality tests TEST_PROGS += sysfs.sh +TEST_PROGS += sysfs.py TEST_PROGS += sysfs_update_schemes_tried_regions_wss_estimation.py TEST_PROGS += damos_quota.py damos_quota_goal.py damos_apply_interval.py TEST_PROGS += damos_tried_regions.py damon_nr_regions.py @@ -15,6 +16,7 @@ TEST_PROGS += reclaim.sh lru_sort.sh # regression tests (reproducers of previously found bugs) TEST_PROGS += sysfs_update_removed_scheme_dir.sh TEST_PROGS += sysfs_update_schemes_tried_regions_hang.py +TEST_PROGS += sysfs_memcg_path_leak.sh EXTRA_CLEAN = __pycache__ diff --git a/tools/testing/selftests/damon/_common.sh b/tools/testing/selftests/damon/_common.sh new file mode 100644 index 000000000000..0279698f733e --- /dev/null +++ b/tools/testing/selftests/damon/_common.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +check_dependencies() +{ + if [ $EUID -ne 0 ] + then + echo "Run as root" + exit $ksft_skip + fi +} diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py index 5b1cb6b3ce4e..a0e6290833fb 100644 --- 
a/tools/testing/selftests/damon/_damon_sysfs.py +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -52,9 +52,9 @@ class DamosAccessPattern: if self.size is None: self.size = [0, 2**64 - 1] if self.nr_accesses is None: - self.nr_accesses = [0, 2**64 - 1] + self.nr_accesses = [0, 2**32 - 1] if self.age is None: - self.age = [0, 2**64 - 1] + self.age = [0, 2**32 - 1] def sysfs_dir(self): return os.path.join(self.scheme.sysfs_dir(), 'access_pattern') @@ -93,14 +93,16 @@ class DamosQuotaGoal: metric = None target_value = None current_value = None + nid = None effective_bytes = None quota = None # owner quota idx = None - def __init__(self, metric, target_value=10000, current_value=0): + def __init__(self, metric, target_value=10000, current_value=0, nid=0): self.metric = metric self.target_value = target_value self.current_value = current_value + self.nid = nid def sysfs_dir(self): return os.path.join(self.quota.sysfs_dir(), 'goals', '%d' % self.idx) @@ -118,6 +120,10 @@ class DamosQuotaGoal: self.current_value) if err is not None: return err + err = write_file(os.path.join(self.sysfs_dir(), 'nid'), self.nid) + if err is not None: + return err + return None class DamosQuota: @@ -125,12 +131,20 @@ class DamosQuota: ms = None # time quota goals = None # quota goals reset_interval_ms = None # quota reset interval + weight_sz_permil = None + weight_nr_accesses_permil = None + weight_age_permil = None scheme = None # owner scheme - def __init__(self, sz=0, ms=0, goals=None, reset_interval_ms=0): + def __init__(self, sz=0, ms=0, goals=None, reset_interval_ms=0, + weight_sz_permil=0, weight_nr_accesses_permil=0, + weight_age_permil=0): self.sz = sz self.ms = ms self.reset_interval_ms = reset_interval_ms + self.weight_sz_permil = weight_sz_permil + self.weight_nr_accesses_permil = weight_nr_accesses_permil + self.weight_age_permil = weight_age_permil self.goals = goals if goals is not None else [] for idx, goal in enumerate(self.goals): goal.idx = idx @@ -151,6 +165,20 @@ 
class DamosQuota: if err is not None: return err + err = write_file(os.path.join( + self.sysfs_dir(), 'weights', 'sz_permil'), self.weight_sz_permil) + if err is not None: + return err + err = write_file(os.path.join( + self.sysfs_dir(), 'weights', 'nr_accesses_permil'), + self.weight_nr_accesses_permil) + if err is not None: + return err + err = write_file(os.path.join( + self.sysfs_dir(), 'weights', 'age_permil'), self.weight_age_permil) + if err is not None: + return err + nr_goals_file = os.path.join(self.sysfs_dir(), 'goals', 'nr_goals') content, err = read_file(nr_goals_file) if err is not None: @@ -165,6 +193,178 @@ class DamosQuota: return err return None +class DamosWatermarks: + metric = None + interval = None + high = None + mid = None + low = None + scheme = None # owner scheme + + def __init__(self, metric='none', interval=0, high=0, mid=0, low=0): + self.metric = metric + self.interval = interval + self.high = high + self.mid = mid + self.low = low + + def sysfs_dir(self): + return os.path.join(self.scheme.sysfs_dir(), 'watermarks') + + def stage(self): + err = write_file(os.path.join(self.sysfs_dir(), 'metric'), self.metric) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'interval_us'), + self.interval) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'high'), self.high) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'mid'), self.mid) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'low'), self.low) + if err is not None: + return err + +class DamosFilter: + type_ = None + matching = None + allow = None + memcg_path = None + addr_start = None + addr_end = None + target_idx = None + min_ = None + max_ = None + idx = None + filters = None # owner filters + + def __init__(self, type_='anon', matching=False, allow=False, + memcg_path='', addr_start=0, addr_end=0, target_idx=0, min_=0, + max_=0): + self.type_ 
= type_ + self.matching = matching + self.allow = allow + self.memcg_path = memcg_path + self.addr_start = addr_start + self.addr_end = addr_end + self.target_idx = target_idx + self.min_ = min_ + self.max_ = max_ + + def sysfs_dir(self): + return os.path.join(self.filters.sysfs_dir(), '%d' % self.idx) + + def stage(self): + err = write_file(os.path.join(self.sysfs_dir(), 'type'), self.type_) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'matching'), + self.matching) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'allow'), self.allow) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'memcg_path'), + self.memcg_path) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'addr_start'), + self.addr_start) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'addr_end'), + self.addr_end) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'damon_target_idx'), + self.target_idx) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'min'), self.min_) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'max'), self.max_) + if err is not None: + return err + return None + +class DamosFilters: + name = None + filters = None + scheme = None # owner scheme + + def __init__(self, name, filters=[]): + self.name = name + self.filters = filters + for idx, filter_ in enumerate(self.filters): + filter_.idx = idx + filter_.filters = self + + def sysfs_dir(self): + return os.path.join(self.scheme.sysfs_dir(), self.name) + + def stage(self): + err = write_file(os.path.join(self.sysfs_dir(), 'nr_filters'), + len(self.filters)) + if err is not None: + return err + for filter_ in self.filters: + err = filter_.stage() + if err is not None: + return err + return None + +class DamosDest: + id = None + weight = None + idx
= None + dests = None # owner dests + + def __init__(self, id=0, weight=0): + self.id = id + self.weight = weight + + def sysfs_dir(self): + return os.path.join(self.dests.sysfs_dir(), '%d' % self.idx) + + def stage(self): + err = write_file(os.path.join(self.sysfs_dir(), 'id'), self.id) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'weight'), self.weight) + if err is not None: + return err + return None + +class DamosDests: + dests = None + scheme = None # owner scheme + + def __init__(self, dests=[]): + self.dests = dests + for idx, dest in enumerate(self.dests): + dest.idx = idx + dest.dests = self + + def sysfs_dir(self): + return os.path.join(self.scheme.sysfs_dir(), 'dests') + + def stage(self): + err = write_file(os.path.join(self.sysfs_dir(), 'nr_dests'), + len(self.dests)) + if err is not None: + return err + for dest in self.dests: + err = dest.stage() + if err is not None: + return err + return None + class DamosStats: nr_tried = None sz_tried = None @@ -190,8 +390,13 @@ class Damos: action = None access_pattern = None quota = None + watermarks = None + core_filters = None + ops_filters = None + filters = None apply_interval_us = None - # todo: Support watermarks, stats + target_nid = None + dests = None idx = None context = None tried_bytes = None @@ -199,12 +404,30 @@ class Damos: tried_regions = None def __init__(self, action='stat', access_pattern=DamosAccessPattern(), - quota=DamosQuota(), apply_interval_us=0): + quota=DamosQuota(), watermarks=DamosWatermarks(), + core_filters=[], ops_filters=[], filters=[], target_nid=0, + dests=DamosDests(), apply_interval_us=0): self.action = action self.access_pattern = access_pattern self.access_pattern.scheme = self self.quota = quota self.quota.scheme = self + self.watermarks = watermarks + self.watermarks.scheme = self + + self.core_filters = DamosFilters(name='core_filters', + filters=core_filters) + self.core_filters.scheme = self + self.ops_filters = 
DamosFilters(name='ops_filters', + filters=ops_filters) + self.ops_filters.scheme = self + self.filters = DamosFilters(name='filters', filters=filters) + self.filters.scheme = self + + self.target_nid = target_nid + self.dests = dests + self.dests.scheme = self + self.apply_interval_us = apply_interval_us def sysfs_dir(self): @@ -227,15 +450,26 @@ class Damos: if err is not None: return err - # disable watermarks - err = write_file( - os.path.join(self.sysfs_dir(), 'watermarks', 'metric'), 'none') + err = self.watermarks.stage() if err is not None: return err - # disable filters - err = write_file( - os.path.join(self.sysfs_dir(), 'filters', 'nr_filters'), '0') + err = self.core_filters.stage() + if err is not None: + return err + err = self.ops_filters.stage() + if err is not None: + return err + err = self.filters.stage() + if err is not None: + return err + + err = write_file(os.path.join(self.sysfs_dir(), 'target_nid'), '%d' % + self.target_nid) + if err is not None: + return err + + err = self.dests.stage() if err is not None: return err @@ -260,18 +494,56 @@ class DamonTarget: return write_file( os.path.join(self.sysfs_dir(), 'pid_target'), self.pid) +class IntervalsGoal: + access_bp = None + aggrs = None + min_sample_us = None + max_sample_us = None + attrs = None # owner DamonAttrs + + def __init__(self, access_bp=0, aggrs=0, min_sample_us=0, max_sample_us=0): + self.access_bp = access_bp + self.aggrs = aggrs + self.min_sample_us = min_sample_us + self.max_sample_us = max_sample_us + + def sysfs_dir(self): + return os.path.join(self.attrs.interval_sysfs_dir(), 'intervals_goal') + + def stage(self): + err = write_file( + os.path.join(self.sysfs_dir(), 'access_bp'), self.access_bp) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'aggrs'), self.aggrs) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'min_sample_us'), + self.min_sample_us) + if err is not None: + return err + err = 
write_file(os.path.join(self.sysfs_dir(), 'max_sample_us'), + self.max_sample_us) + if err is not None: + return err + return None + class DamonAttrs: sample_us = None aggr_us = None + intervals_goal = None update_us = None min_nr_regions = None max_nr_regions = None context = None - def __init__(self, sample_us=5000, aggr_us=100000, update_us=1000000, + def __init__(self, sample_us=5000, aggr_us=100000, + intervals_goal=IntervalsGoal(), update_us=1000000, min_nr_regions=10, max_nr_regions=1000): self.sample_us = sample_us self.aggr_us = aggr_us + self.intervals_goal = intervals_goal + self.intervals_goal.attrs = self self.update_us = update_us self.min_nr_regions = min_nr_regions self.max_nr_regions = max_nr_regions @@ -293,6 +565,9 @@ class DamonAttrs: self.aggr_us) if err is not None: return err + err = self.intervals_goal.stage() + if err is not None: + return err err = write_file(os.path.join(self.interval_sysfs_dir(), 'update_us'), self.update_us) if err is not None: @@ -408,6 +683,9 @@ class Kdamond: if err is not None: return err err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'on') + if err is not None: + return err + self.pid, err = read_file(os.path.join(self.sysfs_dir(), 'pid')) return err def stop(self): diff --git a/tools/testing/selftests/damon/drgn_dump_damon_status.py b/tools/testing/selftests/damon/drgn_dump_damon_status.py new file mode 100755 index 000000000000..7233369a3a44 --- /dev/null +++ b/tools/testing/selftests/damon/drgn_dump_damon_status.py @@ -0,0 +1,222 @@ +#!/usr/bin/env drgn +# SPDX-License-Identifier: GPL-2.0 + +''' +Read DAMON context data and dump as a json string. 
+''' +import drgn +from drgn import FaultError, NULL, Object, cast, container_of, execscript, offsetof, reinterpret, sizeof +from drgn.helpers.common import * +from drgn.helpers.linux import * + +import json +import sys + +if "prog" not in globals(): + try: + prog = drgn.get_default_prog() + except drgn.NoDefaultProgramError: + prog = drgn.program_from_kernel() + drgn.set_default_prog(prog) + +def to_dict(object, attr_name_converter): + d = {} + for attr_name, converter in attr_name_converter: + d[attr_name] = converter(getattr(object, attr_name)) + return d + +def ops_to_dict(ops): + return to_dict(ops, [ + ['id', int], + ]) + +def intervals_goal_to_dict(goal): + return to_dict(goal, [ + ['access_bp', int], + ['aggrs', int], + ['min_sample_us', int], + ['max_sample_us', int], + ]) + +def attrs_to_dict(attrs): + return to_dict(attrs, [ + ['sample_interval', int], + ['aggr_interval', int], + ['ops_update_interval', int], + ['intervals_goal', intervals_goal_to_dict], + ['min_nr_regions', int], + ['max_nr_regions', int], + ]) + +def addr_range_to_dict(addr_range): + return to_dict(addr_range, [ + ['start', int], + ['end', int], + ]) + +def region_to_dict(region): + return to_dict(region, [ + ['ar', addr_range_to_dict], + ['sampling_addr', int], + ['nr_accesses', int], + ['nr_accesses_bp', int], + ['age', int], + ]) + +def regions_to_list(regions): + return [region_to_dict(r) + for r in list_for_each_entry( + 'struct damon_region', regions.address_of_(), 'list')] + +def target_to_dict(target): + return to_dict(target, [ + ['pid', int], + ['nr_regions', int], + ['regions_list', regions_to_list], + ]) + +def targets_to_list(targets): + return [target_to_dict(t) + for t in list_for_each_entry( + 'struct damon_target', targets.address_of_(), 'list')] + +def damos_access_pattern_to_dict(pattern): + return to_dict(pattern, [ + ['min_sz_region', int], + ['max_sz_region', int], + ['min_nr_accesses', int], + ['max_nr_accesses', int], + ['min_age_region', int], + 
['max_age_region', int], + ]) + +def damos_quota_goal_to_dict(goal): + return to_dict(goal, [ + ['metric', int], + ['target_value', int], + ['current_value', int], + ['last_psi_total', int], + ['nid', int], + ]) + +def damos_quota_goals_to_list(goals): + return [damos_quota_goal_to_dict(g) + for g in list_for_each_entry( + 'struct damos_quota_goal', goals.address_of_(), 'list')] + +def damos_quota_to_dict(quota): + return to_dict(quota, [ + ['reset_interval', int], + ['ms', int], ['sz', int], + ['goals', damos_quota_goals_to_list], + ['esz', int], + ['weight_sz', int], + ['weight_nr_accesses', int], + ['weight_age', int], + ]) + +def damos_watermarks_to_dict(watermarks): + return to_dict(watermarks, [ + ['metric', int], + ['interval', int], + ['high', int], ['mid', int], ['low', int], + ]) + +def damos_migrate_dests_to_dict(dests): + nr_dests = int(dests.nr_dests) + node_id_arr = [] + weight_arr = [] + for i in range(nr_dests): + node_id_arr.append(int(dests.node_id_arr[i])) + weight_arr.append(int(dests.weight_arr[i])) + return { + 'node_id_arr': node_id_arr, + 'weight_arr': weight_arr, + 'nr_dests': nr_dests, + } + +def damos_filter_to_dict(damos_filter): + filter_type_keyword = { + 0: 'anon', + 1: 'active', + 2: 'memcg', + 3: 'young', + 4: 'hugepage_size', + 5: 'unmapped', + 6: 'addr', + 7: 'target' + } + dict_ = { + 'type': filter_type_keyword[int(damos_filter.type)], + 'matching': bool(damos_filter.matching), + 'allow': bool(damos_filter.allow), + } + type_ = dict_['type'] + if type_ == 'memcg': + dict_['memcg_id'] = int(damos_filter.memcg_id) + elif type_ == 'addr': + dict_['addr_range'] = [int(damos_filter.addr_range.start), + int(damos_filter.addr_range.end)] + elif type_ == 'target': + dict_['target_idx'] = int(damos_filter.target_idx) + elif type_ == 'hugepage_size': + dict_['sz_range'] = [int(damos_filter.sz_range.min), + int(damos_filter.sz_range.max)] + return dict_ + +def scheme_to_dict(scheme): + dict_ = to_dict(scheme, [ + ['pattern',
damos_access_pattern_to_dict], + ['action', int], + ['apply_interval_us', int], + ['quota', damos_quota_to_dict], + ['wmarks', damos_watermarks_to_dict], + ['target_nid', int], + ['migrate_dests', damos_migrate_dests_to_dict], + ]) + filters = [] + for f in list_for_each_entry( + 'struct damos_filter', scheme.filters.address_of_(), 'list'): + filters.append(damos_filter_to_dict(f)) + dict_['filters'] = filters + ops_filters = [] + for f in list_for_each_entry( + 'struct damos_filter', scheme.ops_filters.address_of_(), 'list'): + ops_filters.append(damos_filter_to_dict(f)) + dict_['ops_filters'] = ops_filters + + return dict_ + +def schemes_to_list(schemes): + return [scheme_to_dict(s) + for s in list_for_each_entry( + 'struct damos', schemes.address_of_(), 'list')] + +def damon_ctx_to_dict(ctx): + return to_dict(ctx, [ + ['ops', ops_to_dict], + ['attrs', attrs_to_dict], + ['adaptive_targets', targets_to_list], + ['schemes', schemes_to_list], + ]) + +def main(): + if len(sys.argv) < 3: + print('Usage: %s <kdamond pid> <file>' % sys.argv[0]) + exit(1) + + pid = int(sys.argv[1]) + file_to_store = sys.argv[2] + + kthread_data = cast('struct kthread *', + find_task(prog, pid).worker_private).data + ctx = cast('struct damon_ctx *', kthread_data) + status = {'contexts': [damon_ctx_to_dict(ctx)]} + if file_to_store == 'stdout': + print(json.dumps(status, indent=4)) + else: + with open(file_to_store, 'w') as f: + json.dump(status, f, indent=4) + +if __name__ == '__main__': + main() diff --git a/tools/testing/selftests/damon/lru_sort.sh b/tools/testing/selftests/damon/lru_sort.sh index 61b80197c896..1e4849db78a9 100755 --- a/tools/testing/selftests/damon/lru_sort.sh +++ b/tools/testing/selftests/damon/lru_sort.sh @@ -1,14 +1,12 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +source _common.sh + # Kselftest framework requirement - SKIP code is 4. 
ksft_skip=4 -if [ $EUID -ne 0 ] -then - echo "Run as root" - exit $ksft_skip -fi +check_dependencies damon_lru_sort_enabled="/sys/module/damon_lru_sort/parameters/enabled" if [ ! -f "$damon_lru_sort_enabled" ] diff --git a/tools/testing/selftests/damon/reclaim.sh b/tools/testing/selftests/damon/reclaim.sh index 78dbc2334cbe..e56ceb035129 100755 --- a/tools/testing/selftests/damon/reclaim.sh +++ b/tools/testing/selftests/damon/reclaim.sh @@ -1,14 +1,12 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +source _common.sh + # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 -if [ $EUID -ne 0 ] -then - echo "Run as root" - exit $ksft_skip -fi +check_dependencies damon_reclaim_enabled="/sys/module/damon_reclaim/parameters/enabled" if [ ! -f "$damon_reclaim_enabled" ] diff --git a/tools/testing/selftests/damon/sysfs.py b/tools/testing/selftests/damon/sysfs.py new file mode 100755 index 000000000000..2666c6f0f1a5 --- /dev/null +++ b/tools/testing/selftests/damon/sysfs.py @@ -0,0 +1,272 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import json +import os +import subprocess + +import _damon_sysfs + +def dump_damon_status_dict(pid): + try: + subprocess.check_output(['which', 'drgn'], stderr=subprocess.DEVNULL) + except: + return None, 'drgn not found' + file_dir = os.path.dirname(os.path.abspath(__file__)) + dump_script = os.path.join(file_dir, 'drgn_dump_damon_status.py') + rc = subprocess.call(['drgn', dump_script, pid, 'damon_dump_output'], + stderr=subprocess.DEVNULL) + if rc != 0: + return None, 'drgn fail' + try: + with open('damon_dump_output', 'r') as f: + return json.load(f), None + except Exception as e: + return None, 'json.load fail (%s)' % e + +def fail(expectation, status): + print('unexpected %s' % expectation) + print(json.dumps(status, indent=4)) + exit(1) + +def assert_true(condition, expectation, status): + if condition is not True: + fail(expectation, status) + +def assert_watermarks_committed(watermarks, dump): + 
wmark_metric_val = { + 'none': 0, + 'free_mem_rate': 1, + } + assert_true(dump['metric'] == wmark_metric_val[watermarks.metric], + 'metric', dump) + assert_true(dump['interval'] == watermarks.interval, 'interval', dump) + assert_true(dump['high'] == watermarks.high, 'high', dump) + assert_true(dump['mid'] == watermarks.mid, 'mid', dump) + assert_true(dump['low'] == watermarks.low, 'low', dump) + +def assert_quota_goal_committed(qgoal, dump): + metric_val = { + 'user_input': 0, + 'some_mem_psi_us': 1, + 'node_mem_used_bp': 2, + 'node_mem_free_bp': 3, + } + assert_true(dump['metric'] == metric_val[qgoal.metric], 'metric', dump) + assert_true(dump['target_value'] == qgoal.target_value, 'target_value', + dump) + if qgoal.metric == 'user_input': + assert_true(dump['current_value'] == qgoal.current_value, + 'current_value', dump) + assert_true(dump['nid'] == qgoal.nid, 'nid', dump) + +def assert_quota_committed(quota, dump): + assert_true(dump['reset_interval'] == quota.reset_interval_ms, + 'reset_interval', dump) + assert_true(dump['ms'] == quota.ms, 'ms', dump) + assert_true(dump['sz'] == quota.sz, 'sz', dump) + for idx, qgoal in enumerate(quota.goals): + assert_quota_goal_committed(qgoal, dump['goals'][idx]) + assert_true(dump['weight_sz'] == quota.weight_sz_permil, 'weight_sz', dump) + assert_true(dump['weight_nr_accesses'] == quota.weight_nr_accesses_permil, + 'weight_nr_accesses', dump) + assert_true( + dump['weight_age'] == quota.weight_age_permil, 'weight_age', dump) + + +def assert_migrate_dests_committed(dests, dump): + assert_true(dump['nr_dests'] == len(dests.dests), 'nr_dests', dump) + for idx, dest in enumerate(dests.dests): + assert_true(dump['node_id_arr'][idx] == dest.id, 'node_id', dump) + assert_true(dump['weight_arr'][idx] == dest.weight, 'weight', dump) + +def assert_filter_committed(filter_, dump): + assert_true(filter_.type_ == dump['type'], 'type', dump) + assert_true(filter_.matching == dump['matching'], 'matching', dump) + 
assert_true(filter_.allow == dump['allow'], 'allow', dump) + # TODO: check memcg_path and memcg_id if type is memcg + if filter_.type_ == 'addr': + assert_true([filter_.addr_start, filter_.addr_end] == + dump['addr_range'], 'addr_range', dump) + elif filter_.type_ == 'target': + assert_true(filter_.target_idx == dump['target_idx'], 'target_idx', + dump) + elif filter_.type_ == 'hugepage_size': + assert_true([filter_.min_, filter_.max_] == dump['sz_range'], + 'sz_range', dump) + +def assert_access_pattern_committed(pattern, dump): + assert_true(dump['min_sz_region'] == pattern.size[0], 'min_sz_region', + dump) + assert_true(dump['max_sz_region'] == pattern.size[1], 'max_sz_region', + dump) + assert_true(dump['min_nr_accesses'] == pattern.nr_accesses[0], + 'min_nr_accesses', dump) + assert_true(dump['max_nr_accesses'] == pattern.nr_accesses[1], + 'max_nr_accesses', dump) + assert_true(dump['min_age_region'] == pattern.age[0], 'min_age_region', + dump) + assert_true(dump['max_age_region'] == pattern.age[1], 'miaxage_region', + dump) + +def assert_scheme_committed(scheme, dump): + assert_access_pattern_committed(scheme.access_pattern, dump['pattern']) + action_val = { + 'willneed': 0, + 'cold': 1, + 'pageout': 2, + 'hugepage': 3, + 'nohugeapge': 4, + 'lru_prio': 5, + 'lru_deprio': 6, + 'migrate_hot': 7, + 'migrate_cold': 8, + 'stat': 9, + } + assert_true(dump['action'] == action_val[scheme.action], 'action', dump) + assert_true(dump['apply_interval_us'] == scheme. 
apply_interval_us, + 'apply_interval_us', dump) + assert_true(dump['target_nid'] == scheme.target_nid, 'target_nid', dump) + assert_migrate_dests_committed(scheme.dests, dump['migrate_dests']) + assert_quota_committed(scheme.quota, dump['quota']) + assert_watermarks_committed(scheme.watermarks, dump['wmarks']) + # TODO: test filters directory + for idx, f in enumerate(scheme.core_filters.filters): + assert_filter_committed(f, dump['filters'][idx]) + for idx, f in enumerate(scheme.ops_filters.filters): + assert_filter_committed(f, dump['ops_filters'][idx]) + +def assert_schemes_committed(schemes, dump): + assert_true(len(schemes) == len(dump), 'len_schemes', dump) + for idx, scheme in enumerate(schemes): + assert_scheme_committed(scheme, dump[idx]) + +def assert_monitoring_attrs_committed(attrs, dump): + assert_true(dump['sample_interval'] == attrs.sample_us, 'sample_interval', + dump) + assert_true(dump['aggr_interval'] == attrs.aggr_us, 'aggr_interval', dump) + assert_true(dump['intervals_goal']['access_bp'] == + attrs.intervals_goal.access_bp, 'access_bp', + dump['intervals_goal']) + assert_true(dump['intervals_goal']['aggrs'] == attrs.intervals_goal.aggrs, + 'aggrs', dump['intervals_goal']) + assert_true(dump['intervals_goal']['min_sample_us'] == + attrs.intervals_goal.min_sample_us, 'min_sample_us', + dump['intervals_goal']) + assert_true(dump['intervals_goal']['max_sample_us'] == + attrs.intervals_goal.max_sample_us, 'max_sample_us', + dump['intervals_goal']) + + assert_true(dump['ops_update_interval'] == attrs.update_us, + 'ops_update_interval', dump) + assert_true(dump['min_nr_regions'] == attrs.min_nr_regions, + 'min_nr_regions', dump) + assert_true(dump['max_nr_regions'] == attrs.max_nr_regions, + 'max_nr_regions', dump) + +def assert_ctx_committed(ctx, dump): + ops_val = { + 'vaddr': 0, + 'fvaddr': 1, + 'paddr': 2, + } + assert_true(dump['ops']['id'] == ops_val[ctx.ops], 'ops_id', dump) + assert_monitoring_attrs_committed(ctx.monitoring_attrs, 
dump['attrs']) + assert_schemes_committed(ctx.schemes, dump['schemes']) + +def assert_ctxs_committed(ctxs, dump): + assert_true(len(ctxs) == len(dump), 'ctxs length', dump) + for idx, ctx in enumerate(ctxs): + assert_ctx_committed(ctx, dump[idx]) + +def main(): + kdamonds = _damon_sysfs.Kdamonds( + [_damon_sysfs.Kdamond( + contexts=[_damon_sysfs.DamonCtx( + targets=[_damon_sysfs.DamonTarget(pid=-1)], + schemes=[_damon_sysfs.Damos()], + )])]) + err = kdamonds.start() + if err is not None: + print('kdamond start failed: %s' % err) + exit(1) + + status, err = dump_damon_status_dict(kdamonds.kdamonds[0].pid) + if err is not None: + print(err) + kdamonds.stop() + exit(1) + + assert_ctxs_committed(kdamonds.kdamonds[0].contexts, status['contexts']) + + context = _damon_sysfs.DamonCtx( + monitoring_attrs=_damon_sysfs.DamonAttrs( + sample_us=100000, aggr_us=2000000, + intervals_goal=_damon_sysfs.IntervalsGoal( + access_bp=400, aggrs=3, min_sample_us=5000, + max_sample_us=10000000), + update_us=2000000), + schemes=[_damon_sysfs.Damos( + action='pageout', + access_pattern=_damon_sysfs.DamosAccessPattern( + size=[4096, 2**10], + nr_accesses=[3, 317], + age=[5,71]), + quota=_damon_sysfs.DamosQuota( + sz=100*1024*1024, ms=100, + goals=[_damon_sysfs.DamosQuotaGoal( + metric='node_mem_used_bp', + target_value=9950, + nid=1)], + reset_interval_ms=1500, + weight_sz_permil=20, + weight_nr_accesses_permil=200, + weight_age_permil=1000), + watermarks=_damon_sysfs.DamosWatermarks( + metric = 'free_mem_rate', interval = 500000, # 500 ms + high = 500, mid = 400, low = 50), + target_nid=1, + apply_interval_us=1000000, + dests=_damon_sysfs.DamosDests( + dests=[_damon_sysfs.DamosDest(id=1, weight=30), + _damon_sysfs.DamosDest(id=0, weight=70)]), + core_filters=[ + _damon_sysfs.DamosFilter(type_='addr', matching=True, + allow=False, addr_start=42, + addr_end=4242), + ], + ops_filters=[ + _damon_sysfs.DamosFilter(type_='anon', matching=True, + allow=True), + ], + )]) + context.idx = 0 + 
context.kdamond = kdamonds.kdamonds[0] + kdamonds.kdamonds[0].contexts = [context] + kdamonds.kdamonds[0].commit() + + status, err = dump_damon_status_dict(kdamonds.kdamonds[0].pid) + if err is not None: + print(err) + exit(1) + + assert_ctxs_committed(kdamonds.kdamonds[0].contexts, status['contexts']) + + # test online commitment of minimum context. + context = _damon_sysfs.DamonCtx() + context.idx = 0 + context.kdamond = kdamonds.kdamonds[0] + kdamonds.kdamonds[0].contexts = [context] + kdamonds.kdamonds[0].commit() + + status, err = dump_damon_status_dict(kdamonds.kdamonds[0].pid) + if err is not None: + print(err) + exit(1) + + assert_ctxs_committed(kdamonds.kdamonds[0].contexts, status['contexts']) + + kdamonds.stop() + +if __name__ == '__main__': + main() diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh index e9a976d296e2..83e3b7f63d81 100755 --- a/tools/testing/selftests/damon/sysfs.sh +++ b/tools/testing/selftests/damon/sysfs.sh @@ -1,6 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +source _common.sh + # Kselftest frmework requirement - SKIP code is 4. ksft_skip=4 @@ -364,14 +366,5 @@ test_damon_sysfs() test_kdamonds "$damon_sysfs/kdamonds" } -check_dependencies() -{ - if [ $EUID -ne 0 ] - then - echo "Run as root" - exit $ksft_skip - fi -} - check_dependencies test_damon_sysfs "/sys/kernel/mm/damon/admin" diff --git a/tools/testing/selftests/damon/sysfs_memcg_path_leak.sh b/tools/testing/selftests/damon/sysfs_memcg_path_leak.sh new file mode 100755 index 000000000000..64c5d8c518a4 --- /dev/null +++ b/tools/testing/selftests/damon/sysfs_memcg_path_leak.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +if [ $EUID -ne 0 ] +then + echo "Run as root" + exit $ksft_skip +fi + +damon_sysfs="/sys/kernel/mm/damon/admin" +if [ ! 
-d "$damon_sysfs" ] +then + echo "damon sysfs not found" + exit $ksft_skip +fi + +# ensure filter directory +echo 1 > "$damon_sysfs/kdamonds/nr_kdamonds" +echo 1 > "$damon_sysfs/kdamonds/0/contexts/nr_contexts" +echo 1 > "$damon_sysfs/kdamonds/0/contexts/0/schemes/nr_schemes" +echo 1 > "$damon_sysfs/kdamonds/0/contexts/0/schemes/0/filters/nr_filters" + +filter_dir="$damon_sysfs/kdamonds/0/contexts/0/schemes/0/filters/0" + +before_kb=$(grep Slab /proc/meminfo | awk '{print $2}') + +# try to leak 3000 KiB +for i in {1..102400}; +do + echo "012345678901234567890123456789" > "$filter_dir/memcg_path" +done + +after_kb=$(grep Slab /proc/meminfo | awk '{print $2}') +# expect up to 1500 KiB free from other tasks memory +expected_after_kb_max=$((before_kb + 1500)) + +if [ "$after_kb" -gt "$expected_after_kb_max" ] +then + echo "maybe memcg_path are leaking: $before_kb -> $after_kb" + exit 1 +else + exit 0 +fi diff --git a/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh index ade35576e748..35fc32beeaf7 100755 --- a/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh +++ b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh @@ -1,14 +1,12 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +source _common.sh + # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 -if [ $EUID -ne 0 ] -then - echo "Run as root" - exit $ksft_skip -fi +check_dependencies damon_sysfs="/sys/kernel/mm/damon/admin" if [ ! 
-d "$damon_sysfs" ] diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c b/tools/testing/selftests/drivers/dma-buf/udmabuf.c index 6062723a172e..77aa2897e79f 100644 --- a/tools/testing/selftests/drivers/dma-buf/udmabuf.c +++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c @@ -138,7 +138,7 @@ int main(int argc, char *argv[]) void *addr1, *addr2; ksft_print_header(); - ksft_set_plan(6); + ksft_set_plan(7); devfd = open("/dev/udmabuf", O_RDWR); if (devfd < 0) { @@ -250,6 +250,24 @@ int main(int argc, char *argv[]) close(buf); close(memfd); + + /* same test as above but we pin first before writing to memfd */ + page_size = getpagesize() * 512; /* 2 MB */ + size = MEMFD_SIZE * page_size; + memfd = create_memfd_with_seals(size, true); + buf = create_udmabuf_list(devfd, memfd, size); + addr2 = mmap_fd(buf, NUM_PAGES * NUM_ENTRIES * getpagesize()); + addr1 = mmap_fd(memfd, size); + write_to_memfd(addr1, size, 'a'); + write_to_memfd(addr1, size, 'b'); + ret = compare_chunks(addr1, addr2, size); + if (ret < 0) + ksft_test_result_fail("%s: [FAIL,test-7]\n", TEST_PREFIX); + else + ksft_test_result_pass("%s: [PASS,test-7]\n", TEST_PREFIX); + + close(buf); + close(memfd); close(devfd); ksft_print_msg("%s: ok\n", TEST_PREFIX); diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc index 73f6c6fcecab..2506f464811b 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc @@ -16,6 +16,18 @@ ocnt=`cat enabled_functions | wc -l` echo "f:myevent1 $PLACE" >> dynamic_events +echo "f:myevent2 $PLACE%return" >> dynamic_events + +# add another event +echo "f:myevent3 $PLACE2" >> dynamic_events + +grep -q myevent1 dynamic_events +grep -q myevent2 dynamic_events +grep -q myevent3 dynamic_events +test -d events/fprobes/myevent1 +test -d events/fprobes/myevent2 + +echo 1 > 
events/fprobes/myevent1/enable # Make sure the event is attached and is the only one grep -q $PLACE enabled_functions cnt=`cat enabled_functions | wc -l` @@ -23,29 +35,22 @@ if [ $cnt -ne $((ocnt + 1)) ]; then exit_fail fi -echo "f:myevent2 $PLACE%return" >> dynamic_events - +echo 1 > events/fprobes/myevent2/enable # It should till be the only attached function cnt=`cat enabled_functions | wc -l` if [ $cnt -ne $((ocnt + 1)) ]; then exit_fail fi -# add another event -echo "f:myevent3 $PLACE2" >> dynamic_events - +echo 1 > events/fprobes/myevent3/enable +# If the function is different, the attached function should be increased grep -q $PLACE2 enabled_functions cnt=`cat enabled_functions | wc -l` if [ $cnt -ne $((ocnt + 2)) ]; then exit_fail fi -grep -q myevent1 dynamic_events -grep -q myevent2 dynamic_events -grep -q myevent3 dynamic_events -test -d events/fprobes/myevent1 -test -d events/fprobes/myevent2 - +echo 0 > events/fprobes/myevent2/enable echo "-:myevent2" >> dynamic_events grep -q myevent1 dynamic_events @@ -57,6 +62,7 @@ if [ $cnt -ne $((ocnt + 2)) ]; then exit_fail fi +echo 0 > events/fprobes/enable echo > dynamic_events # Should have none left @@ -67,12 +73,14 @@ fi echo "f:myevent4 $PLACE" >> dynamic_events +echo 1 > events/fprobes/myevent4/enable # Should only have one enabled cnt=`cat enabled_functions | wc -l` if [ $cnt -ne $((ocnt + 1)) ]; then exit_fail fi +echo 0 > events/fprobes/enable echo > dynamic_events # Should have none left diff --git a/tools/testing/selftests/hid/tests/base.py b/tools/testing/selftests/hid/tests/base.py index 3a465768e507..5175cf235b2f 100644 --- a/tools/testing/selftests/hid/tests/base.py +++ b/tools/testing/selftests/hid/tests/base.py @@ -5,6 +5,7 @@ # Copyright (c) 2017 Benjamin Tissoires <benjamin.tissoires@gmail.com> # Copyright (c) 2017 Red Hat, Inc. 
+import dataclasses import libevdev import os import pytest @@ -145,6 +146,18 @@ class UHIDTestDevice(BaseDevice): self.name = name +@dataclasses.dataclass +class HidBpf: + object_name: str + has_rdesc_fixup: bool + + +@dataclasses.dataclass +class KernelModule: + driver_name: str + module_name: str + + class BaseTestCase: class TestUhid(object): syn_event = libevdev.InputEvent(libevdev.EV_SYN.SYN_REPORT) # type: ignore @@ -155,20 +168,20 @@ class BaseTestCase: # List of kernel modules to load before starting the test # if any module is not available (not compiled), the test will skip. - # Each element is a tuple '(kernel driver name, kernel module)', - # for example ("playstation", "hid-playstation") - kernel_modules: List[Tuple[str, str]] = [] + # Each element is a KernelModule object, for example + # KernelModule("playstation", "hid-playstation") + kernel_modules: List[KernelModule] = [] # List of in kernel HID-BPF object files to load # before starting the test # Any existing pre-loaded HID-BPF module will be removed # before the ones in this list will be manually loaded. 
- # Each Element is a tuple '(hid_bpf_object, rdesc_fixup_present)', - # for example '("xppen-ArtistPro16Gen2.bpf.o", True)' - # If 'rdesc_fixup_present' is True, the test needs to wait + # Each Element is a HidBpf object, for example + # 'HidBpf("xppen-ArtistPro16Gen2.bpf.o", True)' + # If 'has_rdesc_fixup' is True, the test needs to wait # for one unbind and rebind before it can be sure the kernel is # ready - hid_bpfs: List[Tuple[str, bool]] = [] + hid_bpfs: List[HidBpf] = [] def assertInputEventsIn(self, expected_events, effective_events): effective_events = effective_events.copy() @@ -232,25 +245,26 @@ class BaseTestCase: @pytest.fixture() def load_kernel_module(self): - for kernel_driver, kernel_module in self.kernel_modules: - self._load_kernel_module(kernel_driver, kernel_module) + for k in self.kernel_modules: + self._load_kernel_module(k.driver_name, k.module_name) yield def load_hid_bpfs(self): + # this function will only work when run in the kernel tree script_dir = Path(os.path.dirname(os.path.realpath(__file__))) root_dir = (script_dir / "../../../../..").resolve() bpf_dir = root_dir / "drivers/hid/bpf/progs" + if not bpf_dir.exists(): + pytest.skip("looks like we are not in the kernel tree, skipping") + udev_hid_bpf = shutil.which("udev-hid-bpf") if not udev_hid_bpf: pytest.skip("udev-hid-bpf not found in $PATH, skipping") - wait = False - for _, rdesc_fixup in self.hid_bpfs: - if rdesc_fixup: - wait = True + wait = any(b.has_rdesc_fixup for b in self.hid_bpfs) - for hid_bpf, _ in self.hid_bpfs: + for hid_bpf in self.hid_bpfs: # We need to start `udev-hid-bpf` in the background # and dispatch uhid events in case the kernel needs # to fetch features on the device @@ -260,13 +274,13 @@ class BaseTestCase: "--verbose", "add", str(self.uhdev.sys_path), - str(bpf_dir / hid_bpf), + str(bpf_dir / hid_bpf.object_name), ], ) while process.poll() is None: self.uhdev.dispatch(1) - if process.poll() != 0: + if process.returncode != 0: pytest.fail( f"Couldn't 
insert hid-bpf program '{hid_bpf}', marking the test as failed" ) diff --git a/tools/testing/selftests/hid/tests/base_device.py b/tools/testing/selftests/hid/tests/base_device.py index e0515be97f83..59465c58d94d 100644 --- a/tools/testing/selftests/hid/tests/base_device.py +++ b/tools/testing/selftests/hid/tests/base_device.py @@ -18,10 +18,12 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. +import dataclasses import fcntl import functools import libevdev import os +import threading try: import pyudev @@ -104,6 +106,12 @@ class PowerSupply(object): return self._type.str_value +@dataclasses.dataclass +class HidReadiness: + is_ready: bool = False + count: int = 0 + + class HIDIsReady(object): """ Companion class that binds to a kernel mechanism @@ -115,18 +123,18 @@ class HIDIsReady(object): def __init__(self: "HIDIsReady", uhid: UHIDDevice) -> None: self.uhid = uhid - def is_ready(self: "HIDIsReady") -> bool: + def is_ready(self: "HIDIsReady") -> HidReadiness: """ Overwrite in subclasses: should return True or False whether the attached uhid device is ready or not. 
""" - return False + return HidReadiness() class UdevHIDIsReady(HIDIsReady): _pyudev_context: ClassVar[Optional[pyudev.Context]] = None _pyudev_monitor: ClassVar[Optional[pyudev.Monitor]] = None - _uhid_devices: ClassVar[Dict[int, Tuple[bool, int]]] = {} + _uhid_devices: ClassVar[Dict[int, HidReadiness]] = {} def __init__(self: "UdevHIDIsReady", uhid: UHIDDevice) -> None: super().__init__(uhid) @@ -157,18 +165,19 @@ class UdevHIDIsReady(HIDIsReady): id = int(event.sys_path.strip().split(".")[-1], 16) - device_ready, count = cls._uhid_devices.get(id, (False, 0)) + readiness = cls._uhid_devices.setdefault(id, HidReadiness()) ready = event.action == "bind" - if not device_ready and ready: - count += 1 - cls._uhid_devices[id] = (ready, count) + if not readiness.is_ready and ready: + readiness.count += 1 + + readiness.is_ready = ready - def is_ready(self: "UdevHIDIsReady") -> Tuple[bool, int]: + def is_ready(self: "UdevHIDIsReady") -> HidReadiness: try: return self._uhid_devices[self.uhid.hid_id] except KeyError: - return (False, 0) + return HidReadiness() class EvdevMatch(object): @@ -322,11 +331,11 @@ class BaseDevice(UHIDDevice): @property def kernel_is_ready(self: "BaseDevice") -> bool: - return self._kernel_is_ready.is_ready()[0] and self.started + return self._kernel_is_ready.is_ready().is_ready and self.started @property def kernel_ready_count(self: "BaseDevice") -> int: - return self._kernel_is_ready.is_ready()[1] + return self._kernel_is_ready.is_ready().count @property def input_nodes(self: "BaseDevice") -> List[EvdevDevice]: @@ -336,10 +345,28 @@ class BaseDevice(UHIDDevice): if not self.kernel_is_ready or not self.started: return [] + # Starting with kernel v6.16, an event is emitted when + # userspace opens a kernel device, and for some devices + # this translates into a SET_REPORT. 
+ # Because EvdevDevice(path) opens every single evdev node + # we need to have a separate thread to process the incoming + # SET_REPORT or we end up having to wait for the kernel + # timeout of 5 seconds. + done = False + + def dispatch(): + while not done: + self.dispatch(1) + + t = threading.Thread(target=dispatch) + t.start() + self._input_nodes = [ EvdevDevice(path) for path in self.walk_sysfs("input", "input/input*/event*") ] + done = True + t.join() return self._input_nodes def match_evdev_rule(self, application, evdev): diff --git a/tools/testing/selftests/hid/tests/test_apple_keyboard.py b/tools/testing/selftests/hid/tests/test_apple_keyboard.py index f81071d46166..0e17588b945c 100644 --- a/tools/testing/selftests/hid/tests/test_apple_keyboard.py +++ b/tools/testing/selftests/hid/tests/test_apple_keyboard.py @@ -8,13 +8,14 @@ from .test_keyboard import ArrayKeyboard, TestArrayKeyboard from hidtools.util import BusType +from . import base import libevdev import logging logger = logging.getLogger("hidtools.test.apple-keyboard") -KERNEL_MODULE = ("apple", "hid-apple") +KERNEL_MODULE = base.KernelModule("apple", "hid-apple") class KbdData(object): diff --git a/tools/testing/selftests/hid/tests/test_gamepad.py b/tools/testing/selftests/hid/tests/test_gamepad.py index 8d5b5ffdae49..612197805931 100644 --- a/tools/testing/selftests/hid/tests/test_gamepad.py +++ b/tools/testing/selftests/hid/tests/test_gamepad.py @@ -12,6 +12,7 @@ import pytest from .base_gamepad import BaseGamepad, JoystickGamepad, AxisMapping from hidtools.util import BusType +from .base import HidBpf import logging @@ -654,7 +655,7 @@ class TestAsusGamepad(BaseTest.TestGamepad): class TestRaptorMach2Joystick(BaseTest.TestGamepad): - hid_bpfs = [("FR-TEC__Raptor-Mach-2.bpf.o", True)] + hid_bpfs = [HidBpf("FR-TEC__Raptor-Mach-2.bpf.o", True)] def create_device(self): return RaptorMach2Joystick( diff --git a/tools/testing/selftests/hid/tests/test_ite_keyboard.py 
b/tools/testing/selftests/hid/tests/test_ite_keyboard.py index 38550c167bae..f695eaad1648 100644 --- a/tools/testing/selftests/hid/tests/test_ite_keyboard.py +++ b/tools/testing/selftests/hid/tests/test_ite_keyboard.py @@ -11,10 +11,11 @@ from hidtools.util import BusType import libevdev import logging +from . import base logger = logging.getLogger("hidtools.test.ite-keyboard") -KERNEL_MODULE = ("itetech", "hid_ite") +KERNEL_MODULE = base.KernelModule("itetech", "hid_ite") class KbdData(object): diff --git a/tools/testing/selftests/hid/tests/test_multitouch.py b/tools/testing/selftests/hid/tests/test_multitouch.py index 4265012231c6..5d2ffa3d5977 100644 --- a/tools/testing/selftests/hid/tests/test_multitouch.py +++ b/tools/testing/selftests/hid/tests/test_multitouch.py @@ -17,7 +17,7 @@ import time logger = logging.getLogger("hidtools.test.multitouch") -KERNEL_MODULE = ("hid-multitouch", "hid_multitouch") +KERNEL_MODULE = base.KernelModule("hid-multitouch", "hid_multitouch") def BIT(x): diff --git a/tools/testing/selftests/hid/tests/test_sony.py b/tools/testing/selftests/hid/tests/test_sony.py index 7e52c28e59c5..7fd3a8e6137d 100644 --- a/tools/testing/selftests/hid/tests/test_sony.py +++ b/tools/testing/selftests/hid/tests/test_sony.py @@ -7,6 +7,7 @@ # from .base import application_matches +from .base import KernelModule from .test_gamepad import BaseTest from hidtools.device.sony_gamepad import ( PS3Controller, @@ -24,9 +25,9 @@ import pytest logger = logging.getLogger("hidtools.test.sony") -PS3_MODULE = ("sony", "hid_sony") -PS4_MODULE = ("playstation", "hid_playstation") -PS5_MODULE = ("playstation", "hid_playstation") +PS3_MODULE = KernelModule("sony", "hid_sony") +PS4_MODULE = KernelModule("playstation", "hid_playstation") +PS5_MODULE = KernelModule("playstation", "hid_playstation") class SonyBaseTest: diff --git a/tools/testing/selftests/hid/tests/test_tablet.py b/tools/testing/selftests/hid/tests/test_tablet.py index a9e2de1e8861..50d5699812bb 100644 --- 
a/tools/testing/selftests/hid/tests/test_tablet.py +++ b/tools/testing/selftests/hid/tests/test_tablet.py @@ -10,6 +10,7 @@ from . import base import copy from enum import Enum from hidtools.util import BusType +from .base import HidBpf import libevdev import logging import pytest @@ -1228,9 +1229,9 @@ class Huion_Kamvas_Pro_19_256c_006b(PenDigitizer): pen.current_state = state def call_input_event(self, report): - if report[0] == 0x0a: + if report[0] == 0x0A: # ensures the original second Eraser usage is null - report[1] &= 0xdf + report[1] &= 0xDF # ensures the original last bit is equal to bit 6 (In Range) if report[1] & 0x40: @@ -1472,7 +1473,7 @@ class TestGoodix_27c6_0e00(BaseTest.TestTablet): class TestXPPen_ArtistPro16Gen2_28bd_095b(BaseTest.TestTablet): - hid_bpfs = [("XPPen__ArtistPro16Gen2.bpf.o", True)] + hid_bpfs = [HidBpf("XPPen__ArtistPro16Gen2.bpf.o", True)] def create_device(self): dev = XPPen_ArtistPro16Gen2_28bd_095b( @@ -1484,7 +1485,7 @@ class TestXPPen_ArtistPro16Gen2_28bd_095b(BaseTest.TestTablet): class TestXPPen_Artist24_28bd_093a(BaseTest.TestTablet): - hid_bpfs = [("XPPen__Artist24.bpf.o", True)] + hid_bpfs = [HidBpf("XPPen__Artist24.bpf.o", True)] def create_device(self): return XPPen_Artist24_28bd_093a( @@ -1495,7 +1496,7 @@ class TestXPPen_Artist24_28bd_093a(BaseTest.TestTablet): class TestHuion_Kamvas_Pro_19_256c_006b(BaseTest.TestTablet): - hid_bpfs = [("Huion__Kamvas-Pro-19.bpf.o", True)] + hid_bpfs = [HidBpf("Huion__Kamvas-Pro-19.bpf.o", True)] def create_device(self): return Huion_Kamvas_Pro_19_256c_006b( diff --git a/tools/testing/selftests/hid/tests/test_wacom_generic.py b/tools/testing/selftests/hid/tests/test_wacom_generic.py index b62c7dba6777..2d6d04f0ff80 100644 --- a/tools/testing/selftests/hid/tests/test_wacom_generic.py +++ b/tools/testing/selftests/hid/tests/test_wacom_generic.py @@ -40,7 +40,7 @@ import logging logger = logging.getLogger("hidtools.test.wacom") -KERNEL_MODULE = ("wacom", "wacom") +KERNEL_MODULE = 
base.KernelModule("wacom", "wacom") class ProximityState(Enum): @@ -892,9 +892,9 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest locations. The value of `t` may be incremented over time to move the points along a linear path. """ - return [ self.make_contact(id, t) for id in range(0, n) ] + return [self.make_contact(id, t) for id in range(0, n)] - def assert_contact(self, uhdev, evdev, contact_ids, t=0): + def assert_contact(self, evdev, contact_ids, t=0): """ Assert properties of a contact generated by make_contact. """ @@ -916,12 +916,12 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest assert evdev.slots[slot_num][libevdev.EV_ABS.ABS_MT_POSITION_X] == x assert evdev.slots[slot_num][libevdev.EV_ABS.ABS_MT_POSITION_Y] == y - def assert_contacts(self, uhdev, evdev, data, t=0): + def assert_contacts(self, evdev, data, t=0): """ Assert properties of a list of contacts generated by make_contacts. """ for contact_ids in data: - self.assert_contact(uhdev, evdev, contact_ids, t) + self.assert_contact(evdev, contact_ids, t) def test_contact_id_0(self): """ @@ -997,12 +997,16 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events - self.assert_contacts(uhdev, evdev, - [ self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = None), - self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), - self.ContactIds(contact_id = 2, tracking_id = -1, slot_num = None), - self.ContactIds(contact_id = 3, tracking_id = 1, slot_num = 1), - self.ContactIds(contact_id = 4, tracking_id = -1, slot_num = None) ]) + self.assert_contacts( + evdev, + [ + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=None), + self.ContactIds(contact_id=1, tracking_id=0, slot_num=0), + self.ContactIds(contact_id=2, tracking_id=-1, slot_num=None), + self.ContactIds(contact_id=3, tracking_id=1, slot_num=1), + 
self.ContactIds(contact_id=4, tracking_id=-1, slot_num=None), + ], + ) def confidence_change_assert_playback(self, uhdev, evdev, timeline): """ @@ -1026,8 +1030,8 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest events = uhdev.next_sync_events() self.debug_reports(r, uhdev, events) - ids = [ x[0] for x in state ] - self.assert_contacts(uhdev, evdev, ids, t) + ids = [x[0] for x in state] + self.assert_contacts(evdev, ids, t) t += 1 @@ -1044,27 +1048,68 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest uhdev = self.uhdev evdev = uhdev.get_evdev() - self.confidence_change_assert_playback(uhdev, evdev, [ - # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident - # Both fingers confidently in contact - [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=1: Contact 0 == !Down + confident; Contact 1 == Down + confident - # First finger looses confidence and clears only the tipswitch flag - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, True), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident - # First finger has lost confidence and has both flags cleared - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident - # First finger has lost confidence and has both flags cleared - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)] - ]) + self.confidence_change_assert_playback( + uhdev, + evdev, + [ + # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident 
+ # Both fingers confidently in contact + [ + ( + self.ContactIds(contact_id=0, tracking_id=0, slot_num=0), + True, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=1: Contact 0 == !Down + confident; Contact 1 == Down + confident + # First finger looses confidence and clears only the tipswitch flag + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + ], + ) def test_confidence_loss_b(self): """ @@ -1079,27 +1124,68 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest uhdev = self.uhdev evdev = uhdev.get_evdev() - self.confidence_change_assert_playback(uhdev, evdev, [ - # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident - # Both fingers confidently in contact - [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=1: Contact 0 == !Down + !confident; Contact 1 == Down + confident - # First finger looses confidence and has both flags cleared simultaneously - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num 
= 1), True, True)], - - # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident - # First finger has lost confidence and has both flags cleared - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident - # First finger has lost confidence and has both flags cleared - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)] - ]) + self.confidence_change_assert_playback( + uhdev, + evdev, + [ + # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident + # Both fingers confidently in contact + [ + ( + self.ContactIds(contact_id=0, tracking_id=0, slot_num=0), + True, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=1: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger looses confidence and has both flags cleared simultaneously + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + ], + ) def test_confidence_loss_c(self): """ @@ 
-1113,27 +1199,68 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest uhdev = self.uhdev evdev = uhdev.get_evdev() - self.confidence_change_assert_playback(uhdev, evdev, [ - # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident - # Both fingers confidently in contact - [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident - # First finger looses confidence and clears only the confidence flag - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), True, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident - # First finger has lost confidence and has both flags cleared - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident - # First finger has lost confidence and has both flags cleared - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)] - ]) + self.confidence_change_assert_playback( + uhdev, + evdev, + [ + # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident + # Both fingers confidently in contact + [ + ( + self.ContactIds(contact_id=0, tracking_id=0, slot_num=0), + True, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident + # First finger looses confidence and clears only the confidence flag + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + True, + False, + ), + ( + 
self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + ], + ) def test_confidence_gain_a(self): """ @@ -1144,27 +1271,68 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest uhdev = self.uhdev evdev = uhdev.get_evdev() - self.confidence_change_assert_playback(uhdev, evdev, [ - # t=0: Contact 0 == Down + !confident; Contact 1 == Down + confident - # Only second finger is confidently in contact - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = None), True, False), - (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)], - - # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident - # First finger gains confidence - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = None), True, False), - (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)], - - # t=2: Contact 0 == Down + confident; Contact 1 == Down + confident - # First finger remains confident - [(self.ContactIds(contact_id = 0, tracking_id = 1, slot_num = 1), True, True), - (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)], - - # t=3: Contact 0 == Down + confident; Contact 1 == Down + confident - # First finger remains confident - [(self.ContactIds(contact_id = 0, tracking_id = 1, slot_num = 1), True, True), - 
(self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)] - ]) + self.confidence_change_assert_playback( + uhdev, + evdev, + [ + # t=0: Contact 0 == Down + !confident; Contact 1 == Down + confident + # Only second finger is confidently in contact + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=None), + True, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=0, slot_num=0), + True, + True, + ), + ], + # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident + # First finger gains confidence + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=None), + True, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=0, slot_num=0), + True, + True, + ), + ], + # t=2: Contact 0 == Down + confident; Contact 1 == Down + confident + # First finger remains confident + [ + ( + self.ContactIds(contact_id=0, tracking_id=1, slot_num=1), + True, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=0, slot_num=0), + True, + True, + ), + ], + # t=3: Contact 0 == Down + confident; Contact 1 == Down + confident + # First finger remains confident + [ + ( + self.ContactIds(contact_id=0, tracking_id=1, slot_num=1), + True, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=0, slot_num=0), + True, + True, + ), + ], + ], + ) def test_confidence_gain_b(self): """ @@ -1175,24 +1343,65 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest uhdev = self.uhdev evdev = uhdev.get_evdev() - self.confidence_change_assert_playback(uhdev, evdev, [ - # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident - # First and second finger confidently in contact - [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident - # Firtst finger looses confidence - [(self.ContactIds(contact_id = 0, 
tracking_id = -1, slot_num = 0), True, False), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=2: Contact 0 == Down + confident; Contact 1 == Down + confident - # First finger gains confidence - [(self.ContactIds(contact_id = 0, tracking_id = 2, slot_num = 0), True, True), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], - - # t=3: Contact 0 == !Down + confident; Contact 1 == Down + confident - # First finger goes up - [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, True), - (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)] - ]) + self.confidence_change_assert_playback( + uhdev, + evdev, + [ + # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident + # First and second finger confidently in contact + [ + ( + self.ContactIds(contact_id=0, tracking_id=0, slot_num=0), + True, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident + # Firtst finger looses confidence + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + True, + False, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=2: Contact 0 == Down + confident; Contact 1 == Down + confident + # First finger gains confidence + [ + ( + self.ContactIds(contact_id=0, tracking_id=2, slot_num=0), + True, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + # t=3: Contact 0 == !Down + confident; Contact 1 == Down + confident + # First finger goes up + [ + ( + self.ContactIds(contact_id=0, tracking_id=-1, slot_num=0), + False, + True, + ), + ( + self.ContactIds(contact_id=1, tracking_id=1, slot_num=1), + True, + True, + ), + ], + ], + ) diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 
1926ef6b40ab..3eebf5e3b974 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -766,19 +766,34 @@ TEST_F(iommufd_ioas, get_hw_info) uint8_t max_pasid = 0; /* Provide a zero-size user_buffer */ - test_cmd_get_hw_info(self->device_id, NULL, 0); + test_cmd_get_hw_info(self->device_id, + IOMMU_HW_INFO_TYPE_DEFAULT, NULL, 0); /* Provide a user_buffer with exact size */ - test_cmd_get_hw_info(self->device_id, &buffer_exact, sizeof(buffer_exact)); + test_cmd_get_hw_info(self->device_id, + IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_exact, + sizeof(buffer_exact)); + + /* Request for a wrong data_type, and a correct one */ + test_err_get_hw_info(EOPNOTSUPP, self->device_id, + IOMMU_HW_INFO_TYPE_SELFTEST + 1, + &buffer_exact, sizeof(buffer_exact)); + test_cmd_get_hw_info(self->device_id, + IOMMU_HW_INFO_TYPE_SELFTEST, &buffer_exact, + sizeof(buffer_exact)); /* * Provide a user_buffer with size larger than the exact size to check if * kernel zero the trailing bytes. */ - test_cmd_get_hw_info(self->device_id, &buffer_larger, sizeof(buffer_larger)); + test_cmd_get_hw_info(self->device_id, + IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_larger, + sizeof(buffer_larger)); /* * Provide a user_buffer with size smaller than the exact size to check if * the fields within the size range still gets updated. 
*/ - test_cmd_get_hw_info(self->device_id, &buffer_smaller, sizeof(buffer_smaller)); + test_cmd_get_hw_info(self->device_id, + IOMMU_HW_INFO_TYPE_DEFAULT, + &buffer_smaller, sizeof(buffer_smaller)); test_cmd_get_hw_info_pasid(self->device_id, &max_pasid); ASSERT_EQ(0, max_pasid); if (variant->pasid_capable) { @@ -788,9 +803,11 @@ TEST_F(iommufd_ioas, get_hw_info) } } else { test_err_get_hw_info(ENOENT, self->device_id, - &buffer_exact, sizeof(buffer_exact)); + IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_exact, + sizeof(buffer_exact)); test_err_get_hw_info(ENOENT, self->device_id, - &buffer_larger, sizeof(buffer_larger)); + IOMMU_HW_INFO_TYPE_DEFAULT, &buffer_larger, + sizeof(buffer_larger)); } } @@ -953,6 +970,33 @@ TEST_F(iommufd_ioas, area_auto_iova) test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE * (i + 1)); } +/* https://lore.kernel.org/r/685af644.a00a0220.2e5631.0094.GAE@google.com */ +TEST_F(iommufd_ioas, reserved_overflow) +{ + struct iommu_test_cmd test_cmd = { + .size = sizeof(test_cmd), + .op = IOMMU_TEST_OP_ADD_RESERVED, + .id = self->ioas_id, + .add_reserved.start = 6, + }; + unsigned int map_len; + __u64 iova; + + if (PAGE_SIZE == 4096) { + test_cmd.add_reserved.length = 0xffffffffffff8001; + map_len = 0x5000; + } else { + test_cmd.add_reserved.length = + 0xffffffffffffffff - MOCK_PAGE_SIZE * 16; + map_len = MOCK_PAGE_SIZE * 10; + } + + ASSERT_EQ(0, + ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED), + &test_cmd)); + test_err_ioctl_ioas_map(ENOSPC, buffer, map_len, &iova); +} + TEST_F(iommufd_ioas, area_allowed) { struct iommu_test_cmd test_cmd = { @@ -2193,8 +2237,7 @@ TEST_F(iommufd_dirty_tracking, device_dirty_capability) test_cmd_hwpt_alloc(self->idev_id, self->ioas_id, 0, &hwpt_id); test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL); - test_cmd_get_hw_capabilities(self->idev_id, caps, - IOMMU_HW_CAP_DIRTY_TRACKING); + test_cmd_get_hw_capabilities(self->idev_id, caps); ASSERT_EQ(IOMMU_HW_CAP_DIRTY_TRACKING, caps & IOMMU_HW_CAP_DIRTY_TRACKING); @@ 
-2706,7 +2749,7 @@ FIXTURE_SETUP(iommufd_viommu) /* Allocate a vIOMMU taking refcount of the parent hwpt */ test_cmd_viommu_alloc(self->device_id, self->hwpt_id, - IOMMU_VIOMMU_TYPE_SELFTEST, + IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0, &self->viommu_id); /* Allocate a regular nested hwpt */ @@ -2745,24 +2788,27 @@ TEST_F(iommufd_viommu, viommu_negative_tests) if (self->device_id) { /* Negative test -- invalid hwpt (hwpt_id=0) */ test_err_viommu_alloc(ENOENT, device_id, 0, - IOMMU_VIOMMU_TYPE_SELFTEST, NULL); + IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0, + NULL); /* Negative test -- not a nesting parent hwpt */ test_cmd_hwpt_alloc(device_id, ioas_id, 0, &hwpt_id); test_err_viommu_alloc(EINVAL, device_id, hwpt_id, - IOMMU_VIOMMU_TYPE_SELFTEST, NULL); + IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0, + NULL); test_ioctl_destroy(hwpt_id); /* Negative test -- unsupported viommu type */ test_err_viommu_alloc(EOPNOTSUPP, device_id, self->hwpt_id, - 0xdead, NULL); + 0xdead, NULL, 0, NULL); EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, self->hwpt_id)); EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, self->viommu_id)); } else { test_err_viommu_alloc(ENOENT, self->device_id, self->hwpt_id, - IOMMU_VIOMMU_TYPE_SELFTEST, NULL); + IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0, + NULL); } } @@ -2778,35 +2824,66 @@ TEST_F(iommufd_viommu, viommu_alloc_nested_iopf) uint32_t fault_fd; uint32_t vdev_id; - if (self->device_id) { - test_ioctl_fault_alloc(&fault_id, &fault_fd); - test_err_hwpt_alloc_iopf( - ENOENT, dev_id, viommu_id, UINT32_MAX, - IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id, - IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); - test_err_hwpt_alloc_iopf( - EOPNOTSUPP, dev_id, viommu_id, fault_id, - IOMMU_HWPT_FAULT_ID_VALID | (1 << 31), &iopf_hwpt_id, - IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); - test_cmd_hwpt_alloc_iopf( - dev_id, viommu_id, fault_id, IOMMU_HWPT_FAULT_ID_VALID, - &iopf_hwpt_id, IOMMU_HWPT_DATA_SELFTEST, &data, - sizeof(data)); + if (!dev_id) + SKIP(return, "Skipping test for 
variant no_viommu"); - /* Must allocate vdevice before attaching to a nested hwpt */ - test_err_mock_domain_replace(ENOENT, self->stdev_id, - iopf_hwpt_id); - test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); - test_cmd_mock_domain_replace(self->stdev_id, iopf_hwpt_id); - EXPECT_ERRNO(EBUSY, - _test_ioctl_destroy(self->fd, iopf_hwpt_id)); - test_cmd_trigger_iopf(dev_id, fault_fd); + test_ioctl_fault_alloc(&fault_id, &fault_fd); + test_err_hwpt_alloc_iopf(ENOENT, dev_id, viommu_id, UINT32_MAX, + IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id, + IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); + test_err_hwpt_alloc_iopf(EOPNOTSUPP, dev_id, viommu_id, fault_id, + IOMMU_HWPT_FAULT_ID_VALID | (1 << 31), + &iopf_hwpt_id, IOMMU_HWPT_DATA_SELFTEST, &data, + sizeof(data)); + test_cmd_hwpt_alloc_iopf(dev_id, viommu_id, fault_id, + IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id, + IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); - test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id); - test_ioctl_destroy(iopf_hwpt_id); - close(fault_fd); - test_ioctl_destroy(fault_id); - } + /* Must allocate vdevice before attaching to a nested hwpt */ + test_err_mock_domain_replace(ENOENT, self->stdev_id, iopf_hwpt_id); + test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); + test_cmd_mock_domain_replace(self->stdev_id, iopf_hwpt_id); + EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, iopf_hwpt_id)); + test_cmd_trigger_iopf(dev_id, fault_fd); + + test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id); + test_ioctl_destroy(iopf_hwpt_id); + close(fault_fd); + test_ioctl_destroy(fault_id); +} + +TEST_F(iommufd_viommu, viommu_alloc_with_data) +{ + struct iommu_viommu_selftest data = { + .in_data = 0xbeef, + }; + uint32_t *test; + + if (!self->device_id) + SKIP(return, "Skipping test for variant no_viommu"); + + test_cmd_viommu_alloc(self->device_id, self->hwpt_id, + IOMMU_VIOMMU_TYPE_SELFTEST, &data, sizeof(data), + &self->viommu_id); + ASSERT_EQ(data.out_data, data.in_data); + + 
/* Negative mmap tests -- offset and length cannot be changed */ + test_err_mmap(ENXIO, data.out_mmap_length, + data.out_mmap_offset + PAGE_SIZE); + test_err_mmap(ENXIO, data.out_mmap_length, + data.out_mmap_offset + PAGE_SIZE * 2); + test_err_mmap(ENXIO, data.out_mmap_length / 2, data.out_mmap_offset); + test_err_mmap(ENXIO, data.out_mmap_length * 2, data.out_mmap_offset); + + /* Now do a correct mmap for a loopback test */ + test = mmap(NULL, data.out_mmap_length, PROT_READ | PROT_WRITE, + MAP_SHARED, self->fd, data.out_mmap_offset); + ASSERT_NE(MAP_FAILED, test); + ASSERT_EQ(data.in_data, *test); + + /* The owner of the mmap region should be blocked */ + EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, self->viommu_id)); + munmap(test, data.out_mmap_length); } TEST_F(iommufd_viommu, vdevice_alloc) @@ -2867,169 +2944,234 @@ TEST_F(iommufd_viommu, vdevice_cache) uint32_t vdev_id = 0; uint32_t num_inv; - if (dev_id) { - test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); - - test_cmd_dev_check_cache_all(dev_id, - IOMMU_TEST_DEV_CACHE_DEFAULT); - - /* Check data_type by passing zero-length array */ - num_inv = 0; - test_cmd_viommu_invalidate(viommu_id, inv_reqs, - sizeof(*inv_reqs), &num_inv); - assert(!num_inv); - - /* Negative test: Invalid data_type */ - num_inv = 1; - test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST_INVALID, - sizeof(*inv_reqs), &num_inv); - assert(!num_inv); + if (!dev_id) + SKIP(return, "Skipping test for variant no_viommu"); + + test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); + + test_cmd_dev_check_cache_all(dev_id, IOMMU_TEST_DEV_CACHE_DEFAULT); + + /* Check data_type by passing zero-length array */ + num_inv = 0; + test_cmd_viommu_invalidate(viommu_id, inv_reqs, sizeof(*inv_reqs), + &num_inv); + assert(!num_inv); + + /* Negative test: Invalid data_type */ + num_inv = 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST_INVALID, 
+ sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: structure size sanity */ + num_inv = 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs) + 1, &num_inv); + assert(!num_inv); + + num_inv = 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, 1, + &num_inv); + assert(!num_inv); + + /* Negative test: invalid flag is passed */ + num_inv = 1; + inv_reqs[0].flags = 0xffffffff; + inv_reqs[0].vdev_id = 0x99; + test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: invalid data_uptr when array is not empty */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + test_err_viommu_invalidate(EINVAL, viommu_id, NULL, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: invalid entry_len when array is not empty */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, 0, + &num_inv); + assert(!num_inv); + + /* Negative test: invalid cache_id */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = MOCK_DEV_CACHE_ID_MAX + 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: invalid vdev_id */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x9; + inv_reqs[0].cache_id = 0; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); - /* Negative test: structure size sanity */ - num_inv = 1; - test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, - 
IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - sizeof(*inv_reqs) + 1, &num_inv); - assert(!num_inv); - - num_inv = 1; - test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - 1, &num_inv); - assert(!num_inv); - - /* Negative test: invalid flag is passed */ - num_inv = 1; - inv_reqs[0].flags = 0xffffffff; - inv_reqs[0].vdev_id = 0x99; - test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - sizeof(*inv_reqs), &num_inv); - assert(!num_inv); - - /* Negative test: invalid data_uptr when array is not empty */ - num_inv = 1; - inv_reqs[0].flags = 0; - inv_reqs[0].vdev_id = 0x99; - test_err_viommu_invalidate(EINVAL, viommu_id, NULL, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - sizeof(*inv_reqs), &num_inv); - assert(!num_inv); - - /* Negative test: invalid entry_len when array is not empty */ - num_inv = 1; - inv_reqs[0].flags = 0; - inv_reqs[0].vdev_id = 0x99; - test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - 0, &num_inv); - assert(!num_inv); - - /* Negative test: invalid cache_id */ - num_inv = 1; - inv_reqs[0].flags = 0; - inv_reqs[0].vdev_id = 0x99; - inv_reqs[0].cache_id = MOCK_DEV_CACHE_ID_MAX + 1; - test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - sizeof(*inv_reqs), &num_inv); - assert(!num_inv); + /* + * Invalidate the 1st cache entry but fail the 2nd request + * due to invalid flags configuration in the 2nd request. 
+ */ + num_inv = 2; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = 0; + inv_reqs[1].flags = 0xffffffff; + inv_reqs[1].vdev_id = 0x99; + inv_reqs[1].cache_id = 1; + test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_dev_check_cache(dev_id, 0, 0); + test_cmd_dev_check_cache(dev_id, 1, IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 2, IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 3, IOMMU_TEST_DEV_CACHE_DEFAULT); - /* Negative test: invalid vdev_id */ - num_inv = 1; - inv_reqs[0].flags = 0; - inv_reqs[0].vdev_id = 0x9; - inv_reqs[0].cache_id = 0; - test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - sizeof(*inv_reqs), &num_inv); - assert(!num_inv); + /* + * Invalidate the 1st cache entry but fail the 2nd request + * due to invalid cache_id configuration in the 2nd request. 
+ */ + num_inv = 2; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = 0; + inv_reqs[1].flags = 0; + inv_reqs[1].vdev_id = 0x99; + inv_reqs[1].cache_id = MOCK_DEV_CACHE_ID_MAX + 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_dev_check_cache(dev_id, 0, 0); + test_cmd_dev_check_cache(dev_id, 1, IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 2, IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 3, IOMMU_TEST_DEV_CACHE_DEFAULT); + + /* Invalidate the 2nd cache entry and verify */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = 1; + test_cmd_viommu_invalidate(viommu_id, inv_reqs, sizeof(*inv_reqs), + &num_inv); + assert(num_inv == 1); + test_cmd_dev_check_cache(dev_id, 0, 0); + test_cmd_dev_check_cache(dev_id, 1, 0); + test_cmd_dev_check_cache(dev_id, 2, IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 3, IOMMU_TEST_DEV_CACHE_DEFAULT); + + /* Invalidate the 3rd and 4th cache entries and verify */ + num_inv = 2; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = 2; + inv_reqs[1].flags = 0; + inv_reqs[1].vdev_id = 0x99; + inv_reqs[1].cache_id = 3; + test_cmd_viommu_invalidate(viommu_id, inv_reqs, sizeof(*inv_reqs), + &num_inv); + assert(num_inv == 2); + test_cmd_dev_check_cache_all(dev_id, 0); + + /* Invalidate all cache entries for nested_dev_id[1] and verify */ + num_inv = 1; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].flags = IOMMU_TEST_INVALIDATE_FLAG_ALL; + test_cmd_viommu_invalidate(viommu_id, inv_reqs, sizeof(*inv_reqs), + &num_inv); + assert(num_inv == 1); + test_cmd_dev_check_cache_all(dev_id, 0); + test_ioctl_destroy(vdev_id); +} + +TEST_F(iommufd_viommu, hw_queue) +{ + __u64 iova = MOCK_APERTURE_START, iova2; + uint32_t viommu_id = self->viommu_id; + uint32_t hw_queue_id[2]; + + if 
(!viommu_id) + SKIP(return, "Skipping test for variant no_viommu"); + + /* Fail IOMMU_HW_QUEUE_TYPE_DEFAULT */ + test_err_hw_queue_alloc(EOPNOTSUPP, viommu_id, + IOMMU_HW_QUEUE_TYPE_DEFAULT, 0, iova, PAGE_SIZE, + &hw_queue_id[0]); + /* Fail queue addr and length */ + test_err_hw_queue_alloc(EINVAL, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, + 0, iova, 0, &hw_queue_id[0]); + test_err_hw_queue_alloc(EOVERFLOW, viommu_id, + IOMMU_HW_QUEUE_TYPE_SELFTEST, 0, ~(uint64_t)0, + PAGE_SIZE, &hw_queue_id[0]); + /* Fail missing iova */ + test_err_hw_queue_alloc(ENOENT, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, + 0, iova, PAGE_SIZE, &hw_queue_id[0]); + + /* Map iova */ + test_ioctl_ioas_map(buffer, PAGE_SIZE, &iova); + test_ioctl_ioas_map(buffer + PAGE_SIZE, PAGE_SIZE, &iova2); + + /* Fail index=1 and =MAX; must start from index=0 */ + test_err_hw_queue_alloc(EIO, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, 1, + iova, PAGE_SIZE, &hw_queue_id[0]); + test_err_hw_queue_alloc(EINVAL, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, + IOMMU_TEST_HW_QUEUE_MAX, iova, PAGE_SIZE, + &hw_queue_id[0]); + + /* Allocate index=0, declare ownership of the iova */ + test_cmd_hw_queue_alloc(viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, 0, + iova, PAGE_SIZE, &hw_queue_id[0]); + /* Fail duplicated index */ + test_err_hw_queue_alloc(EEXIST, viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, + 0, iova, PAGE_SIZE, &hw_queue_id[0]); + /* Fail unmap, due to iova ownership */ + test_err_ioctl_ioas_unmap(EBUSY, iova, PAGE_SIZE); + /* The 2nd page is not pinned, so it can be unmmap */ + test_ioctl_ioas_unmap(iova2, PAGE_SIZE); + + /* Allocate index=1, with an unaligned case */ + test_cmd_hw_queue_alloc(viommu_id, IOMMU_HW_QUEUE_TYPE_SELFTEST, 1, + iova + PAGE_SIZE / 2, PAGE_SIZE / 2, + &hw_queue_id[1]); + /* Fail to destroy, due to dependency */ + EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, hw_queue_id[0])); + + /* Destroy in descending order */ + test_ioctl_destroy(hw_queue_id[1]); + test_ioctl_destroy(hw_queue_id[0]); + /* Now 
it can unmap the first page */ + test_ioctl_ioas_unmap(iova, PAGE_SIZE); +} - /* - * Invalidate the 1st cache entry but fail the 2nd request - * due to invalid flags configuration in the 2nd request. - */ - num_inv = 2; - inv_reqs[0].flags = 0; - inv_reqs[0].vdev_id = 0x99; - inv_reqs[0].cache_id = 0; - inv_reqs[1].flags = 0xffffffff; - inv_reqs[1].vdev_id = 0x99; - inv_reqs[1].cache_id = 1; - test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - sizeof(*inv_reqs), &num_inv); - assert(num_inv == 1); - test_cmd_dev_check_cache(dev_id, 0, 0); - test_cmd_dev_check_cache(dev_id, 1, - IOMMU_TEST_DEV_CACHE_DEFAULT); - test_cmd_dev_check_cache(dev_id, 2, - IOMMU_TEST_DEV_CACHE_DEFAULT); - test_cmd_dev_check_cache(dev_id, 3, - IOMMU_TEST_DEV_CACHE_DEFAULT); +TEST_F(iommufd_viommu, vdevice_tombstone) +{ + uint32_t viommu_id = self->viommu_id; + uint32_t dev_id = self->device_id; + uint32_t vdev_id = 0; - /* - * Invalidate the 1st cache entry but fail the 2nd request - * due to invalid cache_id configuration in the 2nd request. 
- */ - num_inv = 2; - inv_reqs[0].flags = 0; - inv_reqs[0].vdev_id = 0x99; - inv_reqs[0].cache_id = 0; - inv_reqs[1].flags = 0; - inv_reqs[1].vdev_id = 0x99; - inv_reqs[1].cache_id = MOCK_DEV_CACHE_ID_MAX + 1; - test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, - IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, - sizeof(*inv_reqs), &num_inv); - assert(num_inv == 1); - test_cmd_dev_check_cache(dev_id, 0, 0); - test_cmd_dev_check_cache(dev_id, 1, - IOMMU_TEST_DEV_CACHE_DEFAULT); - test_cmd_dev_check_cache(dev_id, 2, - IOMMU_TEST_DEV_CACHE_DEFAULT); - test_cmd_dev_check_cache(dev_id, 3, - IOMMU_TEST_DEV_CACHE_DEFAULT); - - /* Invalidate the 2nd cache entry and verify */ - num_inv = 1; - inv_reqs[0].flags = 0; - inv_reqs[0].vdev_id = 0x99; - inv_reqs[0].cache_id = 1; - test_cmd_viommu_invalidate(viommu_id, inv_reqs, - sizeof(*inv_reqs), &num_inv); - assert(num_inv == 1); - test_cmd_dev_check_cache(dev_id, 0, 0); - test_cmd_dev_check_cache(dev_id, 1, 0); - test_cmd_dev_check_cache(dev_id, 2, - IOMMU_TEST_DEV_CACHE_DEFAULT); - test_cmd_dev_check_cache(dev_id, 3, - IOMMU_TEST_DEV_CACHE_DEFAULT); - - /* Invalidate the 3rd and 4th cache entries and verify */ - num_inv = 2; - inv_reqs[0].flags = 0; - inv_reqs[0].vdev_id = 0x99; - inv_reqs[0].cache_id = 2; - inv_reqs[1].flags = 0; - inv_reqs[1].vdev_id = 0x99; - inv_reqs[1].cache_id = 3; - test_cmd_viommu_invalidate(viommu_id, inv_reqs, - sizeof(*inv_reqs), &num_inv); - assert(num_inv == 2); - test_cmd_dev_check_cache_all(dev_id, 0); + if (!dev_id) + SKIP(return, "Skipping test for variant no_viommu"); - /* Invalidate all cache entries for nested_dev_id[1] and verify */ - num_inv = 1; - inv_reqs[0].vdev_id = 0x99; - inv_reqs[0].flags = IOMMU_TEST_INVALIDATE_FLAG_ALL; - test_cmd_viommu_invalidate(viommu_id, inv_reqs, - sizeof(*inv_reqs), &num_inv); - assert(num_inv == 1); - test_cmd_dev_check_cache_all(dev_id, 0); - test_ioctl_destroy(vdev_id); - } + test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); + 
test_ioctl_destroy(self->stdev_id); + EXPECT_ERRNO(ENOENT, _test_ioctl_destroy(self->fd, vdev_id)); } FIXTURE(iommufd_device_pasid) @@ -3123,8 +3265,7 @@ TEST_F(iommufd_device_pasid, pasid_attach) /* Allocate a regular nested hwpt based on viommu */ test_cmd_viommu_alloc(self->device_id, parent_hwpt_id, - IOMMU_VIOMMU_TYPE_SELFTEST, - &viommu_id); + IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0, &viommu_id); test_cmd_hwpt_alloc_nested(self->device_id, viommu_id, IOMMU_HWPT_ALLOC_PASID, &nested_hwpt_id[2], diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c index e11ec4b121fc..651fc9f13c08 100644 --- a/tools/testing/selftests/iommu/iommufd_fail_nth.c +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -634,6 +634,7 @@ TEST_FAIL_NTH(basic_fail_nth, device) uint32_t idev_id; uint32_t hwpt_id; uint32_t viommu_id; + uint32_t hw_queue_id; uint32_t vdev_id; __u64 iova; @@ -666,8 +667,8 @@ TEST_FAIL_NTH(basic_fail_nth, device) &self->stdev_id, NULL, &idev_id)) return -1; - if (_test_cmd_get_hw_info(self->fd, idev_id, &info, - sizeof(info), NULL, NULL)) + if (_test_cmd_get_hw_info(self->fd, idev_id, IOMMU_HW_INFO_TYPE_DEFAULT, + &info, sizeof(info), NULL, NULL)) return -1; if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, @@ -688,13 +689,19 @@ TEST_FAIL_NTH(basic_fail_nth, device) IOMMU_HWPT_DATA_NONE, 0, 0)) return -1; - if (_test_cmd_viommu_alloc(self->fd, idev_id, hwpt_id, - IOMMU_VIOMMU_TYPE_SELFTEST, 0, &viommu_id)) + if (_test_cmd_viommu_alloc(self->fd, idev_id, hwpt_id, 0, + IOMMU_VIOMMU_TYPE_SELFTEST, NULL, 0, + &viommu_id)) return -1; if (_test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, 0, &vdev_id)) return -1; + if (_test_cmd_hw_queue_alloc(self->fd, viommu_id, + IOMMU_HW_QUEUE_TYPE_SELFTEST, 0, iova, + PAGE_SIZE, &hw_queue_id)) + return -1; + if (_test_ioctl_fault_alloc(self->fd, &fault_id, &fault_fd)) return -1; close(fault_fd); diff --git a/tools/testing/selftests/iommu/iommufd_utils.h 
b/tools/testing/selftests/iommu/iommufd_utils.h index 6e967b58acfd..3c3e08b8c90e 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -56,6 +56,10 @@ static unsigned long PAGE_SIZE; #define offsetofend(TYPE, MEMBER) \ (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) +#define test_err_mmap(_errno, length, offset) \ + EXPECT_ERRNO(_errno, (long)mmap(NULL, length, PROT_READ | PROT_WRITE, \ + MAP_SHARED, self->fd, offset)) + static inline void *memfd_mmap(size_t length, int prot, int flags, int *mfd_p) { int mfd_flags = (flags & MAP_HUGETLB) ? MFD_HUGETLB : 0; @@ -762,20 +766,24 @@ static void teardown_iommufd(int fd, struct __test_metadata *_metadata) #endif /* @data can be NULL */ -static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data, - size_t data_len, uint32_t *capabilities, - uint8_t *max_pasid) +static int _test_cmd_get_hw_info(int fd, __u32 device_id, __u32 data_type, + void *data, size_t data_len, + uint32_t *capabilities, uint8_t *max_pasid) { struct iommu_test_hw_info *info = (struct iommu_test_hw_info *)data; struct iommu_hw_info cmd = { .size = sizeof(cmd), .dev_id = device_id, .data_len = data_len, + .in_data_type = data_type, .data_uptr = (uint64_t)data, .out_capabilities = 0, }; int ret; + if (data_type != IOMMU_HW_INFO_TYPE_DEFAULT) + cmd.flags |= IOMMU_HW_INFO_FLAG_INPUT_TYPE; + ret = ioctl(fd, IOMMU_GET_HW_INFO, &cmd); if (ret) return ret; @@ -818,20 +826,23 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data, return 0; } -#define test_cmd_get_hw_info(device_id, data, data_len) \ - ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, data, \ - data_len, NULL, NULL)) +#define test_cmd_get_hw_info(device_id, data_type, data, data_len) \ + ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, data_type, \ + data, data_len, NULL, NULL)) -#define test_err_get_hw_info(_errno, device_id, data, data_len) \ - EXPECT_ERRNO(_errno, 
_test_cmd_get_hw_info(self->fd, device_id, data, \ - data_len, NULL, NULL)) +#define test_err_get_hw_info(_errno, device_id, data_type, data, data_len) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_get_hw_info(self->fd, device_id, data_type, \ + data, data_len, NULL, NULL)) -#define test_cmd_get_hw_capabilities(device_id, caps, mask) \ - ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, \ +#define test_cmd_get_hw_capabilities(device_id, caps) \ + ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, \ + IOMMU_HW_INFO_TYPE_DEFAULT, NULL, \ 0, &caps, NULL)) -#define test_cmd_get_hw_info_pasid(device_id, max_pasid) \ - ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, \ +#define test_cmd_get_hw_info_pasid(device_id, max_pasid) \ + ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, \ + IOMMU_HW_INFO_TYPE_DEFAULT, NULL, \ 0, NULL, max_pasid)) static int _test_ioctl_fault_alloc(int fd, __u32 *fault_id, __u32 *fault_fd) @@ -902,7 +913,8 @@ static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 pasid, pasid, fault_fd)) static int _test_cmd_viommu_alloc(int fd, __u32 device_id, __u32 hwpt_id, - __u32 type, __u32 flags, __u32 *viommu_id) + __u32 flags, __u32 type, void *data, + __u32 data_len, __u32 *viommu_id) { struct iommu_viommu_alloc cmd = { .size = sizeof(cmd), @@ -910,6 +922,8 @@ static int _test_cmd_viommu_alloc(int fd, __u32 device_id, __u32 hwpt_id, .type = type, .dev_id = device_id, .hwpt_id = hwpt_id, + .data_uptr = (uint64_t)data, + .data_len = data_len, }; int ret; @@ -921,13 +935,15 @@ static int _test_cmd_viommu_alloc(int fd, __u32 device_id, __u32 hwpt_id, return 0; } -#define test_cmd_viommu_alloc(device_id, hwpt_id, type, viommu_id) \ - ASSERT_EQ(0, _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, \ - type, 0, viommu_id)) -#define test_err_viommu_alloc(_errno, device_id, hwpt_id, type, viommu_id) \ - EXPECT_ERRNO(_errno, \ - _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, \ - type, 0, viommu_id)) +#define 
test_cmd_viommu_alloc(device_id, hwpt_id, type, data, data_len, \ + viommu_id) \ + ASSERT_EQ(0, _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, 0, \ + type, data, data_len, viommu_id)) +#define test_err_viommu_alloc(_errno, device_id, hwpt_id, type, data, \ + data_len, viommu_id) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, 0, \ + type, data, data_len, viommu_id)) static int _test_cmd_vdevice_alloc(int fd, __u32 viommu_id, __u32 idev_id, __u64 virt_id, __u32 *vdev_id) @@ -956,6 +972,37 @@ static int _test_cmd_vdevice_alloc(int fd, __u32 viommu_id, __u32 idev_id, _test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, \ virt_id, vdev_id)) +static int _test_cmd_hw_queue_alloc(int fd, __u32 viommu_id, __u32 type, + __u32 idx, __u64 base_addr, __u64 length, + __u32 *hw_queue_id) +{ + struct iommu_hw_queue_alloc cmd = { + .size = sizeof(cmd), + .viommu_id = viommu_id, + .type = type, + .index = idx, + .nesting_parent_iova = base_addr, + .length = length, + }; + int ret; + + ret = ioctl(fd, IOMMU_HW_QUEUE_ALLOC, &cmd); + if (ret) + return ret; + if (hw_queue_id) + *hw_queue_id = cmd.out_hw_queue_id; + return 0; +} + +#define test_cmd_hw_queue_alloc(viommu_id, type, idx, base_addr, len, out_qid) \ + ASSERT_EQ(0, _test_cmd_hw_queue_alloc(self->fd, viommu_id, type, idx, \ + base_addr, len, out_qid)) +#define test_err_hw_queue_alloc(_errno, viommu_id, type, idx, base_addr, len, \ + out_qid) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_hw_queue_alloc(self->fd, viommu_id, type, idx, \ + base_addr, len, out_qid)) + static int _test_cmd_veventq_alloc(int fd, __u32 viommu_id, __u32 type, __u32 *veventq_id, __u32 *veventq_fd) { diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 38b95998e1e6..f6fe7a07a0a2 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -59,6 +59,7 @@ TEST_PROGS_x86 += x86/nx_huge_pages_test.sh TEST_GEN_PROGS_COMMON = 
demand_paging_test TEST_GEN_PROGS_COMMON += dirty_log_test TEST_GEN_PROGS_COMMON += guest_print_test +TEST_GEN_PROGS_COMMON += irqfd_test TEST_GEN_PROGS_COMMON += kvm_binary_stats_test TEST_GEN_PROGS_COMMON += kvm_create_max_vcpus TEST_GEN_PROGS_COMMON += kvm_page_table_test @@ -134,6 +135,7 @@ TEST_GEN_PROGS_x86 += x86/amx_test TEST_GEN_PROGS_x86 += x86/max_vcpuid_cap_test TEST_GEN_PROGS_x86 += x86/triple_fault_event_test TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test +TEST_GEN_PROGS_x86 += x86/aperfmperf_test TEST_GEN_PROGS_x86 += access_tracking_perf_test TEST_GEN_PROGS_x86 += coalesced_io_test TEST_GEN_PROGS_x86 += dirty_log_perf_test @@ -156,7 +158,7 @@ TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases TEST_GEN_PROGS_arm64 += arm64/debug-exceptions TEST_GEN_PROGS_arm64 += arm64/host_sve TEST_GEN_PROGS_arm64 += arm64/hypercalls -TEST_GEN_PROGS_arm64 += arm64/mmio_abort +TEST_GEN_PROGS_arm64 += arm64/external_aborts TEST_GEN_PROGS_arm64 += arm64/page_fault_test TEST_GEN_PROGS_arm64 += arm64/psci_test TEST_GEN_PROGS_arm64 += arm64/set_id_regs diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index da7196fd1b23..c9de66537ec3 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -596,11 +596,8 @@ int main(int argc, char *argv[]) if (ret) return ret; } else { - page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); - __TEST_REQUIRE(page_idle_fd >= 0, - "Couldn't open /sys/kernel/mm/page_idle/bitmap. 
" - "Is CONFIG_IDLE_PAGE_TRACKING enabled?"); - + page_idle_fd = __open_path_or_exit("/sys/kernel/mm/page_idle/bitmap", O_RDWR, + "Is CONFIG_IDLE_PAGE_TRACKING enabled?"); close(page_idle_fd); puts("Using page_idle for aging"); diff --git a/tools/testing/selftests/kvm/arch_timer.c b/tools/testing/selftests/kvm/arch_timer.c index acb2cb596332..cf8fb67104f1 100644 --- a/tools/testing/selftests/kvm/arch_timer.c +++ b/tools/testing/selftests/kvm/arch_timer.c @@ -98,16 +98,11 @@ static uint32_t test_get_pcpu(void) static int test_migrate_vcpu(unsigned int vcpu_idx) { int ret; - cpu_set_t cpuset; uint32_t new_pcpu = test_get_pcpu(); - CPU_ZERO(&cpuset); - CPU_SET(new_pcpu, &cpuset); - pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu_idx, new_pcpu); - ret = pthread_setaffinity_np(pt_vcpu_run[vcpu_idx], - sizeof(cpuset), &cpuset); + ret = __pin_task_to_cpu(pt_vcpu_run[vcpu_idx], new_pcpu); /* Allow the error where the vCPU thread is already finished */ TEST_ASSERT(ret == 0 || ret == ESRCH, diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index 4e71740a098b..ce74d069cb7b 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -862,25 +862,6 @@ static uint32_t next_pcpu(void) return next; } -static void migrate_self(uint32_t new_pcpu) -{ - int ret; - cpu_set_t cpuset; - pthread_t thread; - - thread = pthread_self(); - - CPU_ZERO(&cpuset); - CPU_SET(new_pcpu, &cpuset); - - pr_debug("Migrating from %u to %u\n", sched_getcpu(), new_pcpu); - - ret = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset); - - TEST_ASSERT(ret == 0, "Failed to migrate to pCPU: %u; ret: %d\n", - new_pcpu, ret); -} - static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt, enum arch_timer timer) { @@ -907,7 +888,7 @@ static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc) sched_yield(); break; case USERSPACE_MIGRATE_SELF: 
- migrate_self(next_pcpu()); + pin_self_to_cpu(next_pcpu()); break; default: break; @@ -919,7 +900,7 @@ static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu) struct ucall uc; /* Start on CPU 0 */ - migrate_self(0); + pin_self_to_cpu(0); while (true) { vcpu_run(vcpu); diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c new file mode 100644 index 000000000000..062bf84cced1 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/external_aborts.c @@ -0,0 +1,330 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * external_abort - Tests for userspace external abort injection + * + * Copyright (c) 2024 Google LLC + */ +#include "processor.h" +#include "test_util.h" + +#define MMIO_ADDR 0x8000000ULL +#define EXPECTED_SERROR_ISS (ESR_ELx_ISV | 0x1d1ed) + +static u64 expected_abort_pc; + +static void expect_sea_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(regs->pc, expected_abort_pc); + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); + GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); + + GUEST_DONE(); +} + +static void unexpected_dabt_handler(struct ex_regs *regs) +{ + GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc); +} + +static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code, + handler_fn dabt_handler) +{ + struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(*vcpu); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler); + + virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1); + + return vm; +} + +static void vcpu_inject_sea(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events = {}; + + events.exception.ext_dabt_pending = true; + vcpu_events_set(vcpu, &events); +} + +static bool vcpu_has_ras(struct kvm_vcpu *vcpu) +{ + u64 pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); + + 
return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, pfr0); +} + +static bool guest_has_ras(void) +{ + return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, read_sysreg(id_aa64pfr0_el1)); +} + +static void vcpu_inject_serror(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events = {}; + + events.exception.serror_pending = true; + if (vcpu_has_ras(vcpu)) { + events.exception.serror_has_esr = true; + events.exception.serror_esr = EXPECTED_SERROR_ISS; + } + + vcpu_events_set(vcpu, &events); +} + +static void __vcpu_run_expect(struct kvm_vcpu *vcpu, unsigned int cmd) +{ + struct ucall uc; + + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + if (uc.cmd == cmd) + return; + + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } +} + +static void vcpu_run_expect_done(struct kvm_vcpu *vcpu) +{ + __vcpu_run_expect(vcpu, UCALL_DONE); +} + +static void vcpu_run_expect_sync(struct kvm_vcpu *vcpu) +{ + __vcpu_run_expect(vcpu, UCALL_SYNC); +} + +extern char test_mmio_abort_insn; + +static noinline void test_mmio_abort_guest(void) +{ + WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn); + + asm volatile("test_mmio_abort_insn:\n\t" + "ldr x0, [%0]\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("MMIO instruction should not retire"); +} + +/* + * Test that KVM doesn't complete MMIO emulation when userspace has made an + * external abort pending for the instruction. 
+ */ +static void test_mmio_abort(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest, + expect_sea_handler); + struct kvm_run *run = vcpu->run; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); + TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); + TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); + TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); + + vcpu_inject_sea(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +extern char test_mmio_nisv_insn; + +static void test_mmio_nisv_guest(void) +{ + WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn); + + asm volatile("test_mmio_nisv_insn:\n\t" + "ldr x0, [%0], #8\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("MMIO instruction should not retire"); +} + +/* + * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace + * hasn't enabled KVM_CAP_ARM_NISV_TO_USER. + */ +static void test_mmio_nisv(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, + unexpected_dabt_handler); + + TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN"); + TEST_ASSERT_EQ(errno, ENOSYS); + + kvm_vm_free(vm); +} + +/* + * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA + * reaches the guest. 
+ */ +static void test_mmio_nisv_abort(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, + expect_sea_handler); + struct kvm_run *run = vcpu->run; + + vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV); + TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR); + + vcpu_inject_sea(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void unexpected_serror_handler(struct ex_regs *regs) +{ + GUEST_FAIL("Took unexpected SError exception"); +} + +static void test_serror_masked_guest(void) +{ + GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A); + + isb(); + + GUEST_DONE(); +} + +static void test_serror_masked(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_masked_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, unexpected_serror_handler); + + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void expect_serror_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_SERROR); + if (guest_has_ras()) + GUEST_ASSERT_EQ(ESR_ELx_ISS(esr), EXPECTED_SERROR_ISS); + + GUEST_DONE(); +} + +static void test_serror_guest(void) +{ + GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A); + + local_serror_enable(); + isb(); + local_serror_disable(); + + GUEST_FAIL("Should've taken pending SError exception"); +} + +static void test_serror(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler); + + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void test_serror_emulated_guest(void) +{ + GUEST_ASSERT(!(read_sysreg(isr_el1) & ISR_EL1_A)); + + 
local_serror_enable(); + GUEST_SYNC(0); + local_serror_disable(); + + GUEST_FAIL("Should've taken unmasked SError exception"); +} + +static void test_serror_emulated(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_emulated_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler); + + vcpu_run_expect_sync(vcpu); + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void test_mmio_ease_guest(void) +{ + sysreg_clear_set_s(SYS_SCTLR2_EL1, 0, SCTLR2_EL1_EASE); + isb(); + + test_mmio_abort_guest(); +} + +/* + * Test that KVM doesn't complete MMIO emulation when userspace has made an + * external abort pending for the instruction. + */ +static void test_mmio_ease(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_ease_guest, + unexpected_dabt_handler); + struct kvm_run *run = vcpu->run; + u64 pfr1; + + pfr1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); + if (!SYS_FIELD_GET(ID_AA64PFR1_EL1, DF2, pfr1)) { + pr_debug("Skipping %s\n", __func__); + return; + } + + /* + * SCTLR2_ELx.EASE changes the exception vector to the SError vector but + * doesn't further modify the exception context (e.g. ESR_ELx, FAR_ELx). 
+ */ + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_sea_handler); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); + TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); + TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); + TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); + + vcpu_inject_sea(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +int main(void) +{ + test_mmio_abort(); + test_mmio_nisv(); + test_mmio_nisv_abort(); + test_serror(); + test_serror_masked(); + test_serror_emulated(); + test_mmio_ease(); +} diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c index d01798b6b3b4..011fad95dd02 100644 --- a/tools/testing/selftests/kvm/arm64/get-reg-list.c +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c @@ -15,6 +15,12 @@ #include "test_util.h" #include "processor.h" +#define SYS_REG(r) ARM64_SYS_REG(sys_reg_Op0(SYS_ ## r), \ + sys_reg_Op1(SYS_ ## r), \ + sys_reg_CRn(SYS_ ## r), \ + sys_reg_CRm(SYS_ ## r), \ + sys_reg_Op2(SYS_ ## r)) + struct feature_id_reg { __u64 reg; __u64 id_reg; @@ -22,37 +28,43 @@ struct feature_id_reg { __u64 feat_min; }; -static struct feature_id_reg feat_id_regs[] = { - { - ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 0, - 1 - }, - { - ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 8, - 1 - }, - { - ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 8, - 1 - }, - { - ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 16, - 1 - }, - { - ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 16, - 1 +#define FEAT(id, f, v) \ + .id_reg = SYS_REG(id), \ + .feat_shift = id ## _ ## f ## _SHIFT, \ + .feat_min = id ## _ ## f ## _ ## v + +#define REG_FEAT(r, id, f, 
v) \ + { \ + .reg = SYS_REG(r), \ + FEAT(id, f, v) \ } + +static struct feature_id_reg feat_id_regs[] = { + REG_FEAT(TCR2_EL1, ID_AA64MMFR3_EL1, TCRX, IMP), + REG_FEAT(TCR2_EL2, ID_AA64MMFR3_EL1, TCRX, IMP), + REG_FEAT(PIRE0_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(PIRE0_EL2, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(PIR_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(PIR_EL2, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(POR_EL1, ID_AA64MMFR3_EL1, S1POE, IMP), + REG_FEAT(POR_EL0, ID_AA64MMFR3_EL1, S1POE, IMP), + REG_FEAT(POR_EL2, ID_AA64MMFR3_EL1, S1POE, IMP), + REG_FEAT(HCRX_EL2, ID_AA64MMFR1_EL1, HCX, IMP), + REG_FEAT(HFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HFGWTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HFGITR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HDFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HDFGWTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HAFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HFGRTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HFGITR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HDFGRTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HDFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP), + REG_FEAT(SCTLR2_EL1, ID_AA64MMFR3_EL1, SCTLRX, IMP), + REG_FEAT(VDISR_EL2, ID_AA64PFR0_EL1, RAS, IMP), + REG_FEAT(VSESR_EL2, ID_AA64PFR0_EL1, RAS, IMP), }; bool filter_reg(__u64 reg) @@ -469,6 +481,7 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */ ARM64_SYS_REG(3, 0, 1, 0, 1), /* ACTLR_EL1 */ ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */ + KVM_ARM64_SYS_REG(SYS_SCTLR2_EL1), ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */ ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */ ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */ @@ -686,6 +699,62 @@ static __u64 pauth_generic_regs[] = { ARM64_SYS_REG(3, 0, 2, 3, 1), /* APGAKEYHI_EL1 */ }; +static __u64 el2_regs[] = { + SYS_REG(VPIDR_EL2), + SYS_REG(VMPIDR_EL2), + 
SYS_REG(SCTLR_EL2), + SYS_REG(ACTLR_EL2), + SYS_REG(HCR_EL2), + SYS_REG(MDCR_EL2), + SYS_REG(CPTR_EL2), + SYS_REG(HSTR_EL2), + SYS_REG(HFGRTR_EL2), + SYS_REG(HFGWTR_EL2), + SYS_REG(HFGITR_EL2), + SYS_REG(HACR_EL2), + SYS_REG(ZCR_EL2), + SYS_REG(HCRX_EL2), + SYS_REG(TTBR0_EL2), + SYS_REG(TTBR1_EL2), + SYS_REG(TCR_EL2), + SYS_REG(TCR2_EL2), + SYS_REG(VTTBR_EL2), + SYS_REG(VTCR_EL2), + SYS_REG(VNCR_EL2), + SYS_REG(HDFGRTR2_EL2), + SYS_REG(HDFGWTR2_EL2), + SYS_REG(HFGRTR2_EL2), + SYS_REG(HFGWTR2_EL2), + SYS_REG(HDFGRTR_EL2), + SYS_REG(HDFGWTR_EL2), + SYS_REG(HAFGRTR_EL2), + SYS_REG(HFGITR2_EL2), + SYS_REG(SPSR_EL2), + SYS_REG(ELR_EL2), + SYS_REG(AFSR0_EL2), + SYS_REG(AFSR1_EL2), + SYS_REG(ESR_EL2), + SYS_REG(FAR_EL2), + SYS_REG(HPFAR_EL2), + SYS_REG(MAIR_EL2), + SYS_REG(PIRE0_EL2), + SYS_REG(PIR_EL2), + SYS_REG(POR_EL2), + SYS_REG(AMAIR_EL2), + SYS_REG(VBAR_EL2), + SYS_REG(CONTEXTIDR_EL2), + SYS_REG(TPIDR_EL2), + SYS_REG(CNTVOFF_EL2), + SYS_REG(CNTHCTL_EL2), + SYS_REG(CNTHP_CTL_EL2), + SYS_REG(CNTHP_CVAL_EL2), + SYS_REG(CNTHV_CTL_EL2), + SYS_REG(CNTHV_CVAL_EL2), + SYS_REG(SP_EL2), + SYS_REG(VDISR_EL2), + SYS_REG(VSESR_EL2), +}; + #define BASE_SUBLIST \ { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), } #define VREGS_SUBLIST \ @@ -712,6 +781,14 @@ static __u64 pauth_generic_regs[] = { .regs = pauth_generic_regs, \ .regs_n = ARRAY_SIZE(pauth_generic_regs), \ } +#define EL2_SUBLIST \ + { \ + .name = "EL2", \ + .capability = KVM_CAP_ARM_EL2, \ + .feature = KVM_ARM_VCPU_HAS_EL2, \ + .regs = el2_regs, \ + .regs_n = ARRAY_SIZE(el2_regs), \ + } static struct vcpu_reg_list vregs_config = { .sublists = { @@ -761,6 +838,65 @@ static struct vcpu_reg_list pauth_pmu_config = { }, }; +static struct vcpu_reg_list el2_vregs_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + VREGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_vregs_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + VREGS_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static 
struct vcpu_reg_list el2_sve_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + SVE_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_sve_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + SVE_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_pauth_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_pauth_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + struct vcpu_reg_list *vcpu_configs[] = { &vregs_config, &vregs_pmu_config, @@ -768,5 +904,12 @@ struct vcpu_reg_list *vcpu_configs[] = { &sve_pmu_config, &pauth_config, &pauth_pmu_config, + + &el2_vregs_config, + &el2_vregs_pmu_config, + &el2_sve_config, + &el2_sve_pmu_config, + &el2_pauth_config, + &el2_pauth_pmu_config, }; int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); diff --git a/tools/testing/selftests/kvm/arm64/mmio_abort.c b/tools/testing/selftests/kvm/arm64/mmio_abort.c deleted file mode 100644 index 8b7a80a51b1c..000000000000 --- a/tools/testing/selftests/kvm/arm64/mmio_abort.c +++ /dev/null @@ -1,159 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * mmio_abort - Tests for userspace MMIO abort injection - * - * Copyright (c) 2024 Google LLC - */ -#include "processor.h" -#include "test_util.h" - -#define MMIO_ADDR 0x8000000ULL - -static u64 expected_abort_pc; - -static void expect_sea_handler(struct ex_regs *regs) -{ - u64 esr = read_sysreg(esr_el1); - - GUEST_ASSERT_EQ(regs->pc, expected_abort_pc); - GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); - GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); - - GUEST_DONE(); -} - -static void unexpected_dabt_handler(struct ex_regs *regs) -{ - GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc); -} - -static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code, - handler_fn 
dabt_handler) -{ - struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code); - - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(*vcpu); - vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler); - - virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1); - - return vm; -} - -static void vcpu_inject_extabt(struct kvm_vcpu *vcpu) -{ - struct kvm_vcpu_events events = {}; - - events.exception.ext_dabt_pending = true; - vcpu_events_set(vcpu, &events); -} - -static void vcpu_run_expect_done(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - vcpu_run(vcpu); - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - case UCALL_DONE: - break; - default: - TEST_FAIL("Unexpected ucall: %lu", uc.cmd); - } -} - -extern char test_mmio_abort_insn; - -static void test_mmio_abort_guest(void) -{ - WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn); - - asm volatile("test_mmio_abort_insn:\n\t" - "ldr x0, [%0]\n\t" - : : "r" (MMIO_ADDR) : "x0", "memory"); - - GUEST_FAIL("MMIO instruction should not retire"); -} - -/* - * Test that KVM doesn't complete MMIO emulation when userspace has made an - * external abort pending for the instruction. 
- */ -static void test_mmio_abort(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest, - expect_sea_handler); - struct kvm_run *run = vcpu->run; - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); - TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); - TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); - TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); - - vcpu_inject_extabt(vcpu); - vcpu_run_expect_done(vcpu); - kvm_vm_free(vm); -} - -extern char test_mmio_nisv_insn; - -static void test_mmio_nisv_guest(void) -{ - WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn); - - asm volatile("test_mmio_nisv_insn:\n\t" - "ldr x0, [%0], #8\n\t" - : : "r" (MMIO_ADDR) : "x0", "memory"); - - GUEST_FAIL("MMIO instruction should not retire"); -} - -/* - * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace - * hasn't enabled KVM_CAP_ARM_NISV_TO_USER. - */ -static void test_mmio_nisv(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, - unexpected_dabt_handler); - - TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN"); - TEST_ASSERT_EQ(errno, ENOSYS); - - kvm_vm_free(vm); -} - -/* - * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA - * reaches the guest. 
- */ -static void test_mmio_nisv_abort(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, - expect_sea_handler); - struct kvm_run *run = vcpu->run; - - vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1); - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV); - TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR); - - vcpu_inject_extabt(vcpu); - vcpu_run_expect_done(vcpu); - kvm_vm_free(vm); -} - -int main(void) -{ - test_mmio_abort(); - test_mmio_nisv(); - test_mmio_nisv_abort(); -} diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 8f422bfdfcb9..d3bf9204409c 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -139,6 +139,7 @@ static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = { }; static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, DF2, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, CSV2_frac, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, SSBS, ID_AA64PFR1_EL1_SSBS_NI), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, BT, 0), @@ -187,6 +188,14 @@ static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = { REG_FTR_END, }; +static const struct reg_ftr_bits ftr_id_aa64mmfr3_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1POE, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1PIE, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, SCTLRX, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, TCRX, 0), + REG_FTR_END, +}; + static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0), @@ -217,6 +226,7 @@ static struct test_feature_reg test_regs[] = { TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1), TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1), 
TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1), + TEST_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3_el1), TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1), }; @@ -774,8 +784,8 @@ int main(void) ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) + ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + - ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 3 + - MPAM_IDREG_TEST + MTE_IDREG_TEST; + ARRAY_SIZE(ftr_id_aa64mmfr3_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) - + ARRAY_SIZE(test_regs) + 3 + MPAM_IDREG_TEST + MTE_IDREG_TEST; ksft_set_plan(test_cnt); diff --git a/tools/testing/selftests/kvm/arm64/vgic_init.c b/tools/testing/selftests/kvm/arm64/vgic_init.c index b3b5fb0ff0a9..a8e0f46bc0ab 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_init.c +++ b/tools/testing/selftests/kvm/arm64/vgic_init.c @@ -9,17 +9,18 @@ #include <asm/kvm.h> #include <asm/kvm_para.h> +#include <arm64/gic_v3.h> + #include "test_util.h" #include "kvm_util.h" #include "processor.h" #include "vgic.h" +#include "gic_v3.h" #define NR_VCPUS 4 #define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset) -#define GICR_TYPER 0x8 - #define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2) #define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3) @@ -675,6 +676,44 @@ static void test_v3_its_region(void) vm_gic_destroy(&v); } +static void test_v3_nassgicap(void) +{ + struct kvm_vcpu *vcpus[NR_VCPUS]; + bool has_nassgicap; + struct vm_gic vm; + u32 typer2; + int ret; + + vm = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus); + kvm_device_attr_get(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + has_nassgicap = typer2 & GICD_TYPER2_nASSGIcap; + + typer2 |= GICD_TYPER2_nASSGIcap; + ret = __kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + if (has_nassgicap) + TEST_ASSERT(!ret, 
KVM_IOCTL_ERROR(KVM_DEVICE_ATTR_SET, ret)); + else + TEST_ASSERT(ret && errno == EINVAL, + "Enabled nASSGIcap even though it's unavailable"); + + typer2 &= ~GICD_TYPER2_nASSGIcap; + kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + + kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + typer2 ^= GICD_TYPER2_nASSGIcap; + ret = __kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + TEST_ASSERT(ret && errno == EBUSY, + "Changed nASSGIcap after initializing the VGIC"); + + vm_gic_destroy(&vm); +} + /* * Returns 0 if it's possible to create GIC device of a given type (V2 or V3). */ @@ -715,6 +754,220 @@ int test_kvm_device(uint32_t gic_dev_type) return 0; } +struct sr_def { + const char *name; + u32 encoding; +}; + +#define PACK_SR(r) \ + ((sys_reg_Op0(r) << 14) | \ + (sys_reg_Op1(r) << 11) | \ + (sys_reg_CRn(r) << 7) | \ + (sys_reg_CRm(r) << 3) | \ + (sys_reg_Op2(r))) + +#define SR(r) \ + { \ + .name = #r, \ + .encoding = r, \ + } + +static const struct sr_def sysregs_el1[] = { + SR(SYS_ICC_PMR_EL1), + SR(SYS_ICC_BPR0_EL1), + SR(SYS_ICC_AP0R0_EL1), + SR(SYS_ICC_AP0R1_EL1), + SR(SYS_ICC_AP0R2_EL1), + SR(SYS_ICC_AP0R3_EL1), + SR(SYS_ICC_AP1R0_EL1), + SR(SYS_ICC_AP1R1_EL1), + SR(SYS_ICC_AP1R2_EL1), + SR(SYS_ICC_AP1R3_EL1), + SR(SYS_ICC_BPR1_EL1), + SR(SYS_ICC_CTLR_EL1), + SR(SYS_ICC_SRE_EL1), + SR(SYS_ICC_IGRPEN0_EL1), + SR(SYS_ICC_IGRPEN1_EL1), +}; + +static const struct sr_def sysregs_el2[] = { + SR(SYS_ICH_AP0R0_EL2), + SR(SYS_ICH_AP0R1_EL2), + SR(SYS_ICH_AP0R2_EL2), + SR(SYS_ICH_AP0R3_EL2), + SR(SYS_ICH_AP1R0_EL2), + SR(SYS_ICH_AP1R1_EL2), + SR(SYS_ICH_AP1R2_EL2), + SR(SYS_ICH_AP1R3_EL2), + SR(SYS_ICH_HCR_EL2), + SR(SYS_ICC_SRE_EL2), + SR(SYS_ICH_VTR_EL2), + SR(SYS_ICH_VMCR_EL2), + SR(SYS_ICH_LR0_EL2), + SR(SYS_ICH_LR1_EL2), + SR(SYS_ICH_LR2_EL2), + SR(SYS_ICH_LR3_EL2), + SR(SYS_ICH_LR4_EL2), + SR(SYS_ICH_LR5_EL2), + SR(SYS_ICH_LR6_EL2), + 
SR(SYS_ICH_LR7_EL2), + SR(SYS_ICH_LR8_EL2), + SR(SYS_ICH_LR9_EL2), + SR(SYS_ICH_LR10_EL2), + SR(SYS_ICH_LR11_EL2), + SR(SYS_ICH_LR12_EL2), + SR(SYS_ICH_LR13_EL2), + SR(SYS_ICH_LR14_EL2), + SR(SYS_ICH_LR15_EL2), +}; + +static void test_sysreg_array(int gic, const struct sr_def *sr, int nr, + int (*check)(int, const struct sr_def *, const char *)) +{ + for (int i = 0; i < nr; i++) { + u64 val; + u64 attr; + int ret; + + /* Assume MPIDR_EL1.Aff*=0 */ + attr = PACK_SR(sr[i].encoding); + + /* + * The API is braindead. A register can be advertised as + * available, and yet not be readable or writable. + * ICC_APnR{1,2,3}_EL1 are examples of such non-sense, and + * ICH_APnR{1,2,3}_EL2 do follow suit for consistency. + * + * On the bright side, no known HW is implementing more than + * 5 bits of priority, so we're safe. Sort of... + */ + ret = __kvm_has_device_attr(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + attr); + TEST_ASSERT(ret == 0, "%s unavailable", sr[i].name); + + /* Check that we can write back what we read */ + ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + attr, &val); + TEST_ASSERT(ret == 0 || !check(gic, &sr[i], "read"), "%s unreadable", sr[i].name); + ret = __kvm_device_attr_set(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + attr, &val); + TEST_ASSERT(ret == 0 || !check(gic, &sr[i], "write"), "%s unwritable", sr[i].name); + } +} + +static u8 get_ctlr_pribits(int gic) +{ + int ret; + u64 val; + u8 pri; + + ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + PACK_SR(SYS_ICC_CTLR_EL1), &val); + TEST_ASSERT(ret == 0, "ICC_CTLR_EL1 unreadable"); + + pri = FIELD_GET(ICC_CTLR_EL1_PRI_BITS_MASK, val) + 1; + TEST_ASSERT(pri >= 5 && pri <= 7, "Bad pribits %d", pri); + + return pri; +} + +static int check_unaccessible_el1_regs(int gic, const struct sr_def *sr, const char *what) +{ + switch (sr->encoding) { + case SYS_ICC_AP0R1_EL1: + case SYS_ICC_AP1R1_EL1: + if (get_ctlr_pribits(gic) >= 6) + return -EINVAL; + break; + case 
SYS_ICC_AP0R2_EL1: + case SYS_ICC_AP0R3_EL1: + case SYS_ICC_AP1R2_EL1: + case SYS_ICC_AP1R3_EL1: + if (get_ctlr_pribits(gic) == 7) + return 0; + break; + default: + return -EINVAL; + } + + pr_info("SKIP %s for %s\n", sr->name, what); + return 0; +} + +static u8 get_vtr_pribits(int gic) +{ + int ret; + u64 val; + u8 pri; + + ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + PACK_SR(SYS_ICH_VTR_EL2), &val); + TEST_ASSERT(ret == 0, "ICH_VTR_EL2 unreadable"); + + pri = FIELD_GET(ICH_VTR_EL2_PRIbits, val) + 1; + TEST_ASSERT(pri >= 5 && pri <= 7, "Bad pribits %d", pri); + + return pri; +} + +static int check_unaccessible_el2_regs(int gic, const struct sr_def *sr, const char *what) +{ + switch (sr->encoding) { + case SYS_ICH_AP0R1_EL2: + case SYS_ICH_AP1R1_EL2: + if (get_vtr_pribits(gic) >= 6) + return -EINVAL; + break; + case SYS_ICH_AP0R2_EL2: + case SYS_ICH_AP0R3_EL2: + case SYS_ICH_AP1R2_EL2: + case SYS_ICH_AP1R3_EL2: + if (get_vtr_pribits(gic) == 7) + return -EINVAL; + break; + default: + return -EINVAL; + } + + pr_info("SKIP %s for %s\n", sr->name, what); + return 0; +} + +static void test_v3_sysregs(void) +{ + struct kvm_vcpu_init init = {}; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + u32 feat = 0; + int gic; + + if (kvm_check_cap(KVM_CAP_ARM_EL2)) + feat |= BIT(KVM_ARM_VCPU_HAS_EL2); + + vm = vm_create(1); + + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); + init.features[0] |= feat; + + vcpu = aarch64_vcpu_add(vm, 0, &init, NULL); + TEST_ASSERT(vcpu, "Can't create a vcpu?"); + + gic = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3); + TEST_ASSERT(gic >= 0, "No GIC???"); + + kvm_device_attr_set(gic, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + test_sysreg_array(gic, sysregs_el1, ARRAY_SIZE(sysregs_el1), check_unaccessible_el1_regs); + if (feat) + test_sysreg_array(gic, sysregs_el2, ARRAY_SIZE(sysregs_el2), check_unaccessible_el2_regs); + else + pr_info("SKIP EL2 registers, not available\n"); + + close(gic); + 
kvm_vm_free(vm); +} + void run_tests(uint32_t gic_dev_type) { test_vcpus_then_vgic(gic_dev_type); @@ -730,6 +983,8 @@ void run_tests(uint32_t gic_dev_type) test_v3_last_bit_single_rdist(); test_v3_redist_ipa_range_check_at_vcpu_run(); test_v3_its_region(); + test_v3_sysregs(); + test_v3_nassgicap(); } } diff --git a/tools/testing/selftests/kvm/arm64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c index f4ac28d53747..a09dd423c2d7 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_irq.c +++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c @@ -620,18 +620,12 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm, * that no actual interrupt was injected for those cases. */ - for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { - fd[f] = eventfd(0, 0); - TEST_ASSERT(fd[f] != -1, __KVM_SYSCALL_ERROR("eventfd()", fd[f])); - } + for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) + fd[f] = kvm_new_eventfd(); for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { - struct kvm_irqfd irqfd = { - .fd = fd[f], - .gsi = i - MIN_SPI, - }; assert(i <= (uint64_t)UINT_MAX); - vm_ioctl(vm, KVM_IRQFD, &irqfd); + kvm_assign_irqfd(vm, i - MIN_SPI, fd[f]); } for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { diff --git a/tools/testing/selftests/kvm/config b/tools/testing/selftests/kvm/config index 8835fed09e9f..96d874b239eb 100644 --- a/tools/testing/selftests/kvm/config +++ b/tools/testing/selftests/kvm/config @@ -1,5 +1,6 @@ CONFIG_KVM=y CONFIG_KVM_INTEL=y CONFIG_KVM_AMD=y +CONFIG_EVENTFD=y CONFIG_USERFAULTFD=y CONFIG_IDLE_PAGE_TRACKING=y diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index b0fc0f945766..255fed769a8a 100644 --- a/tools/testing/selftests/kvm/include/arm64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -254,6 +254,16 @@ static inline void local_irq_disable(void) asm volatile("msr daifset, #3" : : : "memory"); } +static 
inline void local_serror_enable(void) +{ + asm volatile("msr daifclr, #4" : : : "memory"); +} + +static inline void local_serror_disable(void) +{ + asm volatile("msr daifset, #4" : : : "memory"); +} + /** * struct arm_smccc_res - Result from SMC/HVC call * @a0-a3 result values from registers 0 to 3 diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index bee65ca08721..23a506d7eca3 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -18,8 +18,11 @@ #include <asm/atomic.h> #include <asm/kvm.h> +#include <sys/eventfd.h> #include <sys/ioctl.h> +#include <pthread.h> + #include "kvm_util_arch.h" #include "kvm_util_types.h" #include "sparsebit.h" @@ -253,6 +256,7 @@ struct vm_guest_mode_params { }; extern const struct vm_guest_mode_params vm_guest_mode_params[]; +int __open_path_or_exit(const char *path, int flags, const char *enoent_help); int open_path_or_exit(const char *path, int flags); int open_kvm_dev_path_or_exit(void); @@ -502,6 +506,45 @@ static inline int vm_get_stats_fd(struct kvm_vm *vm) return fd; } +static inline int __kvm_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd, + uint32_t flags) +{ + struct kvm_irqfd irqfd = { + .fd = eventfd, + .gsi = gsi, + .flags = flags, + .resamplefd = -1, + }; + + return __vm_ioctl(vm, KVM_IRQFD, &irqfd); +} + +static inline void kvm_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd, + uint32_t flags) +{ + int ret = __kvm_irqfd(vm, gsi, eventfd, flags); + + TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_IRQFD, ret, vm); +} + +static inline void kvm_assign_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd) +{ + kvm_irqfd(vm, gsi, eventfd, 0); +} + +static inline void kvm_deassign_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd) +{ + kvm_irqfd(vm, gsi, eventfd, KVM_IRQFD_FLAG_DEASSIGN); +} + +static inline int kvm_new_eventfd(void) +{ + int fd = eventfd(0, 0); + + TEST_ASSERT(fd >= 0, 
__KVM_SYSCALL_ERROR("eventfd()", fd)); + return fd; +} + static inline void read_stats_header(int stats_fd, struct kvm_stats_header *header) { ssize_t ret; @@ -1013,7 +1056,34 @@ struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm); void kvm_set_files_rlimit(uint32_t nr_vcpus); -void kvm_pin_this_task_to_pcpu(uint32_t pcpu); +int __pin_task_to_cpu(pthread_t task, int cpu); + +static inline void pin_task_to_cpu(pthread_t task, int cpu) +{ + int r; + + r = __pin_task_to_cpu(task, cpu); + TEST_ASSERT(!r, "Failed to set thread affinity to pCPU '%u'", cpu); +} + +static inline int pin_task_to_any_cpu(pthread_t task) +{ + int cpu = sched_getcpu(); + + pin_task_to_cpu(task, cpu); + return cpu; +} + +static inline void pin_self_to_cpu(int cpu) +{ + pin_task_to_cpu(pthread_self(), cpu); +} + +static inline int pin_self_to_any_cpu(void) +{ + return pin_task_to_any_cpu(pthread_self()); +} + void kvm_print_vcpu_pinning_help(void); void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], int nr_vcpus); diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index b11b5a53ebd5..2efb05c2f2fb 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -1150,7 +1150,6 @@ do { \ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); void kvm_init_vm_address_properties(struct kvm_vm *vm); -bool vm_is_unrestricted_guest(struct kvm_vm *vm); struct ex_regs { uint64_t rax, rcx, rdx, rbx; @@ -1325,6 +1324,11 @@ static inline bool kvm_is_forced_emulation_enabled(void) return !!get_kvm_param_integer("force_emulation_prefix"); } +static inline bool kvm_is_unrestricted_guest_enabled(void) +{ + return get_kvm_intel_param_bool("unrestricted_guest"); +} + uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, int *level); uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr); diff 
--git a/tools/testing/selftests/kvm/irqfd_test.c b/tools/testing/selftests/kvm/irqfd_test.c new file mode 100644 index 000000000000..7c301b4c7005 --- /dev/null +++ b/tools/testing/selftests/kvm/irqfd_test.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <errno.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <stdint.h> +#include <sys/sysinfo.h> + +#include "kvm_util.h" + +static struct kvm_vm *vm1; +static struct kvm_vm *vm2; +static int __eventfd; +static bool done; + +/* + * KVM de-assigns based on eventfd *and* GSI, but requires unique eventfds when + * assigning (the API isn't symmetrical). Abuse the oddity and use a per-task + * GSI base to avoid false failures due to cross-task de-assign, i.e. so that + * the secondary doesn't de-assign the primary's eventfd and cause assign to + * unexpectedly succeed on the primary. + */ +#define GSI_BASE_PRIMARY 0x20 +#define GSI_BASE_SECONDARY 0x30 + +static void juggle_eventfd_secondary(struct kvm_vm *vm, int eventfd) +{ + int r, i; + + /* + * The secondary task can encounter EBADF since the primary can close + * the eventfd at any time. And because the primary can recreate the + * eventfd, at the safe fd in the file table, the secondary can also + * encounter "unexpected" success, e.g. if the close+recreate happens + * between the first and second assignments. The secondary's role is + * mostly to antagonize KVM, not to detect bugs. + */ + for (i = 0; i < 2; i++) { + r = __kvm_irqfd(vm, GSI_BASE_SECONDARY, eventfd, 0); + TEST_ASSERT(!r || errno == EBUSY || errno == EBADF, + "Wanted success, EBUSY, or EBADF, r = %d, errno = %d", + r, errno); + + /* De-assign should succeed unless the eventfd was closed. 
*/ + r = __kvm_irqfd(vm, GSI_BASE_SECONDARY + i, eventfd, KVM_IRQFD_FLAG_DEASSIGN); + TEST_ASSERT(!r || errno == EBADF, + "De-assign should succeed unless the fd was closed"); + } +} + +static void *secondary_irqfd_juggler(void *ign) +{ + while (!READ_ONCE(done)) { + juggle_eventfd_secondary(vm1, READ_ONCE(__eventfd)); + juggle_eventfd_secondary(vm2, READ_ONCE(__eventfd)); + } + + return NULL; +} + +static void juggle_eventfd_primary(struct kvm_vm *vm, int eventfd) +{ + int r1, r2; + + /* + * At least one of the assigns should fail. KVM disallows assigning a + * single eventfd to multiple GSIs (or VMs), so it's possible that both + * assignments can fail, too. + */ + r1 = __kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, 0); + TEST_ASSERT(!r1 || errno == EBUSY, + "Wanted success or EBUSY, r = %d, errno = %d", r1, errno); + + r2 = __kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, 0); + TEST_ASSERT(r1 || (r2 && errno == EBUSY), + "Wanted failure (EBUSY), r1 = %d, r2 = %d, errno = %d", + r1, r2, errno); + + /* + * De-assign should always succeed, even if the corresponding assign + * failed. + */ + kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, KVM_IRQFD_FLAG_DEASSIGN); +} + +int main(int argc, char *argv[]) +{ + pthread_t racing_thread; + int r, i; + + /* Create "full" VMs, as KVM_IRQFD requires an in-kernel IRQ chip. */ + vm1 = vm_create(1); + vm2 = vm_create(1); + + WRITE_ONCE(__eventfd, kvm_new_eventfd()); + + kvm_irqfd(vm1, 10, __eventfd, 0); + + r = __kvm_irqfd(vm1, 11, __eventfd, 0); + TEST_ASSERT(r && errno == EBUSY, + "Wanted EBUSY, r = %d, errno = %d", r, errno); + + r = __kvm_irqfd(vm2, 12, __eventfd, 0); + TEST_ASSERT(r && errno == EBUSY, + "Wanted EBUSY, r = %d, errno = %d", r, errno); + + /* + * De-assign all eventfds, along with multiple eventfds that were never + * assigned. KVM's ABI is that de-assign is allowed so long as the + * eventfd itself is valid. 
+ */ + kvm_irqfd(vm1, 11, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 12, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 13, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 14, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 10, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + + close(__eventfd); + + pthread_create(&racing_thread, NULL, secondary_irqfd_juggler, vm2); + + for (i = 0; i < 10000; i++) { + WRITE_ONCE(__eventfd, kvm_new_eventfd()); + + juggle_eventfd_primary(vm1, __eventfd); + juggle_eventfd_primary(vm2, __eventfd); + close(__eventfd); + } + + WRITE_ONCE(done, true); + pthread_join(racing_thread, NULL); +} diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index a055343a7bf7..c3f5142b0a54 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -26,15 +26,27 @@ static uint32_t last_guest_seed; static int vcpu_mmap_sz(void); -int open_path_or_exit(const char *path, int flags) +int __open_path_or_exit(const char *path, int flags, const char *enoent_help) { int fd; fd = open(path, flags); - __TEST_REQUIRE(fd >= 0 || errno != ENOENT, "Cannot open %s: %s", path, strerror(errno)); - TEST_ASSERT(fd >= 0, "Failed to open '%s'", path); + if (fd < 0) + goto error; return fd; + +error: + if (errno == EACCES || errno == ENOENT) + ksft_exit_skip("- Cannot open '%s': %s. %s\n", + path, strerror(errno), + errno == EACCES ? "Root required?" 
: enoent_help); + TEST_FAIL("Failed to open '%s'", path); +} + +int open_path_or_exit(const char *path, int flags) +{ + return __open_path_or_exit(path, flags, ""); } /* @@ -48,7 +60,7 @@ int open_path_or_exit(const char *path, int flags) */ static int _open_kvm_dev_path_or_exit(int flags) { - return open_path_or_exit(KVM_DEV_PATH, flags); + return __open_path_or_exit(KVM_DEV_PATH, flags, "Is KVM loaded and enabled?"); } int open_kvm_dev_path_or_exit(void) @@ -64,6 +76,9 @@ static ssize_t get_module_param(const char *module_name, const char *param, ssize_t bytes_read; int fd, r; + /* Verify KVM is loaded, to provide a more helpful SKIP message. */ + close(open_kvm_dev_path_or_exit()); + r = snprintf(path, path_size, "/sys/module/%s/parameters/%s", module_name, param); TEST_ASSERT(r < path_size, @@ -605,15 +620,14 @@ struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm) return vm_vcpu_recreate(vm, 0); } -void kvm_pin_this_task_to_pcpu(uint32_t pcpu) +int __pin_task_to_cpu(pthread_t task, int cpu) { - cpu_set_t mask; - int r; + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); - CPU_ZERO(&mask); - CPU_SET(pcpu, &mask); - r = sched_setaffinity(0, sizeof(mask), &mask); - TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.", pcpu); + return pthread_setaffinity_np(task, sizeof(cpuset), &cpuset); } static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) @@ -667,7 +681,7 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], /* 2. Check if the main worker needs to be pinned. */ if (cpu) { - kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask)); + pin_self_to_cpu(parse_pcpu(cpu, &allowed_mask)); cpu = strtok(NULL, delim); } @@ -1716,7 +1730,18 @@ void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa) /* Create an interrupt controller chip for the specified VM. 
*/ void vm_create_irqchip(struct kvm_vm *vm) { - vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL); + int r; + + /* + * Allocate a fully in-kernel IRQ chip by default, but fall back to a + * split model (x86 only) if that fails (KVM x86 allows compiling out + * support for KVM_CREATE_IRQCHIP). + */ + r = __vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL); + if (r && errno == ENOTTY && kvm_has_cap(KVM_CAP_SPLIT_IRQCHIP)) + vm_enable_cap(vm, KVM_CAP_SPLIT_IRQCHIP, 24); + else + TEST_ASSERT_VM_VCPU_IOCTL(!r, KVM_CREATE_IRQCHIP, r, vm); vm->has_irqchip = true; } diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c index 313277486a1d..557c0a0a5658 100644 --- a/tools/testing/selftests/kvm/lib/memstress.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -265,7 +265,7 @@ static void *vcpu_thread_main(void *data) int vcpu_idx = vcpu->vcpu_idx; if (memstress_args.pin_vcpus) - kvm_pin_this_task_to_pcpu(memstress_args.vcpu_to_pcpu[vcpu_idx]); + pin_self_to_cpu(memstress_args.vcpu_to_pcpu[vcpu_idx]); WRITE_ONCE(vcpu->running, true); diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c index cfed9d26cc71..a99188f87a38 100644 --- a/tools/testing/selftests/kvm/lib/sparsebit.c +++ b/tools/testing/selftests/kvm/lib/sparsebit.c @@ -116,7 +116,7 @@ * * + A node with all mask bits set only occurs when the last bit * described by the previous node is not equal to this nodes - * starting index - 1. All such occurences of this condition are + * starting index - 1. All such occurrences of this condition are * avoided by moving the setting of the nodes mask bits into * the previous nodes num_after setting. * @@ -592,7 +592,7 @@ static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx) * * + A node with all mask bits set only occurs when the last bit * described by the previous node is not equal to this nodes - * starting index - 1. 
All such occurences of this condition are + * starting index - 1. All such occurrences of this condition are * avoided by moving the setting of the nodes mask bits into * the previous nodes num_after setting. */ diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index a92dc1dad085..d4c19ac885a9 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -1264,16 +1264,6 @@ done: return min(max_gfn, ht_gfn - 1); } -/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */ -bool vm_is_unrestricted_guest(struct kvm_vm *vm) -{ - /* Ensure that a KVM vendor-specific module is loaded. */ - if (vm == NULL) - close(open_kvm_dev_path_or_exit()); - - return get_kvm_intel_param_bool("unrestricted_guest"); -} - void kvm_selftest_arch_init(void) { host_cpu_is_intel = this_cpu_is_intel(); diff --git a/tools/testing/selftests/kvm/x86/aperfmperf_test.c b/tools/testing/selftests/kvm/x86/aperfmperf_test.c new file mode 100644 index 000000000000..8b15a13df939 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/aperfmperf_test.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test for KVM_X86_DISABLE_EXITS_APERFMPERF + * + * Copyright (C) 2025, Google LLC. + * + * Test the ability to disable VM-exits for rdmsr of IA32_APERF and + * IA32_MPERF. When these VM-exits are disabled, reads of these MSRs + * return the host's values. + * + * Note: Requires read access to /dev/cpu/<lpu>/msr to read host MSRs. 
+ */ + +#include <fcntl.h> +#include <limits.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdint.h> +#include <unistd.h> +#include <asm/msr-index.h> + +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "test_util.h" +#include "vmx.h" + +#define NUM_ITERATIONS 10000 + +static int open_dev_msr(int cpu) +{ + char path[PATH_MAX]; + + snprintf(path, sizeof(path), "/dev/cpu/%d/msr", cpu); + return open_path_or_exit(path, O_RDONLY); +} + +static uint64_t read_dev_msr(int msr_fd, uint32_t msr) +{ + uint64_t data; + ssize_t rc; + + rc = pread(msr_fd, &data, sizeof(data), msr); + TEST_ASSERT(rc == sizeof(data), "Read of MSR 0x%x failed", msr); + + return data; +} + +static void guest_read_aperf_mperf(void) +{ + int i; + + for (i = 0; i < NUM_ITERATIONS; i++) + GUEST_SYNC2(rdmsr(MSR_IA32_APERF), rdmsr(MSR_IA32_MPERF)); +} + +#define L2_GUEST_STACK_SIZE 64 + +static void l2_guest_code(void) +{ + guest_read_aperf_mperf(); + GUEST_DONE(); +} + +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + run_guest(vmcb, svm->vmcb_gpa); +} + +static void l1_vmx_code(struct vmx_pages *vmx) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true); + GUEST_ASSERT_EQ(load_vmcs(vmx), true); + + prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* + * Enable MSR bitmaps (the bitmap itself is allocated, zeroed, and set + * in the VMCS by prepare_vmcs()), as MSR exiting mandatory on Intel. 
+ */ + vmwrite(CPU_BASED_VM_EXEC_CONTROL, + vmreadz(CPU_BASED_VM_EXEC_CONTROL) | CPU_BASED_USE_MSR_BITMAPS); + + GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code)); + GUEST_ASSERT(!vmlaunch()); +} + +static void guest_code(void *nested_test_data) +{ + guest_read_aperf_mperf(); + + if (this_cpu_has(X86_FEATURE_SVM)) + l1_svm_code(nested_test_data); + else if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(nested_test_data); + else + GUEST_DONE(); + + TEST_FAIL("L2 should have signaled 'done'"); +} + +static void guest_no_aperfmperf(void) +{ + uint64_t msr_val; + uint8_t vector; + + vector = rdmsr_safe(MSR_IA32_APERF, &msr_val); + GUEST_ASSERT(vector == GP_VECTOR); + + vector = rdmsr_safe(MSR_IA32_APERF, &msr_val); + GUEST_ASSERT(vector == GP_VECTOR); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX); + uint64_t host_aperf_before, host_mperf_before; + vm_vaddr_t nested_test_data_gva; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int msr_fd, cpu, i; + + /* Sanity check that APERF/MPERF are unsupported by default. */ + vm = vm_create_with_one_vcpu(&vcpu, guest_no_aperfmperf); + vcpu_run(vcpu); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + kvm_vm_free(vm); + + cpu = pin_self_to_any_cpu(); + + msr_fd = open_dev_msr(cpu); + + /* + * This test requires a non-standard VM initialization, because + * KVM_ENABLE_CAP cannot be used on a VM file descriptor after + * a VCPU has been created. 
+ */ + vm = vm_create(1); + + TEST_REQUIRE(vm_check_cap(vm, KVM_CAP_X86_DISABLE_EXITS) & + KVM_X86_DISABLE_EXITS_APERFMPERF); + + vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, + KVM_X86_DISABLE_EXITS_APERFMPERF); + + vcpu = vm_vcpu_add(vm, 0, guest_code); + + if (!has_nested) + nested_test_data_gva = NONCANONICAL; + else if (kvm_cpu_has(X86_FEATURE_SVM)) + vcpu_alloc_svm(vm, &nested_test_data_gva); + else + vcpu_alloc_vmx(vm, &nested_test_data_gva); + + vcpu_args_set(vcpu, 1, nested_test_data_gva); + + host_aperf_before = read_dev_msr(msr_fd, MSR_IA32_APERF); + host_mperf_before = read_dev_msr(msr_fd, MSR_IA32_MPERF); + + for (i = 0; i <= NUM_ITERATIONS * (1 + has_nested); i++) { + uint64_t host_aperf_after, host_mperf_after; + uint64_t guest_aperf, guest_mperf; + struct ucall uc; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + goto done; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + case UCALL_SYNC: + guest_aperf = uc.args[0]; + guest_mperf = uc.args[1]; + + host_aperf_after = read_dev_msr(msr_fd, MSR_IA32_APERF); + host_mperf_after = read_dev_msr(msr_fd, MSR_IA32_MPERF); + + TEST_ASSERT(host_aperf_before < guest_aperf, + "APERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")", + host_aperf_before, guest_aperf); + TEST_ASSERT(guest_aperf < host_aperf_after, + "APERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")", + guest_aperf, host_aperf_after); + TEST_ASSERT(host_mperf_before < guest_mperf, + "MPERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")", + host_mperf_before, guest_mperf); + TEST_ASSERT(guest_mperf < host_mperf_after, + "MPERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")", + guest_mperf, host_mperf_after); + + host_aperf_before = host_aperf_after; + host_mperf_before = host_mperf_after; + + break; + } + } + TEST_FAIL("Didn't receive UCALL_DONE\n"); +done: + kvm_vm_free(vm); + close(msr_fd); + + return 0; +} diff --git 
a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c index 32b2794b78fe..8463a9956410 100644 --- a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c +++ b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c @@ -343,6 +343,12 @@ static void guest_code_permission_bitmap(void) data = test_rdmsr(MSR_GS_BASE); GUEST_ASSERT(data == MSR_GS_BASE); + /* Access the MSRs again to ensure KVM has disabled interception.*/ + data = test_rdmsr(MSR_FS_BASE); + GUEST_ASSERT(data != MSR_FS_BASE); + data = test_rdmsr(MSR_GS_BASE); + GUEST_ASSERT(data != MSR_GS_BASE); + GUEST_DONE(); } @@ -682,6 +688,8 @@ KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap) "Expected ucall state to be UCALL_SYNC."); vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs); run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE); + + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow); run_guest_then_process_ucall_done(vcpu); } diff --git a/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c index 3fd6eceab46f..2cae86d9d5e2 100644 --- a/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c +++ b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c @@ -110,7 +110,7 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; TEST_REQUIRE(host_cpu_is_intel); - TEST_REQUIRE(!vm_is_unrestricted_guest(NULL)); + TEST_REQUIRE(!kvm_is_unrestricted_guest_enabled()); vm = vm_create_with_one_vcpu(&vcpu, guest_code); get_set_sigalrm_vcpu(vcpu); diff --git a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c index 287829f850f7..23909b501ac2 100644 --- a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c @@ -547,15 +547,9 @@ int main(int argc, char *argv[]) int irq_fd[2] = { -1, -1 }; if 
(do_eventfd_tests) { - irq_fd[0] = eventfd(0, 0); - irq_fd[1] = eventfd(0, 0); + irq_fd[0] = kvm_new_eventfd(); + irq_fd[1] = kvm_new_eventfd(); - /* Unexpected, but not a KVM failure */ - if (irq_fd[0] == -1 || irq_fd[1] == -1) - do_evtchn_tests = do_eventfd_tests = false; - } - - if (do_eventfd_tests) { irq_routes.info.nr = 2; irq_routes.entries[0].gsi = 32; @@ -572,15 +566,8 @@ int main(int argc, char *argv[]) vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info); - struct kvm_irqfd ifd = { }; - - ifd.fd = irq_fd[0]; - ifd.gsi = 32; - vm_ioctl(vm, KVM_IRQFD, &ifd); - - ifd.fd = irq_fd[1]; - ifd.gsi = 33; - vm_ioctl(vm, KVM_IRQFD, &ifd); + kvm_assign_irqfd(vm, 32, irq_fd[0]); + kvm_assign_irqfd(vm, 33, irq_fd[1]); struct sigaction sa = { }; sa.sa_handler = handle_alrm; diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index 824266982aa3..f2dafa0b700b 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -38,9 +38,6 @@ map_fixed_noreplace write_to_hugetlbfs hmm-tests memfd_secret -hugetlb_dio -pkey_sighandler_tests_32 -pkey_sighandler_tests_64 soft-dirty split_huge_page_test ksm_tests diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index dbbcc5eb3dce..d30625c18259 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -72,31 +72,6 @@ static int detect_thp_sizes(size_t sizes[], int max) return count; } -static void detect_huge_zeropage(void) -{ - int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page", - O_RDONLY); - size_t enabled = 0; - char buf[15]; - int ret; - - if (fd < 0) - return; - - ret = pread(fd, buf, sizeof(buf), 0); - if (ret > 0 && ret < sizeof(buf)) { - buf[ret] = 0; - - enabled = strtoul(buf, NULL, 10); - if (enabled == 1) { - has_huge_zeropage = true; - ksft_print_msg("[INFO] huge zeropage is enabled\n"); - } - } - - close(fd); -} - static bool range_is_swapped(void *addr, size_t size) { for 
(; size; addr += pagesize, size -= pagesize) @@ -113,11 +88,11 @@ struct comm_pipes { static int setup_comm_pipes(struct comm_pipes *comm_pipes) { if (pipe(comm_pipes->child_ready) < 0) { - ksft_perror("pipe()"); + ksft_perror("pipe() failed"); return -errno; } if (pipe(comm_pipes->parent_ready) < 0) { - ksft_perror("pipe()"); + ksft_perror("pipe() failed"); close(comm_pipes->child_ready[0]); close(comm_pipes->child_ready[1]); return -errno; @@ -268,8 +243,10 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, * fail because (a) harder to fix and (b) nobody really cares. * Flag them as expected failure for now. */ + ksft_print_msg("Leak from parent into child\n"); log_test_result(KSFT_XFAIL); } else { + ksft_print_msg("Leak from parent into child\n"); log_test_result(KSFT_FAIL); } close_comm_pipes: @@ -332,7 +309,7 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, if (before_fork) { transferred = vmsplice(fds[1], &iov, 1, 0); if (transferred <= 0) { - ksft_print_msg("vmsplice() failed\n"); + ksft_perror("vmsplice() failed\n"); log_test_result(KSFT_FAIL); goto close_pipe; } @@ -397,8 +374,10 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, * fail because (a) harder to fix and (b) nobody really cares. * Flag them as expected failure for now. */ + ksft_print_msg("Leak from child into parent\n"); log_test_result(KSFT_XFAIL); } else { + ksft_print_msg("Leak from child into parent\n"); log_test_result(KSFT_FAIL); } close_pipe: @@ -562,7 +541,7 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) while (total < size) { cur = pread(fd, tmp + total, size - total, total); if (cur < 0) { - ksft_print_msg("pread() failed\n"); + ksft_perror("pread() failed\n"); log_test_result(KSFT_FAIL); goto quit_child; } @@ -570,10 +549,12 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) } /* Finally, check if we read what we expected. 
*/ - if (!memcmp(mem, tmp, size)) + if (!memcmp(mem, tmp, size)) { log_test_result(KSFT_PASS); - else + } else { + ksft_print_msg("Longtom R/W pin is not reliable\n"); log_test_result(KSFT_FAIL); + } quit_child: if (use_fork) { @@ -628,7 +609,7 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, tmp = malloc(size); if (!tmp) { - ksft_print_msg("malloc() failed\n"); + ksft_perror("malloc() failed\n"); log_test_result(KSFT_FAIL); return; } @@ -725,10 +706,12 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, ksft_perror("PIN_LONGTERM_TEST_READ failed"); log_test_result(KSFT_FAIL); } else { - if (!memcmp(mem, tmp, size)) + if (!memcmp(mem, tmp, size)) { log_test_result(KSFT_PASS); - else + } else { + ksft_print_msg("Longterm R/O pin is not reliable\n"); log_test_result(KSFT_FAIL); + } } ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP); @@ -1417,10 +1400,12 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, else ret = -EINVAL; - if (!ret) + if (!ret) { log_test_result(KSFT_PASS); - else + } else { + ksft_print_msg("Leak from parent into child\n"); log_test_result(KSFT_FAIL); + } close_comm_pipes: close_comm_pipes(&comm_pipes); } @@ -1528,10 +1513,12 @@ static void test_cow(char *mem, const char *smem, size_t size) memset(mem, 0xff, size); /* See if we still read the old values via the other mapping. */ - if (!memcmp(smem, old, size)) + if (!memcmp(smem, old, size)) { log_test_result(KSFT_PASS); - else + } else { + ksft_print_msg("Other mapping modified\n"); log_test_result(KSFT_FAIL); + } free(old); } @@ -1547,7 +1534,7 @@ static void test_ro_fast_pin(char *mem, const char *smem, size_t size) static void run_with_zeropage(non_anon_test_fn fn, const char *desc) { - char *mem, *smem, tmp; + char *mem, *smem; log_test_start("%s ... with shared zeropage", desc); @@ -1567,8 +1554,8 @@ static void run_with_zeropage(non_anon_test_fn fn, const char *desc) } /* Read from the page to populate the shared zeropage. 
*/ - tmp = *mem + *smem; - asm volatile("" : "+r" (tmp)); + FORCE_READ(mem); + FORCE_READ(smem); fn(mem, smem, pagesize); munmap: @@ -1579,7 +1566,7 @@ munmap: static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) { - char *mem, *smem, *mmap_mem, *mmap_smem, tmp; + char *mem, *smem, *mmap_mem, *mmap_smem; size_t mmap_size; int ret; @@ -1613,13 +1600,13 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1)); ret = madvise(mem, pmdsize, MADV_HUGEPAGE); - if (ret != 0) { + if (ret) { ksft_perror("madvise()"); log_test_result(KSFT_FAIL); goto munmap; } - ret |= madvise(smem, pmdsize, MADV_HUGEPAGE); - if (ret != 0) { + ret = madvise(smem, pmdsize, MADV_HUGEPAGE); + if (ret) { ksft_perror("madvise()"); log_test_result(KSFT_FAIL); goto munmap; @@ -1630,8 +1617,8 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) * the first sub-page and test if we get another sub-page populated * automatically. */ - tmp = *mem + *smem; - asm volatile("" : "+r" (tmp)); + FORCE_READ(mem); + FORCE_READ(smem); if (!pagemap_is_populated(pagemap_fd, mem + pagesize) || !pagemap_is_populated(pagemap_fd, smem + pagesize)) { ksft_test_result_skip("Did not get THPs populated\n"); @@ -1647,7 +1634,7 @@ munmap: static void run_with_memfd(non_anon_test_fn fn, const char *desc) { - char *mem, *smem, tmp; + char *mem, *smem; int fd; log_test_start("%s ... with memfd", desc); @@ -1681,8 +1668,8 @@ static void run_with_memfd(non_anon_test_fn fn, const char *desc) } /* Fault the page in. 
*/ - tmp = *mem + *smem; - asm volatile("" : "+r" (tmp)); + FORCE_READ(mem); + FORCE_READ(smem); fn(mem, smem, pagesize); munmap: @@ -1695,7 +1682,7 @@ close: static void run_with_tmpfile(non_anon_test_fn fn, const char *desc) { - char *mem, *smem, tmp; + char *mem, *smem; FILE *file; int fd; @@ -1737,8 +1724,8 @@ static void run_with_tmpfile(non_anon_test_fn fn, const char *desc) } /* Fault the page in. */ - tmp = *mem + *smem; - asm volatile("" : "+r" (tmp)); + FORCE_READ(mem); + FORCE_READ(smem); fn(mem, smem, pagesize); munmap: @@ -1753,7 +1740,7 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, size_t hugetlbsize) { int flags = MFD_HUGETLB; - char *mem, *smem, tmp; + char *mem, *smem; int fd; log_test_start("%s ... with memfd hugetlb (%zu kB)", desc, @@ -1791,8 +1778,8 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, } /* Fault the page in. */ - tmp = *mem + *smem; - asm volatile("" : "+r" (tmp)); + FORCE_READ(mem); + FORCE_READ(smem); fn(mem, smem, hugetlbsize); munmap: @@ -1891,7 +1878,7 @@ int main(int argc, char **argv) } nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, ARRAY_SIZE(hugetlbsizes)); - detect_huge_zeropage(); + has_huge_zeropage = detect_huge_zeropage(); ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() + ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() + diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c index 93af3d3760f9..b0d42eb04e3a 100644 --- a/tools/testing/selftests/mm/guard-regions.c +++ b/tools/testing/selftests/mm/guard-regions.c @@ -36,13 +36,6 @@ static volatile sig_atomic_t signal_jump_set; static sigjmp_buf signal_jmp_buf; /* - * Ignore the checkpatch warning, we must read from x but don't want to do - * anything with it in order to trigger a read page fault. We therefore must use - * volatile to stop the compiler from optimising this away. 
- */ -#define FORCE_READ(x) (*(volatile typeof(x) *)x) - -/* * How is the test backing the mapping being tested? */ enum backing_type { @@ -582,7 +575,7 @@ TEST_F(guard_regions, process_madvise) /* OK we don't have permission to do this, skip. */ if (count == -1 && errno == EPERM) - ksft_exit_skip("No process_madvise() permissions, try running as root.\n"); + SKIP(return, "No process_madvise() permissions, try running as root.\n"); /* Returns the number of bytes advised. */ ASSERT_EQ(count, 6 * page_size); diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c index 29047d2e0c49..268dadb8ce43 100644 --- a/tools/testing/selftests/mm/gup_longterm.c +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -114,7 +114,15 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) } if (fallocate(fd, 0, 0, size)) { - if (size == pagesize) { + /* + * Some filesystems (eg, NFSv3) don't support + * fallocate(), report this as a skip rather than a + * test failure. 
+ */ + if (errno == EOPNOTSUPP) { + ksft_print_msg("fallocate() not supported by filesystem\n"); + result = KSFT_SKIP; + } else if (size == pagesize) { ksft_print_msg("fallocate() failed (%s)\n", strerror(errno)); result = KSFT_FAIL; } else { diff --git a/tools/testing/selftests/mm/hugetlb-madvise.c b/tools/testing/selftests/mm/hugetlb-madvise.c index e74107185324..1afe14b9dc0c 100644 --- a/tools/testing/selftests/mm/hugetlb-madvise.c +++ b/tools/testing/selftests/mm/hugetlb-madvise.c @@ -47,14 +47,11 @@ void write_fault_pages(void *addr, unsigned long nr_pages) void read_fault_pages(void *addr, unsigned long nr_pages) { - volatile unsigned long dummy = 0; unsigned long i; for (i = 0; i < nr_pages; i++) { - dummy += *((unsigned long *)(addr + (i * huge_page_size))); - /* Prevent the compiler from optimizing out the entire loop: */ - asm volatile("" : "+r" (dummy)); + FORCE_READ(((unsigned long *)(addr + (i * huge_page_size)))); } } diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index 8a4d34cce36b..a18c50d51141 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -561,8 +561,6 @@ static bool wait_for_scan(const char *msg, char *p, int nr_hpages, usleep(TICK); } - madvise(p, nr_hpages * hpage_pmd_size, MADV_NOHUGEPAGE); - return timeout == -1; } @@ -1190,6 +1188,11 @@ int main(int argc, char **argv) .read_ahead_kb = 0, }; + if (!thp_is_enabled()) { + printf("Transparent Hugepages not available\n"); + return KSFT_SKIP; + } + parse_test_type(argc, argv); setbuf(stdout, NULL); diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index b61803e36d1c..d8bd1911dfc0 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -393,9 +393,13 @@ static void test_unmerge_uffd_wp(void) /* See if UFFD-WP is around. 
*/ uffdio_api.api = UFFD_API; - uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP; + uffdio_api.features = 0; if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) { - ksft_test_result_fail("UFFDIO_API failed\n"); + if (errno == EINVAL) + ksft_test_result_skip("The API version requested is not supported\n"); + else + ksft_test_result_fail("UFFDIO_API failed: %s\n", strerror(errno)); + goto close_uffd; } if (!(uffdio_api.features & UFFD_FEATURE_PAGEFAULT_FLAG_WP)) { @@ -403,6 +407,26 @@ static void test_unmerge_uffd_wp(void) goto close_uffd; } + /* + * UFFDIO_API must only be called once to enable features. + * So we close the old userfaultfd and create a new one to + * actually enable UFFD_FEATURE_PAGEFAULT_FLAG_WP. + */ + close(uffd); + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + if (uffd < 0) { + ksft_test_result_fail("__NR_userfaultfd failed\n"); + goto unmap; + } + + /* Now, enable it ("two-step handshake") */ + uffdio_api.api = UFFD_API; + uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP; + if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) { + ksft_test_result_fail("UFFDIO_API failed: %s\n", strerror(errno)); + goto close_uffd; + } + /* Register UFFD-WP, no need for an actual handler. 
*/ if (uffd_register(uffd, map, size, false, true, false)) { ksft_test_result_fail("UFFDIO_REGISTER_MODE_WP failed\n"); diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c index e80deac1436b..b77462b5c240 100644 --- a/tools/testing/selftests/mm/ksm_tests.c +++ b/tools/testing/selftests/mm/ksm_tests.c @@ -15,6 +15,7 @@ #include "../kselftest.h" #include <include/vdso/time64.h> #include "vm_util.h" +#include "thp_settings.h" #define KSM_SYSFS_PATH "/sys/kernel/mm/ksm/" #define KSM_FP(s) (KSM_SYSFS_PATH s) @@ -527,6 +528,11 @@ static int ksm_merge_hugepages_time(int merge_type, int mapping, int prot, unsigned long scan_time_ns; int pagemap_fd, n_normal_pages, n_huge_pages; + if (!thp_is_enabled()) { + printf("Transparent Hugepages not available\n"); + return KSFT_SKIP; + } + map_size *= MB; size_t len = map_size; diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c index cc26480098ae..cc4253f47f10 100644 --- a/tools/testing/selftests/mm/merge.c +++ b/tools/testing/selftests/mm/merge.c @@ -2,15 +2,18 @@ #define _GNU_SOURCE #include "../kselftest_harness.h" +#include <linux/prctl.h> #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <sys/mman.h> +#include <sys/prctl.h> #include <sys/syscall.h> #include <sys/wait.h> #include <linux/perf_event.h> #include "vm_util.h" +#include <linux/mman.h> FIXTURE(merge) { @@ -23,7 +26,7 @@ FIXTURE_SETUP(merge) { self->page_size = psize(); /* Carve out PROT_NONE region to map over. */ - self->carveout = mmap(NULL, 12 * self->page_size, PROT_NONE, + self->carveout = mmap(NULL, 30 * self->page_size, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); ASSERT_NE(self->carveout, MAP_FAILED); /* Setup PROCMAP_QUERY interface. 
*/ @@ -32,8 +35,13 @@ FIXTURE_SETUP(merge) FIXTURE_TEARDOWN(merge) { - ASSERT_EQ(munmap(self->carveout, 12 * self->page_size), 0); + ASSERT_EQ(munmap(self->carveout, 30 * self->page_size), 0); ASSERT_EQ(close_procmap(&self->procmap), 0); + /* + * Clear unconditionally, as some tests set this. It is no issue if this + * fails (KSM may be disabled for instance). + */ + prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0); } TEST_F(merge, mprotect_unfaulted_left) @@ -498,4 +506,669 @@ out: remove(probe_file); } +TEST_F(merge, ksm_merge) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2; + int err; + + /* + * Map two R/W immediately adjacent to one another, they should + * trivially merge: + * + * |-----------|-----------| + * | R/W | R/W | + * |-----------|-----------| + * ptr ptr2 + */ + + ptr = mmap(&carveout[page_size], page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr2 = mmap(&carveout[2 * page_size], page_size, + PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 2 * page_size); + + /* Unmap the second half of this merged VMA. */ + ASSERT_EQ(munmap(ptr2, page_size), 0); + + /* OK, now enable global KSM merge. We clear this on test teardown. */ + err = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0); + if (err == -1) { + int errnum = errno; + + /* Only non-failure case... */ + ASSERT_EQ(errnum, EINVAL); + /* ...but indicates we should skip. */ + SKIP(return, "KSM memory merging not supported, skipping."); + } + + /* + * Now map a VMA adjacent to the existing that was just made + * VM_MERGEABLE, this should merge as well. 
+ */ + ptr2 = mmap(&carveout[2 * page_size], page_size, + PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 2 * page_size); + + /* Now this VMA altogether. */ + ASSERT_EQ(munmap(ptr, 2 * page_size), 0); + + /* Try the same operation as before, asserting this also merges fine. */ + ptr = mmap(&carveout[page_size], page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr2 = mmap(&carveout[2 * page_size], page_size, + PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 2 * page_size); +} + +TEST_F(merge, mremap_unfaulted_to_faulted) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2; + + /* + * Map two distinct areas: + * + * |-----------| |-----------| + * | unfaulted | | unfaulted | + * |-----------| |-----------| + * ptr ptr2 + */ + ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr2 = mmap(&carveout[7 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + + /* Offset ptr2 further away. 
*/ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Fault in ptr: + * \ + * |-----------| / |-----------| + * | faulted | \ | unfaulted | + * |-----------| / |-----------| + * ptr \ ptr2 + */ + ptr[0] = 'x'; + + /* + * Now move ptr2 adjacent to ptr: + * + * |-----------|-----------| + * | faulted | unfaulted | + * |-----------|-----------| + * ptr ptr2 + * + * It should merge: + * + * |----------------------| + * | faulted | + * |----------------------| + * ptr + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size); +} + +TEST_F(merge, mremap_unfaulted_behind_faulted) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2; + + /* + * Map two distinct areas: + * + * |-----------| |-----------| + * | unfaulted | | unfaulted | + * |-----------| |-----------| + * ptr ptr2 + */ + ptr = mmap(&carveout[6 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr2 = mmap(&carveout[14 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + + /* Offset ptr2 further away. 
*/ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Fault in ptr: + * \ + * |-----------| / |-----------| + * | faulted | \ | unfaulted | + * |-----------| / |-----------| + * ptr \ ptr2 + */ + ptr[0] = 'x'; + + /* + * Now move ptr2 adjacent, but behind, ptr: + * + * |-----------|-----------| + * | unfaulted | faulted | + * |-----------|-----------| + * ptr2 ptr + * + * It should merge: + * + * |----------------------| + * | faulted | + * |----------------------| + * ptr2 + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &carveout[page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr2)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 10 * page_size); +} + +TEST_F(merge, mremap_unfaulted_between_faulted) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2, *ptr3; + + /* + * Map three distinct areas: + * + * |-----------| |-----------| |-----------| + * | unfaulted | | unfaulted | | unfaulted | + * |-----------| |-----------| |-----------| + * ptr ptr2 ptr3 + */ + ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr2 = mmap(&carveout[7 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + ptr3 = mmap(&carveout[14 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr3, MAP_FAILED); + + /* Offset ptr3 further away. 
*/ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr3 + page_size * 2000); + ASSERT_NE(ptr3, MAP_FAILED); + + /* Offset ptr2 further away. */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Fault in ptr, ptr3: + * \ \ + * |-----------| / |-----------| / |-----------| + * | faulted | \ | unfaulted | \ | faulted | + * |-----------| / |-----------| / |-----------| + * ptr \ ptr2 \ ptr3 + */ + ptr[0] = 'x'; + ptr3[0] = 'x'; + + /* + * Move ptr3 back into place, leaving a place for ptr2: + * \ + * |-----------| |-----------| / |-----------| + * | faulted | | faulted | \ | unfaulted | + * |-----------| |-----------| / |-----------| + * ptr ptr3 \ ptr2 + */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[10 * page_size]); + ASSERT_NE(ptr3, MAP_FAILED); + + /* + * Finally, move ptr2 into place: + * + * |-----------|-----------|-----------| + * | faulted | unfaulted | faulted | + * |-----------|-----------|-----------| + * ptr ptr2 ptr3 + * + * It should merge, but only ptr, ptr2: + * + * |-----------------------|-----------| + * | faulted | unfaulted | + * |-----------------------|-----------| + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr3)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr3); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr3 + 5 * page_size); +} + +TEST_F(merge, mremap_unfaulted_between_faulted_unfaulted) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = 
&self->procmap; + char *ptr, *ptr2, *ptr3; + + /* + * Map three distinct areas: + * + * |-----------| |-----------| |-----------| + * | unfaulted | | unfaulted | | unfaulted | + * |-----------| |-----------| |-----------| + * ptr ptr2 ptr3 + */ + ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr2 = mmap(&carveout[7 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + ptr3 = mmap(&carveout[14 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr3, MAP_FAILED); + + /* Offset ptr3 further away. */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr3 + page_size * 2000); + ASSERT_NE(ptr3, MAP_FAILED); + + + /* Offset ptr2 further away. */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Fault in ptr: + * \ \ + * |-----------| / |-----------| / |-----------| + * | faulted | \ | unfaulted | \ | unfaulted | + * |-----------| / |-----------| / |-----------| + * ptr \ ptr2 \ ptr3 + */ + ptr[0] = 'x'; + + /* + * Move ptr3 back into place, leaving a place for ptr2: + * \ + * |-----------| |-----------| / |-----------| + * | faulted | | unfaulted | \ | unfaulted | + * |-----------| |-----------| / |-----------| + * ptr ptr3 \ ptr2 + */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[10 * page_size]); + ASSERT_NE(ptr3, MAP_FAILED); + + /* + * Finally, move ptr2 into place: + * + * |-----------|-----------|-----------| + * | faulted | unfaulted | unfaulted | + * |-----------|-----------|-----------| + * ptr ptr2 ptr3 + * + * It should merge: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + */ 
+ ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size); +} + +TEST_F(merge, mremap_unfaulted_between_correctly_placed_faulted) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2; + + /* + * Map one larger area: + * + * |-----------------------------------| + * | unfaulted | + * |-----------------------------------| + */ + ptr = mmap(&carveout[page_size], 15 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Fault in ptr: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + */ + ptr[0] = 'x'; + + /* + * Unmap middle: + * + * |-----------| |-----------| + * | faulted | | faulted | + * |-----------| |-----------| + * + * Now the faulted areas are compatible with each other (anon_vma the + * same, vma->vm_pgoff equal to virtual page offset). 
+ */ + ASSERT_EQ(munmap(&ptr[5 * page_size], 5 * page_size), 0); + + /* + * Map a new area, ptr2: + * \ + * |-----------| |-----------| / |-----------| + * | faulted | | faulted | \ | unfaulted | + * |-----------| |-----------| / |-----------| + * ptr \ ptr2 + */ + ptr2 = mmap(&carveout[20 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Finally, move ptr2 into place: + * + * |-----------|-----------|-----------| + * | faulted | unfaulted | faulted | + * |-----------|-----------|-----------| + * ptr ptr2 ptr3 + * + * It should merge: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size); +} + +TEST_F(merge, mremap_correct_placed_faulted) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr, *ptr2, *ptr3; + + /* + * Map one larger area: + * + * |-----------------------------------| + * | unfaulted | + * |-----------------------------------| + */ + ptr = mmap(&carveout[page_size], 15 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Fault in ptr: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + */ + ptr[0] = 'x'; + + /* + * Offset the final and middle 5 pages further away: + * \ \ + * |-----------| / |-----------| / |-----------| + * | faulted | \ | faulted | \ | faulted | + * |-----------| / |-----------| / |-----------| + * ptr \ ptr2 \ ptr3 + */ + ptr3 = &ptr[10 * page_size]; + ptr3 = 
sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr3 + page_size * 2000); + ASSERT_NE(ptr3, MAP_FAILED); + ptr2 = &ptr[5 * page_size]; + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Move ptr2 into its correct place: + * \ + * |-----------|-----------| / |-----------| + * | faulted | faulted | \ | faulted | + * |-----------|-----------| / |-----------| + * ptr ptr2 \ ptr3 + * + * It should merge: + * \ + * |-----------------------| / |-----------| + * | faulted | \ | faulted | + * |-----------------------| / |-----------| + * ptr \ ptr3 + */ + + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size); + + /* + * Now move ptr out of place: + * \ \ + * |-----------| / |-----------| / |-----------| + * | faulted | \ | faulted | \ | faulted | + * |-----------| / |-----------| / |-----------| + * ptr2 \ ptr \ ptr3 + */ + ptr = sys_mremap(ptr, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr + page_size * 1000); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Now move ptr back into place: + * \ + * |-----------|-----------| / |-----------| + * | faulted | faulted | \ | faulted | + * |-----------|-----------| / |-----------| + * ptr ptr2 \ ptr3 + * + * It should merge: + * \ + * |-----------------------| / |-----------| + * | faulted | \ | faulted | + * |-----------------------| / |-----------| + * ptr \ ptr3 + */ + ptr = sys_mremap(ptr, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &carveout[page_size]); + ASSERT_NE(ptr, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + 
ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size); + + /* + * Now move ptr out of place again: + * \ \ + * |-----------| / |-----------| / |-----------| + * | faulted | \ | faulted | \ | faulted | + * |-----------| / |-----------| / |-----------| + * ptr2 \ ptr \ ptr3 + */ + ptr = sys_mremap(ptr, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr + page_size * 1000); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Now move ptr3 back into place: + * \ + * |-----------|-----------| / |-----------| + * | faulted | faulted | \ | faulted | + * |-----------|-----------| / |-----------| + * ptr2 ptr3 \ ptr + * + * It should merge: + * \ + * |-----------------------| / |-----------| + * | faulted | \ | faulted | + * |-----------------------| / |-----------| + * ptr2 \ ptr + */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr2[5 * page_size]); + ASSERT_NE(ptr3, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr2)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 10 * page_size); + + /* + * Now move ptr back into place: + * + * |-----------|-----------------------| + * | faulted | faulted | + * |-----------|-----------------------| + * ptr ptr2 + * + * It should merge: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + * ptr + */ + ptr = sys_mremap(ptr, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &carveout[page_size]); + ASSERT_NE(ptr, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size); + + /* + * Now move ptr2 out of the way: + * \ + * |-----------| |-----------| / |-----------| + * | faulted | | faulted | \ | faulted | + * |-----------| |-----------| / |-----------| + * ptr ptr3 \ ptr2 + */ + ptr2 = 
sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr2 + page_size * 1000); + ASSERT_NE(ptr2, MAP_FAILED); + + /* + * Now move it back: + * + * |-----------|-----------|-----------| + * | faulted | faulted | faulted | + * |-----------|-----------|-----------| + * ptr ptr2 ptr3 + * + * It should merge: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + * ptr + */ + ptr2 = sys_mremap(ptr2, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[5 * page_size]); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size); + + /* + * Move ptr3 out of place: + * \ + * |-----------------------| / |-----------| + * | faulted | \ | faulted | + * |-----------------------| / |-----------| + * ptr \ ptr3 + */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr3 + page_size * 1000); + ASSERT_NE(ptr3, MAP_FAILED); + + /* + * Now move it back: + * + * |-----------|-----------|-----------| + * | faulted | faulted | faulted | + * |-----------|-----------|-----------| + * ptr ptr2 ptr3 + * + * It should merge: + * + * |-----------------------------------| + * | faulted | + * |-----------------------------------| + * ptr + */ + ptr3 = sys_mremap(ptr3, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, &ptr[10 * page_size]); + ASSERT_NE(ptr3, MAP_FAILED); + + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c index 1e3a595fbf01..c5a73617796a 100644 --- a/tools/testing/selftests/mm/migration.c +++ b/tools/testing/selftests/mm/migration.c @@ -5,6 +5,8 
@@ */ #include "../kselftest_harness.h" +#include "thp_settings.h" + #include <strings.h> #include <pthread.h> #include <numa.h> @@ -14,6 +16,7 @@ #include <sys/types.h> #include <signal.h> #include <time.h> +#include "vm_util.h" #define TWOMEG (2<<20) #define RUNTIME (20) @@ -101,15 +104,13 @@ int migrate(uint64_t *ptr, int n1, int n2) void *access_mem(void *ptr) { - volatile uint64_t y = 0; - volatile uint64_t *x = ptr; - while (1) { pthread_testcancel(); - y += *x; - - /* Prevent the compiler from optimizing out the writes to y: */ - asm volatile("" : "+r" (y)); + /* Force a read from the memory pointed to by ptr. This ensures + * the memory access actually happens and prevents the compiler + * from optimizing away this entire loop. + */ + FORCE_READ((uint64_t *)ptr); } return NULL; @@ -185,6 +186,9 @@ TEST_F_TIMEOUT(migration, private_anon_thp, 2*RUNTIME) uint64_t *ptr; int i; + if (!thp_is_enabled()) + SKIP(return, "Transparent Hugepages not available"); + if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0) SKIP(return, "Not enough threads or NUMA nodes available"); @@ -214,6 +218,9 @@ TEST_F_TIMEOUT(migration, shared_anon_thp, 2*RUNTIME) uint64_t *ptr; int i; + if (!thp_is_enabled()) + SKIP(return, "Transparent Hugepages not available"); + if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0) SKIP(return, "Not enough threads or NUMA nodes available"); diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index bb84476a177f..fccf9e797a0c 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -380,6 +380,359 @@ out: ksft_test_result_fail("%s\n", test_name); } +static bool is_multiple_vma_range_ok(unsigned int pattern_seed, + char *ptr, unsigned long page_size) +{ + int i; + + srand(pattern_seed); + for (i = 0; i <= 10; i += 2) { + int j; + char *buf = &ptr[i * page_size]; + size_t size = i == 4 ? 
2 * page_size : page_size; + + for (j = 0; j < size; j++) { + char chr = rand(); + + if (chr != buf[j]) { + ksft_print_msg("page %d offset %d corrupted, expected %d got %d\n", + i, j, chr, buf[j]); + return false; + } + } + } + + return true; +} + +static void mremap_move_multiple_vmas(unsigned int pattern_seed, + unsigned long page_size, + bool dont_unmap) +{ + int mremap_flags = MREMAP_FIXED | MREMAP_MAYMOVE; + char *test_name = "mremap move multiple vmas"; + const size_t size = 11 * page_size; + bool success = true; + char *ptr, *tgt_ptr; + int i; + + if (dont_unmap) + mremap_flags |= MREMAP_DONTUNMAP; + + ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out; + } + + tgt_ptr = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (tgt_ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out; + } + if (munmap(tgt_ptr, 2 * size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + + /* + * Unmap so we end up with: + * + * 0 2 4 5 6 8 10 offset in buffer + * |*| |*| |*****| |*| |*| + * |*| |*| |*****| |*| |*| + * 0 1 2 3 4 5 6 pattern offset + */ + for (i = 1; i < 10; i += 2) { + if (i == 5) + continue; + + if (munmap(&ptr[i * page_size], page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + } + + srand(pattern_seed); + + /* Set up random patterns. */ + for (i = 0; i <= 10; i += 2) { + int j; + size_t size = i == 4 ? 2 * page_size : page_size; + char *buf = &ptr[i * page_size]; + + for (j = 0; j < size; j++) + buf[j] = rand(); + } + + /* First, just move the whole thing. */ + if (mremap(ptr, size, size, mremap_flags, tgt_ptr) == MAP_FAILED) { + perror("mremap"); + success = false; + goto out_unmap; + } + /* Check move was ok. */ + if (!is_multiple_vma_range_ok(pattern_seed, tgt_ptr, page_size)) { + success = false; + goto out_unmap; + } + + /* Move next to itself. 
*/ + if (mremap(tgt_ptr, size, size, mremap_flags, + &tgt_ptr[size]) == MAP_FAILED) { + perror("mremap"); + success = false; + goto out_unmap; + } + /* Check that the move is ok. */ + if (!is_multiple_vma_range_ok(pattern_seed, &tgt_ptr[size], page_size)) { + success = false; + goto out_unmap; + } + + /* Map a range to overwrite. */ + if (mmap(tgt_ptr, size, PROT_NONE, + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0) == MAP_FAILED) { + perror("mmap tgt"); + success = false; + goto out_unmap; + } + /* Move and overwrite. */ + if (mremap(&tgt_ptr[size], size, size, + mremap_flags, tgt_ptr) == MAP_FAILED) { + perror("mremap"); + success = false; + goto out_unmap; + } + /* Check that the move is ok. */ + if (!is_multiple_vma_range_ok(pattern_seed, tgt_ptr, page_size)) { + success = false; + goto out_unmap; + } + +out_unmap: + if (munmap(tgt_ptr, 2 * size)) + perror("munmap tgt"); + if (munmap(ptr, size)) + perror("munmap src"); + +out: + if (success) + ksft_test_result_pass("%s%s\n", test_name, + dont_unmap ? " [dontunnmap]" : ""); + else + ksft_test_result_fail("%s%s\n", test_name, + dont_unmap ? 
" [dontunnmap]" : ""); +} + +static void mremap_shrink_multiple_vmas(unsigned long page_size, + bool inplace) +{ + char *test_name = "mremap shrink multiple vmas"; + const size_t size = 10 * page_size; + bool success = true; + char *ptr, *tgt_ptr; + void *res; + int i; + + ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out; + } + + tgt_ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (tgt_ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out; + } + if (munmap(tgt_ptr, size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + + /* + * Unmap so we end up with: + * + * 0 2 4 6 8 10 offset in buffer + * |*| |*| |*| |*| |*| |*| + * |*| |*| |*| |*| |*| |*| + */ + for (i = 1; i < 10; i += 2) { + if (munmap(&ptr[i * page_size], page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + } + + /* + * Shrink in-place across multiple VMAs and gaps so we end up with: + * + * 0 + * |*| + * |*| + */ + if (inplace) + res = mremap(ptr, size, page_size, 0); + else + res = mremap(ptr, size, page_size, MREMAP_MAYMOVE | MREMAP_FIXED, + tgt_ptr); + + if (res == MAP_FAILED) { + perror("mremap"); + success = false; + goto out_unmap; + } + +out_unmap: + if (munmap(tgt_ptr, size)) + perror("munmap tgt"); + if (munmap(ptr, size)) + perror("munmap src"); +out: + if (success) + ksft_test_result_pass("%s%s\n", test_name, + inplace ? " [inplace]" : ""); + else + ksft_test_result_fail("%s%s\n", test_name, + inplace ? 
" [inplace]" : ""); +} + +static void mremap_move_multiple_vmas_split(unsigned int pattern_seed, + unsigned long page_size, + bool dont_unmap) +{ + char *test_name = "mremap move multiple vmas split"; + int mremap_flags = MREMAP_FIXED | MREMAP_MAYMOVE; + const size_t size = 10 * page_size; + bool success = true; + char *ptr, *tgt_ptr; + int i; + + if (dont_unmap) + mremap_flags |= MREMAP_DONTUNMAP; + + ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out; + } + + tgt_ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (tgt_ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out; + } + if (munmap(tgt_ptr, size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + + /* + * Unmap so we end up with: + * + * 0 1 2 3 4 5 6 7 8 9 10 offset in buffer + * |**********| |*******| + * |**********| |*******| + * 0 1 2 3 4 5 6 7 8 9 pattern offset + */ + if (munmap(&ptr[5 * page_size], page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + + /* Set up random patterns. */ + srand(pattern_seed); + for (i = 0; i < 10; i++) { + int j; + char *buf = &ptr[i * page_size]; + + if (i == 5) + continue; + + for (j = 0; j < page_size; j++) + buf[j] = rand(); + } + + /* + * Move the below: + * + * <-------------> + * 0 1 2 3 4 5 6 7 8 9 10 offset in buffer + * |**********| |*******| + * |**********| |*******| + * 0 1 2 3 4 5 6 7 8 9 pattern offset + * + * Into: + * + * 0 1 2 3 4 5 6 7 offset in buffer + * |*****| |*****| + * |*****| |*****| + * 2 3 4 5 6 7 pattern offset + */ + if (mremap(&ptr[2 * page_size], size - 3 * page_size, size - 3 * page_size, + mremap_flags, tgt_ptr) == MAP_FAILED) { + perror("mremap"); + success = false; + goto out_unmap; + } + + /* Offset into random pattern. */ + srand(pattern_seed); + for (i = 0; i < 2 * page_size; i++) + rand(); + + /* Check pattern. 
*/ + for (i = 0; i < 7; i++) { + int j; + char *buf = &tgt_ptr[i * page_size]; + + if (i == 3) + continue; + + for (j = 0; j < page_size; j++) { + char chr = rand(); + + if (chr != buf[j]) { + ksft_print_msg("page %d offset %d corrupted, expected %d got %d\n", + i, j, chr, buf[j]); + goto out_unmap; + } + } + } + +out_unmap: + if (munmap(tgt_ptr, size)) + perror("munmap tgt"); + if (munmap(ptr, size)) + perror("munmap src"); +out: + if (success) + ksft_test_result_pass("%s%s\n", test_name, + dont_unmap ? " [dontunnmap]" : ""); + else + ksft_test_result_fail("%s%s\n", test_name, + dont_unmap ? " [dontunnmap]" : ""); +} + /* Returns the time taken for the remap on success else returns -1. */ static long long remap_region(struct config c, unsigned int threshold_mb, char *rand_addr) @@ -721,7 +1074,7 @@ int main(int argc, char **argv) char *rand_addr; size_t rand_size; int num_expand_tests = 2; - int num_misc_tests = 2; + int num_misc_tests = 8; struct test test_cases[MAX_TEST] = {}; struct test perf_test_cases[MAX_PERF_TEST]; int page_size; @@ -848,6 +1201,12 @@ int main(int argc, char **argv) mremap_move_within_range(pattern_seed, rand_addr); mremap_move_1mb_from_start(pattern_seed, rand_addr); + mremap_shrink_multiple_vmas(page_size, /* inplace= */true); + mremap_shrink_multiple_vmas(page_size, /* inplace= */false); + mremap_move_multiple_vmas(pattern_seed, page_size, /* dontunmap= */ false); + mremap_move_multiple_vmas(pattern_seed, page_size, /* dontunmap= */ true); + mremap_move_multiple_vmas_split(pattern_seed, page_size, /* dontunmap= */ false); + mremap_move_multiple_vmas_split(pattern_seed, page_size, /* dontunmap= */ true); if (run_perf_tests) { ksft_print_msg("\n%s\n", diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index b07acc86f4f0..0d4209eef0c3 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 + 
#define _GNU_SOURCE #include <stdio.h> #include <fcntl.h> @@ -34,8 +35,8 @@ #define PAGEMAP "/proc/self/pagemap" int pagemap_fd; int uffd; -unsigned long page_size; -unsigned int hpage_size; +size_t page_size; +size_t hpage_size; const char *progname; #define LEN(region) ((region.end - region.start)/page_size) @@ -1480,6 +1481,66 @@ static void transact_test(int page_size) extra_thread_faults); } +void zeropfn_tests(void) +{ + unsigned long long mem_size; + struct page_region vec; + int i, ret; + char *mmap_mem, *mem; + + /* Test with normal memory */ + mem_size = 10 * page_size; + mem = mmap(NULL, mem_size, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + + /* Touch each page to ensure it's mapped */ + for (i = 0; i < mem_size; i += page_size) + (void)((volatile char *)mem)[i]; + + ret = pagemap_ioctl(mem, mem_size, &vec, 1, 0, + (mem_size / page_size), PAGE_IS_PFNZERO, 0, 0, PAGE_IS_PFNZERO); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec) == (mem_size / page_size), + "%s all pages must have PFNZERO set\n", __func__); + + munmap(mem, mem_size); + + /* Test with huge page if user_zero_page is set to 1 */ + if (!detect_huge_zeropage()) { + ksft_test_result_skip("%s use_zero_page not supported or set to 1\n", __func__); + return; + } + + mem_size = 2 * hpage_size; + mmap_mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mmap_mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + + /* We need a THP-aligned memory area. 
*/ + mem = (char *)(((uintptr_t)mmap_mem + hpage_size) & ~(hpage_size - 1)); + + ret = madvise(mem, hpage_size, MADV_HUGEPAGE); + if (!ret) { + FORCE_READ(mem); + + ret = pagemap_ioctl(mem, hpage_size, &vec, 1, 0, + 0, PAGE_IS_PFNZERO, 0, 0, PAGE_IS_PFNZERO); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec) == (hpage_size / page_size), + "%s all huge pages must have PFNZERO set\n", __func__); + } else { + ksft_test_result_skip("%s huge page not supported\n", __func__); + } + + munmap(mmap_mem, mem_size); +} + int main(int __attribute__((unused)) argc, char *argv[]) { int shmid, buf_size, fd, i, ret; @@ -1494,7 +1555,7 @@ int main(int __attribute__((unused)) argc, char *argv[]) if (init_uffd()) ksft_exit_pass(); - ksft_set_plan(115); + ksft_set_plan(117); page_size = getpagesize(); hpage_size = read_pmd_pagesize(); @@ -1669,6 +1730,9 @@ int main(int __attribute__((unused)) argc, char *argv[]) /* 16. Userfaultfd tests */ userfaultfd_tests(); + /* 17. 
ZEROPFN tests */ + zeropfn_tests(); + close(pagemap_fd); ksft_exit_pass(); } diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index dddd1dd8af14..a38c984103ce 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -429,7 +429,9 @@ CATEGORY="vma_merge" run_test ./merge if [ -x ./memfd_secret ] then -(echo 0 > /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix +if [ -f /proc/sys/kernel/yama/ptrace_scope ]; then + (echo 0 > /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix +fi CATEGORY="memfd_secret" run_test ./memfd_secret fi @@ -483,6 +485,10 @@ CATEGORY="thp" run_test ./khugepaged CATEGORY="thp" run_test ./khugepaged -s 2 +CATEGORY="thp" run_test ./khugepaged all:shmem + +CATEGORY="thp" run_test ./khugepaged -s 4 all:shmem + CATEGORY="thp" run_test ./transhuge-stress -d 20 # Try to create XFS if not provided diff --git a/tools/testing/selftests/mm/soft-dirty.c b/tools/testing/selftests/mm/soft-dirty.c index 8e1462ce0532..8a3f2b4b2186 100644 --- a/tools/testing/selftests/mm/soft-dirty.c +++ b/tools/testing/selftests/mm/soft-dirty.c @@ -6,8 +6,10 @@ #include <stdint.h> #include <malloc.h> #include <sys/mman.h> + #include "../kselftest.h" #include "vm_util.h" +#include "thp_settings.h" #define PAGEMAP_FILE_PATH "/proc/self/pagemap" #define TEST_ITERATIONS 10000 @@ -78,8 +80,13 @@ static void test_hugepage(int pagemap_fd, int pagesize) { char *map; int i, ret; - size_t hpage_len = read_pmd_pagesize(); + if (!thp_is_enabled()) { + ksft_test_result_skip("Transparent Hugepages not available\n"); + return; + } + + size_t hpage_len = read_pmd_pagesize(); if (!hpage_len) ksft_exit_fail_msg("Reading PMD pagesize failed"); diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index f0d9c035641d..05de1fc0005b 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ 
b/tools/testing/selftests/mm/split_huge_page_test.c @@ -399,7 +399,6 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, char **addr) { size_t i; - int dummy = 0; unsigned char buf[1024]; srand(time(NULL)); @@ -441,8 +440,7 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, madvise(*addr, fd_size, MADV_HUGEPAGE); for (size_t i = 0; i < fd_size; i++) - dummy += *(*addr + i); - asm volatile("" : "+r" (dummy)); + FORCE_READ((*addr + i)); if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) { ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n"); diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c index ad872af1c81a..bad60ac52874 100644 --- a/tools/testing/selftests/mm/thp_settings.c +++ b/tools/testing/selftests/mm/thp_settings.c @@ -381,3 +381,14 @@ unsigned long thp_shmem_supported_orders(void) { return __thp_supported_orders(true); } + +bool thp_is_enabled(void) +{ + if (access(THP_SYSFS, F_OK) != 0) + return false; + + int mode = thp_read_string("enabled", thp_enabled_strings); + + /* THP is considered enabled if it's either "always" or "madvise" */ + return mode == 1 || mode == 3; +} diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h index fc131d23d593..6c07f70beee9 100644 --- a/tools/testing/selftests/mm/thp_settings.h +++ b/tools/testing/selftests/mm/thp_settings.h @@ -84,4 +84,6 @@ void thp_set_read_ahead_path(char *path); unsigned long thp_supported_orders(void); unsigned long thp_shmem_supported_orders(void); +bool thp_is_enabled(void); + #endif /* __THP_SETTINGS_H__ */ diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c index 95b6f043a3cb..8e2b08dc5762 100644 --- a/tools/testing/selftests/mm/thuge-gen.c +++ b/tools/testing/selftests/mm/thuge-gen.c @@ -77,40 +77,20 @@ void show(unsigned long ps) 
system(buf); } -unsigned long thuge_read_sysfs(int warn, char *fmt, ...) +unsigned long read_free(unsigned long ps) { - char *line = NULL; - size_t linelen = 0; - char buf[100]; - FILE *f; - va_list ap; unsigned long val = 0; + char buf[100]; - va_start(ap, fmt); - vsnprintf(buf, sizeof buf, fmt, ap); - va_end(ap); + snprintf(buf, sizeof(buf), + "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages", + ps >> 10); + if (read_sysfs(buf, &val) && ps != getpagesize()) + ksft_print_msg("missing %s\n", buf); - f = fopen(buf, "r"); - if (!f) { - if (warn) - ksft_print_msg("missing %s\n", buf); - return 0; - } - if (getline(&line, &linelen, f) > 0) { - sscanf(line, "%lu", &val); - } - fclose(f); - free(line); return val; } -unsigned long read_free(unsigned long ps) -{ - return thuge_read_sysfs(ps != getpagesize(), - "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages", - ps >> 10); -} - void test_mmap(unsigned long size, unsigned flags) { char *map; @@ -173,6 +153,7 @@ void test_shmget(unsigned long size, unsigned flags) void find_pagesizes(void) { unsigned long largest = getpagesize(); + unsigned long shmmax_val = 0; int i; glob_t g; @@ -195,7 +176,8 @@ void find_pagesizes(void) } globfree(&g); - if (thuge_read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest) + read_sysfs("/proc/sys/kernel/shmmax", &shmmax_val); + if (shmmax_val < NUM_PAGES * largest) ksft_exit_fail_msg("Please do echo %lu > /proc/sys/kernel/shmmax", largest * NUM_PAGES); diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index c73fd5d455c8..50501b38e34e 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -1027,6 +1027,9 @@ static void uffd_poison_handle_fault( do_uffdio_poison(uffd, offset); } +/* Make sure to cover odd/even, and minimum duplications */ +#define UFFD_POISON_TEST_NPAGES 4 + static void uffd_poison_test(uffd_test_args_t *targs) { pthread_t uffd_mon; @@ 
-1034,12 +1037,17 @@ static void uffd_poison_test(uffd_test_args_t *targs) struct uffd_args args = { 0 }; struct sigaction act = { 0 }; unsigned long nr_sigbus = 0; - unsigned long nr; + unsigned long nr, poison_pages = UFFD_POISON_TEST_NPAGES; + + if (nr_pages < poison_pages) { + uffd_test_skip("Too few pages for POISON test"); + return; + } fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); - uffd_register_poison(uffd, area_dst, nr_pages * page_size); - memset(area_src, 0, nr_pages * page_size); + uffd_register_poison(uffd, area_dst, poison_pages * page_size); + memset(area_src, 0, poison_pages * page_size); args.handle_fault = uffd_poison_handle_fault; if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) @@ -1051,7 +1059,7 @@ static void uffd_poison_test(uffd_test_args_t *targs) if (sigaction(SIGBUS, &act, 0)) err("sigaction"); - for (nr = 0; nr < nr_pages; ++nr) { + for (nr = 0; nr < poison_pages; ++nr) { unsigned long offset = nr * page_size; const char *bytes = (const char *) area_dst + offset; const char *i; @@ -1078,9 +1086,9 @@ static void uffd_poison_test(uffd_test_args_t *targs) if (pthread_join(uffd_mon, NULL)) err("pthread_join()"); - if (nr_sigbus != nr_pages / 2) + if (nr_sigbus != poison_pages / 2) err("expected to receive %lu SIGBUS, actually received %lu", - nr_pages / 2, nr_sigbus); + poison_pages / 2, nr_sigbus); uffd_test_pass(); } diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index 5492e3f784df..9dafa7669ef9 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -524,3 +524,34 @@ int read_sysfs(const char *file_path, unsigned long *val) return 0; } + +void *sys_mremap(void *old_address, unsigned long old_size, + unsigned long new_size, int flags, void *new_address) +{ + return (void *)syscall(__NR_mremap, (unsigned long)old_address, + old_size, new_size, flags, + (unsigned long)new_address); +} + +bool detect_huge_zeropage(void) +{ + int fd = 
open("/sys/kernel/mm/transparent_hugepage/use_zero_page", + O_RDONLY); + bool enabled = 0; + char buf[15]; + int ret; + + if (fd < 0) + return 0; + + ret = pread(fd, buf, sizeof(buf), 0); + if (ret > 0 && ret < sizeof(buf)) { + buf[ret] = 0; + + if (strtoul(buf, NULL, 10) == 1) + enabled = 1; + } + + close(fd); + return enabled; +} diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index b8136d12a0f8..c20298ae98ea 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -18,6 +18,13 @@ #define PM_SWAP BIT_ULL(62) #define PM_PRESENT BIT_ULL(63) +/* + * Ignore the checkpatch warning, we must read from x but don't want to do + * anything with it in order to trigger a read page fault. We therefore must use + * volatile to stop the compiler from optimising this away. + */ +#define FORCE_READ(x) (*(volatile typeof(x) *)x) + extern unsigned int __page_size; extern unsigned int __page_shift; @@ -44,6 +51,8 @@ static inline unsigned int pshift(void) return __page_shift; } +bool detect_huge_zeropage(void); + /* * Plan 9 FS has bugs (at least on QEMU) where certain operations fail with * ENOENT on unlinked files. 
See @@ -117,6 +126,9 @@ static inline void log_test_result(int result) ksft_test_result_report(result, "%s\n", test_name); } +void *sys_mremap(void *old_address, unsigned long old_size, + unsigned long new_size, int flags, void *new_address); + /* * On ppc64 this will only work with radix 2M hugepage size */ diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index 973968f45bba..19bb333e2485 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -5,6 +5,7 @@ /proc-2-is-kthread /proc-fsconfig-hidepid /proc-loadavg-001 +/proc-maps-race /proc-multiple-procfs /proc-empty-vm /proc-pid-vm diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile index b12921b9794b..50aba102201a 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile @@ -9,6 +9,7 @@ TEST_GEN_PROGS += fd-002-posix-eq TEST_GEN_PROGS += fd-003-kthread TEST_GEN_PROGS += proc-2-is-kthread TEST_GEN_PROGS += proc-loadavg-001 +TEST_GEN_PROGS += proc-maps-race TEST_GEN_PROGS += proc-empty-vm TEST_GEN_PROGS += proc-pid-vm TEST_GEN_PROGS += proc-self-map-files-001 diff --git a/tools/testing/selftests/proc/proc-maps-race.c b/tools/testing/selftests/proc/proc-maps-race.c new file mode 100644 index 000000000000..66773685a047 --- /dev/null +++ b/tools/testing/selftests/proc/proc-maps-race.c @@ -0,0 +1,741 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022 Google LLC. + * Author: Suren Baghdasaryan <surenb@google.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * Fork a child that concurrently modifies address space while the main + * process is reading /proc/$PID/maps and verifying the results. Address + * space modifications include: + * VMA splitting and merging + * + */ +#define _GNU_SOURCE +#include "../kselftest_harness.h" +#include <errno.h> +#include <fcntl.h> +#include <pthread.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> + +/* /proc/pid/maps parsing routines */ +struct page_content { + char *data; + ssize_t size; +}; + +#define LINE_MAX_SIZE 256 + +struct line_content { + char text[LINE_MAX_SIZE]; + unsigned long start_addr; + unsigned long end_addr; +}; + +enum test_state { + INIT, + CHILD_READY, + PARENT_READY, + SETUP_READY, + SETUP_MODIFY_MAPS, + SETUP_MAPS_MODIFIED, + SETUP_RESTORE_MAPS, + SETUP_MAPS_RESTORED, + TEST_READY, + TEST_DONE, +}; + +struct vma_modifier_info; + +FIXTURE(proc_maps_race) +{ + struct vma_modifier_info *mod_info; + struct page_content page1; + struct page_content page2; + struct line_content last_line; + struct line_content first_line; + unsigned long duration_sec; + int shared_mem_size; + int page_size; + int vma_count; + bool verbose; + int maps_fd; + pid_t pid; +}; + +typedef bool (*vma_modifier_op)(FIXTURE_DATA(proc_maps_race) *self); +typedef bool (*vma_mod_result_check_op)(struct line_content *mod_last_line, + struct line_content *mod_first_line, + struct line_content *restored_last_line, + struct line_content *restored_first_line); + +struct vma_modifier_info { + int vma_count; + void 
*addr; + int prot; + void *next_addr; + vma_modifier_op vma_modify; + vma_modifier_op vma_restore; + vma_mod_result_check_op vma_mod_check; + pthread_mutex_t sync_lock; + pthread_cond_t sync_cond; + enum test_state curr_state; + bool exit; + void *child_mapped_addr[]; +}; + + +static bool read_two_pages(FIXTURE_DATA(proc_maps_race) *self) +{ + ssize_t bytes_read; + + if (lseek(self->maps_fd, 0, SEEK_SET) < 0) + return false; + + bytes_read = read(self->maps_fd, self->page1.data, self->page_size); + if (bytes_read <= 0) + return false; + + self->page1.size = bytes_read; + + bytes_read = read(self->maps_fd, self->page2.data, self->page_size); + if (bytes_read <= 0) + return false; + + self->page2.size = bytes_read; + + return true; +} + +static void copy_first_line(struct page_content *page, char *first_line) +{ + char *pos = strchr(page->data, '\n'); + + strncpy(first_line, page->data, pos - page->data); + first_line[pos - page->data] = '\0'; +} + +static void copy_last_line(struct page_content *page, char *last_line) +{ + /* Get the last line in the first page */ + const char *end = page->data + page->size - 1; + /* skip last newline */ + const char *pos = end - 1; + + /* search previous newline */ + while (pos[-1] != '\n') + pos--; + strncpy(last_line, pos, end - pos); + last_line[end - pos] = '\0'; +} + +/* Read the last line of the first page and the first line of the second page */ +static bool read_boundary_lines(FIXTURE_DATA(proc_maps_race) *self, + struct line_content *last_line, + struct line_content *first_line) +{ + if (!read_two_pages(self)) + return false; + + copy_last_line(&self->page1, last_line->text); + copy_first_line(&self->page2, first_line->text); + + return sscanf(last_line->text, "%lx-%lx", &last_line->start_addr, + &last_line->end_addr) == 2 && + sscanf(first_line->text, "%lx-%lx", &first_line->start_addr, + &first_line->end_addr) == 2; +} + +/* Thread synchronization routines */ +static void wait_for_state(struct vma_modifier_info 
*mod_info, enum test_state state) +{ + pthread_mutex_lock(&mod_info->sync_lock); + while (mod_info->curr_state != state) + pthread_cond_wait(&mod_info->sync_cond, &mod_info->sync_lock); + pthread_mutex_unlock(&mod_info->sync_lock); +} + +static void signal_state(struct vma_modifier_info *mod_info, enum test_state state) +{ + pthread_mutex_lock(&mod_info->sync_lock); + mod_info->curr_state = state; + pthread_cond_signal(&mod_info->sync_cond); + pthread_mutex_unlock(&mod_info->sync_lock); +} + +static void stop_vma_modifier(struct vma_modifier_info *mod_info) +{ + wait_for_state(mod_info, SETUP_READY); + mod_info->exit = true; + signal_state(mod_info, SETUP_MODIFY_MAPS); +} + +static void print_first_lines(char *text, int nr) +{ + const char *end = text; + + while (nr && (end = strchr(end, '\n')) != NULL) { + nr--; + end++; + } + + if (end) { + int offs = end - text; + + text[offs] = '\0'; + printf(text); + text[offs] = '\n'; + printf("\n"); + } else { + printf(text); + } +} + +static void print_last_lines(char *text, int nr) +{ + const char *start = text + strlen(text); + + nr++; /* to ignore the last newline */ + while (nr) { + while (start > text && *start != '\n') + start--; + nr--; + start--; + } + printf(start); +} + +static void print_boundaries(const char *title, FIXTURE_DATA(proc_maps_race) *self) +{ + if (!self->verbose) + return; + + printf("%s", title); + /* Print 3 boundary lines from each page */ + print_last_lines(self->page1.data, 3); + printf("-----------------page boundary-----------------\n"); + print_first_lines(self->page2.data, 3); +} + +static bool print_boundaries_on(bool condition, const char *title, + FIXTURE_DATA(proc_maps_race) *self) +{ + if (self->verbose && condition) + print_boundaries(title, self); + + return condition; +} + +static void report_test_start(const char *name, bool verbose) +{ + if (verbose) + printf("==== %s ====\n", name); +} + +static struct timespec print_ts; + +static void start_test_loop(struct timespec *ts, bool 
verbose) +{ + if (verbose) + print_ts.tv_sec = ts->tv_sec; +} + +static void end_test_iteration(struct timespec *ts, bool verbose) +{ + if (!verbose) + return; + + /* Update every second */ + if (print_ts.tv_sec == ts->tv_sec) + return; + + printf("."); + fflush(stdout); + print_ts.tv_sec = ts->tv_sec; +} + +static void end_test_loop(bool verbose) +{ + if (verbose) + printf("\n"); +} + +static bool capture_mod_pattern(FIXTURE_DATA(proc_maps_race) *self, + struct line_content *mod_last_line, + struct line_content *mod_first_line, + struct line_content *restored_last_line, + struct line_content *restored_first_line) +{ + print_boundaries("Before modification", self); + + signal_state(self->mod_info, SETUP_MODIFY_MAPS); + wait_for_state(self->mod_info, SETUP_MAPS_MODIFIED); + + /* Copy last line of the first page and first line of the last page */ + if (!read_boundary_lines(self, mod_last_line, mod_first_line)) + return false; + + print_boundaries("After modification", self); + + signal_state(self->mod_info, SETUP_RESTORE_MAPS); + wait_for_state(self->mod_info, SETUP_MAPS_RESTORED); + + /* Copy last line of the first page and first line of the last page */ + if (!read_boundary_lines(self, restored_last_line, restored_first_line)) + return false; + + print_boundaries("After restore", self); + + if (!self->mod_info->vma_mod_check(mod_last_line, mod_first_line, + restored_last_line, restored_first_line)) + return false; + + /* + * The content of these lines after modify+resore should be the same + * as the original. 
+ */ + return strcmp(restored_last_line->text, self->last_line.text) == 0 && + strcmp(restored_first_line->text, self->first_line.text) == 0; +} + +static inline bool split_vma(FIXTURE_DATA(proc_maps_race) *self) +{ + return mmap(self->mod_info->addr, self->page_size, self->mod_info->prot | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) != MAP_FAILED; +} + +static inline bool merge_vma(FIXTURE_DATA(proc_maps_race) *self) +{ + return mmap(self->mod_info->addr, self->page_size, self->mod_info->prot, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) != MAP_FAILED; +} + +static inline bool check_split_result(struct line_content *mod_last_line, + struct line_content *mod_first_line, + struct line_content *restored_last_line, + struct line_content *restored_first_line) +{ + /* Make sure vmas at the boundaries are changing */ + return strcmp(mod_last_line->text, restored_last_line->text) != 0 && + strcmp(mod_first_line->text, restored_first_line->text) != 0; +} + +static inline bool shrink_vma(FIXTURE_DATA(proc_maps_race) *self) +{ + return mremap(self->mod_info->addr, self->page_size * 3, + self->page_size, 0) != MAP_FAILED; +} + +static inline bool expand_vma(FIXTURE_DATA(proc_maps_race) *self) +{ + return mremap(self->mod_info->addr, self->page_size, + self->page_size * 3, 0) != MAP_FAILED; +} + +static inline bool check_shrink_result(struct line_content *mod_last_line, + struct line_content *mod_first_line, + struct line_content *restored_last_line, + struct line_content *restored_first_line) +{ + /* Make sure only the last vma of the first page is changing */ + return strcmp(mod_last_line->text, restored_last_line->text) != 0 && + strcmp(mod_first_line->text, restored_first_line->text) == 0; +} + +static inline bool remap_vma(FIXTURE_DATA(proc_maps_race) *self) +{ + /* + * Remap the last page of the next vma into the middle of the vma. 
+ * This splits the current vma and the first and middle parts (the + * parts at lower addresses) become the last vma objserved in the + * first page and the first vma observed in the last page. + */ + return mremap(self->mod_info->next_addr + self->page_size * 2, self->page_size, + self->page_size, MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP, + self->mod_info->addr + self->page_size) != MAP_FAILED; +} + +static inline bool patch_vma(FIXTURE_DATA(proc_maps_race) *self) +{ + return mprotect(self->mod_info->addr + self->page_size, self->page_size, + self->mod_info->prot) == 0; +} + +static inline bool check_remap_result(struct line_content *mod_last_line, + struct line_content *mod_first_line, + struct line_content *restored_last_line, + struct line_content *restored_first_line) +{ + /* Make sure vmas at the boundaries are changing */ + return strcmp(mod_last_line->text, restored_last_line->text) != 0 && + strcmp(mod_first_line->text, restored_first_line->text) != 0; +} + +FIXTURE_SETUP(proc_maps_race) +{ + const char *verbose = getenv("VERBOSE"); + const char *duration = getenv("DURATION"); + struct vma_modifier_info *mod_info; + pthread_mutexattr_t mutex_attr; + pthread_condattr_t cond_attr; + unsigned long duration_sec; + char fname[32]; + + self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); + self->verbose = verbose && !strncmp(verbose, "1", 1); + duration_sec = duration ? atol(duration) : 0; + self->duration_sec = duration_sec ? duration_sec : 5UL; + + /* + * Have to map enough vmas for /proc/pid/maps to contain more than one + * page worth of vmas. 
Assume at least 32 bytes per line in maps output + */ + self->vma_count = self->page_size / 32 + 1; + self->shared_mem_size = sizeof(struct vma_modifier_info) + self->vma_count * sizeof(void *); + + /* map shared memory for communication with the child process */ + self->mod_info = (struct vma_modifier_info *)mmap(NULL, self->shared_mem_size, + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(self->mod_info, MAP_FAILED); + mod_info = self->mod_info; + + /* Initialize shared members */ + pthread_mutexattr_init(&mutex_attr); + pthread_mutexattr_setpshared(&mutex_attr, PTHREAD_PROCESS_SHARED); + ASSERT_EQ(pthread_mutex_init(&mod_info->sync_lock, &mutex_attr), 0); + pthread_condattr_init(&cond_attr); + pthread_condattr_setpshared(&cond_attr, PTHREAD_PROCESS_SHARED); + ASSERT_EQ(pthread_cond_init(&mod_info->sync_cond, &cond_attr), 0); + mod_info->vma_count = self->vma_count; + mod_info->curr_state = INIT; + mod_info->exit = false; + + self->pid = fork(); + if (!self->pid) { + /* Child process modifying the address space */ + int prot = PROT_READ | PROT_WRITE; + int i; + + for (i = 0; i < mod_info->vma_count; i++) { + mod_info->child_mapped_addr[i] = mmap(NULL, self->page_size * 3, prot, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(mod_info->child_mapped_addr[i], MAP_FAILED); + /* change protection in adjacent maps to prevent merging */ + prot ^= PROT_WRITE; + } + signal_state(mod_info, CHILD_READY); + wait_for_state(mod_info, PARENT_READY); + while (true) { + signal_state(mod_info, SETUP_READY); + wait_for_state(mod_info, SETUP_MODIFY_MAPS); + if (mod_info->exit) + break; + + ASSERT_TRUE(mod_info->vma_modify(self)); + signal_state(mod_info, SETUP_MAPS_MODIFIED); + wait_for_state(mod_info, SETUP_RESTORE_MAPS); + ASSERT_TRUE(mod_info->vma_restore(self)); + signal_state(mod_info, SETUP_MAPS_RESTORED); + + wait_for_state(mod_info, TEST_READY); + while (mod_info->curr_state != TEST_DONE) { + ASSERT_TRUE(mod_info->vma_modify(self)); + 
ASSERT_TRUE(mod_info->vma_restore(self)); + } + } + for (i = 0; i < mod_info->vma_count; i++) + munmap(mod_info->child_mapped_addr[i], self->page_size * 3); + + exit(0); + } + + sprintf(fname, "/proc/%d/maps", self->pid); + self->maps_fd = open(fname, O_RDONLY); + ASSERT_NE(self->maps_fd, -1); + + /* Wait for the child to map the VMAs */ + wait_for_state(mod_info, CHILD_READY); + + /* Read first two pages */ + self->page1.data = malloc(self->page_size); + ASSERT_NE(self->page1.data, NULL); + self->page2.data = malloc(self->page_size); + ASSERT_NE(self->page2.data, NULL); + + ASSERT_TRUE(read_boundary_lines(self, &self->last_line, &self->first_line)); + + /* + * Find the addresses corresponding to the last line in the first page + * and the first line in the last page. + */ + mod_info->addr = NULL; + mod_info->next_addr = NULL; + for (int i = 0; i < mod_info->vma_count; i++) { + if (mod_info->child_mapped_addr[i] == (void *)self->last_line.start_addr) { + mod_info->addr = mod_info->child_mapped_addr[i]; + mod_info->prot = PROT_READ; + /* Even VMAs have write permission */ + if ((i % 2) == 0) + mod_info->prot |= PROT_WRITE; + } else if (mod_info->child_mapped_addr[i] == (void *)self->first_line.start_addr) { + mod_info->next_addr = mod_info->child_mapped_addr[i]; + } + + if (mod_info->addr && mod_info->next_addr) + break; + } + ASSERT_TRUE(mod_info->addr && mod_info->next_addr); + + signal_state(mod_info, PARENT_READY); + +} + +FIXTURE_TEARDOWN(proc_maps_race) +{ + int status; + + stop_vma_modifier(self->mod_info); + + free(self->page2.data); + free(self->page1.data); + + for (int i = 0; i < self->vma_count; i++) + munmap(self->mod_info->child_mapped_addr[i], self->page_size); + close(self->maps_fd); + waitpid(self->pid, &status, 0); + munmap(self->mod_info, self->shared_mem_size); +} + +TEST_F(proc_maps_race, test_maps_tearing_from_split) +{ + struct vma_modifier_info *mod_info = self->mod_info; + + struct line_content split_last_line; + struct line_content 
split_first_line; + struct line_content restored_last_line; + struct line_content restored_first_line; + + wait_for_state(mod_info, SETUP_READY); + + /* re-read the file to avoid using stale data from previous test */ + ASSERT_TRUE(read_boundary_lines(self, &self->last_line, &self->first_line)); + + mod_info->vma_modify = split_vma; + mod_info->vma_restore = merge_vma; + mod_info->vma_mod_check = check_split_result; + + report_test_start("Tearing from split", self->verbose); + ASSERT_TRUE(capture_mod_pattern(self, &split_last_line, &split_first_line, + &restored_last_line, &restored_first_line)); + + /* Now start concurrent modifications for self->duration_sec */ + signal_state(mod_info, TEST_READY); + + struct line_content new_last_line; + struct line_content new_first_line; + struct timespec start_ts, end_ts; + + clock_gettime(CLOCK_MONOTONIC_COARSE, &start_ts); + start_test_loop(&start_ts, self->verbose); + do { + bool last_line_changed; + bool first_line_changed; + + ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line)); + + /* Check if we read vmas after split */ + if (!strcmp(new_last_line.text, split_last_line.text)) { + /* + * The vmas should be consistent with split results, + * however if vma was concurrently restored after a + * split, it can be reported twice (first the original + * split one, then the same vma but extended after the + * merge) because we found it as the next vma again. + * In that case new first line will be the same as the + * last restored line. 
+ */ + ASSERT_FALSE(print_boundaries_on( + strcmp(new_first_line.text, split_first_line.text) && + strcmp(new_first_line.text, restored_last_line.text), + "Split result invalid", self)); + } else { + /* The vmas should be consistent with merge results */ + ASSERT_FALSE(print_boundaries_on( + strcmp(new_last_line.text, restored_last_line.text), + "Merge result invalid", self)); + ASSERT_FALSE(print_boundaries_on( + strcmp(new_first_line.text, restored_first_line.text), + "Merge result invalid", self)); + } + /* + * First and last lines should change in unison. If the last + * line changed then the first line should change as well and + * vice versa. + */ + last_line_changed = strcmp(new_last_line.text, self->last_line.text) != 0; + first_line_changed = strcmp(new_first_line.text, self->first_line.text) != 0; + ASSERT_EQ(last_line_changed, first_line_changed); + + clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts); + end_test_iteration(&end_ts, self->verbose); + } while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec); + end_test_loop(self->verbose); + + /* Signal the modifyer thread to stop and wait until it exits */ + signal_state(mod_info, TEST_DONE); +} + +TEST_F(proc_maps_race, test_maps_tearing_from_resize) +{ + struct vma_modifier_info *mod_info = self->mod_info; + + struct line_content shrunk_last_line; + struct line_content shrunk_first_line; + struct line_content restored_last_line; + struct line_content restored_first_line; + + wait_for_state(mod_info, SETUP_READY); + + /* re-read the file to avoid using stale data from previous test */ + ASSERT_TRUE(read_boundary_lines(self, &self->last_line, &self->first_line)); + + mod_info->vma_modify = shrink_vma; + mod_info->vma_restore = expand_vma; + mod_info->vma_mod_check = check_shrink_result; + + report_test_start("Tearing from resize", self->verbose); + ASSERT_TRUE(capture_mod_pattern(self, &shrunk_last_line, &shrunk_first_line, + &restored_last_line, &restored_first_line)); + + /* Now start concurrent 
modifications for self->duration_sec */ + signal_state(mod_info, TEST_READY); + + struct line_content new_last_line; + struct line_content new_first_line; + struct timespec start_ts, end_ts; + + clock_gettime(CLOCK_MONOTONIC_COARSE, &start_ts); + start_test_loop(&start_ts, self->verbose); + do { + ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line)); + + /* Check if we read vmas after shrinking it */ + if (!strcmp(new_last_line.text, shrunk_last_line.text)) { + /* + * The vmas should be consistent with shrunk results, + * however if the vma was concurrently restored, it + * can be reported twice (first as shrunk one, then + * as restored one) because we found it as the next vma + * again. In that case new first line will be the same + * as the last restored line. + */ + ASSERT_FALSE(print_boundaries_on( + strcmp(new_first_line.text, shrunk_first_line.text) && + strcmp(new_first_line.text, restored_last_line.text), + "Shrink result invalid", self)); + } else { + /* The vmas should be consistent with the original/resored state */ + ASSERT_FALSE(print_boundaries_on( + strcmp(new_last_line.text, restored_last_line.text), + "Expand result invalid", self)); + ASSERT_FALSE(print_boundaries_on( + strcmp(new_first_line.text, restored_first_line.text), + "Expand result invalid", self)); + } + + clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts); + end_test_iteration(&end_ts, self->verbose); + } while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec); + end_test_loop(self->verbose); + + /* Signal the modifyer thread to stop and wait until it exits */ + signal_state(mod_info, TEST_DONE); +} + +TEST_F(proc_maps_race, test_maps_tearing_from_remap) +{ + struct vma_modifier_info *mod_info = self->mod_info; + + struct line_content remapped_last_line; + struct line_content remapped_first_line; + struct line_content restored_last_line; + struct line_content restored_first_line; + + wait_for_state(mod_info, SETUP_READY); + + /* re-read the file to avoid using stale 
data from previous test */ + ASSERT_TRUE(read_boundary_lines(self, &self->last_line, &self->first_line)); + + mod_info->vma_modify = remap_vma; + mod_info->vma_restore = patch_vma; + mod_info->vma_mod_check = check_remap_result; + + report_test_start("Tearing from remap", self->verbose); + ASSERT_TRUE(capture_mod_pattern(self, &remapped_last_line, &remapped_first_line, + &restored_last_line, &restored_first_line)); + + /* Now start concurrent modifications for self->duration_sec */ + signal_state(mod_info, TEST_READY); + + struct line_content new_last_line; + struct line_content new_first_line; + struct timespec start_ts, end_ts; + + clock_gettime(CLOCK_MONOTONIC_COARSE, &start_ts); + start_test_loop(&start_ts, self->verbose); + do { + ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line)); + + /* Check if we read vmas after remapping it */ + if (!strcmp(new_last_line.text, remapped_last_line.text)) { + /* + * The vmas should be consistent with remap results, + * however if the vma was concurrently restored, it + * can be reported twice (first as split one, then + * as restored one) because we found it as the next vma + * again. In that case new first line will be the same + * as the last restored line. 
+ */ + ASSERT_FALSE(print_boundaries_on( + strcmp(new_first_line.text, remapped_first_line.text) && + strcmp(new_first_line.text, restored_last_line.text), + "Remap result invalid", self)); + } else { + /* The vmas should be consistent with the original/resored state */ + ASSERT_FALSE(print_boundaries_on( + strcmp(new_last_line.text, restored_last_line.text), + "Remap restore result invalid", self)); + ASSERT_FALSE(print_boundaries_on( + strcmp(new_first_line.text, restored_first_line.text), + "Remap restore result invalid", self)); + } + + clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts); + end_test_iteration(&end_ts, self->verbose); + } while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec); + end_test_loop(self->verbose); + + /* Signal the modifyer thread to stop and wait until it exits */ + signal_state(mod_info, TEST_DONE); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/sched_ext/maximal.bpf.c b/tools/testing/selftests/sched_ext/maximal.bpf.c index 430f5e13bf55..01cf4f3da4e0 100644 --- a/tools/testing/selftests/sched_ext/maximal.bpf.c +++ b/tools/testing/selftests/sched_ext/maximal.bpf.c @@ -123,6 +123,10 @@ void BPF_STRUCT_OPS(maximal_cgroup_cancel_move, struct task_struct *p, void BPF_STRUCT_OPS(maximal_cgroup_set_weight, struct cgroup *cgrp, u32 weight) {} +void BPF_STRUCT_OPS(maximal_cgroup_set_bandwidth, struct cgroup *cgrp, + u64 period_us, u64 quota_us, u64 burst_us) +{} + s32 BPF_STRUCT_OPS_SLEEPABLE(maximal_init) { return scx_bpf_create_dsq(DSQ_ID, -1); @@ -160,6 +164,7 @@ struct sched_ext_ops maximal_ops = { .cgroup_move = (void *) maximal_cgroup_move, .cgroup_cancel_move = (void *) maximal_cgroup_cancel_move, .cgroup_set_weight = (void *) maximal_cgroup_set_weight, + .cgroup_set_bandwidth = (void *) maximal_cgroup_set_bandwidth, .init = (void *) maximal_init, .exit = (void *) maximal_exit, .name = "maximal", diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c index 2be7597a2ac2..656e1c75b711 100644 --- 
a/tools/testing/vma/vma.c +++ b/tools/testing/vma/vma.c @@ -65,7 +65,7 @@ static struct vm_area_struct *alloc_vma(struct mm_struct *mm, unsigned long start, unsigned long end, pgoff_t pgoff, - vm_flags_t flags) + vm_flags_t vm_flags) { struct vm_area_struct *ret = vm_area_alloc(mm); @@ -75,7 +75,7 @@ static struct vm_area_struct *alloc_vma(struct mm_struct *mm, ret->vm_start = start; ret->vm_end = end; ret->vm_pgoff = pgoff; - ret->__vm_flags = flags; + ret->__vm_flags = vm_flags; vma_assert_detached(ret); return ret; @@ -103,9 +103,9 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm, unsigned long start, unsigned long end, pgoff_t pgoff, - vm_flags_t flags) + vm_flags_t vm_flags) { - struct vm_area_struct *vma = alloc_vma(mm, start, end, pgoff, flags); + struct vm_area_struct *vma = alloc_vma(mm, start, end, pgoff, vm_flags); if (vma == NULL) return NULL; @@ -172,7 +172,7 @@ static int expand_existing(struct vma_merge_struct *vmg) * specified new range. */ static void vmg_set_range(struct vma_merge_struct *vmg, unsigned long start, - unsigned long end, pgoff_t pgoff, vm_flags_t flags) + unsigned long end, pgoff_t pgoff, vm_flags_t vm_flags) { vma_iter_set(vmg->vmi, start); @@ -184,7 +184,7 @@ static void vmg_set_range(struct vma_merge_struct *vmg, unsigned long start, vmg->start = start; vmg->end = end; vmg->pgoff = pgoff; - vmg->flags = flags; + vmg->vm_flags = vm_flags; vmg->just_expand = false; vmg->__remove_middle = false; @@ -195,10 +195,10 @@ static void vmg_set_range(struct vma_merge_struct *vmg, unsigned long start, /* Helper function to set both the VMG range and its anon_vma. 
*/ static void vmg_set_range_anon_vma(struct vma_merge_struct *vmg, unsigned long start, - unsigned long end, pgoff_t pgoff, vm_flags_t flags, + unsigned long end, pgoff_t pgoff, vm_flags_t vm_flags, struct anon_vma *anon_vma) { - vmg_set_range(vmg, start, end, pgoff, flags); + vmg_set_range(vmg, start, end, pgoff, vm_flags); vmg->anon_vma = anon_vma; } @@ -211,12 +211,12 @@ static void vmg_set_range_anon_vma(struct vma_merge_struct *vmg, unsigned long s static struct vm_area_struct *try_merge_new_vma(struct mm_struct *mm, struct vma_merge_struct *vmg, unsigned long start, unsigned long end, - pgoff_t pgoff, vm_flags_t flags, + pgoff_t pgoff, vm_flags_t vm_flags, bool *was_merged) { struct vm_area_struct *merged; - vmg_set_range(vmg, start, end, pgoff, flags); + vmg_set_range(vmg, start, end, pgoff, vm_flags); merged = merge_new(vmg); if (merged) { @@ -229,7 +229,7 @@ static struct vm_area_struct *try_merge_new_vma(struct mm_struct *mm, ASSERT_EQ(vmg->state, VMA_MERGE_NOMERGE); - return alloc_and_link_vma(mm, start, end, pgoff, flags); + return alloc_and_link_vma(mm, start, end, pgoff, vm_flags); } /* @@ -301,17 +301,17 @@ static void vma_set_dummy_anon_vma(struct vm_area_struct *vma, static bool test_simple_merge(void) { struct vm_area_struct *vma; - unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; struct mm_struct mm = {}; - struct vm_area_struct *vma_left = alloc_vma(&mm, 0, 0x1000, 0, flags); - struct vm_area_struct *vma_right = alloc_vma(&mm, 0x2000, 0x3000, 2, flags); + struct vm_area_struct *vma_left = alloc_vma(&mm, 0, 0x1000, 0, vm_flags); + struct vm_area_struct *vma_right = alloc_vma(&mm, 0x2000, 0x3000, 2, vm_flags); VMA_ITERATOR(vmi, &mm, 0x1000); struct vma_merge_struct vmg = { .mm = &mm, .vmi = &vmi, .start = 0x1000, .end = 0x2000, - .flags = flags, + .vm_flags = vm_flags, .pgoff = 1, }; @@ -324,7 +324,7 @@ static bool test_simple_merge(void) 
ASSERT_EQ(vma->vm_start, 0); ASSERT_EQ(vma->vm_end, 0x3000); ASSERT_EQ(vma->vm_pgoff, 0); - ASSERT_EQ(vma->vm_flags, flags); + ASSERT_EQ(vma->vm_flags, vm_flags); detach_free_vma(vma); mtree_destroy(&mm.mm_mt); @@ -335,9 +335,9 @@ static bool test_simple_merge(void) static bool test_simple_modify(void) { struct vm_area_struct *vma; - unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; struct mm_struct mm = {}; - struct vm_area_struct *init_vma = alloc_vma(&mm, 0, 0x3000, 0, flags); + struct vm_area_struct *init_vma = alloc_vma(&mm, 0, 0x3000, 0, vm_flags); VMA_ITERATOR(vmi, &mm, 0x1000); ASSERT_FALSE(attach_vma(&mm, init_vma)); @@ -394,13 +394,13 @@ static bool test_simple_modify(void) static bool test_simple_expand(void) { - unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; struct mm_struct mm = {}; - struct vm_area_struct *vma = alloc_vma(&mm, 0, 0x1000, 0, flags); + struct vm_area_struct *vma = alloc_vma(&mm, 0, 0x1000, 0, vm_flags); VMA_ITERATOR(vmi, &mm, 0); struct vma_merge_struct vmg = { .vmi = &vmi, - .middle = vma, + .target = vma, .start = 0, .end = 0x3000, .pgoff = 0, @@ -422,9 +422,9 @@ static bool test_simple_expand(void) static bool test_simple_shrink(void) { - unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; struct mm_struct mm = {}; - struct vm_area_struct *vma = alloc_vma(&mm, 0, 0x3000, 0, flags); + struct vm_area_struct *vma = alloc_vma(&mm, 0, 0x3000, 0, vm_flags); VMA_ITERATOR(vmi, &mm, 0); ASSERT_FALSE(attach_vma(&mm, vma)); @@ -443,7 +443,7 @@ static bool test_simple_shrink(void) static bool test_merge_new(void) { - unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; struct mm_struct mm = {}; 
VMA_ITERATOR(vmi, &mm, 0); struct vma_merge_struct vmg = { @@ -473,18 +473,18 @@ static bool test_merge_new(void) * 0123456789abc * AA B CC */ - vma_a = alloc_and_link_vma(&mm, 0, 0x2000, 0, flags); + vma_a = alloc_and_link_vma(&mm, 0, 0x2000, 0, vm_flags); ASSERT_NE(vma_a, NULL); /* We give each VMA a single avc so we can test anon_vma duplication. */ INIT_LIST_HEAD(&vma_a->anon_vma_chain); list_add(&dummy_anon_vma_chain_a.same_vma, &vma_a->anon_vma_chain); - vma_b = alloc_and_link_vma(&mm, 0x3000, 0x4000, 3, flags); + vma_b = alloc_and_link_vma(&mm, 0x3000, 0x4000, 3, vm_flags); ASSERT_NE(vma_b, NULL); INIT_LIST_HEAD(&vma_b->anon_vma_chain); list_add(&dummy_anon_vma_chain_b.same_vma, &vma_b->anon_vma_chain); - vma_c = alloc_and_link_vma(&mm, 0xb000, 0xc000, 0xb, flags); + vma_c = alloc_and_link_vma(&mm, 0xb000, 0xc000, 0xb, vm_flags); ASSERT_NE(vma_c, NULL); INIT_LIST_HEAD(&vma_c->anon_vma_chain); list_add(&dummy_anon_vma_chain_c.same_vma, &vma_c->anon_vma_chain); @@ -495,7 +495,7 @@ static bool test_merge_new(void) * 0123456789abc * AA B ** CC */ - vma_d = try_merge_new_vma(&mm, &vmg, 0x7000, 0x9000, 7, flags, &merged); + vma_d = try_merge_new_vma(&mm, &vmg, 0x7000, 0x9000, 7, vm_flags, &merged); ASSERT_NE(vma_d, NULL); INIT_LIST_HEAD(&vma_d->anon_vma_chain); list_add(&dummy_anon_vma_chain_d.same_vma, &vma_d->anon_vma_chain); @@ -510,7 +510,7 @@ static bool test_merge_new(void) */ vma_a->vm_ops = &vm_ops; /* This should have no impact. */ vma_b->anon_vma = &dummy_anon_vma; - vma = try_merge_new_vma(&mm, &vmg, 0x2000, 0x3000, 2, flags, &merged); + vma = try_merge_new_vma(&mm, &vmg, 0x2000, 0x3000, 2, vm_flags, &merged); ASSERT_EQ(vma, vma_a); /* Merge with A, delete B. */ ASSERT_TRUE(merged); @@ -527,7 +527,7 @@ static bool test_merge_new(void) * 0123456789abc * AAAA* DD CC */ - vma = try_merge_new_vma(&mm, &vmg, 0x4000, 0x5000, 4, flags, &merged); + vma = try_merge_new_vma(&mm, &vmg, 0x4000, 0x5000, 4, vm_flags, &merged); ASSERT_EQ(vma, vma_a); /* Extend A. 
*/ ASSERT_TRUE(merged); @@ -546,7 +546,7 @@ static bool test_merge_new(void) */ vma_d->anon_vma = &dummy_anon_vma; vma_d->vm_ops = &vm_ops; /* This should have no impact. */ - vma = try_merge_new_vma(&mm, &vmg, 0x6000, 0x7000, 6, flags, &merged); + vma = try_merge_new_vma(&mm, &vmg, 0x6000, 0x7000, 6, vm_flags, &merged); ASSERT_EQ(vma, vma_d); /* Prepend. */ ASSERT_TRUE(merged); @@ -564,7 +564,7 @@ static bool test_merge_new(void) * AAAAA*DDD CC */ vma_d->vm_ops = NULL; /* This would otherwise degrade the merge. */ - vma = try_merge_new_vma(&mm, &vmg, 0x5000, 0x6000, 5, flags, &merged); + vma = try_merge_new_vma(&mm, &vmg, 0x5000, 0x6000, 5, vm_flags, &merged); ASSERT_EQ(vma, vma_a); /* Merge with A, delete D. */ ASSERT_TRUE(merged); @@ -582,7 +582,7 @@ static bool test_merge_new(void) * AAAAAAAAA *CC */ vma_c->anon_vma = &dummy_anon_vma; - vma = try_merge_new_vma(&mm, &vmg, 0xa000, 0xb000, 0xa, flags, &merged); + vma = try_merge_new_vma(&mm, &vmg, 0xa000, 0xb000, 0xa, vm_flags, &merged); ASSERT_EQ(vma, vma_c); /* Prepend C. */ ASSERT_TRUE(merged); @@ -599,7 +599,7 @@ static bool test_merge_new(void) * 0123456789abc * AAAAAAAAA*CCC */ - vma = try_merge_new_vma(&mm, &vmg, 0x9000, 0xa000, 0x9, flags, &merged); + vma = try_merge_new_vma(&mm, &vmg, 0x9000, 0xa000, 0x9, vm_flags, &merged); ASSERT_EQ(vma, vma_a); /* Extend A and delete C. */ ASSERT_TRUE(merged); @@ -639,7 +639,7 @@ static bool test_merge_new(void) static bool test_vma_merge_special_flags(void) { - unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; struct mm_struct mm = {}; VMA_ITERATOR(vmi, &mm, 0); struct vma_merge_struct vmg = { @@ -661,7 +661,7 @@ static bool test_vma_merge_special_flags(void) * 01234 * AAA */ - vma_left = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); + vma_left = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); ASSERT_NE(vma_left, NULL); /* 1. 
Set up new VMA with special flag that would otherwise merge. */ @@ -672,12 +672,12 @@ static bool test_vma_merge_special_flags(void) * * This should merge if not for the VM_SPECIAL flag. */ - vmg_set_range(&vmg, 0x3000, 0x4000, 3, flags); + vmg_set_range(&vmg, 0x3000, 0x4000, 3, vm_flags); for (i = 0; i < ARRAY_SIZE(special_flags); i++) { vm_flags_t special_flag = special_flags[i]; - vma_left->__vm_flags = flags | special_flag; - vmg.flags = flags | special_flag; + vma_left->__vm_flags = vm_flags | special_flag; + vmg.vm_flags = vm_flags | special_flag; vma = merge_new(&vmg); ASSERT_EQ(vma, NULL); ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); @@ -691,15 +691,15 @@ static bool test_vma_merge_special_flags(void) * * Create a VMA to modify. */ - vma = alloc_and_link_vma(&mm, 0x3000, 0x4000, 3, flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x4000, 3, vm_flags); ASSERT_NE(vma, NULL); vmg.middle = vma; for (i = 0; i < ARRAY_SIZE(special_flags); i++) { vm_flags_t special_flag = special_flags[i]; - vma_left->__vm_flags = flags | special_flag; - vmg.flags = flags | special_flag; + vma_left->__vm_flags = vm_flags | special_flag; + vmg.vm_flags = vm_flags | special_flag; vma = merge_existing(&vmg); ASSERT_EQ(vma, NULL); ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); @@ -711,7 +711,7 @@ static bool test_vma_merge_special_flags(void) static bool test_vma_merge_with_close(void) { - unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; struct mm_struct mm = {}; VMA_ITERATOR(vmi, &mm, 0); struct vma_merge_struct vmg = { @@ -791,11 +791,11 @@ static bool test_vma_merge_with_close(void) * PPPPPPNNN */ - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); - vma_next = alloc_and_link_vma(&mm, 0x5000, 0x9000, 5, flags); + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x5000, 0x9000, 5, vm_flags); vma_next->vm_ops = &vm_ops; - vmg_set_range(&vmg, 0x3000, 
0x5000, 3, flags); + vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); ASSERT_EQ(merge_new(&vmg), vma_prev); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); ASSERT_EQ(vma_prev->vm_start, 0); @@ -816,11 +816,11 @@ static bool test_vma_merge_with_close(void) * proceed. */ - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); - vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); vma->vm_ops = &vm_ops; - vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); + vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); vmg.prev = vma_prev; vmg.middle = vma; @@ -844,11 +844,11 @@ static bool test_vma_merge_with_close(void) * proceed. */ - vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); - vma_next = alloc_and_link_vma(&mm, 0x5000, 0x9000, 5, flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x5000, 0x9000, 5, vm_flags); vma->vm_ops = &vm_ops; - vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); + vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), NULL); /* @@ -872,12 +872,12 @@ static bool test_vma_merge_with_close(void) * PPPVVNNNN */ - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); - vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); - vma_next = alloc_and_link_vma(&mm, 0x5000, 0x9000, 5, flags); + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x5000, 0x9000, 5, vm_flags); vma->vm_ops = &vm_ops; - vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); + vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); vmg.prev = vma_prev; vmg.middle = vma; @@ -898,12 +898,12 @@ static bool test_vma_merge_with_close(void) * PPPPPNNNN */ - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); - vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, 
flags); - vma_next = alloc_and_link_vma(&mm, 0x5000, 0x9000, 5, flags); + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x5000, 0x9000, 5, vm_flags); vma_next->vm_ops = &vm_ops; - vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); + vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); vmg.prev = vma_prev; vmg.middle = vma; @@ -920,15 +920,15 @@ static bool test_vma_merge_with_close(void) static bool test_vma_merge_new_with_close(void) { - unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; struct mm_struct mm = {}; VMA_ITERATOR(vmi, &mm, 0); struct vma_merge_struct vmg = { .mm = &mm, .vmi = &vmi, }; - struct vm_area_struct *vma_prev = alloc_and_link_vma(&mm, 0, 0x2000, 0, flags); - struct vm_area_struct *vma_next = alloc_and_link_vma(&mm, 0x5000, 0x7000, 5, flags); + struct vm_area_struct *vma_prev = alloc_and_link_vma(&mm, 0, 0x2000, 0, vm_flags); + struct vm_area_struct *vma_next = alloc_and_link_vma(&mm, 0x5000, 0x7000, 5, vm_flags); const struct vm_operations_struct vm_ops = { .close = dummy_close, }; @@ -958,7 +958,7 @@ static bool test_vma_merge_new_with_close(void) vma_prev->vm_ops = &vm_ops; vma_next->vm_ops = &vm_ops; - vmg_set_range(&vmg, 0x2000, 0x5000, 2, flags); + vmg_set_range(&vmg, 0x2000, 0x5000, 2, vm_flags); vma = merge_new(&vmg); ASSERT_NE(vma, NULL); ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); @@ -975,7 +975,7 @@ static bool test_vma_merge_new_with_close(void) static bool test_merge_existing(void) { - unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; struct mm_struct mm = {}; VMA_ITERATOR(vmi, &mm, 0); struct vm_area_struct *vma, *vma_prev, *vma_next; @@ -998,11 +998,11 @@ static bool test_merge_existing(void) * 0123456789 * VNNNNNN */ - vma = alloc_and_link_vma(&mm, 
0x2000, 0x6000, 2, flags); + vma = alloc_and_link_vma(&mm, 0x2000, 0x6000, 2, vm_flags); vma->vm_ops = &vm_ops; /* This should have no impact. */ - vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, flags); + vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, vm_flags); vma_next->vm_ops = &vm_ops; /* This should have no impact. */ - vmg_set_range_anon_vma(&vmg, 0x3000, 0x6000, 3, flags, &dummy_anon_vma); + vmg_set_range_anon_vma(&vmg, 0x3000, 0x6000, 3, vm_flags, &dummy_anon_vma); vmg.middle = vma; vmg.prev = vma; vma_set_dummy_anon_vma(vma, &avc); @@ -1032,10 +1032,10 @@ static bool test_merge_existing(void) * 0123456789 * NNNNNNN */ - vma = alloc_and_link_vma(&mm, 0x2000, 0x6000, 2, flags); - vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, flags); + vma = alloc_and_link_vma(&mm, 0x2000, 0x6000, 2, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, vm_flags); vma_next->vm_ops = &vm_ops; /* This should have no impact. */ - vmg_set_range_anon_vma(&vmg, 0x2000, 0x6000, 2, flags, &dummy_anon_vma); + vmg_set_range_anon_vma(&vmg, 0x2000, 0x6000, 2, vm_flags, &dummy_anon_vma); vmg.middle = vma; vma_set_dummy_anon_vma(vma, &avc); ASSERT_EQ(merge_existing(&vmg), vma_next); @@ -1060,11 +1060,11 @@ static bool test_merge_existing(void) * 0123456789 * PPPPPPV */ - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); vma_prev->vm_ops = &vm_ops; /* This should have no impact. */ - vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, vm_flags); vma->vm_ops = &vm_ops; /* This should have no impact. 
*/ - vmg_set_range_anon_vma(&vmg, 0x3000, 0x6000, 3, flags, &dummy_anon_vma); + vmg_set_range_anon_vma(&vmg, 0x3000, 0x6000, 3, vm_flags, &dummy_anon_vma); vmg.prev = vma_prev; vmg.middle = vma; vma_set_dummy_anon_vma(vma, &avc); @@ -1094,10 +1094,10 @@ static bool test_merge_existing(void) * 0123456789 * PPPPPPP */ - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); vma_prev->vm_ops = &vm_ops; /* This should have no impact. */ - vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags); - vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, flags, &dummy_anon_vma); + vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, vm_flags); + vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, vm_flags, &dummy_anon_vma); vmg.prev = vma_prev; vmg.middle = vma; vma_set_dummy_anon_vma(vma, &avc); @@ -1123,11 +1123,11 @@ static bool test_merge_existing(void) * 0123456789 * PPPPPPPPPP */ - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); vma_prev->vm_ops = &vm_ops; /* This should have no impact. 
*/ - vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags); - vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, flags); - vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, flags, &dummy_anon_vma); + vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, vm_flags); + vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, vm_flags, &dummy_anon_vma); vmg.prev = vma_prev; vmg.middle = vma; vma_set_dummy_anon_vma(vma, &avc); @@ -1158,41 +1158,41 @@ static bool test_merge_existing(void) * PPPVVVVVNNN */ - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); - vma = alloc_and_link_vma(&mm, 0x3000, 0x8000, 3, flags); - vma_next = alloc_and_link_vma(&mm, 0x8000, 0xa000, 8, flags); + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x8000, 3, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x8000, 0xa000, 8, vm_flags); - vmg_set_range(&vmg, 0x4000, 0x5000, 4, flags); + vmg_set_range(&vmg, 0x4000, 0x5000, 4, vm_flags); vmg.prev = vma; vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); - vmg_set_range(&vmg, 0x5000, 0x6000, 5, flags); + vmg_set_range(&vmg, 0x5000, 0x6000, 5, vm_flags); vmg.prev = vma; vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); - vmg_set_range(&vmg, 0x6000, 0x7000, 6, flags); + vmg_set_range(&vmg, 0x6000, 0x7000, 6, vm_flags); vmg.prev = vma; vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); - vmg_set_range(&vmg, 0x4000, 0x7000, 4, flags); + vmg_set_range(&vmg, 0x4000, 0x7000, 4, vm_flags); vmg.prev = vma; vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); - vmg_set_range(&vmg, 0x4000, 0x6000, 4, flags); + vmg_set_range(&vmg, 0x4000, 0x6000, 4, vm_flags); vmg.prev = vma; vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), NULL); ASSERT_EQ(vmg.state, 
VMA_MERGE_NOMERGE); - vmg_set_range(&vmg, 0x5000, 0x6000, 5, flags); + vmg_set_range(&vmg, 0x5000, 0x6000, 5, vm_flags); vmg.prev = vma; vmg.middle = vma; ASSERT_EQ(merge_existing(&vmg), NULL); @@ -1205,7 +1205,7 @@ static bool test_merge_existing(void) static bool test_anon_vma_non_mergeable(void) { - unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; struct mm_struct mm = {}; VMA_ITERATOR(vmi, &mm, 0); struct vm_area_struct *vma, *vma_prev, *vma_next; @@ -1229,9 +1229,9 @@ static bool test_anon_vma_non_mergeable(void) * 0123456789 * PPPPPPPNNN */ - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); - vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags); - vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, flags); + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, vm_flags); /* * Give both prev and next single anon_vma_chain fields, so they will @@ -1239,7 +1239,7 @@ static bool test_anon_vma_non_mergeable(void) * * However, when prev is compared to next, the merge should fail. 
*/ - vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, flags, NULL); + vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, vm_flags, NULL); vmg.prev = vma_prev; vmg.middle = vma; vma_set_dummy_anon_vma(vma_prev, &dummy_anon_vma_chain_1); @@ -1267,10 +1267,10 @@ static bool test_anon_vma_non_mergeable(void) * 0123456789 * PPPPPPPNNN */ - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); - vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, flags); + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, vm_flags); - vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, flags, NULL); + vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, vm_flags, NULL); vmg.prev = vma_prev; vma_set_dummy_anon_vma(vma_prev, &dummy_anon_vma_chain_1); __vma_set_dummy_anon_vma(vma_next, &dummy_anon_vma_chain_2, &dummy_anon_vma_2); @@ -1292,7 +1292,7 @@ static bool test_anon_vma_non_mergeable(void) static bool test_dup_anon_vma(void) { - unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; struct mm_struct mm = {}; VMA_ITERATOR(vmi, &mm, 0); struct vma_merge_struct vmg = { @@ -1313,12 +1313,12 @@ static bool test_dup_anon_vma(void) * This covers new VMA merging, as these operations amount to a VMA * expand. 
*/ - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); - vma_next = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); vma_next->anon_vma = &dummy_anon_vma; - vmg_set_range(&vmg, 0, 0x5000, 0, flags); - vmg.middle = vma_prev; + vmg_set_range(&vmg, 0, 0x5000, 0, vm_flags); + vmg.target = vma_prev; vmg.next = vma_next; ASSERT_EQ(expand_existing(&vmg), 0); @@ -1339,16 +1339,16 @@ static bool test_dup_anon_vma(void) * extend delete delete */ - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); - vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); - vma_next = alloc_and_link_vma(&mm, 0x5000, 0x8000, 5, flags); + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x5000, 0x8000, 5, vm_flags); /* Initialise avc so mergeability check passes. */ INIT_LIST_HEAD(&vma_next->anon_vma_chain); list_add(&dummy_anon_vma_chain.same_vma, &vma_next->anon_vma_chain); vma_next->anon_vma = &dummy_anon_vma; - vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); + vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); vmg.prev = vma_prev; vmg.middle = vma; @@ -1372,12 +1372,12 @@ static bool test_dup_anon_vma(void) * extend delete delete */ - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); - vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); - vma_next = alloc_and_link_vma(&mm, 0x5000, 0x8000, 5, flags); + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x5000, 0x8000, 5, vm_flags); vmg.anon_vma = &dummy_anon_vma; vma_set_dummy_anon_vma(vma, &dummy_anon_vma_chain); - vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); + vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); vmg.prev = vma_prev; vmg.middle = vma; @@ -1401,11 +1401,11 @@ 
static bool test_dup_anon_vma(void) * extend shrink/delete */ - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); - vma = alloc_and_link_vma(&mm, 0x3000, 0x8000, 3, flags); + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x8000, 3, vm_flags); vma_set_dummy_anon_vma(vma, &dummy_anon_vma_chain); - vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); + vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); vmg.prev = vma_prev; vmg.middle = vma; @@ -1429,11 +1429,11 @@ static bool test_dup_anon_vma(void) * shrink/delete extend */ - vma = alloc_and_link_vma(&mm, 0, 0x5000, 0, flags); - vma_next = alloc_and_link_vma(&mm, 0x5000, 0x8000, 5, flags); + vma = alloc_and_link_vma(&mm, 0, 0x5000, 0, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x5000, 0x8000, 5, vm_flags); vma_set_dummy_anon_vma(vma, &dummy_anon_vma_chain); - vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); + vmg_set_range(&vmg, 0x3000, 0x5000, 3, vm_flags); vmg.prev = vma; vmg.middle = vma; @@ -1452,7 +1452,7 @@ static bool test_dup_anon_vma(void) static bool test_vmi_prealloc_fail(void) { - unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; struct mm_struct mm = {}; VMA_ITERATOR(vmi, &mm, 0); struct vma_merge_struct vmg = { @@ -1468,11 +1468,11 @@ static bool test_vmi_prealloc_fail(void) * the duplicated anon_vma is unlinked. 
*/ - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); - vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); vma->anon_vma = &dummy_anon_vma; - vmg_set_range_anon_vma(&vmg, 0x3000, 0x5000, 3, flags, &dummy_anon_vma); + vmg_set_range_anon_vma(&vmg, 0x3000, 0x5000, 3, vm_flags, &dummy_anon_vma); vmg.prev = vma_prev; vmg.middle = vma; vma_set_dummy_anon_vma(vma, &avc); @@ -1496,12 +1496,12 @@ static bool test_vmi_prealloc_fail(void) * performed in this case too. */ - vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); - vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, vm_flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); vma->anon_vma = &dummy_anon_vma; - vmg_set_range(&vmg, 0, 0x5000, 3, flags); - vmg.middle = vma_prev; + vmg_set_range(&vmg, 0, 0x5000, 3, vm_flags); + vmg.target = vma_prev; vmg.next = vma; fail_prealloc = true; @@ -1518,13 +1518,13 @@ static bool test_vmi_prealloc_fail(void) static bool test_merge_extend(void) { - unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; struct mm_struct mm = {}; VMA_ITERATOR(vmi, &mm, 0x1000); struct vm_area_struct *vma; - vma = alloc_and_link_vma(&mm, 0, 0x1000, 0, flags); - alloc_and_link_vma(&mm, 0x3000, 0x4000, 3, flags); + vma = alloc_and_link_vma(&mm, 0, 0x1000, 0, vm_flags); + alloc_and_link_vma(&mm, 0x3000, 0x4000, 3, vm_flags); /* * Extend a VMA into the gap between itself and the following VMA. 
@@ -1548,7 +1548,7 @@ static bool test_merge_extend(void) static bool test_copy_vma(void) { - unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; struct mm_struct mm = {}; bool need_locks = false; VMA_ITERATOR(vmi, &mm, 0); @@ -1556,7 +1556,7 @@ static bool test_copy_vma(void) /* Move backwards and do not merge. */ - vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); vma_new = copy_vma(&vma, 0, 0x2000, 0, &need_locks); ASSERT_NE(vma_new, vma); ASSERT_EQ(vma_new->vm_start, 0); @@ -1568,8 +1568,8 @@ static bool test_copy_vma(void) /* Move a VMA into position next to another and merge the two. */ - vma = alloc_and_link_vma(&mm, 0, 0x2000, 0, flags); - vma_next = alloc_and_link_vma(&mm, 0x6000, 0x8000, 6, flags); + vma = alloc_and_link_vma(&mm, 0, 0x2000, 0, vm_flags); + vma_next = alloc_and_link_vma(&mm, 0x6000, 0x8000, 6, vm_flags); vma_new = copy_vma(&vma, 0x4000, 0x2000, 4, &need_locks); vma_assert_attached(vma_new); @@ -1581,11 +1581,11 @@ static bool test_copy_vma(void) static bool test_expand_only_mode(void) { - unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + vm_flags_t vm_flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; struct mm_struct mm = {}; VMA_ITERATOR(vmi, &mm, 0); struct vm_area_struct *vma_prev, *vma; - VMG_STATE(vmg, &mm, &vmi, 0x5000, 0x9000, flags, 5); + VMG_STATE(vmg, &mm, &vmi, 0x5000, 0x9000, vm_flags, 5); /* * Place a VMA prior to the one we're expanding so we assert that we do @@ -1593,14 +1593,14 @@ static bool test_expand_only_mode(void) * have, through the use of the just_expand flag, indicated we do not * need to do so. */ - alloc_and_link_vma(&mm, 0, 0x2000, 0, flags); + alloc_and_link_vma(&mm, 0, 0x2000, 0, vm_flags); /* * We will be positioned at the prev VMA, but looking to expand to * 0x9000. 
*/ vma_iter_set(&vmi, 0x3000); - vma_prev = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); + vma_prev = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, vm_flags); vmg.prev = vma_prev; vmg.just_expand = true; diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 816e7e057585..a838c37f93e5 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -576,7 +576,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) return __pgprot(pgprot_val(oldprot) | pgprot_val(newprot)); } -static inline pgprot_t vm_get_page_prot(unsigned long vm_flags) +static inline pgprot_t vm_get_page_prot(vm_flags_t vm_flags) { return __pgprot(vm_flags); } @@ -1084,7 +1084,7 @@ static inline bool mpol_equal(struct mempolicy *, struct mempolicy *) } static inline void khugepaged_enter_vma(struct vm_area_struct *vma, - unsigned long vm_flags) + vm_flags_t vm_flags) { (void)vma; (void)vm_flags; @@ -1200,7 +1200,7 @@ bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot); /* Update vma->vm_page_prot to reflect vma->vm_flags. */ static inline void vma_set_page_prot(struct vm_area_struct *vma) { - unsigned long vm_flags = vma->vm_flags; + vm_flags_t vm_flags = vma->vm_flags; pgprot_t vm_page_prot; /* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. 
*/ @@ -1215,7 +1215,7 @@ static inline void vma_set_page_prot(struct vm_area_struct *vma) WRITE_ONCE(vma->vm_page_prot, vm_page_prot); } -static inline bool arch_validate_flags(unsigned long) +static inline bool arch_validate_flags(vm_flags_t) { return true; } @@ -1280,12 +1280,12 @@ static inline bool capable(int cap) return true; } -static inline bool mlock_future_ok(struct mm_struct *mm, unsigned long flags, +static inline bool mlock_future_ok(struct mm_struct *mm, vm_flags_t vm_flags, unsigned long bytes) { unsigned long locked_pages, limit_pages; - if (!(flags & VM_LOCKED) || capable(CAP_IPC_LOCK)) + if (!(vm_flags & VM_LOCKED) || capable(CAP_IPC_LOCK)) return true; locked_pages = bytes >> PAGE_SHIFT; @@ -1504,4 +1504,15 @@ static inline void vma_set_file(struct vm_area_struct *vma, struct file *file) fput(file); } +static inline bool shmem_file(struct file *) +{ + return false; +} + +static inline vm_flags_t ksm_vma_flags(const struct mm_struct *, const struct file *, + vm_flags_t vm_flags) +{ + return vm_flags; +} + #endif /* __MM_VMA_INTERNAL_H */ diff --git a/tools/verification/dot2/Makefile b/tools/verification/dot2/Makefile deleted file mode 100644 index 021beb07a521..000000000000 --- a/tools/verification/dot2/Makefile +++ /dev/null @@ -1,26 +0,0 @@ -INSTALL=install - -prefix ?= /usr -bindir ?= $(prefix)/bin -mandir ?= $(prefix)/share/man -miscdir ?= $(prefix)/share/dot2 -srcdir ?= $(prefix)/src - -PYLIB ?= $(shell python3 -c 'import sysconfig; print (sysconfig.get_path("purelib"))') - -.PHONY: all -all: - -.PHONY: clean -clean: - -.PHONY: install -install: - $(INSTALL) automata.py -D -m 644 $(DESTDIR)$(PYLIB)/dot2/automata.py - $(INSTALL) dot2c.py -D -m 644 $(DESTDIR)$(PYLIB)/dot2/dot2c.py - $(INSTALL) dot2c -D -m 755 $(DESTDIR)$(bindir)/ - $(INSTALL) dot2k.py -D -m 644 $(DESTDIR)$(PYLIB)/dot2/dot2k.py - $(INSTALL) dot2k -D -m 755 $(DESTDIR)$(bindir)/ - - mkdir -p ${miscdir}/ - cp -rp dot2k_templates $(DESTDIR)$(miscdir)/ diff --git 
a/tools/verification/dot2/dot2k b/tools/verification/dot2/dot2k deleted file mode 100644 index 767064f415e7..000000000000 --- a/tools/verification/dot2/dot2k +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0-only -# -# Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org> -# -# dot2k: transform dot files into a monitor for the Linux kernel. -# -# For further information, see: -# Documentation/trace/rv/da_monitor_synthesis.rst - -if __name__ == '__main__': - from dot2.dot2k import dot2k - import argparse - import sys - - def is_container(): - """Should work even before parsing the arguments""" - return "-c" in sys.argv or "--container" in sys.argv - - parser = argparse.ArgumentParser(description='transform .dot file into kernel rv monitor') - parser.add_argument('-d', "--dot", dest="dot_file", required=not is_container()) - parser.add_argument('-t', "--monitor_type", dest="monitor_type", required=not is_container(), - help=f"Available options: {', '.join(dot2k.monitor_types.keys())}") - parser.add_argument('-n', "--model_name", dest="model_name", required=is_container()) - parser.add_argument("-D", "--description", dest="description", required=False) - parser.add_argument("-a", "--auto_patch", dest="auto_patch", - action="store_true", required=False, - help="Patch the kernel in place") - parser.add_argument("-p", "--parent", dest="parent", - required=False, help="Create a monitor nested to parent") - parser.add_argument("-c", "--container", dest="container", - action="store_true", required=False, - help="Create an empty monitor to be used as a container") - params = parser.parse_args() - - if not is_container(): - print("Opening and parsing the dot file %s" % params.dot_file) - try: - monitor=dot2k(params.dot_file, params.monitor_type, vars(params)) - except Exception as e: - print('Error: '+ str(e)) - print("Sorry : :-(") - sys.exit(1) - - print("Writing the monitor into the directory %s" % 
monitor.name) - monitor.print_files() - print("Almost done, checklist") - if not is_container(): - print(" - Edit the %s/%s.c to add the instrumentation" % (monitor.name, monitor.name)) - print(monitor.fill_tracepoint_tooltip()) - print(monitor.fill_makefile_tooltip()) - print(monitor.fill_kconfig_tooltip()) - print(monitor.fill_monitor_tooltip()) diff --git a/tools/verification/models/rtapp/pagefault.ltl b/tools/verification/models/rtapp/pagefault.ltl new file mode 100644 index 000000000000..d7ce62102733 --- /dev/null +++ b/tools/verification/models/rtapp/pagefault.ltl @@ -0,0 +1 @@ +RULE = always (RT imply not PAGEFAULT) diff --git a/tools/verification/models/rtapp/sleep.ltl b/tools/verification/models/rtapp/sleep.ltl new file mode 100644 index 000000000000..6379bbeb6212 --- /dev/null +++ b/tools/verification/models/rtapp/sleep.ltl @@ -0,0 +1,22 @@ +RULE = always ((RT and SLEEP) imply (RT_FRIENDLY_SLEEP or ALLOWLIST)) + +RT_FRIENDLY_SLEEP = (RT_VALID_SLEEP_REASON or KERNEL_THREAD) + and ((not WAKE) until RT_FRIENDLY_WAKE) + +RT_VALID_SLEEP_REASON = FUTEX_WAIT + or RT_FRIENDLY_NANOSLEEP + +RT_FRIENDLY_NANOSLEEP = CLOCK_NANOSLEEP + and NANOSLEEP_TIMER_ABSTIME + and (NANOSLEEP_CLOCK_MONOTONIC or NANOSLEEP_CLOCK_TAI) + +RT_FRIENDLY_WAKE = WOKEN_BY_EQUAL_OR_HIGHER_PRIO + or WOKEN_BY_HARDIRQ + or WOKEN_BY_NMI + or ABORT_SLEEP + or KTHREAD_SHOULD_STOP + +ALLOWLIST = BLOCK_ON_RT_MUTEX + or FUTEX_LOCK_PI + or TASK_IS_RCU + or TASK_IS_MIGRATION diff --git a/tools/verification/models/sched/nrp.dot b/tools/verification/models/sched/nrp.dot new file mode 100644 index 000000000000..77bb64669416 --- /dev/null +++ b/tools/verification/models/sched/nrp.dot @@ -0,0 +1,29 @@ +digraph state_automaton { + center = true; + size = "7,11"; + {node [shape = doublecircle] "any_thread_running"}; + {node [shape = circle] "any_thread_running"}; + {node [shape = circle] "nested_preempt"}; + {node [shape = plaintext, style=invis, label=""] "__init_preempt_irq"}; + {node [shape = circle] 
"preempt_irq"}; + {node [shape = circle] "rescheduling"}; + "__init_preempt_irq" -> "preempt_irq"; + "any_thread_running" [label = "any_thread_running", color = green3]; + "any_thread_running" -> "any_thread_running" [ label = "schedule_entry\nirq_entry" ]; + "any_thread_running" -> "rescheduling" [ label = "sched_need_resched" ]; + "nested_preempt" [label = "nested_preempt"]; + "nested_preempt" -> "any_thread_running" [ label = "schedule_entry_preempt\nschedule_entry" ]; + "nested_preempt" -> "nested_preempt" [ label = "irq_entry" ]; + "nested_preempt" -> "preempt_irq" [ label = "sched_need_resched" ]; + "preempt_irq" [label = "preempt_irq"]; + "preempt_irq" -> "nested_preempt" [ label = "schedule_entry_preempt\nschedule_entry" ]; + "preempt_irq" -> "preempt_irq" [ label = "irq_entry\nsched_need_resched" ]; + "rescheduling" [label = "rescheduling"]; + "rescheduling" -> "any_thread_running" [ label = "schedule_entry_preempt\nschedule_entry" ]; + "rescheduling" -> "preempt_irq" [ label = "irq_entry" ]; + "rescheduling" -> "rescheduling" [ label = "sched_need_resched" ]; + { rank = min ; + "__init_preempt_irq"; + "preempt_irq"; + } +} diff --git a/tools/verification/models/sched/opid.dot b/tools/verification/models/sched/opid.dot new file mode 100644 index 000000000000..840052f6952b --- /dev/null +++ b/tools/verification/models/sched/opid.dot @@ -0,0 +1,35 @@ +digraph state_automaton { + center = true; + size = "7,11"; + {node [shape = plaintext, style=invis, label=""] "__init_disabled"}; + {node [shape = circle] "disabled"}; + {node [shape = doublecircle] "enabled"}; + {node [shape = circle] "enabled"}; + {node [shape = circle] "in_irq"}; + {node [shape = circle] "irq_disabled"}; + {node [shape = circle] "preempt_disabled"}; + "__init_disabled" -> "disabled"; + "disabled" [label = "disabled"]; + "disabled" -> "disabled" [ label = "sched_need_resched\nsched_waking\nirq_entry" ]; + "disabled" -> "irq_disabled" [ label = "preempt_enable" ]; + "disabled" -> 
"preempt_disabled" [ label = "irq_enable" ]; + "enabled" [label = "enabled", color = green3]; + "enabled" -> "enabled" [ label = "preempt_enable" ]; + "enabled" -> "irq_disabled" [ label = "irq_disable" ]; + "enabled" -> "preempt_disabled" [ label = "preempt_disable" ]; + "in_irq" [label = "in_irq"]; + "in_irq" -> "enabled" [ label = "irq_enable" ]; + "in_irq" -> "in_irq" [ label = "sched_need_resched\nsched_waking\nirq_entry" ]; + "irq_disabled" [label = "irq_disabled"]; + "irq_disabled" -> "disabled" [ label = "preempt_disable" ]; + "irq_disabled" -> "enabled" [ label = "irq_enable" ]; + "irq_disabled" -> "in_irq" [ label = "irq_entry" ]; + "irq_disabled" -> "irq_disabled" [ label = "sched_need_resched" ]; + "preempt_disabled" [label = "preempt_disabled"]; + "preempt_disabled" -> "disabled" [ label = "irq_disable" ]; + "preempt_disabled" -> "enabled" [ label = "preempt_enable" ]; + { rank = min ; + "__init_disabled"; + "disabled"; + } +} diff --git a/tools/verification/models/sched/sncid.dot b/tools/verification/models/sched/sncid.dot deleted file mode 100644 index 072851721b50..000000000000 --- a/tools/verification/models/sched/sncid.dot +++ /dev/null @@ -1,18 +0,0 @@ -digraph state_automaton { - center = true; - size = "7,11"; - {node [shape = plaintext, style=invis, label=""] "__init_can_sched"}; - {node [shape = ellipse] "can_sched"}; - {node [shape = plaintext] "can_sched"}; - {node [shape = plaintext] "cant_sched"}; - "__init_can_sched" -> "can_sched"; - "can_sched" [label = "can_sched", color = green3]; - "can_sched" -> "can_sched" [ label = "schedule_entry\nschedule_exit" ]; - "can_sched" -> "cant_sched" [ label = "irq_disable" ]; - "cant_sched" [label = "cant_sched"]; - "cant_sched" -> "can_sched" [ label = "irq_enable" ]; - { rank = min ; - "__init_can_sched"; - "can_sched"; - } -} diff --git a/tools/verification/models/sched/sssw.dot b/tools/verification/models/sched/sssw.dot new file mode 100644 index 000000000000..4994c3e876be --- /dev/null +++ 
b/tools/verification/models/sched/sssw.dot @@ -0,0 +1,30 @@ +digraph state_automaton { + center = true; + size = "7,11"; + {node [shape = plaintext, style=invis, label=""] "__init_runnable"}; + {node [shape = doublecircle] "runnable"}; + {node [shape = circle] "runnable"}; + {node [shape = circle] "signal_wakeup"}; + {node [shape = circle] "sleepable"}; + {node [shape = circle] "sleeping"}; + "__init_runnable" -> "runnable"; + "runnable" [label = "runnable", color = green3]; + "runnable" -> "runnable" [ label = "sched_set_state_runnable\nsched_wakeup\nsched_switch_in\nsched_switch_yield\nsched_switch_preempt\nsignal_deliver" ]; + "runnable" -> "sleepable" [ label = "sched_set_state_sleepable" ]; + "runnable" -> "sleeping" [ label = "sched_switch_blocking" ]; + "signal_wakeup" [label = "signal_wakeup"]; + "signal_wakeup" -> "runnable" [ label = "signal_deliver" ]; + "signal_wakeup" -> "signal_wakeup" [ label = "sched_switch_in\nsched_switch_preempt\nsched_switch_yield\nsched_wakeup" ]; + "signal_wakeup" -> "sleepable" [ label = "sched_set_state_sleepable" ]; + "sleepable" [label = "sleepable"]; + "sleepable" -> "runnable" [ label = "sched_set_state_runnable\nsched_wakeup" ]; + "sleepable" -> "signal_wakeup" [ label = "sched_switch_yield" ]; + "sleepable" -> "sleepable" [ label = "sched_set_state_sleepable\nsched_switch_in\nsched_switch_preempt\nsignal_deliver" ]; + "sleepable" -> "sleeping" [ label = "sched_switch_suspend\nsched_switch_blocking" ]; + "sleeping" [label = "sleeping"]; + "sleeping" -> "runnable" [ label = "sched_wakeup" ]; + { rank = min ; + "__init_runnable"; + "runnable"; + } +} diff --git a/tools/verification/models/sched/sts.dot b/tools/verification/models/sched/sts.dot new file mode 100644 index 000000000000..8f5f38be04d5 --- /dev/null +++ b/tools/verification/models/sched/sts.dot @@ -0,0 +1,38 @@ +digraph state_automaton { + center = true; + size = "7,11"; + {node [shape = plaintext, style=invis, label=""] "__init_can_sched"}; + {node [shape = 
doublecircle] "can_sched"}; + {node [shape = circle] "can_sched"}; + {node [shape = circle] "cant_sched"}; + {node [shape = circle] "disable_to_switch"}; + {node [shape = circle] "enable_to_exit"}; + {node [shape = circle] "in_irq"}; + {node [shape = circle] "scheduling"}; + {node [shape = circle] "switching"}; + "__init_can_sched" -> "can_sched"; + "can_sched" [label = "can_sched", color = green3]; + "can_sched" -> "cant_sched" [ label = "irq_disable" ]; + "can_sched" -> "scheduling" [ label = "schedule_entry" ]; + "cant_sched" [label = "cant_sched"]; + "cant_sched" -> "can_sched" [ label = "irq_enable" ]; + "cant_sched" -> "cant_sched" [ label = "irq_entry" ]; + "disable_to_switch" [label = "disable_to_switch"]; + "disable_to_switch" -> "enable_to_exit" [ label = "irq_enable" ]; + "disable_to_switch" -> "in_irq" [ label = "irq_entry" ]; + "disable_to_switch" -> "switching" [ label = "sched_switch" ]; + "enable_to_exit" [label = "enable_to_exit"]; + "enable_to_exit" -> "can_sched" [ label = "schedule_exit" ]; + "enable_to_exit" -> "enable_to_exit" [ label = "irq_disable\nirq_entry\nirq_enable" ]; + "in_irq" [label = "in_irq"]; + "in_irq" -> "in_irq" [ label = "irq_entry" ]; + "in_irq" -> "scheduling" [ label = "irq_enable" ]; + "scheduling" [label = "scheduling"]; + "scheduling" -> "disable_to_switch" [ label = "irq_disable" ]; + "switching" [label = "switching"]; + "switching" -> "enable_to_exit" [ label = "irq_enable" ]; + { rank = min ; + "__init_can_sched"; + "can_sched"; + } +} diff --git a/tools/verification/models/sched/tss.dot b/tools/verification/models/sched/tss.dot deleted file mode 100644 index 7dfa1d9121bb..000000000000 --- a/tools/verification/models/sched/tss.dot +++ /dev/null @@ -1,18 +0,0 @@ -digraph state_automaton { - center = true; - size = "7,11"; - {node [shape = plaintext] "sched"}; - {node [shape = plaintext, style=invis, label=""] "__init_thread"}; - {node [shape = ellipse] "thread"}; - {node [shape = plaintext] "thread"}; - 
"__init_thread" -> "thread"; - "sched" [label = "sched"]; - "sched" -> "sched" [ label = "sched_switch" ]; - "sched" -> "thread" [ label = "schedule_exit" ]; - "thread" [label = "thread", color = green3]; - "thread" -> "sched" [ label = "schedule_entry" ]; - { rank = min ; - "__init_thread"; - "thread"; - } -} diff --git a/tools/verification/rv/src/in_kernel.c b/tools/verification/rv/src/in_kernel.c index c0dcee795c0d..4bb746ea6e17 100644 --- a/tools/verification/rv/src/in_kernel.c +++ b/tools/verification/rv/src/in_kernel.c @@ -431,7 +431,7 @@ ikm_event_handler(struct trace_seq *s, struct tep_record *record, if (config_has_id && (config_my_pid == id)) return 0; - else if (config_my_pid && (config_my_pid == pid)) + else if (config_my_pid == pid) return 0; tep_print_event(trace_event->tep, s, record, "%16s-%-8d [%.3d] ", @@ -734,7 +734,7 @@ static int parse_arguments(char *monitor_name, int argc, char **argv) config_reactor = optarg; break; case 's': - config_my_pid = 0; + config_my_pid = -1; break; case 't': config_trace = 1; diff --git a/tools/verification/rv/src/rv.c b/tools/verification/rv/src/rv.c index 239de054d1e0..b8fe24a87d97 100644 --- a/tools/verification/rv/src/rv.c +++ b/tools/verification/rv/src/rv.c @@ -191,6 +191,7 @@ int main(int argc, char **argv) * and exit. 
*/ signal(SIGINT, stop_rv); + signal(SIGTERM, stop_rv); rv_mon(argc - 1, &argv[1]); } diff --git a/tools/verification/rvgen/.gitignore b/tools/verification/rvgen/.gitignore new file mode 100644 index 000000000000..1e288a076560 --- /dev/null +++ b/tools/verification/rvgen/.gitignore @@ -0,0 +1,3 @@ +__pycache__/ +parser.out +parsetab.py diff --git a/tools/verification/rvgen/Makefile b/tools/verification/rvgen/Makefile new file mode 100644 index 000000000000..cfc4056c1e87 --- /dev/null +++ b/tools/verification/rvgen/Makefile @@ -0,0 +1,27 @@ +INSTALL=install + +prefix ?= /usr +bindir ?= $(prefix)/bin +mandir ?= $(prefix)/share/man +srcdir ?= $(prefix)/src + +PYLIB ?= $(shell python3 -c 'import sysconfig; print (sysconfig.get_path("purelib"))') + +.PHONY: all +all: + +.PHONY: clean +clean: + +.PHONY: install +install: + $(INSTALL) rvgen/automata.py -D -m 644 $(DESTDIR)$(PYLIB)/rvgen/automata.py + $(INSTALL) rvgen/dot2c.py -D -m 644 $(DESTDIR)$(PYLIB)/rvgen/dot2c.py + $(INSTALL) dot2c -D -m 755 $(DESTDIR)$(bindir)/ + $(INSTALL) rvgen/dot2k.py -D -m 644 $(DESTDIR)$(PYLIB)/rvgen/dot2k.py + $(INSTALL) rvgen/container.py -D -m 644 $(DESTDIR)$(PYLIB)/rvgen/container.py + $(INSTALL) rvgen/generator.py -D -m 644 $(DESTDIR)$(PYLIB)/rvgen/generator.py + $(INSTALL) rvgen/ltl2ba.py -D -m 644 $(DESTDIR)$(PYLIB)/rvgen/ltl2ba.py + $(INSTALL) rvgen/ltl2k.py -D -m 644 $(DESTDIR)$(PYLIB)/rvgen/ltl2k.py + $(INSTALL) __main__.py -D -m 755 $(DESTDIR)$(bindir)/rvgen + cp -rp rvgen/templates $(DESTDIR)$(PYLIB)/rvgen/ diff --git a/tools/verification/rvgen/__main__.py b/tools/verification/rvgen/__main__.py new file mode 100644 index 000000000000..fa6fc1f4de2f --- /dev/null +++ b/tools/verification/rvgen/__main__.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org> +# +# dot2k: transform dot files into a monitor for the Linux kernel. 
+# +# For further information, see: +# Documentation/trace/rv/da_monitor_synthesis.rst + +if __name__ == '__main__': + from rvgen.dot2k import dot2k + from rvgen.generator import Monitor + from rvgen.container import Container + from rvgen.ltl2k import ltl2k + import argparse + import sys + + parser = argparse.ArgumentParser(description='Generate kernel rv monitor') + parser.add_argument("-D", "--description", dest="description", required=False) + parser.add_argument("-a", "--auto_patch", dest="auto_patch", + action="store_true", required=False, + help="Patch the kernel in place") + + subparsers = parser.add_subparsers(dest="subcmd", required=True) + + monitor_parser = subparsers.add_parser("monitor") + monitor_parser.add_argument('-n', "--model_name", dest="model_name") + monitor_parser.add_argument("-p", "--parent", dest="parent", + required=False, help="Create a monitor nested to parent") + monitor_parser.add_argument('-c', "--class", dest="monitor_class", + help="Monitor class, either \"da\" or \"ltl\"") + monitor_parser.add_argument('-s', "--spec", dest="spec", help="Monitor specification file") + monitor_parser.add_argument('-t', "--monitor_type", dest="monitor_type", + help=f"Available options: {', '.join(Monitor.monitor_types.keys())}") + + container_parser = subparsers.add_parser("container") + container_parser.add_argument('-n', "--model_name", dest="model_name", required=True) + + params = parser.parse_args() + + try: + if params.subcmd == "monitor": + print("Opening and parsing the specification file %s" % params.spec) + if params.monitor_class == "da": + monitor = dot2k(params.spec, params.monitor_type, vars(params)) + elif params.monitor_class == "ltl": + monitor = ltl2k(params.spec, params.monitor_type, vars(params)) + else: + print("Unknown monitor class:", params.monitor_class) + sys.exit(1) + else: + monitor = Container(vars(params)) + except Exception as e: + print('Error: '+ str(e)) + print("Sorry : :-(") + sys.exit(1) + + print("Writing the 
monitor into the directory %s" % monitor.name) + monitor.print_files() + print("Almost done, checklist") + if params.subcmd == "monitor": + print(" - Edit the %s/%s.c to add the instrumentation" % (monitor.name, monitor.name)) + print(monitor.fill_tracepoint_tooltip()) + print(monitor.fill_makefile_tooltip()) + print(monitor.fill_kconfig_tooltip()) + print(monitor.fill_monitor_tooltip()) diff --git a/tools/verification/dot2/dot2c b/tools/verification/rvgen/dot2c index 3fe89ab88b65..bf0c67c5b66c 100644 --- a/tools/verification/dot2/dot2c +++ b/tools/verification/rvgen/dot2c @@ -14,7 +14,7 @@ # Documentation/trace/rv/deterministic_automata.rst if __name__ == '__main__': - from dot2 import dot2c + from rvgen import dot2c import argparse import sys diff --git a/tools/verification/dot2/automata.py b/tools/verification/rvgen/rvgen/automata.py index d9a3fe2b74bf..d9a3fe2b74bf 100644 --- a/tools/verification/dot2/automata.py +++ b/tools/verification/rvgen/rvgen/automata.py diff --git a/tools/verification/rvgen/rvgen/container.py b/tools/verification/rvgen/rvgen/container.py new file mode 100644 index 000000000000..51f188530b4d --- /dev/null +++ b/tools/verification/rvgen/rvgen/container.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org> +# +# Generator for runtime verification monitor container + +from . 
import generator + + +class Container(generator.RVGenerator): + template_dir = "container" + + def __init__(self, extra_params={}): + super().__init__(extra_params) + self.name = extra_params.get("model_name") + self.main_h = self._read_template_file("main.h") + + def fill_model_h(self): + main_h = self.main_h + main_h = main_h.replace("%%MODEL_NAME%%", self.name) + return main_h + + def fill_kconfig_tooltip(self): + """Override to produce a marker for this container in the Kconfig""" + container_marker = self._kconfig_marker(self.name) + "\n" + result = super().fill_kconfig_tooltip() + if self.auto_patch: + self._patch_file("Kconfig", + self._kconfig_marker(), container_marker) + return result + return result + container_marker diff --git a/tools/verification/dot2/dot2c.py b/tools/verification/rvgen/rvgen/dot2c.py index fa2816ac7b61..b9b6f14cc536 100644 --- a/tools/verification/dot2/dot2c.py +++ b/tools/verification/rvgen/rvgen/dot2c.py @@ -13,7 +13,7 @@ # For further information, see: # Documentation/trace/rv/deterministic_automata.rst -from dot2.automata import Automata +from .automata import Automata class Dot2c(Automata): enum_suffix = "" @@ -152,28 +152,30 @@ class Dot2c(Automata): max_state_name = max(self.states, key = len).__len__() return max(max_state_name, self.invalid_state_str.__len__()) - def __get_state_string_length(self): - maxlen = self.__get_max_strlen_of_states() + self.enum_suffix.__len__() - return "%" + str(maxlen) + "s" - def get_aut_init_function(self): nr_states = self.states.__len__() nr_events = self.events.__len__() buff = [] - strformat = self.__get_state_string_length() - + maxlen = self.__get_max_strlen_of_states() + len(self.enum_suffix) + tab_braces = 2 * 8 + 2 + 1 # "\t\t{ " ... 
"}" + comma_space = 2 # ", " count last comma here + linetoolong = tab_braces + (maxlen + comma_space) * nr_events > self.line_length for x in range(nr_states): - line = "\t\t{ " + line = "\t\t{\n" if linetoolong else "\t\t{ " for y in range(nr_events): next_state = self.function[x][y] if next_state != self.invalid_state_str: next_state = self.function[x][y] + self.enum_suffix + if linetoolong: + line += "\t\t\t%s" % next_state + else: + line += "%*s" % (maxlen, next_state) if y != nr_events-1: - line = line + strformat % next_state + ", " + line += ",\n" if linetoolong else ", " else: - line = line + strformat % next_state + " }," + line += "\n\t\t}," if linetoolong else " }," buff.append(line) return self.__buff_to_string(buff) diff --git a/tools/verification/rvgen/rvgen/dot2k.py b/tools/verification/rvgen/rvgen/dot2k.py new file mode 100644 index 000000000000..ed0a3c901106 --- /dev/null +++ b/tools/verification/rvgen/rvgen/dot2k.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org> +# +# dot2k: transform dot files into a monitor for the Linux kernel. 
+# +# For further information, see: +# Documentation/trace/rv/da_monitor_synthesis.rst + +from .dot2c import Dot2c +from .generator import Monitor + + +class dot2k(Monitor, Dot2c): + template_dir = "dot2k" + + def __init__(self, file_path, MonitorType, extra_params={}): + self.monitor_type = MonitorType + Monitor.__init__(self, extra_params) + Dot2c.__init__(self, file_path, extra_params.get("model_name")) + self.enum_suffix = "_%s" % self.name + + def fill_monitor_type(self): + return self.monitor_type.upper() + + def fill_tracepoint_handlers_skel(self): + buff = [] + for event in self.events: + buff.append("static void handle_%s(void *data, /* XXX: fill header */)" % event) + buff.append("{") + handle = "handle_event" + if self.is_start_event(event): + buff.append("\t/* XXX: validate that this event always leads to the initial state */") + handle = "handle_start_event" + elif self.is_start_run_event(event): + buff.append("\t/* XXX: validate that this event is only valid in the initial state */") + handle = "handle_start_run_event" + if self.monitor_type == "per_task": + buff.append("\tstruct task_struct *p = /* XXX: how do I get p? 
*/;"); + buff.append("\tda_%s_%s(p, %s%s);" % (handle, self.name, event, self.enum_suffix)); + else: + buff.append("\tda_%s_%s(%s%s);" % (handle, self.name, event, self.enum_suffix)); + buff.append("}") + buff.append("") + return '\n'.join(buff) + + def fill_tracepoint_attach_probe(self): + buff = [] + for event in self.events: + buff.append("\trv_attach_trace_probe(\"%s\", /* XXX: tracepoint */, handle_%s);" % (self.name, event)) + return '\n'.join(buff) + + def fill_tracepoint_detach_helper(self): + buff = [] + for event in self.events: + buff.append("\trv_detach_trace_probe(\"%s\", /* XXX: tracepoint */, handle_%s);" % (self.name, event)) + return '\n'.join(buff) + + def fill_model_h_header(self): + buff = [] + buff.append("/* SPDX-License-Identifier: GPL-2.0 */") + buff.append("/*") + buff.append(" * Automatically generated C representation of %s automaton" % (self.name)) + buff.append(" * For further information about this format, see kernel documentation:") + buff.append(" * Documentation/trace/rv/deterministic_automata.rst") + buff.append(" */") + buff.append("") + + return buff + + def fill_model_h(self): + # + # Adjust the definition names + # + self.enum_states_def = "states_%s" % self.name + self.enum_events_def = "events_%s" % self.name + self.struct_automaton_def = "automaton_%s" % self.name + self.var_automaton_def = "automaton_%s" % self.name + + buff = self.fill_model_h_header() + buff += self.format_model() + + return '\n'.join(buff) + + def fill_monitor_class_type(self): + if self.monitor_type == "per_task": + return "DA_MON_EVENTS_ID" + return "DA_MON_EVENTS_IMPLICIT" + + def fill_monitor_class(self): + if self.monitor_type == "per_task": + return "da_monitor_id" + return "da_monitor" + + def fill_tracepoint_args_skel(self, tp_type): + buff = [] + tp_args_event = [ + ("char *", "state"), + ("char *", "event"), + ("char *", "next_state"), + ("bool ", "final_state"), + ] + tp_args_error = [ + ("char *", "state"), + ("char *", "event"), + ] + 
tp_args_id = ("int ", "id") + tp_args = tp_args_event if tp_type == "event" else tp_args_error + if self.monitor_type == "per_task": + tp_args.insert(0, tp_args_id) + tp_proto_c = ", ".join([a+b for a,b in tp_args]) + tp_args_c = ", ".join([b for a,b in tp_args]) + buff.append(" TP_PROTO(%s)," % tp_proto_c) + buff.append(" TP_ARGS(%s)" % tp_args_c) + return '\n'.join(buff) + + def fill_main_c(self): + main_c = super().fill_main_c() + + min_type = self.get_minimun_type() + nr_events = len(self.events) + monitor_type = self.fill_monitor_type() + + main_c = main_c.replace("%%MIN_TYPE%%", min_type) + main_c = main_c.replace("%%NR_EVENTS%%", str(nr_events)) + main_c = main_c.replace("%%MONITOR_TYPE%%", monitor_type) + + return main_c diff --git a/tools/verification/dot2/dot2k.py b/tools/verification/rvgen/rvgen/generator.py index 745d35a4a379..3441385c1177 100644 --- a/tools/verification/dot2/dot2k.py +++ b/tools/verification/rvgen/rvgen/generator.py @@ -3,74 +3,27 @@ # # Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org> # -# dot2k: transform dot files into a monitor for the Linux kernel. 
-# -# For further information, see: -# Documentation/trace/rv/da_monitor_synthesis.rst +# Abtract class for generating kernel runtime verification monitors from specification file -from dot2.dot2c import Dot2c import platform import os -class dot2k(Dot2c): - monitor_types = { "global" : 1, "per_cpu" : 2, "per_task" : 3 } - monitor_templates_dir = "dot2/dot2k_templates/" + +class RVGenerator: rv_dir = "kernel/trace/rv" - monitor_type = "per_cpu" - def __init__(self, file_path, MonitorType, extra_params={}): - self.container = extra_params.get("container") + def __init__(self, extra_params={}): + self.name = extra_params.get("model_name") self.parent = extra_params.get("parent") - self.__fill_rv_templates_dir() - - if self.container: - if file_path: - raise ValueError("A container does not require a dot file") - if MonitorType: - raise ValueError("A container does not require a monitor type") - if self.parent: - raise ValueError("A container cannot have a parent") - self.name = extra_params.get("model_name") - self.events = [] - self.states = [] - self.main_c = self.__read_file(self.monitor_templates_dir + "main_container.c") - self.main_h = self.__read_file(self.monitor_templates_dir + "main_container.h") - else: - super().__init__(file_path, extra_params.get("model_name")) - - self.monitor_type = self.monitor_types.get(MonitorType) - if self.monitor_type is None: - raise ValueError("Unknown monitor type: %s" % MonitorType) - self.monitor_type = MonitorType - self.main_c = self.__read_file(self.monitor_templates_dir + "main.c") - self.trace_h = self.__read_file(self.monitor_templates_dir + "trace.h") - self.kconfig = self.__read_file(self.monitor_templates_dir + "Kconfig") - self.enum_suffix = "_%s" % self.name + self.abs_template_dir = \ + os.path.join(os.path.dirname(__file__), "templates", self.template_dir) + self.main_c = self._read_template_file("main.c") + self.kconfig = self._read_template_file("Kconfig") self.description = extra_params.get("description", 
self.name) or "auto-generated" self.auto_patch = extra_params.get("auto_patch") if self.auto_patch: self.__fill_rv_kernel_dir() - def __fill_rv_templates_dir(self): - - if os.path.exists(self.monitor_templates_dir): - return - - if platform.system() != "Linux": - raise OSError("I can only run on Linux.") - - kernel_path = "/lib/modules/%s/build/tools/verification/dot2/dot2k_templates/" % (platform.release()) - - if os.path.exists(kernel_path): - self.monitor_templates_dir = kernel_path - return - - if os.path.exists("/usr/share/dot2/dot2k_templates/"): - self.monitor_templates_dir = "/usr/share/dot2/dot2k_templates/" - return - - raise FileNotFoundError("Could not find the template directory, do you have the kernel source installed?") - def __fill_rv_kernel_dir(self): # first try if we are running in the kernel tree root @@ -97,7 +50,7 @@ class dot2k(Dot2c): raise FileNotFoundError("Could not find the rv directory, do you have the kernel source installed?") - def __read_file(self, path): + def _read_file(self, path): try: fd = open(path, 'r') except OSError: @@ -108,17 +61,15 @@ class dot2k(Dot2c): fd.close() return content - def __buff_to_string(self, buff): - string = "" - - for line in buff: - string = string + line + "\n" - - # cut off the last \n - return string[:-1] - - def fill_monitor_type(self): - return self.monitor_type.upper() + def _read_template_file(self, file): + try: + path = os.path.join(self.abs_template_dir, file) + return self._read_file(path) + except Exception: + # Specific template file not found. 
Try the generic template file in the template/ + # directory, which is one level up + path = os.path.join(self.abs_template_dir, "..", file) + return self._read_file(path) def fill_parent(self): return "&rv_%s" % self.parent if self.parent else "NULL" @@ -129,53 +80,23 @@ class dot2k(Dot2c): return "" def fill_tracepoint_handlers_skel(self): - buff = [] - for event in self.events: - buff.append("static void handle_%s(void *data, /* XXX: fill header */)" % event) - buff.append("{") - handle = "handle_event" - if self.is_start_event(event): - buff.append("\t/* XXX: validate that this event always leads to the initial state */") - handle = "handle_start_event" - elif self.is_start_run_event(event): - buff.append("\t/* XXX: validate that this event is only valid in the initial state */") - handle = "handle_start_run_event" - if self.monitor_type == "per_task": - buff.append("\tstruct task_struct *p = /* XXX: how do I get p? */;"); - buff.append("\tda_%s_%s(p, %s%s);" % (handle, self.name, event, self.enum_suffix)); - else: - buff.append("\tda_%s_%s(%s%s);" % (handle, self.name, event, self.enum_suffix)); - buff.append("}") - buff.append("") - return self.__buff_to_string(buff) + return "NotImplemented" def fill_tracepoint_attach_probe(self): - buff = [] - for event in self.events: - buff.append("\trv_attach_trace_probe(\"%s\", /* XXX: tracepoint */, handle_%s);" % (self.name, event)) - return self.__buff_to_string(buff) + return "NotImplemented" def fill_tracepoint_detach_helper(self): - buff = [] - for event in self.events: - buff.append("\trv_detach_trace_probe(\"%s\", /* XXX: tracepoint */, handle_%s);" % (self.name, event)) - return self.__buff_to_string(buff) + return "NotImplemented" def fill_main_c(self): main_c = self.main_c - monitor_type = self.fill_monitor_type() - min_type = self.get_minimun_type() - nr_events = len(self.events) tracepoint_handlers = self.fill_tracepoint_handlers_skel() tracepoint_attach = self.fill_tracepoint_attach_probe() 
tracepoint_detach = self.fill_tracepoint_detach_helper() parent = self.fill_parent() parent_include = self.fill_include_parent() - main_c = main_c.replace("%%MONITOR_TYPE%%", monitor_type) - main_c = main_c.replace("%%MIN_TYPE%%", min_type) main_c = main_c.replace("%%MODEL_NAME%%", self.name) - main_c = main_c.replace("%%NR_EVENTS%%", str(nr_events)) main_c = main_c.replace("%%TRACEPOINT_HANDLERS_SKEL%%", tracepoint_handlers) main_c = main_c.replace("%%TRACEPOINT_ATTACH%%", tracepoint_attach) main_c = main_c.replace("%%TRACEPOINT_DETACH%%", tracepoint_detach) @@ -185,63 +106,17 @@ class dot2k(Dot2c): return main_c - def fill_model_h_header(self): - buff = [] - buff.append("/* SPDX-License-Identifier: GPL-2.0 */") - buff.append("/*") - buff.append(" * Automatically generated C representation of %s automaton" % (self.name)) - buff.append(" * For further information about this format, see kernel documentation:") - buff.append(" * Documentation/trace/rv/deterministic_automata.rst") - buff.append(" */") - buff.append("") - - return buff - def fill_model_h(self): - # - # Adjust the definition names - # - self.enum_states_def = "states_%s" % self.name - self.enum_events_def = "events_%s" % self.name - self.struct_automaton_def = "automaton_%s" % self.name - self.var_automaton_def = "automaton_%s" % self.name - - buff = self.fill_model_h_header() - buff += self.format_model() - - return self.__buff_to_string(buff) + return "NotImplemented" def fill_monitor_class_type(self): - if self.monitor_type == "per_task": - return "DA_MON_EVENTS_ID" - return "DA_MON_EVENTS_IMPLICIT" + return "NotImplemented" def fill_monitor_class(self): - if self.monitor_type == "per_task": - return "da_monitor_id" - return "da_monitor" + return "NotImplemented" def fill_tracepoint_args_skel(self, tp_type): - buff = [] - tp_args_event = [ - ("char *", "state"), - ("char *", "event"), - ("char *", "next_state"), - ("bool ", "final_state"), - ] - tp_args_error = [ - ("char *", "state"), - ("char *", 
"event"), - ] - tp_args_id = ("int ", "id") - tp_args = tp_args_event if tp_type == "event" else tp_args_error - if self.monitor_type == "per_task": - tp_args.insert(0, tp_args_id) - tp_proto_c = ", ".join([a+b for a,b in tp_args]) - tp_args_c = ", ".join([b for a,b in tp_args]) - buff.append(" TP_PROTO(%s)," % tp_proto_c) - buff.append(" TP_ARGS(%s)" % tp_args_c) - return self.__buff_to_string(buff) + return "NotImplemented" def fill_monitor_deps(self): buff = [] @@ -249,21 +124,7 @@ class dot2k(Dot2c): if self.parent: buff.append(" depends on RV_MON_%s" % self.parent.upper()) buff.append(" default y") - return self.__buff_to_string(buff) - - def fill_trace_h(self): - trace_h = self.trace_h - monitor_class = self.fill_monitor_class() - monitor_class_type = self.fill_monitor_class_type() - tracepoint_args_skel_event = self.fill_tracepoint_args_skel("event") - tracepoint_args_skel_error = self.fill_tracepoint_args_skel("error") - trace_h = trace_h.replace("%%MODEL_NAME%%", self.name) - trace_h = trace_h.replace("%%MODEL_NAME_UP%%", self.name.upper()) - trace_h = trace_h.replace("%%MONITOR_CLASS%%", monitor_class) - trace_h = trace_h.replace("%%MONITOR_CLASS_TYPE%%", monitor_class_type) - trace_h = trace_h.replace("%%TRACEPOINT_ARGS_SKEL_EVENT%%", tracepoint_args_skel_event) - trace_h = trace_h.replace("%%TRACEPOINT_ARGS_SKEL_ERROR%%", tracepoint_args_skel_error) - return trace_h + return '\n'.join(buff) def fill_kconfig(self): kconfig = self.kconfig @@ -276,21 +137,17 @@ class dot2k(Dot2c): kconfig = kconfig.replace("%%MONITOR_DEPS%%", monitor_deps) return kconfig - def fill_main_container_h(self): - main_h = self.main_h - main_h = main_h.replace("%%MODEL_NAME%%", self.name) - return main_h - - def __patch_file(self, file, marker, line): + def _patch_file(self, file, marker, line): + assert self.auto_patch file_to_patch = os.path.join(self.rv_dir, file) - content = self.__read_file(file_to_patch) + content = self._read_file(file_to_patch) content = 
content.replace(marker, line + "\n" + marker) self.__write_file(file_to_patch, content) def fill_tracepoint_tooltip(self): monitor_class_type = self.fill_monitor_class_type() if self.auto_patch: - self.__patch_file("rv_trace.h", + self._patch_file("rv_trace.h", "// Add new monitors based on CONFIG_%s here" % monitor_class_type, "#include <monitors/%s/%s_trace.h>" % (self.name, self.name)) return " - Patching %s/rv_trace.h, double check the result" % self.rv_dir @@ -300,10 +157,15 @@ Add this line where other tracepoints are included and %s is defined: #include <monitors/%s/%s_trace.h> """ % (self.rv_dir, monitor_class_type, self.name, self.name) + def _kconfig_marker(self, container=None) -> str: + return "# Add new %smonitors here" % (container + " " + if container else "") + def fill_kconfig_tooltip(self): if self.auto_patch: - self.__patch_file("Kconfig", - "# Add new monitors here", + # monitors with a container should stay together in the Kconfig + self._patch_file("Kconfig", + self._kconfig_marker(self.parent), "source \"kernel/trace/rv/monitors/%s/Kconfig\"" % (self.name)) return " - Patching %s/Kconfig, double check the result" % self.rv_dir @@ -316,7 +178,7 @@ source \"kernel/trace/rv/monitors/%s/Kconfig\" name = self.name name_up = name.upper() if self.auto_patch: - self.__patch_file("Makefile", + self._patch_file("Makefile", "# Add new monitors here", "obj-$(CONFIG_RV_MON_%s) += monitors/%s/%s.o" % (name_up, name, name)) return " - Patching %s/Makefile, double check the result" % self.rv_dir @@ -352,7 +214,7 @@ obj-$(CONFIG_RV_MON_%s) += monitors/%s/%s.o file.close() - def __create_file(self, file_name, content): + def _create_file(self, file_name, content): path = "%s/%s" % (self.name, file_name) if self.auto_patch: path = os.path.join(self.rv_dir, "monitors", path) @@ -370,20 +232,39 @@ obj-$(CONFIG_RV_MON_%s) += monitors/%s/%s.o self.__create_directory() path = "%s.c" % self.name - self.__create_file(path, main_c) - - if self.container: - main_h = 
self.fill_main_container_h() - path = "%s.h" % self.name - self.__create_file(path, main_h) - else: - model_h = self.fill_model_h() - path = "%s.h" % self.name - self.__create_file(path, model_h) + self._create_file(path, main_c) - trace_h = self.fill_trace_h() - path = "%s_trace.h" % self.name - self.__create_file(path, trace_h) + model_h = self.fill_model_h() + path = "%s.h" % self.name + self._create_file(path, model_h) kconfig = self.fill_kconfig() - self.__create_file("Kconfig", kconfig) + self._create_file("Kconfig", kconfig) + + +class Monitor(RVGenerator): + monitor_types = { "global" : 1, "per_cpu" : 2, "per_task" : 3 } + + def __init__(self, extra_params={}): + super().__init__(extra_params) + self.trace_h = self._read_template_file("trace.h") + + def fill_trace_h(self): + trace_h = self.trace_h + monitor_class = self.fill_monitor_class() + monitor_class_type = self.fill_monitor_class_type() + tracepoint_args_skel_event = self.fill_tracepoint_args_skel("event") + tracepoint_args_skel_error = self.fill_tracepoint_args_skel("error") + trace_h = trace_h.replace("%%MODEL_NAME%%", self.name) + trace_h = trace_h.replace("%%MODEL_NAME_UP%%", self.name.upper()) + trace_h = trace_h.replace("%%MONITOR_CLASS%%", monitor_class) + trace_h = trace_h.replace("%%MONITOR_CLASS_TYPE%%", monitor_class_type) + trace_h = trace_h.replace("%%TRACEPOINT_ARGS_SKEL_EVENT%%", tracepoint_args_skel_event) + trace_h = trace_h.replace("%%TRACEPOINT_ARGS_SKEL_ERROR%%", tracepoint_args_skel_error) + return trace_h + + def print_files(self): + super().print_files() + trace_h = self.fill_trace_h() + path = "%s_trace.h" % self.name + self._create_file(path, trace_h) diff --git a/tools/verification/rvgen/rvgen/ltl2ba.py b/tools/verification/rvgen/rvgen/ltl2ba.py new file mode 100644 index 000000000000..f14e6760ac3d --- /dev/null +++ b/tools/verification/rvgen/rvgen/ltl2ba.py @@ -0,0 +1,566 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# +# Implementation based on +# 
Gerth, R., Peled, D., Vardi, M.Y., Wolper, P. (1996). +# Simple On-the-fly Automatic Verification of Linear Temporal Logic. +# https://doi.org/10.1007/978-0-387-34892-6_1 +# With extra optimizations + +from ply.lex import lex +from ply.yacc import yacc + +# Grammar: +# ltl ::= opd | ( ltl ) | ltl binop ltl | unop ltl +# +# Operands (opd): +# true, false, user-defined names +# +# Unary Operators (unop): +# always +# eventually +# next +# not +# +# Binary Operators (binop): +# until +# and +# or +# imply +# equivalent + +tokens = ( + 'AND', + 'OR', + 'IMPLY', + 'UNTIL', + 'ALWAYS', + 'EVENTUALLY', + 'NEXT', + 'VARIABLE', + 'LITERAL', + 'NOT', + 'LPAREN', + 'RPAREN', + 'ASSIGN', +) + +t_AND = r'and' +t_OR = r'or' +t_IMPLY = r'imply' +t_UNTIL = r'until' +t_ALWAYS = r'always' +t_NEXT = r'next' +t_EVENTUALLY = r'eventually' +t_VARIABLE = r'[A-Z_0-9]+' +t_LITERAL = r'true|false' +t_NOT = r'not' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_ASSIGN = r'=' +t_ignore_COMMENT = r'\#.*' +t_ignore = ' \t\n' + +def t_error(t): + raise ValueError(f"Illegal character '{t.value[0]}'") + +lexer = lex() + +class GraphNode: + uid = 0 + + def __init__(self, incoming: set['GraphNode'], new, old, _next): + self.init = False + self.outgoing = set() + self.labels = set() + self.incoming = incoming.copy() + self.new = new.copy() + self.old = old.copy() + self.next = _next.copy() + self.id = GraphNode.uid + GraphNode.uid += 1 + + def expand(self, node_set): + if not self.new: + for nd in node_set: + if nd.old == self.old and nd.next == self.next: + nd.incoming |= self.incoming + return node_set + + new_current_node = GraphNode({self}, self.next, set(), set()) + return new_current_node.expand({self} | node_set) + n = self.new.pop() + return n.expand(self, node_set) + + def __lt__(self, other): + return self.id < other.id + +class ASTNode: + uid = 1 + + def __init__(self, op): + self.op = op + self.id = ASTNode.uid + ASTNode.uid += 1 + + def __hash__(self): + return hash(self.op) + + def __eq__(self, 
other): + return self is other + + def __iter__(self): + yield self + yield from self.op + + def negate(self): + self.op = self.op.negate() + return self + + def expand(self, node, node_set): + return self.op.expand(self, node, node_set) + + def __str__(self): + if isinstance(self.op, Literal): + return str(self.op.value) + if isinstance(self.op, Variable): + return self.op.name.lower() + return "val" + str(self.id) + + def normalize(self): + # Get rid of: + # - ALWAYS + # - EVENTUALLY + # - IMPLY + # And move all the NOT to be inside + self.op = self.op.normalize() + return self + +class BinaryOp: + op_str = "not_supported" + + def __init__(self, left: ASTNode, right: ASTNode): + self.left = left + self.right = right + + def __hash__(self): + return hash((self.left, self.right)) + + def __iter__(self): + yield from self.left + yield from self.right + + def normalize(self): + raise NotImplementedError + + def negate(self): + raise NotImplementedError + + def _is_temporal(self): + raise NotImplementedError + + def is_temporal(self): + if self.left.op.is_temporal(): + return True + if self.right.op.is_temporal(): + return True + return self._is_temporal() + + @staticmethod + def expand(n: ASTNode, node: GraphNode, node_set) -> set[GraphNode]: + raise NotImplementedError + +class AndOp(BinaryOp): + op_str = '&&' + + def normalize(self): + return self + + def negate(self): + return OrOp(self.left.negate(), self.right.negate()) + + def _is_temporal(self): + return False + + @staticmethod + def expand(n: ASTNode, node: GraphNode, node_set) -> set[GraphNode]: + if not n.op.is_temporal(): + node.old.add(n) + return node.expand(node_set) + + tmp = GraphNode(node.incoming, + node.new | ({n.op.left, n.op.right} - node.old), + node.old | {n}, + node.next) + return tmp.expand(node_set) + +class OrOp(BinaryOp): + op_str = '||' + + def normalize(self): + return self + + def negate(self): + return AndOp(self.left.negate(), self.right.negate()) + + def _is_temporal(self): + return 
False + + @staticmethod + def expand(n: ASTNode, node: GraphNode, node_set) -> set[GraphNode]: + if not n.op.is_temporal(): + node.old |= {n} + return node.expand(node_set) + + node1 = GraphNode(node.incoming, + node.new | ({n.op.left} - node.old), + node.old | {n}, + node.next) + node2 = GraphNode(node.incoming, + node.new | ({n.op.right} - node.old), + node.old | {n}, + node.next) + return node2.expand(node1.expand(node_set)) + +class UntilOp(BinaryOp): + def normalize(self): + return self + + def negate(self): + return VOp(self.left.negate(), self.right.negate()) + + def _is_temporal(self): + return True + + @staticmethod + def expand(n: ASTNode, node: GraphNode, node_set) -> set[GraphNode]: + node1 = GraphNode(node.incoming, + node.new | ({n.op.left} - node.old), + node.old | {n}, + node.next | {n}) + node2 = GraphNode(node.incoming, + node.new | ({n.op.right} - node.old), + node.old | {n}, + node.next) + return node2.expand(node1.expand(node_set)) + +class VOp(BinaryOp): + def normalize(self): + return self + + def negate(self): + return UntilOp(self.left.negate(), self.right.negate()) + + def _is_temporal(self): + return True + + @staticmethod + def expand(n: ASTNode, node: GraphNode, node_set) -> set[GraphNode]: + node1 = GraphNode(node.incoming, + node.new | ({n.op.right} - node.old), + node.old | {n}, + node.next | {n}) + node2 = GraphNode(node.incoming, + node.new | ({n.op.left, n.op.right} - node.old), + node.old | {n}, + node.next) + return node2.expand(node1.expand(node_set)) + +class ImplyOp(BinaryOp): + def normalize(self): + # P -> Q === !P | Q + return OrOp(self.left.negate(), self.right) + + def _is_temporal(self): + return False + + def negate(self): + # !(P -> Q) === !(!P | Q) === P & !Q + return AndOp(self.left, self.right.negate()) + +class UnaryOp: + def __init__(self, child: ASTNode): + self.child = child + + def __iter__(self): + yield from self.child + + def __hash__(self): + return hash(self.child) + + def normalize(self): + raise 
NotImplementedError + + def _is_temporal(self): + raise NotImplementedError + + def is_temporal(self): + if self.child.op.is_temporal(): + return True + return self._is_temporal() + + def negate(self): + raise NotImplementedError + +class EventuallyOp(UnaryOp): + def __str__(self): + return "eventually " + str(self.child) + + def normalize(self): + # <>F == true U F + return UntilOp(ASTNode(Literal(True)), self.child) + + def _is_temporal(self): + return True + + def negate(self): + # !<>F == [](!F) + return AlwaysOp(self.child.negate()).normalize() + +class AlwaysOp(UnaryOp): + def normalize(self): + # []F === !(true U !F) == false V F + new = ASTNode(Literal(False)) + return VOp(new, self.child) + + def _is_temporal(self): + return True + + def negate(self): + # ![]F == <>(!F) + return EventuallyOp(self.child.negate()).normalize() + +class NextOp(UnaryOp): + def normalize(self): + return self + + def _is_temporal(self): + return True + + def negate(self): + # not (next A) == next (not A) + self.child = self.child.negate() + return self + + @staticmethod + def expand(n: ASTNode, node: GraphNode, node_set) -> set[GraphNode]: + tmp = GraphNode(node.incoming, + node.new, + node.old | {n}, + node.next | {n.op.child}) + return tmp.expand(node_set) + +class NotOp(UnaryOp): + def __str__(self): + return "!" 
+ str(self.child) + + def normalize(self): + return self.child.op.negate() + + def negate(self): + return self.child.op + + def _is_temporal(self): + return False + + @staticmethod + def expand(n: ASTNode, node: GraphNode, node_set) -> set[GraphNode]: + for f in node.old: + if n.op.child is f: + return node_set + node.old |= {n} + return node.expand(node_set) + +class Variable: + def __init__(self, name: str): + self.name = name + + def __hash__(self): + return hash(self.name) + + def __iter__(self): + yield from () + + def negate(self): + new = ASTNode(self) + return NotOp(new) + + def normalize(self): + return self + + def is_temporal(self): + return False + + @staticmethod + def expand(n: ASTNode, node: GraphNode, node_set) -> set[GraphNode]: + for f in node.old: + if isinstance(f, NotOp) and f.op.child is n: + return node_set + node.old |= {n} + return node.expand(node_set) + +class Literal: + def __init__(self, value: bool): + self.value = value + + def __iter__(self): + yield from () + + def __hash__(self): + return hash(self.value) + + def __str__(self): + if self.value: + return "true" + return "false" + + def negate(self): + self.value = not self.value + return self + + def normalize(self): + return self + + def is_temporal(self): + return False + + @staticmethod + def expand(n: ASTNode, node: GraphNode, node_set) -> set[GraphNode]: + if not n.op.value: + return node_set + node.old |= {n} + return node.expand(node_set) + +def p_spec(p): + ''' + spec : assign + | assign spec + ''' + if len(p) == 3: + p[2].append(p[1]) + p[0] = p[2] + else: + p[0] = [p[1]] + +def p_assign(p): + ''' + assign : VARIABLE ASSIGN ltl + ''' + p[0] = (p[1], p[3]) + +def p_ltl(p): + ''' + ltl : opd + | binop + | unop + ''' + p[0] = p[1] + +def p_opd(p): + ''' + opd : VARIABLE + | LITERAL + | LPAREN ltl RPAREN + ''' + if p[1] == "true": + p[0] = ASTNode(Literal(True)) + elif p[1] == "false": + p[0] = ASTNode(Literal(False)) + elif p[1] == '(': + p[0] = p[2] + else: + p[0] = 
ASTNode(Variable(p[1])) + +def p_unop(p): + ''' + unop : ALWAYS ltl + | EVENTUALLY ltl + | NEXT ltl + | NOT ltl + ''' + if p[1] == "always": + op = AlwaysOp(p[2]) + elif p[1] == "eventually": + op = EventuallyOp(p[2]) + elif p[1] == "next": + op = NextOp(p[2]) + elif p[1] == "not": + op = NotOp(p[2]) + else: + raise ValueError(f"Invalid unary operator {p[1]}") + + p[0] = ASTNode(op) + +def p_binop(p): + ''' + binop : opd UNTIL ltl + | opd AND ltl + | opd OR ltl + | opd IMPLY ltl + ''' + if p[2] == "and": + op = AndOp(p[1], p[3]) + elif p[2] == "until": + op = UntilOp(p[1], p[3]) + elif p[2] == "or": + op = OrOp(p[1], p[3]) + elif p[2] == "imply": + op = ImplyOp(p[1], p[3]) + else: + raise ValueError(f"Invalid binary operator {p[2]}") + + p[0] = ASTNode(op) + +parser = yacc() + +def parse_ltl(s: str) -> ASTNode: + spec = parser.parse(s) + + rule = None + subexpr = {} + + for assign in spec: + if assign[0] == "RULE": + rule = assign[1] + else: + subexpr[assign[0]] = assign[1] + + if rule is None: + raise ValueError("Please define your specification in the \"RULE = <LTL spec>\" format") + + for node in rule: + if not isinstance(node.op, Variable): + continue + replace = subexpr.get(node.op.name) + if replace is not None: + node.op = replace.op + + return rule + +def create_graph(s: str): + atoms = set() + + ltl = parse_ltl(s) + for c in ltl: + c.normalize() + if isinstance(c.op, Variable): + atoms.add(c.op.name) + + init = GraphNode(set(), set(), set(), set()) + head = GraphNode({init}, {ltl}, set(), set()) + graph = sorted(head.expand(set())) + + for i, node in enumerate(graph): + # The id assignment during graph generation has gaps. 
Reassign them + node.id = i + + for incoming in node.incoming: + if incoming is init: + node.init = True + else: + incoming.outgoing.add(node) + for o in node.old: + if not o.op.is_temporal(): + node.labels.add(str(o)) + + return sorted(atoms), graph, ltl diff --git a/tools/verification/rvgen/rvgen/ltl2k.py b/tools/verification/rvgen/rvgen/ltl2k.py new file mode 100644 index 000000000000..b075f98d50c4 --- /dev/null +++ b/tools/verification/rvgen/rvgen/ltl2k.py @@ -0,0 +1,271 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only + +from pathlib import Path +from . import generator +from . import ltl2ba + +COLUMN_LIMIT = 100 + +def line_len(line: str) -> int: + tabs = line.count('\t') + return tabs * 7 + len(line) + +def break_long_line(line: str, indent='') -> list[str]: + result = [] + while line_len(line) > COLUMN_LIMIT: + i = line[:COLUMN_LIMIT - line_len(line)].rfind(' ') + result.append(line[:i]) + line = indent + line[i + 1:] + if line: + result.append(line) + return result + +def build_condition_string(node: ltl2ba.GraphNode): + if not node.labels: + return "(true)" + + result = "(" + + first = True + for label in sorted(node.labels): + if not first: + result += " && " + result += label + first = False + + result += ")" + + return result + +def abbreviate_atoms(atoms: list[str]) -> list[str]: + def shorten(s: str) -> str: + skip = ["is", "by", "or", "and"] + return '_'.join([x[:2] for x in s.lower().split('_') if x not in skip]) + + abbrs = [] + for atom in atoms: + for i in range(len(atom), -1, -1): + if sum(a.startswith(atom[:i]) for a in atoms) > 1: + break + share = atom[:i] + unique = atom[i:] + abbrs.append((shorten(share) + shorten(unique))) + return abbrs + +class ltl2k(generator.Monitor): + template_dir = "ltl2k" + + def __init__(self, file_path, MonitorType, extra_params={}): + if MonitorType != "per_task": + raise NotImplementedError("Only per_task monitor is supported for LTL") + super().__init__(extra_params) + with open(file_path) 
as f: + self.atoms, self.ba, self.ltl = ltl2ba.create_graph(f.read()) + self.atoms_abbr = abbreviate_atoms(self.atoms) + self.name = extra_params.get("model_name") + if not self.name: + self.name = Path(file_path).stem + + def _fill_states(self) -> str: + buf = [ + "enum ltl_buchi_state {", + ] + + for node in self.ba: + buf.append("\tS%i," % node.id) + buf.append("\tRV_NUM_BA_STATES") + buf.append("};") + buf.append("static_assert(RV_NUM_BA_STATES <= RV_MAX_BA_STATES);") + return buf + + def _fill_atoms(self): + buf = ["enum ltl_atom {"] + for a in sorted(self.atoms): + buf.append("\tLTL_%s," % a) + buf.append("\tLTL_NUM_ATOM") + buf.append("};") + buf.append("static_assert(LTL_NUM_ATOM <= RV_MAX_LTL_ATOM);") + return buf + + def _fill_atoms_to_string(self): + buf = [ + "static const char *ltl_atom_str(enum ltl_atom atom)", + "{", + "\tstatic const char *const names[] = {" + ] + + for name in self.atoms_abbr: + buf.append("\t\t\"%s\"," % name) + + buf.extend([ + "\t};", + "", + "\treturn names[atom];", + "}" + ]) + return buf + + def _fill_atom_values(self, required_values): + buf = [] + for node in self.ltl: + if str(node) not in required_values: + continue + + if isinstance(node.op, ltl2ba.AndOp): + buf.append("\tbool %s = %s && %s;" % (node, node.op.left, node.op.right)) + required_values |= {str(node.op.left), str(node.op.right)} + elif isinstance(node.op, ltl2ba.OrOp): + buf.append("\tbool %s = %s || %s;" % (node, node.op.left, node.op.right)) + required_values |= {str(node.op.left), str(node.op.right)} + elif isinstance(node.op, ltl2ba.NotOp): + buf.append("\tbool %s = !%s;" % (node, node.op.child)) + required_values.add(str(node.op.child)) + + for atom in self.atoms: + if atom.lower() not in required_values: + continue + buf.append("\tbool %s = test_bit(LTL_%s, mon->atoms);" % (atom.lower(), atom)) + + buf.reverse() + + buf2 = [] + for line in buf: + buf2.extend(break_long_line(line, "\t ")) + return buf2 + + def _fill_transitions(self): + buf = [ + "static 
void", + "ltl_possible_next_states(struct ltl_monitor *mon, unsigned int state, unsigned long *next)", + "{" + ] + + required_values = set() + for node in self.ba: + for o in sorted(node.outgoing): + required_values |= o.labels + + buf.extend(self._fill_atom_values(required_values)) + buf.extend([ + "", + "\tswitch (state) {" + ]) + + for node in self.ba: + buf.append("\tcase S%i:" % node.id) + + for o in sorted(node.outgoing): + line = "\t\tif " + indent = "\t\t " + + line += build_condition_string(o) + lines = break_long_line(line, indent) + buf.extend(lines) + + buf.append("\t\t\t__set_bit(S%i, next);" % o.id) + buf.append("\t\tbreak;") + buf.extend([ + "\t}", + "}" + ]) + + return buf + + def _fill_start(self): + buf = [ + "static void ltl_start(struct task_struct *task, struct ltl_monitor *mon)", + "{" + ] + + required_values = set() + for node in self.ba: + if node.init: + required_values |= node.labels + + buf.extend(self._fill_atom_values(required_values)) + buf.append("") + + for node in self.ba: + if not node.init: + continue + + line = "\tif " + indent = "\t " + + line += build_condition_string(node) + lines = break_long_line(line, indent) + buf.extend(lines) + + buf.append("\t\t__set_bit(S%i, mon->states);" % node.id) + buf.append("}") + return buf + + def fill_tracepoint_handlers_skel(self): + buff = [] + buff.append("static void handle_example_event(void *data, /* XXX: fill header */)") + buff.append("{") + buff.append("\tltl_atom_update(task, LTL_%s, true/false);" % self.atoms[0]) + buff.append("}") + buff.append("") + return '\n'.join(buff) + + def fill_tracepoint_attach_probe(self): + return "\trv_attach_trace_probe(\"%s\", /* XXX: tracepoint */, handle_example_event);" \ + % self.name + + def fill_tracepoint_detach_helper(self): + return "\trv_detach_trace_probe(\"%s\", /* XXX: tracepoint */, handle_sample_event);" \ + % self.name + + def fill_atoms_init(self): + buff = [] + for a in self.atoms: + buff.append("\tltl_atom_set(mon, LTL_%s, 
true/false);" % a) + return '\n'.join(buff) + + def fill_model_h(self): + buf = [ + "/* SPDX-License-Identifier: GPL-2.0 */", + "", + "/*", + " * C implementation of Buchi automaton, automatically generated by", + " * tools/verification/rvgen from the linear temporal logic specification.", + " * For further information, see kernel documentation:", + " * Documentation/trace/rv/linear_temporal_logic.rst", + " */", + "", + "#include <linux/rv.h>", + "", + "#define MONITOR_NAME " + self.name, + "" + ] + + buf.extend(self._fill_atoms()) + buf.append('') + + buf.extend(self._fill_atoms_to_string()) + buf.append('') + + buf.extend(self._fill_states()) + buf.append('') + + buf.extend(self._fill_start()) + buf.append('') + + buf.extend(self._fill_transitions()) + buf.append('') + + return '\n'.join(buf) + + def fill_monitor_class_type(self): + return "LTL_MON_EVENTS_ID" + + def fill_monitor_class(self): + return "ltl_monitor_id" + + def fill_main_c(self): + main_c = super().fill_main_c() + main_c = main_c.replace("%%ATOMS_INIT%%", self.fill_atoms_init()) + + return main_c diff --git a/tools/verification/dot2/dot2k_templates/Kconfig b/tools/verification/rvgen/rvgen/templates/Kconfig index 291b29ea28db..291b29ea28db 100644 --- a/tools/verification/dot2/dot2k_templates/Kconfig +++ b/tools/verification/rvgen/rvgen/templates/Kconfig diff --git a/tools/verification/rvgen/rvgen/templates/container/Kconfig b/tools/verification/rvgen/rvgen/templates/container/Kconfig new file mode 100644 index 000000000000..a606111949c2 --- /dev/null +++ b/tools/verification/rvgen/rvgen/templates/container/Kconfig @@ -0,0 +1,5 @@ +config RV_MON_%%MODEL_NAME_UP%% + depends on RV + bool "%%MODEL_NAME%% monitor" + help + %%DESCRIPTION%% diff --git a/tools/verification/dot2/dot2k_templates/main_container.c b/tools/verification/rvgen/rvgen/templates/container/main.c index 89fc17cf8958..7d9b2f95c7e9 100644 --- a/tools/verification/dot2/dot2k_templates/main_container.c +++ 
b/tools/verification/rvgen/rvgen/templates/container/main.c @@ -21,8 +21,7 @@ struct rv_monitor rv_%%MODEL_NAME%% = { static int __init register_%%MODEL_NAME%%(void) { - rv_register_monitor(&rv_%%MODEL_NAME%%, NULL); - return 0; + return rv_register_monitor(&rv_%%MODEL_NAME%%, NULL); } static void __exit unregister_%%MODEL_NAME%%(void) diff --git a/tools/verification/dot2/dot2k_templates/main_container.h b/tools/verification/rvgen/rvgen/templates/container/main.h index 0f6883ab4bcc..0f6883ab4bcc 100644 --- a/tools/verification/dot2/dot2k_templates/main_container.h +++ b/tools/verification/rvgen/rvgen/templates/container/main.h diff --git a/tools/verification/dot2/dot2k_templates/main.c b/tools/verification/rvgen/rvgen/templates/dot2k/main.c index 83044a20c89a..e0fd1134bd85 100644 --- a/tools/verification/dot2/dot2k_templates/main.c +++ b/tools/verification/rvgen/rvgen/templates/dot2k/main.c @@ -74,8 +74,7 @@ static struct rv_monitor rv_%%MODEL_NAME%% = { static int __init register_%%MODEL_NAME%%(void) { - rv_register_monitor(&rv_%%MODEL_NAME%%, %%PARENT%%); - return 0; + return rv_register_monitor(&rv_%%MODEL_NAME%%, %%PARENT%%); } static void __exit unregister_%%MODEL_NAME%%(void) diff --git a/tools/verification/dot2/dot2k_templates/trace.h b/tools/verification/rvgen/rvgen/templates/dot2k/trace.h index 87d3a1308926..87d3a1308926 100644 --- a/tools/verification/dot2/dot2k_templates/trace.h +++ b/tools/verification/rvgen/rvgen/templates/dot2k/trace.h diff --git a/tools/verification/rvgen/rvgen/templates/ltl2k/main.c b/tools/verification/rvgen/rvgen/templates/ltl2k/main.c new file mode 100644 index 000000000000..f85d076fbf78 --- /dev/null +++ b/tools/verification/rvgen/rvgen/templates/ltl2k/main.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/ftrace.h> +#include <linux/tracepoint.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/rv.h> +#include <rv/instrumentation.h> + +#define MODULE_NAME 
"%%MODEL_NAME%%" + +/* + * XXX: include required tracepoint headers, e.g., + * #include <trace/events/sched.h> + */ +#include <rv_trace.h> +%%INCLUDE_PARENT%% + +/* + * This is the self-generated part of the monitor. Generally, there is no need + * to touch this section. + */ +#include "%%MODEL_NAME%%.h" +#include <rv/ltl_monitor.h> + +static void ltl_atoms_fetch(struct task_struct *task, struct ltl_monitor *mon) +{ + /* + * This is called everytime the Buchi automaton is triggered. + * + * This function could be used to fetch the atomic propositions which + * are expensive to trace. It is possible only if the atomic proposition + * does not need to be updated at precise time. + * + * It is recommended to use tracepoints and ltl_atom_update() instead. + */ +} + +static void ltl_atoms_init(struct task_struct *task, struct ltl_monitor *mon, bool task_creation) +{ + /* + * This should initialize as many atomic propositions as possible. + * + * @task_creation indicates whether the task is being created. This is + * false if the task is already running before the monitor is enabled. + */ +%%ATOMS_INIT%% +} + +/* + * This is the instrumentation part of the monitor. + * + * This is the section where manual work is required. Here the kernel events + * are translated into model's event. + */ +%%TRACEPOINT_HANDLERS_SKEL%% +static int enable_%%MODEL_NAME%%(void) +{ + int retval; + + retval = ltl_monitor_init(); + if (retval) + return retval; + +%%TRACEPOINT_ATTACH%% + + return 0; +} + +static void disable_%%MODEL_NAME%%(void) +{ +%%TRACEPOINT_DETACH%% + + ltl_monitor_destroy(); +} + +/* + * This is the monitor register section. 
+ */ +static struct rv_monitor rv_%%MODEL_NAME%% = { + .name = "%%MODEL_NAME%%", + .description = "%%DESCRIPTION%%", + .enable = enable_%%MODEL_NAME%%, + .disable = disable_%%MODEL_NAME%%, +}; + +static int __init register_%%MODEL_NAME%%(void) +{ + return rv_register_monitor(&rv_%%MODEL_NAME%%, %%PARENT%%); +} + +static void __exit unregister_%%MODEL_NAME%%(void) +{ + rv_unregister_monitor(&rv_%%MODEL_NAME%%); +} + +module_init(register_%%MODEL_NAME%%); +module_exit(unregister_%%MODEL_NAME%%); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR(/* TODO */); +MODULE_DESCRIPTION("%%MODEL_NAME%%: %%DESCRIPTION%%"); diff --git a/tools/verification/rvgen/rvgen/templates/ltl2k/trace.h b/tools/verification/rvgen/rvgen/templates/ltl2k/trace.h new file mode 100644 index 000000000000..49394c4b0f1c --- /dev/null +++ b/tools/verification/rvgen/rvgen/templates/ltl2k/trace.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Snippet to be included in rv_trace.h + */ + +#ifdef CONFIG_RV_MON_%%MODEL_NAME_UP%% +DEFINE_EVENT(event_%%MONITOR_CLASS%%, event_%%MODEL_NAME%%, + TP_PROTO(struct task_struct *task, char *states, char *atoms, char *next), + TP_ARGS(task, states, atoms, next)); +DEFINE_EVENT(error_%%MONITOR_CLASS%%, error_%%MODEL_NAME%%, + TP_PROTO(struct task_struct *task), + TP_ARGS(task)); +#endif /* CONFIG_RV_MON_%%MODEL_NAME_UP%% */ |