summaryrefslogtreecommitdiff
path: root/tools/testing/selftests
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests')
-rw-r--r--tools/testing/selftests/Makefile2
-rw-r--r--tools/testing/selftests/bpf/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/Makefile5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/recursive_attach.c67
-rw-r--r--tools/testing/selftests/bpf/prog_tests/snprintf.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_sysctl.c (renamed from tools/testing/selftests/bpf/test_sysctl.c)37
-rw-r--r--tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_map_resize.c16
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_vfs_accept.c18
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_vfs_reject.c15
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod.c6
-rw-r--r--tools/testing/selftests/bpf/test_lru_map.c105
-rw-r--r--tools/testing/selftests/coredump/stackdump_test.c5
-rw-r--r--tools/testing/selftests/drivers/net/hw/config5
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_ctx.py59
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_input_xfrm.py2
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/tso.py4
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/peer.sh3
-rw-r--r--tools/testing/selftests/futex/functional/.gitignore1
-rw-r--r--tools/testing/selftests/futex/functional/futex_numa_mpol.c10
-rw-r--r--tools/testing/selftests/futex/functional/futex_priv_hash.c2
-rw-r--r--tools/testing/selftests/futex/include/futex2test.h8
-rw-r--r--tools/testing/selftests/hid/tests/test_mouse.py70
-rw-r--r--tools/testing/selftests/iommu/iommufd.c40
-rw-r--r--tools/testing/selftests/iommu/iommufd_utils.h9
-rw-r--r--tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c55
-rw-r--r--tools/testing/selftests/kvm/x86/monitor_mwait_test.c1
-rw-r--r--tools/testing/selftests/mm/config3
-rw-r--r--tools/testing/selftests/mm/gup_longterm.c7
-rw-r--r--tools/testing/selftests/mm/ksm_tests.c32
-rw-r--r--tools/testing/selftests/mm/merge.c46
-rw-r--r--tools/testing/selftests/mm/settings2
-rw-r--r--tools/testing/selftests/mm/thuge-gen.c6
-rw-r--r--tools/testing/selftests/mm/virtual_address_range.c7
-rw-r--r--tools/testing/selftests/mm/vm_util.c38
-rw-r--r--tools/testing/selftests/mm/vm_util.h2
-rw-r--r--tools/testing/selftests/mount_setattr/mount_setattr_test.c17
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile3
-rw-r--r--tools/testing/selftests/net/af_unix/msg_oob.c142
-rwxr-xr-xtools/testing/selftests/net/gre_ipv6_lladdr.sh27
-rw-r--r--tools/testing/selftests/net/lib.sh2
-rwxr-xr-xtools/testing/selftests/net/nat6to4.sh15
-rw-r--r--tools/testing/selftests/net/netfilter/.gitignore1
-rw-r--r--tools/testing/selftests/net/netfilter/Makefile3
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_clash.sh175
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_resize.sh97
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range.sh105
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_nat.sh81
-rw-r--r--tools/testing/selftests/net/netfilter/udpclash.c158
-rw-r--r--tools/testing/selftests/net/ovpn/ovpn-cli.c1
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-large-mtu.sh9
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt53
-rw-r--r--tools/testing/selftests/net/tfo.c171
-rwxr-xr-xtools/testing/selftests/net/tfo_passive.sh112
-rwxr-xr-xtools/testing/selftests/net/udpgro.sh8
-rwxr-xr-xtools/testing/selftests/net/vlan_hw_filter.sh98
-rw-r--r--tools/testing/selftests/sched_ext/exit.c8
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json129
-rw-r--r--tools/testing/selftests/ublk/Makefile1
-rw-r--r--tools/testing/selftests/ublk/fault_inject.c4
-rw-r--r--tools/testing/selftests/ublk/file_backed.c20
-rw-r--r--tools/testing/selftests/ublk/kublk.c374
-rw-r--r--tools/testing/selftests/ublk/kublk.h73
-rw-r--r--tools/testing/selftests/ublk/null.c22
-rw-r--r--tools/testing/selftests/ublk/stripe.c17
-rwxr-xr-xtools/testing/selftests/ublk/test_common.sh5
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_12.sh55
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_03.sh11
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_04.sh7
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_05.sh7
-rw-r--r--tools/testing/selftests/ublk/trace/count_ios_per_tid.bt11
-rw-r--r--tools/testing/selftests/vDSO/vgetrandom-chacha.S2
-rw-r--r--tools/testing/selftests/x86/Makefile2
-rw-r--r--tools/testing/selftests/x86/sigtrap_loop.c101
75 files changed, 2408 insertions, 417 deletions
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 6aa11cd3db42..339b31e6a6b5 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -205,7 +205,7 @@ export KHDR_INCLUDES
all:
@ret=1; \
- for TARGET in $(TARGETS); do \
+ for TARGET in $(TARGETS) $(INSTALL_DEP_TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
mkdir $$BUILD_TARGET -p; \
$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET \
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index e2a2c46c008b..3d8378972d26 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -21,7 +21,6 @@ test_lirc_mode2_user
flow_dissector_load
test_tcpnotify_user
test_libbpf
-test_sysctl
xdping
test_cpp
*.d
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index cf5ed3bee573..910d8d6402ef 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -73,7 +73,7 @@ endif
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_progs \
test_sockmap \
- test_tcpnotify_user test_sysctl \
+ test_tcpnotify_user \
test_progs-no_alu32
TEST_INST_SUBDIRS := no_alu32
@@ -220,7 +220,7 @@ ifeq ($(VMLINUX_BTF),)
$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)")
endif
-# Define simple and short `make test_progs`, `make test_sysctl`, etc targets
+# Define simple and short `make test_progs`, `make test_maps`, etc targets
# to build individual tests.
# NOTE: Semicolon at the end is critical to override lib.mk's default static
# rule for binaries.
@@ -329,7 +329,6 @@ NETWORK_HELPERS := $(OUTPUT)/network_helpers.o
$(OUTPUT)/test_sockmap: $(CGROUP_HELPERS) $(TESTING_HELPERS)
$(OUTPUT)/test_tcpnotify_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(TRACE_HELPERS)
$(OUTPUT)/test_sock_fields: $(CGROUP_HELPERS) $(TESTING_HELPERS)
-$(OUTPUT)/test_sysctl: $(CGROUP_HELPERS) $(TESTING_HELPERS)
$(OUTPUT)/test_tag: $(TESTING_HELPERS)
$(OUTPUT)/test_lirc_mode2_user: $(TESTING_HELPERS)
$(OUTPUT)/xdping: $(TESTING_HELPERS)
diff --git a/tools/testing/selftests/bpf/prog_tests/recursive_attach.c b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c
index 8100509e561b..0ffa01d54ce2 100644
--- a/tools/testing/selftests/bpf/prog_tests/recursive_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c
@@ -149,3 +149,70 @@ close_prog:
fentry_recursive_target__destroy(target_skel);
fentry_recursive__destroy(tracing_skel);
}
+
+static void *fentry_target_test_run(void *arg)
+{
+ for (;;) {
+ int prog_fd = __atomic_load_n((int *)arg, __ATOMIC_SEQ_CST);
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err;
+
+ if (prog_fd == -1)
+ break;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "fentry_target test_run"))
+ break;
+ }
+
+ return NULL;
+}
+
+void test_fentry_attach_stress(void)
+{
+ struct fentry_recursive_target *target_skel = NULL;
+ struct fentry_recursive *tracing_skel = NULL;
+ struct bpf_program *prog;
+ int err, i, tgt_prog_fd;
+ pthread_t thread;
+
+ target_skel = fentry_recursive_target__open_and_load();
+ if (!ASSERT_OK_PTR(target_skel,
+ "fentry_recursive_target__open_and_load"))
+ goto close_prog;
+ tgt_prog_fd = bpf_program__fd(target_skel->progs.fentry_target);
+ err = pthread_create(&thread, NULL,
+ fentry_target_test_run, &tgt_prog_fd);
+ if (!ASSERT_OK(err, "bpf_program__set_attach_target"))
+ goto close_prog;
+
+ for (i = 0; i < 1000; i++) {
+ tracing_skel = fentry_recursive__open();
+ if (!ASSERT_OK_PTR(tracing_skel, "fentry_recursive__open"))
+ goto stop_thread;
+
+ prog = tracing_skel->progs.recursive_attach;
+ err = bpf_program__set_attach_target(prog, tgt_prog_fd,
+ "fentry_target");
+ if (!ASSERT_OK(err, "bpf_program__set_attach_target"))
+ goto stop_thread;
+
+ err = fentry_recursive__load(tracing_skel);
+ if (!ASSERT_OK(err, "fentry_recursive__load"))
+ goto stop_thread;
+
+ err = fentry_recursive__attach(tracing_skel);
+ if (!ASSERT_OK(err, "fentry_recursive__attach"))
+ goto stop_thread;
+
+ fentry_recursive__destroy(tracing_skel);
+ tracing_skel = NULL;
+ }
+
+stop_thread:
+ __atomic_store_n(&tgt_prog_fd, -1, __ATOMIC_SEQ_CST);
+ err = pthread_join(thread, NULL);
+ ASSERT_OK(err, "pthread_join");
+close_prog:
+ fentry_recursive__destroy(tracing_skel);
+ fentry_recursive_target__destroy(target_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
index 4be6fdb78c6a..594441acb707 100644
--- a/tools/testing/selftests/bpf/prog_tests/snprintf.c
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
@@ -116,6 +116,8 @@ static void test_snprintf_negative(void)
ASSERT_ERR(load_single_snprintf("%llc"), "invalid specifier 7");
ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character");
ASSERT_ERR(load_single_snprintf("\x1"), "non printable character");
+ ASSERT_ERR(load_single_snprintf("%p%"), "invalid specifier 8");
+ ASSERT_ERR(load_single_snprintf("%s%"), "invalid specifier 9");
}
void test_snprintf(void)
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c
index bcdbd27f22f0..273dd41ca09e 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c
@@ -1,22 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
-#include <fcntl.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <linux/filter.h>
-
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include <bpf/bpf_endian.h>
-#include "bpf_util.h"
+#include "test_progs.h"
#include "cgroup_helpers.h"
-#include "testing_helpers.h"
#define CG_PATH "/foo"
#define MAX_INSNS 512
@@ -1608,26 +1594,19 @@ static int run_tests(int cgfd)
return fails ? -1 : 0;
}
-int main(int argc, char **argv)
+void test_sysctl(void)
{
- int cgfd = -1;
- int err = 0;
+ int cgfd;
cgfd = cgroup_setup_and_join(CG_PATH);
- if (cgfd < 0)
- goto err;
+ if (!ASSERT_OK_FD(cgfd < 0, "create_cgroup"))
+ goto out;
- /* Use libbpf 1.0 API mode */
- libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+ if (!ASSERT_OK(run_tests(cgfd), "run_tests"))
+ goto out;
- if (run_tests(cgfd))
- goto err;
-
- goto out;
-err:
- err = -1;
out:
close(cgfd);
cleanup_cgroup_environment();
- return err;
+ return;
}
diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
index 69f81cb555ca..d93f68024cc6 100644
--- a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
+++ b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
@@ -57,15 +57,15 @@ int BPF_PROG(test_percpu_load, struct cgroup *cgrp, const char *path)
SEC("tp_btf/cgroup_mkdir")
int BPF_PROG(test_percpu_helper, struct cgroup *cgrp, const char *path)
{
- struct cgroup_rstat_cpu *rstat;
+ struct css_rstat_cpu *rstat;
__u32 cpu;
cpu = bpf_get_smp_processor_id();
- rstat = (struct cgroup_rstat_cpu *)bpf_per_cpu_ptr(
+ rstat = (struct css_rstat_cpu *)bpf_per_cpu_ptr(
cgrp->self.rstat_cpu, cpu);
if (rstat) {
/* READ_ONCE */
- *(volatile int *)rstat;
+ *(volatile long *)rstat;
}
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_global_map_resize.c b/tools/testing/selftests/bpf/progs/test_global_map_resize.c
index a3f220ba7025..ee65bad0436d 100644
--- a/tools/testing/selftests/bpf/progs/test_global_map_resize.c
+++ b/tools/testing/selftests/bpf/progs/test_global_map_resize.c
@@ -32,6 +32,16 @@ int my_int_last SEC(".data.array_not_last");
int percpu_arr[1] SEC(".data.percpu_arr");
+/* at least one extern is included, to ensure that a specific
+ * regression is tested whereby resizing resulted in a free-after-use
+ * bug after type information is invalidated by the resize operation.
+ *
+ * There isn't a particularly good API to test for this specific condition,
+ * but by having externs for the resizing tests it will cover this path.
+ */
+extern int LINUX_KERNEL_VERSION __kconfig;
+long version_sink;
+
SEC("tp/syscalls/sys_enter_getpid")
int bss_array_sum(void *ctx)
{
@@ -44,6 +54,9 @@ int bss_array_sum(void *ctx)
for (size_t i = 0; i < bss_array_len; ++i)
sum += array[i];
+ /* see above; ensure this is not optimized out */
+ version_sink = LINUX_KERNEL_VERSION;
+
return 0;
}
@@ -59,6 +72,9 @@ int data_array_sum(void *ctx)
for (size_t i = 0; i < data_array_len; ++i)
sum += my_array[i];
+ /* see above; ensure this is not optimized out */
+ version_sink = LINUX_KERNEL_VERSION;
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c
index a7c0a553aa50..3e2d76ee8050 100644
--- a/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c
+++ b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2024 Google LLC. */
#include <vmlinux.h>
+#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
@@ -82,4 +83,21 @@ int BPF_PROG(path_d_path_from_file_argument, struct file *file)
return 0;
}
+SEC("lsm.s/inode_rename")
+__success
+int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ struct inode *inode = new_dentry->d_inode;
+ ino_t ino;
+
+ if (!inode)
+ return 0;
+ ino = inode->i_ino;
+ if (ino == 0)
+ return -EACCES;
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c b/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c
index d6d3f4fcb24c..4b392c6c8fc4 100644
--- a/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c
+++ b/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2024 Google LLC. */
#include <vmlinux.h>
+#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <linux/limits.h>
@@ -158,4 +159,18 @@ int BPF_PROG(path_d_path_kfunc_non_lsm, struct path *path, struct file *f)
return 0;
}
+SEC("lsm.s/inode_rename")
+__failure __msg("invalid mem access 'trusted_ptr_or_null_'")
+int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ struct inode *inode = new_dentry->d_inode;
+ ino_t ino;
+
+ ino = inode->i_ino;
+ if (ino == 0)
+ return -EACCES;
+ return 0;
+}
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
index e6c248e3ae54..e9e918cdf31f 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
@@ -385,7 +385,7 @@ int bpf_testmod_fentry_ok;
noinline ssize_t
bpf_testmod_test_read(struct file *file, struct kobject *kobj,
- struct bin_attribute *bin_attr,
+ const struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t len)
{
struct bpf_testmod_test_read_ctx ctx = {
@@ -465,7 +465,7 @@ ALLOW_ERROR_INJECTION(bpf_testmod_test_read, ERRNO);
noinline ssize_t
bpf_testmod_test_write(struct file *file, struct kobject *kobj,
- struct bin_attribute *bin_attr,
+ const struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t len)
{
struct bpf_testmod_test_write_ctx ctx = {
@@ -567,7 +567,7 @@ static void testmod_unregister_uprobe(void)
static ssize_t
bpf_testmod_uprobe_write(struct file *file, struct kobject *kobj,
- struct bin_attribute *bin_attr,
+ const struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t len)
{
unsigned long offset = 0;
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index fda7589c5023..0921939532c6 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -138,6 +138,18 @@ static int sched_next_online(int pid, int *next_to_try)
return ret;
}
+/* Derive target_free from map_size, same as bpf_common_lru_populate */
+static unsigned int __tgt_size(unsigned int map_size)
+{
+ return (map_size / nr_cpus) / 2;
+}
+
+/* Inverse of how bpf_common_lru_populate derives target_free from map_size. */
+static unsigned int __map_size(unsigned int tgt_free)
+{
+ return tgt_free * nr_cpus * 2;
+}
+
/* Size of the LRU map is 2
* Add key=1 (+1 key)
* Add key=2 (+1 key)
@@ -231,11 +243,11 @@ static void test_lru_sanity0(int map_type, int map_flags)
printf("Pass\n");
}
-/* Size of the LRU map is 1.5*tgt_free
- * Insert 1 to tgt_free (+tgt_free keys)
- * Lookup 1 to tgt_free/2
- * Insert 1+tgt_free to 2*tgt_free (+tgt_free keys)
- * => 1+tgt_free/2 to LOCALFREE_TARGET will be removed by LRU
+/* Verify that unreferenced elements are recycled before referenced ones.
+ * Insert elements.
+ * Reference a subset of these.
+ * Insert more, enough to trigger recycling.
+ * Verify that unreferenced are recycled.
*/
static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
{
@@ -257,7 +269,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
batch_size = tgt_free / 2;
assert(batch_size * 2 == tgt_free);
- map_size = tgt_free + batch_size;
+ map_size = __map_size(tgt_free) + batch_size;
lru_map_fd = create_map(map_type, map_flags, map_size);
assert(lru_map_fd != -1);
@@ -266,13 +278,13 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
value[0] = 1234;
- /* Insert 1 to tgt_free (+tgt_free keys) */
- end_key = 1 + tgt_free;
+ /* Insert map_size - batch_size keys */
+ end_key = 1 + __map_size(tgt_free);
for (key = 1; key < end_key; key++)
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
- /* Lookup 1 to tgt_free/2 */
+ /* Lookup 1 to batch_size */
end_key = 1 + batch_size;
for (key = 1; key < end_key; key++) {
assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value));
@@ -280,12 +292,13 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
BPF_NOEXIST));
}
- /* Insert 1+tgt_free to 2*tgt_free
- * => 1+tgt_free/2 to LOCALFREE_TARGET will be
+ /* Insert another map_size - batch_size keys
+ * Map will contain 1 to batch_size plus these latest, i.e.,
+ * => previous 1+batch_size to map_size - batch_size will have been
* removed by LRU
*/
- key = 1 + tgt_free;
- end_key = key + tgt_free;
+ key = 1 + __map_size(tgt_free);
+ end_key = key + __map_size(tgt_free);
for (; key < end_key; key++) {
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
@@ -301,17 +314,8 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
printf("Pass\n");
}
-/* Size of the LRU map 1.5 * tgt_free
- * Insert 1 to tgt_free (+tgt_free keys)
- * Update 1 to tgt_free/2
- * => The original 1 to tgt_free/2 will be removed due to
- * the LRU shrink process
- * Re-insert 1 to tgt_free/2 again and do a lookup immeidately
- * Insert 1+tgt_free to tgt_free*3/2
- * Insert 1+tgt_free*3/2 to tgt_free*5/2
- * => Key 1+tgt_free to tgt_free*3/2
- * will be removed from LRU because it has never
- * been lookup and ref bit is not set
+/* Verify that insertions exceeding map size will recycle the oldest.
+ * Verify that unreferenced elements are recycled before referenced.
*/
static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
{
@@ -334,7 +338,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
batch_size = tgt_free / 2;
assert(batch_size * 2 == tgt_free);
- map_size = tgt_free + batch_size;
+ map_size = __map_size(tgt_free) + batch_size;
lru_map_fd = create_map(map_type, map_flags, map_size);
assert(lru_map_fd != -1);
@@ -343,8 +347,8 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
value[0] = 1234;
- /* Insert 1 to tgt_free (+tgt_free keys) */
- end_key = 1 + tgt_free;
+ /* Insert map_size - batch_size keys */
+ end_key = 1 + __map_size(tgt_free);
for (key = 1; key < end_key; key++)
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
@@ -357,8 +361,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
* shrink the inactive list to get tgt_free
* number of free nodes.
*
- * Hence, the oldest key 1 to tgt_free/2
- * are removed from the LRU list.
+ * Hence, the oldest key is removed from the LRU list.
*/
key = 1;
if (map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
@@ -370,8 +373,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
BPF_EXIST));
}
- /* Re-insert 1 to tgt_free/2 again and do a lookup
- * immeidately.
+ /* Re-insert 1 to batch_size again and do a lookup immediately.
*/
end_key = 1 + batch_size;
value[0] = 4321;
@@ -387,17 +389,18 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
value[0] = 1234;
- /* Insert 1+tgt_free to tgt_free*3/2 */
- end_key = 1 + tgt_free + batch_size;
- for (key = 1 + tgt_free; key < end_key; key++)
+ /* Insert batch_size new elements */
+ key = 1 + __map_size(tgt_free);
+ end_key = key + batch_size;
+ for (; key < end_key; key++)
/* These newly added but not referenced keys will be
* gone during the next LRU shrink.
*/
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
- /* Insert 1+tgt_free*3/2 to tgt_free*5/2 */
- end_key = key + tgt_free;
+ /* Insert map_size - batch_size elements */
+ end_key += __map_size(tgt_free);
for (; key < end_key; key++) {
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
@@ -413,12 +416,12 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
printf("Pass\n");
}
-/* Size of the LRU map is 2*tgt_free
- * It is to test the active/inactive list rotation
- * Insert 1 to 2*tgt_free (+2*tgt_free keys)
- * Lookup key 1 to tgt_free*3/2
- * Add 1+2*tgt_free to tgt_free*5/2 (+tgt_free/2 keys)
- * => key 1+tgt_free*3/2 to 2*tgt_free are removed from LRU
+/* Test the active/inactive list rotation
+ *
+ * Fill the whole map, deplete the free list.
+ * Reference all except the last lru->target_free elements.
+ * Insert lru->target_free new elements. This triggers one shrink.
+ * Verify that the non-referenced elements are replaced.
*/
static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
{
@@ -437,8 +440,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
assert(sched_next_online(0, &next_cpu) != -1);
- batch_size = tgt_free / 2;
- assert(batch_size * 2 == tgt_free);
+ batch_size = __tgt_size(tgt_free);
map_size = tgt_free * 2;
lru_map_fd = create_map(map_type, map_flags, map_size);
@@ -449,23 +451,21 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
value[0] = 1234;
- /* Insert 1 to 2*tgt_free (+2*tgt_free keys) */
- end_key = 1 + (2 * tgt_free);
+ /* Fill the map */
+ end_key = 1 + map_size;
for (key = 1; key < end_key; key++)
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
- /* Lookup key 1 to tgt_free*3/2 */
- end_key = tgt_free + batch_size;
+ /* Reference all but the last batch_size */
+ end_key = 1 + map_size - batch_size;
for (key = 1; key < end_key; key++) {
assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value));
assert(!bpf_map_update_elem(expected_map_fd, &key, value,
BPF_NOEXIST));
}
- /* Add 1+2*tgt_free to tgt_free*5/2
- * (+tgt_free/2 keys)
- */
+ /* Insert new batch_size: replaces the non-referenced elements */
key = 2 * tgt_free + 1;
end_key = key + batch_size;
for (; key < end_key; key++) {
@@ -500,7 +500,8 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free)
lru_map_fd = create_map(map_type, map_flags,
3 * tgt_free * nr_cpus);
else
- lru_map_fd = create_map(map_type, map_flags, 3 * tgt_free);
+ lru_map_fd = create_map(map_type, map_flags,
+ 3 * __map_size(tgt_free));
assert(lru_map_fd != -1);
expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0,
diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c
index 9984413be9f0..68f8e479ac36 100644
--- a/tools/testing/selftests/coredump/stackdump_test.c
+++ b/tools/testing/selftests/coredump/stackdump_test.c
@@ -461,10 +461,15 @@ TEST_F(coredump, socket_detect_userspace_client)
_exit(EXIT_FAILURE);
}
+ ret = read(fd_coredump, &c, 1);
+
close(fd_coredump);
close(fd_server);
close(fd_peer_pidfd);
close(fd_core_file);
+
+ if (ret < 1)
+ _exit(EXIT_FAILURE);
_exit(EXIT_SUCCESS);
}
self->pid_coredump_server = pid_coredump_server;
diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config
new file mode 100644
index 000000000000..88ae719e6f8f
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/config
@@ -0,0 +1,5 @@
+CONFIG_IPV6=y
+CONFIG_IPV6_GRE=y
+CONFIG_NET_IPGRE=y
+CONFIG_NET_IPGRE_DEMUX=y
+CONFIG_VXLAN=y
diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
index ca60ae325c22..7bb552f8b182 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
@@ -747,6 +747,62 @@ def test_rss_ntuple_addition(cfg):
'noise' : (0,) })
+def test_rss_default_context_rule(cfg):
+ """
+ Allocate a port, direct this port to context 0, then create a new RSS
+ context and steer all TCP traffic to it (context 1). Verify that:
+ * Traffic to the specific port continues to use queues of the main
+ context (0/1).
+ * Traffic to any other TCP port is redirected to the new context
+ (queues 2/3).
+ """
+
+ require_ntuple(cfg)
+
+ queue_cnt = len(_get_rx_cnts(cfg))
+ if queue_cnt < 4:
+ try:
+ ksft_pr(f"Increasing queue count {queue_cnt} -> 4")
+ ethtool(f"-L {cfg.ifname} combined 4")
+ defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}")
+ except Exception as exc:
+ raise KsftSkipEx("Not enough queues for the test") from exc
+
+ # Use queues 0 and 1 for the main context
+ ethtool(f"-X {cfg.ifname} equal 2")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ # Create a new RSS context that uses queues 2 and 3
+ ctx_id = ethtool_create(cfg, "-X", "context new start 2 equal 2")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+ # Generic low-priority rule: redirect all TCP traffic to the new context.
+ # Give it an explicit higher location number (lower priority).
+ flow_generic = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} context {ctx_id} loc 1"
+ ethtool(f"-N {cfg.ifname} {flow_generic}")
+ defer(ethtool, f"-N {cfg.ifname} delete 1")
+
+ # Specific high-priority rule for a random port that should stay on context 0.
+ # Assign loc 0 so it is evaluated before the generic rule.
+ port_main = rand_port()
+ flow_main = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port_main} context 0 loc 0"
+ ethtool(f"-N {cfg.ifname} {flow_main}")
+ defer(ethtool, f"-N {cfg.ifname} delete 0")
+
+ _ntuple_rule_check(cfg, 1, ctx_id)
+
+ # Verify that traffic matching the specific rule still goes to queues 0/1
+ _send_traffic_check(cfg, port_main, "context 0",
+ { 'target': (0, 1),
+ 'empty' : (2, 3) })
+
+ # And that traffic for any other port is steered to the new context
+ port_other = rand_port()
+ _send_traffic_check(cfg, port_other, f"context {ctx_id}",
+ { 'target': (2, 3),
+ 'noise' : (0, 1) })
+
+
def main() -> None:
with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
cfg.context_cnt = None
@@ -760,7 +816,8 @@ def main() -> None:
test_rss_context_overlap, test_rss_context_overlap2,
test_rss_context_out_of_order, test_rss_context4_create_with_cfg,
test_flow_add_context_missing,
- test_delete_rss_context_busy, test_rss_ntuple_addition],
+ test_delete_rss_context_busy, test_rss_ntuple_addition,
+ test_rss_default_context_rule],
args=(cfg, ))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
index f439c434ba36..648ff50bc1c3 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
@@ -38,7 +38,7 @@ def test_rss_input_xfrm(cfg, ipver):
raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11")
input_xfrm = cfg.ethnl.rss_get(
- {'header': {'dev-name': cfg.ifname}}).get('input_xfrm')
+ {'header': {'dev-name': cfg.ifname}}).get('input-xfrm')
# Check for symmetric xor/or-xor
if not input_xfrm or (input_xfrm != 1 and input_xfrm != 2):
diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py
index e1ecb92f79d9..3370827409aa 100755
--- a/tools/testing/selftests/drivers/net/hw/tso.py
+++ b/tools/testing/selftests/drivers/net/hw/tso.py
@@ -39,7 +39,7 @@ def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso):
port = rand_port()
listen_cmd = f"socat -{ipver} -t 2 -u TCP-LISTEN:{port},reuseport /dev/null,ignoreeof"
- with bkg(listen_cmd, host=cfg.remote) as nc:
+ with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc:
wait_port_listen(port, host=cfg.remote)
if ipver == "4":
@@ -216,7 +216,7 @@ def main() -> None:
("", "6", "tx-tcp6-segmentation", None),
("vxlan", "", "tx-udp_tnl-segmentation", ("vxlan", True, "id 100 dstport 4789 noudpcsum")),
("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", False, "id 100 dstport 4789 udpcsum")),
- ("gre", "4", "tx-gre-segmentation", ("ipgre", False, "")),
+ ("gre", "4", "tx-gre-segmentation", ("gre", False, "")),
("gre", "6", "tx-gre-segmentation", ("ip6gre", False, "")),
)
diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
index 1bb46ec435d4..7f32b5600925 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/peer.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
@@ -1,7 +1,8 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0-only
-source ../../../net/lib.sh
+lib_dir=$(dirname $0)/../../../net
+source $lib_dir/lib.sh
NSIM_DEV_1_ID=$((256 + RANDOM % 256))
NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID
diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
index 7b24ae89594a..776ad658f75e 100644
--- a/tools/testing/selftests/futex/functional/.gitignore
+++ b/tools/testing/selftests/futex/functional/.gitignore
@@ -11,3 +11,4 @@ futex_wait_timeout
futex_wait_uninitialized_heap
futex_wait_wouldblock
futex_waitv
+futex_numa
diff --git a/tools/testing/selftests/futex/functional/futex_numa_mpol.c b/tools/testing/selftests/futex/functional/futex_numa_mpol.c
index 20a9d3ecf743..a9ecfb2d3932 100644
--- a/tools/testing/selftests/futex/functional/futex_numa_mpol.c
+++ b/tools/testing/selftests/futex/functional/futex_numa_mpol.c
@@ -144,7 +144,7 @@ int main(int argc, char *argv[])
struct futex32_numa *futex_numa;
int mem_size, i;
void *futex_ptr;
- char c;
+ int c;
while ((c = getopt(argc, argv, "chv:")) != -1) {
switch (c) {
@@ -210,6 +210,10 @@ int main(int argc, char *argv[])
ret = mbind(futex_ptr, mem_size, MPOL_BIND, &nodemask,
sizeof(nodemask) * 8, 0);
if (ret == 0) {
+ ret = numa_set_mempolicy_home_node(futex_ptr, mem_size, i, 0);
+ if (ret != 0)
+ ksft_exit_fail_msg("Failed to set home node: %m, %d\n", errno);
+
ksft_print_msg("Node %d test\n", i);
futex_numa->futex = 0;
futex_numa->numa = FUTEX_NO_NODE;
@@ -220,8 +224,8 @@ int main(int argc, char *argv[])
if (0)
test_futex_mpol(futex_numa, 0);
if (futex_numa->numa != i) {
- ksft_test_result_fail("Returned NUMA node is %d expected %d\n",
- futex_numa->numa, i);
+ ksft_exit_fail_msg("Returned NUMA node is %d expected %d\n",
+ futex_numa->numa, i);
}
}
}
diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c
index 2dca18fefedc..24a92dc94eb8 100644
--- a/tools/testing/selftests/futex/functional/futex_priv_hash.c
+++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c
@@ -130,7 +130,7 @@ int main(int argc, char *argv[])
pthread_mutexattr_t mutex_attr_pi;
int use_global_hash = 0;
int ret;
- char c;
+ int c;
while ((c = getopt(argc, argv, "cghv:")) != -1) {
switch (c) {
diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h
index ea79662405bc..1f625b39948a 100644
--- a/tools/testing/selftests/futex/include/futex2test.h
+++ b/tools/testing/selftests/futex/include/futex2test.h
@@ -4,6 +4,7 @@
*
* Copyright 2021 Collabora Ltd.
*/
+#include <linux/time_types.h>
#include <stdint.h>
#define u64_to_ptr(x) ((void *)(uintptr_t)(x))
@@ -65,7 +66,12 @@ struct futex32_numa {
static inline int futex_waitv(volatile struct futex_waitv *waiters, unsigned long nr_waiters,
unsigned long flags, struct timespec *timo, clockid_t clockid)
{
- return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo, clockid);
+ struct __kernel_timespec ts = {
+ .tv_sec = timo->tv_sec,
+ .tv_nsec = timo->tv_nsec,
+ };
+
+ return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, &ts, clockid);
}
/*
diff --git a/tools/testing/selftests/hid/tests/test_mouse.py b/tools/testing/selftests/hid/tests/test_mouse.py
index 66daf7e5975c..eb4e15a0e53b 100644
--- a/tools/testing/selftests/hid/tests/test_mouse.py
+++ b/tools/testing/selftests/hid/tests/test_mouse.py
@@ -439,6 +439,68 @@ class BadResolutionMultiplierMouse(ResolutionMultiplierMouse):
return 32 # EPIPE
+class BadReportDescriptorMouse(BaseMouse):
+ """
+ This "device" was one autogenerated by syzbot. There are a lot of issues in
+ it, and the most problematic is that it declares features that have no
+ size.
+
+ This leads to report->size being set to 0 and can mess up with usbhid
+ internals. Fortunately, uhid merely passes the incoming buffer, without
+ touching it so a buffer of size 0 will be translated to [] without
+ triggering a kernel oops.
+
+ Because the report descriptor is wrong, no input are created, and we need
+ to tweak a little bit the parameters to make it look correct.
+ """
+
+ # fmt: off
+ report_descriptor = [
+ 0x96, 0x01, 0x00, # Report Count (1) 0
+ 0x06, 0x01, 0x00, # Usage Page (Generic Desktop) 3
+ # 0x03, 0x00, 0x00, 0x00, 0x00, # Ignored by the kernel somehow
+ 0x2a, 0x90, 0xa0, # Usage Maximum (41104) 6
+ 0x27, 0x00, 0x00, 0x00, 0x00, # Logical Maximum (0) 9
+ 0xb3, 0x81, 0x3e, 0x25, 0x03, # Feature (Cnst,Arr,Abs,Vol) 14
+ 0x1b, 0xdd, 0xe8, 0x40, 0x50, # Usage Minimum (1346431197) 19
+ 0x3b, 0x5d, 0x8c, 0x3d, 0xda, # Designator Index 24
+ ]
+ # fmt: on
+
+ def __init__(
+ self, rdesc=report_descriptor, name=None, input_info=(3, 0x045E, 0x07DA)
+ ):
+ super().__init__(rdesc, name, input_info)
+ self.high_resolution_report_called = False
+
+ def get_evdev(self, application=None):
+ assert self._input_nodes is None
+ return (
+ "Ok" # should be a list or None, but both would fail, so abusing the system
+ )
+
+ def next_sync_events(self, application=None):
+ # there are no evdev nodes, so no events
+ return []
+
+ def is_ready(self):
+ # we wait for the SET_REPORT command to come
+ return self.high_resolution_report_called
+
+ def set_report(self, req, rnum, rtype, data):
+ if rtype != self.UHID_FEATURE_REPORT:
+ raise InvalidHIDCommunication(f"Unexpected report type: {rtype}")
+ if rnum != 0x0:
+ raise InvalidHIDCommunication(f"Unexpected report number: {rnum}")
+
+ if len(data) != 1:
+ raise InvalidHIDCommunication(f"Unexpected data: {data}, expected '[0]'")
+
+ self.high_resolution_report_called = True
+
+ return 0
+
+
class ResolutionMultiplierHWheelMouse(TwoWheelMouse):
# fmt: off
report_descriptor = [
@@ -975,3 +1037,11 @@ class TestMiMouse(TestWheelMouse):
# assert below print out the real error
pass
assert remaining == []
+
+
+class TestBadReportDescriptorMouse(base.BaseTestCase.TestUhid):
+ def create_device(self):
+ return BadReportDescriptorMouse()
+
+ def assertName(self, uhdev):
+ pass
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
index 1a8e85afe9aa..1926ef6b40ab 100644
--- a/tools/testing/selftests/iommu/iommufd.c
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -54,6 +54,8 @@ static __attribute__((constructor)) void setup_sizes(void)
mfd_buffer = memfd_mmap(BUFFER_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
&mfd);
+ assert(mfd_buffer != MAP_FAILED);
+ assert(mfd > 0);
}
FIXTURE(iommufd)
@@ -1746,13 +1748,15 @@ TEST_F(iommufd_mock_domain, all_aligns)
unsigned int end;
uint8_t *buf;
int prot = PROT_READ | PROT_WRITE;
- int mfd;
+ int mfd = -1;
if (variant->file)
buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd);
else
buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0);
ASSERT_NE(MAP_FAILED, buf);
+ if (variant->file)
+ ASSERT_GT(mfd, 0);
check_refs(buf, buf_size, 0);
/*
@@ -1798,13 +1802,15 @@ TEST_F(iommufd_mock_domain, all_aligns_copy)
unsigned int end;
uint8_t *buf;
int prot = PROT_READ | PROT_WRITE;
- int mfd;
+ int mfd = -1;
if (variant->file)
buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd);
else
buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0);
ASSERT_NE(MAP_FAILED, buf);
+ if (variant->file)
+ ASSERT_GT(mfd, 0);
check_refs(buf, buf_size, 0);
/*
@@ -2008,6 +2014,7 @@ FIXTURE_VARIANT(iommufd_dirty_tracking)
FIXTURE_SETUP(iommufd_dirty_tracking)
{
+ size_t mmap_buffer_size;
unsigned long size;
int mmap_flags;
void *vrc;
@@ -2022,22 +2029,33 @@ FIXTURE_SETUP(iommufd_dirty_tracking)
self->fd = open("/dev/iommu", O_RDWR);
ASSERT_NE(-1, self->fd);
- rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, variant->buffer_size);
- if (rc || !self->buffer) {
- SKIP(return, "Skipping buffer_size=%lu due to errno=%d",
- variant->buffer_size, rc);
- }
-
mmap_flags = MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED;
+ mmap_buffer_size = variant->buffer_size;
if (variant->hugepages) {
/*
* MAP_POPULATE will cause the kernel to fail mmap if THPs are
* not available.
*/
mmap_flags |= MAP_HUGETLB | MAP_POPULATE;
+
+ /*
+ * Allocation must be aligned to the HUGEPAGE_SIZE, because the
+ * following mmap() will automatically align the length to be a
+ * multiple of the underlying huge page size. Failing to do the
+ * same at this allocation will result in a memory overwrite by
+ * the mmap().
+ */
+ if (mmap_buffer_size < HUGEPAGE_SIZE)
+ mmap_buffer_size = HUGEPAGE_SIZE;
+ }
+
+ rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, mmap_buffer_size);
+ if (rc || !self->buffer) {
+ SKIP(return, "Skipping buffer_size=%lu due to errno=%d",
+ mmap_buffer_size, rc);
}
assert((uintptr_t)self->buffer % HUGEPAGE_SIZE == 0);
- vrc = mmap(self->buffer, variant->buffer_size, PROT_READ | PROT_WRITE,
+ vrc = mmap(self->buffer, mmap_buffer_size, PROT_READ | PROT_WRITE,
mmap_flags, -1, 0);
assert(vrc == self->buffer);
@@ -2066,8 +2084,8 @@ FIXTURE_SETUP(iommufd_dirty_tracking)
FIXTURE_TEARDOWN(iommufd_dirty_tracking)
{
- munmap(self->buffer, variant->buffer_size);
- munmap(self->bitmap, DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE));
+ free(self->buffer);
+ free(self->bitmap);
teardown_iommufd(self->fd, _metadata);
}
diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h
index 72f6636e5d90..6e967b58acfd 100644
--- a/tools/testing/selftests/iommu/iommufd_utils.h
+++ b/tools/testing/selftests/iommu/iommufd_utils.h
@@ -60,13 +60,18 @@ static inline void *memfd_mmap(size_t length, int prot, int flags, int *mfd_p)
{
int mfd_flags = (flags & MAP_HUGETLB) ? MFD_HUGETLB : 0;
int mfd = memfd_create("buffer", mfd_flags);
+ void *buf = MAP_FAILED;
if (mfd <= 0)
return MAP_FAILED;
if (ftruncate(mfd, length))
- return MAP_FAILED;
+ goto out;
*mfd_p = mfd;
- return mmap(0, length, prot, flags, mfd, 0);
+ buf = mmap(0, length, prot, flags, mfd, 0);
+out:
+ if (buf == MAP_FAILED)
+ close(mfd);
+ return buf;
}
/*
diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
index a36a7e2db434..4e71740a098b 100644
--- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
+++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
@@ -22,7 +22,8 @@
#include "gic.h"
#include "vgic.h"
-static const uint64_t CVAL_MAX = ~0ULL;
+/* Depends on counter width. */
+static uint64_t CVAL_MAX;
/* tval is a signed 32-bit int. */
static const int32_t TVAL_MAX = INT32_MAX;
static const int32_t TVAL_MIN = INT32_MIN;
@@ -30,8 +31,8 @@ static const int32_t TVAL_MIN = INT32_MIN;
/* After how much time we say there is no IRQ. */
static const uint32_t TIMEOUT_NO_IRQ_US = 50000;
-/* A nice counter value to use as the starting one for most tests. */
-static const uint64_t DEF_CNT = (CVAL_MAX / 2);
+/* Counter value to use as the starting one for most tests. Set to CVAL_MAX/2 */
+static uint64_t DEF_CNT;
/* Number of runs. */
static const uint32_t NR_TEST_ITERS_DEF = 5;
@@ -191,8 +192,8 @@ static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles,
{
atomic_set(&shared_data.handled, 0);
atomic_set(&shared_data.spurious, 0);
- timer_set_ctl(timer, ctl);
timer_set_tval(timer, tval_cycles);
+ timer_set_ctl(timer, ctl);
}
static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl,
@@ -732,12 +733,6 @@ static void test_move_counters_ahead_of_timers(enum arch_timer timer)
test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1,
wm);
}
-
- for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
- sleep_method_t sm = sleep_method[i];
-
- test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm);
- }
}
/*
@@ -849,17 +844,17 @@ static void guest_code(enum arch_timer timer)
GUEST_DONE();
}
+static cpu_set_t default_cpuset;
+
static uint32_t next_pcpu(void)
{
uint32_t max = get_nprocs();
uint32_t cur = sched_getcpu();
uint32_t next = cur;
- cpu_set_t cpuset;
+ cpu_set_t cpuset = default_cpuset;
TEST_ASSERT(max > 1, "Need at least two physical cpus");
- sched_getaffinity(0, sizeof(cpuset), &cpuset);
-
do {
next = (next + 1) % CPU_SETSIZE;
} while (!CPU_ISSET(next, &cpuset));
@@ -959,6 +954,8 @@ static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
}
+static int gic_fd;
+
static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu,
enum arch_timer timer)
{
@@ -973,8 +970,18 @@ static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu,
vcpu_args_set(*vcpu, 1, timer);
test_init_timer_irq(*vm, *vcpu);
- vgic_v3_setup(*vm, 1, 64);
+ gic_fd = vgic_v3_setup(*vm, 1, 64);
+ __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3");
+
sync_global_to_guest(*vm, test_args);
+ sync_global_to_guest(*vm, CVAL_MAX);
+ sync_global_to_guest(*vm, DEF_CNT);
+}
+
+static void test_vm_cleanup(struct kvm_vm *vm)
+{
+ close(gic_fd);
+ kvm_vm_free(vm);
}
static void test_print_help(char *name)
@@ -986,7 +993,7 @@ static void test_print_help(char *name)
pr_info("\t-b: Test both physical and virtual timers (default: true)\n");
pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n",
LONG_WAIT_TEST_MS);
- pr_info("\t-l: Delta (in ms) used for wait times (default: %u)\n",
+ pr_info("\t-w: Delta (in ms) used for wait times (default: %u)\n",
WAIT_TEST_MS);
pr_info("\t-p: Test physical timer (default: true)\n");
pr_info("\t-v: Test virtual timer (default: true)\n");
@@ -1035,6 +1042,17 @@ static bool parse_args(int argc, char *argv[])
return false;
}
+static void set_counter_defaults(void)
+{
+ const uint64_t MIN_ROLLOVER_SECS = 40ULL * 365 * 24 * 3600;
+ uint64_t freq = read_sysreg(CNTFRQ_EL0);
+ uint64_t width = ilog2(MIN_ROLLOVER_SECS * freq);
+
+ width = clamp(width, 56, 64);
+ CVAL_MAX = GENMASK_ULL(width - 1, 0);
+ DEF_CNT = CVAL_MAX / 2;
+}
+
int main(int argc, char *argv[])
{
struct kvm_vcpu *vcpu;
@@ -1046,16 +1064,19 @@ int main(int argc, char *argv[])
if (!parse_args(argc, argv))
exit(KSFT_SKIP);
+ sched_getaffinity(0, sizeof(default_cpuset), &default_cpuset);
+ set_counter_defaults();
+
if (test_args.test_virtual) {
test_vm_create(&vm, &vcpu, VIRTUAL);
test_run(vm, vcpu);
- kvm_vm_free(vm);
+ test_vm_cleanup(vm);
}
if (test_args.test_physical) {
test_vm_create(&vm, &vcpu, PHYSICAL);
test_run(vm, vcpu);
- kvm_vm_free(vm);
+ test_vm_cleanup(vm);
}
return 0;
diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
index 390ae2d87493..0eb371c62ab8 100644
--- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
+++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
@@ -74,6 +74,7 @@ int main(int argc, char *argv[])
int testcase;
char test[80];
+ TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
ksft_print_header();
diff --git a/tools/testing/selftests/mm/config b/tools/testing/selftests/mm/config
index a28baa536332..deba93379c80 100644
--- a/tools/testing/selftests/mm/config
+++ b/tools/testing/selftests/mm/config
@@ -8,3 +8,6 @@ CONFIG_GUP_TEST=y
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_MEM_SOFT_DIRTY=y
CONFIG_ANON_VMA_NAME=y
+CONFIG_FTRACE=y
+CONFIG_PROFILING=y
+CONFIG_UPROBES=y
diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c
index 8a97ac5176a4..29047d2e0c49 100644
--- a/tools/testing/selftests/mm/gup_longterm.c
+++ b/tools/testing/selftests/mm/gup_longterm.c
@@ -298,8 +298,11 @@ static void run_with_memfd(test_fn fn, const char *desc)
log_test_start("%s ... with memfd", desc);
fd = memfd_create("test", 0);
- if (fd < 0)
+ if (fd < 0) {
ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno));
+ log_test_result(KSFT_SKIP);
+ return;
+ }
fn(fd, pagesize);
close(fd);
@@ -366,6 +369,8 @@ static void run_with_memfd_hugetlb(test_fn fn, const char *desc,
fd = memfd_create("test", flags);
if (fd < 0) {
ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno));
+ log_test_result(KSFT_SKIP);
+ return;
}
fn(fd, hugetlbsize);
diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c
index dcdd5bb20f3d..e80deac1436b 100644
--- a/tools/testing/selftests/mm/ksm_tests.c
+++ b/tools/testing/selftests/mm/ksm_tests.c
@@ -58,40 +58,12 @@ int debug;
static int ksm_write_sysfs(const char *file_path, unsigned long val)
{
- FILE *f = fopen(file_path, "w");
-
- if (!f) {
- fprintf(stderr, "f %s\n", file_path);
- perror("fopen");
- return 1;
- }
- if (fprintf(f, "%lu", val) < 0) {
- perror("fprintf");
- fclose(f);
- return 1;
- }
- fclose(f);
-
- return 0;
+ return write_sysfs(file_path, val);
}
static int ksm_read_sysfs(const char *file_path, unsigned long *val)
{
- FILE *f = fopen(file_path, "r");
-
- if (!f) {
- fprintf(stderr, "f %s\n", file_path);
- perror("fopen");
- return 1;
- }
- if (fscanf(f, "%lu", val) != 1) {
- perror("fscanf");
- fclose(f);
- return 1;
- }
- fclose(f);
-
- return 0;
+ return read_sysfs(file_path, val);
}
static void ksm_print_sysfs(void)
diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c
index c76646cdf6e6..cc26480098ae 100644
--- a/tools/testing/selftests/mm/merge.c
+++ b/tools/testing/selftests/mm/merge.c
@@ -2,11 +2,14 @@
#define _GNU_SOURCE
#include "../kselftest_harness.h"
+#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/mman.h>
+#include <sys/syscall.h>
#include <sys/wait.h>
+#include <linux/perf_event.h>
#include "vm_util.h"
FIXTURE(merge)
@@ -452,4 +455,47 @@ TEST_F(merge, forked_source_vma)
ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 5 * page_size);
}
+TEST_F(merge, handle_uprobe_upon_merged_vma)
+{
+ const size_t attr_sz = sizeof(struct perf_event_attr);
+ unsigned int page_size = self->page_size;
+ const char *probe_file = "./foo";
+ char *carveout = self->carveout;
+ struct perf_event_attr attr;
+ unsigned long type;
+ void *ptr1, *ptr2;
+ int fd;
+
+ fd = open(probe_file, O_RDWR|O_CREAT, 0600);
+ ASSERT_GE(fd, 0);
+
+ ASSERT_EQ(ftruncate(fd, page_size), 0);
+ if (read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type) != 0) {
+ SKIP(goto out, "Failed to read uprobe sysfs file, skipping");
+ }
+
+ memset(&attr, 0, attr_sz);
+ attr.size = attr_sz;
+ attr.type = type;
+ attr.config1 = (__u64)(long)probe_file;
+ attr.config2 = 0x0;
+
+ ASSERT_GE(syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0), 0);
+
+ ptr1 = mmap(&carveout[page_size], 10 * page_size, PROT_EXEC,
+ MAP_PRIVATE | MAP_FIXED, fd, 0);
+ ASSERT_NE(ptr1, MAP_FAILED);
+
+ ptr2 = mremap(ptr1, page_size, 2 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, ptr1 + 5 * page_size);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ ASSERT_NE(mremap(ptr2, page_size, page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, ptr1), MAP_FAILED);
+
+out:
+ close(fd);
+ remove(probe_file);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/settings b/tools/testing/selftests/mm/settings
index a953c96aa16e..e2206265f67c 100644
--- a/tools/testing/selftests/mm/settings
+++ b/tools/testing/selftests/mm/settings
@@ -1 +1 @@
-timeout=180
+timeout=900
diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c
index a41bc1234b37..95b6f043a3cb 100644
--- a/tools/testing/selftests/mm/thuge-gen.c
+++ b/tools/testing/selftests/mm/thuge-gen.c
@@ -77,7 +77,7 @@ void show(unsigned long ps)
system(buf);
}
-unsigned long read_sysfs(int warn, char *fmt, ...)
+unsigned long thuge_read_sysfs(int warn, char *fmt, ...)
{
char *line = NULL;
size_t linelen = 0;
@@ -106,7 +106,7 @@ unsigned long read_sysfs(int warn, char *fmt, ...)
unsigned long read_free(unsigned long ps)
{
- return read_sysfs(ps != getpagesize(),
+ return thuge_read_sysfs(ps != getpagesize(),
"/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages",
ps >> 10);
}
@@ -195,7 +195,7 @@ void find_pagesizes(void)
}
globfree(&g);
- if (read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest)
+ if (thuge_read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest)
ksft_exit_fail_msg("Please do echo %lu > /proc/sys/kernel/shmmax",
largest * NUM_PAGES);
diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c
index b380e102b22f..169dbd692bf5 100644
--- a/tools/testing/selftests/mm/virtual_address_range.c
+++ b/tools/testing/selftests/mm/virtual_address_range.c
@@ -77,8 +77,11 @@ static void validate_addr(char *ptr, int high_addr)
{
unsigned long addr = (unsigned long) ptr;
- if (high_addr && addr < HIGH_ADDR_MARK)
- ksft_exit_fail_msg("Bad address %lx\n", addr);
+ if (high_addr) {
+ if (addr < HIGH_ADDR_MARK)
+ ksft_exit_fail_msg("Bad address %lx\n", addr);
+ return;
+ }
if (addr > HIGH_ADDR_MARK)
ksft_exit_fail_msg("Bad address %lx\n", addr);
diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
index 61d7bf1f8c62..5492e3f784df 100644
--- a/tools/testing/selftests/mm/vm_util.c
+++ b/tools/testing/selftests/mm/vm_util.c
@@ -486,3 +486,41 @@ int close_procmap(struct procmap_fd *procmap)
{
return close(procmap->fd);
}
+
+int write_sysfs(const char *file_path, unsigned long val)
+{
+ FILE *f = fopen(file_path, "w");
+
+ if (!f) {
+ fprintf(stderr, "f %s\n", file_path);
+ perror("fopen");
+ return 1;
+ }
+ if (fprintf(f, "%lu", val) < 0) {
+ perror("fprintf");
+ fclose(f);
+ return 1;
+ }
+ fclose(f);
+
+ return 0;
+}
+
+int read_sysfs(const char *file_path, unsigned long *val)
+{
+ FILE *f = fopen(file_path, "r");
+
+ if (!f) {
+ fprintf(stderr, "f %s\n", file_path);
+ perror("fopen");
+ return 1;
+ }
+ if (fscanf(f, "%lu", val) != 1) {
+ perror("fscanf");
+ fclose(f);
+ return 1;
+ }
+ fclose(f);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
index adb5d294a220..b8136d12a0f8 100644
--- a/tools/testing/selftests/mm/vm_util.h
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -88,6 +88,8 @@ int open_procmap(pid_t pid, struct procmap_fd *procmap_out);
int query_procmap(struct procmap_fd *procmap);
bool find_vma_procmap(struct procmap_fd *procmap, void *address);
int close_procmap(struct procmap_fd *procmap);
+int write_sysfs(const char *file_path, unsigned long val);
+int read_sysfs(const char *file_path, unsigned long *val);
static inline int open_self_procmap(struct procmap_fd *procmap_out)
{
diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
index 8b378c91debf..b1e4618399be 100644
--- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c
+++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@@ -2079,24 +2079,9 @@ TEST_F(mount_setattr, detached_tree_propagation)
* means that the device information will be different for any
* statx() that was taken from /mnt/A before the mount compared
* to one after the mount.
- *
- * Since we already now that the device information between the
- * stx1 and stx2 samples are identical we also now that stx2 and
- * stx3 device information will necessarily differ.
*/
ASSERT_NE(stx1.stx_dev_minor, stx3.stx_dev_minor);
-
- /*
- * If mount propagation worked correctly then the tmpfs mount
- * that was created after the mount namespace was unshared will
- * have propagated onto /mnt/A in the detached mount tree.
- *
- * Verify that the device information for stx3 and stx4 are
- * identical. It is already established that stx3 is different
- * from both stx1 and stx2 sampled before the tmpfs mount was
- * done so if stx3 and stx4 are identical the proof is done.
- */
- ASSERT_EQ(stx3.stx_dev_minor, stx4.stx_dev_minor);
+ ASSERT_EQ(stx1.stx_dev_minor, stx4.stx_dev_minor);
EXPECT_EQ(close(fd_tree), 0);
}
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 532bb732bc6d..c6dd2a335cf4 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -50,6 +50,7 @@ tap
tcp_fastopen_backup_key
tcp_inq
tcp_mmap
+tfo
timestamping
tls
toeplitz
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index ea84b88bcb30..332f387615d7 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -27,6 +27,7 @@ TEST_PROGS += amt.sh
TEST_PROGS += unicast_extensions.sh
TEST_PROGS += udpgro_fwd.sh
TEST_PROGS += udpgro_frglist.sh
+TEST_PROGS += nat6to4.sh
TEST_PROGS += veth.sh
TEST_PROGS += ioam6.sh
TEST_PROGS += gro.sh
@@ -109,6 +110,8 @@ TEST_GEN_PROGS += proc_net_pktgen
TEST_PROGS += lwt_dst_cache_ref_loop.sh
TEST_PROGS += skf_net_off.sh
TEST_GEN_FILES += skf_net_off
+TEST_GEN_FILES += tfo
+TEST_PROGS += tfo_passive.sh
# YNL files, must be before "include ..lib.mk"
YNL_GEN_FILES := busy_poller netlink-dumps
diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c
index 3ed3882a93b8..b5f474969917 100644
--- a/tools/testing/selftests/net/af_unix/msg_oob.c
+++ b/tools/testing/selftests/net/af_unix/msg_oob.c
@@ -210,7 +210,7 @@ static void __sendpair(struct __test_metadata *_metadata,
static void __recvpair(struct __test_metadata *_metadata,
FIXTURE_DATA(msg_oob) *self,
const char *expected_buf, int expected_len,
- int buf_len, int flags)
+ int buf_len, int flags, bool is_sender)
{
int i, ret[2], recv_errno[2], expected_errno = 0;
char recv_buf[2][BUF_SZ] = {};
@@ -221,7 +221,9 @@ static void __recvpair(struct __test_metadata *_metadata,
errno = 0;
for (i = 0; i < 2; i++) {
- ret[i] = recv(self->fd[i * 2 + 1], recv_buf[i], buf_len, flags);
+ int index = is_sender ? i * 2 : i * 2 + 1;
+
+ ret[i] = recv(self->fd[index], recv_buf[i], buf_len, flags);
recv_errno[i] = errno;
}
@@ -308,6 +310,20 @@ static void __siocatmarkpair(struct __test_metadata *_metadata,
ASSERT_EQ(answ[0], answ[1]);
}
+static void __resetpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self,
+ const FIXTURE_VARIANT(msg_oob) *variant,
+ bool reset)
+{
+ int i;
+
+ for (i = 0; i < 2; i++)
+ close(self->fd[i * 2 + 1]);
+
+ __recvpair(_metadata, self, "", reset ? -ECONNRESET : 0, 1,
+ variant->peek ? MSG_PEEK : 0, true);
+}
+
#define sendpair(buf, len, flags) \
__sendpair(_metadata, self, buf, len, flags)
@@ -316,9 +332,10 @@ static void __siocatmarkpair(struct __test_metadata *_metadata,
if (variant->peek) \
__recvpair(_metadata, self, \
expected_buf, expected_len, \
- buf_len, (flags) | MSG_PEEK); \
+ buf_len, (flags) | MSG_PEEK, false); \
__recvpair(_metadata, self, \
- expected_buf, expected_len, buf_len, flags); \
+ expected_buf, expected_len, \
+ buf_len, flags, false); \
} while (0)
#define epollpair(oob_remaining) \
@@ -330,6 +347,9 @@ static void __siocatmarkpair(struct __test_metadata *_metadata,
#define setinlinepair() \
__setinlinepair(_metadata, self)
+#define resetpair(reset) \
+ __resetpair(_metadata, self, variant, reset)
+
#define tcp_incompliant \
for (self->tcp_compliant = false; \
self->tcp_compliant == false; \
@@ -344,6 +364,21 @@ TEST_F(msg_oob, non_oob)
recvpair("", -EINVAL, 1, MSG_OOB);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(true);
+}
+
+TEST_F(msg_oob, non_oob_no_reset)
+{
+ sendpair("x", 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("x", 1, 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, oob)
@@ -355,6 +390,19 @@ TEST_F(msg_oob, oob)
recvpair("x", 1, 1, MSG_OOB);
epollpair(false);
siocatmarkpair(true);
+
+ tcp_incompliant {
+ resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */
+ }
+}
+
+TEST_F(msg_oob, oob_reset)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ resetpair(true);
}
TEST_F(msg_oob, oob_drop)
@@ -370,6 +418,8 @@ TEST_F(msg_oob, oob_drop)
recvpair("", -EINVAL, 1, MSG_OOB);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, oob_ahead)
@@ -385,6 +435,10 @@ TEST_F(msg_oob, oob_ahead)
recvpair("hell", 4, 4, 0);
epollpair(false);
siocatmarkpair(true);
+
+ tcp_incompliant {
+ resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */
+ }
}
TEST_F(msg_oob, oob_break)
@@ -403,6 +457,8 @@ TEST_F(msg_oob, oob_break)
recvpair("", -EAGAIN, 1, 0);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, oob_ahead_break)
@@ -426,6 +482,8 @@ TEST_F(msg_oob, oob_ahead_break)
recvpair("world", 5, 5, 0);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, oob_break_drop)
@@ -449,6 +507,8 @@ TEST_F(msg_oob, oob_break_drop)
recvpair("", -EINVAL, 1, MSG_OOB);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, ex_oob_break)
@@ -476,6 +536,8 @@ TEST_F(msg_oob, ex_oob_break)
recvpair("ld", 2, 2, 0);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, ex_oob_drop)
@@ -498,6 +560,8 @@ TEST_F(msg_oob, ex_oob_drop)
epollpair(false);
siocatmarkpair(true);
}
+
+ resetpair(false);
}
TEST_F(msg_oob, ex_oob_drop_2)
@@ -523,6 +587,8 @@ TEST_F(msg_oob, ex_oob_drop_2)
epollpair(false);
siocatmarkpair(true);
}
+
+ resetpair(false);
}
TEST_F(msg_oob, ex_oob_oob)
@@ -546,6 +612,54 @@ TEST_F(msg_oob, ex_oob_oob)
recvpair("", -EINVAL, 1, MSG_OOB);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
+}
+
+TEST_F(msg_oob, ex_oob_ex_oob)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("x", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ sendpair("y", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("y", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ tcp_incompliant {
+ resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */
+ }
+}
+
+TEST_F(msg_oob, ex_oob_ex_oob_oob)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("x", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ sendpair("y", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("y", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ sendpair("z", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
}
TEST_F(msg_oob, ex_oob_ahead_break)
@@ -576,6 +690,10 @@ TEST_F(msg_oob, ex_oob_ahead_break)
recvpair("d", 1, 1, MSG_OOB);
epollpair(false);
siocatmarkpair(true);
+
+ tcp_incompliant {
+ resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */
+ }
}
TEST_F(msg_oob, ex_oob_siocatmark)
@@ -595,6 +713,8 @@ TEST_F(msg_oob, ex_oob_siocatmark)
recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */
epollpair(true);
siocatmarkpair(false);
+
+ resetpair(true);
}
TEST_F(msg_oob, inline_oob)
@@ -612,6 +732,8 @@ TEST_F(msg_oob, inline_oob)
recvpair("x", 1, 1, 0);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, inline_oob_break)
@@ -633,6 +755,8 @@ TEST_F(msg_oob, inline_oob_break)
recvpair("o", 1, 1, 0);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, inline_oob_ahead_break)
@@ -661,6 +785,8 @@ TEST_F(msg_oob, inline_oob_ahead_break)
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, inline_ex_oob_break)
@@ -686,6 +812,8 @@ TEST_F(msg_oob, inline_ex_oob_break)
recvpair("rld", 3, 3, 0);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, inline_ex_oob_no_drop)
@@ -707,6 +835,8 @@ TEST_F(msg_oob, inline_ex_oob_no_drop)
recvpair("y", 1, 1, 0);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, inline_ex_oob_drop)
@@ -731,6 +861,8 @@ TEST_F(msg_oob, inline_ex_oob_drop)
epollpair(false);
siocatmarkpair(false);
}
+
+ resetpair(false);
}
TEST_F(msg_oob, inline_ex_oob_siocatmark)
@@ -752,6 +884,8 @@ TEST_F(msg_oob, inline_ex_oob_siocatmark)
recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */
epollpair(true);
siocatmarkpair(false);
+
+ resetpair(true);
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh
index 5b34f6e1f831..48eb999a3120 100755
--- a/tools/testing/selftests/net/gre_ipv6_lladdr.sh
+++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh
@@ -24,7 +24,10 @@ setup_basenet()
ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad
}
-# Check if network device has an IPv6 link-local address assigned.
+# Check the IPv6 configuration of a network device.
+#
+# We currently check the generation of the link-local IPv6 address and the
+# creation of the ff00::/8 multicast route.
#
# Parameters:
#
@@ -35,7 +38,7 @@ setup_basenet()
# a link-local address)
# * $4: The user visible name for the scenario being tested
#
-check_ipv6_ll_addr()
+check_ipv6_device_config()
{
local DEV="$1"
local EXTRA_MATCH="$2"
@@ -45,7 +48,11 @@ check_ipv6_ll_addr()
RET=0
set +e
ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}"
- check_err_fail "${XRET}" $? ""
+ check_err_fail "${XRET}" $? "IPv6 link-local address generation"
+
+ ip -netns "${NS0}" -6 route show table local type multicast ff00::/8 proto kernel | grep -q "${DEV}"
+ check_err_fail 0 $? "IPv6 multicast route creation"
+
log_test "${MSG}"
set -e
}
@@ -102,20 +109,20 @@ test_gre_device()
;;
esac
- # Check that IPv6 link-local address is generated when device goes up
+ # Check the IPv6 device configuration when it goes up
ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}"
ip -netns "${NS0}" link set dev gretest up
- check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}"
+ check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}"
# Now disable link-local address generation
ip -netns "${NS0}" link set dev gretest down
ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1
ip -netns "${NS0}" link set dev gretest up
- # Check that link-local address generation works when re-enabled while
- # the device is already up
+ # Check the IPv6 device configuration when link-local address
+ # generation is re-enabled while the device is already up
ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}"
- check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}"
+ check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}"
ip -netns "${NS0}" link del dev gretest
}
@@ -126,7 +133,7 @@ test_gre4()
local MODE
for GRE_TYPE in "gre" "gretap"; do
- printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n"
+ printf "\n####\nTesting IPv6 configuration of ${GRE_TYPE} devices\n####\n\n"
for MODE in "eui64" "none" "stable-privacy" "random"; do
test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}"
@@ -142,7 +149,7 @@ test_gre6()
local MODE
for GRE_TYPE in "ip6gre" "ip6gretap"; do
- printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n"
+ printf "\n####\nTesting IPv6 configuration of ${GRE_TYPE} devices\n####\n\n"
for MODE in "eui64" "none" "stable-privacy" "random"; do
test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}"
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index 006fdadcc4b9..86a216e9aca8 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -312,7 +312,7 @@ log_test_result()
local test_name=$1; shift
local opt_str=$1; shift
local result=$1; shift
- local retmsg=$1; shift
+ local retmsg=$1
printf "TEST: %-60s [%s]\n" "$test_name $opt_str" "$result"
if [[ $retmsg ]]; then
diff --git a/tools/testing/selftests/net/nat6to4.sh b/tools/testing/selftests/net/nat6to4.sh
new file mode 100755
index 000000000000..0ee859b622a4
--- /dev/null
+++ b/tools/testing/selftests/net/nat6to4.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NS="ns-peer-$(mktemp -u XXXXXX)"
+
+ip netns add "${NS}"
+ip -netns "${NS}" link set lo up
+ip -netns "${NS}" route add default via 127.0.0.2 dev lo
+
+tc -n "${NS}" qdisc add dev lo ingress
+tc -n "${NS}" filter add dev lo ingress prio 4 protocol ip \
+ bpf object-file nat6to4.bpf.o section schedcls/egress4/snat4 direct-action
+
+ip netns exec "${NS}" \
+ bash -c 'echo 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789abc | socat - UDP4-DATAGRAM:224.1.0.1:6666,ip-multicast-loop=1'
diff --git a/tools/testing/selftests/net/netfilter/.gitignore b/tools/testing/selftests/net/netfilter/.gitignore
index 64c4f8d9aa6c..5d2be9a00627 100644
--- a/tools/testing/selftests/net/netfilter/.gitignore
+++ b/tools/testing/selftests/net/netfilter/.gitignore
@@ -5,3 +5,4 @@ conntrack_dump_flush
conntrack_reverse_clash
sctp_collision
nf_queue
+udpclash
diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile
index e9b2f553588d..a98ed892f55f 100644
--- a/tools/testing/selftests/net/netfilter/Makefile
+++ b/tools/testing/selftests/net/netfilter/Makefile
@@ -15,6 +15,7 @@ TEST_PROGS += conntrack_tcp_unreplied.sh
TEST_PROGS += conntrack_resize.sh
TEST_PROGS += conntrack_sctp_collision.sh
TEST_PROGS += conntrack_vrf.sh
+TEST_PROGS += conntrack_clash.sh
TEST_PROGS += conntrack_reverse_clash.sh
TEST_PROGS += ipvs.sh
TEST_PROGS += nf_conntrack_packetdrill.sh
@@ -44,6 +45,7 @@ TEST_GEN_FILES += connect_close nf_queue
TEST_GEN_FILES += conntrack_dump_flush
TEST_GEN_FILES += conntrack_reverse_clash
TEST_GEN_FILES += sctp_collision
+TEST_GEN_FILES += udpclash
include ../../lib.mk
@@ -52,6 +54,7 @@ $(OUTPUT)/nf_queue: LDLIBS += $(MNL_LDLIBS)
$(OUTPUT)/conntrack_dump_flush: CFLAGS += $(MNL_CFLAGS)
$(OUTPUT)/conntrack_dump_flush: LDLIBS += $(MNL_LDLIBS)
+$(OUTPUT)/udpclash: LDLIBS += -lpthread
TEST_FILES := lib.sh
TEST_FILES += packetdrill
diff --git a/tools/testing/selftests/net/netfilter/conntrack_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_clash.sh
new file mode 100755
index 000000000000..3712c1b9b38b
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_clash.sh
@@ -0,0 +1,175 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+clash_resolution_active=0
+dport=22111
+ret=0
+
+cleanup()
+{
+ # netns cleanup also zaps any remaining socat echo server.
+ cleanup_all_ns
+}
+
+checktool "nft --version" "run test without nft"
+checktool "conntrack --version" "run test without conntrack"
+checktool "socat -h" "run test without socat"
+
+trap cleanup EXIT
+
+setup_ns nsclient1 nsclient2 nsrouter
+
+ip netns exec "$nsrouter" nft -f -<<EOF
+table ip t {
+ chain lb {
+ meta l4proto udp dnat to numgen random mod 3 map { 0 : 10.0.2.1 . 9000, 1 : 10.0.2.1 . 9001, 2 : 10.0.2.1 . 9002 }
+ }
+
+ chain prerouting {
+ type nat hook prerouting priority dstnat
+
+ udp dport $dport counter jump lb
+ }
+
+ chain output {
+ type nat hook output priority dstnat
+
+ udp dport $dport counter jump lb
+ }
+}
+EOF
+
+load_simple_ruleset()
+{
+ip netns exec "$1" nft -f -<<EOF
+table ip t {
+ chain forward {
+ type filter hook forward priority 0
+
+ ct state new counter
+ }
+}
+EOF
+}
+
+spawn_servers()
+{
+ local ns="$1"
+ local ports="9000 9001 9002"
+
+ for port in $ports; do
+ ip netns exec "$ns" socat UDP-RECVFROM:$port,fork PIPE 2>/dev/null &
+ done
+
+ for port in $ports; do
+ wait_local_port_listen "$ns" $port udp
+ done
+}
+
+add_addr()
+{
+ local ns="$1"
+ local dev="$2"
+ local i="$3"
+ local j="$4"
+
+ ip -net "$ns" link set "$dev" up
+ ip -net "$ns" addr add "10.0.$i.$j/24" dev "$dev"
+}
+
+ping_test()
+{
+ local ns="$1"
+ local daddr="$2"
+
+ if ! ip netns exec "$ns" ping -q -c 1 $daddr > /dev/null;then
+ echo "FAIL: ping from $ns to $daddr"
+ exit 1
+ fi
+}
+
+run_one_clash_test()
+{
+ local ns="$1"
+ local daddr="$2"
+ local dport="$3"
+ local entries
+ local cre
+
+ if ! ip netns exec "$ns" ./udpclash $daddr $dport;then
+ echo "FAIL: did not receive expected number of replies for $daddr:$dport"
+ ret=1
+ return 1
+ fi
+
+ entries=$(conntrack -S | wc -l)
+ cre=$(conntrack -S | grep -v "clash_resolve=0" | wc -l)
+
+ if [ "$cre" -ne "$entries" ] ;then
+ clash_resolution_active=1
+ return 0
+ fi
+
+ # 1 cpu -> parallel insertion impossible
+ if [ "$entries" -eq 1 ]; then
+ return 0
+ fi
+
+ # not a failure: clash resolution logic did not trigger, but all replies
+ # were received. With right timing, xmit completed sequentially and
+ # no parallel insertion occurs.
+ return $ksft_skip
+}
+
+run_clash_test()
+{
+ local ns="$1"
+ local daddr="$2"
+ local dport="$3"
+
+ for i in $(seq 1 10);do
+ run_one_clash_test "$ns" "$daddr" "$dport"
+ local rv=$?
+ if [ $rv -eq 0 ];then
+ echo "PASS: clash resolution test for $daddr:$dport on attempt $i"
+ return 0
+ elif [ $rv -eq 1 ];then
+ echo "FAIL: clash resolution test for $daddr:$dport on attempt $i"
+ return 1
+ fi
+ done
+}
+
+ip link add veth0 netns "$nsclient1" type veth peer name veth0 netns "$nsrouter"
+ip link add veth0 netns "$nsclient2" type veth peer name veth1 netns "$nsrouter"
+add_addr "$nsclient1" veth0 1 1
+add_addr "$nsclient2" veth0 2 1
+add_addr "$nsrouter" veth0 1 99
+add_addr "$nsrouter" veth1 2 99
+
+ip -net "$nsclient1" route add default via 10.0.1.99
+ip -net "$nsclient2" route add default via 10.0.2.99
+ip netns exec "$nsrouter" sysctl -q net.ipv4.ip_forward=1
+
+ping_test "$nsclient1" 10.0.1.99
+ping_test "$nsclient1" 10.0.2.1
+ping_test "$nsclient2" 10.0.1.1
+
+spawn_servers "$nsclient2"
+
+# exercise clash resolution with nat:
+# nsrouter is supposed to dnat to 10.0.2.1:900{0,1,2,3}.
+run_clash_test "$nsclient1" 10.0.1.99 "$dport"
+
+# exercise clash resolution without nat.
+load_simple_ruleset "$nsclient2"
+run_clash_test "$nsclient2" 127.0.0.1 9001
+
+if [ $clash_resolution_active -eq 0 ];then
+ [ "$ret" -eq 0 ] && ret=$ksft_skip
+ echo "SKIP: Clash resolution did not trigger"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/conntrack_resize.sh b/tools/testing/selftests/net/netfilter/conntrack_resize.sh
index 9e033e80219e..788cd56ea4a0 100755
--- a/tools/testing/selftests/net/netfilter/conntrack_resize.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_resize.sh
@@ -12,6 +12,9 @@ tmpfile=""
tmpfile_proc=""
tmpfile_uniq=""
ret=0
+have_socat=0
+
+socat -h > /dev/null && have_socat=1
insert_count=2000
[ "$KSFT_MACHINE_SLOW" = "yes" ] && insert_count=400
@@ -123,7 +126,7 @@ ctflush() {
done
}
-ctflood()
+ct_pingflood()
{
local ns="$1"
local duration="$2"
@@ -152,6 +155,44 @@ ctflood()
wait
}
+ct_udpflood()
+{
+ local ns="$1"
+ local duration="$2"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ [ $have_socat -ne "1" ] && return
+
+ while [ $now -lt $end ]; do
+ip netns exec "$ns" bash<<"EOF"
+ for i in $(seq 1 100);do
+ dport=$(((RANDOM%65536)+1))
+
+ echo bar | socat -u STDIN UDP:"127.0.0.1:$dport" &
+ done > /dev/null 2>&1
+ wait
+EOF
+ now=$(date +%s)
+ done
+}
+
+ct_udpclash()
+{
+ local ns="$1"
+ local duration="$2"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ [ -x udpclash ] || return
+
+ while [ $now -lt $end ]; do
+ ip netns exec "$ns" ./udpclash 127.0.0.1 $((RANDOM%65536)) > /dev/null 2>&1
+
+ now=$(date +%s)
+ done
+}
+
# dump to /dev/null. We don't want dumps to cause infinite loops
# or use-after-free even when conntrack table is altered while dumps
# are in progress.
@@ -169,6 +210,48 @@ ct_nulldump()
wait
}
+ct_nulldump_loop()
+{
+ local ns="$1"
+ local duration="$2"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ while [ $now -lt $end ]; do
+ ct_nulldump "$ns"
+ sleep $((RANDOM%2))
+ now=$(date +%s)
+ done
+}
+
+change_timeouts()
+{
+ local ns="$1"
+ local r1=$((RANDOM%2))
+ local r2=$((RANDOM%2))
+
+ [ "$r1" -eq 1 ] && ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=$((RANDOM%5))
+ [ "$r2" -eq 1 ] && ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_udp_timeout=$((RANDOM%5))
+}
+
+ct_change_timeouts_loop()
+{
+ local ns="$1"
+ local duration="$2"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ while [ $now -lt $end ]; do
+ change_timeouts "$ns"
+ sleep $((RANDOM%2))
+ now=$(date +%s)
+ done
+
+ # restore defaults
+ ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=30
+ ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_udp_timeout=30
+}
+
check_taint()
{
local tainted_then="$1"
@@ -198,10 +281,14 @@ insert_flood()
r=$((RANDOM%$insert_count))
- ctflood "$n" "$timeout" "floodresize" &
+ ct_pingflood "$n" "$timeout" "floodresize" &
+ ct_udpflood "$n" "$timeout" &
+ ct_udpclash "$n" "$timeout" &
+
insert_ctnetlink "$n" "$r" &
ctflush "$n" "$timeout" &
- ct_nulldump "$n" &
+ ct_nulldump_loop "$n" "$timeout" &
+ ct_change_timeouts_loop "$n" "$timeout" &
wait
}
@@ -306,7 +393,7 @@ test_dump_all()
ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=3600
- ctflood "$nsclient1" $timeout "dumpall" &
+ ct_pingflood "$nsclient1" $timeout "dumpall" &
insert_ctnetlink "$nsclient2" $insert_count
wait
@@ -368,7 +455,7 @@ test_conntrack_disable()
ct_flush_once "$nsclient1"
ct_flush_once "$nsclient2"
- ctflood "$nsclient1" "$timeout" "conntrack disable"
+ ct_pingflood "$nsclient1" "$timeout" "conntrack disable"
ip netns exec "$nsclient2" ping -q -c 1 127.0.0.1 >/dev/null 2>&1
# Disabled, should not have picked up any connection.
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
index efea93cf23d4..20e76b395c85 100755
--- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -378,7 +378,7 @@ display net,port,proto
type_spec ipv4_addr . inet_service . inet_proto
chain_spec ip daddr . udp dport . meta l4proto
dst addr4 port proto
-src
+src
start 1
count 9
src_delta 9
@@ -419,6 +419,7 @@ table inet filter {
set test {
type ${type_spec}
+ counter
flags interval,timeout
}
@@ -1158,9 +1159,18 @@ del() {
fi
}
-# Return packet count from 'test' counter in 'inet filter' table
+# Return packet count for elem $1 from 'test' counter in 'inet filter' table
count_packets() {
found=0
+ for token in $(nft reset element inet filter test "${1}" ); do
+ [ ${found} -eq 1 ] && echo "${token}" && return
+ [ "${token}" = "packets" ] && found=1
+ done
+}
+
+# Return packet count from 'test' counter in 'inet filter' table
+count_packets_nomatch() {
+ found=0
for token in $(nft list counter inet filter test); do
[ ${found} -eq 1 ] && echo "${token}" && return
[ "${token}" = "packets" ] && found=1
@@ -1206,6 +1216,10 @@ perf() {
# Set MAC addresses, send single packet, check that it matches, reset counter
send_match() {
+ local elem="$1"
+
+ shift
+
ip link set veth_a address "$(format_mac "${1}")"
ip -n B link set veth_b address "$(format_mac "${2}")"
@@ -1216,7 +1230,7 @@ send_match() {
eval src_"$f"=\$\(format_\$f "${2}"\)
done
eval send_\$proto
- if [ "$(count_packets)" != "1" ]; then
+ if [ "$(count_packets "$elem")" != "1" ]; then
err "${proto} packet to:"
err " $(for f in ${dst}; do
eval format_\$f "${1}"; printf ' '; done)"
@@ -1242,7 +1256,7 @@ send_nomatch() {
eval src_"$f"=\$\(format_\$f "${2}"\)
done
eval send_\$proto
- if [ "$(count_packets)" != "0" ]; then
+ if [ "$(count_packets_nomatch)" != "0" ]; then
err "${proto} packet to:"
err " $(for f in ${dst}; do
eval format_\$f "${1}"; printf ' '; done)"
@@ -1255,13 +1269,54 @@ send_nomatch() {
fi
}
+maybe_send_nomatch() {
+ local elem="$1"
+ local what="$4"
+
+ [ $((RANDOM%20)) -gt 0 ] && return
+
+ dst_addr4="$2"
+ dst_port="$3"
+ send_udp
+
+ if [ "$(count_packets_nomatch)" != "0" ]; then
+ err "Packet to $dst_addr4:$dst_port did match $what"
+ err "$(nft -a list ruleset)"
+ return 1
+ fi
+}
+
+maybe_send_match() {
+ local elem="$1"
+ local what="$4"
+
+ [ $((RANDOM%20)) -gt 0 ] && return
+
+ dst_addr4="$2"
+ dst_port="$3"
+ send_udp
+
+ if [ "$(count_packets "{ $elem }")" != "1" ]; then
+ err "Packet to $dst_addr4:$dst_port did not match $what"
+ err "$(nft -a list ruleset)"
+ return 1
+ fi
+ nft reset counter inet filter test >/dev/null
+ nft reset element inet filter test "{ $elem }" >/dev/null
+}
+
# Correctness test template:
# - add ranged element, check that packets match it
# - check that packets outside range don't match it
# - remove some elements, check that packets don't match anymore
test_correctness_main() {
range_size=1
+
+ send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1
+
for i in $(seq "${start}" $((start + count))); do
+ local elem=""
+
end=$((start + range_size))
# Avoid negative or zero-sized port ranges
@@ -1272,15 +1327,16 @@ test_correctness_main() {
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
- add "$(format)" || return 1
+ elem="$(format)"
+ add "$elem" || return 1
for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
- send_match "${j}" $((j + src_delta)) || return 1
+ send_match "$elem" "${j}" $((j + src_delta)) || return 1
done
send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1
# Delete elements now and then
if [ $((i % 3)) -eq 0 ]; then
- del "$(format)" || return 1
+ del "$elem" || return 1
for j in $(seq "$start" \
$((range_size / 2 + 1)) ${end}); do
send_nomatch "${j}" $((j + src_delta)) \
@@ -1572,14 +1628,17 @@ test_timeout() {
range_size=1
for i in $(seq "$start" $((start + count))); do
+ local elem=""
+
end=$((start + range_size))
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
- add "$(format)" || return 1
+ elem="$(format)"
+ add "$elem" || return 1
for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
- send_match "${j}" $((j + src_delta)) || return 1
+ send_match "$elem" "${j}" $((j + src_delta)) || return 1
done
range_size=$((range_size + 1))
@@ -1737,7 +1796,7 @@ test_bug_reload() {
srcend=$((end + src_delta))
for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
- send_match "${j}" $((j + src_delta)) || return 1
+ send_match "$(format)" "${j}" $((j + src_delta)) || return 1
done
range_size=$((range_size + 1))
@@ -1756,22 +1815,34 @@ test_bug_net_port_proto_match() {
range_size=1
for i in $(seq 1 10); do
for j in $(seq 1 20) ; do
- elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))")
+ local dport=$j
+
+ elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))")
+
+ # too slow, do not test all addresses
+ maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "before add" || return 1
nft "add element inet filter test { $elem }" || return 1
+
+ maybe_send_match "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "after add" || return 1
+
nft "get element inet filter test { $elem }" | grep -q "$elem"
if [ $? -ne 0 ];then
local got=$(nft "get element inet filter test { $elem }")
err "post-add: should have returned $elem but got $got"
return 1
fi
+
+ maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "out-of-range" || return 1
done
done
# recheck after set was filled
for i in $(seq 1 10); do
for j in $(seq 1 20) ; do
- elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))")
+ local dport=$j
+
+ elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))")
nft "get element inet filter test { $elem }" | grep -q "$elem"
if [ $? -ne 0 ];then
@@ -1779,6 +1850,9 @@ test_bug_net_port_proto_match() {
err "post-fill: should have returned $elem but got $got"
return 1
fi
+
+ maybe_send_match "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "recheck" || return 1
+ maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "recheck out-of-range" || return 1
done
done
@@ -1786,9 +1860,10 @@ test_bug_net_port_proto_match() {
for i in $(seq 1 10); do
for j in $(seq 1 20) ; do
local rnd=$((RANDOM%10))
+ local dport=$j
local got=""
- elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))")
+ elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))")
if [ $rnd -gt 0 ];then
continue
fi
@@ -1799,6 +1874,8 @@ test_bug_net_port_proto_match() {
err "post-delete: query for $elem returned $got instead of error."
return 1
fi
+
+ maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "match after deletion" || return 1
done
done
@@ -1817,7 +1894,7 @@ test_bug_avx2_mismatch()
dst_addr6="$a2"
send_icmp6
- if [ "$(count_packets)" -gt "0" ]; then
+ if [ "$(count_packets "{ icmpv6 . $a1 }")" -gt "0" ]; then
err "False match for $a2"
return 1
fi
diff --git a/tools/testing/selftests/net/netfilter/nft_nat.sh b/tools/testing/selftests/net/netfilter/nft_nat.sh
index 9e39de26455f..a954754b99b3 100755
--- a/tools/testing/selftests/net/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/net/netfilter/nft_nat.sh
@@ -866,6 +866,24 @@ EOF
ip netns exec "$ns0" nft delete table $family nat
}
+file_cmp()
+{
+ local infile="$1"
+ local outfile="$2"
+
+ if ! cmp "$infile" "$outfile";then
+ echo -n "Infile "
+ ls -l "$infile"
+ echo -n "Outfile "
+ ls -l "$outfile"
+ echo "ERROR: in and output file mismatch when checking $msg" 1>&1
+ ret=1
+ return 1
+ fi
+
+ return 0
+}
+
test_stateless_nat_ip()
{
local lret=0
@@ -966,11 +984,7 @@ EOF
wait
- if ! cmp "$INFILE" "$OUTFILE";then
- ls -l "$INFILE" "$OUTFILE"
- echo "ERROR: in and output file mismatch when checking udp with stateless nat" 1>&2
- lret=1
- fi
+ file_cmp "$INFILE" "$OUTFILE" "udp with stateless nat" || lret=1
:> "$OUTFILE"
@@ -991,6 +1005,62 @@ EOF
return $lret
}
+test_dnat_clash()
+{
+ local lret=0
+
+ if ! socat -h > /dev/null 2>&1;then
+ echo "SKIP: Could not run dnat clash test without socat tool"
+ [ $ret -eq 0 ] && ret=$ksft_skip
+ return $ksft_skip
+ fi
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+flush ruleset
+table ip dnat-test {
+ chain prerouting {
+ type nat hook prerouting priority dstnat; policy accept;
+ ip daddr 10.0.2.1 udp dport 1234 counter dnat to 10.0.1.1:1234
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add dnat rules"
+ [ $ret -eq 0 ] && ret=$ksft_skip
+ return $ksft_skip
+ fi
+
+ local udpdaddr="10.0.2.1"
+ for i in 1 2;do
+ echo "PING $udpdaddr" > "$INFILE"
+ echo "PONG 10.0.1.1 step $i" | ip netns exec "$ns0" timeout 3 socat STDIO UDP4-LISTEN:1234,bind=10.0.1.1 > "$OUTFILE" 2>/dev/null &
+ local lpid=$!
+
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$ns0" 1234 "-u"
+
+ result=$(ip netns exec "$ns1" timeout 3 socat STDIO UDP4-SENDTO:"$udpdaddr:1234,sourceport=4321" < "$INFILE")
+ udpdaddr="10.0.1.1"
+
+ if [ "$result" != "PONG 10.0.1.1 step $i" ] ; then
+ echo "ERROR: failed to test udp $ns1 to $ns2 with dnat rule step $i, result: \"$result\"" 1>&2
+ lret=1
+ ret=1
+ fi
+
+ wait
+
+ file_cmp "$INFILE" "$OUTFILE" "udp dnat step $i" || lret=1
+
+ :> "$OUTFILE"
+ done
+
+ test $lret -eq 0 && echo "PASS: IP dnat clash $ns1:$ns2"
+
+ ip netns exec "$ns0" nft flush ruleset
+
+ return $lret
+}
+
# ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99
for i in "$ns0" "$ns1" "$ns2" ;do
ip netns exec "$i" nft -f /dev/stdin <<EOF
@@ -1147,6 +1217,7 @@ $test_inet_nat && test_redirect6 inet
test_port_shadowing
test_stateless_nat_ip
+test_dnat_clash
if [ $ret -ne 0 ];then
echo -n "FAIL: "
diff --git a/tools/testing/selftests/net/netfilter/udpclash.c b/tools/testing/selftests/net/netfilter/udpclash.c
new file mode 100644
index 000000000000..85c7b906ad08
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/udpclash.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Usage: ./udpclash <IP> <PORT>
+ *
+ * Emit THREAD_COUNT UDP packets sharing the same saddr:daddr pair.
+ *
+ * This mimics DNS resolver libraries that emit A and AAAA requests
+ * in parallel.
+ *
+ * This exercises conntrack clash resolution logic added and later
+ * refined in
+ *
+ * 71d8c47fc653 ("netfilter: conntrack: introduce clash resolution on insertion race")
+ * ed07d9a021df ("netfilter: nf_conntrack: resolve clash for matching conntracks")
+ * 6a757c07e51f ("netfilter: conntrack: allow insertion of clashing entries")
+ */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <pthread.h>
+
+#define THREAD_COUNT 128
+
+struct thread_args {
+ const struct sockaddr_in *si_remote;
+ int sockfd;
+};
+
+static int wait = 1;
+
+static void *thread_main(void *varg)
+{
+ const struct sockaddr_in *si_remote;
+ const struct thread_args *args = varg;
+ static const char msg[] = "foo";
+
+ si_remote = args->si_remote;
+
+ while (wait == 1)
+ ;
+
+ if (sendto(args->sockfd, msg, strlen(msg), MSG_NOSIGNAL,
+ (struct sockaddr *)si_remote, sizeof(*si_remote)) < 0)
+ exit(111);
+
+ return varg;
+}
+
+static int run_test(int fd, const struct sockaddr_in *si_remote)
+{
+ struct thread_args thread_args = {
+ .si_remote = si_remote,
+ .sockfd = fd,
+ };
+ pthread_t *tid = calloc(THREAD_COUNT, sizeof(pthread_t));
+ unsigned int repl_count = 0, timeout = 0;
+ int i;
+
+ if (!tid) {
+ perror("calloc");
+ return 1;
+ }
+
+ for (i = 0; i < THREAD_COUNT; i++) {
+ int err = pthread_create(&tid[i], NULL, &thread_main, &thread_args);
+
+ if (err != 0) {
+ perror("pthread_create");
+ exit(1);
+ }
+ }
+
+ wait = 0;
+
+ for (i = 0; i < THREAD_COUNT; i++)
+ pthread_join(tid[i], NULL);
+
+ while (repl_count < THREAD_COUNT) {
+ struct sockaddr_in si_repl;
+ socklen_t si_repl_len = sizeof(si_repl);
+ char repl[512];
+ ssize_t ret;
+
+ ret = recvfrom(fd, repl, sizeof(repl), MSG_NOSIGNAL,
+ (struct sockaddr *) &si_repl, &si_repl_len);
+ if (ret < 0) {
+ if (timeout++ > 5000) {
+ fputs("timed out while waiting for reply from thread\n", stderr);
+ break;
+ }
+
+ /* give reply time to pass though the stack */
+ usleep(1000);
+ continue;
+ }
+
+ if (si_repl_len != sizeof(*si_remote)) {
+ fprintf(stderr, "warning: reply has unexpected repl_len %d vs %d\n",
+ (int)si_repl_len, (int)sizeof(si_repl));
+ } else if (si_remote->sin_addr.s_addr != si_repl.sin_addr.s_addr ||
+ si_remote->sin_port != si_repl.sin_port) {
+ char a[64], b[64];
+
+ inet_ntop(AF_INET, &si_remote->sin_addr, a, sizeof(a));
+ inet_ntop(AF_INET, &si_repl.sin_addr, b, sizeof(b));
+
+ fprintf(stderr, "reply from wrong source: want %s:%d got %s:%d\n",
+ a, ntohs(si_remote->sin_port), b, ntohs(si_repl.sin_port));
+ }
+
+ repl_count++;
+ }
+
+ printf("got %d of %d replies\n", repl_count, THREAD_COUNT);
+
+ free(tid);
+
+ return repl_count == THREAD_COUNT ? 0 : 1;
+}
+
+int main(int argc, char *argv[])
+{
+ struct sockaddr_in si_local = {
+ .sin_family = AF_INET,
+ };
+ struct sockaddr_in si_remote = {
+ .sin_family = AF_INET,
+ };
+ int fd, ret;
+
+ if (argc < 3) {
+ fputs("Usage: send_udp <daddr> <dport>\n", stderr);
+ return 1;
+ }
+
+ si_remote.sin_port = htons(atoi(argv[2]));
+ si_remote.sin_addr.s_addr = inet_addr(argv[1]);
+
+ fd = socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, IPPROTO_UDP);
+ if (fd < 0) {
+ perror("socket");
+ return 1;
+ }
+
+ if (bind(fd, (struct sockaddr *)&si_local, sizeof(si_local)) < 0) {
+ perror("bind");
+ return 1;
+ }
+
+ ret = run_test(fd, &si_remote);
+
+ close(fd);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/net/ovpn/ovpn-cli.c b/tools/testing/selftests/net/ovpn/ovpn-cli.c
index de9c26f98b2e..9201f2905f2c 100644
--- a/tools/testing/selftests/net/ovpn/ovpn-cli.c
+++ b/tools/testing/selftests/net/ovpn/ovpn-cli.c
@@ -2166,6 +2166,7 @@ static int ovpn_parse_cmd_args(struct ovpn_ctx *ovpn, int argc, char *argv[])
ovpn->peers_file = argv[4];
+ ovpn->sa_family = AF_INET;
if (argc > 5 && !strcmp(argv[5], "ipv6"))
ovpn->sa_family = AF_INET6;
break;
diff --git a/tools/testing/selftests/net/ovpn/test-large-mtu.sh b/tools/testing/selftests/net/ovpn/test-large-mtu.sh
new file mode 100755
index 000000000000..ce2a2cb64f72
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-large-mtu.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+MTU="1500"
+
+source test.sh
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt
new file mode 100644
index 000000000000..09aabc775e80
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"`
+
+// Test that a not-yet-accepted socket does not change
+// its initial sk_rcvbuf (tcp_rmem[1]) when receiving ooo packets.
+
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10>
+ +.1 < . 1:1(0) ack 1 win 257
+ +0 < . 2001:41001(39000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:41001>
+ +0 < . 41001:101001(60000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:101001>
+ +0 < . 1:1001(1000) ack 1 win 257
+ +0 > . 1:1(0) ack 1001 <nop,nop,sack 2001:101001>
+ +0 < . 1001:2001(1000) ack 1 win 257
+ +0 > . 1:1(0) ack 101001
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 %{ assert SK_MEMINFO_RCVBUF == 131072, SK_MEMINFO_RCVBUF }%
+
+ +0 close(4) = 0
+ +0 close(3) = 0
+
+// Test that ooo packets for accepted sockets do increase sk_rcvbuf
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 < . 2001:41001(39000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:41001>
+ +0 < . 41001:101001(60000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:101001>
+
+ +0 %{ assert SK_MEMINFO_RCVBUF > 131072, SK_MEMINFO_RCVBUF }%
+
diff --git a/tools/testing/selftests/net/tfo.c b/tools/testing/selftests/net/tfo.c
new file mode 100644
index 000000000000..eb3cac5e583c
--- /dev/null
+++ b/tools/testing/selftests/net/tfo.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <netinet/tcp.h>
+#include <errno.h>
+
+static int cfg_server;
+static int cfg_client;
+static int cfg_port = 8000;
+static struct sockaddr_in6 cfg_addr;
+static char *cfg_outfile;
+
+static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6)
+{
+ int ret;
+
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = htons(port);
+
+ ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr);
+ if (ret != 1) {
+ /* fallback to plain IPv4 */
+ ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]);
+ if (ret != 1)
+ return -1;
+
+ /* add ::ffff prefix */
+ sin6->sin6_addr.s6_addr32[0] = 0;
+ sin6->sin6_addr.s6_addr32[1] = 0;
+ sin6->sin6_addr.s6_addr16[4] = 0;
+ sin6->sin6_addr.s6_addr16[5] = 0xffff;
+ }
+
+ return 0;
+}
+
+static void run_server(void)
+{
+ unsigned long qlen = 32;
+ int fd, opt, connfd;
+ socklen_t len;
+ char buf[64];
+ FILE *outfile;
+
+ outfile = fopen(cfg_outfile, "w");
+ if (!outfile)
+ error(1, errno, "fopen() outfile");
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket()");
+
+ opt = 1;
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0)
+ error(1, errno, "setsockopt(SO_REUSEADDR)");
+
+ if (setsockopt(fd, SOL_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) < 0)
+ error(1, errno, "setsockopt(TCP_FASTOPEN)");
+
+ if (bind(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)) < 0)
+ error(1, errno, "bind()");
+
+ if (listen(fd, 5) < 0)
+ error(1, errno, "listen()");
+
+ len = sizeof(cfg_addr);
+ connfd = accept(fd, (struct sockaddr *)&cfg_addr, &len);
+ if (connfd < 0)
+ error(1, errno, "accept()");
+
+ len = sizeof(opt);
+ if (getsockopt(connfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &opt, &len) < 0)
+ error(1, errno, "getsockopt(SO_INCOMING_NAPI_ID)");
+
+ read(connfd, buf, 64);
+ fprintf(outfile, "%d\n", opt);
+
+ fclose(outfile);
+ close(connfd);
+ close(fd);
+}
+
+static void run_client(void)
+{
+ int fd;
+ char *msg = "Hello, world!";
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket()");
+
+ sendto(fd, msg, strlen(msg), MSG_FASTOPEN, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr));
+
+ close(fd);
+}
+
+static void usage(const char *filepath)
+{
+ error(1, 0, "Usage: %s (-s|-c) -h<server_ip> -p<port> -o<outfile> ", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ struct sockaddr_in6 *addr6 = (void *) &cfg_addr;
+ char *addr = NULL;
+ int ret;
+ int c;
+
+ if (argc <= 1)
+ usage(argv[0]);
+
+ while ((c = getopt(argc, argv, "sch:p:o:")) != -1) {
+ switch (c) {
+ case 's':
+ if (cfg_client)
+ error(1, 0, "Pass one of -s or -c");
+ cfg_server = 1;
+ break;
+ case 'c':
+ if (cfg_server)
+ error(1, 0, "Pass one of -s or -c");
+ cfg_client = 1;
+ break;
+ case 'h':
+ addr = optarg;
+ break;
+ case 'p':
+ cfg_port = strtoul(optarg, NULL, 0);
+ break;
+ case 'o':
+ cfg_outfile = strdup(optarg);
+ if (!cfg_outfile)
+ error(1, 0, "outfile invalid");
+ break;
+ }
+ }
+
+ if (cfg_server && addr)
+ error(1, 0, "Server cannot have -h specified");
+
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ addr6->sin6_addr = in6addr_any;
+ if (addr) {
+ ret = parse_address(addr, cfg_port, addr6);
+ if (ret)
+ error(1, 0, "Client address parse error: %s", addr);
+ }
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+
+ if (cfg_server)
+ run_server();
+ else if (cfg_client)
+ run_client();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/net/tfo_passive.sh b/tools/testing/selftests/net/tfo_passive.sh
new file mode 100755
index 000000000000..80bf11fdc046
--- /dev/null
+++ b/tools/testing/selftests/net/tfo_passive.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+source lib.sh
+
+NSIM_SV_ID=$((256 + RANDOM % 256))
+NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID
+NSIM_CL_ID=$((512 + RANDOM % 256))
+NSIM_CL_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_CL_ID
+
+NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
+NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
+NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device
+NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device
+
+SERVER_IP=192.168.1.1
+CLIENT_IP=192.168.1.2
+SERVER_PORT=48675
+
+setup_ns()
+{
+ set -e
+ ip netns add nssv
+ ip netns add nscl
+
+ NSIM_SV_NAME=$(find $NSIM_SV_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_SV_SYS/net -exec basename {} \;)
+ NSIM_CL_NAME=$(find $NSIM_CL_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_CL_SYS/net -exec basename {} \;)
+
+ ip link set $NSIM_SV_NAME netns nssv
+ ip link set $NSIM_CL_NAME netns nscl
+
+ ip netns exec nssv ip addr add "${SERVER_IP}/24" dev $NSIM_SV_NAME
+ ip netns exec nscl ip addr add "${CLIENT_IP}/24" dev $NSIM_CL_NAME
+
+ ip netns exec nssv ip link set dev $NSIM_SV_NAME up
+ ip netns exec nscl ip link set dev $NSIM_CL_NAME up
+
+ # Enable passive TFO
+ ip netns exec nssv sysctl -w net.ipv4.tcp_fastopen=519 > /dev/null
+
+ set +e
+}
+
+cleanup_ns()
+{
+ ip netns del nscl
+ ip netns del nssv
+}
+
+###
+### Code start
+###
+
+modprobe netdevsim
+
+# linking
+
+echo $NSIM_SV_ID > $NSIM_DEV_SYS_NEW
+echo $NSIM_CL_ID > $NSIM_DEV_SYS_NEW
+udevadm settle
+
+setup_ns
+
+NSIM_SV_FD=$((256 + RANDOM % 256))
+exec {NSIM_SV_FD}</var/run/netns/nssv
+NSIM_SV_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_SV_NAME/ifindex)
+
+NSIM_CL_FD=$((256 + RANDOM % 256))
+exec {NSIM_CL_FD}</var/run/netns/nscl
+NSIM_CL_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_CL_NAME/ifindex)
+
+echo "$NSIM_SV_FD:$NSIM_SV_IFIDX $NSIM_CL_FD:$NSIM_CL_IFIDX" > \
+ $NSIM_DEV_SYS_LINK
+
+if [ $? -ne 0 ]; then
+ echo "linking netdevsim1 with netdevsim2 should succeed"
+ cleanup_ns
+ exit 1
+fi
+
+out_file=$(mktemp)
+
+timeout -k 1s 30s ip netns exec nssv ./tfo \
+ -s \
+ -p ${SERVER_PORT} \
+ -o ${out_file}&
+
+wait_local_port_listen nssv ${SERVER_PORT} tcp
+
+ip netns exec nscl ./tfo -c -h ${SERVER_IP} -p ${SERVER_PORT}
+
+wait
+
+res=$(cat $out_file)
+rm $out_file
+
+if [ $res -eq 0 ]; then
+ echo "got invalid NAPI ID from passive TFO socket"
+ cleanup_ns
+ exit 1
+fi
+
+echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK
+
+echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL
+
+cleanup_ns
+
+modprobe -r netdevsim
+
+exit 0
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index 1dc337c709f8..b17e032a6d75 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -48,7 +48,7 @@ run_one() {
cfg_veth
- ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} &
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${rx_args} &
local PID1=$!
wait_local_port_listen ${PEER_NS} 8000 udp
@@ -95,7 +95,7 @@ run_one_nat() {
# will land on the 'plain' one
ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 &
local PID1=$!
- ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} &
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${family} -b ${addr2%/*} ${rx_args} &
local PID2=$!
wait_local_port_listen "${PEER_NS}" 8000 udp
@@ -117,9 +117,9 @@ run_one_2sock() {
cfg_veth
- ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} -p 12345 &
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${rx_args} -p 12345 &
local PID1=$!
- ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} &
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 100 ${rx_args} &
local PID2=$!
wait_local_port_listen "${PEER_NS}" 12345 udp
diff --git a/tools/testing/selftests/net/vlan_hw_filter.sh b/tools/testing/selftests/net/vlan_hw_filter.sh
index 7bc804ffaf7c..0fb56baf28e4 100755
--- a/tools/testing/selftests/net/vlan_hw_filter.sh
+++ b/tools/testing/selftests/net/vlan_hw_filter.sh
@@ -3,27 +3,101 @@
readonly NETNS="ns-$(mktemp -u XXXXXX)"
+ALL_TESTS="
+ test_vlan_filter_check
+ test_vlan0_del_crash_01
+ test_vlan0_del_crash_02
+ test_vlan0_del_crash_03
+ test_vid0_memleak
+"
+
ret=0
+setup() {
+ ip netns add ${NETNS}
+}
+
cleanup() {
- ip netns del $NETNS
+ ip netns del $NETNS 2>/dev/null
}
trap cleanup EXIT
fail() {
- echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2
- ret=1
+ echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2
+ ret=1
+}
+
+tests_run()
+{
+ local current_test
+ for current_test in ${TESTS:-$ALL_TESTS}; do
+ $current_test
+ done
+}
+
+test_vlan_filter_check() {
+ setup
+ ip netns exec ${NETNS} ip link add bond0 type bond mode 0
+ ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2
+ ip netns exec ${NETNS} ip link set bond_slave_1 master bond0
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+ ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0
+ ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0
+ ip netns exec ${NETNS} ip link set bond_slave_1 nomaster
+ ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function"
+ cleanup
}
-ip netns add ${NETNS}
-ip netns exec ${NETNS} ip link add bond0 type bond mode 0
-ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2
-ip netns exec ${NETNS} ip link set bond_slave_1 master bond0
-ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
-ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0
-ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0
-ip netns exec ${NETNS} ip link set bond_slave_1 nomaster
-ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function"
+#enable vlan_filter feature of real_dev with vlan0 during running time
+test_vlan0_del_crash_01() {
+ setup
+ ip netns exec ${NETNS} ip link add bond0 type bond mode 0
+ ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+ ip netns exec ${NETNS} ifconfig bond0 up
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on
+ ip netns exec ${NETNS} ifconfig bond0 down
+ ip netns exec ${NETNS} ifconfig bond0 up
+ ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function"
+ cleanup
+}
+
+#enable vlan_filter feature and add vlan0 for real_dev during running time
+test_vlan0_del_crash_02() {
+ setup
+ ip netns exec ${NETNS} ip link add bond0 type bond mode 0
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+ ip netns exec ${NETNS} ifconfig bond0 up
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on
+ ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q
+ ip netns exec ${NETNS} ifconfig bond0 down
+ ip netns exec ${NETNS} ifconfig bond0 up
+ ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function"
+ cleanup
+}
+
+#enable vlan_filter feature of real_dev during running time
+#test kernel_bug of vlan unregister
+test_vlan0_del_crash_03() {
+ setup
+ ip netns exec ${NETNS} ip link add bond0 type bond mode 0
+ ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+ ip netns exec ${NETNS} ifconfig bond0 up
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on
+ ip netns exec ${NETNS} ifconfig bond0 down
+ ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function"
+ cleanup
+}
+
+test_vid0_memleak() {
+ setup
+ ip netns exec ${NETNS} ip link add bond0 up type bond mode 0
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+ ip netns exec ${NETNS} ip link del dev bond0 || fail "Please check vlan HW filter function"
+ cleanup
+}
+tests_run
exit $ret
diff --git a/tools/testing/selftests/sched_ext/exit.c b/tools/testing/selftests/sched_ext/exit.c
index 9451782689de..ee25824b1cbe 100644
--- a/tools/testing/selftests/sched_ext/exit.c
+++ b/tools/testing/selftests/sched_ext/exit.c
@@ -22,6 +22,14 @@ static enum scx_test_status run(void *ctx)
struct bpf_link *link;
char buf[16];
+ /*
+ * On single-CPU systems, ops.select_cpu() is never
+ * invoked, so skip this test to avoid getting stuck
+ * indefinitely.
+ */
+ if (tc == EXIT_SELECT_CPU && libbpf_num_possible_cpus() == 1)
+ continue;
+
skel = exit__open();
SCX_ENUM_INIT(skel);
skel->rodata->exit_point = tc;
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index 9aa44d8176d9..c6db7fa94f55 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -128,6 +128,32 @@
]
},
{
+ "id": "5456",
+ "name": "Test htb_dequeue_tree with deactivation and row emptying",
+ "category": [
+ "qdisc",
+ "htb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.11.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY root handle 1: htb default 1",
+ "$TC class add dev $DUMMY parent 1: classid 1:1 htb rate 64bit ",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 2: netem",
+ "$TC qdisc add dev $DUMMY parent 2:1 handle 3: blackhole"
+ ],
+ "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.11.11",
+ "expExitCode": "1",
+ "verifyCmd": "$TC -j qdisc show dev $DUMMY",
+ "matchJSON": [],
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root"
+ ]
+ },
+ {
"id": "c024",
"name": "Test TBF with SKBPRIO - catch qlen corner cases",
"category": [
@@ -635,5 +661,108 @@
"$TC qdisc del dev $DUMMY handle 1:0 root",
"$IP addr del 10.10.10.10/24 dev $DUMMY || true"
]
+ },
+ {
+ "id": "d74b",
+ "name": "Test use-after-free with DRR/NETEM/BLACKHOLE chain",
+ "category": [
+ "qdisc",
+ "hfsc",
+ "drr",
+ "netem",
+ "blackhole"
+ ],
+ "plugins": {
+ "requires": [
+ "nsPlugin",
+ "scapyPlugin"
+ ]
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.11.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY root handle 1: drr",
+ "$TC filter add dev $DUMMY parent 1: basic classid 1:1",
+ "$TC class add dev $DUMMY parent 1: classid 1:1 drr",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 2: hfsc def 1",
+ "$TC class add dev $DUMMY parent 2: classid 2:1 hfsc rt m1 8 d 1 m2 0",
+ "$TC qdisc add dev $DUMMY parent 2:1 handle 3: netem",
+ "$TC qdisc add dev $DUMMY parent 3:1 handle 4: blackhole",
+ "ping -c1 -W0.01 -I $DUMMY 10.10.11.11 || true",
+ "$TC class del dev $DUMMY classid 1:1"
+ ],
+ "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.11.11",
+ "expExitCode": "1",
+ "verifyCmd": "$TC -j class ls dev $DUMMY classid 1:1",
+ "matchJSON": [],
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root handle 1: drr"
+ ]
+ },
+ {
+ "id": "be28",
+ "name": "Try to add fq_codel qdisc as a child of an hhf qdisc",
+ "category": [
+ "qdisc",
+ "fq_codel",
+ "hhf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY root handle a: hhf"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY parent a: handle b: fq_codel",
+ "expExitCode": "2",
+ "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle b:",
+ "matchJSON": [],
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root"
+ ]
+ },
+ {
+ "id": "fcb5",
+ "name": "Try to add pie qdisc as a child of a drr qdisc",
+ "category": [
+ "qdisc",
+ "pie",
+ "drr"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY root handle a: drr"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY parent a: handle b: pie",
+ "expExitCode": "2",
+ "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle b:",
+ "matchJSON": [],
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root"
+ ]
+ },
+ {
+ "id": "7801",
+ "name": "Try to add fq qdisc as a child of an inexistent hfsc class",
+ "category": [
+ "qdisc",
+ "sfq",
+ "hfsc"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY root handle a: hfsc"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY parent a:fff2 sfq limit 4",
+ "expExitCode": "2",
+ "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle b:",
+ "matchJSON": [],
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root"
+ ]
}
]
diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile
index 4dde8838261d..5d7f4ecfb816 100644
--- a/tools/testing/selftests/ublk/Makefile
+++ b/tools/testing/selftests/ublk/Makefile
@@ -19,6 +19,7 @@ TEST_PROGS += test_generic_08.sh
TEST_PROGS += test_generic_09.sh
TEST_PROGS += test_generic_10.sh
TEST_PROGS += test_generic_11.sh
+TEST_PROGS += test_generic_12.sh
TEST_PROGS += test_null_01.sh
TEST_PROGS += test_null_02.sh
diff --git a/tools/testing/selftests/ublk/fault_inject.c b/tools/testing/selftests/ublk/fault_inject.c
index 5421774d7867..6e60f7d97125 100644
--- a/tools/testing/selftests/ublk/fault_inject.c
+++ b/tools/testing/selftests/ublk/fault_inject.c
@@ -46,9 +46,9 @@ static int ublk_fault_inject_queue_io(struct ublk_queue *q, int tag)
.tv_nsec = (long long)q->dev->private_data,
};
- ublk_queue_alloc_sqes(q, &sqe, 1);
+ ublk_io_alloc_sqes(ublk_get_io(q, tag), &sqe, 1);
io_uring_prep_timeout(sqe, &ts, 1, 0);
- sqe->user_data = build_user_data(tag, ublksrv_get_op(iod), 0, 1);
+ sqe->user_data = build_user_data(tag, ublksrv_get_op(iod), 0, q->q_id, 1);
ublk_queued_tgt_io(q, tag, 1);
diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c
index 509842df9bee..cfa59b631693 100644
--- a/tools/testing/selftests/ublk/file_backed.c
+++ b/tools/testing/selftests/ublk/file_backed.c
@@ -18,11 +18,11 @@ static int loop_queue_flush_io(struct ublk_queue *q, const struct ublksrv_io_des
unsigned ublk_op = ublksrv_get_op(iod);
struct io_uring_sqe *sqe[1];
- ublk_queue_alloc_sqes(q, sqe, 1);
+ ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 1);
io_uring_prep_fsync(sqe[0], 1 /*fds[1]*/, IORING_FSYNC_DATASYNC);
io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE);
/* bit63 marks us as tgt io */
- sqe[0]->user_data = build_user_data(tag, ublk_op, 0, 1);
+ sqe[0]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1);
return 1;
}
@@ -36,7 +36,7 @@ static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_de
void *addr = (zc | auto_zc) ? NULL : (void *)iod->addr;
if (!zc || auto_zc) {
- ublk_queue_alloc_sqes(q, sqe, 1);
+ ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 1);
if (!sqe[0])
return -ENOMEM;
@@ -48,26 +48,26 @@ static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_de
sqe[0]->buf_index = tag;
io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE);
/* bit63 marks us as tgt io */
- sqe[0]->user_data = build_user_data(tag, ublk_op, 0, 1);
+ sqe[0]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1);
return 1;
}
- ublk_queue_alloc_sqes(q, sqe, 3);
+ ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 3);
- io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, tag);
+ io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
sqe[0]->user_data = build_user_data(tag,
- ublk_cmd_op_nr(sqe[0]->cmd_op), 0, 1);
+ ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1);
io_uring_prep_rw(op, sqe[1], 1 /*fds[1]*/, 0,
iod->nr_sectors << 9,
iod->start_sector << 9);
sqe[1]->buf_index = tag;
sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK;
- sqe[1]->user_data = build_user_data(tag, ublk_op, 0, 1);
+ sqe[1]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1);
- io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, tag);
- sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, 1);
+ io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
+ sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1);
return 2;
}
diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c
index b5131a000795..e2d2042810d4 100644
--- a/tools/testing/selftests/ublk/kublk.c
+++ b/tools/testing/selftests/ublk/kublk.c
@@ -348,8 +348,8 @@ static void ublk_ctrl_dump(struct ublk_dev *dev)
for (i = 0; i < info->nr_hw_queues; i++) {
ublk_print_cpu_set(&affinity[i], buf, sizeof(buf));
- printf("\tqueue %u: tid %d affinity(%s)\n",
- i, dev->q[i].tid, buf);
+ printf("\tqueue %u: affinity(%s)\n",
+ i, buf);
}
free(affinity);
}
@@ -412,16 +412,6 @@ static void ublk_queue_deinit(struct ublk_queue *q)
int i;
int nr_ios = q->q_depth;
- io_uring_unregister_buffers(&q->ring);
-
- io_uring_unregister_ring_fd(&q->ring);
-
- if (q->ring.ring_fd > 0) {
- io_uring_unregister_files(&q->ring);
- close(q->ring.ring_fd);
- q->ring.ring_fd = -1;
- }
-
if (q->io_cmd_buf)
munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q));
@@ -429,20 +419,30 @@ static void ublk_queue_deinit(struct ublk_queue *q)
free(q->ios[i].buf_addr);
}
+static void ublk_thread_deinit(struct ublk_thread *t)
+{
+ io_uring_unregister_buffers(&t->ring);
+
+ io_uring_unregister_ring_fd(&t->ring);
+
+ if (t->ring.ring_fd > 0) {
+ io_uring_unregister_files(&t->ring);
+ close(t->ring.ring_fd);
+ t->ring.ring_fd = -1;
+ }
+}
+
static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags)
{
struct ublk_dev *dev = q->dev;
int depth = dev->dev_info.queue_depth;
- int i, ret = -1;
+ int i;
int cmd_buf_size, io_buf_size;
unsigned long off;
- int ring_depth = dev->tgt.sq_depth, cq_depth = dev->tgt.cq_depth;
q->tgt_ops = dev->tgt.ops;
q->state = 0;
q->q_depth = depth;
- q->cmd_inflight = 0;
- q->tid = gettid();
if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) {
q->state |= UBLKSRV_NO_BUF;
@@ -467,6 +467,7 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags)
for (i = 0; i < q->q_depth; i++) {
q->ios[i].buf_addr = NULL;
q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE;
+ q->ios[i].tag = i;
if (q->state & UBLKSRV_NO_BUF)
continue;
@@ -479,39 +480,57 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags)
}
}
- ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth,
+ return 0;
+ fail:
+ ublk_queue_deinit(q);
+ ublk_err("ublk dev %d queue %d failed\n",
+ dev->dev_info.dev_id, q->q_id);
+ return -ENOMEM;
+}
+
+static int ublk_thread_init(struct ublk_thread *t)
+{
+ struct ublk_dev *dev = t->dev;
+ int ring_depth = dev->tgt.sq_depth, cq_depth = dev->tgt.cq_depth;
+ int ret;
+
+ ret = ublk_setup_ring(&t->ring, ring_depth, cq_depth,
IORING_SETUP_COOP_TASKRUN |
IORING_SETUP_SINGLE_ISSUER |
IORING_SETUP_DEFER_TASKRUN);
if (ret < 0) {
- ublk_err("ublk dev %d queue %d setup io_uring failed %d\n",
- q->dev->dev_info.dev_id, q->q_id, ret);
+ ublk_err("ublk dev %d thread %d setup io_uring failed %d\n",
+ dev->dev_info.dev_id, t->idx, ret);
goto fail;
}
if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) {
- ret = io_uring_register_buffers_sparse(&q->ring, q->q_depth);
+ unsigned nr_ios = dev->dev_info.queue_depth * dev->dev_info.nr_hw_queues;
+ unsigned max_nr_ios_per_thread = nr_ios / dev->nthreads;
+ max_nr_ios_per_thread += !!(nr_ios % dev->nthreads);
+ ret = io_uring_register_buffers_sparse(
+ &t->ring, max_nr_ios_per_thread);
if (ret) {
- ublk_err("ublk dev %d queue %d register spare buffers failed %d",
- dev->dev_info.dev_id, q->q_id, ret);
+ ublk_err("ublk dev %d thread %d register spare buffers failed %d",
+ dev->dev_info.dev_id, t->idx, ret);
goto fail;
}
}
- io_uring_register_ring_fd(&q->ring);
+ io_uring_register_ring_fd(&t->ring);
- ret = io_uring_register_files(&q->ring, dev->fds, dev->nr_fds);
+ ret = io_uring_register_files(&t->ring, dev->fds, dev->nr_fds);
if (ret) {
- ublk_err("ublk dev %d queue %d register files failed %d\n",
- q->dev->dev_info.dev_id, q->q_id, ret);
+ ublk_err("ublk dev %d thread %d register files failed %d\n",
+ t->dev->dev_info.dev_id, t->idx, ret);
goto fail;
}
return 0;
- fail:
- ublk_queue_deinit(q);
- ublk_err("ublk dev %d queue %d failed\n",
- dev->dev_info.dev_id, q->q_id);
+fail:
+ ublk_thread_deinit(t);
+ ublk_err("ublk dev %d thread %d init failed\n",
+ dev->dev_info.dev_id, t->idx);
return -ENOMEM;
}
@@ -562,7 +581,7 @@ static void ublk_set_auto_buf_reg(const struct ublk_queue *q,
if (q->tgt_ops->buf_index)
buf.index = q->tgt_ops->buf_index(q, tag);
else
- buf.index = tag;
+ buf.index = q->ios[tag].buf_index;
if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK)
buf.flags = UBLK_AUTO_BUF_REG_FALLBACK;
@@ -570,8 +589,10 @@ static void ublk_set_auto_buf_reg(const struct ublk_queue *q,
sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&buf);
}
-int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag)
+int ublk_queue_io_cmd(struct ublk_io *io)
{
+ struct ublk_thread *t = io->t;
+ struct ublk_queue *q = ublk_io_to_queue(io);
struct ublksrv_io_cmd *cmd;
struct io_uring_sqe *sqe[1];
unsigned int cmd_op = 0;
@@ -596,13 +617,13 @@ int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag)
else if (io->flags & UBLKSRV_NEED_FETCH_RQ)
cmd_op = UBLK_U_IO_FETCH_REQ;
- if (io_uring_sq_space_left(&q->ring) < 1)
- io_uring_submit(&q->ring);
+ if (io_uring_sq_space_left(&t->ring) < 1)
+ io_uring_submit(&t->ring);
- ublk_queue_alloc_sqes(q, sqe, 1);
+ ublk_io_alloc_sqes(io, sqe, 1);
if (!sqe[0]) {
- ublk_err("%s: run out of sqe %d, tag %d\n",
- __func__, q->q_id, tag);
+ ublk_err("%s: run out of sqe. thread %u, tag %d\n",
+ __func__, t->idx, io->tag);
return -1;
}
@@ -617,7 +638,7 @@ int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag)
sqe[0]->opcode = IORING_OP_URING_CMD;
sqe[0]->flags = IOSQE_FIXED_FILE;
sqe[0]->rw_flags = 0;
- cmd->tag = tag;
+ cmd->tag = io->tag;
cmd->q_id = q->q_id;
if (!(q->state & UBLKSRV_NO_BUF))
cmd->addr = (__u64) (uintptr_t) io->buf_addr;
@@ -625,37 +646,72 @@ int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag)
cmd->addr = 0;
if (q->state & UBLKSRV_AUTO_BUF_REG)
- ublk_set_auto_buf_reg(q, sqe[0], tag);
+ ublk_set_auto_buf_reg(q, sqe[0], io->tag);
- user_data = build_user_data(tag, _IOC_NR(cmd_op), 0, 0);
+ user_data = build_user_data(io->tag, _IOC_NR(cmd_op), 0, q->q_id, 0);
io_uring_sqe_set_data64(sqe[0], user_data);
io->flags = 0;
- q->cmd_inflight += 1;
+ t->cmd_inflight += 1;
- ublk_dbg(UBLK_DBG_IO_CMD, "%s: (qid %d tag %u cmd_op %u) iof %x stopping %d\n",
- __func__, q->q_id, tag, cmd_op,
- io->flags, !!(q->state & UBLKSRV_QUEUE_STOPPING));
+ ublk_dbg(UBLK_DBG_IO_CMD, "%s: (thread %u qid %d tag %u cmd_op %u) iof %x stopping %d\n",
+ __func__, t->idx, q->q_id, io->tag, cmd_op,
+ io->flags, !!(t->state & UBLKSRV_THREAD_STOPPING));
return 1;
}
-static void ublk_submit_fetch_commands(struct ublk_queue *q)
+static void ublk_submit_fetch_commands(struct ublk_thread *t)
{
- int i = 0;
+ struct ublk_queue *q;
+ struct ublk_io *io;
+ int i = 0, j = 0;
- for (i = 0; i < q->q_depth; i++)
- ublk_queue_io_cmd(q, &q->ios[i], i);
+ if (t->dev->per_io_tasks) {
+ /*
+ * Lexicographically order all the (qid,tag) pairs, with
+ * qid taking priority (so (1,0) > (0,1)). Then make
+ * this thread the daemon for every Nth entry in this
+ * list (N is the number of threads), starting at this
+ * thread's index. This ensures that each queue is
+ * handled by as many ublk server threads as possible,
+ * so that load that is concentrated on one or a few
+ * queues can make use of all ublk server threads.
+ */
+ const struct ublksrv_ctrl_dev_info *dinfo = &t->dev->dev_info;
+ int nr_ios = dinfo->nr_hw_queues * dinfo->queue_depth;
+ for (i = t->idx; i < nr_ios; i += t->dev->nthreads) {
+ int q_id = i / dinfo->queue_depth;
+ int tag = i % dinfo->queue_depth;
+ q = &t->dev->q[q_id];
+ io = &q->ios[tag];
+ io->t = t;
+ io->buf_index = j++;
+ ublk_queue_io_cmd(io);
+ }
+ } else {
+ /*
+ * Service exclusively the queue whose q_id matches our
+ * thread index.
+ */
+ struct ublk_queue *q = &t->dev->q[t->idx];
+ for (i = 0; i < q->q_depth; i++) {
+ io = &q->ios[i];
+ io->t = t;
+ io->buf_index = i;
+ ublk_queue_io_cmd(io);
+ }
+ }
}
-static int ublk_queue_is_idle(struct ublk_queue *q)
+static int ublk_thread_is_idle(struct ublk_thread *t)
{
- return !io_uring_sq_ready(&q->ring) && !q->io_inflight;
+ return !io_uring_sq_ready(&t->ring) && !t->io_inflight;
}
-static int ublk_queue_is_done(struct ublk_queue *q)
+static int ublk_thread_is_done(struct ublk_thread *t)
{
- return (q->state & UBLKSRV_QUEUE_STOPPING) && ublk_queue_is_idle(q);
+ return (t->state & UBLKSRV_THREAD_STOPPING) && ublk_thread_is_idle(t);
}
static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q,
@@ -673,14 +729,16 @@ static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q,
q->tgt_ops->tgt_io_done(q, tag, cqe);
}
-static void ublk_handle_cqe(struct io_uring *r,
+static void ublk_handle_cqe(struct ublk_thread *t,
struct io_uring_cqe *cqe, void *data)
{
- struct ublk_queue *q = container_of(r, struct ublk_queue, ring);
+ struct ublk_dev *dev = t->dev;
+ unsigned q_id = user_data_to_q_id(cqe->user_data);
+ struct ublk_queue *q = &dev->q[q_id];
unsigned tag = user_data_to_tag(cqe->user_data);
unsigned cmd_op = user_data_to_op(cqe->user_data);
int fetch = (cqe->res != UBLK_IO_RES_ABORT) &&
- !(q->state & UBLKSRV_QUEUE_STOPPING);
+ !(t->state & UBLKSRV_THREAD_STOPPING);
struct ublk_io *io;
if (cqe->res < 0 && cqe->res != -ENODEV)
@@ -691,7 +749,7 @@ static void ublk_handle_cqe(struct io_uring *r,
__func__, cqe->res, q->q_id, tag, cmd_op,
is_target_io(cqe->user_data),
user_data_to_tgt_data(cqe->user_data),
- (q->state & UBLKSRV_QUEUE_STOPPING));
+ (t->state & UBLKSRV_THREAD_STOPPING));
/* Don't retrieve io in case of target io */
if (is_target_io(cqe->user_data)) {
@@ -700,10 +758,10 @@ static void ublk_handle_cqe(struct io_uring *r,
}
io = &q->ios[tag];
- q->cmd_inflight--;
+ t->cmd_inflight--;
if (!fetch) {
- q->state |= UBLKSRV_QUEUE_STOPPING;
+ t->state |= UBLKSRV_THREAD_STOPPING;
io->flags &= ~UBLKSRV_NEED_FETCH_RQ;
}
@@ -713,7 +771,7 @@ static void ublk_handle_cqe(struct io_uring *r,
q->tgt_ops->queue_io(q, tag);
} else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) {
io->flags |= UBLKSRV_NEED_GET_DATA | UBLKSRV_IO_FREE;
- ublk_queue_io_cmd(q, io, tag);
+ ublk_queue_io_cmd(io);
} else {
/*
* COMMIT_REQ will be completed immediately since no fetching
@@ -727,92 +785,93 @@ static void ublk_handle_cqe(struct io_uring *r,
}
}
-static int ublk_reap_events_uring(struct io_uring *r)
+static int ublk_reap_events_uring(struct ublk_thread *t)
{
struct io_uring_cqe *cqe;
unsigned head;
int count = 0;
- io_uring_for_each_cqe(r, head, cqe) {
- ublk_handle_cqe(r, cqe, NULL);
+ io_uring_for_each_cqe(&t->ring, head, cqe) {
+ ublk_handle_cqe(t, cqe, NULL);
count += 1;
}
- io_uring_cq_advance(r, count);
+ io_uring_cq_advance(&t->ring, count);
return count;
}
-static int ublk_process_io(struct ublk_queue *q)
+static int ublk_process_io(struct ublk_thread *t)
{
int ret, reapped;
- ublk_dbg(UBLK_DBG_QUEUE, "dev%d-q%d: to_submit %d inflight cmd %u stopping %d\n",
- q->dev->dev_info.dev_id,
- q->q_id, io_uring_sq_ready(&q->ring),
- q->cmd_inflight,
- (q->state & UBLKSRV_QUEUE_STOPPING));
+ ublk_dbg(UBLK_DBG_THREAD, "dev%d-t%u: to_submit %d inflight cmd %u stopping %d\n",
+ t->dev->dev_info.dev_id,
+ t->idx, io_uring_sq_ready(&t->ring),
+ t->cmd_inflight,
+ (t->state & UBLKSRV_THREAD_STOPPING));
- if (ublk_queue_is_done(q))
+ if (ublk_thread_is_done(t))
return -ENODEV;
- ret = io_uring_submit_and_wait(&q->ring, 1);
- reapped = ublk_reap_events_uring(&q->ring);
+ ret = io_uring_submit_and_wait(&t->ring, 1);
+ reapped = ublk_reap_events_uring(t);
- ublk_dbg(UBLK_DBG_QUEUE, "submit result %d, reapped %d stop %d idle %d\n",
- ret, reapped, (q->state & UBLKSRV_QUEUE_STOPPING),
- (q->state & UBLKSRV_QUEUE_IDLE));
+ ublk_dbg(UBLK_DBG_THREAD, "submit result %d, reapped %d stop %d idle %d\n",
+ ret, reapped, (t->state & UBLKSRV_THREAD_STOPPING),
+ (t->state & UBLKSRV_THREAD_IDLE));
return reapped;
}
-static void ublk_queue_set_sched_affinity(const struct ublk_queue *q,
+static void ublk_thread_set_sched_affinity(const struct ublk_thread *t,
cpu_set_t *cpuset)
{
if (sched_setaffinity(0, sizeof(*cpuset), cpuset) < 0)
- ublk_err("ublk dev %u queue %u set affinity failed",
- q->dev->dev_info.dev_id, q->q_id);
+ ublk_err("ublk dev %u thread %u set affinity failed",
+ t->dev->dev_info.dev_id, t->idx);
}
-struct ublk_queue_info {
- struct ublk_queue *q;
- sem_t *queue_sem;
+struct ublk_thread_info {
+ struct ublk_dev *dev;
+ unsigned idx;
+ sem_t *ready;
cpu_set_t *affinity;
- unsigned char auto_zc_fallback;
};
static void *ublk_io_handler_fn(void *data)
{
- struct ublk_queue_info *info = data;
- struct ublk_queue *q = info->q;
- int dev_id = q->dev->dev_info.dev_id;
- unsigned extra_flags = 0;
+ struct ublk_thread_info *info = data;
+ struct ublk_thread *t = &info->dev->threads[info->idx];
+ int dev_id = info->dev->dev_info.dev_id;
int ret;
- if (info->auto_zc_fallback)
- extra_flags = UBLKSRV_AUTO_BUF_REG_FALLBACK;
+ t->dev = info->dev;
+ t->idx = info->idx;
- ret = ublk_queue_init(q, extra_flags);
+ ret = ublk_thread_init(t);
if (ret) {
- ublk_err("ublk dev %d queue %d init queue failed\n",
- dev_id, q->q_id);
+ ublk_err("ublk dev %d thread %u init failed\n",
+ dev_id, t->idx);
return NULL;
}
/* IO perf is sensitive with queue pthread affinity on NUMA machine*/
- ublk_queue_set_sched_affinity(q, info->affinity);
- sem_post(info->queue_sem);
+ if (info->affinity)
+ ublk_thread_set_sched_affinity(t, info->affinity);
+ sem_post(info->ready);
- ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n",
- q->tid, dev_id, q->q_id);
+ ublk_dbg(UBLK_DBG_THREAD, "tid %d: ublk dev %d thread %u started\n",
+ gettid(), dev_id, t->idx);
/* submit all io commands to ublk driver */
- ublk_submit_fetch_commands(q);
+ ublk_submit_fetch_commands(t);
do {
- if (ublk_process_io(q) < 0)
+ if (ublk_process_io(t) < 0)
break;
} while (1);
- ublk_dbg(UBLK_DBG_QUEUE, "ublk dev %d queue %d exited\n", dev_id, q->q_id);
- ublk_queue_deinit(q);
+ ublk_dbg(UBLK_DBG_THREAD, "tid %d: ublk dev %d thread %d exiting\n",
+ gettid(), dev_id, t->idx);
+ ublk_thread_deinit(t);
return NULL;
}
@@ -855,20 +914,20 @@ static int ublk_send_dev_event(const struct dev_ctx *ctx, struct ublk_dev *dev,
static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
{
const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;
- struct ublk_queue_info *qinfo;
+ struct ublk_thread_info *tinfo;
+ unsigned extra_flags = 0;
cpu_set_t *affinity_buf;
void *thread_ret;
- sem_t queue_sem;
+ sem_t ready;
int ret, i;
ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);
- qinfo = (struct ublk_queue_info *)calloc(sizeof(struct ublk_queue_info),
- dinfo->nr_hw_queues);
- if (!qinfo)
+ tinfo = calloc(sizeof(struct ublk_thread_info), dev->nthreads);
+ if (!tinfo)
return -ENOMEM;
- sem_init(&queue_sem, 0, 0);
+ sem_init(&ready, 0, 0);
ret = ublk_dev_prep(ctx, dev);
if (ret)
return ret;
@@ -877,22 +936,44 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
if (ret)
return ret;
+ if (ctx->auto_zc_fallback)
+ extra_flags = UBLKSRV_AUTO_BUF_REG_FALLBACK;
+
for (i = 0; i < dinfo->nr_hw_queues; i++) {
dev->q[i].dev = dev;
dev->q[i].q_id = i;
- qinfo[i].q = &dev->q[i];
- qinfo[i].queue_sem = &queue_sem;
- qinfo[i].affinity = &affinity_buf[i];
- qinfo[i].auto_zc_fallback = ctx->auto_zc_fallback;
- pthread_create(&dev->q[i].thread, NULL,
+ ret = ublk_queue_init(&dev->q[i], extra_flags);
+ if (ret) {
+ ublk_err("ublk dev %d queue %d init queue failed\n",
+ dinfo->dev_id, i);
+ goto fail;
+ }
+ }
+
+ for (i = 0; i < dev->nthreads; i++) {
+ tinfo[i].dev = dev;
+ tinfo[i].idx = i;
+ tinfo[i].ready = &ready;
+
+ /*
+ * If threads are not tied 1:1 to queues, setting thread
+ * affinity based on queue affinity makes little sense.
+ * However, thread CPU affinity has significant impact
+ * on performance, so to compare fairly, we'll still set
+ * thread CPU affinity based on queue affinity where
+ * possible.
+ */
+ if (dev->nthreads == dinfo->nr_hw_queues)
+ tinfo[i].affinity = &affinity_buf[i];
+ pthread_create(&dev->threads[i].thread, NULL,
ublk_io_handler_fn,
- &qinfo[i]);
+ &tinfo[i]);
}
- for (i = 0; i < dinfo->nr_hw_queues; i++)
- sem_wait(&queue_sem);
- free(qinfo);
+ for (i = 0; i < dev->nthreads; i++)
+ sem_wait(&ready);
+ free(tinfo);
free(affinity_buf);
/* everything is fine now, start us */
@@ -914,9 +995,11 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
ublk_send_dev_event(ctx, dev, dev->dev_info.dev_id);
/* wait until we are terminated */
- for (i = 0; i < dinfo->nr_hw_queues; i++)
- pthread_join(dev->q[i].thread, &thread_ret);
+ for (i = 0; i < dev->nthreads; i++)
+ pthread_join(dev->threads[i].thread, &thread_ret);
fail:
+ for (i = 0; i < dinfo->nr_hw_queues; i++)
+ ublk_queue_deinit(&dev->q[i]);
ublk_dev_unprep(dev);
ublk_dbg(UBLK_DBG_DEV, "%s exit\n", __func__);
@@ -1022,13 +1105,14 @@ wait:
static int __cmd_dev_add(const struct dev_ctx *ctx)
{
+ unsigned nthreads = ctx->nthreads;
unsigned nr_queues = ctx->nr_hw_queues;
const char *tgt_type = ctx->tgt_type;
unsigned depth = ctx->queue_depth;
__u64 features;
const struct ublk_tgt_ops *ops;
struct ublksrv_ctrl_dev_info *info;
- struct ublk_dev *dev;
+ struct ublk_dev *dev = NULL;
int dev_id = ctx->dev_id;
int ret, i;
@@ -1036,29 +1120,55 @@ static int __cmd_dev_add(const struct dev_ctx *ctx)
if (!ops) {
ublk_err("%s: no such tgt type, type %s\n",
__func__, tgt_type);
- return -ENODEV;
+ ret = -ENODEV;
+ goto fail;
}
if (nr_queues > UBLK_MAX_QUEUES || depth > UBLK_QUEUE_DEPTH) {
ublk_err("%s: invalid nr_queues or depth queues %u depth %u\n",
__func__, nr_queues, depth);
- return -EINVAL;
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* default to 1:1 threads:queues if nthreads is unspecified */
+ if (!nthreads)
+ nthreads = nr_queues;
+
+ if (nthreads > UBLK_MAX_THREADS) {
+ ublk_err("%s: %u is too many threads (max %u)\n",
+ __func__, nthreads, UBLK_MAX_THREADS);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ if (nthreads != nr_queues && !ctx->per_io_tasks) {
+ ublk_err("%s: threads %u must be same as queues %u if "
+ "not using per_io_tasks\n",
+ __func__, nthreads, nr_queues);
+ ret = -EINVAL;
+ goto fail;
}
dev = ublk_ctrl_init();
if (!dev) {
ublk_err("%s: can't alloc dev id %d, type %s\n",
__func__, dev_id, tgt_type);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
/* kernel doesn't support get_features */
ret = ublk_ctrl_get_features(dev, &features);
- if (ret < 0)
- return -EINVAL;
+ if (ret < 0) {
+ ret = -EINVAL;
+ goto fail;
+ }
- if (!(features & UBLK_F_CMD_IOCTL_ENCODE))
- return -ENOTSUP;
+ if (!(features & UBLK_F_CMD_IOCTL_ENCODE)) {
+ ret = -ENOTSUP;
+ goto fail;
+ }
info = &dev->dev_info;
info->dev_id = ctx->dev_id;
@@ -1068,6 +1178,8 @@ static int __cmd_dev_add(const struct dev_ctx *ctx)
if ((features & UBLK_F_QUIESCE) &&
(info->flags & UBLK_F_USER_RECOVERY))
info->flags |= UBLK_F_QUIESCE;
+ dev->nthreads = nthreads;
+ dev->per_io_tasks = ctx->per_io_tasks;
dev->tgt.ops = ops;
dev->tgt.sq_depth = depth;
dev->tgt.cq_depth = depth;
@@ -1097,7 +1209,8 @@ static int __cmd_dev_add(const struct dev_ctx *ctx)
fail:
if (ret < 0)
ublk_send_dev_event(ctx, dev, -1);
- ublk_ctrl_deinit(dev);
+ if (dev)
+ ublk_ctrl_deinit(dev);
return ret;
}
@@ -1159,6 +1272,8 @@ run:
shmctl(ctx->_shmid, IPC_RMID, NULL);
/* wait for child and detach from it */
wait(NULL);
+ if (exit_code == EXIT_FAILURE)
+ ublk_err("%s: command failed\n", __func__);
exit(exit_code);
} else {
exit(EXIT_FAILURE);
@@ -1266,6 +1381,7 @@ static int cmd_dev_get_features(void)
[const_ilog2(UBLK_F_UPDATE_SIZE)] = "UPDATE_SIZE",
[const_ilog2(UBLK_F_AUTO_BUF_REG)] = "AUTO_BUF_REG",
[const_ilog2(UBLK_F_QUIESCE)] = "QUIESCE",
+ [const_ilog2(UBLK_F_PER_IO_DAEMON)] = "PER_IO_DAEMON",
};
struct ublk_dev *dev;
__u64 features = 0;
@@ -1360,8 +1476,10 @@ static void __cmd_create_help(char *exe, bool recovery)
exe, recovery ? "recover" : "add");
printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1 ] [-g]\n");
printf("\t[-e 0|1 ] [-i 0|1]\n");
+ printf("\t[--nthreads threads] [--per_io_tasks]\n");
printf("\t[target options] [backfile1] [backfile2] ...\n");
printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n");
+ printf("\tdefault: nthreads=nr_queues");
for (i = 0; i < sizeof(tgt_ops_list) / sizeof(tgt_ops_list[0]); i++) {
const struct ublk_tgt_ops *ops = tgt_ops_list[i];
@@ -1418,6 +1536,8 @@ int main(int argc, char *argv[])
{ "auto_zc", 0, NULL, 0 },
{ "auto_zc_fallback", 0, NULL, 0 },
{ "size", 1, NULL, 's'},
+ { "nthreads", 1, NULL, 0 },
+ { "per_io_tasks", 0, NULL, 0 },
{ 0, 0, 0, 0 }
};
const struct ublk_tgt_ops *ops = NULL;
@@ -1493,6 +1613,10 @@ int main(int argc, char *argv[])
ctx.flags |= UBLK_F_AUTO_BUF_REG;
if (!strcmp(longopts[option_idx].name, "auto_zc_fallback"))
ctx.auto_zc_fallback = 1;
+ if (!strcmp(longopts[option_idx].name, "nthreads"))
+ ctx.nthreads = strtol(optarg, NULL, 10);
+ if (!strcmp(longopts[option_idx].name, "per_io_tasks"))
+ ctx.per_io_tasks = 1;
break;
case '?':
/*
diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h
index e34508bf5798..6be601536b3d 100644
--- a/tools/testing/selftests/ublk/kublk.h
+++ b/tools/testing/selftests/ublk/kublk.h
@@ -49,11 +49,14 @@
#define UBLKSRV_IO_IDLE_SECS 20
#define UBLK_IO_MAX_BYTES (1 << 20)
-#define UBLK_MAX_QUEUES 32
+#define UBLK_MAX_QUEUES_SHIFT 5
+#define UBLK_MAX_QUEUES (1 << UBLK_MAX_QUEUES_SHIFT)
+#define UBLK_MAX_THREADS_SHIFT 5
+#define UBLK_MAX_THREADS (1 << UBLK_MAX_THREADS_SHIFT)
#define UBLK_QUEUE_DEPTH 1024
#define UBLK_DBG_DEV (1U << 0)
-#define UBLK_DBG_QUEUE (1U << 1)
+#define UBLK_DBG_THREAD (1U << 1)
#define UBLK_DBG_IO_CMD (1U << 2)
#define UBLK_DBG_IO (1U << 3)
#define UBLK_DBG_CTRL_CMD (1U << 4)
@@ -61,6 +64,7 @@
struct ublk_dev;
struct ublk_queue;
+struct ublk_thread;
struct stripe_ctx {
/* stripe */
@@ -76,6 +80,7 @@ struct dev_ctx {
char tgt_type[16];
unsigned long flags;
unsigned nr_hw_queues;
+ unsigned short nthreads;
unsigned queue_depth;
int dev_id;
int nr_files;
@@ -85,6 +90,7 @@ struct dev_ctx {
unsigned int fg:1;
unsigned int recovery:1;
unsigned int auto_zc_fallback:1;
+ unsigned int per_io_tasks:1;
int _evtfd;
int _shmid;
@@ -123,10 +129,14 @@ struct ublk_io {
unsigned short flags;
unsigned short refs; /* used by target code only */
+ int tag;
+
int result;
+ unsigned short buf_index;
unsigned short tgt_ios;
void *private_data;
+ struct ublk_thread *t;
};
struct ublk_tgt_ops {
@@ -165,28 +175,39 @@ struct ublk_tgt {
struct ublk_queue {
int q_id;
int q_depth;
- unsigned int cmd_inflight;
- unsigned int io_inflight;
struct ublk_dev *dev;
const struct ublk_tgt_ops *tgt_ops;
struct ublksrv_io_desc *io_cmd_buf;
- struct io_uring ring;
+
struct ublk_io ios[UBLK_QUEUE_DEPTH];
-#define UBLKSRV_QUEUE_STOPPING (1U << 0)
-#define UBLKSRV_QUEUE_IDLE (1U << 1)
#define UBLKSRV_NO_BUF (1U << 2)
#define UBLKSRV_ZC (1U << 3)
#define UBLKSRV_AUTO_BUF_REG (1U << 4)
#define UBLKSRV_AUTO_BUF_REG_FALLBACK (1U << 5)
unsigned state;
- pid_t tid;
+};
+
+struct ublk_thread {
+ struct ublk_dev *dev;
+ struct io_uring ring;
+ unsigned int cmd_inflight;
+ unsigned int io_inflight;
+
pthread_t thread;
+ unsigned idx;
+
+#define UBLKSRV_THREAD_STOPPING (1U << 0)
+#define UBLKSRV_THREAD_IDLE (1U << 1)
+ unsigned state;
};
struct ublk_dev {
struct ublk_tgt tgt;
struct ublksrv_ctrl_dev_info dev_info;
struct ublk_queue q[UBLK_MAX_QUEUES];
+ struct ublk_thread threads[UBLK_MAX_THREADS];
+ unsigned nthreads;
+ unsigned per_io_tasks;
int fds[MAX_BACK_FILES + 1]; /* fds[0] points to /dev/ublkcN */
int nr_fds;
@@ -211,7 +232,7 @@ struct ublk_dev {
extern unsigned int ublk_dbg_mask;
-extern int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag);
+extern int ublk_queue_io_cmd(struct ublk_io *io);
static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod)
@@ -225,11 +246,14 @@ static inline int is_target_io(__u64 user_data)
}
static inline __u64 build_user_data(unsigned tag, unsigned op,
- unsigned tgt_data, unsigned is_target_io)
+ unsigned tgt_data, unsigned q_id, unsigned is_target_io)
{
- assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16));
+ /* we only have 7 bits to encode q_id */
+ _Static_assert(UBLK_MAX_QUEUES_SHIFT <= 7);
+ assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16) && !(q_id >> 7));
- return tag | (op << 16) | (tgt_data << 24) | (__u64)is_target_io << 63;
+ return tag | (op << 16) | (tgt_data << 24) |
+ (__u64)q_id << 56 | (__u64)is_target_io << 63;
}
static inline unsigned int user_data_to_tag(__u64 user_data)
@@ -247,6 +271,11 @@ static inline unsigned int user_data_to_tgt_data(__u64 user_data)
return (user_data >> 24) & 0xffff;
}
+static inline unsigned int user_data_to_q_id(__u64 user_data)
+{
+ return (user_data >> 56) & 0x7f;
+}
+
static inline unsigned short ublk_cmd_op_nr(unsigned int op)
{
return _IOC_NR(op);
@@ -280,17 +309,23 @@ static inline void ublk_dbg(int level, const char *fmt, ...)
}
}
-static inline int ublk_queue_alloc_sqes(struct ublk_queue *q,
+static inline struct ublk_queue *ublk_io_to_queue(const struct ublk_io *io)
+{
+ return container_of(io, struct ublk_queue, ios[io->tag]);
+}
+
+static inline int ublk_io_alloc_sqes(struct ublk_io *io,
struct io_uring_sqe *sqes[], int nr_sqes)
{
- unsigned left = io_uring_sq_space_left(&q->ring);
+ struct io_uring *ring = &io->t->ring;
+ unsigned left = io_uring_sq_space_left(ring);
int i;
if (left < nr_sqes)
- io_uring_submit(&q->ring);
+ io_uring_submit(ring);
for (i = 0; i < nr_sqes; i++) {
- sqes[i] = io_uring_get_sqe(&q->ring);
+ sqes[i] = io_uring_get_sqe(ring);
if (!sqes[i])
return i;
}
@@ -373,7 +408,7 @@ static inline int ublk_complete_io(struct ublk_queue *q, unsigned tag, int res)
ublk_mark_io_done(io, res);
- return ublk_queue_io_cmd(q, io, tag);
+ return ublk_queue_io_cmd(io);
}
static inline void ublk_queued_tgt_io(struct ublk_queue *q, unsigned tag, int queued)
@@ -383,7 +418,7 @@ static inline void ublk_queued_tgt_io(struct ublk_queue *q, unsigned tag, int qu
else {
struct ublk_io *io = ublk_get_io(q, tag);
- q->io_inflight += queued;
+ io->t->io_inflight += queued;
io->tgt_ios = queued;
io->result = 0;
}
@@ -393,7 +428,7 @@ static inline int ublk_completed_tgt_io(struct ublk_queue *q, unsigned tag)
{
struct ublk_io *io = ublk_get_io(q, tag);
- q->io_inflight--;
+ io->t->io_inflight--;
return --io->tgt_ios == 0;
}
diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c
index 44aca31cf2b0..afe0b99d77ee 100644
--- a/tools/testing/selftests/ublk/null.c
+++ b/tools/testing/selftests/ublk/null.c
@@ -43,7 +43,7 @@ static int ublk_null_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
}
static void __setup_nop_io(int tag, const struct ublksrv_io_desc *iod,
- struct io_uring_sqe *sqe)
+ struct io_uring_sqe *sqe, int q_id)
{
unsigned ublk_op = ublksrv_get_op(iod);
@@ -52,7 +52,7 @@ static void __setup_nop_io(int tag, const struct ublksrv_io_desc *iod,
sqe->flags |= IOSQE_FIXED_FILE;
sqe->rw_flags = IORING_NOP_FIXED_BUFFER | IORING_NOP_INJECT_RESULT;
sqe->len = iod->nr_sectors << 9; /* injected result */
- sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
+ sqe->user_data = build_user_data(tag, ublk_op, 0, q_id, 1);
}
static int null_queue_zc_io(struct ublk_queue *q, int tag)
@@ -60,18 +60,18 @@ static int null_queue_zc_io(struct ublk_queue *q, int tag)
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
struct io_uring_sqe *sqe[3];
- ublk_queue_alloc_sqes(q, sqe, 3);
+ ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 3);
- io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, tag);
+ io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
sqe[0]->user_data = build_user_data(tag,
- ublk_cmd_op_nr(sqe[0]->cmd_op), 0, 1);
+ ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1);
sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
- __setup_nop_io(tag, iod, sqe[1]);
+ __setup_nop_io(tag, iod, sqe[1], q->q_id);
sqe[1]->flags |= IOSQE_IO_HARDLINK;
- io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, tag);
- sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, 1);
+ io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
+ sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1);
// buf register is marked as IOSQE_CQE_SKIP_SUCCESS
return 2;
@@ -82,8 +82,8 @@ static int null_queue_auto_zc_io(struct ublk_queue *q, int tag)
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
struct io_uring_sqe *sqe[1];
- ublk_queue_alloc_sqes(q, sqe, 1);
- __setup_nop_io(tag, iod, sqe[0]);
+ ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 1);
+ __setup_nop_io(tag, iod, sqe[0], q->q_id);
return 1;
}
@@ -136,7 +136,7 @@ static unsigned short ublk_null_buf_index(const struct ublk_queue *q, int tag)
{
if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK)
return (unsigned short)-1;
- return tag;
+ return q->ios[tag].buf_index;
}
const struct ublk_tgt_ops null_tgt_ops = {
diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c
index 404a143bf3d6..37d50bbf5f5e 100644
--- a/tools/testing/selftests/ublk/stripe.c
+++ b/tools/testing/selftests/ublk/stripe.c
@@ -138,13 +138,13 @@ static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_
io->private_data = s;
calculate_stripe_array(conf, iod, s, base);
- ublk_queue_alloc_sqes(q, sqe, s->nr + extra);
+ ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, s->nr + extra);
if (zc) {
- io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, tag);
+ io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, io->buf_index);
sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
sqe[0]->user_data = build_user_data(tag,
- ublk_cmd_op_nr(sqe[0]->cmd_op), 0, 1);
+ ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1);
}
for (i = zc; i < s->nr + extra - zc; i++) {
@@ -162,13 +162,14 @@ static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_
sqe[i]->flags |= IOSQE_IO_HARDLINK;
}
/* bit63 marks us as tgt io */
- sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i - zc, 1);
+ sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i - zc, q->q_id, 1);
}
if (zc) {
struct io_uring_sqe *unreg = sqe[s->nr + 1];
- io_uring_prep_buf_unregister(unreg, 0, tag, q->q_id, tag);
- unreg->user_data = build_user_data(tag, ublk_cmd_op_nr(unreg->cmd_op), 0, 1);
+ io_uring_prep_buf_unregister(unreg, 0, tag, q->q_id, io->buf_index);
+ unreg->user_data = build_user_data(
+ tag, ublk_cmd_op_nr(unreg->cmd_op), 0, q->q_id, 1);
}
/* register buffer is skip_success */
@@ -181,11 +182,11 @@ static int handle_flush(struct ublk_queue *q, const struct ublksrv_io_desc *iod,
struct io_uring_sqe *sqe[NR_STRIPE];
int i;
- ublk_queue_alloc_sqes(q, sqe, conf->nr_files);
+ ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, conf->nr_files);
for (i = 0; i < conf->nr_files; i++) {
io_uring_prep_fsync(sqe[i], i + 1, IORING_FSYNC_DATASYNC);
io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
- sqe[i]->user_data = build_user_data(tag, UBLK_IO_OP_FLUSH, 0, 1);
+ sqe[i]->user_data = build_user_data(tag, UBLK_IO_OP_FLUSH, 0, q->q_id, 1);
}
return conf->nr_files;
}
diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh
index 0145569ee7e9..8a4dbd09feb0 100755
--- a/tools/testing/selftests/ublk/test_common.sh
+++ b/tools/testing/selftests/ublk/test_common.sh
@@ -278,6 +278,11 @@ __run_io_and_remove()
fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \
--rw=randrw --norandommap --iodepth=256 --size="${size}" --numjobs="$(nproc)" \
--runtime=20 --time_based > /dev/null 2>&1 &
+ fio --name=batchjob --filename=/dev/ublkb"${dev_id}" --ioengine=io_uring \
+ --rw=randrw --norandommap --iodepth=256 --size="${size}" \
+ --numjobs="$(nproc)" --runtime=20 --time_based \
+ --iodepth_batch_submit=32 --iodepth_batch_complete_min=32 \
+ --force_async=7 > /dev/null 2>&1 &
sleep 2
if [ "${kill_server}" = "yes" ]; then
local state
diff --git a/tools/testing/selftests/ublk/test_generic_12.sh b/tools/testing/selftests/ublk/test_generic_12.sh
new file mode 100755
index 000000000000..7abbb00d251d
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_12.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_12"
+ERR_CODE=0
+
+if ! _have_program bpftrace; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "null" "do imbalanced load, it should be balanced over I/O threads"
+
+NTHREADS=6
+dev_id=$(_add_ublk_dev -t null -q 4 -d 16 --nthreads $NTHREADS --per_io_tasks)
+_check_add_dev $TID $?
+
+dev_t=$(_get_disk_dev_t "$dev_id")
+bpftrace trace/count_ios_per_tid.bt "$dev_t" > "$UBLK_TMP" 2>&1 &
+btrace_pid=$!
+sleep 2
+
+if ! kill -0 "$btrace_pid" > /dev/null 2>&1; then
+ _cleanup_test "null"
+ exit "$UBLK_SKIP_CODE"
+fi
+
+# do imbalanced I/O on the ublk device
+# pin to cpu 0 to prevent migration/only target one queue
+fio --name=write_seq \
+ --filename=/dev/ublkb"${dev_id}" \
+ --ioengine=libaio --iodepth=16 \
+ --rw=write \
+ --size=512M \
+ --direct=1 \
+ --bs=4k \
+ --cpus_allowed=0 > /dev/null 2>&1
+ERR_CODE=$?
+kill "$btrace_pid"
+wait
+
+# check that every task handles some I/O, even though all I/O was issued
+# from a single CPU. when ublk gets support for round-robin tag
+# allocation, this check can be strengthened to assert that every thread
+# handles the same number of I/Os
+NR_THREADS_THAT_HANDLED_IO=$(grep -c '@' ${UBLK_TMP})
+if [[ $NR_THREADS_THAT_HANDLED_IO -ne $NTHREADS ]]; then
+ echo "only $NR_THREADS_THAT_HANDLED_IO handled I/O! expected $NTHREADS"
+ cat "$UBLK_TMP"
+ ERR_CODE=255
+fi
+
+_cleanup_test "null"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_03.sh b/tools/testing/selftests/ublk/test_stress_03.sh
index 7d728ce50774..3ed4c9b2d8c0 100755
--- a/tools/testing/selftests/ublk/test_stress_03.sh
+++ b/tools/testing/selftests/ublk/test_stress_03.sh
@@ -32,14 +32,23 @@ _create_backfile 2 128M
ublk_io_and_remove 8G -t null -q 4 -z &
ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" &
ublk_io_and_remove 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
if _have_feature "AUTO_BUF_REG"; then
ublk_io_and_remove 8G -t null -q 4 --auto_zc &
ublk_io_and_remove 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" &
ublk_io_and_remove 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback &
+ wait
+fi
+
+if _have_feature "PER_IO_DAEMON"; then
+ ublk_io_and_remove 8G -t null -q 4 --auto_zc --nthreads 8 --per_io_tasks &
+ ublk_io_and_remove 256M -t loop -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" &
+ ublk_io_and_remove 256M -t stripe -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+ ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback --nthreads 8 --per_io_tasks &
+ wait
fi
-wait
_cleanup_test "stress"
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_04.sh b/tools/testing/selftests/ublk/test_stress_04.sh
index 9bcfa64ea1f0..40d1437ca298 100755
--- a/tools/testing/selftests/ublk/test_stress_04.sh
+++ b/tools/testing/selftests/ublk/test_stress_04.sh
@@ -38,6 +38,13 @@ if _have_feature "AUTO_BUF_REG"; then
ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
ublk_io_and_kill_daemon 8G -t null -q 4 -z --auto_zc --auto_zc_fallback &
fi
+
+if _have_feature "PER_IO_DAEMON"; then
+ ublk_io_and_kill_daemon 8G -t null -q 4 --nthreads 8 --per_io_tasks &
+ ublk_io_and_kill_daemon 256M -t loop -q 4 --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" &
+ ublk_io_and_kill_daemon 256M -t stripe -q 4 --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+ ublk_io_and_kill_daemon 8G -t null -q 4 --nthreads 8 --per_io_tasks &
+fi
wait
_cleanup_test "stress"
diff --git a/tools/testing/selftests/ublk/test_stress_05.sh b/tools/testing/selftests/ublk/test_stress_05.sh
index bcfc904cefc6..566cfd90d192 100755
--- a/tools/testing/selftests/ublk/test_stress_05.sh
+++ b/tools/testing/selftests/ublk/test_stress_05.sh
@@ -69,5 +69,12 @@ if _have_feature "AUTO_BUF_REG"; then
done
fi
+if _have_feature "PER_IO_DAEMON"; then
+ ublk_io_and_remove 8G -t null -q 4 --nthreads 8 --per_io_tasks -r 1 -i "$reissue" &
+ ublk_io_and_remove 256M -t loop -q 4 --nthreads 8 --per_io_tasks -r 1 -i "$reissue" "${UBLK_BACKFILES[0]}" &
+ ublk_io_and_remove 8G -t null -q 4 --nthreads 8 --per_io_tasks -r 1 -i "$reissue" &
+fi
+wait
+
_cleanup_test "stress"
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/trace/count_ios_per_tid.bt b/tools/testing/selftests/ublk/trace/count_ios_per_tid.bt
new file mode 100644
index 000000000000..f4aa63ff2938
--- /dev/null
+++ b/tools/testing/selftests/ublk/trace/count_ios_per_tid.bt
@@ -0,0 +1,11 @@
+/*
+ * Tabulates and prints I/O completions per thread for the given device
+ *
+ * $1: dev_t
+*/
+tracepoint:block:block_rq_complete
+{
+ if (args.dev == $1) {
+ @[tid] = count();
+ }
+}
diff --git a/tools/testing/selftests/vDSO/vgetrandom-chacha.S b/tools/testing/selftests/vDSO/vgetrandom-chacha.S
index d6e09af7c0a9..a4a82e1c28a9 100644
--- a/tools/testing/selftests/vDSO/vgetrandom-chacha.S
+++ b/tools/testing/selftests/vDSO/vgetrandom-chacha.S
@@ -11,6 +11,8 @@
#include "../../../../arch/loongarch/vdso/vgetrandom-chacha.S"
#elif defined(__powerpc__) || defined(__powerpc64__)
#include "../../../../arch/powerpc/kernel/vdso/vgetrandom-chacha.S"
+#elif defined(__riscv) && __riscv_xlen == 64
+#include "../../../../arch/riscv/kernel/vdso/vgetrandom-chacha.S"
#elif defined(__s390x__)
#include "../../../../arch/s390/kernel/vdso64/vgetrandom-chacha.S"
#elif defined(__x86_64__)
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index f703fcfe9f7c..83148875a12c 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -12,7 +12,7 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh "$(CC)" trivial_program.c -no-pie)
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
check_initial_reg_state sigreturn iopl ioperm \
- test_vsyscall mov_ss_trap \
+ test_vsyscall mov_ss_trap sigtrap_loop \
syscall_arg_fault fsgsbase_restore sigaltstack
TARGETS_C_BOTHBITS += nx_stack
TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
diff --git a/tools/testing/selftests/x86/sigtrap_loop.c b/tools/testing/selftests/x86/sigtrap_loop.c
new file mode 100644
index 000000000000..9d065479e89f
--- /dev/null
+++ b/tools/testing/selftests/x86/sigtrap_loop.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025 Intel Corporation
+ */
+#define _GNU_SOURCE
+
+#include <err.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ucontext.h>
+
+#ifdef __x86_64__
+# define REG_IP REG_RIP
+#else
+# define REG_IP REG_EIP
+#endif
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+
+ return;
+}
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t *)ctx_void;
+ static unsigned int loop_count_on_same_ip;
+ static unsigned long last_trap_ip;
+
+ if (last_trap_ip == ctx->uc_mcontext.gregs[REG_IP]) {
+ printf("\tTrapped at %016lx\n", last_trap_ip);
+
+ /*
+ * If the same IP is hit more than 10 times in a row, it is
+ * _considered_ an infinite loop.
+ */
+ if (++loop_count_on_same_ip > 10) {
+ printf("[FAIL]\tDetected SIGTRAP infinite loop\n");
+ exit(1);
+ }
+
+ return;
+ }
+
+ loop_count_on_same_ip = 0;
+ last_trap_ip = ctx->uc_mcontext.gregs[REG_IP];
+ printf("\tTrapped at %016lx\n", last_trap_ip);
+}
+
+int main(int argc, char *argv[])
+{
+ sethandler(SIGTRAP, sigtrap, 0);
+
+ /*
+ * Set the Trap Flag (TF) to single-step the test code, therefore to
+ * trigger a SIGTRAP signal after each instruction until the TF is
+ * cleared.
+ *
+ * Because the arithmetic flags are not significant here, the TF is
+ * set by pushing 0x302 onto the stack and then popping it into the
+ * flags register.
+ *
+ * Four instructions in the following asm code are executed with the
+ * TF set, thus the SIGTRAP handler is expected to run four times.
+ */
+ printf("[RUN]\tSIGTRAP infinite loop detection\n");
+ asm volatile(
+#ifdef __x86_64__
+ /*
+ * Avoid clobbering the redzone
+ *
+ * Equivalent to "sub $128, %rsp", however -128 can be encoded
+ * in a single byte immediate while 128 uses 4 bytes.
+ */
+ "add $-128, %rsp\n\t"
+#endif
+ "push $0x302\n\t"
+ "popf\n\t"
+ "nop\n\t"
+ "nop\n\t"
+ "push $0x202\n\t"
+ "popf\n\t"
+#ifdef __x86_64__
+ "sub $-128, %rsp\n\t"
+#endif
+ );
+
+ printf("[OK]\tNo SIGTRAP infinite loop detected\n");
+ return 0;
+}