diff options
Diffstat (limited to 'tools')
63 files changed, 3671 insertions, 602 deletions
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index b7dded3c8113..5cfb5d74dd5f 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -628,6 +628,7 @@ #define MSR_AMD64_OSVW_STATUS 0xc0010141 #define MSR_AMD_PPIN_CTL 0xc00102f0 #define MSR_AMD_PPIN 0xc00102f1 +#define MSR_AMD64_CPUID_FN_7 0xc0011002 #define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 diff --git a/tools/include/linux/kallsyms.h b/tools/include/linux/kallsyms.h index 5a37ccbec54f..f61a01dd7eb7 100644 --- a/tools/include/linux/kallsyms.h +++ b/tools/include/linux/kallsyms.h @@ -18,6 +18,7 @@ static inline const char *kallsyms_lookup(unsigned long addr, return NULL; } +#ifdef HAVE_BACKTRACE_SUPPORT #include <execinfo.h> #include <stdlib.h> static inline void print_ip_sym(const char *loglvl, unsigned long ip) @@ -30,5 +31,8 @@ static inline void print_ip_sym(const char *loglvl, unsigned long ip) free(name); } +#else +static inline void print_ip_sym(const char *loglvl, unsigned long ip) {} +#endif #endif diff --git a/tools/include/uapi/linux/bits.h b/tools/include/uapi/linux/bits.h index 682b406e1067..a04afef9efca 100644 --- a/tools/include/uapi/linux/bits.h +++ b/tools/include/uapi/linux/bits.h @@ -4,9 +4,9 @@ #ifndef _UAPI_LINUX_BITS_H #define _UAPI_LINUX_BITS_H -#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) diff --git a/tools/include/uapi/linux/coredump.h b/tools/include/uapi/linux/coredump.h new file mode 100644 index 000000000000..dc3789b78af0 --- /dev/null +++ b/tools/include/uapi/linux/coredump.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_LINUX_COREDUMP_H +#define _UAPI_LINUX_COREDUMP_H + +#include <linux/types.h> + +/** + * coredump_{req,ack} flags + * @COREDUMP_KERNEL: kernel writes coredump + * @COREDUMP_USERSPACE: userspace writes coredump + * @COREDUMP_REJECT: don't generate coredump + * @COREDUMP_WAIT: wait for coredump server + */ +enum { + COREDUMP_KERNEL = (1ULL << 0), + COREDUMP_USERSPACE = (1ULL << 1), + COREDUMP_REJECT = (1ULL << 2), + COREDUMP_WAIT = (1ULL << 3), +}; + +/** + * struct coredump_req - message kernel sends to userspace + * @size: size of struct coredump_req + * @size_ack: known size of struct coredump_ack on this kernel + * @mask: supported features + * + * When a coredump happens the kernel will connect to the coredump + * socket and send a coredump request to the coredump server. The @size + * member is set to the size of struct coredump_req and provides a hint + * to userspace how much data can be read. Userspace may use MSG_PEEK to + * peek the size of struct coredump_req and then choose to consume it in + * one go. Userspace may also simply read a COREDUMP_ACK_SIZE_VER0 + * request. If the size the kernel sends is larger userspace simply + * discards any remaining data. + * + * The coredump_req->mask member is set to the currently know features. + * Userspace may only set coredump_ack->mask to the bits raised by the + * kernel in coredump_req->mask. + * + * The coredump_req->size_ack member is set by the kernel to the size of + * struct coredump_ack the kernel knows. Userspace may only send up to + * coredump_req->size_ack bytes to the kernel and must set + * coredump_ack->size accordingly. + */ +struct coredump_req { + __u32 size; + __u32 size_ack; + __u64 mask; +}; + +enum { + COREDUMP_REQ_SIZE_VER0 = 16U, /* size of first published struct */ +}; + +/** + * struct coredump_ack - message userspace sends to kernel + * @size: size of the struct + * @spare: unused + * @mask: features kernel is supposed to use + * + * The @size member must be set to the size of struct coredump_ack. It + * may never exceed what the kernel returned in coredump_req->size_ack + * but it may of course be smaller (>= COREDUMP_ACK_SIZE_VER0 and <= + * coredump_req->size_ack). + * + * The @mask member must be set to the features the coredump server + * wants the kernel to use. Only bits the kernel returned in + * coredump_req->mask may be set. + */ +struct coredump_ack { + __u32 size; + __u32 spare; + __u64 mask; +}; + +enum { + COREDUMP_ACK_SIZE_VER0 = 16U, /* size of first published struct */ +}; + +/** + * enum coredump_mark - Markers for the coredump socket + * + * The kernel will place a single byte on the coredump socket. The + * markers notify userspace whether the coredump ack succeeded or + * failed. + * + * @COREDUMP_MARK_MINSIZE: the provided coredump_ack size was too small + * @COREDUMP_MARK_MAXSIZE: the provided coredump_ack size was too big + * @COREDUMP_MARK_UNSUPPORTED: the provided coredump_ack mask was invalid + * @COREDUMP_MARK_CONFLICTING: the provided coredump_ack mask has conflicting options + * @COREDUMP_MARK_REQACK: the coredump request and ack was successful + * @__COREDUMP_MARK_MAX: the maximum coredump mark value + */ +enum coredump_mark { + COREDUMP_MARK_REQACK = 0U, + COREDUMP_MARK_MINSIZE = 1U, + COREDUMP_MARK_MAXSIZE = 2U, + COREDUMP_MARK_UNSUPPORTED = 3U, + COREDUMP_MARK_CONFLICTING = 4U, + __COREDUMP_MARK_MAX = (1U << 31), +}; + +#endif /* _UAPI_LINUX_COREDUMP_H */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 52e353368f58..d41ee26b9443 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -735,7 +735,7 @@ struct bpf_object { struct usdt_manager *usdt_man; - struct bpf_map *arena_map; + int arena_map_idx; void *arena_data; size_t arena_data_sz; @@ -1517,6 +1517,7 @@ static struct bpf_object *bpf_object__new(const char *path, obj->efile.obj_buf_sz = obj_buf_sz; obj->efile.btf_maps_shndx = -1; obj->kconfig_map_idx = -1; + obj->arena_map_idx = -1; obj->kern_version = get_kernel_version(); obj->state = OBJ_OPEN; @@ -2964,7 +2965,7 @@ static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map, const long page_sz = sysconf(_SC_PAGE_SIZE); size_t mmap_sz; - mmap_sz = bpf_map_mmap_sz(obj->arena_map); + mmap_sz = bpf_map_mmap_sz(map); if (roundup(data_sz, page_sz) > mmap_sz) { pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n", sec_name, mmap_sz, data_sz); @@ -3038,12 +3039,12 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, if (map->def.type != BPF_MAP_TYPE_ARENA) continue; - if (obj->arena_map) { + if (obj->arena_map_idx >= 0) { pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n", - map->name, obj->arena_map->name); + map->name, obj->maps[obj->arena_map_idx].name); return -EINVAL; } - obj->arena_map = map; + obj->arena_map_idx = i; if (obj->efile.arena_data) { err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx, @@ -3053,7 +3054,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, return err; } } - if (obj->efile.arena_data && !obj->arena_map) { + if (obj->efile.arena_data && obj->arena_map_idx < 0) { pr_warn("elf: sec '%s': to use global __arena variables the ARENA map should be explicitly declared in SEC(\".maps\")\n", ARENA_SEC); return -ENOENT; @@ -4583,8 +4584,13 @@ static int bpf_program__record_reloc(struct bpf_program *prog, if (shdr_idx == obj->efile.arena_data_shndx) { reloc_desc->type = RELO_DATA; reloc_desc->insn_idx = insn_idx; - reloc_desc->map_idx = obj->arena_map - obj->maps; + reloc_desc->map_idx = obj->arena_map_idx; reloc_desc->sym_off = sym->st_value; + + map = &obj->maps[obj->arena_map_idx]; + pr_debug("prog '%s': found arena map %d (%s, sec %d, off %zu) for insn %u\n", + prog->name, obj->arena_map_idx, map->name, map->sec_idx, + map->sec_offset, insn_idx); return 0; } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index d967ac001498..d14f20ef1db1 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -224,6 +224,7 @@ static bool is_rust_noreturn(const struct symbol *func) str_ends_with(func->name, "_4core9panicking14panic_explicit") || str_ends_with(func->name, "_4core9panicking14panic_nounwind") || str_ends_with(func->name, "_4core9panicking18panic_bounds_check") || + str_ends_with(func->name, "_4core9panicking18panic_nounwind_fmt") || str_ends_with(func->name, "_4core9panicking19assert_failed_inner") || str_ends_with(func->name, "_4core9panicking30panic_null_pointer_dereference") || str_ends_with(func->name, "_4core9panicking36panic_misaligned_pointer_dereference") || @@ -1192,8 +1193,8 @@ static const char *uaccess_safe_builtin[] = { "__ubsan_handle_type_mismatch_v1", "__ubsan_handle_shift_out_of_bounds", "__ubsan_handle_load_invalid_value", - /* STACKLEAK */ - "stackleak_track_stack", + /* KSTACK_ERASE */ + "__sanitizer_cov_stack_depth", /* TRACE_BRANCH_PROFILING */ "ftrace_likely_update", /* STACKPROTECTOR */ diff --git a/tools/power/cpupower/bindings/python/Makefile b/tools/power/cpupower/bindings/python/Makefile index 81db39a03efb..4527cd732b42 100644 --- a/tools/power/cpupower/bindings/python/Makefile +++ b/tools/power/cpupower/bindings/python/Makefile @@ -4,20 +4,22 @@ # This Makefile expects you have already run `make install-lib` in the lib # directory for the bindings to be created. -CC := gcc +CC ?= gcc +# CFLAGS ?= +LDFLAGS ?= -lcpupower HAVE_SWIG := $(shell if which swig >/dev/null 2>&1; then echo 1; else echo 0; fi) HAVE_PYCONFIG := $(shell if which python-config >/dev/null 2>&1; then echo 1; else echo 0; fi) -PY_INCLUDE = $(firstword $(shell python-config --includes)) -INSTALL_DIR = $(shell python3 -c "import site; print(site.getsitepackages()[0])") +PY_INCLUDE ?= $(firstword $(shell python-config --includes)) +INSTALL_DIR ?= $(shell python3 -c "import site; print(site.getsitepackages()[0])") all: _raw_pylibcpupower.so _raw_pylibcpupower.so: raw_pylibcpupower_wrap.o - $(CC) -shared -lcpupower raw_pylibcpupower_wrap.o -o _raw_pylibcpupower.so + $(CC) -shared $(LDFLAGS) raw_pylibcpupower_wrap.o -o _raw_pylibcpupower.so raw_pylibcpupower_wrap.o: raw_pylibcpupower_wrap.c - $(CC) -fPIC -c raw_pylibcpupower_wrap.c $(PY_INCLUDE) + $(CC) $(CFLAGS) $(PY_INCLUDE) -fPIC -c raw_pylibcpupower_wrap.c raw_pylibcpupower_wrap.c: raw_pylibcpupower.swg ifeq ($(HAVE_SWIG),0) diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c index ad493157f826..e8b3841d5c0f 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -121,10 +121,8 @@ void print_header(int topology_depth) switch (topology_depth) { case TOPOLOGY_DEPTH_PKG: printf(" PKG|"); - break; case TOPOLOGY_DEPTH_CORE: printf("CORE|"); - break; case TOPOLOGY_DEPTH_CPU: printf(" CPU|"); break; @@ -167,10 +165,8 @@ void print_results(int topology_depth, int cpu) switch (topology_depth) { case TOPOLOGY_DEPTH_PKG: printf("%4d|", cpu_top.core_info[cpu].pkg); - break; case TOPOLOGY_DEPTH_CORE: printf("%4d|", cpu_top.core_info[cpu].core); - break; case TOPOLOGY_DEPTH_CPU: printf("%4d|", cpu_top.core_info[cpu].cpu); break; diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c index 73b6b10cbdd2..5ae02c3d5b64 100644 --- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c @@ -240,9 +240,9 @@ static int mperf_stop(void) int cpu; for (cpu = 0; cpu < cpu_count; cpu++) { - mperf_measure_stats(cpu); - mperf_get_tsc(&tsc_at_measure_end[cpu]); clock_gettime(CLOCK_REALTIME, &time_end[cpu]); + mperf_get_tsc(&tsc_at_measure_end[cpu]); + mperf_measure_stats(cpu); } return 0; diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 0f1d91f57ba3..d533481672b7 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -1828,27 +1828,10 @@ static ssize_t fw_buf_checksum_show(struct device *dev, { struct cxl_mockmem_data *mdata = dev_get_drvdata(dev); u8 hash[SHA256_DIGEST_SIZE]; - unsigned char *hstr, *hptr; - struct sha256_state sctx; - ssize_t written = 0; - int i; - - sha256_init(&sctx); - sha256_update(&sctx, mdata->fw, mdata->fw_size); - sha256_final(&sctx, hash); - - hstr = kzalloc((SHA256_DIGEST_SIZE * 2) + 1, GFP_KERNEL); - if (!hstr) - return -ENOMEM; - - hptr = hstr; - for (i = 0; i < SHA256_DIGEST_SIZE; i++) - hptr += sprintf(hptr, "%02x", hash[i]); - written = sysfs_emit(buf, "%s\n", hstr); + sha256(mdata->fw, mdata->fw_size, hash); - kfree(hstr); - return written; + return sysfs_emit(buf, "%*phN\n", SHA256_DIGEST_SIZE, hash); } static DEVICE_ATTR_RO(fw_buf_checksum); diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index 5e512a1d09d1..da7e230f2781 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -596,4 +596,7 @@ extern int bpf_iter_dmabuf_new(struct bpf_iter_dmabuf *it) __weak __ksym; extern struct dma_buf *bpf_iter_dmabuf_next(struct bpf_iter_dmabuf *it) __weak __ksym; extern void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it) __weak __ksym; +extern int bpf_cgroup_read_xattr(struct cgroup *cgroup, const char *name__str, + struct bpf_dynptr *value_p) __weak __ksym; + #endif diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c new file mode 100644 index 000000000000..87978a0f7eb7 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <errno.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <string.h> +#include <unistd.h> +#include <sys/socket.h> +#include <sys/xattr.h> + +#include <test_progs.h> + +#include "read_cgroupfs_xattr.skel.h" +#include "cgroup_read_xattr.skel.h" + +#define CGROUP_FS_ROOT "/sys/fs/cgroup/" +#define CGROUP_FS_PARENT CGROUP_FS_ROOT "foo/" +#define CGROUP_FS_CHILD CGROUP_FS_PARENT "bar/" + +static int move_pid_to_cgroup(const char *cgroup_folder, pid_t pid) +{ + char filename[128]; + char pid_str[64]; + int procs_fd; + int ret; + + snprintf(filename, sizeof(filename), "%scgroup.procs", cgroup_folder); + snprintf(pid_str, sizeof(pid_str), "%d", pid); + + procs_fd = open(filename, O_WRONLY | O_APPEND); + if (!ASSERT_OK_FD(procs_fd, "open")) + return -1; + + ret = write(procs_fd, pid_str, strlen(pid_str)); + close(procs_fd); + if (!ASSERT_GT(ret, 0, "write cgroup.procs")) + return -1; + return 0; +} + +static void reset_cgroups_and_lo(void) +{ + rmdir(CGROUP_FS_CHILD); + rmdir(CGROUP_FS_PARENT); + system("ip addr del 1.1.1.1/32 dev lo"); + system("ip link set dev lo down"); +} + +static const char xattr_value_a[] = "bpf_selftest_value_a"; +static const char xattr_value_b[] = "bpf_selftest_value_b"; +static const char xattr_name[] = "user.bpf_test"; + +static int setup_cgroups_and_lo(void) +{ + int err; + + err = mkdir(CGROUP_FS_PARENT, 0755); + if (!ASSERT_OK(err, "mkdir 1")) + goto error; + err = mkdir(CGROUP_FS_CHILD, 0755); + if (!ASSERT_OK(err, "mkdir 2")) + goto error; + + err = setxattr(CGROUP_FS_PARENT, xattr_name, xattr_value_a, + strlen(xattr_value_a) + 1, 0); + if (!ASSERT_OK(err, "setxattr 1")) + goto error; + + err = setxattr(CGROUP_FS_CHILD, xattr_name, xattr_value_b, + strlen(xattr_value_b) + 1, 0); + if (!ASSERT_OK(err, "setxattr 2")) + goto error; + + err = system("ip link set dev lo up"); + if (!ASSERT_OK(err, "lo up")) + goto error; + + err = system("ip addr add 1.1.1.1 dev lo"); + if (!ASSERT_OK(err, "lo addr v4")) + goto error; + + err = write_sysctl("/proc/sys/net/ipv4/ping_group_range", "0 0"); + if (!ASSERT_OK(err, "write_sysctl")) + goto error; + + return 0; +error: + reset_cgroups_and_lo(); + return err; +} + +static void test_read_cgroup_xattr(void) +{ + struct sockaddr_in sa4 = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + }; + struct read_cgroupfs_xattr *skel = NULL; + pid_t pid = gettid(); + int sock_fd = -1; + int connect_fd = -1; + + if (!ASSERT_OK(setup_cgroups_and_lo(), "setup_cgroups_and_lo")) + return; + if (!ASSERT_OK(move_pid_to_cgroup(CGROUP_FS_CHILD, pid), + "move_pid_to_cgroup")) + goto out; + + skel = read_cgroupfs_xattr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "read_cgroupfs_xattr__open_and_load")) + goto out; + + skel->bss->target_pid = pid; + + if (!ASSERT_OK(read_cgroupfs_xattr__attach(skel), "read_cgroupfs_xattr__attach")) + goto out; + + sock_fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP); + if (!ASSERT_OK_FD(sock_fd, "sock create")) + goto out; + + connect_fd = connect(sock_fd, &sa4, sizeof(sa4)); + if (!ASSERT_OK_FD(connect_fd, "connect 1")) + goto out; + close(connect_fd); + + ASSERT_TRUE(skel->bss->found_value_a, "found_value_a"); + ASSERT_TRUE(skel->bss->found_value_b, "found_value_b"); + +out: + close(connect_fd); + close(sock_fd); + read_cgroupfs_xattr__destroy(skel); + move_pid_to_cgroup(CGROUP_FS_ROOT, pid); + reset_cgroups_and_lo(); +} + +void test_cgroup_xattr(void) +{ + RUN_TESTS(cgroup_read_xattr); + + if (test__start_subtest("read_cgroupfs_xattr")) + test_read_cgroup_xattr(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/recursive_attach.c b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c index 8100509e561b..0ffa01d54ce2 100644 --- a/tools/testing/selftests/bpf/prog_tests/recursive_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c @@ -149,3 +149,70 @@ close_prog: fentry_recursive_target__destroy(target_skel); fentry_recursive__destroy(tracing_skel); } + +static void *fentry_target_test_run(void *arg) +{ + for (;;) { + int prog_fd = __atomic_load_n((int *)arg, __ATOMIC_SEQ_CST); + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err; + + if (prog_fd == -1) + break; + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "fentry_target test_run")) + break; + } + + return NULL; +} + +void test_fentry_attach_stress(void) +{ + struct fentry_recursive_target *target_skel = NULL; + struct fentry_recursive *tracing_skel = NULL; + struct bpf_program *prog; + int err, i, tgt_prog_fd; + pthread_t thread; + + target_skel = fentry_recursive_target__open_and_load(); + if (!ASSERT_OK_PTR(target_skel, + "fentry_recursive_target__open_and_load")) + goto close_prog; + tgt_prog_fd = bpf_program__fd(target_skel->progs.fentry_target); + err = pthread_create(&thread, NULL, + fentry_target_test_run, &tgt_prog_fd); + if (!ASSERT_OK(err, "bpf_program__set_attach_target")) + goto close_prog; + + for (i = 0; i < 1000; i++) { + tracing_skel = fentry_recursive__open(); + if (!ASSERT_OK_PTR(tracing_skel, "fentry_recursive__open")) + goto stop_thread; + + prog = tracing_skel->progs.recursive_attach; + err = bpf_program__set_attach_target(prog, tgt_prog_fd, + "fentry_target"); + if (!ASSERT_OK(err, "bpf_program__set_attach_target")) + goto stop_thread; + + err = fentry_recursive__load(tracing_skel); + if (!ASSERT_OK(err, "fentry_recursive__load")) + goto stop_thread; + + err = fentry_recursive__attach(tracing_skel); + if (!ASSERT_OK(err, "fentry_recursive__attach")) + goto stop_thread; + + fentry_recursive__destroy(tracing_skel); + tracing_skel = NULL; + } + +stop_thread: + __atomic_store_n(&tgt_prog_fd, -1, __ATOMIC_SEQ_CST); + err = pthread_join(thread, NULL); + ASSERT_OK(err, "pthread_join"); +close_prog: + fentry_recursive__destroy(tracing_skel); + fentry_recursive_target__destroy(target_skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c index 4be6fdb78c6a..594441acb707 100644 --- a/tools/testing/selftests/bpf/prog_tests/snprintf.c +++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c @@ -116,6 +116,8 @@ static void test_snprintf_negative(void) ASSERT_ERR(load_single_snprintf("%llc"), "invalid specifier 7"); ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character"); ASSERT_ERR(load_single_snprintf("\x1"), "non printable character"); + ASSERT_ERR(load_single_snprintf("%p%"), "invalid specifier 8"); + ASSERT_ERR(load_single_snprintf("%s%"), "invalid specifier 9"); } void test_snprintf(void) diff --git a/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c new file mode 100644 index 000000000000..092db1d0435e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_experimental.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +char value[16]; + +static __always_inline void read_xattr(struct cgroup *cgroup) +{ + struct bpf_dynptr value_ptr; + + bpf_dynptr_from_mem(value, sizeof(value), 0, &value_ptr); + bpf_cgroup_read_xattr(cgroup, "user.bpf_test", + &value_ptr); +} + +SEC("lsm.s/socket_connect") +__success +int BPF_PROG(trusted_cgroup_ptr_sleepable) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup *cgrp; + + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + return 0; + + read_xattr(cgrp); + bpf_cgroup_release(cgrp); + return 0; +} + +SEC("lsm/socket_connect") +__success +int BPF_PROG(trusted_cgroup_ptr_non_sleepable) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup *cgrp; + + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + return 0; + + read_xattr(cgrp); + bpf_cgroup_release(cgrp); + return 0; +} + +SEC("lsm/socket_connect") +__success +int BPF_PROG(use_css_iter_non_sleepable) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup_subsys_state *css; + struct cgroup *cgrp; + + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + return 0; + + bpf_for_each(css, css, &cgrp->self, BPF_CGROUP_ITER_ANCESTORS_UP) + read_xattr(css->cgroup); + + bpf_cgroup_release(cgrp); + return 0; +} + +SEC("lsm.s/socket_connect") +__failure __msg("expected an RCU CS") +int BPF_PROG(use_css_iter_sleepable_missing_rcu_lock) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup_subsys_state *css; + struct cgroup *cgrp; + + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + return 0; + + bpf_for_each(css, css, &cgrp->self, BPF_CGROUP_ITER_ANCESTORS_UP) + read_xattr(css->cgroup); + + bpf_cgroup_release(cgrp); + return 0; +} + +SEC("lsm.s/socket_connect") +__success +int BPF_PROG(use_css_iter_sleepable_with_rcu_lock) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup_subsys_state *css; + struct cgroup *cgrp; + + bpf_rcu_read_lock(); + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + goto out; + + bpf_for_each(css, css, &cgrp->self, BPF_CGROUP_ITER_ANCESTORS_UP) + read_xattr(css->cgroup); + + bpf_cgroup_release(cgrp); +out: + bpf_rcu_read_unlock(); + return 0; +} + +SEC("lsm/socket_connect") +__success +int BPF_PROG(use_bpf_cgroup_ancestor) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup *cgrp, *ancestor; + + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + return 0; + + ancestor = bpf_cgroup_ancestor(cgrp, 1); + if (!ancestor) + goto out; + + read_xattr(cgrp); + bpf_cgroup_release(ancestor); +out: + bpf_cgroup_release(cgrp); + return 0; +} + +SEC("cgroup/sendmsg4") +__success +int BPF_PROG(cgroup_skb) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup *cgrp, *ancestor; + + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + return 0; + + ancestor = bpf_cgroup_ancestor(cgrp, 1); + if (!ancestor) + goto out; + + read_xattr(cgrp); + bpf_cgroup_release(ancestor); +out: + bpf_cgroup_release(cgrp); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c b/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c new file mode 100644 index 000000000000..855f85fc5522 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_experimental.h" + +char _license[] SEC("license") = "GPL"; + +pid_t target_pid = 0; + +char xattr_value[64]; +static const char expected_value_a[] = "bpf_selftest_value_a"; +static const char expected_value_b[] = "bpf_selftest_value_b"; +bool found_value_a; +bool found_value_b; + +SEC("lsm.s/socket_connect") +int BPF_PROG(test_socket_connect) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup_subsys_state *css, *tmp; + struct bpf_dynptr value_ptr; + struct cgroup *cgrp; + + if ((bpf_get_current_pid_tgid() >> 32) != target_pid) + return 0; + + bpf_rcu_read_lock(); + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) { + bpf_rcu_read_unlock(); + return 0; + } + + css = &cgrp->self; + bpf_dynptr_from_mem(xattr_value, sizeof(xattr_value), 0, &value_ptr); + bpf_for_each(css, tmp, css, BPF_CGROUP_ITER_ANCESTORS_UP) { + int ret; + + ret = bpf_cgroup_read_xattr(tmp->cgroup, "user.bpf_test", + &value_ptr); + if (ret < 0) + continue; + + if (ret == sizeof(expected_value_a) && + !bpf_strncmp(xattr_value, sizeof(expected_value_a), expected_value_a)) + found_value_a = true; + if (ret == sizeof(expected_value_b) && + !bpf_strncmp(xattr_value, sizeof(expected_value_b), expected_value_b)) + found_value_b = true; + } + + bpf_rcu_read_unlock(); + bpf_cgroup_release(cgrp); + + return 0; +} diff --git a/tools/testing/selftests/coredump/Makefile b/tools/testing/selftests/coredump/Makefile index ed210037b29d..77b3665c73c7 100644 --- a/tools/testing/selftests/coredump/Makefile +++ b/tools/testing/selftests/coredump/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS = $(KHDR_INCLUDES) +CFLAGS += -Wall -O0 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) TEST_GEN_PROGS := stackdump_test TEST_FILES := stackdump diff --git a/tools/testing/selftests/coredump/config b/tools/testing/selftests/coredump/config new file mode 100644 index 000000000000..a05ef112b4f9 --- /dev/null +++ b/tools/testing/selftests/coredump/config @@ -0,0 +1,3 @@ +CONFIG_COREDUMP=y +CONFIG_NET=y +CONFIG_UNIX=y diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c index 68f8e479ac36..5a5a7a5f7e1d 100644 --- a/tools/testing/selftests/coredump/stackdump_test.c +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -1,12 +1,18 @@ // SPDX-License-Identifier: GPL-2.0 +#include <assert.h> #include <fcntl.h> #include <inttypes.h> #include <libgen.h> +#include <limits.h> +#include <linux/coredump.h> +#include <linux/fs.h> #include <linux/limits.h> #include <pthread.h> #include <string.h> #include <sys/mount.h> +#include <poll.h> +#include <sys/epoll.h> #include <sys/resource.h> #include <sys/stat.h> #include <sys/socket.h> @@ -14,16 +20,23 @@ #include <unistd.h> #include "../kselftest_harness.h" +#include "../filesystems/wrappers.h" #include "../pidfd/pidfd.h" #define STACKDUMP_FILE "stack_values" #define STACKDUMP_SCRIPT "stackdump" #define NUM_THREAD_SPAWN 128 +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif + static void *do_nothing(void *) { while (1) pause(); + + return NULL; } static void crashing_child(void) @@ -42,16 +55,32 @@ FIXTURE(coredump) { char original_core_pattern[256]; pid_t pid_coredump_server; + int fd_tmpfs_detached; }; +static int create_detached_tmpfs(void) +{ + int fd_context, fd_tmpfs; + + fd_context = sys_fsopen("tmpfs", 0); + if (fd_context < 0) + return -1; + + if (sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0) + return -1; + + fd_tmpfs = sys_fsmount(fd_context, 0, 0); + close(fd_context); + return fd_tmpfs; +} + FIXTURE_SETUP(coredump) { - char buf[PATH_MAX]; FILE *file; - char *dir; int ret; self->pid_coredump_server = -ESRCH; + self->fd_tmpfs_detached = -1; file = fopen("/proc/sys/kernel/core_pattern", "r"); ASSERT_NE(NULL, file); @@ -60,6 +89,8 @@ FIXTURE_SETUP(coredump) ASSERT_LT(ret, sizeof(self->original_core_pattern)); self->original_core_pattern[ret] = '\0'; + self->fd_tmpfs_detached = create_detached_tmpfs(); + ASSERT_GE(self->fd_tmpfs_detached, 0); ret = fclose(file); ASSERT_EQ(0, ret); @@ -98,6 +129,15 @@ FIXTURE_TEARDOWN(coredump) goto fail; } + if (self->fd_tmpfs_detached >= 0) { + ret = close(self->fd_tmpfs_detached); + if (ret < 0) { + reason = "Unable to close detached tmpfs"; + goto fail; + } + self->fd_tmpfs_detached = -1; + } + return; fail: /* This should never happen */ @@ -106,11 +146,10 @@ fail: TEST_F_TIMEOUT(coredump, stackdump, 120) { - struct sigaction action = {}; unsigned long long stack; char *test_dir, *line; size_t line_length; - char buf[PATH_MAX]; + char buf[PAGE_SIZE]; int ret, i, status; FILE *file; pid_t pid; @@ -169,153 +208,166 @@ TEST_F_TIMEOUT(coredump, stackdump, 120) fclose(file); } +static int create_and_listen_unix_socket(const char *path) +{ + struct sockaddr_un addr = { + .sun_family = AF_UNIX, + }; + assert(strlen(path) < sizeof(addr.sun_path) - 1); + strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1); + size_t addr_len = + offsetof(struct sockaddr_un, sun_path) + strlen(path) + 1; + int fd, ret; + + fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + if (fd < 0) + goto out; + + ret = bind(fd, (const struct sockaddr *)&addr, addr_len); + if (ret < 0) + goto out; + + ret = listen(fd, 128); + if (ret < 0) + goto out; + + return fd; + +out: + if (fd >= 0) + close(fd); + return -1; +} + +static bool set_core_pattern(const char *pattern) +{ + int fd; + ssize_t ret; + + fd = open("/proc/sys/kernel/core_pattern", O_WRONLY | O_CLOEXEC); + if (fd < 0) + return false; + + ret = write(fd, pattern, strlen(pattern)); + close(fd); + if (ret < 0) + return false; + + fprintf(stderr, "Set core_pattern to '%s' | %zu == %zu\n", pattern, ret, strlen(pattern)); + return ret == strlen(pattern); +} + +static int get_peer_pidfd(int fd) +{ + int fd_peer_pidfd; + socklen_t fd_peer_pidfd_len = sizeof(fd_peer_pidfd); + int ret = getsockopt(fd, SOL_SOCKET, SO_PEERPIDFD, &fd_peer_pidfd, + &fd_peer_pidfd_len); + if (ret < 0) { + fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n"); + return -1; + } + return fd_peer_pidfd; +} + +static bool get_pidfd_info(int fd_peer_pidfd, struct pidfd_info *info) +{ + memset(info, 0, sizeof(*info)); + info->mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; + return ioctl(fd_peer_pidfd, PIDFD_GET_INFO, info) == 0; +} + +static void +wait_and_check_coredump_server(pid_t pid_coredump_server, + struct __test_metadata *const _metadata, + FIXTURE_DATA(coredump)* self) +{ + int status; + waitpid(pid_coredump_server, &status, 0); + self->pid_coredump_server = -ESRCH; + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); +} + TEST_F(coredump, socket) { - int fd, pidfd, ret, status; - FILE *file; + int pidfd, ret, status; pid_t pid, pid_coredump_server; struct stat st; - char core_file[PATH_MAX]; struct pidfd_info info = {}; int ipc_sockets[2]; char c; - const struct sockaddr_un coredump_sk = { - .sun_family = AF_UNIX, - .sun_path = "/tmp/coredump.socket", - }; - size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) + - sizeof("/tmp/coredump.socket"); + + ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket")); ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); ASSERT_EQ(ret, 0); - file = fopen("/proc/sys/kernel/core_pattern", "w"); - ASSERT_NE(file, NULL); - - ret = fprintf(file, "@/tmp/coredump.socket"); - ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); - ASSERT_EQ(fclose(file), 0); - pid_coredump_server = fork(); ASSERT_GE(pid_coredump_server, 0); if (pid_coredump_server == 0) { - int fd_server, fd_coredump, fd_peer_pidfd, fd_core_file; - socklen_t fd_peer_pidfd_len; + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1; + int exit_code = EXIT_FAILURE; close(ipc_sockets[0]); - fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); if (fd_server < 0) - _exit(EXIT_FAILURE); - - ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len); - if (ret < 0) { - fprintf(stderr, "Failed to bind coredump socket\n"); - close(fd_server); - close(ipc_sockets[1]); - _exit(EXIT_FAILURE); - } + goto out; - ret = listen(fd_server, 1); - if (ret < 0) { - fprintf(stderr, "Failed to listen on coredump socket\n"); - close(fd_server); - close(ipc_sockets[1]); - _exit(EXIT_FAILURE); - } - - if (write_nointr(ipc_sockets[1], "1", 1) < 0) { - close(fd_server); - close(ipc_sockets[1]); - _exit(EXIT_FAILURE); - } + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; close(ipc_sockets[1]); fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); - if (fd_coredump < 0) { - fprintf(stderr, "Failed to accept coredump socket connection\n"); - close(fd_server); - _exit(EXIT_FAILURE); - } + if (fd_coredump < 0) + goto out; - fd_peer_pidfd_len = sizeof(fd_peer_pidfd); - ret = getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD, - &fd_peer_pidfd, &fd_peer_pidfd_len); - if (ret < 0) { - fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n"); - close(fd_coredump); - close(fd_server); - _exit(EXIT_FAILURE); - } + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; - memset(&info, 0, sizeof(info)); - info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; - ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info); - if (ret < 0) { - fprintf(stderr, "Failed to retrieve pidfd info from peer pidfd for coredump socket connection\n"); - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - _exit(EXIT_FAILURE); - } + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; - if (!(info.mask & PIDFD_INFO_COREDUMP)) { - fprintf(stderr, "Missing coredump information from coredumping task\n"); - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - _exit(EXIT_FAILURE); - } + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; - if (!(info.coredump_mask & PIDFD_COREDUMPED)) { - fprintf(stderr, "Received connection from non-coredumping task\n"); - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - _exit(EXIT_FAILURE); - } + if (!(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; fd_core_file = creat("/tmp/coredump.file", 0644); - if (fd_core_file < 0) { - fprintf(stderr, "Failed to create coredump file\n"); - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - _exit(EXIT_FAILURE); - } + if (fd_core_file < 0) + goto out; for (;;) { char buffer[4096]; ssize_t bytes_read, bytes_write; bytes_read = read(fd_coredump, buffer, sizeof(buffer)); - if (bytes_read < 0) { - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - close(fd_core_file); - _exit(EXIT_FAILURE); - } + if (bytes_read < 0) + goto out; if (bytes_read == 0) break; bytes_write = write(fd_core_file, buffer, bytes_read); - if (bytes_read != bytes_write) { - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - close(fd_core_file); - _exit(EXIT_FAILURE); - } + if (bytes_read != bytes_write) + goto out; } - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - close(fd_core_file); - _exit(EXIT_SUCCESS); + exit_code = EXIT_SUCCESS; +out: + if (fd_core_file >= 0) + close(fd_core_file); + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); } self->pid_coredump_server = pid_coredump_server; @@ -335,48 +387,27 @@ TEST_F(coredump, socket) ASSERT_TRUE(WIFSIGNALED(status)); ASSERT_TRUE(WCOREDUMP(status)); - info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; - ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0); + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); - waitpid(pid_coredump_server, &status, 0); - self->pid_coredump_server = -ESRCH; - ASSERT_TRUE(WIFEXITED(status)); - ASSERT_EQ(WEXITSTATUS(status), 0); + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); ASSERT_EQ(stat("/tmp/coredump.file", &st), 0); ASSERT_GT(st.st_size, 0); - /* - * We should somehow validate the produced core file. - * For now just allow for visual inspection - */ system("file /tmp/coredump.file"); } TEST_F(coredump, socket_detect_userspace_client) { - int fd, pidfd, ret, status; - FILE *file; + int pidfd, ret, status; pid_t pid, pid_coredump_server; struct stat st; - char core_file[PATH_MAX]; struct pidfd_info info = {}; int ipc_sockets[2]; char c; - const struct sockaddr_un coredump_sk = { - .sun_family = AF_UNIX, - .sun_path = "/tmp/coredump.socket", - }; - size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) + - sizeof("/tmp/coredump.socket"); - file = fopen("/proc/sys/kernel/core_pattern", "w"); - ASSERT_NE(file, NULL); - - ret = fprintf(file, "@/tmp/coredump.socket"); - ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); - ASSERT_EQ(fclose(file), 0); + ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket")); ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); ASSERT_EQ(ret, 0); @@ -384,93 +415,49 @@ TEST_F(coredump, socket_detect_userspace_client) pid_coredump_server = fork(); ASSERT_GE(pid_coredump_server, 0); if (pid_coredump_server == 0) { - int fd_server, fd_coredump, fd_peer_pidfd, fd_core_file; - socklen_t fd_peer_pidfd_len; + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1; + int exit_code = EXIT_FAILURE; close(ipc_sockets[0]); - fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); if (fd_server < 0) - _exit(EXIT_FAILURE); + goto out; - ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len); - if (ret < 0) { - fprintf(stderr, "Failed to bind coredump socket\n"); - close(fd_server); - close(ipc_sockets[1]); - _exit(EXIT_FAILURE); - } - - ret = listen(fd_server, 1); - if (ret < 0) { - fprintf(stderr, "Failed to listen on coredump socket\n"); - close(fd_server); - close(ipc_sockets[1]); - _exit(EXIT_FAILURE); - } - - if (write_nointr(ipc_sockets[1], "1", 1) < 0) { - close(fd_server); - close(ipc_sockets[1]); - _exit(EXIT_FAILURE); - } + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; close(ipc_sockets[1]); fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); - if (fd_coredump < 0) { - fprintf(stderr, "Failed to accept coredump socket connection\n"); - close(fd_server); - _exit(EXIT_FAILURE); - } - - fd_peer_pidfd_len = sizeof(fd_peer_pidfd); - ret = getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD, - &fd_peer_pidfd, &fd_peer_pidfd_len); - if (ret < 0) { - fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n"); - close(fd_coredump); - close(fd_server); - _exit(EXIT_FAILURE); - } + if (fd_coredump < 0) + goto out; - memset(&info, 0, sizeof(info)); - info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; - ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info); - if (ret < 0) { - fprintf(stderr, "Failed to retrieve pidfd info from peer pidfd for coredump socket connection\n"); - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - _exit(EXIT_FAILURE); - } + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; - if (!(info.mask & PIDFD_INFO_COREDUMP)) { - fprintf(stderr, "Missing coredump information from coredumping task\n"); - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - _exit(EXIT_FAILURE); - } + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; - if (info.coredump_mask & PIDFD_COREDUMPED) { - fprintf(stderr, "Received unexpected connection from coredumping task\n"); - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - _exit(EXIT_FAILURE); - } + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; - ret = read(fd_coredump, &c, 1); + if (info.coredump_mask & PIDFD_COREDUMPED) + goto out; - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - close(fd_core_file); + if (read(fd_coredump, &c, 1) < 1) + goto out; - if (ret < 1) - _exit(EXIT_FAILURE); - _exit(EXIT_SUCCESS); + exit_code = EXIT_SUCCESS; +out: + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); } self->pid_coredump_server = pid_coredump_server; @@ -483,17 +470,22 @@ TEST_F(coredump, socket_detect_userspace_client) if (pid == 0) { int fd_socket; ssize_t ret; + const struct sockaddr_un coredump_sk = { + .sun_family = AF_UNIX, + .sun_path = "/tmp/coredump.socket", + }; + size_t coredump_sk_len = + offsetof(struct sockaddr_un, sun_path) + + sizeof("/tmp/coredump.socket"); fd_socket = socket(AF_UNIX, SOCK_STREAM, 0); if (fd_socket < 0) _exit(EXIT_FAILURE); - ret = connect(fd_socket, (const struct sockaddr *)&coredump_sk, coredump_sk_len); if (ret < 0) _exit(EXIT_FAILURE); - (void *)write(fd_socket, &(char){ 0 }, 1); close(fd_socket); _exit(EXIT_SUCCESS); } @@ -505,15 +497,11 @@ TEST_F(coredump, socket_detect_userspace_client) ASSERT_TRUE(WIFEXITED(status)); ASSERT_EQ(WEXITSTATUS(status), 0); - info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; - ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0); + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); ASSERT_EQ((info.coredump_mask & PIDFD_COREDUMPED), 0); - waitpid(pid_coredump_server, &status, 0); - self->pid_coredump_server = -ESRCH; - ASSERT_TRUE(WIFEXITED(status)); - ASSERT_EQ(WEXITSTATUS(status), 0); + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); ASSERT_NE(stat("/tmp/coredump.file", &st), 0); ASSERT_EQ(errno, ENOENT); @@ -521,17 +509,10 @@ TEST_F(coredump, socket_detect_userspace_client) TEST_F(coredump, socket_enoent) { - int pidfd, ret, status; - FILE *file; + int pidfd, status; pid_t pid; - char core_file[PATH_MAX]; - - file = fopen("/proc/sys/kernel/core_pattern", "w"); - ASSERT_NE(file, NULL); - ret = fprintf(file, "@/tmp/coredump.socket"); - ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); - ASSERT_EQ(fclose(file), 0); + ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket")); pid = fork(); ASSERT_GE(pid, 0); @@ -549,7 +530,6 @@ TEST_F(coredump, socket_enoent) TEST_F(coredump, socket_no_listener) { int pidfd, ret, status; - FILE *file; pid_t pid, pid_coredump_server; int ipc_sockets[2]; char c; @@ -560,45 +540,616 @@ TEST_F(coredump, socket_no_listener) size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) + sizeof("/tmp/coredump.socket"); + ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket")); + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); ASSERT_EQ(ret, 0); - file = fopen("/proc/sys/kernel/core_pattern", "w"); - ASSERT_NE(file, NULL); - - ret = fprintf(file, "@/tmp/coredump.socket"); - ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); - ASSERT_EQ(fclose(file), 0); - pid_coredump_server = fork(); ASSERT_GE(pid_coredump_server, 0); if (pid_coredump_server == 0) { - int fd_server; - socklen_t fd_peer_pidfd_len; + int fd_server = -1; + int exit_code = EXIT_FAILURE; close(ipc_sockets[0]); fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); if (fd_server < 0) - _exit(EXIT_FAILURE); + goto out; ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len); - if (ret < 0) { - fprintf(stderr, "Failed to bind coredump socket\n"); + if (ret < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + + exit_code = EXIT_SUCCESS; +out: + if (fd_server >= 0) close(fd_server); - close(ipc_sockets[1]); - _exit(EXIT_FAILURE); + close(ipc_sockets[1]); + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_FALSE(WCOREDUMP(status)); + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + +static ssize_t recv_marker(int fd) +{ + enum coredump_mark mark = COREDUMP_MARK_REQACK; + ssize_t ret; + + ret = recv(fd, &mark, sizeof(mark), MSG_WAITALL); + if (ret != sizeof(mark)) + return -1; + + switch (mark) { + case COREDUMP_MARK_REQACK: + fprintf(stderr, "Received marker: ReqAck\n"); + return COREDUMP_MARK_REQACK; + case COREDUMP_MARK_MINSIZE: + fprintf(stderr, "Received marker: MinSize\n"); + return COREDUMP_MARK_MINSIZE; + case COREDUMP_MARK_MAXSIZE: + fprintf(stderr, "Received marker: MaxSize\n"); + return COREDUMP_MARK_MAXSIZE; + case COREDUMP_MARK_UNSUPPORTED: + fprintf(stderr, "Received marker: Unsupported\n"); + return COREDUMP_MARK_UNSUPPORTED; + case COREDUMP_MARK_CONFLICTING: + fprintf(stderr, "Received marker: Conflicting\n"); + return COREDUMP_MARK_CONFLICTING; + default: + fprintf(stderr, "Received unknown marker: %u\n", mark); + break; + } + return -1; +} + +static bool read_marker(int fd, enum coredump_mark mark) +{ + ssize_t ret; + + ret = recv_marker(fd); + if (ret < 0) + return false; + return ret == mark; +} + +static bool read_coredump_req(int fd, struct coredump_req *req) +{ + ssize_t ret; + size_t field_size, user_size, ack_size, kernel_size, remaining_size; + + memset(req, 0, sizeof(*req)); + field_size = sizeof(req->size); + + /* Peek the size of the coredump request. */ + ret = recv(fd, req, field_size, MSG_PEEK | MSG_WAITALL); + if (ret != field_size) + return false; + kernel_size = req->size; + + if (kernel_size < COREDUMP_ACK_SIZE_VER0) + return false; + if (kernel_size >= PAGE_SIZE) + return false; + + /* Use the minimum of user and kernel size to read the full request. */ + user_size = sizeof(struct coredump_req); + ack_size = user_size < kernel_size ? user_size : kernel_size; + ret = recv(fd, req, ack_size, MSG_WAITALL); + if (ret != ack_size) + return false; + + fprintf(stderr, "Read coredump request with size %u and mask 0x%llx\n", + req->size, (unsigned long long)req->mask); + + if (user_size > kernel_size) + remaining_size = user_size - kernel_size; + else + remaining_size = kernel_size - user_size; + + if (PAGE_SIZE <= remaining_size) + return false; + + /* + * Discard any additional data if the kernel's request was larger than + * what we knew about or cared about. + */ + if (remaining_size) { + char buffer[PAGE_SIZE]; + + ret = recv(fd, buffer, sizeof(buffer), MSG_WAITALL); + if (ret != remaining_size) + return false; + fprintf(stderr, "Discarded %zu bytes of data after coredump request\n", remaining_size); + } + + return true; +} + +static bool send_coredump_ack(int fd, const struct coredump_req *req, + __u64 mask, size_t size_ack) +{ + ssize_t ret; + /* + * Wrap struct coredump_ack in a larger struct so we can + * simulate sending to much data to the kernel. + */ + struct large_ack_for_size_testing { + struct coredump_ack ack; + char buffer[PAGE_SIZE]; + } large_ack = {}; + + if (!size_ack) + size_ack = sizeof(struct coredump_ack) < req->size_ack ? + sizeof(struct coredump_ack) : + req->size_ack; + large_ack.ack.mask = mask; + large_ack.ack.size = size_ack; + ret = send(fd, &large_ack, size_ack, MSG_NOSIGNAL); + if (ret != size_ack) + return false; + + fprintf(stderr, "Sent coredump ack with size %zu and mask 0x%llx\n", + size_ack, (unsigned long long)mask); + return true; +} + +static bool check_coredump_req(const struct coredump_req *req, size_t min_size, + __u64 required_mask) +{ + if (req->size < min_size) + return false; + if ((req->mask & required_mask) != required_mask) + return false; + if (req->mask & ~required_mask) + return false; + return true; +} + +TEST_F(coredump, socket_request_kernel) +{ + int pidfd, ret, status; + pid_t pid, pid_coredump_server; + struct stat st; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + struct coredump_req req = {}; + int fd_server = -1, fd_coredump = -1, fd_core_file = -1, fd_peer_pidfd = -1; + int exit_code = EXIT_FAILURE; + + close(ipc_sockets[0]); + + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) + goto out; + + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; + + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; + + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; + + if (!(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; + + fd_core_file = creat("/tmp/coredump.file", 0644); + if (fd_core_file < 0) + goto out; + + if (!read_coredump_req(fd_coredump, &req)) + goto out; + + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) + goto out; + + if (!send_coredump_ack(fd_coredump, &req, + COREDUMP_KERNEL | COREDUMP_WAIT, 0)) + goto out; + + if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) + goto out; + + for (;;) { + char buffer[4096]; + ssize_t bytes_read, bytes_write; + + bytes_read = read(fd_coredump, buffer, sizeof(buffer)); + if (bytes_read < 0) + goto out; + + if (bytes_read == 0) + break; + + bytes_write = write(fd_core_file, buffer, bytes_read); + if (bytes_read != bytes_write) + goto out; } - if (write_nointr(ipc_sockets[1], "1", 1) < 0) { + exit_code = EXIT_SUCCESS; +out: + if (fd_core_file >= 0) + close(fd_core_file); + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) close(fd_server); - close(ipc_sockets[1]); - _exit(EXIT_FAILURE); + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_TRUE(WCOREDUMP(status)); + + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); + + ASSERT_EQ(stat("/tmp/coredump.file", &st), 0); + ASSERT_GT(st.st_size, 0); + system("file /tmp/coredump.file"); +} + +TEST_F(coredump, socket_request_userspace) +{ + int pidfd, ret, status; + pid_t pid, pid_coredump_server; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + struct coredump_req req = {}; + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1; + int exit_code = EXIT_FAILURE; + + close(ipc_sockets[0]); + + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) + goto out; + + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; + + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; + + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; + + if (!(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; + + if (!read_coredump_req(fd_coredump, &req)) + goto out; + + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) + goto out; + + if (!send_coredump_ack(fd_coredump, &req, + COREDUMP_USERSPACE | COREDUMP_WAIT, 0)) + goto out; + + if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) + goto out; + + for (;;) { + char buffer[4096]; + ssize_t bytes_read; + + bytes_read = read(fd_coredump, buffer, sizeof(buffer)); + if (bytes_read > 0) + goto out; + + if (bytes_read < 0) + goto out; + + if (bytes_read == 0) + break; + } + + exit_code = EXIT_SUCCESS; +out: + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_TRUE(WCOREDUMP(status)); + + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + +TEST_F(coredump, socket_request_reject) +{ + int pidfd, ret, status; + pid_t pid, pid_coredump_server; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + struct coredump_req req = {}; + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1; + int exit_code = EXIT_FAILURE; + + close(ipc_sockets[0]); + + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) + goto out; + + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; + + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; + + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; + + if (!(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; + + if (!read_coredump_req(fd_coredump, &req)) + goto out; + + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) + goto out; + + if (!send_coredump_ack(fd_coredump, &req, + COREDUMP_REJECT | COREDUMP_WAIT, 0)) + goto out; + + if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) + goto out; + + for (;;) { + char buffer[4096]; + ssize_t bytes_read; + + bytes_read = read(fd_coredump, buffer, sizeof(buffer)); + if (bytes_read > 0) + goto out; + + if (bytes_read < 0) + goto out; + + if (bytes_read == 0) + break; } - close(fd_server); + exit_code = EXIT_SUCCESS; +out: + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_FALSE(WCOREDUMP(status)); + + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + +TEST_F(coredump, socket_request_invalid_flag_combination) +{ + int pidfd, ret, status; + pid_t pid, pid_coredump_server; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + struct coredump_req req = {}; + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1; + int exit_code = EXIT_FAILURE; + + close(ipc_sockets[0]); + + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + close(ipc_sockets[1]); - _exit(EXIT_SUCCESS); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) + goto out; + + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; + + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; + + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; + + if (!(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; + + if (!read_coredump_req(fd_coredump, &req)) + goto out; + + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) + goto out; + + if (!send_coredump_ack(fd_coredump, &req, + COREDUMP_KERNEL | COREDUMP_REJECT | COREDUMP_WAIT, 0)) + goto out; + + if (!read_marker(fd_coredump, COREDUMP_MARK_CONFLICTING)) + goto out; + + exit_code = EXIT_SUCCESS; +out: + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); } self->pid_coredump_server = pid_coredump_server; @@ -618,10 +1169,662 @@ TEST_F(coredump, socket_no_listener) ASSERT_TRUE(WIFSIGNALED(status)); ASSERT_FALSE(WCOREDUMP(status)); - waitpid(pid_coredump_server, &status, 0); - self->pid_coredump_server = -ESRCH; - ASSERT_TRUE(WIFEXITED(status)); - ASSERT_EQ(WEXITSTATUS(status), 0); + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + +TEST_F(coredump, socket_request_unknown_flag) +{ + int pidfd, ret, status; + pid_t pid, pid_coredump_server; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + struct coredump_req req = {}; + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1; + int exit_code = EXIT_FAILURE; + + close(ipc_sockets[0]); + + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) + goto out; + + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; + + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; + + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; + + if (!(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; + + if (!read_coredump_req(fd_coredump, &req)) + goto out; + + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) + goto out; + + if (!send_coredump_ack(fd_coredump, &req, (1ULL << 63), 0)) + goto out; + + if (!read_marker(fd_coredump, COREDUMP_MARK_UNSUPPORTED)) + goto out; + + exit_code = EXIT_SUCCESS; +out: + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_FALSE(WCOREDUMP(status)); + + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + +TEST_F(coredump, socket_request_invalid_size_small) +{ + int pidfd, ret, status; + pid_t pid, pid_coredump_server; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + struct coredump_req req = {}; + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1; + int exit_code = EXIT_FAILURE; + + close(ipc_sockets[0]); + + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) + goto out; + + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; + + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; + + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; + + if (!(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; + + if (!read_coredump_req(fd_coredump, &req)) + goto out; + + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) + goto out; + + if (!send_coredump_ack(fd_coredump, &req, + COREDUMP_REJECT | COREDUMP_WAIT, + COREDUMP_ACK_SIZE_VER0 / 2)) + goto out; + + if (!read_marker(fd_coredump, COREDUMP_MARK_MINSIZE)) + goto out; + + exit_code = EXIT_SUCCESS; +out: + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_FALSE(WCOREDUMP(status)); + + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + +TEST_F(coredump, socket_request_invalid_size_large) +{ + int pidfd, ret, status; + pid_t pid, pid_coredump_server; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + struct coredump_req req = {}; + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1; + int exit_code = EXIT_FAILURE; + + close(ipc_sockets[0]); + + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) + goto out; + + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; + + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; + + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; + + if (!(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; + + if (!read_coredump_req(fd_coredump, &req)) + goto out; + + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) + goto out; + + if (!send_coredump_ack(fd_coredump, &req, + COREDUMP_REJECT | COREDUMP_WAIT, + COREDUMP_ACK_SIZE_VER0 + PAGE_SIZE)) + goto out; + + if (!read_marker(fd_coredump, COREDUMP_MARK_MAXSIZE)) + goto out; + + exit_code = EXIT_SUCCESS; +out: + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_FALSE(WCOREDUMP(status)); + + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + +static int open_coredump_tmpfile(int fd_tmpfs_detached) +{ + return openat(fd_tmpfs_detached, ".", O_TMPFILE | O_RDWR | O_EXCL, 0600); +} + +#define NUM_CRASHING_COREDUMPS 5 + +TEST_F_TIMEOUT(coredump, socket_multiple_crashing_coredumps, 500) +{ + int pidfd[NUM_CRASHING_COREDUMPS], status[NUM_CRASHING_COREDUMPS]; + pid_t pid[NUM_CRASHING_COREDUMPS], pid_coredump_server; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1; + int exit_code = EXIT_FAILURE; + struct coredump_req req = {}; + + close(ipc_sockets[0]); + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) { + fprintf(stderr, "Failed to create and listen on unix socket\n"); + goto out; + } + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) { + fprintf(stderr, "Failed to notify parent via ipc socket\n"); + goto out; + } + close(ipc_sockets[1]); + + for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) { + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) { + fprintf(stderr, "accept4 failed: %m\n"); + goto out; + } + + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) { + fprintf(stderr, "get_peer_pidfd failed for fd %d: %m\n", fd_coredump); + goto out; + } + + if (!get_pidfd_info(fd_peer_pidfd, &info)) { + fprintf(stderr, "get_pidfd_info failed for fd %d\n", fd_peer_pidfd); + goto out; + } + + if (!(info.mask & PIDFD_INFO_COREDUMP)) { + fprintf(stderr, "pidfd info missing PIDFD_INFO_COREDUMP for fd %d\n", fd_peer_pidfd); + goto out; + } + if (!(info.coredump_mask & PIDFD_COREDUMPED)) { + fprintf(stderr, "pidfd info missing PIDFD_COREDUMPED for fd %d\n", fd_peer_pidfd); + goto out; + } + + if (!read_coredump_req(fd_coredump, &req)) { + fprintf(stderr, "read_coredump_req failed for fd %d\n", fd_coredump); + goto out; + } + + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) { + fprintf(stderr, "check_coredump_req failed for fd %d\n", fd_coredump); + goto out; + } + + if (!send_coredump_ack(fd_coredump, &req, + COREDUMP_KERNEL | COREDUMP_WAIT, 0)) { + fprintf(stderr, "send_coredump_ack failed for fd %d\n", fd_coredump); + goto out; + } + + if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) { + fprintf(stderr, "read_marker failed for fd %d\n", fd_coredump); + goto out; + } + + fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached); + if (fd_core_file < 0) { + fprintf(stderr, "%m - open_coredump_tmpfile failed for fd %d\n", fd_coredump); + goto out; + } + + for (;;) { + char buffer[4096]; + ssize_t bytes_read, bytes_write; + + bytes_read = read(fd_coredump, buffer, sizeof(buffer)); + if (bytes_read < 0) { + fprintf(stderr, "read failed for fd %d: %m\n", fd_coredump); + goto out; + } + + if (bytes_read == 0) + break; + + bytes_write = write(fd_core_file, buffer, bytes_read); + if (bytes_read != bytes_write) { + fprintf(stderr, "write failed for fd %d: %m\n", fd_core_file); + goto out; + } + } + + close(fd_core_file); + close(fd_peer_pidfd); + close(fd_coredump); + fd_peer_pidfd = -1; + fd_coredump = -1; + } + + exit_code = EXIT_SUCCESS; +out: + if (fd_core_file >= 0) + close(fd_core_file); + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) { + pid[i] = fork(); + ASSERT_GE(pid[i], 0); + if (pid[i] == 0) + crashing_child(); + pidfd[i] = sys_pidfd_open(pid[i], 0); + ASSERT_GE(pidfd[i], 0); + } + + for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) { + waitpid(pid[i], &status[i], 0); + ASSERT_TRUE(WIFSIGNALED(status[i])); + ASSERT_TRUE(WCOREDUMP(status[i])); + } + + for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) { + info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; + ASSERT_EQ(ioctl(pidfd[i], PIDFD_GET_INFO, &info), 0); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + } + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + +#define MAX_EVENTS 128 + +static void process_coredump_worker(int fd_coredump, int fd_peer_pidfd, int fd_core_file) +{ + int epfd = -1; + int exit_code = EXIT_FAILURE; + + epfd = epoll_create1(0); + if (epfd < 0) + goto out; + + struct epoll_event ev; + ev.events = EPOLLIN | EPOLLRDHUP | EPOLLET; + ev.data.fd = fd_coredump; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd_coredump, &ev) < 0) + goto out; + + for (;;) { + struct epoll_event events[1]; + int n = epoll_wait(epfd, events, 1, -1); + if (n < 0) + break; + + if (events[0].events & (EPOLLIN | EPOLLRDHUP)) { + for (;;) { + char buffer[4096]; + ssize_t bytes_read = read(fd_coredump, buffer, sizeof(buffer)); + if (bytes_read < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) + break; + goto out; + } + if (bytes_read == 0) + goto done; + ssize_t bytes_write = write(fd_core_file, buffer, bytes_read); + if (bytes_write != bytes_read) + goto out; + } + } + } + +done: + exit_code = EXIT_SUCCESS; +out: + if (epfd >= 0) + close(epfd); + if (fd_core_file >= 0) + close(fd_core_file); + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + _exit(exit_code); +} + +TEST_F_TIMEOUT(coredump, socket_multiple_crashing_coredumps_epoll_workers, 500) +{ + int pidfd[NUM_CRASHING_COREDUMPS], status[NUM_CRASHING_COREDUMPS]; + pid_t pid[NUM_CRASHING_COREDUMPS], pid_coredump_server, worker_pids[NUM_CRASHING_COREDUMPS]; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + int fd_server = -1, exit_code = EXIT_FAILURE, n_conns = 0; + fd_server = -1; + exit_code = EXIT_FAILURE; + n_conns = 0; + close(ipc_sockets[0]); + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + close(ipc_sockets[1]); + + while (n_conns < NUM_CRASHING_COREDUMPS) { + int fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1; + struct coredump_req req = {}; + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) + continue; + goto out; + } + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; + if (!(info.mask & PIDFD_INFO_COREDUMP) || !(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; + if (!read_coredump_req(fd_coredump, &req)) + goto out; + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) + goto out; + if (!send_coredump_ack(fd_coredump, &req, COREDUMP_KERNEL | COREDUMP_WAIT, 0)) + goto out; + if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) + goto out; + fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached); + if (fd_core_file < 0) + goto out; + pid_t worker = fork(); + if (worker == 0) { + close(fd_server); + process_coredump_worker(fd_coredump, fd_peer_pidfd, fd_core_file); + } + worker_pids[n_conns] = worker; + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_core_file >= 0) + close(fd_core_file); + n_conns++; + } + exit_code = EXIT_SUCCESS; +out: + if (fd_server >= 0) + close(fd_server); + + // Reap all worker processes + for (int i = 0; i < n_conns; i++) { + int wstatus; + if (waitpid(worker_pids[i], &wstatus, 0) < 0) { + fprintf(stderr, "Failed to wait for worker %d: %m\n", worker_pids[i]); + } else if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != EXIT_SUCCESS) { + fprintf(stderr, "Worker %d exited with error code %d\n", worker_pids[i], WEXITSTATUS(wstatus)); + exit_code = EXIT_FAILURE; + } + } + + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) { + pid[i] = fork(); + ASSERT_GE(pid[i], 0); + if (pid[i] == 0) + crashing_child(); + pidfd[i] = sys_pidfd_open(pid[i], 0); + ASSERT_GE(pidfd[i], 0); + } + + for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) { + ASSERT_GE(waitpid(pid[i], &status[i], 0), 0); + ASSERT_TRUE(WIFSIGNALED(status[i])); + ASSERT_TRUE(WCOREDUMP(status[i])); + } + + for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) { + info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; + ASSERT_EQ(ioctl(pidfd[i], PIDFD_GET_INFO, &info), 0); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + } + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + +TEST_F(coredump, socket_invalid_paths) +{ + ASSERT_FALSE(set_core_pattern("@ /tmp/coredump.socket")); + ASSERT_FALSE(set_core_pattern("@/tmp/../coredump.socket")); + ASSERT_FALSE(set_core_pattern("@../coredump.socket")); + ASSERT_FALSE(set_core_pattern("@/tmp/coredump.socket/..")); + ASSERT_FALSE(set_core_pattern("@..")); + + ASSERT_FALSE(set_core_pattern("@@ /tmp/coredump.socket")); + ASSERT_FALSE(set_core_pattern("@@/tmp/../coredump.socket")); + ASSERT_FALSE(set_core_pattern("@@../coredump.socket")); + ASSERT_FALSE(set_core_pattern("@@/tmp/coredump.socket/..")); + ASSERT_FALSE(set_core_pattern("@@..")); + + ASSERT_FALSE(set_core_pattern("@@@/tmp/coredump.socket")); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py index d9c10613ae67..44151b7b1a24 100644 --- a/tools/testing/selftests/drivers/net/lib/py/load.py +++ b/tools/testing/selftests/drivers/net/lib/py/load.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +import re import time from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen @@ -10,12 +11,11 @@ class GenerateTraffic: self.env = env - if port is None: - port = rand_port() - self._iperf_server = cmd(f"iperf3 -s -1 -p {port}", background=True) - wait_port_listen(port) + self.port = rand_port() if port is None else port + self._iperf_server = cmd(f"iperf3 -s -1 -p {self.port}", background=True) + wait_port_listen(self.port) time.sleep(0.1) - self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {port} -t 86400", + self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {self.port} -t 86400", background=True, host=env.remote) # Wait for traffic to ramp up @@ -56,3 +56,16 @@ class GenerateTraffic: ksft_pr(">> Server:") ksft_pr(self._iperf_server.stdout) ksft_pr(self._iperf_server.stderr) + self._wait_client_stopped() + + def _wait_client_stopped(self, sleep=0.005, timeout=5): + end = time.monotonic() + timeout + + live_port_pattern = re.compile(fr":{self.port:04X} 0[^6] ") + + while time.monotonic() < end: + data = cmd("cat /proc/net/tcp*", host=self.env.remote).stdout + if not live_port_pattern.search(data): + return + time.sleep(sleep) + raise Exception(f"Waiting for client to stop timed out after {timeout}s") diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore index 7afa58e2bb20..fcbdb1297e24 100644 --- a/tools/testing/selftests/filesystems/.gitignore +++ b/tools/testing/selftests/filesystems/.gitignore @@ -3,3 +3,4 @@ dnotify_test devpts_pts file_stressor anon_inode_test +kernfs_test diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile index b02326193fee..73d4650af1a5 100644 --- a/tools/testing/selftests/filesystems/Makefile +++ b/tools/testing/selftests/filesystems/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test +TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test kernfs_test TEST_GEN_PROGS_EXTENDED := dnotify_test include ../lib.mk diff --git a/tools/testing/selftests/filesystems/kernfs_test.c b/tools/testing/selftests/filesystems/kernfs_test.c new file mode 100644 index 000000000000..16538b3b318e --- /dev/null +++ b/tools/testing/selftests/filesystems/kernfs_test.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ + +#include <fcntl.h> +#include <stdio.h> +#include <sys/stat.h> +#include <sys/xattr.h> + +#include "../kselftest_harness.h" +#include "wrappers.h" + +TEST(kernfs_listxattr) +{ + int fd; + + /* Read-only file that can never have any extended attributes set. */ + fd = open("/sys/kernel/warn_count", O_RDONLY | O_CLOEXEC); + ASSERT_GE(fd, 0); + ASSERT_EQ(flistxattr(fd, NULL, 0), 0); + EXPECT_EQ(close(fd), 0); +} + +TEST(kernfs_getxattr) +{ + int fd; + char buf[1]; + + /* Read-only file that can never have any extended attributes set. */ + fd = open("/sys/kernel/warn_count", O_RDONLY | O_CLOEXEC); + ASSERT_GE(fd, 0); + ASSERT_LT(fgetxattr(fd, "user.foo", buf, sizeof(buf)), 0); + ASSERT_EQ(errno, ENODATA); + EXPECT_EQ(close(fd), 0); +} + +TEST_HARNESS_MAIN + diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h index ea79662405bc..1f625b39948a 100644 --- a/tools/testing/selftests/futex/include/futex2test.h +++ b/tools/testing/selftests/futex/include/futex2test.h @@ -4,6 +4,7 @@ * * Copyright 2021 Collabora Ltd. */ +#include <linux/time_types.h> #include <stdint.h> #define u64_to_ptr(x) ((void *)(uintptr_t)(x)) @@ -65,7 +66,12 @@ struct futex32_numa { static inline int futex_waitv(volatile struct futex_waitv *waiters, unsigned long nr_waiters, unsigned long flags, struct timespec *timo, clockid_t clockid) { - return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo, clockid); + struct __kernel_timespec ts = { + .tv_sec = timo->tv_sec, + .tv_nsec = timo->tv_nsec, + }; + + return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, &ts, clockid); } /* diff --git a/tools/testing/selftests/hid/tests/test_mouse.py b/tools/testing/selftests/hid/tests/test_mouse.py index 66daf7e5975c..eb4e15a0e53b 100644 --- a/tools/testing/selftests/hid/tests/test_mouse.py +++ b/tools/testing/selftests/hid/tests/test_mouse.py @@ -439,6 +439,68 @@ class BadResolutionMultiplierMouse(ResolutionMultiplierMouse): return 32 # EPIPE +class BadReportDescriptorMouse(BaseMouse): + """ + This "device" was one autogenerated by syzbot. There are a lot of issues in + it, and the most problematic is that it declares features that have no + size. + + This leads to report->size being set to 0 and can mess up with usbhid + internals. Fortunately, uhid merely passes the incoming buffer, without + touching it so a buffer of size 0 will be translated to [] without + triggering a kernel oops. + + Because the report descriptor is wrong, no input are created, and we need + to tweak a little bit the parameters to make it look correct. + """ + + # fmt: off + report_descriptor = [ + 0x96, 0x01, 0x00, # Report Count (1) 0 + 0x06, 0x01, 0x00, # Usage Page (Generic Desktop) 3 + # 0x03, 0x00, 0x00, 0x00, 0x00, # Ignored by the kernel somehow + 0x2a, 0x90, 0xa0, # Usage Maximum (41104) 6 + 0x27, 0x00, 0x00, 0x00, 0x00, # Logical Maximum (0) 9 + 0xb3, 0x81, 0x3e, 0x25, 0x03, # Feature (Cnst,Arr,Abs,Vol) 14 + 0x1b, 0xdd, 0xe8, 0x40, 0x50, # Usage Minimum (1346431197) 19 + 0x3b, 0x5d, 0x8c, 0x3d, 0xda, # Designator Index 24 + ] + # fmt: on + + def __init__( + self, rdesc=report_descriptor, name=None, input_info=(3, 0x045E, 0x07DA) + ): + super().__init__(rdesc, name, input_info) + self.high_resolution_report_called = False + + def get_evdev(self, application=None): + assert self._input_nodes is None + return ( + "Ok" # should be a list or None, but both would fail, so abusing the system + ) + + def next_sync_events(self, application=None): + # there are no evdev nodes, so no events + return [] + + def is_ready(self): + # we wait for the SET_REPORT command to come + return self.high_resolution_report_called + + def set_report(self, req, rnum, rtype, data): + if rtype != self.UHID_FEATURE_REPORT: + raise InvalidHIDCommunication(f"Unexpected report type: {rtype}") + if rnum != 0x0: + raise InvalidHIDCommunication(f"Unexpected report number: {rnum}") + + if len(data) != 1: + raise InvalidHIDCommunication(f"Unexpected data: {data}, expected '[0]'") + + self.high_resolution_report_called = True + + return 0 + + class ResolutionMultiplierHWheelMouse(TwoWheelMouse): # fmt: off report_descriptor = [ @@ -975,3 +1037,11 @@ class TestMiMouse(TestWheelMouse): # assert below print out the real error pass assert remaining == [] + + +class TestBadReportDescriptorMouse(base.BaseTestCase.TestUhid): + def create_device(self): + return BadReportDescriptorMouse() + + def assertName(self, uhdev): + pass diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c index 390ae2d87493..0eb371c62ab8 100644 --- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c +++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c @@ -74,6 +74,7 @@ int main(int argc, char *argv[]) int testcase; char test[80]; + TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2)); ksft_print_header(); diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h index 18a6014920b5..b16986aa6442 100644 --- a/tools/testing/selftests/landlock/audit.h +++ b/tools/testing/selftests/landlock/audit.h @@ -403,11 +403,12 @@ static int audit_init_filter_exe(struct audit_filter *filter, const char *path) /* It is assume that there is not already filtering rules. */ filter->record_type = AUDIT_EXE; if (!path) { - filter->exe_len = readlink("/proc/self/exe", filter->exe, - sizeof(filter->exe) - 1); - if (filter->exe_len < 0) + int ret = readlink("/proc/self/exe", filter->exe, + sizeof(filter->exe) - 1); + if (ret < 0) return -errno; + filter->exe_len = ret; return 0; } diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c index cfc571afd0eb..46d02d49835a 100644 --- a/tools/testing/selftests/landlock/audit_test.c +++ b/tools/testing/selftests/landlock/audit_test.c @@ -7,6 +7,7 @@ #define _GNU_SOURCE #include <errno.h> +#include <fcntl.h> #include <limits.h> #include <linux/landlock.h> #include <pthread.h> diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index 73729382d40f..fa0f18ec62c4 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -1832,6 +1832,46 @@ TEST_F_FORK(layout1, release_inodes) ASSERT_EQ(ENOENT, test_open(dir_s3d3, O_RDONLY)); } +/* + * This test checks that a rule on a directory used as a mount point does not + * grant access to the mount covering it. It is a generalization of the bind + * mount case in layout3_fs.hostfs.release_inodes that tests hidden mount points. + */ +TEST_F_FORK(layout1, covered_rule) +{ + const struct rule layer1[] = { + { + .path = dir_s3d2, + .access = LANDLOCK_ACCESS_FS_READ_DIR, + }, + {}, + }; + int ruleset_fd; + + /* Unmount to simplify FIXTURE_TEARDOWN. */ + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(0, umount(dir_s3d2)); + clear_cap(_metadata, CAP_SYS_ADMIN); + + /* Creates a ruleset with the future hidden directory. */ + ruleset_fd = + create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_DIR, layer1); + ASSERT_LE(0, ruleset_fd); + + /* Covers with a new mount point. */ + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(0, mount_opt(&mnt_tmp, dir_s3d2)); + clear_cap(_metadata, CAP_SYS_ADMIN); + + ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY)); + + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks that access to the new mount point is denied. */ + ASSERT_EQ(EACCES, test_open(dir_s3d2, O_RDONLY)); +} + enum relative_access { REL_OPEN, REL_CHDIR, diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config index 7afe05e8c4d7..bd09fdaf53e0 100644 --- a/tools/testing/selftests/lkdtm/config +++ b/tools/testing/selftests/lkdtm/config @@ -2,7 +2,7 @@ CONFIG_LKDTM=y CONFIG_DEBUG_LIST=y CONFIG_SLAB_FREELIST_HARDENED=y CONFIG_FORTIFY_SOURCE=y -CONFIG_GCC_PLUGIN_STACKLEAK=y +CONFIG_KSTACK_ERASE=y CONFIG_HARDENED_USERCOPY=y CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y CONFIG_INIT_ON_FREE_DEFAULT_ON=y diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index aa7400ed0e99..f0d9c035641d 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -31,6 +31,7 @@ uint64_t pmd_pagesize; #define INPUT_MAX 80 #define PID_FMT "%d,0x%lx,0x%lx,%d" +#define PID_FMT_OFFSET "%d,0x%lx,0x%lx,%d,%d" #define PATH_FMT "%s,0x%lx,0x%lx,%d" #define PFN_MASK ((1UL<<55)-1) @@ -483,7 +484,7 @@ void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc, write_debugfs(PID_FMT, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order); else - write_debugfs(PID_FMT, getpid(), (uint64_t)addr, + write_debugfs(PID_FMT_OFFSET, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order, offset); for (i = 0; i < fd_size; i++) diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c index 7e534594167e..37e034874034 100644 --- a/tools/testing/selftests/net/af_unix/scm_pidfd.c +++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c @@ -15,6 +15,7 @@ #include <sys/types.h> #include <sys/wait.h> +#include "../../pidfd/pidfd.h" #include "../../kselftest_harness.h" #define clean_errno() (errno == 0 ? "None" : strerror(errno)) @@ -26,6 +27,8 @@ #define SCM_PIDFD 0x04 #endif +#define CHILD_EXIT_CODE_OK 123 + static void child_die() { exit(1); @@ -126,16 +129,65 @@ out: return result; } +struct cmsg_data { + struct ucred *ucred; + int *pidfd; +}; + +static int parse_cmsg(struct msghdr *msg, struct cmsg_data *res) +{ + struct cmsghdr *cmsg; + int data = 0; + + if (msg->msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { + log_err("recvmsg: truncated"); + return 1; + } + + for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; + cmsg = CMSG_NXTHDR(msg, cmsg)) { + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_PIDFD) { + if (cmsg->cmsg_len < sizeof(*res->pidfd)) { + log_err("CMSG parse: SCM_PIDFD wrong len"); + return 1; + } + + res->pidfd = (void *)CMSG_DATA(cmsg); + } + + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_CREDENTIALS) { + if (cmsg->cmsg_len < sizeof(*res->ucred)) { + log_err("CMSG parse: SCM_CREDENTIALS wrong len"); + return 1; + } + + res->ucred = (void *)CMSG_DATA(cmsg); + } + } + + if (!res->pidfd) { + log_err("CMSG parse: SCM_PIDFD not found"); + return 1; + } + + if (!res->ucred) { + log_err("CMSG parse: SCM_CREDENTIALS not found"); + return 1; + } + + return 0; +} + static int cmsg_check(int fd) { struct msghdr msg = { 0 }; - struct cmsghdr *cmsg; + struct cmsg_data res; struct iovec iov; - struct ucred *ucred = NULL; int data = 0; char control[CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int))] = { 0 }; - int *pidfd = NULL; pid_t parent_pid; int err; @@ -158,53 +210,99 @@ static int cmsg_check(int fd) return 1; } - for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; - cmsg = CMSG_NXTHDR(&msg, cmsg)) { - if (cmsg->cmsg_level == SOL_SOCKET && - cmsg->cmsg_type == SCM_PIDFD) { - if (cmsg->cmsg_len < sizeof(*pidfd)) { - log_err("CMSG parse: SCM_PIDFD wrong len"); - return 1; - } + /* send(pfd, "x", sizeof(char), 0) */ + if (data != 'x') { + log_err("recvmsg: data corruption"); + return 1; + } - pidfd = (void *)CMSG_DATA(cmsg); - } + if (parse_cmsg(&msg, &res)) { + log_err("CMSG parse: parse_cmsg() failed"); + return 1; + } - if (cmsg->cmsg_level == SOL_SOCKET && - cmsg->cmsg_type == SCM_CREDENTIALS) { - if (cmsg->cmsg_len < sizeof(*ucred)) { - log_err("CMSG parse: SCM_CREDENTIALS wrong len"); - return 1; - } + /* pidfd from SCM_PIDFD should point to the parent process PID */ + parent_pid = + get_pid_from_fdinfo_file(*res.pidfd, "Pid:", sizeof("Pid:") - 1); + if (parent_pid != getppid()) { + log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid()); + close(*res.pidfd); + return 1; + } - ucred = (void *)CMSG_DATA(cmsg); - } + close(*res.pidfd); + return 0; +} + +static int cmsg_check_dead(int fd, int expected_pid) +{ + int err; + struct msghdr msg = { 0 }; + struct cmsg_data res; + struct iovec iov; + int data = 0; + char control[CMSG_SPACE(sizeof(struct ucred)) + + CMSG_SPACE(sizeof(int))] = { 0 }; + pid_t client_pid; + struct pidfd_info info = { + .mask = PIDFD_INFO_EXIT, + }; + + iov.iov_base = &data; + iov.iov_len = sizeof(data); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + err = recvmsg(fd, &msg, 0); + if (err < 0) { + log_err("recvmsg"); + return 1; } - /* send(pfd, "x", sizeof(char), 0) */ - if (data != 'x') { + if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { + log_err("recvmsg: truncated"); + return 1; + } + + /* send(cfd, "y", sizeof(char), 0) */ + if (data != 'y') { log_err("recvmsg: data corruption"); return 1; } - if (!pidfd) { - log_err("CMSG parse: SCM_PIDFD not found"); + if (parse_cmsg(&msg, &res)) { + log_err("CMSG parse: parse_cmsg() failed"); return 1; } - if (!ucred) { - log_err("CMSG parse: SCM_CREDENTIALS not found"); + /* + * pidfd from SCM_PIDFD should point to the client_pid. + * Let's read exit information and check if it's what + * we expect to see. + */ + if (ioctl(*res.pidfd, PIDFD_GET_INFO, &info)) { + log_err("%s: ioctl(PIDFD_GET_INFO) failed", __func__); + close(*res.pidfd); return 1; } - /* pidfd from SCM_PIDFD should point to the parent process PID */ - parent_pid = - get_pid_from_fdinfo_file(*pidfd, "Pid:", sizeof("Pid:") - 1); - if (parent_pid != getppid()) { - log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid()); + if (!(info.mask & PIDFD_INFO_EXIT)) { + log_err("%s: No exit information from ioctl(PIDFD_GET_INFO)", __func__); + close(*res.pidfd); return 1; } + err = WIFEXITED(info.exit_code) ? WEXITSTATUS(info.exit_code) : 1; + if (err != CHILD_EXIT_CODE_OK) { + log_err("%s: wrong exit_code %d != %d", __func__, err, CHILD_EXIT_CODE_OK); + close(*res.pidfd); + return 1; + } + + close(*res.pidfd); return 0; } @@ -291,6 +389,24 @@ static void fill_sockaddr(struct sock_addr *addr, bool abstract) memcpy(sun_path_buf, addr->sock_name, strlen(addr->sock_name)); } +static int sk_enable_cred_pass(int sk) +{ + int on = 0; + + on = 1; + if (setsockopt(sk, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) { + log_err("Failed to set SO_PASSCRED"); + return 1; + } + + if (setsockopt(sk, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) { + log_err("Failed to set SO_PASSPIDFD"); + return 1; + } + + return 0; +} + static void client(FIXTURE_DATA(scm_pidfd) *self, const FIXTURE_VARIANT(scm_pidfd) *variant) { @@ -299,7 +415,6 @@ static void client(FIXTURE_DATA(scm_pidfd) *self, struct ucred peer_cred; int peer_pidfd; pid_t peer_pid; - int on = 0; cfd = socket(AF_UNIX, variant->type, 0); if (cfd < 0) { @@ -322,14 +437,8 @@ static void client(FIXTURE_DATA(scm_pidfd) *self, child_die(); } - on = 1; - if (setsockopt(cfd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) { - log_err("Failed to set SO_PASSCRED"); - child_die(); - } - - if (setsockopt(cfd, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) { - log_err("Failed to set SO_PASSPIDFD"); + if (sk_enable_cred_pass(cfd)) { + log_err("sk_enable_cred_pass() failed"); child_die(); } @@ -340,6 +449,12 @@ static void client(FIXTURE_DATA(scm_pidfd) *self, child_die(); } + /* send something to the parent so it can receive SCM_PIDFD too and validate it */ + if (send(cfd, "y", sizeof(char), 0) == -1) { + log_err("Failed to send(cfd, \"y\", sizeof(char), 0)"); + child_die(); + } + /* skip further for SOCK_DGRAM as it's not applicable */ if (variant->type == SOCK_DGRAM) return; @@ -398,7 +513,13 @@ TEST_F(scm_pidfd, test) close(self->server); close(self->startup_pipe[0]); client(self, variant); - exit(0); + + /* + * It's a bit unusual, but in case of success we return non-zero + * exit code (CHILD_EXIT_CODE_OK) and then we expect to read it + * from ioctl(PIDFD_GET_INFO) in cmsg_check_dead(). + */ + exit(CHILD_EXIT_CODE_OK); } close(self->startup_pipe[1]); @@ -421,9 +542,17 @@ TEST_F(scm_pidfd, test) ASSERT_NE(-1, err); } - close(pfd); waitpid(self->client_pid, &child_status, 0); - ASSERT_EQ(0, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1); + /* see comment before exit(CHILD_EXIT_CODE_OK) */ + ASSERT_EQ(CHILD_EXIT_CODE_OK, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1); + + err = sk_enable_cred_pass(pfd); + ASSERT_EQ(0, err); + + err = cmsg_check_dead(pfd, self->client_pid); + ASSERT_EQ(0, err); + + close(pfd); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh index 5b34f6e1f831..48eb999a3120 100755 --- a/tools/testing/selftests/net/gre_ipv6_lladdr.sh +++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh @@ -24,7 +24,10 @@ setup_basenet() ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad } -# Check if network device has an IPv6 link-local address assigned. +# Check the IPv6 configuration of a network device. +# +# We currently check the generation of the link-local IPv6 address and the +# creation of the ff00::/8 multicast route. # # Parameters: # @@ -35,7 +38,7 @@ setup_basenet() # a link-local address) # * $4: The user visible name for the scenario being tested # -check_ipv6_ll_addr() +check_ipv6_device_config() { local DEV="$1" local EXTRA_MATCH="$2" @@ -45,7 +48,11 @@ check_ipv6_ll_addr() RET=0 set +e ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}" - check_err_fail "${XRET}" $? "" + check_err_fail "${XRET}" $? "IPv6 link-local address generation" + + ip -netns "${NS0}" -6 route show table local type multicast ff00::/8 proto kernel | grep -q "${DEV}" + check_err_fail 0 $? "IPv6 multicast route creation" + log_test "${MSG}" set -e } @@ -102,20 +109,20 @@ test_gre_device() ;; esac - # Check that IPv6 link-local address is generated when device goes up + # Check the IPv6 device configuration when it goes up ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" ip -netns "${NS0}" link set dev gretest up - check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}" + check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}" # Now disable link-local address generation ip -netns "${NS0}" link set dev gretest down ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1 ip -netns "${NS0}" link set dev gretest up - # Check that link-local address generation works when re-enabled while - # the device is already up + # Check the IPv6 device configuration when link-local address + # generation is re-enabled while the device is already up ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" - check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}" + check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}" ip -netns "${NS0}" link del dev gretest } @@ -126,7 +133,7 @@ test_gre4() local MODE for GRE_TYPE in "gre" "gretap"; do - printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + printf "\n####\nTesting IPv6 configuration of ${GRE_TYPE} devices\n####\n\n" for MODE in "eui64" "none" "stable-privacy" "random"; do test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}" @@ -142,7 +149,7 @@ test_gre6() local MODE for GRE_TYPE in "ip6gre" "ip6gretap"; do - printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + printf "\n####\nTesting IPv6 configuration of ${GRE_TYPE} devices\n####\n\n" for MODE in "eui64" "none" "stable-privacy" "random"; do test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}" diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 006fdadcc4b9..86a216e9aca8 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -312,7 +312,7 @@ log_test_result() local test_name=$1; shift local opt_str=$1; shift local result=$1; shift - local retmsg=$1; shift + local retmsg=$1 printf "TEST: %-60s [%s]\n" "$test_name $opt_str" "$result" if [[ $retmsg ]]; then diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index e47788bfa671..4c7e51336ab2 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -4,7 +4,8 @@ top_srcdir = ../../../../.. CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) -TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ +TEST_PROGS := mptcp_connect.sh mptcp_connect_mmap.sh mptcp_connect_sendfile.sh \ + mptcp_connect_checksum.sh pm_netlink.sh mptcp_join.sh diag.sh \ simult_flows.sh mptcp_sockopt.sh userspace_pm.sh TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq mptcp_diag diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh new file mode 100755 index 000000000000..ce93ec2f107f --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \ + "$(dirname "${0}")/mptcp_connect.sh" -C "${@}" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh new file mode 100755 index 000000000000..5dd30f9394af --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \ + "$(dirname "${0}")/mptcp_connect.sh" -m mmap "${@}" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh new file mode 100755 index 000000000000..1d16fb1cc9bb --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \ + "$(dirname "${0}")/mptcp_connect.sh" -m sendfile "${@}" diff --git a/tools/testing/selftests/net/netfilter/.gitignore b/tools/testing/selftests/net/netfilter/.gitignore index 64c4f8d9aa6c..5d2be9a00627 100644 --- a/tools/testing/selftests/net/netfilter/.gitignore +++ b/tools/testing/selftests/net/netfilter/.gitignore @@ -5,3 +5,4 @@ conntrack_dump_flush conntrack_reverse_clash sctp_collision nf_queue +udpclash diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile index e9b2f553588d..a98ed892f55f 100644 --- a/tools/testing/selftests/net/netfilter/Makefile +++ b/tools/testing/selftests/net/netfilter/Makefile @@ -15,6 +15,7 @@ TEST_PROGS += conntrack_tcp_unreplied.sh TEST_PROGS += conntrack_resize.sh TEST_PROGS += conntrack_sctp_collision.sh TEST_PROGS += conntrack_vrf.sh +TEST_PROGS += conntrack_clash.sh TEST_PROGS += conntrack_reverse_clash.sh TEST_PROGS += ipvs.sh TEST_PROGS += nf_conntrack_packetdrill.sh @@ -44,6 +45,7 @@ TEST_GEN_FILES += connect_close nf_queue TEST_GEN_FILES += conntrack_dump_flush TEST_GEN_FILES += conntrack_reverse_clash TEST_GEN_FILES += sctp_collision +TEST_GEN_FILES += udpclash include ../../lib.mk @@ -52,6 +54,7 @@ $(OUTPUT)/nf_queue: LDLIBS += $(MNL_LDLIBS) $(OUTPUT)/conntrack_dump_flush: CFLAGS += $(MNL_CFLAGS) $(OUTPUT)/conntrack_dump_flush: LDLIBS += $(MNL_LDLIBS) +$(OUTPUT)/udpclash: LDLIBS += -lpthread TEST_FILES := lib.sh TEST_FILES += packetdrill diff --git a/tools/testing/selftests/net/netfilter/conntrack_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_clash.sh new file mode 100755 index 000000000000..606a43a60f73 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_clash.sh @@ -0,0 +1,174 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +clash_resolution_active=0 +dport=22111 +ret=0 + +cleanup() +{ + # netns cleanup also zaps any remaining socat echo server. + cleanup_all_ns +} + +checktool "nft --version" "run test without nft" +checktool "conntrack --version" "run test without conntrack" +checktool "socat -h" "run test without socat" + +trap cleanup EXIT + +setup_ns nsclient1 nsclient2 nsrouter + +ip netns exec "$nsrouter" nft -f -<<EOF +table ip t { + chain lb { + meta l4proto udp dnat to numgen random mod 3 map { 0 : 10.0.2.1 . 9000, 1 : 10.0.2.1 . 9001, 2 : 10.0.2.1 . 9002 } + } + + chain prerouting { + type nat hook prerouting priority dstnat + + udp dport $dport counter jump lb + } + + chain output { + type nat hook output priority dstnat + + udp dport $dport counter jump lb + } +} +EOF + +load_simple_ruleset() +{ +ip netns exec "$1" nft -f -<<EOF +table ip t { + chain forward { + type filter hook forward priority 0 + + ct state new counter + } +} +EOF +} + +spawn_servers() +{ + local ns="$1" + local ports="9000 9001 9002" + + for port in $ports; do + ip netns exec "$ns" socat UDP-RECVFROM:$port,fork PIPE 2>/dev/null & + done + + for port in $ports; do + wait_local_port_listen "$ns" $port udp + done +} + +add_addr() +{ + local ns="$1" + local dev="$2" + local i="$3" + local j="$4" + + ip -net "$ns" link set "$dev" up + ip -net "$ns" addr add "10.0.$i.$j/24" dev "$dev" +} + +ping_test() +{ + local ns="$1" + local daddr="$2" + + if ! ip netns exec "$ns" ping -q -c 1 $daddr > /dev/null;then + echo "FAIL: ping from $ns to $daddr" + exit 1 + fi +} + +run_one_clash_test() +{ + local ns="$1" + local ctns="$2" + local daddr="$3" + local dport="$4" + local entries + local cre + + if ! ip netns exec "$ns" ./udpclash $daddr $dport;then + echo "INFO: did not receive expected number of replies for $daddr:$dport" + ip netns exec "$ctns" conntrack -S + # don't fail: check if clash resolution triggered after all. + fi + + entries=$(ip netns exec "$ctns" conntrack -S | wc -l) + cre=$(ip netns exec "$ctns" conntrack -S | grep "clash_resolve=0" | wc -l) + + if [ "$cre" -ne "$entries" ];then + clash_resolution_active=1 + return 0 + fi + + # not a failure: clash resolution logic did not trigger. + # With right timing, xmit completed sequentially and + # no parallel insertion occurs. + return $ksft_skip +} + +run_clash_test() +{ + local ns="$1" + local ctns="$2" + local daddr="$3" + local dport="$4" + local softerr=0 + + for i in $(seq 1 10);do + run_one_clash_test "$ns" "$ctns" "$daddr" "$dport" + local rv=$? + if [ $rv -eq 0 ];then + echo "PASS: clash resolution test for $daddr:$dport on attempt $i" + return 0 + elif [ $rv -eq $ksft_skip ]; then + softerr=1 + fi + done + + [ $softerr -eq 1 ] && echo "SKIP: clash resolution for $daddr:$dport did not trigger" +} + +ip link add veth0 netns "$nsclient1" type veth peer name veth0 netns "$nsrouter" +ip link add veth0 netns "$nsclient2" type veth peer name veth1 netns "$nsrouter" +add_addr "$nsclient1" veth0 1 1 +add_addr "$nsclient2" veth0 2 1 +add_addr "$nsrouter" veth0 1 99 +add_addr "$nsrouter" veth1 2 99 + +ip -net "$nsclient1" route add default via 10.0.1.99 +ip -net "$nsclient2" route add default via 10.0.2.99 +ip netns exec "$nsrouter" sysctl -q net.ipv4.ip_forward=1 + +ping_test "$nsclient1" 10.0.1.99 +ping_test "$nsclient1" 10.0.2.1 +ping_test "$nsclient2" 10.0.1.1 + +spawn_servers "$nsclient2" + +# exercise clash resolution with nat: +# nsrouter is supposed to dnat to 10.0.2.1:900{0,1,2,3}. +run_clash_test "$nsclient1" "$nsrouter" 10.0.1.99 "$dport" + +# exercise clash resolution without nat. +load_simple_ruleset "$nsclient2" +run_clash_test "$nsclient2" "$nsclient2" 127.0.0.1 9001 + +if [ $clash_resolution_active -eq 0 ];then + [ "$ret" -eq 0 ] && ret=$ksft_skip + echo "SKIP: Clash resolution did not trigger" +fi + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/conntrack_resize.sh b/tools/testing/selftests/net/netfilter/conntrack_resize.sh index 9e033e80219e..788cd56ea4a0 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_resize.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_resize.sh @@ -12,6 +12,9 @@ tmpfile="" tmpfile_proc="" tmpfile_uniq="" ret=0 +have_socat=0 + +socat -h > /dev/null && have_socat=1 insert_count=2000 [ "$KSFT_MACHINE_SLOW" = "yes" ] && insert_count=400 @@ -123,7 +126,7 @@ ctflush() { done } -ctflood() +ct_pingflood() { local ns="$1" local duration="$2" @@ -152,6 +155,44 @@ ctflood() wait } +ct_udpflood() +{ + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + [ $have_socat -ne "1" ] && return + + while [ $now -lt $end ]; do +ip netns exec "$ns" bash<<"EOF" + for i in $(seq 1 100);do + dport=$(((RANDOM%65536)+1)) + + echo bar | socat -u STDIN UDP:"127.0.0.1:$dport" & + done > /dev/null 2>&1 + wait +EOF + now=$(date +%s) + done +} + +ct_udpclash() +{ + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + [ -x udpclash ] || return + + while [ $now -lt $end ]; do + ip netns exec "$ns" ./udpclash 127.0.0.1 $((RANDOM%65536)) > /dev/null 2>&1 + + now=$(date +%s) + done +} + # dump to /dev/null. We don't want dumps to cause infinite loops # or use-after-free even when conntrack table is altered while dumps # are in progress. @@ -169,6 +210,48 @@ ct_nulldump() wait } +ct_nulldump_loop() +{ + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + while [ $now -lt $end ]; do + ct_nulldump "$ns" + sleep $((RANDOM%2)) + now=$(date +%s) + done +} + +change_timeouts() +{ + local ns="$1" + local r1=$((RANDOM%2)) + local r2=$((RANDOM%2)) + + [ "$r1" -eq 1 ] && ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=$((RANDOM%5)) + [ "$r2" -eq 1 ] && ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_udp_timeout=$((RANDOM%5)) +} + +ct_change_timeouts_loop() +{ + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + while [ $now -lt $end ]; do + change_timeouts "$ns" + sleep $((RANDOM%2)) + now=$(date +%s) + done + + # restore defaults + ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=30 + ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_udp_timeout=30 +} + check_taint() { local tainted_then="$1" @@ -198,10 +281,14 @@ insert_flood() r=$((RANDOM%$insert_count)) - ctflood "$n" "$timeout" "floodresize" & + ct_pingflood "$n" "$timeout" "floodresize" & + ct_udpflood "$n" "$timeout" & + ct_udpclash "$n" "$timeout" & + insert_ctnetlink "$n" "$r" & ctflush "$n" "$timeout" & - ct_nulldump "$n" & + ct_nulldump_loop "$n" "$timeout" & + ct_change_timeouts_loop "$n" "$timeout" & wait } @@ -306,7 +393,7 @@ test_dump_all() ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=3600 - ctflood "$nsclient1" $timeout "dumpall" & + ct_pingflood "$nsclient1" $timeout "dumpall" & insert_ctnetlink "$nsclient2" $insert_count wait @@ -368,7 +455,7 @@ test_conntrack_disable() ct_flush_once "$nsclient1" ct_flush_once "$nsclient2" - ctflood "$nsclient1" "$timeout" "conntrack disable" + ct_pingflood "$nsclient1" "$timeout" "conntrack disable" ip netns exec "$nsclient2" ping -q -c 1 127.0.0.1 >/dev/null 2>&1 # Disabled, should not have picked up any connection. diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh index cd12b8b5ac0e..20e76b395c85 100755 --- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh @@ -1311,6 +1311,9 @@ maybe_send_match() { # - remove some elements, check that packets don't match anymore test_correctness_main() { range_size=1 + + send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1 + for i in $(seq "${start}" $((start + count))); do local elem="" diff --git a/tools/testing/selftests/net/netfilter/udpclash.c b/tools/testing/selftests/net/netfilter/udpclash.c new file mode 100644 index 000000000000..85c7b906ad08 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/udpclash.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Usage: ./udpclash <IP> <PORT> + * + * Emit THREAD_COUNT UDP packets sharing the same saddr:daddr pair. + * + * This mimics DNS resolver libraries that emit A and AAAA requests + * in parallel. + * + * This exercises conntrack clash resolution logic added and later + * refined in + * + * 71d8c47fc653 ("netfilter: conntrack: introduce clash resolution on insertion race") + * ed07d9a021df ("netfilter: nf_conntrack: resolve clash for matching conntracks") + * 6a757c07e51f ("netfilter: conntrack: allow insertion of clashing entries") + */ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <sys/socket.h> +#include <pthread.h> + +#define THREAD_COUNT 128 + +struct thread_args { + const struct sockaddr_in *si_remote; + int sockfd; +}; + +static int wait = 1; + +static void *thread_main(void *varg) +{ + const struct sockaddr_in *si_remote; + const struct thread_args *args = varg; + static const char msg[] = "foo"; + + si_remote = args->si_remote; + + while (wait == 1) + ; + + if (sendto(args->sockfd, msg, strlen(msg), MSG_NOSIGNAL, + (struct sockaddr *)si_remote, sizeof(*si_remote)) < 0) + exit(111); + + return varg; +} + +static int run_test(int fd, const struct sockaddr_in *si_remote) +{ + struct thread_args thread_args = { + .si_remote = si_remote, + .sockfd = fd, + }; + pthread_t *tid = calloc(THREAD_COUNT, sizeof(pthread_t)); + unsigned int repl_count = 0, timeout = 0; + int i; + + if (!tid) { + perror("calloc"); + return 1; + } + + for (i = 0; i < THREAD_COUNT; i++) { + int err = pthread_create(&tid[i], NULL, &thread_main, &thread_args); + + if (err != 0) { + perror("pthread_create"); + exit(1); + } + } + + wait = 0; + + for (i = 0; i < THREAD_COUNT; i++) + pthread_join(tid[i], NULL); + + while (repl_count < THREAD_COUNT) { + struct sockaddr_in si_repl; + socklen_t si_repl_len = sizeof(si_repl); + char repl[512]; + ssize_t ret; + + ret = recvfrom(fd, repl, sizeof(repl), MSG_NOSIGNAL, + (struct sockaddr *) &si_repl, &si_repl_len); + if (ret < 0) { + if (timeout++ > 5000) { + fputs("timed out while waiting for reply from thread\n", stderr); + break; + } + + /* give reply time to pass though the stack */ + usleep(1000); + continue; + } + + if (si_repl_len != sizeof(*si_remote)) { + fprintf(stderr, "warning: reply has unexpected repl_len %d vs %d\n", + (int)si_repl_len, (int)sizeof(si_repl)); + } else if (si_remote->sin_addr.s_addr != si_repl.sin_addr.s_addr || + si_remote->sin_port != si_repl.sin_port) { + char a[64], b[64]; + + inet_ntop(AF_INET, &si_remote->sin_addr, a, sizeof(a)); + inet_ntop(AF_INET, &si_repl.sin_addr, b, sizeof(b)); + + fprintf(stderr, "reply from wrong source: want %s:%d got %s:%d\n", + a, ntohs(si_remote->sin_port), b, ntohs(si_repl.sin_port)); + } + + repl_count++; + } + + printf("got %d of %d replies\n", repl_count, THREAD_COUNT); + + free(tid); + + return repl_count == THREAD_COUNT ? 0 : 1; +} + +int main(int argc, char *argv[]) +{ + struct sockaddr_in si_local = { + .sin_family = AF_INET, + }; + struct sockaddr_in si_remote = { + .sin_family = AF_INET, + }; + int fd, ret; + + if (argc < 3) { + fputs("Usage: send_udp <daddr> <dport>\n", stderr); + return 1; + } + + si_remote.sin_port = htons(atoi(argv[2])); + si_remote.sin_addr.s_addr = inet_addr(argv[1]); + + fd = socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, IPPROTO_UDP); + if (fd < 0) { + perror("socket"); + return 1; + } + + if (bind(fd, (struct sockaddr *)&si_local, sizeof(si_local)) < 0) { + perror("bind"); + return 1; + } + + ret = run_test(fd, &si_remote); + + close(fd); + + return ret; +} diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt new file mode 100644 index 000000000000..09aabc775e80 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"` + +// Test that a not-yet-accepted socket does not change +// its initial sk_rcvbuf (tcp_rmem[1]) when receiving ooo packets. + + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10> + +.1 < . 1:1(0) ack 1 win 257 + +0 < . 2001:41001(39000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:41001> + +0 < . 41001:101001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:101001> + +0 < . 1:1001(1000) ack 1 win 257 + +0 > . 1:1(0) ack 1001 <nop,nop,sack 2001:101001> + +0 < . 1001:2001(1000) ack 1 win 257 + +0 > . 1:1(0) ack 101001 + + +0 accept(3, ..., ...) = 4 + + +0 %{ assert SK_MEMINFO_RCVBUF == 131072, SK_MEMINFO_RCVBUF }% + + +0 close(4) = 0 + +0 close(3) = 0 + +// Test that ooo packets for accepted sockets do increase sk_rcvbuf + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10> + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < . 2001:41001(39000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:41001> + +0 < . 41001:101001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:101001> + + +0 %{ assert SK_MEMINFO_RCVBUF > 131072, SK_MEMINFO_RCVBUF }% + diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index 1dc337c709f8..b17e032a6d75 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -48,7 +48,7 @@ run_one() { cfg_veth - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${rx_args} & local PID1=$! wait_local_port_listen ${PEER_NS} 8000 udp @@ -95,7 +95,7 @@ run_one_nat() { # will land on the 'plain' one ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 & local PID1=$! - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${family} -b ${addr2%/*} ${rx_args} & local PID2=$! wait_local_port_listen "${PEER_NS}" 8000 udp @@ -117,9 +117,9 @@ run_one_2sock() { cfg_veth - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} -p 12345 & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${rx_args} -p 12345 & local PID1=$! - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 100 ${rx_args} & local PID2=$! wait_local_port_listen "${PEER_NS}" 12345 udp diff --git a/tools/testing/selftests/net/vlan_hw_filter.sh b/tools/testing/selftests/net/vlan_hw_filter.sh index 7bc804ffaf7c..0fb56baf28e4 100755 --- a/tools/testing/selftests/net/vlan_hw_filter.sh +++ b/tools/testing/selftests/net/vlan_hw_filter.sh @@ -3,27 +3,101 @@ readonly NETNS="ns-$(mktemp -u XXXXXX)" +ALL_TESTS=" + test_vlan_filter_check + test_vlan0_del_crash_01 + test_vlan0_del_crash_02 + test_vlan0_del_crash_03 + test_vid0_memleak +" + ret=0 +setup() { + ip netns add ${NETNS} +} + cleanup() { - ip netns del $NETNS + ip netns del $NETNS 2>/dev/null } trap cleanup EXIT fail() { - echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2 - ret=1 + echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2 + ret=1 +} + +tests_run() +{ + local current_test + for current_test in ${TESTS:-$ALL_TESTS}; do + $current_test + done +} + +test_vlan_filter_check() { + setup + ip netns exec ${NETNS} ip link add bond0 type bond mode 0 + ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2 + ip netns exec ${NETNS} ip link set bond_slave_1 master bond0 + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0 + ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0 + ip netns exec ${NETNS} ip link set bond_slave_1 nomaster + ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function" + cleanup } -ip netns add ${NETNS} -ip netns exec ${NETNS} ip link add bond0 type bond mode 0 -ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2 -ip netns exec ${NETNS} ip link set bond_slave_1 master bond0 -ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off -ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0 -ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0 -ip netns exec ${NETNS} ip link set bond_slave_1 nomaster -ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function" +#enable vlan_filter feature of real_dev with vlan0 during running time +test_vlan0_del_crash_01() { + setup + ip netns exec ${NETNS} ip link add bond0 type bond mode 0 + ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on + ip netns exec ${NETNS} ifconfig bond0 down + ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function" + cleanup +} + +#enable vlan_filter feature and add vlan0 for real_dev during running time +test_vlan0_del_crash_02() { + setup + ip netns exec ${NETNS} ip link add bond0 type bond mode 0 + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on + ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q + ip netns exec ${NETNS} ifconfig bond0 down + ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function" + cleanup +} + +#enable vlan_filter feature of real_dev during running time +#test kernel_bug of vlan unregister +test_vlan0_del_crash_03() { + setup + ip netns exec ${NETNS} ip link add bond0 type bond mode 0 + ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on + ip netns exec ${NETNS} ifconfig bond0 down + ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function" + cleanup +} + +test_vid0_memleak() { + setup + ip netns exec ${NETNS} ip link add bond0 up type bond mode 0 + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ip link del dev bond0 || fail "Please check vlan HW filter function" + cleanup +} +tests_run exit $ret diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore index 0406a065deb4..144e7ff65d6a 100644 --- a/tools/testing/selftests/pidfd/.gitignore +++ b/tools/testing/selftests/pidfd/.gitignore @@ -10,3 +10,5 @@ pidfd_file_handle_test pidfd_bind_mount pidfd_info_test pidfd_exec_helper +pidfd_xattr_test +pidfd_setattr_test diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile index fcbefc0d77f6..764a8f9ecefa 100644 --- a/tools/testing/selftests/pidfd/Makefile +++ b/tools/testing/selftests/pidfd/Makefile @@ -1,9 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -g $(KHDR_INCLUDES) -pthread -Wall +CFLAGS += -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) -pthread -Wall TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \ pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test \ - pidfd_file_handle_test pidfd_bind_mount pidfd_info_test + pidfd_file_handle_test pidfd_bind_mount pidfd_info_test \ + pidfd_xattr_test pidfd_setattr_test TEST_GEN_PROGS_EXTENDED := pidfd_exec_helper diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index efd74063126e..cd244d0860ff 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -19,6 +19,10 @@ #include "../kselftest.h" #include "../clone3/clone3_selftests.h" +#ifndef FD_PIDFS_ROOT +#define FD_PIDFS_ROOT -10002 +#endif + #ifndef P_PIDFD #define P_PIDFD 3 #endif @@ -56,7 +60,7 @@ #endif #ifndef PIDFD_SELF_THREAD_GROUP -#define PIDFD_SELF_THREAD_GROUP -20000 /* Current thread group leader. */ +#define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */ #endif #ifndef PIDFD_SELF diff --git a/tools/testing/selftests/pidfd/pidfd_file_handle_test.c b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c index 439b9c6c0457..6bd2e9c9565b 100644 --- a/tools/testing/selftests/pidfd/pidfd_file_handle_test.c +++ b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c @@ -500,4 +500,64 @@ TEST_F(file_handle, valid_name_to_handle_at_flags) ASSERT_EQ(close(pidfd), 0); } +/* + * That we decode a file handle without having to pass a pidfd. + */ +TEST_F(file_handle, decode_purely_based_on_file_handle) +{ + int mnt_id; + struct file_handle *fh; + int pidfd = -EBADF; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd1, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd1, &st1), 0); + + pidfd = open_by_handle_at(FD_PIDFS_ROOT, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(FD_PIDFS_ROOT, fh, O_CLOEXEC); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(FD_PIDFS_ROOT, fh, O_NONBLOCK); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(-EBADF, fh, 0); + ASSERT_LT(pidfd, 0); + + pidfd = open_by_handle_at(AT_FDCWD, fh, 0); + ASSERT_LT(pidfd, 0); + + free(fh); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/pidfd/pidfd_setattr_test.c b/tools/testing/selftests/pidfd/pidfd_setattr_test.c new file mode 100644 index 000000000000..d7de05edc4b3 --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_setattr_test.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <linux/types.h> +#include <poll.h> +#include <pthread.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <syscall.h> +#include <sys/prctl.h> +#include <sys/wait.h> +#include <unistd.h> +#include <sys/socket.h> +#include <linux/kcmp.h> +#include <sys/stat.h> +#include <sys/xattr.h> + +#include "pidfd.h" +#include "../kselftest_harness.h" + +FIXTURE(pidfs_setattr) +{ + pid_t child_pid; + int child_pidfd; +}; + +FIXTURE_SETUP(pidfs_setattr) +{ + self->child_pid = create_child(&self->child_pidfd, CLONE_NEWUSER | CLONE_NEWPID); + EXPECT_GE(self->child_pid, 0); + + if (self->child_pid == 0) + _exit(EXIT_SUCCESS); +} + +FIXTURE_TEARDOWN(pidfs_setattr) +{ + sys_waitid(P_PID, self->child_pid, NULL, WEXITED); + EXPECT_EQ(close(self->child_pidfd), 0); +} + +TEST_F(pidfs_setattr, no_chown) +{ + ASSERT_LT(fchown(self->child_pidfd, 1234, 5678), 0); + ASSERT_EQ(errno, EOPNOTSUPP); +} + +TEST_F(pidfs_setattr, no_chmod) +{ + ASSERT_LT(fchmod(self->child_pidfd, 0777), 0); + ASSERT_EQ(errno, EOPNOTSUPP); +} + +TEST_F(pidfs_setattr, no_exec) +{ + char *const argv[] = { NULL }; + char *const envp[] = { NULL }; + + ASSERT_LT(execveat(self->child_pidfd, "", argv, envp, AT_EMPTY_PATH), 0); + ASSERT_EQ(errno, EACCES); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/pidfd/pidfd_xattr_test.c b/tools/testing/selftests/pidfd/pidfd_xattr_test.c new file mode 100644 index 000000000000..5cf7bb0e4bf2 --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_xattr_test.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <linux/types.h> +#include <poll.h> +#include <pthread.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <syscall.h> +#include <sys/prctl.h> +#include <sys/wait.h> +#include <unistd.h> +#include <sys/socket.h> +#include <linux/kcmp.h> +#include <sys/stat.h> +#include <sys/xattr.h> + +#include "pidfd.h" +#include "../kselftest_harness.h" + +FIXTURE(pidfs_xattr) +{ + pid_t child_pid; + int child_pidfd; +}; + +FIXTURE_SETUP(pidfs_xattr) +{ + self->child_pid = create_child(&self->child_pidfd, CLONE_NEWUSER | CLONE_NEWPID); + EXPECT_GE(self->child_pid, 0); + + if (self->child_pid == 0) + _exit(EXIT_SUCCESS); +} + +FIXTURE_TEARDOWN(pidfs_xattr) +{ + sys_waitid(P_PID, self->child_pid, NULL, WEXITED); +} + +TEST_F(pidfs_xattr, set_get_list_xattr_multiple) +{ + int ret, i; + char xattr_name[32]; + char xattr_value[32]; + char buf[32]; + const int num_xattrs = 10; + char list[PATH_MAX] = {}; + + for (i = 0; i < num_xattrs; i++) { + snprintf(xattr_name, sizeof(xattr_name), "trusted.testattr%d", i); + snprintf(xattr_value, sizeof(xattr_value), "testvalue%d", i); + ret = fsetxattr(self->child_pidfd, xattr_name, xattr_value, strlen(xattr_value), 0); + ASSERT_EQ(ret, 0); + } + + for (i = 0; i < num_xattrs; i++) { + snprintf(xattr_name, sizeof(xattr_name), "trusted.testattr%d", i); + snprintf(xattr_value, sizeof(xattr_value), "testvalue%d", i); + memset(buf, 0, sizeof(buf)); + ret = fgetxattr(self->child_pidfd, xattr_name, buf, sizeof(buf)); + ASSERT_EQ(ret, strlen(xattr_value)); + ASSERT_EQ(strcmp(buf, xattr_value), 0); + } + + ret = flistxattr(self->child_pidfd, list, sizeof(list)); + ASSERT_GT(ret, 0); + for (i = 0; i < num_xattrs; i++) { + snprintf(xattr_name, sizeof(xattr_name), "trusted.testattr%d", i); + bool found = false; + for (char *it = list; it < list + ret; it += strlen(it) + 1) { + if (strcmp(it, xattr_name)) + continue; + found = true; + break; + } + ASSERT_TRUE(found); + } + + for (i = 0; i < num_xattrs; i++) { + snprintf(xattr_name, sizeof(xattr_name), "trusted.testattr%d", i); + ret = fremovexattr(self->child_pidfd, xattr_name); + ASSERT_EQ(ret, 0); + + ret = fgetxattr(self->child_pidfd, xattr_name, buf, sizeof(buf)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENODATA); + } +} + +TEST_F(pidfs_xattr, set_get_list_xattr_persistent) +{ + int ret; + char buf[32]; + char list[PATH_MAX] = {}; + + ret = fsetxattr(self->child_pidfd, "trusted.persistent", "persistent value", strlen("persistent value"), 0); + ASSERT_EQ(ret, 0); + + memset(buf, 0, sizeof(buf)); + ret = fgetxattr(self->child_pidfd, "trusted.persistent", buf, sizeof(buf)); + ASSERT_EQ(ret, strlen("persistent value")); + ASSERT_EQ(strcmp(buf, "persistent value"), 0); + + ret = flistxattr(self->child_pidfd, list, sizeof(list)); + ASSERT_GT(ret, 0); + ASSERT_EQ(strcmp(list, "trusted.persistent"), 0) + + ASSERT_EQ(close(self->child_pidfd), 0); + self->child_pidfd = -EBADF; + sleep(2); + + self->child_pidfd = sys_pidfd_open(self->child_pid, 0); + ASSERT_GE(self->child_pidfd, 0); + + memset(buf, 0, sizeof(buf)); + ret = fgetxattr(self->child_pidfd, "trusted.persistent", buf, sizeof(buf)); + ASSERT_EQ(ret, strlen("persistent value")); + ASSERT_EQ(strcmp(buf, "persistent value"), 0); + + ret = flistxattr(self->child_pidfd, list, sizeof(list)); + ASSERT_GT(ret, 0); + ASSERT_EQ(strcmp(list, "trusted.persistent"), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/sched_ext/exit.c b/tools/testing/selftests/sched_ext/exit.c index 9451782689de..ee25824b1cbe 100644 --- a/tools/testing/selftests/sched_ext/exit.c +++ b/tools/testing/selftests/sched_ext/exit.c @@ -22,6 +22,14 @@ static enum scx_test_status run(void *ctx) struct bpf_link *link; char buf[16]; + /* + * On single-CPU systems, ops.select_cpu() is never + * invoked, so skip this test to avoid getting stuck + * indefinitely. + */ + if (tc == EXIT_SELECT_CPU && libbpf_num_possible_cpus() == 1) + continue; + skel = exit__open(); SCX_ENUM_INIT(skel); skel->rodata->exit_point = tc; diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 9aa44d8176d9..c6db7fa94f55 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -128,6 +128,32 @@ ] }, { + "id": "5456", + "name": "Test htb_dequeue_tree with deactivation and row emptying", + "category": [ + "qdisc", + "htb" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: htb default 1", + "$TC class add dev $DUMMY parent 1: classid 1:1 htb rate 64bit ", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2: netem", + "$TC qdisc add dev $DUMMY parent 2:1 handle 3: blackhole" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.11.11", + "expExitCode": "1", + "verifyCmd": "$TC -j qdisc show dev $DUMMY", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DUMMY root" + ] + }, + { "id": "c024", "name": "Test TBF with SKBPRIO - catch qlen corner cases", "category": [ @@ -635,5 +661,108 @@ "$TC qdisc del dev $DUMMY handle 1:0 root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "d74b", + "name": "Test use-after-free with DRR/NETEM/BLACKHOLE chain", + "category": [ + "qdisc", + "hfsc", + "drr", + "netem", + "blackhole" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: drr", + "$TC filter add dev $DUMMY parent 1: basic classid 1:1", + "$TC class add dev $DUMMY parent 1: classid 1:1 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2: hfsc def 1", + "$TC class add dev $DUMMY parent 2: classid 2:1 hfsc rt m1 8 d 1 m2 0", + "$TC qdisc add dev $DUMMY parent 2:1 handle 3: netem", + "$TC qdisc add dev $DUMMY parent 3:1 handle 4: blackhole", + "ping -c1 -W0.01 -I $DUMMY 10.10.11.11 || true", + "$TC class del dev $DUMMY classid 1:1" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.11.11", + "expExitCode": "1", + "verifyCmd": "$TC -j class ls dev $DUMMY classid 1:1", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1: drr" + ] + }, + { + "id": "be28", + "name": "Try to add fq_codel qdisc as a child of an hhf qdisc", + "category": [ + "qdisc", + "fq_codel", + "hhf" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle a: hhf" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent a: handle b: fq_codel", + "expExitCode": "2", + "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle b:", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DUMMY root" + ] + }, + { + "id": "fcb5", + "name": "Try to add pie qdisc as a child of a drr qdisc", + "category": [ + "qdisc", + "pie", + "drr" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle a: drr" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent a: handle b: pie", + "expExitCode": "2", + "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle b:", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DUMMY root" + ] + }, + { + "id": "7801", + "name": "Try to add fq qdisc as a child of an inexistent hfsc class", + "category": [ + "qdisc", + "sfq", + "hfsc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle a: hfsc" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent a:fff2 sfq limit 4", + "expExitCode": "2", + "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle b:", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DUMMY root" + ] } ] diff --git a/tools/testing/selftests/ublk/fault_inject.c b/tools/testing/selftests/ublk/fault_inject.c index 6e60f7d97125..b227bd78b252 100644 --- a/tools/testing/selftests/ublk/fault_inject.c +++ b/tools/testing/selftests/ublk/fault_inject.c @@ -38,7 +38,8 @@ static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx, return 0; } -static int ublk_fault_inject_queue_io(struct ublk_queue *q, int tag) +static int ublk_fault_inject_queue_io(struct ublk_thread *t, + struct ublk_queue *q, int tag) { const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); struct io_uring_sqe *sqe; @@ -46,25 +47,27 @@ static int ublk_fault_inject_queue_io(struct ublk_queue *q, int tag) .tv_nsec = (long long)q->dev->private_data, }; - ublk_io_alloc_sqes(ublk_get_io(q, tag), &sqe, 1); + ublk_io_alloc_sqes(t, &sqe, 1); io_uring_prep_timeout(sqe, &ts, 1, 0); sqe->user_data = build_user_data(tag, ublksrv_get_op(iod), 0, q->q_id, 1); - ublk_queued_tgt_io(q, tag, 1); + ublk_queued_tgt_io(t, q, tag, 1); return 0; } -static void ublk_fault_inject_tgt_io_done(struct ublk_queue *q, int tag, +static void ublk_fault_inject_tgt_io_done(struct ublk_thread *t, + struct ublk_queue *q, const struct io_uring_cqe *cqe) { + unsigned tag = user_data_to_tag(cqe->user_data); const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); if (cqe->res != -ETIME) ublk_err("%s: unexpected cqe res %d\n", __func__, cqe->res); - if (ublk_completed_tgt_io(q, tag)) - ublk_complete_io(q, tag, iod->nr_sectors << 9); + if (ublk_completed_tgt_io(t, q, tag)) + ublk_complete_io(t, q, tag, iod->nr_sectors << 9); else ublk_err("%s: io not complete after 1 cqe\n", __func__); } diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c index cfa59b631693..2d93ac860bd5 100644 --- a/tools/testing/selftests/ublk/file_backed.c +++ b/tools/testing/selftests/ublk/file_backed.c @@ -13,12 +13,13 @@ static enum io_uring_op ublk_to_uring_op(const struct ublksrv_io_desc *iod, int assert(0); } -static int loop_queue_flush_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) +static int loop_queue_flush_io(struct ublk_thread *t, struct ublk_queue *q, + const struct ublksrv_io_desc *iod, int tag) { unsigned ublk_op = ublksrv_get_op(iod); struct io_uring_sqe *sqe[1]; - ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 1); + ublk_io_alloc_sqes(t, sqe, 1); io_uring_prep_fsync(sqe[0], 1 /*fds[1]*/, IORING_FSYNC_DATASYNC); io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE); /* bit63 marks us as tgt io */ @@ -26,7 +27,8 @@ static int loop_queue_flush_io(struct ublk_queue *q, const struct ublksrv_io_des return 1; } -static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) +static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, + const struct ublksrv_io_desc *iod, int tag) { unsigned ublk_op = ublksrv_get_op(iod); unsigned zc = ublk_queue_use_zc(q); @@ -36,7 +38,7 @@ static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_de void *addr = (zc | auto_zc) ? NULL : (void *)iod->addr; if (!zc || auto_zc) { - ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 1); + ublk_io_alloc_sqes(t, sqe, 1); if (!sqe[0]) return -ENOMEM; @@ -52,7 +54,7 @@ static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_de return 1; } - ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 3); + ublk_io_alloc_sqes(t, sqe, 3); io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index); sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; @@ -72,7 +74,7 @@ static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_de return 2; } -static int loop_queue_tgt_io(struct ublk_queue *q, int tag) +static int loop_queue_tgt_io(struct ublk_thread *t, struct ublk_queue *q, int tag) { const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); unsigned ublk_op = ublksrv_get_op(iod); @@ -80,7 +82,7 @@ static int loop_queue_tgt_io(struct ublk_queue *q, int tag) switch (ublk_op) { case UBLK_IO_OP_FLUSH: - ret = loop_queue_flush_io(q, iod, tag); + ret = loop_queue_flush_io(t, q, iod, tag); break; case UBLK_IO_OP_WRITE_ZEROES: case UBLK_IO_OP_DISCARD: @@ -88,7 +90,7 @@ static int loop_queue_tgt_io(struct ublk_queue *q, int tag) break; case UBLK_IO_OP_READ: case UBLK_IO_OP_WRITE: - ret = loop_queue_tgt_rw_io(q, iod, tag); + ret = loop_queue_tgt_rw_io(t, q, iod, tag); break; default: ret = -EINVAL; @@ -100,17 +102,19 @@ static int loop_queue_tgt_io(struct ublk_queue *q, int tag) return ret; } -static int ublk_loop_queue_io(struct ublk_queue *q, int tag) +static int ublk_loop_queue_io(struct ublk_thread *t, struct ublk_queue *q, + int tag) { - int queued = loop_queue_tgt_io(q, tag); + int queued = loop_queue_tgt_io(t, q, tag); - ublk_queued_tgt_io(q, tag, queued); + ublk_queued_tgt_io(t, q, tag, queued); return 0; } -static void ublk_loop_io_done(struct ublk_queue *q, int tag, +static void ublk_loop_io_done(struct ublk_thread *t, struct ublk_queue *q, const struct io_uring_cqe *cqe) { + unsigned tag = user_data_to_tag(cqe->user_data); unsigned op = user_data_to_op(cqe->user_data); struct ublk_io *io = ublk_get_io(q, tag); @@ -126,8 +130,8 @@ static void ublk_loop_io_done(struct ublk_queue *q, int tag, if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF)) io->tgt_ios += 1; - if (ublk_completed_tgt_io(q, tag)) - ublk_complete_io(q, tag, io->result); + if (ublk_completed_tgt_io(t, q, tag)) + ublk_complete_io(t, q, tag, io->result); } static int ublk_loop_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index e2d2042810d4..95188065b2e9 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -441,17 +441,10 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags) unsigned long off; q->tgt_ops = dev->tgt.ops; - q->state = 0; + q->flags = 0; q->q_depth = depth; - - if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) { - q->state |= UBLKSRV_NO_BUF; - if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) - q->state |= UBLKSRV_ZC; - if (dev->dev_info.flags & UBLK_F_AUTO_BUF_REG) - q->state |= UBLKSRV_AUTO_BUF_REG; - } - q->state |= extra_flags; + q->flags = dev->dev_info.flags; + q->flags |= extra_flags; cmd_buf_size = ublk_queue_cmd_buf_sz(q); off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz(); @@ -466,10 +459,10 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags) io_buf_size = dev->dev_info.max_io_buf_bytes; for (i = 0; i < q->q_depth; i++) { q->ios[i].buf_addr = NULL; - q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE; + q->ios[i].flags = UBLKS_IO_NEED_FETCH_RQ | UBLKS_IO_FREE; q->ios[i].tag = i; - if (q->state & UBLKSRV_NO_BUF) + if (ublk_queue_no_buf(q)) continue; if (posix_memalign((void **)&q->ios[i].buf_addr, @@ -583,15 +576,14 @@ static void ublk_set_auto_buf_reg(const struct ublk_queue *q, else buf.index = q->ios[tag].buf_index; - if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK) + if (ublk_queue_auto_zc_fallback(q)) buf.flags = UBLK_AUTO_BUF_REG_FALLBACK; sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&buf); } -int ublk_queue_io_cmd(struct ublk_io *io) +int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io) { - struct ublk_thread *t = io->t; struct ublk_queue *q = ublk_io_to_queue(io); struct ublksrv_io_cmd *cmd; struct io_uring_sqe *sqe[1]; @@ -599,7 +591,7 @@ int ublk_queue_io_cmd(struct ublk_io *io) __u64 user_data; /* only freed io can be issued */ - if (!(io->flags & UBLKSRV_IO_FREE)) + if (!(io->flags & UBLKS_IO_FREE)) return 0; /* @@ -607,20 +599,20 @@ int ublk_queue_io_cmd(struct ublk_io *io) * getting data */ if (!(io->flags & - (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_NEED_GET_DATA))) + (UBLKS_IO_NEED_FETCH_RQ | UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_NEED_GET_DATA))) return 0; - if (io->flags & UBLKSRV_NEED_GET_DATA) + if (io->flags & UBLKS_IO_NEED_GET_DATA) cmd_op = UBLK_U_IO_NEED_GET_DATA; - else if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP) + else if (io->flags & UBLKS_IO_NEED_COMMIT_RQ_COMP) cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ; - else if (io->flags & UBLKSRV_NEED_FETCH_RQ) + else if (io->flags & UBLKS_IO_NEED_FETCH_RQ) cmd_op = UBLK_U_IO_FETCH_REQ; if (io_uring_sq_space_left(&t->ring) < 1) io_uring_submit(&t->ring); - ublk_io_alloc_sqes(io, sqe, 1); + ublk_io_alloc_sqes(t, sqe, 1); if (!sqe[0]) { ublk_err("%s: run out of sqe. thread %u, tag %d\n", __func__, t->idx, io->tag); @@ -640,12 +632,12 @@ int ublk_queue_io_cmd(struct ublk_io *io) sqe[0]->rw_flags = 0; cmd->tag = io->tag; cmd->q_id = q->q_id; - if (!(q->state & UBLKSRV_NO_BUF)) + if (!ublk_queue_no_buf(q)) cmd->addr = (__u64) (uintptr_t) io->buf_addr; else cmd->addr = 0; - if (q->state & UBLKSRV_AUTO_BUF_REG) + if (ublk_queue_use_auto_zc(q)) ublk_set_auto_buf_reg(q, sqe[0], io->tag); user_data = build_user_data(io->tag, _IOC_NR(cmd_op), 0, q->q_id, 0); @@ -657,7 +649,7 @@ int ublk_queue_io_cmd(struct ublk_io *io) ublk_dbg(UBLK_DBG_IO_CMD, "%s: (thread %u qid %d tag %u cmd_op %u) iof %x stopping %d\n", __func__, t->idx, q->q_id, io->tag, cmd_op, - io->flags, !!(t->state & UBLKSRV_THREAD_STOPPING)); + io->flags, !!(t->state & UBLKS_T_STOPPING)); return 1; } @@ -685,9 +677,8 @@ static void ublk_submit_fetch_commands(struct ublk_thread *t) int tag = i % dinfo->queue_depth; q = &t->dev->q[q_id]; io = &q->ios[tag]; - io->t = t; io->buf_index = j++; - ublk_queue_io_cmd(io); + ublk_queue_io_cmd(t, io); } } else { /* @@ -697,9 +688,8 @@ static void ublk_submit_fetch_commands(struct ublk_thread *t) struct ublk_queue *q = &t->dev->q[t->idx]; for (i = 0; i < q->q_depth; i++) { io = &q->ios[i]; - io->t = t; io->buf_index = i; - ublk_queue_io_cmd(io); + ublk_queue_io_cmd(t, io); } } } @@ -711,14 +701,13 @@ static int ublk_thread_is_idle(struct ublk_thread *t) static int ublk_thread_is_done(struct ublk_thread *t) { - return (t->state & UBLKSRV_THREAD_STOPPING) && ublk_thread_is_idle(t); + return (t->state & UBLKS_T_STOPPING) && ublk_thread_is_idle(t); } -static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q, - struct io_uring_cqe *cqe) +static inline void ublksrv_handle_tgt_cqe(struct ublk_thread *t, + struct ublk_queue *q, + struct io_uring_cqe *cqe) { - unsigned tag = user_data_to_tag(cqe->user_data); - if (cqe->res < 0 && cqe->res != -EAGAIN) ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n", __func__, cqe->res, q->q_id, @@ -726,7 +715,41 @@ static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q, user_data_to_op(cqe->user_data)); if (q->tgt_ops->tgt_io_done) - q->tgt_ops->tgt_io_done(q, tag, cqe); + q->tgt_ops->tgt_io_done(t, q, cqe); +} + +static void ublk_handle_uring_cmd(struct ublk_thread *t, + struct ublk_queue *q, + const struct io_uring_cqe *cqe) +{ + int fetch = (cqe->res != UBLK_IO_RES_ABORT) && + !(t->state & UBLKS_T_STOPPING); + unsigned tag = user_data_to_tag(cqe->user_data); + struct ublk_io *io = &q->ios[tag]; + + if (!fetch) { + t->state |= UBLKS_T_STOPPING; + io->flags &= ~UBLKS_IO_NEED_FETCH_RQ; + } + + if (cqe->res == UBLK_IO_RES_OK) { + assert(tag < q->q_depth); + if (q->tgt_ops->queue_io) + q->tgt_ops->queue_io(t, q, tag); + } else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) { + io->flags |= UBLKS_IO_NEED_GET_DATA | UBLKS_IO_FREE; + ublk_queue_io_cmd(t, io); + } else { + /* + * COMMIT_REQ will be completed immediately since no fetching + * piggyback is required. + * + * Marking IO_FREE only, then this io won't be issued since + * we only issue io with (UBLKS_IO_FREE | UBLKSRV_NEED_*) + * + * */ + io->flags = UBLKS_IO_FREE; + } } static void ublk_handle_cqe(struct ublk_thread *t, @@ -735,54 +758,27 @@ static void ublk_handle_cqe(struct ublk_thread *t, struct ublk_dev *dev = t->dev; unsigned q_id = user_data_to_q_id(cqe->user_data); struct ublk_queue *q = &dev->q[q_id]; - unsigned tag = user_data_to_tag(cqe->user_data); unsigned cmd_op = user_data_to_op(cqe->user_data); - int fetch = (cqe->res != UBLK_IO_RES_ABORT) && - !(t->state & UBLKSRV_THREAD_STOPPING); - struct ublk_io *io; if (cqe->res < 0 && cqe->res != -ENODEV) ublk_err("%s: res %d userdata %llx queue state %x\n", __func__, - cqe->res, cqe->user_data, q->state); + cqe->res, cqe->user_data, q->flags); ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d/%d) stopping %d\n", - __func__, cqe->res, q->q_id, tag, cmd_op, - is_target_io(cqe->user_data), + __func__, cqe->res, q->q_id, user_data_to_tag(cqe->user_data), + cmd_op, is_target_io(cqe->user_data), user_data_to_tgt_data(cqe->user_data), - (t->state & UBLKSRV_THREAD_STOPPING)); + (t->state & UBLKS_T_STOPPING)); /* Don't retrieve io in case of target io */ if (is_target_io(cqe->user_data)) { - ublksrv_handle_tgt_cqe(q, cqe); + ublksrv_handle_tgt_cqe(t, q, cqe); return; } - io = &q->ios[tag]; t->cmd_inflight--; - if (!fetch) { - t->state |= UBLKSRV_THREAD_STOPPING; - io->flags &= ~UBLKSRV_NEED_FETCH_RQ; - } - - if (cqe->res == UBLK_IO_RES_OK) { - assert(tag < q->q_depth); - if (q->tgt_ops->queue_io) - q->tgt_ops->queue_io(q, tag); - } else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) { - io->flags |= UBLKSRV_NEED_GET_DATA | UBLKSRV_IO_FREE; - ublk_queue_io_cmd(io); - } else { - /* - * COMMIT_REQ will be completed immediately since no fetching - * piggyback is required. - * - * Marking IO_FREE only, then this io won't be issued since - * we only issue io with (UBLKSRV_IO_FREE | UBLKSRV_NEED_*) - * - * */ - io->flags = UBLKSRV_IO_FREE; - } + ublk_handle_uring_cmd(t, q, cqe); } static int ublk_reap_events_uring(struct ublk_thread *t) @@ -808,7 +804,7 @@ static int ublk_process_io(struct ublk_thread *t) t->dev->dev_info.dev_id, t->idx, io_uring_sq_ready(&t->ring), t->cmd_inflight, - (t->state & UBLKSRV_THREAD_STOPPING)); + (t->state & UBLKS_T_STOPPING)); if (ublk_thread_is_done(t)) return -ENODEV; @@ -817,8 +813,8 @@ static int ublk_process_io(struct ublk_thread *t) reapped = ublk_reap_events_uring(t); ublk_dbg(UBLK_DBG_THREAD, "submit result %d, reapped %d stop %d idle %d\n", - ret, reapped, (t->state & UBLKSRV_THREAD_STOPPING), - (t->state & UBLKSRV_THREAD_IDLE)); + ret, reapped, (t->state & UBLKS_T_STOPPING), + (t->state & UBLKS_T_IDLE)); return reapped; } @@ -915,7 +911,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) { const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info; struct ublk_thread_info *tinfo; - unsigned extra_flags = 0; + unsigned long long extra_flags = 0; cpu_set_t *affinity_buf; void *thread_ret; sem_t ready; @@ -937,7 +933,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) return ret; if (ctx->auto_zc_fallback) - extra_flags = UBLKSRV_AUTO_BUF_REG_FALLBACK; + extra_flags = UBLKS_Q_AUTO_BUF_REG_FALLBACK; for (i = 0; i < dinfo->nr_hw_queues; i++) { dev->q[i].dev = dev; diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 6be601536b3d..219233f8a053 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -29,13 +29,9 @@ #include "ublk_dep.h" #include <linux/ublk_cmd.h> -#define __maybe_unused __attribute__((unused)) -#define MAX_BACK_FILES 4 -#ifndef min -#define min(a, b) ((a) < (b) ? (a) : (b)) -#endif +#include "utils.h" -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) +#define MAX_BACK_FILES 4 /****************** part 1: libublk ********************/ @@ -45,9 +41,6 @@ #define UBLK_CTRL_RING_DEPTH 32 #define ERROR_EVTFD_DEVID -2 -/* queue idle timeout */ -#define UBLKSRV_IO_IDLE_SECS 20 - #define UBLK_IO_MAX_BYTES (1 << 20) #define UBLK_MAX_QUEUES_SHIFT 5 #define UBLK_MAX_QUEUES (1 << UBLK_MAX_QUEUES_SHIFT) @@ -55,13 +48,6 @@ #define UBLK_MAX_THREADS (1 << UBLK_MAX_THREADS_SHIFT) #define UBLK_QUEUE_DEPTH 1024 -#define UBLK_DBG_DEV (1U << 0) -#define UBLK_DBG_THREAD (1U << 1) -#define UBLK_DBG_IO_CMD (1U << 2) -#define UBLK_DBG_IO (1U << 3) -#define UBLK_DBG_CTRL_CMD (1U << 4) -#define UBLK_LOG (1U << 5) - struct ublk_dev; struct ublk_queue; struct ublk_thread; @@ -121,11 +107,11 @@ struct ublk_ctrl_cmd_data { struct ublk_io { char *buf_addr; -#define UBLKSRV_NEED_FETCH_RQ (1UL << 0) -#define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1) -#define UBLKSRV_IO_FREE (1UL << 2) -#define UBLKSRV_NEED_GET_DATA (1UL << 3) -#define UBLKSRV_NEED_REG_BUF (1UL << 4) +#define UBLKS_IO_NEED_FETCH_RQ (1UL << 0) +#define UBLKS_IO_NEED_COMMIT_RQ_COMP (1UL << 1) +#define UBLKS_IO_FREE (1UL << 2) +#define UBLKS_IO_NEED_GET_DATA (1UL << 3) +#define UBLKS_IO_NEED_REG_BUF (1UL << 4) unsigned short flags; unsigned short refs; /* used by target code only */ @@ -136,7 +122,6 @@ struct ublk_io { unsigned short buf_index; unsigned short tgt_ios; void *private_data; - struct ublk_thread *t; }; struct ublk_tgt_ops { @@ -144,9 +129,9 @@ struct ublk_tgt_ops { int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *); void (*deinit_tgt)(struct ublk_dev *); - int (*queue_io)(struct ublk_queue *, int tag); - void (*tgt_io_done)(struct ublk_queue *, - int tag, const struct io_uring_cqe *); + int (*queue_io)(struct ublk_thread *, struct ublk_queue *, int tag); + void (*tgt_io_done)(struct ublk_thread *, struct ublk_queue *, + const struct io_uring_cqe *); /* * Target specific command line handling @@ -179,12 +164,10 @@ struct ublk_queue { const struct ublk_tgt_ops *tgt_ops; struct ublksrv_io_desc *io_cmd_buf; +/* borrow one bit of ublk uapi flags, which may never be used */ +#define UBLKS_Q_AUTO_BUF_REG_FALLBACK (1ULL << 63) + __u64 flags; struct ublk_io ios[UBLK_QUEUE_DEPTH]; -#define UBLKSRV_NO_BUF (1U << 2) -#define UBLKSRV_ZC (1U << 3) -#define UBLKSRV_AUTO_BUF_REG (1U << 4) -#define UBLKSRV_AUTO_BUF_REG_FALLBACK (1U << 5) - unsigned state; }; struct ublk_thread { @@ -196,8 +179,8 @@ struct ublk_thread { pthread_t thread; unsigned idx; -#define UBLKSRV_THREAD_STOPPING (1U << 0) -#define UBLKSRV_THREAD_IDLE (1U << 1) +#define UBLKS_T_STOPPING (1U << 0) +#define UBLKS_T_IDLE (1U << 1) unsigned state; }; @@ -217,22 +200,7 @@ struct ublk_dev { void *private_data; }; -#ifndef offsetof -#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) -#endif - -#ifndef container_of -#define container_of(ptr, type, member) ({ \ - unsigned long __mptr = (unsigned long)(ptr); \ - ((type *)(__mptr - offsetof(type, member))); }) -#endif - -#define round_up(val, rnd) \ - (((val) + ((rnd) - 1)) & ~((rnd) - 1)) - - -extern unsigned int ublk_dbg_mask; -extern int ublk_queue_io_cmd(struct ublk_io *io); +extern int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io); static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod) @@ -281,43 +249,15 @@ static inline unsigned short ublk_cmd_op_nr(unsigned int op) return _IOC_NR(op); } -static inline void ublk_err(const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); -} - -static inline void ublk_log(const char *fmt, ...) -{ - if (ublk_dbg_mask & UBLK_LOG) { - va_list ap; - - va_start(ap, fmt); - vfprintf(stdout, fmt, ap); - } -} - -static inline void ublk_dbg(int level, const char *fmt, ...) -{ - if (level & ublk_dbg_mask) { - va_list ap; - - va_start(ap, fmt); - vfprintf(stdout, fmt, ap); - } -} - static inline struct ublk_queue *ublk_io_to_queue(const struct ublk_io *io) { return container_of(io, struct ublk_queue, ios[io->tag]); } -static inline int ublk_io_alloc_sqes(struct ublk_io *io, +static inline int ublk_io_alloc_sqes(struct ublk_thread *t, struct io_uring_sqe *sqes[], int nr_sqes) { - struct io_uring *ring = &io->t->ring; + struct io_uring *ring = &t->ring; unsigned left = io_uring_sq_space_left(ring); int i; @@ -380,7 +320,7 @@ static inline int ublk_get_io_res(const struct ublk_queue *q, unsigned tag) static inline void ublk_mark_io_done(struct ublk_io *io, int res) { - io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE); + io->flags |= (UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_FREE); io->result = res; } @@ -402,45 +342,58 @@ static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag) return &q->ios[tag]; } -static inline int ublk_complete_io(struct ublk_queue *q, unsigned tag, int res) +static inline int ublk_complete_io(struct ublk_thread *t, struct ublk_queue *q, + unsigned tag, int res) { struct ublk_io *io = &q->ios[tag]; ublk_mark_io_done(io, res); - return ublk_queue_io_cmd(io); + return ublk_queue_io_cmd(t, io); } -static inline void ublk_queued_tgt_io(struct ublk_queue *q, unsigned tag, int queued) +static inline void ublk_queued_tgt_io(struct ublk_thread *t, struct ublk_queue *q, + unsigned tag, int queued) { if (queued < 0) - ublk_complete_io(q, tag, queued); + ublk_complete_io(t, q, tag, queued); else { struct ublk_io *io = ublk_get_io(q, tag); - io->t->io_inflight += queued; + t->io_inflight += queued; io->tgt_ios = queued; io->result = 0; } } -static inline int ublk_completed_tgt_io(struct ublk_queue *q, unsigned tag) +static inline int ublk_completed_tgt_io(struct ublk_thread *t, + struct ublk_queue *q, unsigned tag) { struct ublk_io *io = ublk_get_io(q, tag); - io->t->io_inflight--; + t->io_inflight--; return --io->tgt_ios == 0; } static inline int ublk_queue_use_zc(const struct ublk_queue *q) { - return q->state & UBLKSRV_ZC; + return q->flags & UBLK_F_SUPPORT_ZERO_COPY; } static inline int ublk_queue_use_auto_zc(const struct ublk_queue *q) { - return q->state & UBLKSRV_AUTO_BUF_REG; + return q->flags & UBLK_F_AUTO_BUF_REG; +} + +static inline int ublk_queue_auto_zc_fallback(const struct ublk_queue *q) +{ + return q->flags & UBLKS_Q_AUTO_BUF_REG_FALLBACK; +} + +static inline int ublk_queue_no_buf(const struct ublk_queue *q) +{ + return ublk_queue_use_zc(q) || ublk_queue_use_auto_zc(q); } extern const struct ublk_tgt_ops null_tgt_ops; @@ -451,10 +404,4 @@ extern const struct ublk_tgt_ops fault_inject_tgt_ops; void backing_file_tgt_deinit(struct ublk_dev *dev); int backing_file_tgt_init(struct ublk_dev *dev); -static inline unsigned int ilog2(unsigned int x) -{ - if (x == 0) - return 0; - return (sizeof(x) * 8 - 1) - __builtin_clz(x); -} #endif diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c index afe0b99d77ee..f0e0003a4860 100644 --- a/tools/testing/selftests/ublk/null.c +++ b/tools/testing/selftests/ublk/null.c @@ -55,12 +55,13 @@ static void __setup_nop_io(int tag, const struct ublksrv_io_desc *iod, sqe->user_data = build_user_data(tag, ublk_op, 0, q_id, 1); } -static int null_queue_zc_io(struct ublk_queue *q, int tag) +static int null_queue_zc_io(struct ublk_thread *t, struct ublk_queue *q, + int tag) { const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); struct io_uring_sqe *sqe[3]; - ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 3); + ublk_io_alloc_sqes(t, sqe, 3); io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index); sqe[0]->user_data = build_user_data(tag, @@ -77,19 +78,21 @@ static int null_queue_zc_io(struct ublk_queue *q, int tag) return 2; } -static int null_queue_auto_zc_io(struct ublk_queue *q, int tag) +static int null_queue_auto_zc_io(struct ublk_thread *t, struct ublk_queue *q, + int tag) { const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); struct io_uring_sqe *sqe[1]; - ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 1); + ublk_io_alloc_sqes(t, sqe, 1); __setup_nop_io(tag, iod, sqe[0], q->q_id); return 1; } -static void ublk_null_io_done(struct ublk_queue *q, int tag, - const struct io_uring_cqe *cqe) +static void ublk_null_io_done(struct ublk_thread *t, struct ublk_queue *q, + const struct io_uring_cqe *cqe) { + unsigned tag = user_data_to_tag(cqe->user_data); unsigned op = user_data_to_op(cqe->user_data); struct ublk_io *io = ublk_get_io(q, tag); @@ -105,11 +108,12 @@ static void ublk_null_io_done(struct ublk_queue *q, int tag, if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF)) io->tgt_ios += 1; - if (ublk_completed_tgt_io(q, tag)) - ublk_complete_io(q, tag, io->result); + if (ublk_completed_tgt_io(t, q, tag)) + ublk_complete_io(t, q, tag, io->result); } -static int ublk_null_queue_io(struct ublk_queue *q, int tag) +static int ublk_null_queue_io(struct ublk_thread *t, struct ublk_queue *q, + int tag) { const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); unsigned auto_zc = ublk_queue_use_auto_zc(q); @@ -117,14 +121,14 @@ static int ublk_null_queue_io(struct ublk_queue *q, int tag) int queued; if (auto_zc && !ublk_io_auto_zc_fallback(iod)) - queued = null_queue_auto_zc_io(q, tag); + queued = null_queue_auto_zc_io(t, q, tag); else if (zc) - queued = null_queue_zc_io(q, tag); + queued = null_queue_zc_io(t, q, tag); else { - ublk_complete_io(q, tag, iod->nr_sectors << 9); + ublk_complete_io(t, q, tag, iod->nr_sectors << 9); return 0; } - ublk_queued_tgt_io(q, tag, queued); + ublk_queued_tgt_io(t, q, tag, queued); return 0; } @@ -134,7 +138,7 @@ static int ublk_null_queue_io(struct ublk_queue *q, int tag) */ static unsigned short ublk_null_buf_index(const struct ublk_queue *q, int tag) { - if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK) + if (ublk_queue_auto_zc_fallback(q)) return (unsigned short)-1; return q->ios[tag].buf_index; } diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c index 37d50bbf5f5e..1fb9b7cc281b 100644 --- a/tools/testing/selftests/ublk/stripe.c +++ b/tools/testing/selftests/ublk/stripe.c @@ -123,7 +123,8 @@ static inline enum io_uring_op stripe_to_uring_op( assert(0); } -static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) +static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, + const struct ublksrv_io_desc *iod, int tag) { const struct stripe_conf *conf = get_chunk_shift(q); unsigned auto_zc = (ublk_queue_use_auto_zc(q) != 0); @@ -138,7 +139,7 @@ static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_ io->private_data = s; calculate_stripe_array(conf, iod, s, base); - ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, s->nr + extra); + ublk_io_alloc_sqes(t, sqe, s->nr + extra); if (zc) { io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, io->buf_index); @@ -176,13 +177,14 @@ static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_ return s->nr + zc; } -static int handle_flush(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) +static int handle_flush(struct ublk_thread *t, struct ublk_queue *q, + const struct ublksrv_io_desc *iod, int tag) { const struct stripe_conf *conf = get_chunk_shift(q); struct io_uring_sqe *sqe[NR_STRIPE]; int i; - ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, conf->nr_files); + ublk_io_alloc_sqes(t, sqe, conf->nr_files); for (i = 0; i < conf->nr_files; i++) { io_uring_prep_fsync(sqe[i], i + 1, IORING_FSYNC_DATASYNC); io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE); @@ -191,7 +193,8 @@ static int handle_flush(struct ublk_queue *q, const struct ublksrv_io_desc *iod, return conf->nr_files; } -static int stripe_queue_tgt_io(struct ublk_queue *q, int tag) +static int stripe_queue_tgt_io(struct ublk_thread *t, struct ublk_queue *q, + int tag) { const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); unsigned ublk_op = ublksrv_get_op(iod); @@ -199,7 +202,7 @@ static int stripe_queue_tgt_io(struct ublk_queue *q, int tag) switch (ublk_op) { case UBLK_IO_OP_FLUSH: - ret = handle_flush(q, iod, tag); + ret = handle_flush(t, q, iod, tag); break; case UBLK_IO_OP_WRITE_ZEROES: case UBLK_IO_OP_DISCARD: @@ -207,7 +210,7 @@ static int stripe_queue_tgt_io(struct ublk_queue *q, int tag) break; case UBLK_IO_OP_READ: case UBLK_IO_OP_WRITE: - ret = stripe_queue_tgt_rw_io(q, iod, tag); + ret = stripe_queue_tgt_rw_io(t, q, iod, tag); break; default: ret = -EINVAL; @@ -218,17 +221,19 @@ static int stripe_queue_tgt_io(struct ublk_queue *q, int tag) return ret; } -static int ublk_stripe_queue_io(struct ublk_queue *q, int tag) +static int ublk_stripe_queue_io(struct ublk_thread *t, struct ublk_queue *q, + int tag) { - int queued = stripe_queue_tgt_io(q, tag); + int queued = stripe_queue_tgt_io(t, q, tag); - ublk_queued_tgt_io(q, tag, queued); + ublk_queued_tgt_io(t, q, tag, queued); return 0; } -static void ublk_stripe_io_done(struct ublk_queue *q, int tag, - const struct io_uring_cqe *cqe) +static void ublk_stripe_io_done(struct ublk_thread *t, struct ublk_queue *q, + const struct io_uring_cqe *cqe) { + unsigned tag = user_data_to_tag(cqe->user_data); const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); unsigned op = user_data_to_op(cqe->user_data); struct ublk_io *io = ublk_get_io(q, tag); @@ -257,13 +262,13 @@ static void ublk_stripe_io_done(struct ublk_queue *q, int tag, } } - if (ublk_completed_tgt_io(q, tag)) { + if (ublk_completed_tgt_io(t, q, tag)) { int res = io->result; if (!res) res = iod->nr_sectors << 9; - ublk_complete_io(q, tag, res); + ublk_complete_io(t, q, tag, res); free_stripe_array(io->private_data); io->private_data = NULL; diff --git a/tools/testing/selftests/ublk/utils.h b/tools/testing/selftests/ublk/utils.h new file mode 100644 index 000000000000..36545d1567f1 --- /dev/null +++ b/tools/testing/selftests/ublk/utils.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef KUBLK_UTILS_H +#define KUBLK_UTILS_H + +#define __maybe_unused __attribute__((unused)) + +#ifndef min +#define min(a, b) ((a) < (b) ? (a) : (b)) +#endif + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) + +#ifndef offsetof +#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) +#endif + +#ifndef container_of +#define container_of(ptr, type, member) ({ \ + unsigned long __mptr = (unsigned long)(ptr); \ + ((type *)(__mptr - offsetof(type, member))); }) +#endif + +#define round_up(val, rnd) \ + (((val) + ((rnd) - 1)) & ~((rnd) - 1)) + +static inline unsigned int ilog2(unsigned int x) +{ + if (x == 0) + return 0; + return (sizeof(x) * 8 - 1) - __builtin_clz(x); +} + +#define UBLK_DBG_DEV (1U << 0) +#define UBLK_DBG_THREAD (1U << 1) +#define UBLK_DBG_IO_CMD (1U << 2) +#define UBLK_DBG_IO (1U << 3) +#define UBLK_DBG_CTRL_CMD (1U << 4) +#define UBLK_LOG (1U << 5) + +extern unsigned int ublk_dbg_mask; + +static inline void ublk_err(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); +} + +static inline void ublk_log(const char *fmt, ...) +{ + if (ublk_dbg_mask & UBLK_LOG) { + va_list ap; + + va_start(ap, fmt); + vfprintf(stdout, fmt, ap); + } +} + +static inline void ublk_dbg(int level, const char *fmt, ...) +{ + if (level & ublk_dbg_mask) { + va_list ap; + + va_start(ap, fmt); + vfprintf(stdout, fmt, ap); + } +} + +#endif diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 14718ca23a05..816e7e057585 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -1442,8 +1442,29 @@ static inline void free_anon_vma_name(struct vm_area_struct *vma) (void)vma; } +/* Declared in vma.h. */ +static inline void set_vma_from_desc(struct vm_area_struct *vma, + struct vm_area_desc *desc); + +static inline struct vm_area_desc *vma_to_desc(struct vm_area_struct *vma, + struct vm_area_desc *desc); + +static int compat_vma_mmap_prepare(struct file *file, + struct vm_area_struct *vma) +{ + struct vm_area_desc desc; + int err; + + err = file->f_op->mmap_prepare(vma_to_desc(vma, &desc)); + if (err) + return err; + set_vma_from_desc(vma, &desc); + + return 0; +} + /* Did the driver provide valid mmap hook configuration? */ -static inline bool file_has_valid_mmap_hooks(struct file *file) +static inline bool can_mmap_file(struct file *file) { bool has_mmap = file->f_op->mmap; bool has_mmap_prepare = file->f_op->mmap_prepare; @@ -1451,22 +1472,21 @@ static inline bool file_has_valid_mmap_hooks(struct file *file) /* Hooks are mutually exclusive. */ if (WARN_ON_ONCE(has_mmap && has_mmap_prepare)) return false; - if (WARN_ON_ONCE(!has_mmap && !has_mmap_prepare)) + if (!has_mmap && !has_mmap_prepare) return false; return true; } -static inline int call_mmap(struct file *file, struct vm_area_struct *vma) +static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma) { - if (WARN_ON_ONCE(file->f_op->mmap_prepare)) - return -EINVAL; + if (file->f_op->mmap_prepare) + return compat_vma_mmap_prepare(file, vma); return file->f_op->mmap(file, vma); } -static inline int __call_mmap_prepare(struct file *file, - struct vm_area_desc *desc) +static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc) { return file->f_op->mmap_prepare(desc); } |