diff options
Diffstat (limited to 'tools/testing/selftests/pidfd')
-rw-r--r-- | tools/testing/selftests/pidfd/.gitignore | 2 | ||||
-rw-r--r-- | tools/testing/selftests/pidfd/Makefile | 3 | ||||
-rw-r--r-- | tools/testing/selftests/pidfd/config | 2 | ||||
-rw-r--r-- | tools/testing/selftests/pidfd/pidfd.h | 40 | ||||
-rw-r--r-- | tools/testing/selftests/pidfd/pidfd_bind_mount.c | 188 | ||||
-rw-r--r-- | tools/testing/selftests/pidfd/pidfd_fdinfo_test.c | 2 | ||||
-rw-r--r-- | tools/testing/selftests/pidfd/pidfd_file_handle_test.c | 503 | ||||
-rw-r--r-- | tools/testing/selftests/pidfd/pidfd_getfd_test.c | 31 | ||||
-rw-r--r-- | tools/testing/selftests/pidfd/pidfd_open_test.c | 86 | ||||
-rw-r--r-- | tools/testing/selftests/pidfd/pidfd_poll_test.c | 2 | ||||
-rw-r--r-- | tools/testing/selftests/pidfd/pidfd_setns_test.c | 307 | ||||
-rw-r--r-- | tools/testing/selftests/pidfd/pidfd_test.c | 4 | ||||
-rw-r--r-- | tools/testing/selftests/pidfd/pidfd_wait.c | 47 |
13 files changed, 1104 insertions, 113 deletions
diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore index 973198a3ec3d..bf92481f925c 100644 --- a/tools/testing/selftests/pidfd/.gitignore +++ b/tools/testing/selftests/pidfd/.gitignore @@ -6,3 +6,5 @@ pidfd_wait pidfd_fdinfo_test pidfd_getfd_test pidfd_setns_test +pidfd_file_handle_test +pidfd_bind_mount diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile index d731e3e76d5b..301343a11b62 100644 --- a/tools/testing/selftests/pidfd/Makefile +++ b/tools/testing/selftests/pidfd/Makefile @@ -2,7 +2,8 @@ CFLAGS += -g $(KHDR_INCLUDES) -pthread -Wall TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \ - pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test + pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test \ + pidfd_file_handle_test pidfd_bind_mount include ../lib.mk diff --git a/tools/testing/selftests/pidfd/config b/tools/testing/selftests/pidfd/config index f6f2965e17af..6133524710f7 100644 --- a/tools/testing/selftests/pidfd/config +++ b/tools/testing/selftests/pidfd/config @@ -3,5 +3,7 @@ CONFIG_IPC_NS=y CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_NET_NS=y +CONFIG_TIME_NS=y +CONFIG_GENERIC_VDSO_TIME_NS=y CONFIG_CGROUPS=y CONFIG_CHECKPOINT_RESTORE=y diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index 88d6830ee004..0b96ac4b8ce5 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -12,11 +12,11 @@ #include <stdlib.h> #include <string.h> #include <syscall.h> -#include <sys/mount.h> #include <sys/types.h> #include <sys/wait.h> #include "../kselftest.h" +#include "../clone3/clone3_selftests.h" #ifndef P_PIDFD #define P_PIDFD 3 @@ -68,6 +68,11 @@ #define PIDFD_SKIP 3 #define PIDFD_XFAIL 4 +static inline int sys_waitid(int which, pid_t pid, siginfo_t *info, int options) +{ + return syscall(__NR_waitid, which, pid, info, options, NULL); +} + static inline int wait_for_pid(pid_t pid) { int status, ret; @@ -114,4 +119,37 @@ static inline int sys_memfd_create(const char *name, unsigned int flags) return syscall(__NR_memfd_create, name, flags); } +static inline pid_t create_child(int *pidfd, unsigned flags) +{ + struct __clone_args args = { + .flags = CLONE_PIDFD | flags, + .exit_signal = SIGCHLD, + .pidfd = ptr_to_u64(pidfd), + }; + + return sys_clone3(&args, sizeof(struct __clone_args)); +} + +static inline ssize_t read_nointr(int fd, void *buf, size_t count) +{ + ssize_t ret; + + do { + ret = read(fd, buf, count); + } while (ret < 0 && errno == EINTR); + + return ret; +} + +static inline ssize_t write_nointr(int fd, const void *buf, size_t count) +{ + ssize_t ret; + + do { + ret = write(fd, buf, count); + } while (ret < 0 && errno == EINTR); + + return ret; +} + #endif /* __PIDFD_H */ diff --git a/tools/testing/selftests/pidfd/pidfd_bind_mount.c b/tools/testing/selftests/pidfd/pidfd_bind_mount.c new file mode 100644 index 000000000000..7822dd080258 --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_bind_mount.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2024 Christian Brauner <brauner@kernel.org> + +#define _GNU_SOURCE +#include <fcntl.h> +#include <limits.h> +#include <sched.h> +#include <stdio.h> +#include <string.h> +#include <linux/fs.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <unistd.h> + +#include "pidfd.h" +#include "../kselftest_harness.h" + +#ifndef __NR_open_tree + #if defined __alpha__ + #define __NR_open_tree 538 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_open_tree 4428 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_open_tree 6428 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_open_tree 5428 + #endif + #elif defined __ia64__ + #define __NR_open_tree (428 + 1024) + #else + #define __NR_open_tree 428 + #endif +#endif + +#ifndef __NR_move_mount + #if defined __alpha__ + #define __NR_move_mount 539 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_move_mount 4429 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_move_mount 6429 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_move_mount 5429 + #endif + #elif defined __ia64__ + #define __NR_move_mount (428 + 1024) + #else + #define __NR_move_mount 429 + #endif +#endif + +#ifndef MOVE_MOUNT_F_EMPTY_PATH +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ +#endif + +#ifndef MOVE_MOUNT_F_EMPTY_PATH +#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ +#endif + +static inline int sys_move_mount(int from_dfd, const char *from_pathname, + int to_dfd, const char *to_pathname, + unsigned int flags) +{ + return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, + to_pathname, flags); +} + +#ifndef OPEN_TREE_CLONE +#define OPEN_TREE_CLONE 1 +#endif + +#ifndef OPEN_TREE_CLOEXEC +#define OPEN_TREE_CLOEXEC O_CLOEXEC +#endif + +#ifndef AT_RECURSIVE +#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ +#endif + +static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags) +{ + return syscall(__NR_open_tree, dfd, filename, flags); +} + +FIXTURE(pidfd_bind_mount) { + char template[PATH_MAX]; + int fd_tmp; + int pidfd; + struct stat st1; + struct stat st2; + __u32 gen1; + __u32 gen2; + bool must_unmount; +}; + +FIXTURE_SETUP(pidfd_bind_mount) +{ + self->fd_tmp = -EBADF; + self->must_unmount = false; + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + ASSERT_LE(snprintf(self->template, PATH_MAX, "%s", P_tmpdir "/pidfd_bind_mount_XXXXXX"), PATH_MAX); + self->fd_tmp = mkstemp(self->template); + ASSERT_GE(self->fd_tmp, 0); + self->pidfd = sys_pidfd_open(getpid(), 0); + ASSERT_GE(self->pidfd, 0); + ASSERT_GE(fstat(self->pidfd, &self->st1), 0); + ASSERT_EQ(ioctl(self->pidfd, FS_IOC_GETVERSION, &self->gen1), 0); +} + +FIXTURE_TEARDOWN(pidfd_bind_mount) +{ + ASSERT_EQ(close(self->fd_tmp), 0); + if (self->must_unmount) + ASSERT_EQ(umount2(self->template, 0), 0); + ASSERT_EQ(unlink(self->template), 0); +} + +/* + * Test that a detached mount can be created for a pidfd and then + * attached to the filesystem hierarchy. + */ +TEST_F(pidfd_bind_mount, bind_mount) +{ + int fd_tree; + + fd_tree = sys_open_tree(self->pidfd, "", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | AT_EMPTY_PATH); + ASSERT_GE(fd_tree, 0); + + ASSERT_EQ(move_mount(fd_tree, "", self->fd_tmp, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0); + self->must_unmount = true; + + ASSERT_EQ(close(fd_tree), 0); +} + +/* Test that a pidfd can be reopened through procfs. */ +TEST_F(pidfd_bind_mount, reopen) +{ + int pidfd; + char proc_path[PATH_MAX]; + + sprintf(proc_path, "/proc/self/fd/%d", self->pidfd); + pidfd = open(proc_path, O_RDONLY | O_NOCTTY | O_CLOEXEC); + ASSERT_GE(pidfd, 0); + + ASSERT_GE(fstat(self->pidfd, &self->st2), 0); + ASSERT_EQ(ioctl(self->pidfd, FS_IOC_GETVERSION, &self->gen2), 0); + + ASSERT_TRUE(self->st1.st_dev == self->st2.st_dev && self->st1.st_ino == self->st2.st_ino); + ASSERT_TRUE(self->gen1 == self->gen2); + + ASSERT_EQ(close(pidfd), 0); +} + +/* + * Test that a detached mount can be created for a pidfd and then + * attached to the filesystem hierarchy and reopened. + */ +TEST_F(pidfd_bind_mount, bind_mount_reopen) +{ + int fd_tree, fd_pidfd_mnt; + + fd_tree = sys_open_tree(self->pidfd, "", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | AT_EMPTY_PATH); + ASSERT_GE(fd_tree, 0); + + ASSERT_EQ(move_mount(fd_tree, "", self->fd_tmp, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0); + self->must_unmount = true; + + fd_pidfd_mnt = openat(-EBADF, self->template, O_RDONLY | O_NOCTTY | O_CLOEXEC); + ASSERT_GE(fd_pidfd_mnt, 0); + + ASSERT_GE(fstat(fd_tree, &self->st2), 0); + ASSERT_EQ(ioctl(fd_pidfd_mnt, FS_IOC_GETVERSION, &self->gen2), 0); + + ASSERT_TRUE(self->st1.st_dev == self->st2.st_dev && self->st1.st_ino == self->st2.st_ino); + ASSERT_TRUE(self->gen1 == self->gen2); + + ASSERT_EQ(close(fd_tree), 0); + ASSERT_EQ(close(fd_pidfd_mnt), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c index 01cc37bf611c..f062a986e382 100644 --- a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c +++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c @@ -307,5 +307,5 @@ int main(int argc, char **argv) test_pidfd_fdinfo_nspid(); test_pidfd_dead_fdinfo(); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/pidfd/pidfd_file_handle_test.c b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c new file mode 100644 index 000000000000..439b9c6c0457 --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c @@ -0,0 +1,503 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <linux/types.h> +#include <poll.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <syscall.h> +#include <sys/prctl.h> +#include <sys/wait.h> +#include <unistd.h> +#include <sys/socket.h> +#include <linux/kcmp.h> +#include <sys/stat.h> + +#include "pidfd.h" +#include "../kselftest_harness.h" + +FIXTURE(file_handle) +{ + pid_t pid; + int pidfd; + + pid_t child_pid1; + int child_pidfd1; + + pid_t child_pid2; + int child_pidfd2; + + pid_t child_pid3; + int child_pidfd3; +}; + +FIXTURE_SETUP(file_handle) +{ + int ret; + int ipc_sockets[2]; + char c; + + self->pid = getpid(); + self->pidfd = sys_pidfd_open(self->pid, 0); + ASSERT_GE(self->pidfd, 0); + + ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + EXPECT_EQ(ret, 0); + + self->child_pid1 = create_child(&self->child_pidfd1, CLONE_NEWUSER); + EXPECT_GE(self->child_pid1, 0); + + if (self->child_pid1 == 0) { + close(ipc_sockets[0]); + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + _exit(EXIT_FAILURE); + + close(ipc_sockets[1]); + + pause(); + _exit(EXIT_SUCCESS); + } + + close(ipc_sockets[1]); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + close(ipc_sockets[0]); + + ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + EXPECT_EQ(ret, 0); + + self->child_pid2 = create_child(&self->child_pidfd2, CLONE_NEWUSER | CLONE_NEWPID); + EXPECT_GE(self->child_pid2, 0); + + if (self->child_pid2 == 0) { + close(ipc_sockets[0]); + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + _exit(EXIT_FAILURE); + + close(ipc_sockets[1]); + + pause(); + _exit(EXIT_SUCCESS); + } + + close(ipc_sockets[1]); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + close(ipc_sockets[0]); + + ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + EXPECT_EQ(ret, 0); + + self->child_pid3 = create_child(&self->child_pidfd3, CLONE_NEWUSER | CLONE_NEWPID); + EXPECT_GE(self->child_pid3, 0); + + if (self->child_pid3 == 0) { + close(ipc_sockets[0]); + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + _exit(EXIT_FAILURE); + + close(ipc_sockets[1]); + + pause(); + _exit(EXIT_SUCCESS); + } + + close(ipc_sockets[1]); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + close(ipc_sockets[0]); +} + +FIXTURE_TEARDOWN(file_handle) +{ + EXPECT_EQ(close(self->pidfd), 0); + + EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd1, SIGKILL, NULL, 0), 0); + if (self->child_pidfd1 >= 0) + EXPECT_EQ(0, close(self->child_pidfd1)); + + EXPECT_EQ(sys_waitid(P_PID, self->child_pid1, NULL, WEXITED), 0); + + EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd2, SIGKILL, NULL, 0), 0); + if (self->child_pidfd2 >= 0) + EXPECT_EQ(0, close(self->child_pidfd2)); + + EXPECT_EQ(sys_waitid(P_PID, self->child_pid2, NULL, WEXITED), 0); + + if (self->child_pidfd3 >= 0) { + EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd3, SIGKILL, NULL, 0), 0); + EXPECT_EQ(0, close(self->child_pidfd3)); + EXPECT_EQ(sys_waitid(P_PID, self->child_pid3, NULL, WEXITED), 0); + } +} + +/* + * Test that we can decode a pidfs file handle in the same pid + * namespace. + */ +TEST_F(file_handle, file_handle_same_pidns) +{ + int mnt_id; + struct file_handle *fh; + int pidfd = -EBADF; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd1, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd1, &st1), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, O_CLOEXEC); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, O_NONBLOCK); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + free(fh); +} + +/* + * Test that we can decode a pidfs file handle from a child pid + * namespace. + */ +TEST_F(file_handle, file_handle_child_pidns) +{ + int mnt_id; + struct file_handle *fh; + int pidfd = -EBADF; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd2, &st1), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, O_CLOEXEC); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, O_NONBLOCK); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + free(fh); +} + +/* + * Test that we fail to decode a pidfs file handle from an ancestor + * child pid namespace. + */ +TEST_F(file_handle, file_handle_foreign_pidns) +{ + int mnt_id; + struct file_handle *fh; + pid_t pid; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->pidfd, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(setns(self->child_pidfd2, CLONE_NEWUSER | CLONE_NEWPID), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + int pidfd = open_by_handle_at(self->pidfd, fh, 0); + if (pidfd >= 0) { + TH_LOG("Managed to open pidfd outside of the caller's pid namespace hierarchy"); + _exit(1); + } + _exit(0); + } + + ASSERT_EQ(wait_for_pid(pid), 0); + + free(fh); +} + +/* + * Test that we can decode a pidfs file handle of a process that has + * exited but not been reaped. + */ +TEST_F(file_handle, pid_has_exited) +{ + int mnt_id, pidfd, child_pidfd3; + struct file_handle *fh; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd3, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd3, &st1), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + child_pidfd3 = self->child_pidfd3; + self->child_pidfd3 = -EBADF; + EXPECT_EQ(sys_pidfd_send_signal(child_pidfd3, SIGKILL, NULL, 0), 0); + EXPECT_EQ(close(child_pidfd3), 0); + EXPECT_EQ(sys_waitid(P_PID, self->child_pid3, NULL, WEXITED | WNOWAIT), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + EXPECT_EQ(sys_waitid(P_PID, self->child_pid3, NULL, WEXITED), 0); +} + +/* + * Test that we fail to decode a pidfs file handle of a process that has + * already been reaped. + */ +TEST_F(file_handle, pid_has_been_reaped) +{ + int mnt_id, pidfd, child_pidfd3; + struct file_handle *fh; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd3, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd3, &st1), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + child_pidfd3 = self->child_pidfd3; + self->child_pidfd3 = -EBADF; + EXPECT_EQ(sys_pidfd_send_signal(child_pidfd3, SIGKILL, NULL, 0), 0); + EXPECT_EQ(close(child_pidfd3), 0); + EXPECT_EQ(sys_waitid(P_PID, self->child_pid3, NULL, WEXITED), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_LT(pidfd, 0); +} + +/* + * Test valid flags to open a pidfd file handle. Note, that + * PIDFD_NONBLOCK is defined as O_NONBLOCK and O_NONBLOCK is an alias to + * O_NDELAY. Also note that PIDFD_THREAD is an alias for O_EXCL. + */ +TEST_F(file_handle, open_by_handle_at_valid_flags) +{ + int mnt_id; + struct file_handle *fh; + int pidfd = -EBADF; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd2, &st1), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, + O_RDONLY | + O_WRONLY | + O_RDWR | + O_NONBLOCK | + O_NDELAY | + O_CLOEXEC | + O_EXCL); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); +} + +/* + * Test that invalid flags passed to open a pidfd file handle are + * rejected. + */ +TEST_F(file_handle, open_by_handle_at_invalid_flags) +{ + int mnt_id; + struct file_handle *fh; + int pidfd = -EBADF; + static const struct invalid_pidfs_file_handle_flags { + int oflag; + const char *oflag_name; + } invalid_pidfs_file_handle_flags[] = { + { FASYNC, "FASYNC" }, + { O_CREAT, "O_CREAT" }, + { O_NOCTTY, "O_NOCTTY" }, + { O_CREAT, "O_CREAT" }, + { O_TRUNC, "O_TRUNC" }, + { O_APPEND, "O_APPEND" }, + { O_SYNC, "O_SYNC" }, + { O_DSYNC, "O_DSYNC" }, + { O_DIRECT, "O_DIRECT" }, + { O_DIRECTORY, "O_DIRECTORY" }, + { O_NOFOLLOW, "O_NOFOLLOW" }, + { O_NOATIME, "O_NOATIME" }, + { O_PATH, "O_PATH" }, + { O_TMPFILE, "O_TMPFILE" }, + /* + * O_LARGEFILE is added implicitly by + * open_by_handle_at() so pidfs simply masks it off. + */ + }; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + for (int i = 0; i < ARRAY_SIZE(invalid_pidfs_file_handle_flags); i++) { + pidfd = open_by_handle_at(self->pidfd, fh, invalid_pidfs_file_handle_flags[i].oflag); + ASSERT_LT(pidfd, 0) { + TH_LOG("open_by_handle_at() succeeded with invalid flags: %s", invalid_pidfs_file_handle_flags[i].oflag_name); + } + } +} + +/* Test that lookup fails. */ +TEST_F(file_handle, lookup_must_fail) +{ + int mnt_id; + struct file_handle *fh; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_NE(name_to_handle_at(self->child_pidfd2, "lookup-is-not-possible-with-pidfs", fh, &mnt_id, AT_EMPTY_PATH), 0); + ASSERT_EQ(errno, ENOTDIR); + ASSERT_NE(name_to_handle_at(self->child_pidfd2, "lookup-is-not-possible-with-pidfs", fh, &mnt_id, 0), 0); + ASSERT_EQ(errno, ENOTDIR); +} + +#ifndef AT_HANDLE_CONNECTABLE +#define AT_HANDLE_CONNECTABLE 0x002 +#endif + +/* + * Test that AT_HANDLE_CONNECTABLE is rejected. Connectable file handles + * don't make sense for pidfs. Note that currently AT_HANDLE_CONNECTABLE + * is rejected because it is incompatible with AT_EMPTY_PATH which is + * required with pidfds as we don't support lookup. + */ +TEST_F(file_handle, invalid_name_to_handle_at_flags) +{ + int mnt_id; + struct file_handle *fh; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_NE(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH | AT_HANDLE_CONNECTABLE), 0); +} + +#ifndef AT_HANDLE_FID +#define AT_HANDLE_FID 0x200 +#endif + +/* + * Test that a request with AT_HANDLE_FID always leads to decodable file + * handle as pidfs always provides export operations. + */ +TEST_F(file_handle, valid_name_to_handle_at_flags) +{ + int mnt_id, pidfd; + struct file_handle *fh; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH | AT_HANDLE_FID), 0); + + ASSERT_EQ(fstat(self->child_pidfd2, &st1), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/pidfd/pidfd_getfd_test.c b/tools/testing/selftests/pidfd/pidfd_getfd_test.c index 0930e2411dfb..cd51d547b751 100644 --- a/tools/testing/selftests/pidfd/pidfd_getfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_getfd_test.c @@ -5,6 +5,7 @@ #include <fcntl.h> #include <limits.h> #include <linux/types.h> +#include <poll.h> #include <sched.h> #include <signal.h> #include <stdio.h> @@ -129,6 +130,7 @@ FIXTURE(child) * When it is closed, the child will exit. */ int sk; + bool ignore_child_result; }; FIXTURE_SETUP(child) @@ -165,10 +167,14 @@ FIXTURE_SETUP(child) FIXTURE_TEARDOWN(child) { + int ret; + EXPECT_EQ(0, close(self->pidfd)); EXPECT_EQ(0, close(self->sk)); - EXPECT_EQ(0, wait_for_pid(self->pid)); + ret = wait_for_pid(self->pid); + if (!self->ignore_child_result) + EXPECT_EQ(0, ret); } TEST_F(child, disable_ptrace) @@ -235,6 +241,29 @@ TEST(flags_set) EXPECT_EQ(errno, EINVAL); } +TEST_F(child, no_strange_EBADF) +{ + struct pollfd fds; + + self->ignore_child_result = true; + + fds.fd = self->pidfd; + fds.events = POLLIN; + + ASSERT_EQ(kill(self->pid, SIGKILL), 0); + ASSERT_EQ(poll(&fds, 1, 5000), 1); + + /* + * It used to be that pidfd_getfd() could race with the exiting thread + * between exit_files() and release_task(), and get a non-null task + * with a NULL files struct, and you'd get EBADF, which was slightly + * confusing. + */ + errno = 0; + EXPECT_EQ(sys_pidfd_getfd(self->pidfd, self->remote_fd, 0), -1); + EXPECT_EQ(errno, ESRCH); +} + #if __NR_pidfd_getfd == -1 int main(void) { diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c index 8a59438ccc78..ce413a221bac 100644 --- a/tools/testing/selftests/pidfd/pidfd_open_test.c +++ b/tools/testing/selftests/pidfd/pidfd_open_test.c @@ -13,6 +13,7 @@ #include <stdlib.h> #include <string.h> #include <syscall.h> +#include <sys/ioctl.h> #include <sys/mount.h> #include <sys/prctl.h> #include <sys/wait.h> @@ -21,6 +22,32 @@ #include "pidfd.h" #include "../kselftest.h" +#ifndef PIDFS_IOCTL_MAGIC +#define PIDFS_IOCTL_MAGIC 0xFF +#endif + +#ifndef PIDFD_GET_INFO +#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info) +#define PIDFD_INFO_CGROUPID (1UL << 0) + +struct pidfd_info { + __u64 request_mask; + __u64 cgroupid; + __u32 pid; + __u32 tgid; + __u32 ppid; + __u32 ruid; + __u32 rgid; + __u32 euid; + __u32 egid; + __u32 suid; + __u32 sgid; + __u32 fsuid; + __u32 fsgid; + __u32 spare0[1]; +}; +#endif + static int safe_int(const char *numstr, int *converted) { char *err = NULL; @@ -120,10 +147,13 @@ out: int main(int argc, char **argv) { + struct pidfd_info info = { + .request_mask = PIDFD_INFO_CGROUPID, + }; int pidfd = -1, ret = 1; pid_t pid; - ksft_set_plan(3); + ksft_set_plan(4); pidfd = sys_pidfd_open(-1, 0); if (pidfd >= 0) { @@ -153,11 +183,63 @@ int main(int argc, char **argv) pid = get_pid_from_fdinfo_file(pidfd, "Pid:", sizeof("Pid:") - 1); ksft_print_msg("pidfd %d refers to process with pid %d\n", pidfd, pid); + if (ioctl(pidfd, PIDFD_GET_INFO, &info) < 0) { + ksft_print_msg("%s - failed to get info from pidfd\n", strerror(errno)); + goto on_error; + } + if (info.pid != pid) { + ksft_print_msg("pid from fdinfo file %d does not match pid from ioctl %d\n", + pid, info.pid); + goto on_error; + } + if (info.ppid != getppid()) { + ksft_print_msg("ppid %d does not match ppid from ioctl %d\n", + pid, info.pid); + goto on_error; + } + if (info.ruid != getuid()) { + ksft_print_msg("uid %d does not match uid from ioctl %d\n", + getuid(), info.ruid); + goto on_error; + } + if (info.rgid != getgid()) { + ksft_print_msg("gid %d does not match gid from ioctl %d\n", + getgid(), info.rgid); + goto on_error; + } + if (info.euid != geteuid()) { + ksft_print_msg("euid %d does not match euid from ioctl %d\n", + geteuid(), info.euid); + goto on_error; + } + if (info.egid != getegid()) { + ksft_print_msg("egid %d does not match egid from ioctl %d\n", + getegid(), info.egid); + goto on_error; + } + if (info.suid != geteuid()) { + ksft_print_msg("suid %d does not match suid from ioctl %d\n", + geteuid(), info.suid); + goto on_error; + } + if (info.sgid != getegid()) { + ksft_print_msg("sgid %d does not match sgid from ioctl %d\n", + getegid(), info.sgid); + goto on_error; + } + if ((info.request_mask & PIDFD_INFO_CGROUPID) && info.cgroupid == 0) { + ksft_print_msg("cgroupid should not be 0 when PIDFD_INFO_CGROUPID is set\n"); + goto on_error; + } + ksft_test_result_pass("get info from pidfd test: passed\n"); + ret = 0; on_error: if (pidfd >= 0) close(pidfd); - return !ret ? ksft_exit_pass() : ksft_exit_fail(); + if (ret) + ksft_exit_fail(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/pidfd/pidfd_poll_test.c b/tools/testing/selftests/pidfd/pidfd_poll_test.c index 610811275357..55d74a50358f 100644 --- a/tools/testing/selftests/pidfd/pidfd_poll_test.c +++ b/tools/testing/selftests/pidfd/pidfd_poll_test.c @@ -112,5 +112,5 @@ int main(int argc, char **argv) } ksft_test_result_pass("pidfd poll test: pass\n"); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c index 6e2f2cd400ca..222f8131283b 100644 --- a/tools/testing/selftests/pidfd/pidfd_setns_test.c +++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c @@ -16,11 +16,55 @@ #include <unistd.h> #include <sys/socket.h> #include <sys/stat.h> +#include <linux/ioctl.h> #include "pidfd.h" -#include "../clone3/clone3_selftests.h" #include "../kselftest_harness.h" +#ifndef PIDFS_IOCTL_MAGIC +#define PIDFS_IOCTL_MAGIC 0xFF +#endif + +#ifndef PIDFD_GET_CGROUP_NAMESPACE +#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1) +#endif + +#ifndef PIDFD_GET_IPC_NAMESPACE +#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2) +#endif + +#ifndef PIDFD_GET_MNT_NAMESPACE +#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3) +#endif + +#ifndef PIDFD_GET_NET_NAMESPACE +#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4) +#endif + +#ifndef PIDFD_GET_PID_NAMESPACE +#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5) +#endif + +#ifndef PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE +#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6) +#endif + +#ifndef PIDFD_GET_TIME_NAMESPACE +#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7) +#endif + +#ifndef PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE +#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8) +#endif + +#ifndef PIDFD_GET_USER_NAMESPACE +#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9) +#endif + +#ifndef PIDFD_GET_UTS_NAMESPACE +#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10) +#endif + enum { PIDFD_NS_USER, PIDFD_NS_MNT, @@ -31,22 +75,25 @@ enum { PIDFD_NS_CGROUP, PIDFD_NS_PIDCLD, PIDFD_NS_TIME, + PIDFD_NS_TIMECLD, PIDFD_NS_MAX }; const struct ns_info { const char *name; int flag; + unsigned int pidfd_ioctl; } ns_info[] = { - [PIDFD_NS_USER] = { "user", CLONE_NEWUSER, }, - [PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, }, - [PIDFD_NS_PID] = { "pid", CLONE_NEWPID, }, - [PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, }, - [PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, }, - [PIDFD_NS_NET] = { "net", CLONE_NEWNET, }, - [PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, }, - [PIDFD_NS_PIDCLD] = { "pid_for_children", 0, }, - [PIDFD_NS_TIME] = { "time", CLONE_NEWTIME, }, + [PIDFD_NS_USER] = { "user", CLONE_NEWUSER, PIDFD_GET_USER_NAMESPACE, }, + [PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, PIDFD_GET_MNT_NAMESPACE, }, + [PIDFD_NS_PID] = { "pid", CLONE_NEWPID, PIDFD_GET_PID_NAMESPACE, }, + [PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, PIDFD_GET_UTS_NAMESPACE, }, + [PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, PIDFD_GET_IPC_NAMESPACE, }, + [PIDFD_NS_NET] = { "net", CLONE_NEWNET, PIDFD_GET_NET_NAMESPACE, }, + [PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, PIDFD_GET_CGROUP_NAMESPACE, }, + [PIDFD_NS_TIME] = { "time", CLONE_NEWTIME, PIDFD_GET_TIME_NAMESPACE, }, + [PIDFD_NS_PIDCLD] = { "pid_for_children", 0, PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE, }, + [PIDFD_NS_TIMECLD] = { "time_for_children", 0, PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE, }, }; FIXTURE(current_nsset) @@ -54,6 +101,7 @@ FIXTURE(current_nsset) pid_t pid; int pidfd; int nsfds[PIDFD_NS_MAX]; + int child_pidfd_derived_nsfds[PIDFD_NS_MAX]; pid_t child_pid_exited; int child_pidfd_exited; @@ -61,28 +109,14 @@ FIXTURE(current_nsset) pid_t child_pid1; int child_pidfd1; int child_nsfds1[PIDFD_NS_MAX]; + int child_pidfd_derived_nsfds1[PIDFD_NS_MAX]; pid_t child_pid2; int child_pidfd2; int child_nsfds2[PIDFD_NS_MAX]; + int child_pidfd_derived_nsfds2[PIDFD_NS_MAX]; }; -static int sys_waitid(int which, pid_t pid, int options) -{ - return syscall(__NR_waitid, which, pid, NULL, options, NULL); -} - -pid_t create_child(int *pidfd, unsigned flags) -{ - struct __clone_args args = { - .flags = CLONE_PIDFD | flags, - .exit_signal = SIGCHLD, - .pidfd = ptr_to_u64(pidfd), - }; - - return sys_clone3(&args, sizeof(struct clone_args)); -} - static bool switch_timens(void) { int fd, ret; @@ -99,28 +133,6 @@ static bool switch_timens(void) return ret == 0; } -static ssize_t read_nointr(int fd, void *buf, size_t count) -{ - ssize_t ret; - - do { - ret = read(fd, buf, count); - } while (ret < 0 && errno == EINTR); - - return ret; -} - -static ssize_t write_nointr(int fd, const void *buf, size_t count) -{ - ssize_t ret; - - do { - ret = write(fd, buf, count); - } while (ret < 0 && errno == EINTR); - - return ret; -} - FIXTURE_SETUP(current_nsset) { int i, proc_fd, ret; @@ -128,9 +140,12 @@ FIXTURE_SETUP(current_nsset) char c; for (i = 0; i < PIDFD_NS_MAX; i++) { - self->nsfds[i] = -EBADF; - self->child_nsfds1[i] = -EBADF; - self->child_nsfds2[i] = -EBADF; + self->nsfds[i] = -EBADF; + self->child_nsfds1[i] = -EBADF; + self->child_nsfds2[i] = -EBADF; + self->child_pidfd_derived_nsfds[i] = -EBADF; + self->child_pidfd_derived_nsfds1[i] = -EBADF; + self->child_pidfd_derived_nsfds2[i] = -EBADF; } proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC); @@ -139,6 +154,11 @@ FIXTURE_SETUP(current_nsset) } self->pid = getpid(); + self->pidfd = sys_pidfd_open(self->pid, 0); + EXPECT_GT(self->pidfd, 0) { + TH_LOG("%m - Failed to open pidfd for process %d", self->pid); + } + for (i = 0; i < PIDFD_NS_MAX; i++) { const struct ns_info *info = &ns_info[i]; self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC); @@ -148,22 +168,29 @@ FIXTURE_SETUP(current_nsset) info->name, self->pid); } } - } - self->pidfd = sys_pidfd_open(self->pid, 0); - EXPECT_GT(self->pidfd, 0) { - TH_LOG("%m - Failed to open pidfd for process %d", self->pid); + self->child_pidfd_derived_nsfds[i] = ioctl(self->pidfd, info->pidfd_ioctl, 0); + if (self->child_pidfd_derived_nsfds[i] < 0) { + EXPECT_EQ(errno, EOPNOTSUPP) { + TH_LOG("%m - Failed to derive %s namespace from pidfd of process %d", + info->name, self->pid); + } + } } /* Create task that exits right away. */ - self->child_pid_exited = create_child(&self->child_pidfd_exited, - CLONE_NEWUSER | CLONE_NEWNET); - EXPECT_GT(self->child_pid_exited, 0); + self->child_pid_exited = create_child(&self->child_pidfd_exited, 0); + EXPECT_GE(self->child_pid_exited, 0); - if (self->child_pid_exited == 0) + if (self->child_pid_exited == 0) { + if (self->nsfds[PIDFD_NS_USER] >= 0 && unshare(CLONE_NEWUSER) < 0) + _exit(EXIT_FAILURE); + if (self->nsfds[PIDFD_NS_NET] >= 0 && unshare(CLONE_NEWNET) < 0) + _exit(EXIT_FAILURE); _exit(EXIT_SUCCESS); + } - ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0); + ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, NULL, WEXITED | WNOWAIT), 0); self->pidfd = sys_pidfd_open(self->pid, 0); EXPECT_GE(self->pidfd, 0) { @@ -174,18 +201,43 @@ FIXTURE_SETUP(current_nsset) EXPECT_EQ(ret, 0); /* Create tasks that will be stopped. */ - self->child_pid1 = create_child(&self->child_pidfd1, - CLONE_NEWUSER | CLONE_NEWNS | - CLONE_NEWCGROUP | CLONE_NEWIPC | - CLONE_NEWUTS | CLONE_NEWPID | - CLONE_NEWNET); + if (self->nsfds[PIDFD_NS_USER] >= 0 && self->nsfds[PIDFD_NS_PID] >= 0) + self->child_pid1 = create_child(&self->child_pidfd1, CLONE_NEWUSER | CLONE_NEWPID); + else if (self->nsfds[PIDFD_NS_PID] >= 0) + self->child_pid1 = create_child(&self->child_pidfd1, CLONE_NEWPID); + else if (self->nsfds[PIDFD_NS_USER] >= 0) + self->child_pid1 = create_child(&self->child_pidfd1, CLONE_NEWUSER); + else + self->child_pid1 = create_child(&self->child_pidfd1, 0); EXPECT_GE(self->child_pid1, 0); if (self->child_pid1 == 0) { close(ipc_sockets[0]); - if (!switch_timens()) + if (self->nsfds[PIDFD_NS_MNT] >= 0 && unshare(CLONE_NEWNS) < 0) { + TH_LOG("%m - Failed to unshare mount namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_CGROUP] >= 0 && unshare(CLONE_NEWCGROUP) < 0) { + TH_LOG("%m - Failed to unshare cgroup namespace for process %d", self->pid); _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_IPC] >= 0 && unshare(CLONE_NEWIPC) < 0) { + TH_LOG("%m - Failed to unshare ipc namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_UTS] >= 0 && unshare(CLONE_NEWUTS) < 0) { + TH_LOG("%m - Failed to unshare uts namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_NET] >= 0 && unshare(CLONE_NEWNET) < 0) { + TH_LOG("%m - Failed to unshare net namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_TIME] >= 0 && !switch_timens()) { + TH_LOG("%m - Failed to unshare time namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } if (write_nointr(ipc_sockets[1], "1", 1) < 0) _exit(EXIT_FAILURE); @@ -203,18 +255,43 @@ FIXTURE_SETUP(current_nsset) ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); EXPECT_EQ(ret, 0); - self->child_pid2 = create_child(&self->child_pidfd2, - CLONE_NEWUSER | CLONE_NEWNS | - CLONE_NEWCGROUP | CLONE_NEWIPC | - CLONE_NEWUTS | CLONE_NEWPID | - CLONE_NEWNET); + if (self->nsfds[PIDFD_NS_USER] >= 0 && self->nsfds[PIDFD_NS_PID] >= 0) + self->child_pid2 = create_child(&self->child_pidfd2, CLONE_NEWUSER | CLONE_NEWPID); + else if (self->nsfds[PIDFD_NS_PID] >= 0) + self->child_pid2 = create_child(&self->child_pidfd2, CLONE_NEWPID); + else if (self->nsfds[PIDFD_NS_USER] >= 0) + self->child_pid2 = create_child(&self->child_pidfd2, CLONE_NEWUSER); + else + self->child_pid2 = create_child(&self->child_pidfd2, 0); EXPECT_GE(self->child_pid2, 0); if (self->child_pid2 == 0) { close(ipc_sockets[0]); - if (!switch_timens()) + if (self->nsfds[PIDFD_NS_MNT] >= 0 && unshare(CLONE_NEWNS) < 0) { + TH_LOG("%m - Failed to unshare mount namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_CGROUP] >= 0 && unshare(CLONE_NEWCGROUP) < 0) { + TH_LOG("%m - Failed to unshare cgroup namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_IPC] >= 0 && unshare(CLONE_NEWIPC) < 0) { + TH_LOG("%m - Failed to unshare ipc namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_UTS] >= 0 && unshare(CLONE_NEWUTS) < 0) { + TH_LOG("%m - Failed to unshare uts namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_NET] >= 0 && unshare(CLONE_NEWNET) < 0) { + TH_LOG("%m - Failed to unshare net namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_TIME] >= 0 && !switch_timens()) { + TH_LOG("%m - Failed to unshare time namespace for process %d", self->pid); _exit(EXIT_FAILURE); + } if (write_nointr(ipc_sockets[1], "1", 1) < 0) _exit(EXIT_FAILURE); @@ -267,6 +344,22 @@ FIXTURE_SETUP(current_nsset) info->name, self->child_pid1); } } + + self->child_pidfd_derived_nsfds1[i] = ioctl(self->child_pidfd1, info->pidfd_ioctl, 0); + if (self->child_pidfd_derived_nsfds1[i] < 0) { + EXPECT_EQ(errno, EOPNOTSUPP) { + TH_LOG("%m - Failed to derive %s namespace from pidfd of process %d", + info->name, self->child_pid1); + } + } + + self->child_pidfd_derived_nsfds2[i] = ioctl(self->child_pidfd2, info->pidfd_ioctl, 0); + if (self->child_pidfd_derived_nsfds2[i] < 0) { + EXPECT_EQ(errno, EOPNOTSUPP) { + TH_LOG("%m - Failed to derive %s namespace from pidfd of process %d", + info->name, self->child_pid2); + } + } } close(proc_fd); @@ -288,15 +381,21 @@ FIXTURE_TEARDOWN(current_nsset) close(self->child_nsfds1[i]); if (self->child_nsfds2[i] >= 0) close(self->child_nsfds2[i]); + if (self->child_pidfd_derived_nsfds[i] >= 0) + close(self->child_pidfd_derived_nsfds[i]); + if (self->child_pidfd_derived_nsfds1[i] >= 0) + close(self->child_pidfd_derived_nsfds1[i]); + if (self->child_pidfd_derived_nsfds2[i] >= 0) + close(self->child_pidfd_derived_nsfds2[i]); } if (self->child_pidfd1 >= 0) EXPECT_EQ(0, close(self->child_pidfd1)); if (self->child_pidfd2 >= 0) EXPECT_EQ(0, close(self->child_pidfd2)); - ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0); - ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0); - ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0); + ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, NULL, WEXITED), 0); + ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, NULL, WEXITED), 0); + ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, NULL, WEXITED), 0); } static int preserve_ns(const int pid, const char *ns) @@ -446,6 +545,42 @@ TEST_F(current_nsset, nsfd_incremental_setns) } } +TEST_F(current_nsset, pidfd_derived_nsfd_incremental_setns) +{ + int i; + pid_t pid; + + pid = getpid(); + for (i = 0; i < PIDFD_NS_MAX; i++) { + const struct ns_info *info = &ns_info[i]; + int nsfd; + + if (self->child_pidfd_derived_nsfds1[i] < 0) + continue; + + if (info->flag) { + ASSERT_EQ(setns(self->child_pidfd_derived_nsfds1[i], info->flag), 0) { + TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d", + info->name, self->child_pid1, + self->child_pidfd_derived_nsfds1[i]); + } + } + + /* Verify that we have changed to the correct namespaces. */ + if (info->flag == CLONE_NEWPID) + nsfd = self->child_pidfd_derived_nsfds[i]; + else + nsfd = self->child_pidfd_derived_nsfds1[i]; + ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) { + TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d", + info->name, self->child_pid1, + self->child_pidfd_derived_nsfds1[i]); + } + TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d", + info->name, self->child_pid1, self->child_pidfd_derived_nsfds1[i]); + } +} + TEST_F(current_nsset, pidfd_one_shot_setns) { unsigned flags = 0; @@ -542,6 +677,28 @@ TEST_F(current_nsset, no_foul_play) info->name, self->child_pid2, self->child_nsfds2[i]); } + + /* + * Can't setns to a user namespace outside of our hierarchy since we + * don't have caps in there and didn't create it. That means that under + * no circumstances should we be able to setns to any of the other + * ones since they aren't owned by our user namespace. + */ + for (i = 0; i < PIDFD_NS_MAX; i++) { + const struct ns_info *info = &ns_info[i]; + + if (self->child_pidfd_derived_nsfds2[i] < 0 || !info->flag) + continue; + + ASSERT_NE(setns(self->child_pidfd_derived_nsfds2[i], info->flag), 0) { + TH_LOG("Managed to setns to %s namespace of %d via nsfd %d", + info->name, self->child_pid2, + self->child_pidfd_derived_nsfds2[i]); + } + TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d", + info->name, self->child_pid2, + self->child_pidfd_derived_nsfds2[i]); + } } TEST(setns_einval) diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c index c081ae91313a..e9728e86b4f2 100644 --- a/tools/testing/selftests/pidfd/pidfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_test.c @@ -497,7 +497,7 @@ static int child_poll_leader_exit_test(void *args) pthread_create(&t2, NULL, test_pidfd_poll_leader_exit_thread, NULL); /* - * glibc exit calls exit_group syscall, so explicity call exit only + * glibc exit calls exit_group syscall, so explicitly call exit only * so that only the group leader exits, leaving the threads alone. */ *child_exit_secs = time(NULL); @@ -572,5 +572,5 @@ int main(int argc, char **argv) test_pidfd_send_signal_exited_fail(); test_pidfd_send_signal_recycled_pid_fail(); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c index 0dcb8365ddc3..1e2d49751cde 100644 --- a/tools/testing/selftests/pidfd/pidfd_wait.c +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -26,22 +26,11 @@ #define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__) #endif -static pid_t sys_clone3(struct clone_args *args) -{ - return syscall(__NR_clone3, args, sizeof(struct clone_args)); -} - -static int sys_waitid(int which, pid_t pid, siginfo_t *info, int options, - struct rusage *ru) -{ - return syscall(__NR_waitid, which, pid, info, options, ru); -} - TEST(wait_simple) { int pidfd = -1; pid_t parent_tid = -1; - struct clone_args args = { + struct __clone_args args = { .parent_tid = ptr_to_u64(&parent_tid), .pidfd = ptr_to_u64(&pidfd), .flags = CLONE_PIDFD | CLONE_PARENT_SETTID, @@ -55,7 +44,7 @@ TEST(wait_simple) pidfd = open("/proc/self", O_DIRECTORY | O_RDONLY | O_CLOEXEC); ASSERT_GE(pidfd, 0); - pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED); ASSERT_NE(pid, 0); EXPECT_EQ(close(pidfd), 0); pidfd = -1; @@ -63,18 +52,18 @@ TEST(wait_simple) pidfd = open("/dev/null", O_RDONLY | O_CLOEXEC); ASSERT_GE(pidfd, 0); - pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED); ASSERT_NE(pid, 0); EXPECT_EQ(close(pidfd), 0); pidfd = -1; - pid = sys_clone3(&args); + pid = sys_clone3(&args, sizeof(args)); ASSERT_GE(pid, 0); if (pid == 0) exit(EXIT_SUCCESS); - pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED); ASSERT_GE(pid, 0); ASSERT_EQ(WIFEXITED(info.si_status), true); ASSERT_EQ(WEXITSTATUS(info.si_status), 0); @@ -89,7 +78,7 @@ TEST(wait_states) { int pidfd = -1; pid_t parent_tid = -1; - struct clone_args args = { + struct __clone_args args = { .parent_tid = ptr_to_u64(&parent_tid), .pidfd = ptr_to_u64(&pidfd), .flags = CLONE_PIDFD | CLONE_PARENT_SETTID, @@ -102,7 +91,7 @@ TEST(wait_states) }; ASSERT_EQ(pipe(pfd), 0); - pid = sys_clone3(&args); + pid = sys_clone3(&args, sizeof(args)); ASSERT_GE(pid, 0); if (pid == 0) { @@ -117,28 +106,28 @@ TEST(wait_states) } close(pfd[0]); - ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED), 0); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_STOPPED); ASSERT_EQ(info.si_pid, parent_tid); ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0); - ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED), 0); ASSERT_EQ(write(pfd[1], "C", 1), 1); close(pfd[1]); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_CONTINUED); ASSERT_EQ(info.si_pid, parent_tid); - ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL), 0); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED), 0); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_STOPPED); ASSERT_EQ(info.si_pid, parent_tid); ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0), 0); - ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED), 0); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_KILLED); ASSERT_EQ(info.si_pid, parent_tid); @@ -151,7 +140,7 @@ TEST(wait_nonblock) int pidfd; unsigned int flags = 0; pid_t parent_tid = -1; - struct clone_args args = { + struct __clone_args args = { .parent_tid = ptr_to_u64(&parent_tid), .flags = CLONE_PARENT_SETTID, .exit_signal = SIGCHLD, @@ -173,12 +162,12 @@ TEST(wait_nonblock) SKIP(return, "Skipping PIDFD_NONBLOCK test"); } - ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED); ASSERT_LT(ret, 0); ASSERT_EQ(errno, ECHILD); EXPECT_EQ(close(pidfd), 0); - pid = sys_clone3(&args); + pid = sys_clone3(&args, sizeof(args)); ASSERT_GE(pid, 0); if (pid == 0) { @@ -201,7 +190,7 @@ TEST(wait_nonblock) * Callers need to see EAGAIN/EWOULDBLOCK with non-blocking pidfd when * child processes exist but none have exited. */ - ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED); ASSERT_LT(ret, 0); ASSERT_EQ(errno, EAGAIN); @@ -210,19 +199,19 @@ TEST(wait_nonblock) * WNOHANG raised explicitly when child processes exist but none have * exited. */ - ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED | WNOHANG, NULL); + ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED | WNOHANG); ASSERT_EQ(ret, 0); ASSERT_EQ(fcntl(pidfd, F_SETFL, (flags & ~O_NONBLOCK)), 0); - ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED), 0); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_STOPPED); ASSERT_EQ(info.si_pid, parent_tid); ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0); - ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED), 0); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_EXITED); ASSERT_EQ(info.si_pid, parent_tid); |