Diffstat (limited to 'tools/testing/selftests/seccomp/seccomp_bpf.c')
-rw-r--r--	tools/testing/selftests/seccomp/seccomp_bpf.c	| 678
1 file changed, 653 insertions(+), 25 deletions(-)
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 9c2f448bb3a9..32e2d4df397b 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -24,6 +24,7 @@
#include <linux/filter.h>
#include <sys/prctl.h>
#include <sys/ptrace.h>
+#include <sys/time.h>
#include <sys/user.h>
#include <linux/prctl.h>
#include <linux/ptrace.h>
@@ -47,12 +48,13 @@
#include <linux/kcmp.h>
#include <sys/resource.h>
#include <sys/capability.h>
+#include <linux/perf_event.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <poll.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#include "../clone3/clone3_selftests.h"
/* Attempt to de-conflict with the selftests tree. */
@@ -60,12 +62,26 @@
#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
#endif
+#ifndef MIN
#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+#endif
#ifndef PR_SET_PTRACER
# define PR_SET_PTRACER 0x59616d61
#endif
+#ifndef noinline
+#define noinline __attribute__((noinline))
+#endif
+
+#ifndef __nocf_check
+#define __nocf_check __attribute__((nocf_check))
+#endif
+
+#ifndef __naked
+#define __naked __attribute__((__naked__))
+#endif
+
#ifndef PR_SET_NO_NEW_PRIVS
#define PR_SET_NO_NEW_PRIVS 38
#define PR_GET_NO_NEW_PRIVS 39
@@ -128,6 +144,8 @@ struct seccomp_data {
# define __NR_seccomp 277
# elif defined(__csky__)
# define __NR_seccomp 277
+# elif defined(__loongarch__)
+# define __NR_seccomp 277
# elif defined(__hppa__)
# define __NR_seccomp 338
# elif defined(__powerpc__)
@@ -138,12 +156,20 @@ struct seccomp_data {
# define __NR_seccomp 337
# elif defined(__sh__)
# define __NR_seccomp 372
+# elif defined(__mc68000__)
+# define __NR_seccomp 380
# else
# warning "seccomp syscall number unknown for this architecture"
# define __NR_seccomp 0xffff
# endif
#endif
+#ifndef __NR_uretprobe
+# if defined(__x86_64__)
+# define __NR_uretprobe 335
+# endif
+#endif
+
#ifndef SECCOMP_SET_MODE_STRICT
#define SECCOMP_SET_MODE_STRICT 0
#endif
@@ -780,7 +806,7 @@ void *kill_thread(void *data)
bool die = (bool)data;
if (die) {
- prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+ syscall(__NR_getpid);
return (void *)SIBLING_EXIT_FAILURE;
}
@@ -799,11 +825,11 @@ void kill_thread_or_group(struct __test_metadata *_metadata,
{
pthread_t thread;
void *status;
- /* Kill only when calling __NR_prctl. */
+ /* Kill only when calling __NR_getpid. */
struct sock_filter filter_thread[] = {
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
offsetof(struct seccomp_data, nr)),
- BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
@@ -815,7 +841,7 @@ void kill_thread_or_group(struct __test_metadata *_metadata,
struct sock_filter filter_process[] = {
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
offsetof(struct seccomp_data, nr)),
- BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
BPF_STMT(BPF_RET|BPF_K, kill),
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
@@ -1572,7 +1598,7 @@ void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
ASSERT_EQ(0, ret);
}
/* Directly report the status of our test harness results. */
- syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+ syscall(__NR_exit, _metadata->exit_code);
}
/* Common tracer setup/teardown functions. */
@@ -1612,14 +1638,8 @@ void teardown_trace_fixture(struct __test_metadata *_metadata,
{
if (tracer) {
int status;
- /*
- * Extract the exit code from the other process and
- * adopt it for ourselves in case its asserts failed.
- */
ASSERT_EQ(0, kill(tracer, SIGUSR1));
ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
- if (WEXITSTATUS(status))
- _metadata->passed = 0;
}
}
@@ -1753,6 +1773,10 @@ TEST_F(TRACE_poke, getpid_runs_normally)
NT_ARM_SYSTEM_CALL, &__v)); \
} while (0)
# define SYSCALL_RET(_regs) (_regs).regs[0]
+#elif defined(__loongarch__)
+# define ARCH_REGS struct user_pt_regs
+# define SYSCALL_NUM(_regs) (_regs).regs[11]
+# define SYSCALL_RET(_regs) (_regs).regs[4]
#elif defined(__riscv) && __riscv_xlen == 64
# define ARCH_REGS struct user_regs_struct
# define SYSCALL_NUM(_regs) (_regs).a7
@@ -1838,6 +1862,10 @@ TEST_F(TRACE_poke, getpid_runs_normally)
# define ARCH_REGS struct pt_regs
# define SYSCALL_NUM(_regs) (_regs).regs[3]
# define SYSCALL_RET(_regs) (_regs).regs[0]
+#elif defined(__mc68000__)
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM(_regs) (_regs).orig_d0
+# define SYSCALL_RET(_regs) (_regs).d0
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif
@@ -1902,7 +1930,7 @@ const bool ptrace_entry_set_syscall_ret =
* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
* architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
*/
-#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
+#if defined(__x86_64__) || defined(__i386__) || defined(__mips__) || defined(__mc68000__)
# define ARCH_GETREGS(_regs) ptrace(PTRACE_GETREGS, tracee, 0, &(_regs))
# define ARCH_SETREGS(_regs) ptrace(PTRACE_SETREGS, tracee, 0, &(_regs))
#else
@@ -2172,6 +2200,9 @@ FIXTURE_TEARDOWN(TRACE_syscall)
TEST(negative_ENOSYS)
{
+#if defined(__arm__)
+ SKIP(return, "arm32 does not support calling syscall -1");
+#endif
/*
* There should be no difference between an "internal" skip
* and userspace asking for syscall "-1".
@@ -3060,7 +3091,8 @@ TEST(syscall_restart)
timeout.tv_sec = 1;
errno = 0;
EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
- TH_LOG("Call to nanosleep() failed (errno %d)", errno);
+ TH_LOG("Call to nanosleep() failed (errno %d: %s)",
+ errno, strerror(errno));
}
/* Read final sync from parent. */
@@ -3072,8 +3104,7 @@ TEST(syscall_restart)
}
/* Directly report the status of our test harness results. */
- syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
- : EXIT_FAILURE);
+ syscall(__NR_exit, _metadata->exit_code);
}
EXPECT_EQ(0, close(pipefd[0]));
@@ -3138,12 +3169,15 @@ TEST(syscall_restart)
ret = get_syscall(_metadata, child_pid);
#if defined(__arm__)
/*
- * FIXME:
* - native ARM registers do NOT expose true syscall.
* - compat ARM registers on ARM64 DO expose true syscall.
* - values of utsbuf.machine include 'armv8l' or 'armv8b'
+ * for ARM64 running in compat mode.
*/
ASSERT_EQ(0, uname(&utsbuf));
- if (strncmp(utsbuf.machine, "arm", 3) == 0) {
+ if ((strncmp(utsbuf.machine, "arm", 3) == 0) &&
+ (strncmp(utsbuf.machine, "armv8l", 6) != 0) &&
+ (strncmp(utsbuf.machine, "armv8b", 6) != 0)) {
EXPECT_EQ(__NR_nanosleep, ret);
} else
#endif
@@ -3158,7 +3192,7 @@ TEST(syscall_restart)
ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
if (WIFSIGNALED(status) || WEXITSTATUS(status))
- _metadata->passed = 0;
+ _metadata->exit_code = KSFT_FAIL;
}
TEST_SIGNAL(filter_flag_log, SIGSYS)
@@ -3522,6 +3556,10 @@ static void signal_handler(int signal)
perror("write from signal");
}
+static void signal_handler_nop(int signal)
+{
+}
+
TEST(user_notification_signal)
{
pid_t pid;
@@ -3693,7 +3731,12 @@ TEST(user_notification_sibling_pid_ns)
ASSERT_GE(pid, 0);
if (pid == 0) {
- ASSERT_EQ(unshare(CLONE_NEWPID), 0);
+ ASSERT_EQ(unshare(CLONE_NEWPID), 0) {
+ if (errno == EPERM)
+ SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN");
+ else if (errno == EINVAL)
+ SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)");
+ }
pid2 = fork();
ASSERT_GE(pid2, 0);
@@ -3711,6 +3754,8 @@ TEST(user_notification_sibling_pid_ns)
ASSERT_EQ(unshare(CLONE_NEWPID), 0) {
if (errno == EPERM)
SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN");
+ else if (errno == EINVAL)
+ SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)");
}
ASSERT_EQ(errno, 0);
@@ -3896,6 +3941,9 @@ TEST(user_notification_filter_empty)
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
+ if (__NR_clone3 < 0)
+ SKIP(return, "Test not built with clone3 support");
+
pid = sys_clone3(&args, sizeof(args));
ASSERT_GE(pid, 0);
@@ -3929,6 +3977,60 @@ TEST(user_notification_filter_empty)
EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
}
+TEST(user_ioctl_notification_filter_empty)
+{
+ pid_t pid;
+ long ret;
+ int status, p[2];
+ struct __clone_args args = {
+ .flags = CLONE_FILES,
+ .exit_signal = SIGCHLD,
+ };
+ struct seccomp_notif req = {};
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ if (__NR_clone3 < 0)
+ SKIP(return, "Test not built with clone3 support");
+
+ ASSERT_EQ(0, pipe(p));
+
+ pid = sys_clone3(&args, sizeof(args));
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ int listener;
+
+ listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ if (listener < 0)
+ _exit(EXIT_FAILURE);
+
+ if (dup2(listener, 200) != 200)
+ _exit(EXIT_FAILURE);
+ close(p[1]);
+ close(listener);
+ sleep(1);
+
+ _exit(EXIT_SUCCESS);
+ }
+ if (read(p[0], &status, 1) != 0)
+ _exit(EXIT_SUCCESS);
+ close(p[0]);
+ /*
+	 * The seccomp filter has become unused, so we should be notified
+	 * once the kernel gets around to cleaning up the task struct.
+ */
+ EXPECT_EQ(ioctl(200, SECCOMP_IOCTL_NOTIF_RECV, &req), -1);
+ EXPECT_EQ(errno, ENOENT);
+
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+}
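
The test above leans on CLONE_FILES: parent and child share one fd table, so the
listener the child pins at fd 200 stays usable in the parent, and once no task
uses the filter any more, SECCOMP_IOCTL_NOTIF_RECV fails with ENOENT. A minimal
standalone sketch of the shared-table behavior (not part of the patch; it uses
glibc clone() rather than the harness's sys_clone3() wrapper):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <sched.h>
    #include <signal.h>
    #include <stdio.h>
    #include <sys/wait.h>
    #include <unistd.h>

    static char stack[64 * 1024];

    static int child_fn(void *arg)
    {
            /* Pin /dev/null at fd 200 in the *shared* descriptor table. */
            int fd = open("/dev/null", O_RDONLY);

            (void)arg;
            if (fd < 0 || dup2(fd, 200) != 200)
                    _exit(1);
            _exit(0);
    }

    int main(void)
    {
            /* CLONE_FILES: the child operates on the parent's fd table. */
            pid_t pid = clone(child_fn, stack + sizeof(stack),
                              CLONE_FILES | SIGCHLD, NULL);

            if (pid < 0)
                    return 1;
            waitpid(pid, NULL, 0);
            /* fd 200 is still open here, just like the listener above. */
            printf("fcntl(200, F_GETFD) = %d\n", fcntl(200, F_GETFD));
            return 0;
    }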
+
static void *do_thread(void *data)
{
return NULL;
@@ -3950,6 +4052,9 @@ TEST(user_notification_filter_empty_threaded)
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
+ if (__NR_clone3 < 0)
+ SKIP(return, "Test not built with clone3 support");
+
pid = sys_clone3(&args, sizeof(args));
ASSERT_GE(pid, 0);
@@ -4015,6 +4120,16 @@ TEST(user_notification_filter_empty_threaded)
EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
}
+
+int get_next_fd(int prev_fd)
+{
+ for (int i = prev_fd + 1; i < FD_SETSIZE; ++i) {
+ if (fcntl(i, F_GETFD) == -1)
+ return i;
+ }
+ _exit(EXIT_FAILURE);
+}
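
A quick standalone demonstration (not part of the patch) of why this probe is
reliable: fcntl() fails with EBADF on a number that is not an open descriptor,
and the kernel always allocates the lowest free number, so the first number the
probe rejects is the one the next open()/dup() will return:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            int hole = dup(0);      /* grab the lowest free fd number... */

            close(hole);            /* ...and free it again */
            /* A scan like get_next_fd() finds exactly that number: */
            for (int i = 0; i < 1024; i++) {
                    if (fcntl(i, F_GETFD) == -1) {
                            printf("next fd: %d (expected %d)\n", i, hole);
                            return 0;
                    }
            }
            return 1;
    }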
+
TEST(user_notification_addfd)
{
pid_t pid;
@@ -4031,7 +4146,7 @@ TEST(user_notification_addfd)
/* There may be arbitrary already-open fds at test start. */
memfd = memfd_create("test", 0);
ASSERT_GE(memfd, 0);
- nextfd = memfd + 1;
+ nextfd = get_next_fd(memfd);
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret) {
@@ -4042,7 +4157,8 @@ TEST(user_notification_addfd)
/* Check that the basic notification machinery works */
listener = user_notif_syscall(__NR_getppid,
SECCOMP_FILTER_FLAG_NEW_LISTENER);
- ASSERT_EQ(listener, nextfd++);
+ ASSERT_EQ(listener, nextfd);
+ nextfd = get_next_fd(nextfd);
pid = fork();
ASSERT_GE(pid, 0);
@@ -4097,14 +4213,16 @@ TEST(user_notification_addfd)
/* Verify we can set an arbitrary remote fd */
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
- EXPECT_EQ(fd, nextfd++);
+ EXPECT_EQ(fd, nextfd);
+ nextfd = get_next_fd(nextfd);
EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
/* Verify we can set an arbitrary remote fd with large size */
memset(&big, 0x0, sizeof(big));
big.addfd = addfd;
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big);
- EXPECT_EQ(fd, nextfd++);
+ EXPECT_EQ(fd, nextfd);
+ nextfd = get_next_fd(nextfd);
/* Verify we can set a specific remote fd */
addfd.newfd = 42;
@@ -4142,7 +4260,8 @@ TEST(user_notification_addfd)
* Child has earlier "low" fds and now 42, so we expect the next
* lowest available fd to be assigned here.
*/
- EXPECT_EQ(fd, nextfd++);
+ EXPECT_EQ(fd, nextfd);
+ nextfd = get_next_fd(nextfd);
ASSERT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
/*
@@ -4243,6 +4362,61 @@ TEST(user_notification_addfd_rlimit)
close(memfd);
}
+#ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP
+#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
+#define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64)
+#endif
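
The fallback defines above only let the test build against older UAPI headers;
a running kernel that predates the ioctl still rejects it. A hypothetical
runtime probe, not something this patch does, assuming flags == 0 is accepted
as a no-op by supporting kernels and rejected with EINVAL otherwise (it would
drop into this file's existing includes):

    /* Hypothetical helper: true if the kernel knows the SET_FLAGS ioctl. */
    static bool have_notif_set_flags(int listener)
    {
            /* flags == 0 clears SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, a no-op
             * on a fresh listener; older kernels fail with EINVAL (assumed).
             */
            return ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, 0, 0) == 0;
    }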
+
+TEST(user_notification_sync)
+{
+ struct seccomp_notif req = {};
+ struct seccomp_notif_resp resp = {};
+ int status, listener;
+ pid_t pid;
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ ASSERT_GE(listener, 0);
+
+ /* Try to set invalid flags. */
+ EXPECT_SYSCALL_RETURN(-EINVAL,
+ ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, 0xffffffff, 0));
+
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS,
+ SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ ret = syscall(__NR_getppid);
+ ASSERT_EQ(ret, USER_NOTIF_MAGIC) {
+ _exit(1);
+ }
+ _exit(0);
+ }
+
+ req.pid = 0;
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+
+ ASSERT_EQ(req.data.nr, __NR_getppid);
+
+ resp.id = req.id;
+ resp.error = 0;
+ resp.val = USER_NOTIF_MAGIC;
+ resp.flags = 0;
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+ ASSERT_EQ(waitpid(pid, &status, 0), pid);
+ ASSERT_EQ(status, 0);
+}
+
/* Make sure PTRACE_O_SUSPEND_SECCOMP requires CAP_SYS_ADMIN. */
FIXTURE(O_SUSPEND_SECCOMP) {
pid_t pid;
@@ -4658,6 +4832,460 @@ TEST(user_notification_wait_killable_fatal)
EXPECT_EQ(SIGTERM, WTERMSIG(status));
}
+/* Ensure that signals arriving after the reply do not interrupt the syscall */
+TEST(user_notification_wait_killable_after_reply)
+{
+ int i, max_iter = 100000;
+ int listener, status;
+ int pipe_fds[2];
+ pid_t pid;
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret)
+ {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ listener = user_notif_syscall(
+ __NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER |
+ SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
+ ASSERT_GE(listener, 0);
+
+ /*
+ * Used to count invocations. One token is transferred from the child
+	 * to the parent per syscall invocation; the parent tries to take
+ * one token per successful RECV. If the syscall is restarted after
+ * RECV the parent will try to get two tokens while the child only
+ * provided one.
+ */
+ ASSERT_EQ(pipe(pipe_fds), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ struct sigaction new_action = {
+ .sa_handler = signal_handler_nop,
+ .sa_flags = SA_RESTART,
+ };
+ struct itimerval timer = {
+ .it_value = { .tv_usec = 1000 },
+ .it_interval = { .tv_usec = 1000 },
+ };
+ char c = 'a';
+
+ close(pipe_fds[0]);
+
+ /* Setup the sigaction with SA_RESTART */
+ if (sigaction(SIGALRM, &new_action, NULL)) {
+ perror("sigaction");
+ exit(1);
+ }
+
+ /*
+ * Kill with SIGALRM repeatedly, to try to hit the race when
+ * handling the syscall.
+ */
+ if (setitimer(ITIMER_REAL, &timer, NULL) < 0)
+ perror("setitimer");
+
+ for (i = 0; i < max_iter; ++i) {
+ int fd;
+
+ /* Send one token per iteration to catch repeats. */
+ if (write(pipe_fds[1], &c, sizeof(c)) != 1) {
+ perror("write");
+ exit(1);
+ }
+
+ fd = syscall(__NR_dup, 0);
+ if (fd < 0) {
+ perror("dup");
+ exit(1);
+ }
+ close(fd);
+ }
+
+ exit(0);
+ }
+
+ close(pipe_fds[1]);
+
+ for (i = 0; i < max_iter; ++i) {
+ struct seccomp_notif req = {};
+ struct seccomp_notif_addfd addfd = {};
+ struct pollfd pfd = {
+ .fd = pipe_fds[0],
+ .events = POLLIN,
+ };
+ char c;
+
+ /*
+ * Try to receive one token. If it failed, one child syscall
+ * was restarted after RECV and needed to be handled twice.
+ */
+ ASSERT_EQ(poll(&pfd, 1, 1000), 1)
+ kill(pid, SIGKILL);
+
+ ASSERT_EQ(read(pipe_fds[0], &c, sizeof(c)), 1)
+ kill(pid, SIGKILL);
+
+ /*
+ * Get the notification, reply to it as fast as possible to test
+		 * whether the child wrongly skips going into the non-interruptible
+ * (TASK_KILLABLE) state.
+ */
+ do
+ ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req);
+ while (ret < 0 && errno == ENOENT); /* Accept interruptions before RECV */
+ ASSERT_EQ(ret, 0)
+ kill(pid, SIGKILL);
+
+ addfd.id = req.id;
+ addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
+ addfd.srcfd = 0;
+ ASSERT_GE(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), 0)
+ kill(pid, SIGKILL);
+ }
+
+ /*
+	 * Wait for the process to exit, and make sure it terminated
+	 * with a zero exit code.
+ */
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+}
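
Should this test ever need to SKIP on kernels without
SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV, support can be probed the way the
filter-flag tests earlier in this file probe other flags: seccomp() validates
flags before dereferencing the filter pointer, so a NULL program yields EFAULT
when the flag is known and EINVAL when it is not. A sketch using this file's
seccomp() wrapper (the flag must be paired with
SECCOMP_FILTER_FLAG_NEW_LISTENER to pass validation):

    static bool have_wait_killable_recv(void)
    {
            /* EFAULT: flags accepted, NULL filter rejected -> supported.
             * EINVAL: the flag itself was rejected -> kernel too old. */
            return seccomp(SECCOMP_SET_MODE_FILTER,
                           SECCOMP_FILTER_FLAG_NEW_LISTENER |
                           SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV,
                           NULL) == -1 && errno == EFAULT;
    }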
+
+struct tsync_vs_thread_leader_args {
+ pthread_t leader;
+};
+
+static void *tsync_vs_dead_thread_leader_sibling(void *_args)
+{
+ struct sock_filter allow_filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog allow_prog = {
+ .len = (unsigned short)ARRAY_SIZE(allow_filter),
+ .filter = allow_filter,
+ };
+ struct tsync_vs_thread_leader_args *args = _args;
+ void *retval;
+ long ret;
+
+ ret = pthread_join(args->leader, &retval);
+ if (ret)
+ exit(1);
+ if (retval != _args)
+ exit(2);
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &allow_prog);
+ if (ret)
+ exit(3);
+
+ exit(0);
+}
+
+/*
+ * Ensure that a dead thread leader doesn't prevent installing new filters with
+ * SECCOMP_FILTER_FLAG_TSYNC from other threads.
+ */
+TEST(tsync_vs_dead_thread_leader)
+{
+ int status;
+ pid_t pid;
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ struct sock_filter allow_filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog allow_prog = {
+ .len = (unsigned short)ARRAY_SIZE(allow_filter),
+ .filter = allow_filter,
+ };
+ struct tsync_vs_thread_leader_args *args;
+ pthread_t sibling;
+
+ args = malloc(sizeof(*args));
+ ASSERT_NE(NULL, args);
+ args->leader = pthread_self();
+
+ ret = pthread_create(&sibling, NULL,
+ tsync_vs_dead_thread_leader_sibling, args);
+ ASSERT_EQ(0, ret);
+
+ /* Install a new filter just to the leader thread. */
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
+ ASSERT_EQ(0, ret);
+ pthread_exit(args);
+ exit(1);
+ }
+
+ EXPECT_EQ(pid, waitpid(pid, &status, 0));
+ EXPECT_EQ(0, status);
+}
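
A detail worth remembering when debugging TSYNC failures like the one this test
guards against: on synchronization failure seccomp() returns not -1 but the TID
of the first thread that could not be synchronized (see seccomp(2)). A small
sketch, assuming this file's seccomp() wrapper:

    static long tsync_or_report(const struct sock_fprog *prog)
    {
            long ret = seccomp(SECCOMP_SET_MODE_FILTER,
                               SECCOMP_FILTER_FLAG_TSYNC, (void *)prog);

            if (ret > 0)    /* TID of the first unsynchronizable thread */
                    fprintf(stderr, "thread %ld blocked TSYNC\n", ret);
            return ret;
    }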
+
+#ifdef __x86_64__
+
+/*
+ * We need a naked probed_uprobe function. Use __nocf_check to skip the
+ * possible endbr64 instruction and ignore -Wattributes, otherwise the
+ * compilation might fail.
+ */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wattributes"
+
+__naked __nocf_check noinline int probed_uprobe(void)
+{
+ /*
+ * Optimized uprobe is possible only on top of nop5 instruction.
+ */
+	 * An optimized uprobe is possible only on top of a nop5 instruction.
+ ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00 \n"
+ "ret \n"
+ );
+}
+#pragma GCC diagnostic pop
+
+#else
+noinline int probed_uprobe(void)
+{
+ return 1;
+}
+#endif
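
The five bytes emitted in the x86_64 variant decode to the canonical 5-byte
NOP. The width is deliberate: it matches a rel32 call, which (an assumption
consistent with the test's setup) is what the kernel patches the site into when
it optimizes the uprobe, and it is why a naked function with no
compiler-generated prologue is needed:

    /*
     *   0f 1f 44 00 00    nopl 0x0(%rax,%rax,1)    probe site as emitted
     *   e8 xx xx xx xx    call <trampoline>        after optimization
     *                                              (assumed mechanism)
     */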
+
+noinline int probed_uretprobe(void)
+{
+ return 1;
+}
+
+static int parse_uint_from_file(const char *file, const char *fmt)
+{
+ int err = -1, ret;
+ FILE *f;
+
+ f = fopen(file, "re");
+ if (f) {
+ err = fscanf(f, fmt, &ret);
+ fclose(f);
+ }
+ return err == 1 ? ret : err;
+}
+
+static int determine_uprobe_perf_type(void)
+{
+ const char *file = "/sys/bus/event_source/devices/uprobe/type";
+
+ return parse_uint_from_file(file, "%d\n");
+}
+
+static int determine_uprobe_retprobe_bit(void)
+{
+ const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
+
+ return parse_uint_from_file(file, "config:%d\n");
+}
+
+static ssize_t get_uprobe_offset(const void *addr)
+{
+ size_t start, base, end;
+ bool found = false;
+ char buf[256];
+ FILE *f;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!f)
+ return -1;
+
+ while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &base) == 4) {
+ if (buf[2] == 'x' && (uintptr_t)addr >= start && (uintptr_t)addr < end) {
+ found = true;
+ break;
+ }
+ }
+ fclose(f);
+ return found ? (uintptr_t)addr - start + base : -1;
+}
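
A worked example of the final expression, with hypothetical numbers:

    /*
     * If probed_uprobe sits at virtual address 0x55555555a120 and
     * /proc/self/maps contains the executable mapping
     *
     *   555555559000-55555555b000 r-xp 00005000 fe:01 1234 /proc/self/exe
     *
     * then start = 0x555555559000 and base = 0x5000, so the file offset
     * handed to perf via attr.config2 is
     *
     *   0x55555555a120 - 0x555555559000 + 0x5000 = 0x6120
     */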
+
+FIXTURE(UPROBE) {
+ int fd;
+};
+
+FIXTURE_VARIANT(UPROBE) {
+ /*
+ * All of the U(RET)PROBE behaviors can be tested with either
+ * u(ret)probe attached or not
+ */
+ bool attach;
+ /*
+ * Test both uprobe and uretprobe.
+ */
+ bool uretprobe;
+};
+
+FIXTURE_VARIANT_ADD(UPROBE, not_attached) {
+ .attach = false,
+ .uretprobe = false,
+};
+
+FIXTURE_VARIANT_ADD(UPROBE, uprobe_attached) {
+ .attach = true,
+ .uretprobe = false,
+};
+
+FIXTURE_VARIANT_ADD(UPROBE, uretprobe_attached) {
+ .attach = true,
+ .uretprobe = true,
+};
+
+FIXTURE_SETUP(UPROBE)
+{
+ const size_t attr_sz = sizeof(struct perf_event_attr);
+ struct perf_event_attr attr;
+ ssize_t offset;
+ int type, bit;
+
+#if !defined(__NR_uprobe) || !defined(__NR_uretprobe)
+	SKIP(return, "__NR_uprobe or __NR_uretprobe syscalls not defined");
+#endif
+
+ if (!variant->attach)
+ return;
+
+ memset(&attr, 0, attr_sz);
+
+ type = determine_uprobe_perf_type();
+ ASSERT_GE(type, 0);
+
+ if (variant->uretprobe) {
+ bit = determine_uprobe_retprobe_bit();
+ ASSERT_GE(bit, 0);
+ }
+
+ offset = get_uprobe_offset(variant->uretprobe ? probed_uretprobe : probed_uprobe);
+ ASSERT_GE(offset, 0);
+
+ if (variant->uretprobe)
+ attr.config |= 1 << bit;
+ attr.size = attr_sz;
+ attr.type = type;
+ attr.config1 = ptr_to_u64("/proc/self/exe");
+ attr.config2 = offset;
+
+ self->fd = syscall(__NR_perf_event_open, &attr,
+ getpid() /* pid */, -1 /* cpu */, -1 /* group_fd */,
+ PERF_FLAG_FD_CLOEXEC);
+}
+
+FIXTURE_TEARDOWN(UPROBE)
+{
+	/* We could call close(self->fd), but we'd need an extra filter
+	 * entry to allow it, and we are calling _exit() right away anyway.
+ */
+}
+
+static int run_probed_with_filter(struct sock_fprog *prog)
+{
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
+ seccomp(SECCOMP_SET_MODE_FILTER, 0, prog)) {
+ return -1;
+ }
+
+ /*
+ * Uprobe is optimized after first hit, so let's hit twice.
+ */
+ probed_uprobe();
+ probed_uprobe();
+
+ probed_uretprobe();
+ return 0;
+}
+
+TEST_F(UPROBE, uprobe_default_allow)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ ASSERT_EQ(0, run_probed_with_filter(&prog));
+}
+
+TEST_F(UPROBE, uprobe_default_block)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit_group, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ ASSERT_EQ(0, run_probed_with_filter(&prog));
+}
+
+TEST_F(UPROBE, uprobe_block_syscall)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+#ifdef __NR_uprobe
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uprobe, 1, 2),
+#endif
+#ifdef __NR_uretprobe
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 0, 1),
+#endif
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ ASSERT_EQ(0, run_probed_with_filter(&prog));
+}
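
cBPF jump targets are relative skip counts, so the offsets above are easy to
misread. On x86_64, where both __NR_uprobe and __NR_uretprobe are defined, the
assembled program is:

    /*
     *   0: ld  [nr]
     *   1: jeq #__NR_uprobe,    jt=1 -> 3, jf=2 -> 4
     *   2: jeq #__NR_uretprobe, jt=0 -> 3, jf=1 -> 4
     *   3: ret #SECCOMP_RET_KILL
     *   4: ret #SECCOMP_RET_ALLOW
     *
     * Only the two probe syscalls reach RET_KILL; everything else is
     * allowed. The next test inverts this into a default-kill policy.
     */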
+
+TEST_F(UPROBE, uprobe_default_block_with_syscall)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+#ifdef __NR_uprobe
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uprobe, 3, 0),
+#endif
+#ifdef __NR_uretprobe
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 2, 0),
+#endif
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit_group, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ ASSERT_EQ(0, run_probed_with_filter(&prog));
+}
+
/*
* TODO:
* - expand NNP testing