summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-06-28 19:49:37 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-06-28 19:49:37 -0700
commit616ea5cc4a7b058f8c27e37b9a597d8704c49130 (patch)
tree7a3a2186026a580de17847f2e574190740a62204
parent233a806b00e31b3ab8d57a68f1aab40cf1e5eaea (diff)
parent9a03abc16c77062c73972df08206f1031862d9b4 (diff)
Merge tag 'seccomp-v5.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux
Pull seccomp updates from Kees Cook: - Add "atomic addfd + send reply" mode to SECCOMP_USER_NOTIF to better handle EINTR races visible to seccomp monitors. (Rodrigo Campos, Sargun Dhillon) - Improve seccomp selftests for readability in CI systems. (Kees Cook) * tag 'seccomp-v5.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux: selftests/seccomp: Avoid using "sysctl" for report selftests/seccomp: Flush benchmark output selftests/seccomp: More closely track fds being assigned selftests/seccomp: Add test for atomic addfd+send seccomp: Support atomic "addfd + send reply"
-rw-r--r--Documentation/userspace-api/seccomp_filter.rst12
-rw-r--r--include/uapi/linux/seccomp.h1
-rw-r--r--kernel/seccomp.c51
-rw-r--r--tools/testing/selftests/seccomp/seccomp_benchmark.c10
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c51
5 files changed, 113 insertions, 12 deletions
diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst
index 6efb41cc8072..d61219889e49 100644
--- a/Documentation/userspace-api/seccomp_filter.rst
+++ b/Documentation/userspace-api/seccomp_filter.rst
@@ -259,6 +259,18 @@ and ``ioctl(SECCOMP_IOCTL_NOTIF_SEND)`` a response, indicating what should be
returned to userspace. The ``id`` member of ``struct seccomp_notif_resp`` should
be the same ``id`` as in ``struct seccomp_notif``.
+Userspace can also add file descriptors to the notifying process via
+``ioctl(SECCOMP_IOCTL_NOTIF_ADDFD)``. The ``id`` member of
+``struct seccomp_notif_addfd`` should be the same ``id`` as in
+``struct seccomp_notif``. The ``newfd_flags`` flag may be used to set flags
+like O_EXEC on the file descriptor in the notifying process. If the supervisor
+wants to inject the file descriptor with a specific number, the
+``SECCOMP_ADDFD_FLAG_SETFD`` flag can be used, and set the ``newfd`` member to
+the specific number to use. If that file descriptor is already open in the
+notifying process it will be replaced. The supervisor can also add an FD, and
+respond atomically by using the ``SECCOMP_ADDFD_FLAG_SEND`` flag and the return
+value will be the injected file descriptor number.
+
It is worth noting that ``struct seccomp_data`` contains the values of register
arguments to the syscall, but does not contain pointers to memory. The task's
memory is accessible to suitably privileged traces via ``ptrace()`` or
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 6ba18b82a02e..78074254ab98 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -115,6 +115,7 @@ struct seccomp_notif_resp {
/* valid flags for seccomp_notif_addfd */
#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */
+#define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */
/**
* struct seccomp_notif_addfd
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 9f58049ac16d..057e17f3215d 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -107,6 +107,7 @@ struct seccomp_knotif {
* installing process should allocate the fd as normal.
* @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC
* is allowed.
+ * @ioctl_flags: The flags used for the seccomp_addfd ioctl.
* @ret: The return value of the installing process. It is set to the fd num
* upon success (>= 0).
* @completion: Indicates that the installing process has completed fd
@@ -118,6 +119,7 @@ struct seccomp_kaddfd {
struct file *file;
int fd;
unsigned int flags;
+ __u32 ioctl_flags;
union {
bool setfd;
@@ -1065,18 +1067,37 @@ static u64 seccomp_next_notify_id(struct seccomp_filter *filter)
return filter->notif->next_id++;
}
-static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd)
+static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd, struct seccomp_knotif *n)
{
+ int fd;
+
/*
* Remove the notification, and reset the list pointers, indicating
* that it has been handled.
*/
list_del_init(&addfd->list);
if (!addfd->setfd)
- addfd->ret = receive_fd(addfd->file, addfd->flags);
+ fd = receive_fd(addfd->file, addfd->flags);
else
- addfd->ret = receive_fd_replace(addfd->fd, addfd->file,
- addfd->flags);
+ fd = receive_fd_replace(addfd->fd, addfd->file, addfd->flags);
+ addfd->ret = fd;
+
+ if (addfd->ioctl_flags & SECCOMP_ADDFD_FLAG_SEND) {
+ /* If we fail reset and return an error to the notifier */
+ if (fd < 0) {
+ n->state = SECCOMP_NOTIFY_SENT;
+ } else {
+ /* Return the FD we just added */
+ n->flags = 0;
+ n->error = 0;
+ n->val = fd;
+ }
+ }
+
+ /*
+ * Mark the notification as completed. From this point, addfd mem
+ * might be invalidated and we can't safely read it anymore.
+ */
complete(&addfd->completion);
}
@@ -1120,7 +1141,7 @@ static int seccomp_do_user_notification(int this_syscall,
struct seccomp_kaddfd, list);
/* Check if we were woken up by a addfd message */
if (addfd)
- seccomp_handle_addfd(addfd);
+ seccomp_handle_addfd(addfd, &n);
} while (n.state != SECCOMP_NOTIFY_REPLIED);
@@ -1581,7 +1602,7 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter,
if (addfd.newfd_flags & ~O_CLOEXEC)
return -EINVAL;
- if (addfd.flags & ~SECCOMP_ADDFD_FLAG_SETFD)
+ if (addfd.flags & ~(SECCOMP_ADDFD_FLAG_SETFD | SECCOMP_ADDFD_FLAG_SEND))
return -EINVAL;
if (addfd.newfd && !(addfd.flags & SECCOMP_ADDFD_FLAG_SETFD))
@@ -1591,6 +1612,7 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter,
if (!kaddfd.file)
return -EBADF;
+ kaddfd.ioctl_flags = addfd.flags;
kaddfd.flags = addfd.newfd_flags;
kaddfd.setfd = addfd.flags & SECCOMP_ADDFD_FLAG_SETFD;
kaddfd.fd = addfd.newfd;
@@ -1616,6 +1638,23 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter,
goto out_unlock;
}
+ if (addfd.flags & SECCOMP_ADDFD_FLAG_SEND) {
+ /*
+ * Disallow queuing an atomic addfd + send reply while there are
+ * some addfd requests still to process.
+ *
+ * There is no clear reason to support it and allows us to keep
+ * the loop on the other side straight-forward.
+ */
+ if (!list_empty(&knotif->addfd)) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+
+ /* Allow exactly only one reply */
+ knotif->state = SECCOMP_NOTIFY_REPLIED;
+ }
+
list_add(&kaddfd.list, &knotif->addfd);
complete(&knotif->ready);
mutex_unlock(&filter->notify_lock);
diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c
index fcc806585266..6e5102a7d7c9 100644
--- a/tools/testing/selftests/seccomp/seccomp_benchmark.c
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -143,9 +143,15 @@ int main(int argc, char *argv[])
unsigned long long native, filter1, filter2, bitmap1, bitmap2;
unsigned long long entry, per_filter1, per_filter2;
+ setbuf(stdout, NULL);
+
+ printf("Running on:\n");
+ system("uname -a");
+
printf("Current BPF sysctl settings:\n");
- system("sysctl net.core.bpf_jit_enable");
- system("sysctl net.core.bpf_jit_harden");
+ /* Avoid using "sysctl" which may not be installed. */
+ system("grep -H . /proc/sys/net/core/bpf_jit_enable");
+ system("grep -H . /proc/sys/net/core/bpf_jit_harden");
if (argc > 1)
samples = strtoull(argv[1], NULL, 0);
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index e3d5c77a8612..1d64891e6492 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -235,6 +235,10 @@ struct seccomp_notif_addfd {
};
#endif
+#ifndef SECCOMP_ADDFD_FLAG_SEND
+#define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */
+#endif
+
struct seccomp_notif_addfd_small {
__u64 id;
char weird[4];
@@ -3959,7 +3963,7 @@ TEST(user_notification_addfd)
{
pid_t pid;
long ret;
- int status, listener, memfd, fd;
+ int status, listener, memfd, fd, nextfd;
struct seccomp_notif_addfd addfd = {};
struct seccomp_notif_addfd_small small = {};
struct seccomp_notif_addfd_big big = {};
@@ -3968,25 +3972,34 @@ TEST(user_notification_addfd)
/* 100 ms */
struct timespec delay = { .tv_nsec = 100000000 };
+ /* There may be arbitrary already-open fds at test start. */
memfd = memfd_create("test", 0);
ASSERT_GE(memfd, 0);
+ nextfd = memfd + 1;
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
+ /* fd: 4 */
/* Check that the basic notification machinery works */
listener = user_notif_syscall(__NR_getppid,
SECCOMP_FILTER_FLAG_NEW_LISTENER);
- ASSERT_GE(listener, 0);
+ ASSERT_EQ(listener, nextfd++);
pid = fork();
ASSERT_GE(pid, 0);
if (pid == 0) {
+ /* fds will be added and this value is expected */
if (syscall(__NR_getppid) != USER_NOTIF_MAGIC)
exit(1);
+
+ /* Atomic addfd+send is received here. Check it is a valid fd */
+ if (fcntl(syscall(__NR_getppid), F_GETFD) == -1)
+ exit(1);
+
exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
}
@@ -4028,14 +4041,14 @@ TEST(user_notification_addfd)
/* Verify we can set an arbitrary remote fd */
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
- EXPECT_GE(fd, 0);
+ EXPECT_EQ(fd, nextfd++);
EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
/* Verify we can set an arbitrary remote fd with large size */
memset(&big, 0x0, sizeof(big));
big.addfd = addfd;
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big);
- EXPECT_GE(fd, 0);
+ EXPECT_EQ(fd, nextfd++);
/* Verify we can set a specific remote fd */
addfd.newfd = 42;
@@ -4065,6 +4078,32 @@ TEST(user_notification_addfd)
ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
ASSERT_EQ(addfd.id, req.id);
+ /* Verify we can do an atomic addfd and send */
+ addfd.newfd = 0;
+ addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
+ fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
+ /*
+ * Child has earlier "low" fds and now 42, so we expect the next
+ * lowest available fd to be assigned here.
+ */
+ EXPECT_EQ(fd, nextfd++);
+ EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
+
+ /*
+ * This sets the ID of the ADD FD to the last request plus 1. The
+ * notification ID increments 1 per notification.
+ */
+ addfd.id = req.id + 1;
+
+ /* This spins until the underlying notification is generated */
+ while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 &&
+ errno != -EINPROGRESS)
+ nanosleep(&delay, NULL);
+
+ memset(&req, 0, sizeof(req));
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+ ASSERT_EQ(addfd.id, req.id);
+
resp.id = req.id;
resp.error = 0;
resp.val = USER_NOTIF_MAGIC;
@@ -4125,6 +4164,10 @@ TEST(user_notification_addfd_rlimit)
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
EXPECT_EQ(errno, EMFILE);
+ addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
+ EXPECT_EQ(errno, EMFILE);
+
addfd.newfd = 100;
addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);