summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/pidfs.c9
-rw-r--r--kernel/exit.c6
-rw-r--r--kernel/signal.c3
-rw-r--r--tools/testing/selftests/pidfd/pidfd_info_test.c237
4 files changed, 225 insertions, 30 deletions
diff --git a/fs/pidfs.c b/fs/pidfs.c
index a48cc44ced6f..1b3d23e0ffdd 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -210,20 +210,21 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
{
struct pid *pid = pidfd_pid(file);
- bool thread = file->f_flags & PIDFD_THREAD;
struct task_struct *task;
__poll_t poll_flags = 0;
poll_wait(file, &pid->wait_pidfd, pts);
/*
- * Depending on PIDFD_THREAD, inform pollers when the thread
- * or the whole thread-group exits.
+ * Don't wake waiters if the thread-group leader exited
+ * prematurely. They either get notified when the last subthread
+ * exits or not at all if one of the remaining subthreads execs
+ * and assumes the struct pid of the old thread-group leader.
*/
guard(rcu)();
task = pid_task(pid, PIDTYPE_PID);
if (!task)
poll_flags = EPOLLIN | EPOLLRDNORM | EPOLLHUP;
- else if (task->exit_state && (thread || thread_group_empty(task)))
+ else if (task->exit_state && !delay_group_leader(task))
poll_flags = EPOLLIN | EPOLLRDNORM;
return poll_flags;
diff --git a/kernel/exit.c b/kernel/exit.c
index 9916305e34d3..683766316a3d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -743,10 +743,10 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
tsk->exit_state = EXIT_ZOMBIE;
/*
- * sub-thread or delay_group_leader(), wake up the
- * PIDFD_THREAD waiters.
+ * Ignore thread-group leaders that exited before all
+ * subthreads did.
*/
- if (!thread_group_empty(tsk))
+ if (!delay_group_leader(tsk))
do_notify_pidfd(tsk);
if (unlikely(tsk->ptrace)) {
diff --git a/kernel/signal.c b/kernel/signal.c
index 081f19a24506..027ad9e97417 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2180,8 +2180,7 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
WARN_ON_ONCE(!tsk->ptrace &&
(tsk->group_leader != tsk || !thread_group_empty(tsk)));
/*
- * tsk is a group leader and has no threads, wake up the
- * non-PIDFD_THREAD waiters.
+ * Notify for thread-group leaders without subthreads.
*/
if (thread_group_empty(tsk))
do_notify_pidfd(tsk);
diff --git a/tools/testing/selftests/pidfd/pidfd_info_test.c b/tools/testing/selftests/pidfd/pidfd_info_test.c
index 09bc4ae7aed5..1758a1b0457b 100644
--- a/tools/testing/selftests/pidfd/pidfd_info_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_info_test.c
@@ -236,7 +236,7 @@ static void *pidfd_info_pause_thread(void *arg)
TEST_F(pidfd_info, thread_group)
{
- pid_t pid_leader, pid_thread;
+ pid_t pid_leader, pid_poller, pid_thread;
pthread_t thread;
int nevents, pidfd_leader, pidfd_thread, pidfd_leader_thread, ret;
int ipc_sockets[2];
@@ -262,6 +262,35 @@ TEST_F(pidfd_info, thread_group)
syscall(__NR_exit, EXIT_SUCCESS);
}
+ /*
+ * Opening a PIDFD_THREAD aka thread-specific pidfd based on a
+ * thread-group leader must succeed.
+ */
+ pidfd_leader_thread = sys_pidfd_open(pid_leader, PIDFD_THREAD);
+ ASSERT_GE(pidfd_leader_thread, 0);
+
+ pid_poller = fork();
+ ASSERT_GE(pid_poller, 0);
+ if (pid_poller == 0) {
+ /*
+ * We can't poll and wait for the old thread-group
+ * leader to exit using a thread-specific pidfd. The
+ * thread-group leader exited prematurely and
+ * notification is delayed until all subthreads have
+ * exited.
+ */
+ fds.events = POLLIN;
+ fds.fd = pidfd_leader_thread;
+ nevents = poll(&fds, 1, 10000 /* wait 5 seconds */);
+ if (nevents != 0)
+ _exit(EXIT_FAILURE);
+ if (fds.revents & POLLIN)
+ _exit(EXIT_FAILURE);
+ if (fds.revents & POLLHUP)
+ _exit(EXIT_FAILURE);
+ _exit(EXIT_SUCCESS);
+ }
+
/* Retrieve the tid of the thread. */
EXPECT_EQ(close(ipc_sockets[1]), 0);
ASSERT_EQ(read_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread));
@@ -275,12 +304,7 @@ TEST_F(pidfd_info, thread_group)
pidfd_thread = sys_pidfd_open(pid_thread, PIDFD_THREAD);
ASSERT_GE(pidfd_thread, 0);
- /*
- * Opening a PIDFD_THREAD aka thread-specific pidfd based on a
- * thread-group leader must succeed.
- */
- pidfd_leader_thread = sys_pidfd_open(pid_leader, PIDFD_THREAD);
- ASSERT_GE(pidfd_leader_thread, 0);
+ ASSERT_EQ(wait_for_pid(pid_poller), 0);
/*
* Note that pidfd_leader is a thread-group pidfd, so polling on it
@@ -389,7 +413,7 @@ static void *pidfd_info_thread_exec(void *arg)
TEST_F(pidfd_info, thread_group_exec)
{
- pid_t pid_leader, pid_thread;
+ pid_t pid_leader, pid_poller, pid_thread;
pthread_t thread;
int nevents, pidfd_leader, pidfd_leader_thread, pidfd_thread, ret;
int ipc_sockets[2];
@@ -415,6 +439,37 @@ TEST_F(pidfd_info, thread_group_exec)
syscall(__NR_exit, EXIT_SUCCESS);
}
+ /* Open a thread-specific pidfd for the thread-group leader. */
+ pidfd_leader_thread = sys_pidfd_open(pid_leader, PIDFD_THREAD);
+ ASSERT_GE(pidfd_leader_thread, 0);
+
+ pid_poller = fork();
+ ASSERT_GE(pid_poller, 0);
+ if (pid_poller == 0) {
+ /*
+ * We can't poll and wait for the old thread-group
+ * leader to exit using a thread-specific pidfd. The
+ * thread-group leader exited prematurely and
+ * notification is delayed until all subthreads have
+ * exited.
+ *
+ * When the thread has execed it will taken over the old
+ * thread-group leaders struct pid. Calling poll after
+ * the thread execed will thus block again because a new
+ * thread-group has started.
+ */
+ fds.events = POLLIN;
+ fds.fd = pidfd_leader_thread;
+ nevents = poll(&fds, 1, 10000 /* wait 5 seconds */);
+ if (nevents != 0)
+ _exit(EXIT_FAILURE);
+ if (fds.revents & POLLIN)
+ _exit(EXIT_FAILURE);
+ if (fds.revents & POLLHUP)
+ _exit(EXIT_FAILURE);
+ _exit(EXIT_SUCCESS);
+ }
+
/* Retrieve the tid of the thread. */
EXPECT_EQ(close(ipc_sockets[1]), 0);
ASSERT_EQ(read_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread));
@@ -423,32 +478,158 @@ TEST_F(pidfd_info, thread_group_exec)
pidfd_thread = sys_pidfd_open(pid_thread, PIDFD_THREAD);
ASSERT_GE(pidfd_thread, 0);
- /* Open a thread-specific pidfd for the thread-group leader. */
- pidfd_leader_thread = sys_pidfd_open(pid_leader, PIDFD_THREAD);
- ASSERT_GE(pidfd_leader_thread, 0);
+ /* Now that we've opened a thread-specific pidfd the thread can exec. */
+ ASSERT_EQ(write_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread));
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ ASSERT_EQ(wait_for_pid(pid_poller), 0);
+
+ /* Wait until the kernel has SIGKILLed the thread. */
+ fds.events = POLLHUP;
+ fds.fd = pidfd_thread;
+ nevents = poll(&fds, 1, -1);
+ ASSERT_EQ(nevents, 1);
+ /* The thread has been reaped. */
+ ASSERT_TRUE(!!(fds.revents & POLLHUP));
+
+ /* Retrieve thread-specific exit info from pidfd. */
+ ASSERT_EQ(ioctl(pidfd_thread, PIDFD_GET_INFO, &info), 0);
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+ /*
+ * While the kernel will have SIGKILLed the whole thread-group
+ * during exec it will cause the individual threads to exit
+ * cleanly.
+ */
+ ASSERT_TRUE(WIFEXITED(info.exit_code));
+ ASSERT_EQ(WEXITSTATUS(info.exit_code), 0);
+
+ /*
+ * The thread-group leader is still alive, the thread has taken
+ * over its struct pid and thus its pid number.
+ */
+ info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS));
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT));
+ ASSERT_EQ(info.pid, pid_leader);
+
+ /* Take down the thread-group leader. */
+ EXPECT_EQ(sys_pidfd_send_signal(pidfd_leader, SIGKILL, NULL, 0), 0);
/*
- * We can poll and wait for the old thread-group leader to exit
- * using a thread-specific pidfd.
- *
- * This only works until the thread has execed. When the thread
- * has execed it will have taken over the old thread-group
- * leaders struct pid. Calling poll after the thread execed will
- * thus block again because a new thread-group has started (Yes,
- * it's fscked.).
+ * Afte the exec we're dealing with an empty thread-group so now
+ * we must see an exit notification on the thread-specific pidfd
+ * for the thread-group leader as there's no subthread that can
+ * revive the struct pid.
*/
fds.events = POLLIN;
fds.fd = pidfd_leader_thread;
nevents = poll(&fds, 1, -1);
ASSERT_EQ(nevents, 1);
- /* The thread-group leader has exited. */
ASSERT_TRUE(!!(fds.revents & POLLIN));
- /* The thread-group leader hasn't been reaped. */
ASSERT_FALSE(!!(fds.revents & POLLHUP));
+ EXPECT_EQ(sys_waitid(P_PIDFD, pidfd_leader, NULL, WEXITED), 0);
+
+ /* Retrieve exit information for the thread-group leader. */
+ info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+
+ EXPECT_EQ(close(pidfd_leader), 0);
+ EXPECT_EQ(close(pidfd_thread), 0);
+}
+
+static void *pidfd_info_thread_exec_sane(void *arg)
+{
+ pid_t pid_thread = gettid();
+ int ipc_socket = *(int *)arg;
+
+ /* Inform the grand-parent what the tid of this thread is. */
+ if (write_nointr(ipc_socket, &pid_thread, sizeof(pid_thread)) != sizeof(pid_thread))
+ return NULL;
+
+ if (read_nointr(ipc_socket, &pid_thread, sizeof(pid_thread)) != sizeof(pid_thread))
+ return NULL;
+
+ close(ipc_socket);
+
+ sys_execveat(AT_FDCWD, "pidfd_exec_helper", NULL, NULL, 0);
+ return NULL;
+}
+
+TEST_F(pidfd_info, thread_group_exec_thread)
+{
+ pid_t pid_leader, pid_poller, pid_thread;
+ pthread_t thread;
+ int nevents, pidfd_leader, pidfd_leader_thread, pidfd_thread, ret;
+ int ipc_sockets[2];
+ struct pollfd fds = {};
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT,
+ };
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ EXPECT_EQ(ret, 0);
+
+ pid_leader = create_child(&pidfd_leader, 0);
+ EXPECT_GE(pid_leader, 0);
+
+ if (pid_leader == 0) {
+ close(ipc_sockets[0]);
+
+ /* The thread will outlive the thread-group leader. */
+ if (pthread_create(&thread, NULL, pidfd_info_thread_exec_sane, &ipc_sockets[1]))
+ syscall(__NR_exit, EXIT_FAILURE);
+
+ /*
+ * Pause the thread-group leader. It will be killed once
+ * the subthread execs.
+ */
+ pause();
+ syscall(__NR_exit, EXIT_SUCCESS);
+ }
+
+ /* Retrieve the tid of the thread. */
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread));
+
+ /* Opening a thread as a PIDFD_THREAD must succeed. */
+ pidfd_thread = sys_pidfd_open(pid_thread, PIDFD_THREAD);
+ ASSERT_GE(pidfd_thread, 0);
+
+ /* Open a thread-specific pidfd for the thread-group leader. */
+ pidfd_leader_thread = sys_pidfd_open(pid_leader, PIDFD_THREAD);
+ ASSERT_GE(pidfd_leader_thread, 0);
+
+ pid_poller = fork();
+ ASSERT_GE(pid_poller, 0);
+ if (pid_poller == 0) {
+ /*
+ * The subthread will now exec. The struct pid of the old
+ * thread-group leader will be assumed by the subthread which
+ * becomes the new thread-group leader. So no exit notification
+ * must be generated. Wait for 5 seconds and call it a success
+ * if no notification has been received.
+ */
+ fds.events = POLLIN;
+ fds.fd = pidfd_leader_thread;
+ nevents = poll(&fds, 1, 10000 /* wait 5 seconds */);
+ if (nevents != 0)
+ _exit(EXIT_FAILURE);
+ if (fds.revents & POLLIN)
+ _exit(EXIT_FAILURE);
+ if (fds.revents & POLLHUP)
+ _exit(EXIT_FAILURE);
+ _exit(EXIT_SUCCESS);
+ }
+
/* Now that we've opened a thread-specific pidfd the thread can exec. */
ASSERT_EQ(write_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread));
EXPECT_EQ(close(ipc_sockets[0]), 0);
+ ASSERT_EQ(wait_for_pid(pid_poller), 0);
/* Wait until the kernel has SIGKILLed the thread. */
fds.events = POLLHUP;
@@ -482,6 +663,20 @@ TEST_F(pidfd_info, thread_group_exec)
/* Take down the thread-group leader. */
EXPECT_EQ(sys_pidfd_send_signal(pidfd_leader, SIGKILL, NULL, 0), 0);
+
+ /*
+ * Afte the exec we're dealing with an empty thread-group so now
+ * we must see an exit notification on the thread-specific pidfd
+ * for the thread-group leader as there's no subthread that can
+ * revive the struct pid.
+ */
+ fds.events = POLLIN;
+ fds.fd = pidfd_leader_thread;
+ nevents = poll(&fds, 1, -1);
+ ASSERT_EQ(nevents, 1);
+ ASSERT_TRUE(!!(fds.revents & POLLIN));
+ ASSERT_FALSE(!!(fds.revents & POLLHUP));
+
EXPECT_EQ(sys_waitid(P_PIDFD, pidfd_leader, NULL, WEXITED), 0);
/* Retrieve exit information for the thread-group leader. */