From 3d4775df0a89240f671861c6ab6e8d59af8e9e41 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Nov 2019 22:55:37 +0100 Subject: futex: Replace PF_EXITPIDONE with a state The futex exit handling relies on PF_ flags. That's suboptimal as it requires a smp_mb() and an ugly lock/unlock of the exiting tasks pi_lock in the middle of do_exit() to enforce the observability of PF_EXITING in the futex code. Add a futex_state member to task_struct and convert the PF_EXITPIDONE logic over to the new state. The PF_EXITING dependency will be cleaned up in a later step. This prepares for handling various futex exit issues later. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20191106224556.149449274@linutronix.de --- kernel/exit.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index a46a50d67002..d11bdcaac2e1 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -746,16 +746,7 @@ void __noreturn do_exit(long code) */ if (unlikely(tsk->flags & PF_EXITING)) { pr_alert("Fixing recursive fault but reboot is needed!\n"); - /* - * We can do this unlocked here. The futex code uses - * this flag just to verify whether the pi state - * cleanup has been done or not. In the worst case it - * loops once more. We pretend that the cleanup was - * done as there is no way to return. Either the - * OWNER_DIED bit is set by now or we push the blocked - * task into the wait for ever nirwana as well. - */ - tsk->flags |= PF_EXITPIDONE; + futex_exit_done(tsk); set_current_state(TASK_UNINTERRUPTIBLE); schedule(); } @@ -846,12 +837,7 @@ void __noreturn do_exit(long code) * Make sure we are holding no locks: */ debug_check_no_locks_held(); - /* - * We can do this unlocked here. The futex code uses this flag - * just to verify whether the pi state cleanup has been done - * or not. In the worst case it loops once more. - */ - tsk->flags |= PF_EXITPIDONE; + futex_exit_done(tsk); if (tsk->io_context) exit_io_context(tsk); -- cgit From 4610ba7ad877fafc0a25a30c6c82015304120426 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Nov 2019 22:55:38 +0100 Subject: exit/exec: Seperate mm_release() mm_release() contains the futex exit handling. mm_release() is called from do_exit()->exit_mm() and from exec()->exec_mm(). In the exit_mm() case PF_EXITING and the futex state is updated. In the exec_mm() case these states are not touched. As the futex exit code needs further protections against exit races, this needs to be split into two functions. Preparatory only, no functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20191106224556.240518241@linutronix.de --- kernel/exit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index d11bdcaac2e1..cd893b530902 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -437,7 +437,7 @@ static void exit_mm(void) struct mm_struct *mm = current->mm; struct core_state *core_state; - mm_release(current, mm); + exit_mm_release(current, mm); if (!mm) return; sync_mm_rss(mm); -- cgit From f24f22435dcc11389acc87e5586239c1819d217c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Nov 2019 22:55:40 +0100 Subject: futex: Set task::futex_state to DEAD right after handling futex exit Setting task::futex_state in do_exit() is rather arbitrarily placed for no reason. Move it into the futex code. Note, this is only done for the exit cleanup as the exec cleanup cannot set the state to FUTEX_STATE_DEAD because the task struct is still in active use. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20191106224556.439511191@linutronix.de --- kernel/exit.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index cd893b530902..f3b8fa1b8945 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -837,7 +837,6 @@ void __noreturn do_exit(long code) * Make sure we are holding no locks: */ debug_check_no_locks_held(); - futex_exit_done(tsk); if (tsk->io_context) exit_io_context(tsk); -- cgit From 18f694385c4fd77a09851fd301236746ca83f3cb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Nov 2019 22:55:41 +0100 Subject: futex: Mark the begin of futex exit explicitly Instead of relying on PF_EXITING use an explicit state for the futex exit and set it in the futex exit function. This moves the smp barrier and the lock/unlock serialization into the futex code. As with the DEAD state this is restricted to the exit path as exec continues to use the same task struct. This allows to simplify that logic in a next step. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20191106224556.539409004@linutronix.de --- kernel/exit.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index f3b8fa1b8945..d351fd09e739 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -746,23 +746,12 @@ void __noreturn do_exit(long code) */ if (unlikely(tsk->flags & PF_EXITING)) { pr_alert("Fixing recursive fault but reboot is needed!\n"); - futex_exit_done(tsk); + futex_exit_recursive(tsk); set_current_state(TASK_UNINTERRUPTIBLE); schedule(); } exit_signals(tsk); /* sets PF_EXITING */ - /* - * Ensure that all new tsk->pi_lock acquisitions must observe - * PF_EXITING. Serializes against futex.c:attach_to_pi_owner(). - */ - smp_mb(); - /* - * Ensure that we must observe the pi_state in exit_mm() -> - * mm_release() -> exit_pi_state_list(). - */ - raw_spin_lock_irq(&tsk->pi_lock); - raw_spin_unlock_irq(&tsk->pi_lock); if (unlikely(in_atomic())) { pr_info("note: %s[%d] exited with preempt_count %d\n", -- cgit