From 1cef1150ef40ec52f507436a14230cbc2623299c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Jun 2018 11:45:49 +0200 Subject: kthread, sched/core: Fix kthread_parkme() (again...) Gaurav reports that commit: 85f1abe0019f ("kthread, sched/wait: Fix kthread_parkme() completion issue") isn't working for him. Because of the following race: > controller Thread CPUHP Thread > takedown_cpu > kthread_park > kthread_parkme > Set KTHREAD_SHOULD_PARK > smpboot_thread_fn > set Task interruptible > > > wake_up_process > if (!(p->state & state)) > goto out; > > Kthread_parkme > SET TASK_PARKED > schedule > raw_spin_lock(&rq->lock) > ttwu_remote > waiting for __task_rq_lock > context_switch > > finish_lock_switch > > > > Case TASK_PARKED > kthread_park_complete > > > SET Running Furthermore, Oleg noticed that the whole scheduler TASK_PARKED handling is buggered because the TASK_DEAD thing is done with preemption disabled, the current code can still complete early on preemption :/ So basically revert that earlier fix and go with a variant of the alternative mentioned in the commit. Promote TASK_PARKED to special state to avoid the store-store issue on task->state leading to the WARN in kthread_unpark() -> __kthread_bind(). But in addition, add wait_task_inactive() to kthread_park() to ensure the task really is PARKED when we return from kthread_park(). This avoids the whole kthread still gets migrated nonsense -- although it would be really good to get this done differently. Reported-by: Gaurav Kohli Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 85f1abe0019f ("kthread, sched/wait: Fix kthread_parkme() completion issue") Signed-off-by: Ingo Molnar --- kernel/kthread.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) (limited to 'kernel/kthread.c') diff --git a/kernel/kthread.c b/kernel/kthread.c index 481951bf091d..750cb8082694 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -177,9 +177,20 @@ void *kthread_probe_data(struct task_struct *task) static void __kthread_parkme(struct kthread *self) { for (;;) { - set_current_state(TASK_PARKED); + /* + * TASK_PARKED is a special state; we must serialize against + * possible pending wakeups to avoid store-store collisions on + * task->state. + * + * Such a collision might possibly result in the task state + * changin from TASK_PARKED and us failing the + * wait_task_inactive() in kthread_park(). + */ + set_special_state(TASK_PARKED); if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags)) break; + + complete_all(&self->parked); schedule(); } __set_current_state(TASK_RUNNING); @@ -191,11 +202,6 @@ void kthread_parkme(void) } EXPORT_SYMBOL_GPL(kthread_parkme); -void kthread_park_complete(struct task_struct *k) -{ - complete_all(&to_kthread(k)->parked); -} - static int kthread(void *_create) { /* Copy data: it's on kthread's stack */ @@ -461,6 +467,9 @@ void kthread_unpark(struct task_struct *k) reinit_completion(&kthread->parked); clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); + /* + * __kthread_parkme() will either see !SHOULD_PARK or get the wakeup. + */ wake_up_state(k, TASK_PARKED); } EXPORT_SYMBOL_GPL(kthread_unpark); @@ -487,7 +496,16 @@ int kthread_park(struct task_struct *k) set_bit(KTHREAD_SHOULD_PARK, &kthread->flags); if (k != current) { wake_up_process(k); + /* + * Wait for __kthread_parkme() to complete(), this means we + * _will_ have TASK_PARKED and are about to call schedule(). + */ wait_for_completion(&kthread->parked); + /* + * Now wait for that schedule() to complete and the task to + * get scheduled out. + */ + WARN_ON_ONCE(!wait_task_inactive(k, TASK_PARKED)); } return 0; -- cgit