From 50fb4f7fc907efff65eadb0b74387a9ffed6e849 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:22 -0800 Subject: freezer: fix current->state restoration race in refrigerator() refrigerator() saves current->state before entering frozen state and restores it before returning using __set_current_state(); however, this is racy, for example, please consider the following sequence. set_current_state(TASK_INTERRUPTIBLE); try_to_freeze(); if (kthread_should_stop()) break; schedule(); If kthread_stop() races with ->state restoration, the restoration can restore ->state to TASK_INTERRUPTIBLE after kthread_stop() sets it to TASK_RUNNING but kthread_should_stop() may still see zero ->should_stop because there's no memory barrier between restoring TASK_INTERRUPTIBLE and kthread_should_stop() test. This isn't restricted to kthread_should_stop(). current->state is often used in memory barrier based synchronization and silently restoring it w/o mb breaks them. Use set_current_state() instead. Signed-off-by: Tejun Heo --- kernel/freezer.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/freezer.c b/kernel/freezer.c index 7be56c534397..3f460104a9d6 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -58,7 +58,13 @@ void refrigerator(void) current->flags &= ~PF_FREEZING; pr_debug("%s left refrigerator\n", current->comm); - __set_current_state(save); + + /* + * Restore saved task state before returning. The mb'd version + * needs to be used; otherwise, it might silently break + * synchronization which depends on ordered task state change. + */ + set_current_state(save); } EXPORT_SYMBOL(refrigerator); -- cgit From a0acae0e886d44bd5ce6d2f173c1ace0fcf0d9f6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:22 -0800 Subject: freezer: unexport refrigerator() and update try_to_freeze() slightly There is no reason to export two functions for entering the refrigerator. Calling refrigerator() instead of try_to_freeze() doesn't save anything noticeable or removes any race condition. * Rename refrigerator() to __refrigerator() and make it return bool indicating whether it scheduled out for freezing. * Update try_to_freeze() to return bool and relay the return value of __refrigerator() if freezing(). * Convert all refrigerator() users to try_to_freeze(). * Update documentation accordingly. * While at it, add might_sleep() to try_to_freeze(). Signed-off-by: Tejun Heo Cc: Samuel Ortiz Cc: Chris Mason Cc: "Theodore Ts'o" Cc: Steven Whitehouse Cc: Andrew Morton Cc: Jan Kara Cc: KONISHI Ryusuke Cc: Christoph Hellwig --- kernel/freezer.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/freezer.c b/kernel/freezer.c index 3f460104a9d6..732f14f5944f 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -23,10 +23,11 @@ static inline void frozen_process(void) } /* Refrigerator is place where frozen processes are stored :-). */ -void refrigerator(void) +bool __refrigerator(void) { /* Hmm, should we be allowed to suspend when there are realtime processes around? */ + bool was_frozen = false; long save; task_lock(current); @@ -35,7 +36,7 @@ void refrigerator(void) task_unlock(current); } else { task_unlock(current); - return; + return was_frozen; } save = current->state; pr_debug("%s entered refrigerator\n", current->comm); @@ -51,6 +52,7 @@ void refrigerator(void) set_current_state(TASK_UNINTERRUPTIBLE); if (!frozen(current)) break; + was_frozen = true; schedule(); } @@ -65,8 +67,10 @@ void refrigerator(void) * synchronization which depends on ordered task state change. */ set_current_state(save); + + return was_frozen; } -EXPORT_SYMBOL(refrigerator); +EXPORT_SYMBOL(__refrigerator); static void fake_signal_wake_up(struct task_struct *p) { -- cgit From 8a32c441c1609f80e55df75422324a1151208f40 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:23 -0800 Subject: freezer: implement and use kthread_freezable_should_stop() Writeback and thinkpad_acpi have been using thaw_process() to prevent deadlock between the freezer and kthread_stop(); unfortunately, this is inherently racy - nothing prevents freezing from happening between thaw_process() and kthread_stop(). This patch implements kthread_freezable_should_stop() which enters refrigerator if necessary but is guaranteed to return if kthread_stop() is invoked. Both thaw_process() users are converted to use the new function. Note that this deadlock condition exists for many of freezable kthreads. They need to be converted to use the new should_stop or freezable workqueue. Tested with synthetic test case. Signed-off-by: Tejun Heo Acked-by: Henrique de Moraes Holschuh Cc: Jens Axboe Cc: Oleg Nesterov --- kernel/freezer.c | 6 ++++-- kernel/kthread.c | 25 +++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/freezer.c b/kernel/freezer.c index 732f14f5944f..b83c30e9483a 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -9,6 +9,7 @@ #include #include #include +#include /* * freezing is complete, mark current process as frozen @@ -23,7 +24,7 @@ static inline void frozen_process(void) } /* Refrigerator is place where frozen processes are stored :-). */ -bool __refrigerator(void) +bool __refrigerator(bool check_kthr_stop) { /* Hmm, should we be allowed to suspend when there are realtime processes around? */ @@ -50,7 +51,8 @@ bool __refrigerator(void) for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); - if (!frozen(current)) + if (!frozen(current) || + (check_kthr_stop && kthread_should_stop())) break; was_frozen = true; schedule(); diff --git a/kernel/kthread.c b/kernel/kthread.c index b6d216a92639..1c36deaae2f1 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -58,6 +58,31 @@ int kthread_should_stop(void) } EXPORT_SYMBOL(kthread_should_stop); +/** + * kthread_freezable_should_stop - should this freezable kthread return now? + * @was_frozen: optional out parameter, indicates whether %current was frozen + * + * kthread_should_stop() for freezable kthreads, which will enter + * refrigerator if necessary. This function is safe from kthread_stop() / + * freezer deadlock and freezable kthreads should use this function instead + * of calling try_to_freeze() directly. + */ +bool kthread_freezable_should_stop(bool *was_frozen) +{ + bool frozen = false; + + might_sleep(); + + if (unlikely(freezing(current))) + frozen = __refrigerator(true); + + if (was_frozen) + *was_frozen = frozen; + + return kthread_should_stop(); +} +EXPORT_SYMBOL_GPL(kthread_freezable_should_stop); + /** * kthread_data - return data value specified on kthread creation * @task: kthread task in question -- cgit From a5be2d0d1a8746e7be5210e3d6b904455000443c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:23 -0800 Subject: freezer: rename thaw_process() to __thaw_task() and simplify the implementation thaw_process() now has only internal users - system and cgroup freezers. Remove the unnecessary return value, rename, unexport and collapse __thaw_process() into it. This will help further updates to the freezer code. -v3: oom_kill grew a use of thaw_process() while this patch was pending. Convert it to use __thaw_task() for now. In the longer term, this should be handled by allowing tasks to die if killed even if it's frozen. -v2: minor style update as suggested by Matt. Signed-off-by: Tejun Heo Cc: Paul Menage Cc: Matt Helsley --- kernel/cgroup_freezer.c | 7 +++---- kernel/freezer.c | 31 ++++++++++++------------------- kernel/power/process.c | 2 +- 3 files changed, 16 insertions(+), 24 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 5e828a2ca8e6..a6d405a86ee0 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -130,7 +130,7 @@ struct cgroup_subsys freezer_subsys; * write_lock css_set_lock (cgroup iterator start) * task->alloc_lock * read_lock css_set_lock (cgroup iterator start) - * task->alloc_lock (inside thaw_process(), prevents race with refrigerator()) + * task->alloc_lock (inside __thaw_task(), prevents race with refrigerator()) * sighand->siglock */ static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss, @@ -300,9 +300,8 @@ static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) struct task_struct *task; cgroup_iter_start(cgroup, &it); - while ((task = cgroup_iter_next(cgroup, &it))) { - thaw_process(task); - } + while ((task = cgroup_iter_next(cgroup, &it))) + __thaw_task(task); cgroup_iter_end(cgroup, &it); freezer->state = CGROUP_THAWED; diff --git a/kernel/freezer.c b/kernel/freezer.c index b83c30e9483a..c851d588e29f 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -145,18 +145,8 @@ void cancel_freezing(struct task_struct *p) } } -static int __thaw_process(struct task_struct *p) -{ - if (frozen(p)) { - p->flags &= ~PF_FROZEN; - return 1; - } - clear_freeze_flag(p); - return 0; -} - /* - * Wake up a frozen process + * Wake up a frozen task * * task_lock() is needed to prevent the race with refrigerator() which may * occur if the freezing of tasks fails. Namely, without the lock, if the @@ -164,15 +154,18 @@ static int __thaw_process(struct task_struct *p) * refrigerator() could call frozen_process(), in which case the task would be * frozen and no one would thaw it. */ -int thaw_process(struct task_struct *p) +void __thaw_task(struct task_struct *p) { + bool was_frozen; + task_lock(p); - if (__thaw_process(p) == 1) { - task_unlock(p); - wake_up_process(p); - return 1; - } + was_frozen = frozen(p); + if (was_frozen) + p->flags &= ~PF_FROZEN; + else + clear_freeze_flag(p); task_unlock(p); - return 0; + + if (was_frozen) + wake_up_process(p); } -EXPORT_SYMBOL(thaw_process); diff --git a/kernel/power/process.c b/kernel/power/process.c index addbbe5531bc..fe2787207f00 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -186,7 +186,7 @@ static void thaw_tasks(bool nosig_only) if (cgroup_freezing_or_frozen(p)) continue; - thaw_process(p); + __thaw_task(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); } -- cgit From a585042f7b933539a0b6bc63650c2d49ffb2e55d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:23 -0800 Subject: freezer: remove racy clear_freeze_flag() and set PF_NOFREEZE on dead tasks clear_freeze_flag() in exit_mm() is racy. Freezing can start afterwards. Remove it. Skipping freezer for exiting task will be properly implemented later. Also, freezable() was testing exit_state directly to make system freezer ignore dead tasks. Let the exiting task set PF_NOFREEZE after entering TASK_DEAD instead. Signed-off-by: Tejun Heo Cc: Oleg Nesterov --- kernel/exit.c | 3 +-- kernel/power/process.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index d0b7d988f873..95a4141d07e7 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -679,8 +679,6 @@ static void exit_mm(struct task_struct * tsk) tsk->mm = NULL; up_read(&mm->mmap_sem); enter_lazy_tlb(mm, current); - /* We don't want this task to be frozen prematurely */ - clear_freeze_flag(tsk); task_unlock(tsk); mm_update_next_owner(mm); mmput(mm); @@ -1040,6 +1038,7 @@ NORET_TYPE void do_exit(long code) exit_rcu(); /* causes final put_task_struct in finish_task_switch(). */ tsk->state = TASK_DEAD; + tsk->flags |= PF_NOFREEZE; /* tell freezer to ignore us */ schedule(); BUG(); /* Avoid "noreturn function does return". */ diff --git a/kernel/power/process.c b/kernel/power/process.c index fe2787207f00..23822dc14b6c 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -25,8 +25,7 @@ static inline int freezable(struct task_struct * p) { if ((p == current) || - (p->flags & PF_NOFREEZE) || - (p->exit_state != 0)) + (p->flags & PF_NOFREEZE)) return 0; return 1; } -- cgit From 6cd8dedcdd8e8de01391a7cf25f0b2afeb24f8f4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:23 -0800 Subject: freezer: don't distinguish nosig tasks on thaw There's no point in thawing nosig tasks before others. There's no ordering requirement between the two groups on thaw, which the staged thawing can't guarantee anyway. Simplify thaw_processes() by removing the distinction and collapsing thaw_tasks() into thaw_processes(). This will help further updates to freezer. Signed-off-by: Tejun Heo --- kernel/power/process.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/power/process.c b/kernel/power/process.c index 23822dc14b6c..9db048fb0d70 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -170,34 +170,28 @@ int freeze_kernel_threads(void) return error; } -static void thaw_tasks(bool nosig_only) +void thaw_processes(void) { struct task_struct *g, *p; + oom_killer_enable(); + + printk("Restarting tasks ... "); + + thaw_workqueues(); + read_lock(&tasklist_lock); do_each_thread(g, p) { if (!freezable(p)) continue; - if (nosig_only && should_send_signal(p)) - continue; - if (cgroup_freezing_or_frozen(p)) continue; __thaw_task(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); -} - -void thaw_processes(void) -{ - oom_killer_enable(); - printk("Restarting tasks ... "); - thaw_workqueues(); - thaw_tasks(true); - thaw_tasks(false); schedule(); printk("done.\n"); } -- cgit From 0c9af09262864a2744091ee94c98c4a8fd60c98b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:24 -0800 Subject: freezer: use dedicated lock instead of task_lock() + memory barrier Freezer synchronization is needlessly complicated - it's by no means a hot path and the priority is staying unintrusive and safe. This patch makes it simply use a dedicated lock instead of piggy-backing on task_lock() and playing with memory barriers. On the failure path of try_to_freeze_tasks(), locking is moved from it to cancel_freezing(). This makes the frozen() test racy but the race here is a non-issue as the warning is printed for tasks which failed to enter frozen for 20 seconds and race on PF_FROZEN at the last moment doesn't change anything. This simplifies freezer implementation and eases further changes including some race fixes. Signed-off-by: Tejun Heo --- kernel/freezer.c | 84 ++++++++++++++++++++++---------------------------- kernel/power/process.c | 2 -- 2 files changed, 37 insertions(+), 49 deletions(-) (limited to 'kernel') diff --git a/kernel/freezer.c b/kernel/freezer.c index c851d588e29f..4130e48649bb 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -11,17 +11,8 @@ #include #include -/* - * freezing is complete, mark current process as frozen - */ -static inline void frozen_process(void) -{ - if (!unlikely(current->flags & PF_NOFREEZE)) { - current->flags |= PF_FROZEN; - smp_wmb(); - } - clear_freeze_flag(current); -} +/* protects freezing and frozen transitions */ +static DEFINE_SPINLOCK(freezer_lock); /* Refrigerator is place where frozen processes are stored :-). */ bool __refrigerator(bool check_kthr_stop) @@ -31,14 +22,16 @@ bool __refrigerator(bool check_kthr_stop) bool was_frozen = false; long save; - task_lock(current); - if (freezing(current)) { - frozen_process(); - task_unlock(current); - } else { - task_unlock(current); + spin_lock_irq(&freezer_lock); + if (!freezing(current)) { + spin_unlock_irq(&freezer_lock); return was_frozen; } + if (!(current->flags & PF_NOFREEZE)) + current->flags |= PF_FROZEN; + clear_freeze_flag(current); + spin_unlock_irq(&freezer_lock); + save = current->state; pr_debug("%s entered refrigerator\n", current->comm); @@ -99,21 +92,18 @@ static void fake_signal_wake_up(struct task_struct *p) */ bool freeze_task(struct task_struct *p, bool sig_only) { - /* - * We first check if the task is freezing and next if it has already - * been frozen to avoid the race with frozen_process() which first marks - * the task as frozen and next clears its TIF_FREEZE. - */ - if (!freezing(p)) { - smp_rmb(); - if (frozen(p)) - return false; - - if (!sig_only || should_send_signal(p)) - set_freeze_flag(p); - else - return false; - } + unsigned long flags; + bool ret = false; + + spin_lock_irqsave(&freezer_lock, flags); + + if (sig_only && !should_send_signal(p)) + goto out_unlock; + + if (frozen(p)) + goto out_unlock; + + set_freeze_flag(p); if (should_send_signal(p)) { fake_signal_wake_up(p); @@ -123,26 +113,28 @@ bool freeze_task(struct task_struct *p, bool sig_only) * TASK_RUNNING transition can't race with task state * testing in try_to_freeze_tasks(). */ - } else if (sig_only) { - return false; } else { wake_up_state(p, TASK_INTERRUPTIBLE); } - - return true; + ret = true; +out_unlock: + spin_unlock_irqrestore(&freezer_lock, flags); + return ret; } void cancel_freezing(struct task_struct *p) { unsigned long flags; + spin_lock_irqsave(&freezer_lock, flags); if (freezing(p)) { pr_debug(" clean up: %s\n", p->comm); clear_freeze_flag(p); - spin_lock_irqsave(&p->sighand->siglock, flags); + spin_lock(&p->sighand->siglock); recalc_sigpending_and_wake(p); - spin_unlock_irqrestore(&p->sighand->siglock, flags); + spin_unlock(&p->sighand->siglock); } + spin_unlock_irqrestore(&freezer_lock, flags); } /* @@ -156,16 +148,14 @@ void cancel_freezing(struct task_struct *p) */ void __thaw_task(struct task_struct *p) { - bool was_frozen; + unsigned long flags; - task_lock(p); - was_frozen = frozen(p); - if (was_frozen) + spin_lock_irqsave(&freezer_lock, flags); + if (frozen(p)) { p->flags &= ~PF_FROZEN; - else - clear_freeze_flag(p); - task_unlock(p); - - if (was_frozen) wake_up_process(p); + } else { + clear_freeze_flag(p); + } + spin_unlock_irqrestore(&freezer_lock, flags); } diff --git a/kernel/power/process.c b/kernel/power/process.c index 9db048fb0d70..bd420ca48261 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -118,11 +118,9 @@ static int try_to_freeze_tasks(bool sig_only) read_lock(&tasklist_lock); do_each_thread(g, p) { - task_lock(p); if (!wakeup && freezing(p) && !freezer_should_skip(p)) sched_show_task(p); cancel_freezing(p); - task_unlock(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); } else { -- cgit From 6907483b4e803a20f0b48cc9afa3817420ce61c5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:24 -0800 Subject: freezer: make freezing indicate freeze condition in effect Currently freezing (TIF_FREEZE) and frozen (PF_FROZEN) states are interlocked - freezing is set to request freeze and when the task actually freezes, it clears freezing and sets frozen. This interlocking makes things more complex than necessary - freezing doesn't mean there's freezing condition in effect and frozen doesn't match the task actually entering and leaving frozen state (it's cleared by the thawing task). This patch makes freezing indicate that freeze condition is in effect. A task enters and stays frozen if freezing. This makes PF_FROZEN manipulation done only by the task itself and prevents wakeup from __thaw_task() leaking outside of refrigerator. The only place which needs to tell freezing && !frozen is try_to_freeze_task() to whine about tasks which don't enter frozen. It's updated to test the condition explicitly. With the change, frozen() state my linger after __thaw_task() until the task wakes up and exits fridge. This can trigger BUG_ON() in update_if_frozen(). Work it around by testing freezing() && frozen() instead of frozen(). -v2: Oleg pointed out missing re-check of freezing() when trying to clear FROZEN and possible spurious BUG_ON() trigger in update_if_frozen(). Both fixed. Signed-off-by: Tejun Heo Cc: Oleg Nesterov Cc: Paul Menage --- kernel/cgroup_freezer.c | 2 +- kernel/freezer.c | 42 ++++++++++++++++++++++++------------------ kernel/power/process.c | 3 ++- 3 files changed, 27 insertions(+), 20 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index a6d405a86ee0..cd27b0825560 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -231,7 +231,7 @@ static void update_if_frozen(struct cgroup *cgroup, cgroup_iter_start(cgroup, &it); while ((task = cgroup_iter_next(cgroup, &it))) { ntotal++; - if (frozen(task)) + if (freezing(task) && frozen(task)) nfrozen++; } diff --git a/kernel/freezer.c b/kernel/freezer.c index 4130e48649bb..a8822be43da0 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -22,14 +22,19 @@ bool __refrigerator(bool check_kthr_stop) bool was_frozen = false; long save; + /* + * Enter FROZEN. If NOFREEZE, schedule immediate thawing by + * clearing freezing. + */ spin_lock_irq(&freezer_lock); +repeat: if (!freezing(current)) { spin_unlock_irq(&freezer_lock); return was_frozen; } - if (!(current->flags & PF_NOFREEZE)) - current->flags |= PF_FROZEN; - clear_freeze_flag(current); + if (current->flags & PF_NOFREEZE) + clear_freeze_flag(current); + current->flags |= PF_FROZEN; spin_unlock_irq(&freezer_lock); save = current->state; @@ -44,7 +49,7 @@ bool __refrigerator(bool check_kthr_stop) for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); - if (!frozen(current) || + if (!freezing(current) || (check_kthr_stop && kthread_should_stop())) break; was_frozen = true; @@ -54,6 +59,13 @@ bool __refrigerator(bool check_kthr_stop) /* Remove the accounting blocker */ current->flags &= ~PF_FREEZING; + /* leave FROZEN */ + spin_lock_irq(&freezer_lock); + if (freezing(current)) + goto repeat; + current->flags &= ~PF_FROZEN; + spin_unlock_irq(&freezer_lock); + pr_debug("%s left refrigerator\n", current->comm); /* @@ -137,25 +149,19 @@ void cancel_freezing(struct task_struct *p) spin_unlock_irqrestore(&freezer_lock, flags); } -/* - * Wake up a frozen task - * - * task_lock() is needed to prevent the race with refrigerator() which may - * occur if the freezing of tasks fails. Namely, without the lock, if the - * freezing of tasks failed, thaw_tasks() might have run before a task in - * refrigerator() could call frozen_process(), in which case the task would be - * frozen and no one would thaw it. - */ void __thaw_task(struct task_struct *p) { unsigned long flags; + /* + * Clear freezing and kick @p if FROZEN. Clearing is guaranteed to + * be visible to @p as waking up implies wmb. Waking up inside + * freezer_lock also prevents wakeups from leaking outside + * refrigerator. + */ spin_lock_irqsave(&freezer_lock, flags); - if (frozen(p)) { - p->flags &= ~PF_FROZEN; + clear_freeze_flag(p); + if (frozen(p)) wake_up_process(p); - } else { - clear_freeze_flag(p); - } spin_unlock_irqrestore(&freezer_lock, flags); } diff --git a/kernel/power/process.c b/kernel/power/process.c index bd420ca48261..e6e2739190b5 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -118,7 +118,8 @@ static int try_to_freeze_tasks(bool sig_only) read_lock(&tasklist_lock); do_each_thread(g, p) { - if (!wakeup && freezing(p) && !freezer_should_skip(p)) + if (!wakeup && !freezer_should_skip(p) && + freezing(p) && !frozen(p)) sched_show_task(p); cancel_freezing(p); } while_each_thread(g, p); -- cgit From 85f1d476653f52c97ca75466b2494e67c1cbd25d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:24 -0800 Subject: freezer: test freezable conditions while holding freezer_lock try_to_freeze_tasks() and thaw_processes() use freezable() and frozen() as preliminary tests before initiating operations on a task. These are done without any synchronization and hinder with synchronization cleanup without any real performance benefits. In try_to_freeze_tasks(), open code self test and move PF_NOFREEZE and frozen() tests inside freezer_lock in freeze_task(). thaw_processes() can simply drop freezable() test as frozen() test in __thaw_task() is enough. Note: This used to be a part of larger patch to fix set_freezable() race. Separated out to satisfy ordering among dependent fixes. Signed-off-by: Tejun Heo Cc: Oleg Nesterov --- kernel/freezer.c | 3 ++- kernel/power/process.c | 16 +--------------- 2 files changed, 3 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/kernel/freezer.c b/kernel/freezer.c index a8822be43da0..a257ecd37c48 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -109,7 +109,8 @@ bool freeze_task(struct task_struct *p, bool sig_only) spin_lock_irqsave(&freezer_lock, flags); - if (sig_only && !should_send_signal(p)) + if ((p->flags & PF_NOFREEZE) || + (sig_only && !should_send_signal(p))) goto out_unlock; if (frozen(p)) diff --git a/kernel/power/process.c b/kernel/power/process.c index e6e2739190b5..e59676f5811d 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -22,14 +22,6 @@ */ #define TIMEOUT (20 * HZ) -static inline int freezable(struct task_struct * p) -{ - if ((p == current) || - (p->flags & PF_NOFREEZE)) - return 0; - return 1; -} - static int try_to_freeze_tasks(bool sig_only) { struct task_struct *g, *p; @@ -52,10 +44,7 @@ static int try_to_freeze_tasks(bool sig_only) todo = 0; read_lock(&tasklist_lock); do_each_thread(g, p) { - if (frozen(p) || !freezable(p)) - continue; - - if (!freeze_task(p, sig_only)) + if (p == current || !freeze_task(p, sig_only)) continue; /* @@ -181,9 +170,6 @@ void thaw_processes(void) read_lock(&tasklist_lock); do_each_thread(g, p) { - if (!freezable(p)) - continue; - if (cgroup_freezing_or_frozen(p)) continue; -- cgit From 376fede80e74d98b49d1ba9ac18f23c9fd026ddd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:24 -0800 Subject: freezer: kill PF_FREEZING With the previous changes, there's no meaningful difference between PF_FREEZING and PF_FROZEN. Remove PF_FREEZING and use PF_FROZEN instead in task_contributes_to_load(). Signed-off-by: Tejun Heo --- kernel/freezer.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'kernel') diff --git a/kernel/freezer.c b/kernel/freezer.c index a257ecd37c48..b8b562124ba9 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -44,9 +44,6 @@ repeat: recalc_sigpending(); /* We sent fake signal, clean it up */ spin_unlock_irq(¤t->sighand->siglock); - /* prevent accounting of that task to load */ - current->flags |= PF_FREEZING; - for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); if (!freezing(current) || @@ -56,9 +53,6 @@ repeat: schedule(); } - /* Remove the accounting blocker */ - current->flags &= ~PF_FREEZING; - /* leave FROZEN */ spin_lock_irq(&freezer_lock); if (freezing(current)) -- cgit From 03afed8bc296fa70186ba832c1126228bb992465 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:24 -0800 Subject: freezer: clean up freeze_processes() failure path freeze_processes() failure path is rather messy. Freezing is canceled for workqueues and tasks which aren't frozen yet but frozen tasks are left alone and should be thawed by the caller and of course some callers (xen and kexec) didn't do it. This patch updates __thaw_task() to handle cancelation correctly and makes freeze_processes() and freeze_kernel_threads() call thaw_processes() on failure instead so that the system is fully thawed on failure. Unnecessary [suspend_]thaw_processes() calls are removed from kernel/power/hibernate.c, suspend.c and user.c. While at it, restructure error checking if clause in suspend_prepare() to be less weird. -v2: Srivatsa spotted missing removal of suspend_thaw_processes() in suspend_prepare() and error in commit message. Updated. Signed-off-by: Tejun Heo Acked-by: Srivatsa S. Bhat --- kernel/freezer.c | 25 +++++++++---------------- kernel/power/hibernate.c | 15 ++------------- kernel/power/process.c | 16 ++++++++-------- kernel/power/suspend.c | 8 +++----- kernel/power/user.c | 4 +--- 5 files changed, 23 insertions(+), 45 deletions(-) (limited to 'kernel') diff --git a/kernel/freezer.c b/kernel/freezer.c index b8b562124ba9..11e32d419dec 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -129,21 +129,6 @@ out_unlock: return ret; } -void cancel_freezing(struct task_struct *p) -{ - unsigned long flags; - - spin_lock_irqsave(&freezer_lock, flags); - if (freezing(p)) { - pr_debug(" clean up: %s\n", p->comm); - clear_freeze_flag(p); - spin_lock(&p->sighand->siglock); - recalc_sigpending_and_wake(p); - spin_unlock(&p->sighand->siglock); - } - spin_unlock_irqrestore(&freezer_lock, flags); -} - void __thaw_task(struct task_struct *p) { unsigned long flags; @@ -153,10 +138,18 @@ void __thaw_task(struct task_struct *p) * be visible to @p as waking up implies wmb. Waking up inside * freezer_lock also prevents wakeups from leaking outside * refrigerator. + * + * If !FROZEN, @p hasn't reached refrigerator, recalc sigpending to + * avoid leaving dangling TIF_SIGPENDING behind. */ spin_lock_irqsave(&freezer_lock, flags); clear_freeze_flag(p); - if (frozen(p)) + if (frozen(p)) { wake_up_process(p); + } else { + spin_lock(&p->sighand->siglock); + recalc_sigpending_and_wake(p); + spin_unlock(&p->sighand->siglock); + } spin_unlock_irqrestore(&freezer_lock, flags); } diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 196c01268ebd..ba2319ffc860 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -607,17 +607,6 @@ static void power_down(void) while(1); } -static int prepare_processes(void) -{ - int error = 0; - - if (freeze_processes()) { - error = -EBUSY; - thaw_processes(); - } - return error; -} - /** * hibernate - Carry out system hibernation, including saving the image. */ @@ -650,7 +639,7 @@ int hibernate(void) sys_sync(); printk("done.\n"); - error = prepare_processes(); + error = freeze_processes(); if (error) goto Finish; @@ -811,7 +800,7 @@ static int software_resume(void) goto close_finish; pr_debug("PM: Preparing processes for restore.\n"); - error = prepare_processes(); + error = freeze_processes(); if (error) { swsusp_close(FMODE_READ); goto Done; diff --git a/kernel/power/process.c b/kernel/power/process.c index e59676f5811d..ce643838a00c 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -91,11 +91,6 @@ static int try_to_freeze_tasks(bool sig_only) elapsed_csecs = elapsed_csecs64; if (todo) { - /* This does not unfreeze processes that are already frozen - * (we have slightly ugly calling convention in that respect, - * and caller must call thaw_processes() if something fails), - * but it cleans up leftover PF_FREEZE requests. - */ printk("\n"); printk(KERN_ERR "Freezing of tasks %s after %d.%02d seconds " "(%d tasks refusing to freeze, wq_busy=%d):\n", @@ -103,14 +98,11 @@ static int try_to_freeze_tasks(bool sig_only) elapsed_csecs / 100, elapsed_csecs % 100, todo - wq_busy, wq_busy); - thaw_workqueues(); - read_lock(&tasklist_lock); do_each_thread(g, p) { if (!wakeup && !freezer_should_skip(p) && freezing(p) && !frozen(p)) sched_show_task(p); - cancel_freezing(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); } else { @@ -123,6 +115,8 @@ static int try_to_freeze_tasks(bool sig_only) /** * freeze_processes - Signal user space processes to enter the refrigerator. + * + * On success, returns 0. On failure, -errno and system is fully thawed. */ int freeze_processes(void) { @@ -137,11 +131,15 @@ int freeze_processes(void) printk("\n"); BUG_ON(in_atomic()); + if (error) + thaw_processes(); return error; } /** * freeze_kernel_threads - Make freezable kernel threads go to the refrigerator. + * + * On success, returns 0. On failure, -errno and system is fully thawed. */ int freeze_kernel_threads(void) { @@ -155,6 +153,8 @@ int freeze_kernel_threads(void) printk("\n"); BUG_ON(in_atomic()); + if (error) + thaw_processes(); return error; } diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 4953dc054c53..d336b27d1104 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -106,13 +106,11 @@ static int suspend_prepare(void) goto Finish; error = suspend_freeze_processes(); - if (error) { - suspend_stats.failed_freeze++; - dpm_save_failed_step(SUSPEND_FREEZE); - } else + if (!error) return 0; - suspend_thaw_processes(); + suspend_stats.failed_freeze++; + dpm_save_failed_step(SUSPEND_FREEZE); usermodehelper_enable(); Finish: pm_notifier_call_chain(PM_POST_SUSPEND); diff --git a/kernel/power/user.c b/kernel/power/user.c index 6d8f535c2b88..7cc3f5bc5c24 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -257,10 +257,8 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, break; error = freeze_processes(); - if (error) { - thaw_processes(); + if (error) usermodehelper_enable(); - } if (!error) data->frozen = 1; break; -- cgit From 22b4e111fa01a1147aa562ceaf18a752a928ef4e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:25 -0800 Subject: cgroup_freezer: prepare for removal of TIF_FREEZE TIF_FREEZE will be removed soon and freezing() will directly test whether any freezing condition is in effect. Make the following changes in preparation. * Rename cgroup_freezing_or_frozen() to cgroup_freezing() and make it return bool. * Make cgroup_freezing() access task_freezer() under rcu read lock instead of task_lock(). This makes the state dereferencing racy against task moving to another cgroup; however, it was already racy without this change as ->state dereference wasn't synchronized. This will be later dealt with using attach hooks. * freezer->state is now set before trying to push tasks into the target state. -v2: Oleg pointed out that freeze_change_state() was setting freeze->state incorrectly to CGROUP_FROZEN instead of CGROUP_FREEZING. Fixed. -v3: Matt pointed out that setting CGROUP_FROZEN used to always invoke try_to_freeze_cgroup() regardless of the current state. Patch updated such that the actual freeze/thaw operations are always performed on invocation. This shouldn't make any difference unless something is broken. Signed-off-by: Tejun Heo Acked-by: Paul Menage Cc: Li Zefan Cc: Oleg Nesterov --- kernel/cgroup_freezer.c | 40 +++++++++++++--------------------------- kernel/power/process.c | 2 +- 2 files changed, 14 insertions(+), 28 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index cd27b0825560..e6a1b8d1b8bc 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -48,19 +48,17 @@ static inline struct freezer *task_freezer(struct task_struct *task) struct freezer, css); } -static inline int __cgroup_freezing_or_frozen(struct task_struct *task) +bool cgroup_freezing(struct task_struct *task) { - enum freezer_state state = task_freezer(task)->state; - return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN); -} + enum freezer_state state; + bool ret; -int cgroup_freezing_or_frozen(struct task_struct *task) -{ - int result; - task_lock(task); - result = __cgroup_freezing_or_frozen(task); - task_unlock(task); - return result; + rcu_read_lock(); + state = task_freezer(task)->state; + ret = state == CGROUP_FREEZING || state == CGROUP_FROZEN; + rcu_read_unlock(); + + return ret; } /* @@ -102,9 +100,6 @@ struct cgroup_subsys freezer_subsys; * freezer_can_attach(): * cgroup_mutex (held by caller of can_attach) * - * cgroup_freezing_or_frozen(): - * task->alloc_lock (to get task's cgroup) - * * freezer_fork() (preserving fork() performance means can't take cgroup_mutex): * freezer->lock * sighand->siglock (if the cgroup is freezing) @@ -177,13 +172,7 @@ static int freezer_can_attach(struct cgroup_subsys *ss, static int freezer_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk) { - rcu_read_lock(); - if (__cgroup_freezing_or_frozen(tsk)) { - rcu_read_unlock(); - return -EBUSY; - } - rcu_read_unlock(); - return 0; + return cgroup_freezing(tsk) ? -EBUSY : 0; } static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) @@ -279,7 +268,6 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) struct task_struct *task; unsigned int num_cant_freeze_now = 0; - freezer->state = CGROUP_FREEZING; cgroup_iter_start(cgroup, &it); while ((task = cgroup_iter_next(cgroup, &it))) { if (!freeze_task(task, true)) @@ -303,8 +291,6 @@ static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) while ((task = cgroup_iter_next(cgroup, &it))) __thaw_task(task); cgroup_iter_end(cgroup, &it); - - freezer->state = CGROUP_THAWED; } static int freezer_change_state(struct cgroup *cgroup, @@ -318,20 +304,20 @@ static int freezer_change_state(struct cgroup *cgroup, spin_lock_irq(&freezer->lock); update_if_frozen(cgroup, freezer); - if (goal_state == freezer->state) - goto out; switch (goal_state) { case CGROUP_THAWED: + freezer->state = CGROUP_THAWED; unfreeze_cgroup(cgroup, freezer); break; case CGROUP_FROZEN: + freezer->state = CGROUP_FREEZING; retval = try_to_freeze_cgroup(cgroup, freezer); break; default: BUG(); } -out: + spin_unlock_irq(&freezer->lock); return retval; diff --git a/kernel/power/process.c b/kernel/power/process.c index ce643838a00c..9f6f5c755cfa 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -170,7 +170,7 @@ void thaw_processes(void) read_lock(&tasklist_lock); do_each_thread(g, p) { - if (cgroup_freezing_or_frozen(p)) + if (cgroup_freezing(p)) continue; __thaw_task(p); -- cgit From a3201227f803ad7fd43180c5195dbe5a2bf998aa Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:25 -0800 Subject: freezer: make freezing() test freeze conditions in effect instead of TIF_FREEZE Using TIF_FREEZE for freezing worked when there was only single freezing condition (the PM one); however, now there is also the cgroup_freezer and single bit flag is getting clumsy. thaw_processes() is already testing whether cgroup freezing in in effect to avoid thawing tasks which were frozen by both PM and cgroup freezers. This is racy (nothing prevents race against cgroup freezing) and fragile. A much simpler way is to test actual freeze conditions from freezing() - ie. directly test whether PM or cgroup freezing is in effect. This patch adds variables to indicate whether and what type of freezing conditions are in effect and reimplements freezing() such that it directly tests whether any of the two freezing conditions is active and the task should freeze. On fast path, freezing() is still very cheap - it only tests system_freezing_cnt. This makes the clumsy dancing aroung TIF_FREEZE unnecessary and freeze/thaw operations more usual - updating state variables for the new state and nudging target tasks so that they notice the new state and comply. As long as the nudging happens after state update, it's race-free. * This allows use of freezing() in freeze_task(). Replace the open coded tests with freezing(). * p != current test is added to warning printing conditions in try_to_freeze_tasks() failure path. This is necessary as freezing() is now true for the task which initiated freezing too. -v2: Oleg pointed out that re-freezing FROZEN cgroup could increment system_freezing_cnt. Fixed. Signed-off-by: Tejun Heo Acked-by: Paul Menage (for the cgroup portions) --- kernel/cgroup_freezer.c | 10 +++++++- kernel/fork.c | 1 - kernel/freezer.c | 62 +++++++++++++++++++++++++++++++------------------ kernel/power/process.c | 15 ++++++++---- 4 files changed, 60 insertions(+), 28 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index e6a1b8d1b8bc..2327ad11725f 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -145,7 +145,11 @@ static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss, static void freezer_destroy(struct cgroup_subsys *ss, struct cgroup *cgroup) { - kfree(cgroup_freezer(cgroup)); + struct freezer *freezer = cgroup_freezer(cgroup); + + if (freezer->state != CGROUP_THAWED) + atomic_dec(&system_freezing_cnt); + kfree(freezer); } /* @@ -307,10 +311,14 @@ static int freezer_change_state(struct cgroup *cgroup, switch (goal_state) { case CGROUP_THAWED: + if (freezer->state != CGROUP_THAWED) + atomic_dec(&system_freezing_cnt); freezer->state = CGROUP_THAWED; unfreeze_cgroup(cgroup, freezer); break; case CGROUP_FROZEN: + if (freezer->state == CGROUP_THAWED) + atomic_inc(&system_freezing_cnt); freezer->state = CGROUP_FREEZING; retval = try_to_freeze_cgroup(cgroup, freezer); break; diff --git a/kernel/fork.c b/kernel/fork.c index ba0d17261329..d53316e88d9d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -997,7 +997,6 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p) new_flags |= PF_FORKNOEXEC; new_flags |= PF_STARTING; p->flags = new_flags; - clear_freeze_flag(p); } SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) diff --git a/kernel/freezer.c b/kernel/freezer.c index 11e32d419dec..f53cd5aa5b2e 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -11,9 +11,41 @@ #include #include +/* total number of freezing conditions in effect */ +atomic_t system_freezing_cnt = ATOMIC_INIT(0); +EXPORT_SYMBOL(system_freezing_cnt); + +/* indicate whether PM freezing is in effect, protected by pm_mutex */ +bool pm_freezing; +bool pm_nosig_freezing; + /* protects freezing and frozen transitions */ static DEFINE_SPINLOCK(freezer_lock); +/** + * freezing_slow_path - slow path for testing whether a task needs to be frozen + * @p: task to be tested + * + * This function is called by freezing() if system_freezing_cnt isn't zero + * and tests whether @p needs to enter and stay in frozen state. Can be + * called under any context. The freezers are responsible for ensuring the + * target tasks see the updated state. + */ +bool freezing_slow_path(struct task_struct *p) +{ + if (p->flags & PF_NOFREEZE) + return false; + + if (pm_nosig_freezing || cgroup_freezing(p)) + return true; + + if (pm_freezing && !(p->flags & PF_FREEZER_NOSIG)) + return true; + + return false; +} +EXPORT_SYMBOL(freezing_slow_path); + /* Refrigerator is place where frozen processes are stored :-). */ bool __refrigerator(bool check_kthr_stop) { @@ -23,17 +55,11 @@ bool __refrigerator(bool check_kthr_stop) long save; /* - * Enter FROZEN. If NOFREEZE, schedule immediate thawing by - * clearing freezing. + * No point in checking freezing() again - the caller already did. + * Proceed to enter FROZEN. */ spin_lock_irq(&freezer_lock); repeat: - if (!freezing(current)) { - spin_unlock_irq(&freezer_lock); - return was_frozen; - } - if (current->flags & PF_NOFREEZE) - clear_freeze_flag(current); current->flags |= PF_FROZEN; spin_unlock_irq(&freezer_lock); @@ -99,18 +125,12 @@ static void fake_signal_wake_up(struct task_struct *p) bool freeze_task(struct task_struct *p, bool sig_only) { unsigned long flags; - bool ret = false; spin_lock_irqsave(&freezer_lock, flags); - - if ((p->flags & PF_NOFREEZE) || - (sig_only && !should_send_signal(p))) - goto out_unlock; - - if (frozen(p)) - goto out_unlock; - - set_freeze_flag(p); + if (!freezing(p) || frozen(p)) { + spin_unlock_irqrestore(&freezer_lock, flags); + return false; + } if (should_send_signal(p)) { fake_signal_wake_up(p); @@ -123,10 +143,9 @@ bool freeze_task(struct task_struct *p, bool sig_only) } else { wake_up_state(p, TASK_INTERRUPTIBLE); } - ret = true; -out_unlock: + spin_unlock_irqrestore(&freezer_lock, flags); - return ret; + return true; } void __thaw_task(struct task_struct *p) @@ -143,7 +162,6 @@ void __thaw_task(struct task_struct *p) * avoid leaving dangling TIF_SIGPENDING behind. */ spin_lock_irqsave(&freezer_lock, flags); - clear_freeze_flag(p); if (frozen(p)) { wake_up_process(p); } else { diff --git a/kernel/power/process.c b/kernel/power/process.c index 9f6f5c755cfa..0beb51e1dec9 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -101,7 +101,7 @@ static int try_to_freeze_tasks(bool sig_only) read_lock(&tasklist_lock); do_each_thread(g, p) { if (!wakeup && !freezer_should_skip(p) && - freezing(p) && !frozen(p)) + p != current && freezing(p) && !frozen(p)) sched_show_task(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); @@ -122,7 +122,11 @@ int freeze_processes(void) { int error; + if (!pm_freezing) + atomic_inc(&system_freezing_cnt); + printk("Freezing user space processes ... "); + pm_freezing = true; error = try_to_freeze_tasks(true); if (!error) { printk("done."); @@ -146,6 +150,7 @@ int freeze_kernel_threads(void) int error; printk("Freezing remaining freezable tasks ... "); + pm_nosig_freezing = true; error = try_to_freeze_tasks(false); if (!error) printk("done."); @@ -162,6 +167,11 @@ void thaw_processes(void) { struct task_struct *g, *p; + if (pm_freezing) + atomic_dec(&system_freezing_cnt); + pm_freezing = false; + pm_nosig_freezing = false; + oom_killer_enable(); printk("Restarting tasks ... "); @@ -170,9 +180,6 @@ void thaw_processes(void) read_lock(&tasklist_lock); do_each_thread(g, p) { - if (cgroup_freezing(p)) - continue; - __thaw_task(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); -- cgit From 948246f70a811c872b9d93bb4a8ab5823c4c79e0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:25 -0800 Subject: freezer: remove should_send_signal() and update frozen() should_send_signal() is only used in freezer.c. Exporting them only increases chance of abuse. Open code the two users and remove it. Update frozen() to return bool. Signed-off-by: Tejun Heo --- kernel/freezer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/freezer.c b/kernel/freezer.c index f53cd5aa5b2e..95a123844241 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -132,7 +132,7 @@ bool freeze_task(struct task_struct *p, bool sig_only) return false; } - if (should_send_signal(p)) { + if (!(p->flags & PF_FREEZER_NOSIG)) { fake_signal_wake_up(p); /* * fake_signal_wake_up() goes through p's scheduler -- cgit From 96ee6d8539c9fc6742908d85eb9723abb5c91854 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:25 -0800 Subject: freezer: fix set_freezable[_with_signal]() race A kthread doing set_freezable*() may race with on-going PM freeze and the freezer might think all tasks are frozen while the new freezable kthread is merrily proceeding to execute code paths which aren't supposed to be executing during PM freeze. Reimplement set_freezable[_with_signal]() using __set_freezable() such that freezable PF flags are modified under freezer_lock and try_to_freeze() is called afterwards. This eliminates race condition against freezing. Note: Separated out from larger patch to resolve fix order dependency Oleg pointed out. Signed-off-by: Tejun Heo Cc: Oleg Nesterov --- kernel/freezer.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'kernel') diff --git a/kernel/freezer.c b/kernel/freezer.c index 95a123844241..b1e7a7b3d2cd 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -171,3 +171,28 @@ void __thaw_task(struct task_struct *p) } spin_unlock_irqrestore(&freezer_lock, flags); } + +/** + * __set_freezable - make %current freezable + * @with_signal: do we want %TIF_SIGPENDING for notification too? + * + * Mark %current freezable and enter refrigerator if necessary. + */ +bool __set_freezable(bool with_signal) +{ + might_sleep(); + + /* + * Modify flags while holding freezer_lock. This ensures the + * freezer notices that we aren't frozen yet or the freezing + * condition is visible to try_to_freeze() below. + */ + spin_lock_irq(&freezer_lock); + current->flags &= ~PF_NOFREEZE; + if (with_signal) + current->flags &= ~PF_FREEZER_NOSIG; + spin_unlock_irq(&freezer_lock); + + return try_to_freeze(); +} +EXPORT_SYMBOL(__set_freezable); -- cgit From 5ece3eae4cdb968c269e0bc7e2c0e2b223552025 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:26 -0800 Subject: freezer: restructure __refrigerator() If another freeze happens before all tasks leave FROZEN state after being thawed, the freezer can see the existing FROZEN and consider the tasks to be frozen but they can clear FROZEN without checking the new freezing(). Oleg suggested restructuring __refrigerator() such that there's single condition check section inside freezer_lock and sigpending is cleared afterwards, which fixes the problem and simplifies the code. Restructure accordingly. -v2: Frozen loop exited without releasing freezer_lock. Fixed. Signed-off-by: Tejun Heo Reported-by: Oleg Nesterov Acked-by: Oleg Nesterov Cc: "Rafael J. Wysocki" --- kernel/freezer.c | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) (limited to 'kernel') diff --git a/kernel/freezer.c b/kernel/freezer.c index b1e7a7b3d2cd..c3496c6a91b2 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -52,39 +52,29 @@ bool __refrigerator(bool check_kthr_stop) /* Hmm, should we be allowed to suspend when there are realtime processes around? */ bool was_frozen = false; - long save; + long save = current->state; - /* - * No point in checking freezing() again - the caller already did. - * Proceed to enter FROZEN. - */ - spin_lock_irq(&freezer_lock); -repeat: - current->flags |= PF_FROZEN; - spin_unlock_irq(&freezer_lock); - - save = current->state; pr_debug("%s entered refrigerator\n", current->comm); - spin_lock_irq(¤t->sighand->siglock); - recalc_sigpending(); /* We sent fake signal, clean it up */ - spin_unlock_irq(¤t->sighand->siglock); - for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); + + spin_lock_irq(&freezer_lock); + current->flags |= PF_FROZEN; if (!freezing(current) || (check_kthr_stop && kthread_should_stop())) + current->flags &= ~PF_FROZEN; + spin_unlock_irq(&freezer_lock); + + if (!(current->flags & PF_FROZEN)) break; was_frozen = true; schedule(); } - /* leave FROZEN */ - spin_lock_irq(&freezer_lock); - if (freezing(current)) - goto repeat; - current->flags &= ~PF_FROZEN; - spin_unlock_irq(&freezer_lock); + spin_lock_irq(¤t->sighand->siglock); + recalc_sigpending(); /* We sent fake signal, clean it up */ + spin_unlock_irq(¤t->sighand->siglock); pr_debug("%s left refrigerator\n", current->comm); -- cgit From 37ad8aca94a1da2112a7c56151390914e80d1113 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:26 -0800 Subject: freezer: use lock_task_sighand() in fake_signal_wake_up() cgroup_freezer calls freeze_task() without holding tasklist_lock and, if the task is exiting, its ->sighand may be gone by the time fake_signal_wake_up() is called. Use lock_task_sighand() instead of accessing ->sighand directly. Signed-off-by: Tejun Heo Reported-by: Oleg Nesterov Acked-by: Oleg Nesterov Cc: "Rafael J. Wysocki" Cc: Paul Menage --- kernel/freezer.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/freezer.c b/kernel/freezer.c index c3496c6a91b2..389549f0a94e 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -93,9 +93,10 @@ static void fake_signal_wake_up(struct task_struct *p) { unsigned long flags; - spin_lock_irqsave(&p->sighand->siglock, flags); - signal_wake_up(p, 0); - spin_unlock_irqrestore(&p->sighand->siglock, flags); + if (lock_task_sighand(p, &flags)) { + signal_wake_up(p, 0); + unlock_task_sighand(p, &flags); + } } /** -- cgit From 839e3407d90a810318d17c17ceb3d5928a910704 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:26 -0800 Subject: freezer: remove unused @sig_only from freeze_task() After "freezer: make freezing() test freeze conditions in effect instead of TIF_FREEZE", freezing() returns authoritative answer on whether the current task should freeze or not and freeze_task() doesn't need or use @sig_only. Remove it. While at it, rewrite function comment for freeze_task() and rename @sig_only to @user_only in try_to_freeze_tasks(). This patch doesn't cause any functional change. Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov --- kernel/cgroup_freezer.c | 4 ++-- kernel/freezer.c | 21 +++++++++------------ kernel/power/process.c | 8 ++++---- 3 files changed, 15 insertions(+), 18 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 2327ad11725f..e411a60cc2c8 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -206,7 +206,7 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) /* Locking avoids race with FREEZING -> THAWED transitions. */ if (freezer->state == CGROUP_FREEZING) - freeze_task(task, true); + freeze_task(task); spin_unlock_irq(&freezer->lock); } @@ -274,7 +274,7 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) cgroup_iter_start(cgroup, &it); while ((task = cgroup_iter_next(cgroup, &it))) { - if (!freeze_task(task, true)) + if (!freeze_task(task)) continue; if (frozen(task)) continue; diff --git a/kernel/freezer.c b/kernel/freezer.c index 389549f0a94e..2589a61de44c 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -100,20 +100,17 @@ static void fake_signal_wake_up(struct task_struct *p) } /** - * freeze_task - send a freeze request to given task - * @p: task to send the request to - * @sig_only: if set, the request will only be sent if the task has the - * PF_FREEZER_NOSIG flag unset - * Return value: 'false', if @sig_only is set and the task has - * PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise + * freeze_task - send a freeze request to given task + * @p: task to send the request to * - * The freeze request is sent by setting the tasks's TIF_FREEZE flag and - * either sending a fake signal to it or waking it up, depending on whether - * or not it has PF_FREEZER_NOSIG set. If @sig_only is set and the task - * has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its - * TIF_FREEZE flag will not be set. + * If @p is freezing, the freeze request is sent by setting %TIF_FREEZE + * flag and either sending a fake signal to it or waking it up, depending + * on whether it has %PF_FREEZER_NOSIG set. + * + * RETURNS: + * %false, if @p is not freezing or already frozen; %true, otherwise */ -bool freeze_task(struct task_struct *p, bool sig_only) +bool freeze_task(struct task_struct *p) { unsigned long flags; diff --git a/kernel/power/process.c b/kernel/power/process.c index 0beb51e1dec9..77274c9ba2f1 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -22,7 +22,7 @@ */ #define TIMEOUT (20 * HZ) -static int try_to_freeze_tasks(bool sig_only) +static int try_to_freeze_tasks(bool user_only) { struct task_struct *g, *p; unsigned long end_time; @@ -37,14 +37,14 @@ static int try_to_freeze_tasks(bool sig_only) end_time = jiffies + TIMEOUT; - if (!sig_only) + if (!user_only) freeze_workqueues_begin(); while (true) { todo = 0; read_lock(&tasklist_lock); do_each_thread(g, p) { - if (p == current || !freeze_task(p, sig_only)) + if (p == current || !freeze_task(p)) continue; /* @@ -65,7 +65,7 @@ static int try_to_freeze_tasks(bool sig_only) } while_each_thread(g, p); read_unlock(&tasklist_lock); - if (!sig_only) { + if (!user_only) { wq_busy = freeze_workqueues_busy(); todo += wq_busy; } -- cgit From 34b087e48367c252e343c2f8de65676a78af1e4a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Nov 2011 09:28:17 -0800 Subject: freezer: kill unused set_freezable_with_signal() There's no in-kernel user of set_freezable_with_signal() left. Mixing TIF_SIGPENDING with kernel threads can lead to nasty corner cases as kernel threads never travel signal delivery path on their own. e.g. the current implementation is buggy in the cancelation path of __thaw_task(). It calls recalc_sigpending_and_wake() in an attempt to clear TIF_SIGPENDING but the function never clears it regardless of sigpending state. This means that signallable freezable kthreads may continue executing with !freezing() && stuck TIF_SIGPENDING, which can be troublesome. This patch removes set_freezable_with_signal() along with PF_FREEZER_NOSIG and recalc_sigpending*() calls in freezer. User tasks get TIF_SIGPENDING, kernel tasks get woken up and the spurious sigpending is dealt with in the usual signal delivery path. Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov --- kernel/freezer.c | 27 ++++++--------------------- kernel/kthread.c | 2 +- 2 files changed, 7 insertions(+), 22 deletions(-) (limited to 'kernel') diff --git a/kernel/freezer.c b/kernel/freezer.c index 2589a61de44c..9815b8d1eed5 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -39,7 +39,7 @@ bool freezing_slow_path(struct task_struct *p) if (pm_nosig_freezing || cgroup_freezing(p)) return true; - if (pm_freezing && !(p->flags & PF_FREEZER_NOSIG)) + if (pm_freezing && !(p->flags & PF_KTHREAD)) return true; return false; @@ -72,10 +72,6 @@ bool __refrigerator(bool check_kthr_stop) schedule(); } - spin_lock_irq(¤t->sighand->siglock); - recalc_sigpending(); /* We sent fake signal, clean it up */ - spin_unlock_irq(¤t->sighand->siglock); - pr_debug("%s left refrigerator\n", current->comm); /* @@ -120,7 +116,7 @@ bool freeze_task(struct task_struct *p) return false; } - if (!(p->flags & PF_FREEZER_NOSIG)) { + if (!(p->flags & PF_KTHREAD)) { fake_signal_wake_up(p); /* * fake_signal_wake_up() goes through p's scheduler @@ -145,28 +141,19 @@ void __thaw_task(struct task_struct *p) * be visible to @p as waking up implies wmb. Waking up inside * freezer_lock also prevents wakeups from leaking outside * refrigerator. - * - * If !FROZEN, @p hasn't reached refrigerator, recalc sigpending to - * avoid leaving dangling TIF_SIGPENDING behind. */ spin_lock_irqsave(&freezer_lock, flags); - if (frozen(p)) { + if (frozen(p)) wake_up_process(p); - } else { - spin_lock(&p->sighand->siglock); - recalc_sigpending_and_wake(p); - spin_unlock(&p->sighand->siglock); - } spin_unlock_irqrestore(&freezer_lock, flags); } /** - * __set_freezable - make %current freezable - * @with_signal: do we want %TIF_SIGPENDING for notification too? + * set_freezable - make %current freezable * * Mark %current freezable and enter refrigerator if necessary. */ -bool __set_freezable(bool with_signal) +bool set_freezable(void) { might_sleep(); @@ -177,10 +164,8 @@ bool __set_freezable(bool with_signal) */ spin_lock_irq(&freezer_lock); current->flags &= ~PF_NOFREEZE; - if (with_signal) - current->flags &= ~PF_FREEZER_NOSIG; spin_unlock_irq(&freezer_lock); return try_to_freeze(); } -EXPORT_SYMBOL(__set_freezable); +EXPORT_SYMBOL(set_freezable); diff --git a/kernel/kthread.c b/kernel/kthread.c index 1c36deaae2f1..3d3de633702e 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -282,7 +282,7 @@ int kthreadd(void *unused) set_cpus_allowed_ptr(tsk, cpu_all_mask); set_mems_allowed(node_states[N_HIGH_MEMORY]); - current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG; + current->flags |= PF_NOFREEZE; for (;;) { set_current_state(TASK_INTERRUPTIBLE); -- cgit From 341d4166175e9b7911444f5a33b1c9efb8f15c85 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Sat, 19 Nov 2011 14:39:01 +0100 Subject: PM: Fix indentation and remove extraneous whitespaces in kernel/power/main.c Lack of proper indentation of the goto statement decreases the readability of code significantly. In fact, this made me look twice at the code to check whether it really does what it should be doing. Fix this. And in the same file, there are some extra whitespaces. Get rid of them too. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki --- kernel/power/main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/power/main.c b/kernel/power/main.c index 36e0f0903c32..7d36fb31e4c4 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -3,7 +3,7 @@ * * Copyright (c) 2003 Patrick Mochel * Copyright (c) 2003 Open Source Development Lab - * + * * This file is released under the GPLv2 * */ @@ -240,7 +240,7 @@ struct kobject *power_kobj; * 'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and * 'disk' (Suspend-to-Disk). * - * store() accepts one of those strings, translates it into the + * store() accepts one of those strings, translates it into the * proper enumerated value, and initiates a suspend transition. */ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, @@ -282,7 +282,7 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, /* First, check if we are requested to hibernate */ if (len == 4 && !strncmp(buf, "disk", len)) { error = hibernate(); - goto Exit; + goto Exit; } #ifdef CONFIG_SUSPEND -- cgit From 953a206393b1533ceb0e7d725cc5a8c8d7ed97dd Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Tue, 22 Nov 2011 23:20:31 +0100 Subject: PM / Hibernate: Refactor and simplify hibernation_snapshot() code The goto statements in hibernation_snapshot() are a bit complex. Refactor the code to remove some of them, thereby simplifying the implementation. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index a6b0503574ee..ebf62c3bc9f7 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -333,7 +333,7 @@ static int create_image(int platform_mode) */ int hibernation_snapshot(int platform_mode) { - pm_message_t msg = PMSG_RECOVER; + pm_message_t msg; int error; error = platform_begin(platform_mode); @@ -362,26 +362,26 @@ int hibernation_snapshot(int platform_mode) error = dpm_prepare(PMSG_FREEZE); if (error) { - dpm_complete(msg); + dpm_complete(PMSG_RECOVER); goto Cleanup; } suspend_console(); pm_restrict_gfp_mask(); + error = dpm_suspend(PMSG_FREEZE); - if (error) - goto Recover_platform; - if (hibernation_test(TEST_DEVICES)) - goto Recover_platform; + if (error || hibernation_test(TEST_DEVICES)) + platform_recover(platform_mode); + else + error = create_image(platform_mode); - error = create_image(platform_mode); /* - * Control returns here (1) after the image has been created or the + * In the case that we call create_image() above, the control + * returns here (1) after the image has been created or the * image creation has failed and (2) after a successful restore. */ - Resume_devices: /* We may need to release the preallocated image pages here. */ if (error || !in_suspend) swsusp_free(); @@ -399,10 +399,6 @@ int hibernation_snapshot(int platform_mode) platform_end(platform_mode); return error; - Recover_platform: - platform_recover(platform_mode); - goto Resume_devices; - Cleanup: swsusp_free(); goto Close; -- cgit From 0118521cc7acb3ccbc1a01d6144ac32be9d56a4c Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Thu, 1 Dec 2011 22:32:43 +0100 Subject: PM / Hibernate: Enable usermodehelpers in software_resume() error path In the software_resume() function defined in kernel/power/hibernate.c, if the call to create_basic_memory_bitmaps() fails, the usermodehelpers are not enabled (which had been disabled in the previous step). Fix it. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index ebf62c3bc9f7..1fcf9de4506d 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -807,8 +807,10 @@ static int software_resume(void) goto close_finish; error = create_basic_memory_bitmaps(); - if (error) + if (error) { + usermodehelper_enable(); goto close_finish; + } pr_debug("PM: Preparing processes for restore.\n"); error = prepare_processes(); -- cgit From 97819a26224f019e73d88bb2fd4eb5a614860461 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Thu, 1 Dec 2011 22:33:10 +0100 Subject: PM / Hibernate: Thaw processes in SNAPSHOT_CREATE_IMAGE ioctl test path Commit 2aede851ddf08666f68ffc17be446420e9d2a056 (PM / Hibernate: Freeze kernel threads after preallocating memory) moved the freezing of kernel threads to hibernation_snapshot() function. So now, if the call to hibernation_snapshot() returns early due to a successful hibernation test, the caller has to thaw processes to ensure that the system gets back to its original state. But in SNAPSHOT_CREATE_IMAGE hibernation ioctl, the caller does not thaw processes in case hibernation_snapshot() returned due to a successful freezer test. Fix this issue. But note we still send the value of 'in_suspend' (which is now 0) to userspace, because we are not in an error path per-se, and moreover, the value of in_suspend correctly depicts the situation here. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 2 +- kernel/power/power.h | 2 ++ kernel/power/user.c | 11 ++++++++--- 3 files changed, 11 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 1fcf9de4506d..c10cb0f916e2 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -55,7 +55,7 @@ enum { static int hibernation_mode = HIBERNATION_SHUTDOWN; -static bool freezer_test_done; +bool freezer_test_done; static const struct platform_hibernation_ops *hibernation_ops; diff --git a/kernel/power/power.h b/kernel/power/power.h index 23a2db1ec442..0c4defe6d3b8 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -50,6 +50,8 @@ static inline char *check_image_kernel(struct swsusp_info *info) #define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT) /* kernel/power/hibernate.c */ +extern bool freezer_test_done; + extern int hibernation_snapshot(int platform_mode); extern int hibernation_restore(int platform_mode); extern int hibernation_platform_enter(void); diff --git a/kernel/power/user.c b/kernel/power/user.c index 6d8f535c2b88..e2aff0fc2697 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -283,10 +283,15 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, } pm_restore_gfp_mask(); error = hibernation_snapshot(data->platform_support); - if (!error) + if (!error) { error = put_user(in_suspend, (int __user *)arg); - if (!error) - data->ready = 1; + if (!error && !freezer_test_done) + data->ready = 1; + if (freezer_test_done) { + freezer_test_done = false; + thaw_processes(); + } + } break; case SNAPSHOT_ATOMIC_RESTORE: -- cgit From 48580ab8729865c81e148d59159fbe2aa7865511 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Thu, 1 Dec 2011 22:33:20 +0100 Subject: PM / Hibernate: Remove deprecated hibernation test modes The hibernation test modes 'test' and 'testproc' are deprecated, because the 'pm_test' framework offers much more fine-grained control for debugging suspend and hibernation related problems. So, remove the deprecated 'test' and 'testproc' hibernation test modes. Suggested-by: Rafael J. Wysocki Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 33 ++++----------------------------- 1 file changed, 4 insertions(+), 29 deletions(-) (limited to 'kernel') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index c10cb0f916e2..5314a94a92c1 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -43,8 +43,6 @@ int in_suspend __nosavedata; enum { HIBERNATION_INVALID, HIBERNATION_PLATFORM, - HIBERNATION_TEST, - HIBERNATION_TESTPROC, HIBERNATION_SHUTDOWN, HIBERNATION_REBOOT, /* keep last */ @@ -96,15 +94,6 @@ static void hibernation_debug_sleep(void) mdelay(5000); } -static int hibernation_testmode(int mode) -{ - if (hibernation_mode == mode) { - hibernation_debug_sleep(); - return 1; - } - return 0; -} - static int hibernation_test(int level) { if (pm_test_level == level) { @@ -114,7 +103,6 @@ static int hibernation_test(int level) return 0; } #else /* !CONFIG_PM_DEBUG */ -static int hibernation_testmode(int mode) { return 0; } static int hibernation_test(int level) { return 0; } #endif /* !CONFIG_PM_DEBUG */ @@ -278,8 +266,7 @@ static int create_image(int platform_mode) goto Platform_finish; error = disable_nonboot_cpus(); - if (error || hibernation_test(TEST_CPUS) - || hibernation_testmode(HIBERNATION_TEST)) + if (error || hibernation_test(TEST_CPUS)) goto Enable_cpus; local_irq_disable(); @@ -349,8 +336,7 @@ int hibernation_snapshot(int platform_mode) if (error) goto Cleanup; - if (hibernation_test(TEST_FREEZER) || - hibernation_testmode(HIBERNATION_TESTPROC)) { + if (hibernation_test(TEST_FREEZER)) { /* * Indicate to the caller that we are returning due to a @@ -586,9 +572,6 @@ int hibernation_platform_enter(void) static void power_down(void) { switch (hibernation_mode) { - case HIBERNATION_TEST: - case HIBERNATION_TESTPROC: - break; case HIBERNATION_REBOOT: kernel_restart(NULL); break; @@ -853,8 +836,6 @@ static const char * const hibernation_modes[] = { [HIBERNATION_PLATFORM] = "platform", [HIBERNATION_SHUTDOWN] = "shutdown", [HIBERNATION_REBOOT] = "reboot", - [HIBERNATION_TEST] = "test", - [HIBERNATION_TESTPROC] = "testproc", }; /* @@ -863,17 +844,15 @@ static const char * const hibernation_modes[] = { * Hibernation can be handled in several ways. There are a few different ways * to put the system into the sleep state: using the platform driver (e.g. ACPI * or other hibernation_ops), powering it off or rebooting it (for testing - * mostly), or using one of the two available test modes. + * mostly). * * The sysfs file /sys/power/disk provides an interface for selecting the * hibernation mode to use. Reading from this file causes the available modes - * to be printed. There are 5 modes that can be supported: + * to be printed. There are 3 modes that can be supported: * * 'platform' * 'shutdown' * 'reboot' - * 'test' - * 'testproc' * * If a platform hibernation driver is in use, 'platform' will be supported * and will be used by default. Otherwise, 'shutdown' will be used by default. @@ -897,8 +876,6 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr, switch (i) { case HIBERNATION_SHUTDOWN: case HIBERNATION_REBOOT: - case HIBERNATION_TEST: - case HIBERNATION_TESTPROC: break; case HIBERNATION_PLATFORM: if (hibernation_ops) @@ -939,8 +916,6 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, switch (mode) { case HIBERNATION_SHUTDOWN: case HIBERNATION_REBOOT: - case HIBERNATION_TEST: - case HIBERNATION_TESTPROC: hibernation_mode = mode; break; case HIBERNATION_PLATFORM: -- cgit From e5b16746f0f2d6883c226af52d90904ce0f7eee8 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Sat, 3 Dec 2011 00:20:30 +0100 Subject: PM / Hibernate: Replace unintuitive 'if' condition in kernel/power/user.c with 'else' In the snapshot_ioctl() function, under SNAPSHOT_FREEZE, the code below freeze_processes() is a bit unintuitive. Improve it by replacing the second 'if' condition with an 'else' clause. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki --- kernel/power/user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/power/user.c b/kernel/power/user.c index c202e2e1a2d5..06ea33df8560 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -259,7 +259,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, error = freeze_processes(); if (error) usermodehelper_enable(); - if (!error) + else data->frozen = 1; break; -- cgit From bcda53faf5814c0c6025a0bd47108adfcbe9f199 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Wed, 7 Dec 2011 22:29:54 +0100 Subject: PM / Sleep: Replace mutex_[un]lock(&pm_mutex) with [un]lock_system_sleep() Using [un]lock_system_sleep() is safer than directly using mutex_[un]lock() on 'pm_mutex', since the latter could lead to freezing failures. Hence convert all the present users of mutex_[un]lock(&pm_mutex) to use these safe APIs instead. Suggested-by: Tejun Heo Signed-off-by: Srivatsa S. Bhat Reviewed-by: Simon Horman Signed-off-by: Rafael J. Wysocki --- kernel/kexec.c | 4 ++-- kernel/power/hibernate.c | 16 ++++++++-------- kernel/power/main.c | 4 ++-- kernel/power/suspend.c | 4 ++-- kernel/power/user.c | 16 ++++++++-------- 5 files changed, 22 insertions(+), 22 deletions(-) (limited to 'kernel') diff --git a/kernel/kexec.c b/kernel/kexec.c index dc7bc0829286..090ee10d9604 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1523,7 +1523,7 @@ int kernel_kexec(void) #ifdef CONFIG_KEXEC_JUMP if (kexec_image->preserve_context) { - mutex_lock(&pm_mutex); + lock_system_sleep(); pm_prepare_console(); error = freeze_processes(); if (error) { @@ -1576,7 +1576,7 @@ int kernel_kexec(void) thaw_processes(); Restore_console: pm_restore_console(); - mutex_unlock(&pm_mutex); + unlock_system_sleep(); } #endif diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 605149a6d219..6d6d28870335 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -69,14 +69,14 @@ void hibernation_set_ops(const struct platform_hibernation_ops *ops) WARN_ON(1); return; } - mutex_lock(&pm_mutex); + lock_system_sleep(); hibernation_ops = ops; if (ops) hibernation_mode = HIBERNATION_PLATFORM; else if (hibernation_mode == HIBERNATION_PLATFORM) hibernation_mode = HIBERNATION_SHUTDOWN; - mutex_unlock(&pm_mutex); + unlock_system_sleep(); } static bool entering_platform_hibernation; @@ -597,7 +597,7 @@ int hibernate(void) { int error; - mutex_lock(&pm_mutex); + lock_system_sleep(); /* The snapshot device should not be opened while we're running */ if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { error = -EBUSY; @@ -665,7 +665,7 @@ int hibernate(void) pm_restore_console(); atomic_inc(&snapshot_device_available); Unlock: - mutex_unlock(&pm_mutex); + unlock_system_sleep(); return error; } @@ -893,7 +893,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, p = memchr(buf, '\n', n); len = p ? p - buf : n; - mutex_lock(&pm_mutex); + lock_system_sleep(); for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { if (len == strlen(hibernation_modes[i]) && !strncmp(buf, hibernation_modes[i], len)) { @@ -919,7 +919,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, if (!error) pr_debug("PM: Hibernation mode set to '%s'\n", hibernation_modes[mode]); - mutex_unlock(&pm_mutex); + unlock_system_sleep(); return error ? error : n; } @@ -946,9 +946,9 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, if (maj != MAJOR(res) || min != MINOR(res)) goto out; - mutex_lock(&pm_mutex); + lock_system_sleep(); swsusp_resume_device = res; - mutex_unlock(&pm_mutex); + unlock_system_sleep(); printk(KERN_INFO "PM: Starting manual resume from disk\n"); noresume = 0; software_resume(); diff --git a/kernel/power/main.c b/kernel/power/main.c index 7d36fb31e4c4..9824b41e5a18 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -116,7 +116,7 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr, p = memchr(buf, '\n', n); len = p ? p - buf : n; - mutex_lock(&pm_mutex); + lock_system_sleep(); level = TEST_FIRST; for (s = &pm_tests[level]; level <= TEST_MAX; s++, level++) @@ -126,7 +126,7 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr, break; } - mutex_unlock(&pm_mutex); + unlock_system_sleep(); return error ? error : n; } diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index d336b27d1104..4fd51beed879 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -42,9 +42,9 @@ static const struct platform_suspend_ops *suspend_ops; */ void suspend_set_ops(const struct platform_suspend_ops *ops) { - mutex_lock(&pm_mutex); + lock_system_sleep(); suspend_ops = ops; - mutex_unlock(&pm_mutex); + unlock_system_sleep(); } EXPORT_SYMBOL_GPL(suspend_set_ops); diff --git a/kernel/power/user.c b/kernel/power/user.c index 06ea33df8560..98ade217da6c 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -71,7 +71,7 @@ static int snapshot_open(struct inode *inode, struct file *filp) struct snapshot_data *data; int error; - mutex_lock(&pm_mutex); + lock_system_sleep(); if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { error = -EBUSY; @@ -123,7 +123,7 @@ static int snapshot_open(struct inode *inode, struct file *filp) data->platform_support = 0; Unlock: - mutex_unlock(&pm_mutex); + unlock_system_sleep(); return error; } @@ -132,7 +132,7 @@ static int snapshot_release(struct inode *inode, struct file *filp) { struct snapshot_data *data; - mutex_lock(&pm_mutex); + lock_system_sleep(); swsusp_free(); free_basic_memory_bitmaps(); @@ -146,7 +146,7 @@ static int snapshot_release(struct inode *inode, struct file *filp) PM_POST_HIBERNATION : PM_POST_RESTORE); atomic_inc(&snapshot_device_available); - mutex_unlock(&pm_mutex); + unlock_system_sleep(); return 0; } @@ -158,7 +158,7 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf, ssize_t res; loff_t pg_offp = *offp & ~PAGE_MASK; - mutex_lock(&pm_mutex); + lock_system_sleep(); data = filp->private_data; if (!data->ready) { @@ -179,7 +179,7 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf, *offp += res; Unlock: - mutex_unlock(&pm_mutex); + unlock_system_sleep(); return res; } @@ -191,7 +191,7 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, ssize_t res; loff_t pg_offp = *offp & ~PAGE_MASK; - mutex_lock(&pm_mutex); + lock_system_sleep(); data = filp->private_data; @@ -208,7 +208,7 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, if (res > 0) *offp += res; unlock: - mutex_unlock(&pm_mutex); + unlock_system_sleep(); return res; } -- cgit From b298d289c79211508f11cb50749b0d1d54eb244a Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Fri, 9 Dec 2011 23:36:36 +0100 Subject: PM / Sleep: Fix freezer failures due to racy usermodehelper_is_disabled() Commit a144c6a (PM: Print a warning if firmware is requested when tasks are frozen) introduced usermodehelper_is_disabled() to warn and exit immediately if firmware is requested when usermodehelpers are disabled. However, it is racy. Consider the following scenario, currently used in drivers/base/firmware_class.c: ... if (usermodehelper_is_disabled()) goto out; /* Do actual work */ ... out: return err; Nothing prevents someone from disabling usermodehelpers just after the check in the 'if' condition, which means that it is quite possible to try doing the "actual work" with usermodehelpers disabled, leading to undesirable consequences. In particular, this race condition in _request_firmware() causes task freezing failures whenever suspend/hibernation is in progress because, it wrongly waits to get the firmware/microcode image from userspace when actually the usermodehelpers are disabled or userspace has been frozen. Some of the example scenarios that cause freezing failures due to this race are those that depend on userspace via request_firmware(), such as x86 microcode module initialization and microcode image reload. Previous discussions about this issue can be found at: http://thread.gmane.org/gmane.linux.kernel/1198291/focus=1200591 This patch adds proper synchronization to fix this issue. It is to be noted that this patchset fixes the freezing failures but doesn't remove the warnings. IOW, it does not attempt to add explicit synchronization to x86 microcode driver to avoid requesting microcode image at inopportune moments. Because, the warnings were introduced to highlight such cases, in the first place. And we need not silence the warnings, since we take care of the *real* problem (freezing failure) and hence, after that, the warnings are pretty harmless anyway. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki --- kernel/kmod.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/kmod.c b/kernel/kmod.c index a4bea97c75b6..81b4a27261b2 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -50,6 +51,7 @@ static struct workqueue_struct *khelper_wq; static kernel_cap_t usermodehelper_bset = CAP_FULL_SET; static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET; static DEFINE_SPINLOCK(umh_sysctl_lock); +static DECLARE_RWSEM(umhelper_sem); #ifdef CONFIG_MODULES @@ -275,6 +277,7 @@ static void __call_usermodehelper(struct work_struct *work) * If set, call_usermodehelper_exec() will exit immediately returning -EBUSY * (used for preventing user land processes from being created after the user * land has been frozen during a system-wide hibernation or suspend operation). + * Should always be manipulated under umhelper_sem acquired for write. */ static int usermodehelper_disabled = 1; @@ -293,6 +296,18 @@ static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq); */ #define RUNNING_HELPERS_TIMEOUT (5 * HZ) +void read_lock_usermodehelper(void) +{ + down_read(&umhelper_sem); +} +EXPORT_SYMBOL_GPL(read_lock_usermodehelper); + +void read_unlock_usermodehelper(void) +{ + up_read(&umhelper_sem); +} +EXPORT_SYMBOL_GPL(read_unlock_usermodehelper); + /** * usermodehelper_disable - prevent new helpers from being started */ @@ -300,8 +315,10 @@ int usermodehelper_disable(void) { long retval; + down_write(&umhelper_sem); usermodehelper_disabled = 1; - smp_mb(); + up_write(&umhelper_sem); + /* * From now on call_usermodehelper_exec() won't start any new * helpers, so it is sufficient if running_helpers turns out to @@ -314,7 +331,9 @@ int usermodehelper_disable(void) if (retval) return 0; + down_write(&umhelper_sem); usermodehelper_disabled = 0; + up_write(&umhelper_sem); return -EAGAIN; } @@ -323,7 +342,9 @@ int usermodehelper_disable(void) */ void usermodehelper_enable(void) { + down_write(&umhelper_sem); usermodehelper_disabled = 0; + up_write(&umhelper_sem); } /** -- cgit From cf007e3526a785a95a738d5a8fba44f1f4fe33e0 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Thu, 8 Dec 2011 23:42:53 +0100 Subject: PM / Hibernate: Remove deprecated hibernation snapshot ioctls Several snapshot ioctls were marked for removal quite some time ago, since they were deprecated. Remove them. Suggested-by: Rafael J. Wysocki Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki --- kernel/power/user.c | 87 ----------------------------------------------------- 1 file changed, 87 deletions(-) (limited to 'kernel') diff --git a/kernel/power/user.c b/kernel/power/user.c index 98ade217da6c..78bdb4404aab 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -30,28 +30,6 @@ #include "power.h" -/* - * NOTE: The SNAPSHOT_SET_SWAP_FILE and SNAPSHOT_PMOPS ioctls are obsolete and - * will be removed in the future. They are only preserved here for - * compatibility with existing userland utilities. - */ -#define SNAPSHOT_SET_SWAP_FILE _IOW(SNAPSHOT_IOC_MAGIC, 10, unsigned int) -#define SNAPSHOT_PMOPS _IOW(SNAPSHOT_IOC_MAGIC, 12, unsigned int) - -#define PMOPS_PREPARE 1 -#define PMOPS_ENTER 2 -#define PMOPS_FINISH 3 - -/* - * NOTE: The following ioctl definitions are wrong and have been replaced with - * correct ones. They are only preserved here for compatibility with existing - * userland utilities and will be removed in the future. - */ -#define SNAPSHOT_ATOMIC_SNAPSHOT _IOW(SNAPSHOT_IOC_MAGIC, 3, void *) -#define SNAPSHOT_SET_IMAGE_SIZE _IOW(SNAPSHOT_IOC_MAGIC, 6, unsigned long) -#define SNAPSHOT_AVAIL_SWAP _IOR(SNAPSHOT_IOC_MAGIC, 7, void *) -#define SNAPSHOT_GET_SWAP_PAGE _IOR(SNAPSHOT_IOC_MAGIC, 8, void *) - #define SNAPSHOT_MINOR 231 @@ -213,15 +191,6 @@ unlock: return res; } -static void snapshot_deprecated_ioctl(unsigned int cmd) -{ - if (printk_ratelimit()) - printk(KERN_NOTICE "%pf: ioctl '%.8x' is deprecated and will " - "be removed soon, update your suspend-to-disk " - "utilities\n", - __builtin_return_address(0), cmd); -} - static long snapshot_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -272,8 +241,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, data->frozen = 0; break; - case SNAPSHOT_ATOMIC_SNAPSHOT: - snapshot_deprecated_ioctl(cmd); case SNAPSHOT_CREATE_IMAGE: if (data->mode != O_RDONLY || !data->frozen || data->ready) { error = -EPERM; @@ -308,8 +275,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, data->ready = 0; break; - case SNAPSHOT_SET_IMAGE_SIZE: - snapshot_deprecated_ioctl(cmd); case SNAPSHOT_PREF_IMAGE_SIZE: image_size = arg; break; @@ -324,16 +289,12 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, error = put_user(size, (loff_t __user *)arg); break; - case SNAPSHOT_AVAIL_SWAP: - snapshot_deprecated_ioctl(cmd); case SNAPSHOT_AVAIL_SWAP_SIZE: size = count_swap_pages(data->swap, 1); size <<= PAGE_SHIFT; error = put_user(size, (loff_t __user *)arg); break; - case SNAPSHOT_GET_SWAP_PAGE: - snapshot_deprecated_ioctl(cmd); case SNAPSHOT_ALLOC_SWAP_PAGE: if (data->swap < 0 || data->swap >= MAX_SWAPFILES) { error = -ENODEV; @@ -356,27 +317,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, free_all_swap_pages(data->swap); break; - case SNAPSHOT_SET_SWAP_FILE: /* This ioctl is deprecated */ - snapshot_deprecated_ioctl(cmd); - if (!swsusp_swap_in_use()) { - /* - * User space encodes device types as two-byte values, - * so we need to recode them - */ - if (old_decode_dev(arg)) { - data->swap = swap_type_of(old_decode_dev(arg), - 0, NULL); - if (data->swap < 0) - error = -ENODEV; - } else { - data->swap = -1; - error = -EINVAL; - } - } else { - error = -EPERM; - } - break; - case SNAPSHOT_S2RAM: if (!data->frozen) { error = -EPERM; @@ -399,33 +339,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, error = hibernation_platform_enter(); break; - case SNAPSHOT_PMOPS: /* This ioctl is deprecated */ - snapshot_deprecated_ioctl(cmd); - error = -EINVAL; - - switch (arg) { - - case PMOPS_PREPARE: - data->platform_support = 1; - error = 0; - break; - - case PMOPS_ENTER: - if (data->platform_support) - error = hibernation_platform_enter(); - break; - - case PMOPS_FINISH: - if (data->platform_support) - error = 0; - break; - - default: - printk(KERN_ERR "SNAPSHOT_PMOPS: invalid argument %ld\n", arg); - - } - break; - case SNAPSHOT_SET_SWAP_AREA: if (swsusp_swap_in_use()) { error = -EPERM; -- cgit