From 50fb4f7fc907efff65eadb0b74387a9ffed6e849 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:22 -0800
Subject: freezer: fix current->state restoration race in refrigerator()

refrigerator() saves current->state before entering frozen state and
restores it before returning using __set_current_state(); however,
this is racy, for example, please consider the following sequence.

	set_current_state(TASK_INTERRUPTIBLE);
	try_to_freeze();
	if (kthread_should_stop())
		break;
	schedule();

If kthread_stop() races with ->state restoration, the restoration can
restore ->state to TASK_INTERRUPTIBLE after kthread_stop() sets it to
TASK_RUNNING but kthread_should_stop() may still see zero
->should_stop because there's no memory barrier between restoring
TASK_INTERRUPTIBLE and kthread_should_stop() test.

This isn't restricted to kthread_should_stop().  current->state is
often used in memory barrier based synchronization and silently
restoring it w/o mb breaks them.

Use set_current_state() instead.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/freezer.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/freezer.c b/kernel/freezer.c
index 7be56c534397..3f460104a9d6 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -58,7 +58,13 @@ void refrigerator(void)
 	current->flags &= ~PF_FREEZING;
 
 	pr_debug("%s left refrigerator\n", current->comm);
-	__set_current_state(save);
+
+	/*
+	 * Restore saved task state before returning.  The mb'd version
+	 * needs to be used; otherwise, it might silently break
+	 * synchronization which depends on ordered task state change.
+	 */
+	set_current_state(save);
 }
 EXPORT_SYMBOL(refrigerator);
 
-- 
cgit 


From a0acae0e886d44bd5ce6d2f173c1ace0fcf0d9f6 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:22 -0800
Subject: freezer: unexport refrigerator() and update try_to_freeze() slightly

There is no reason to export two functions for entering the
refrigerator.  Calling refrigerator() instead of try_to_freeze()
doesn't save anything noticeable or removes any race condition.

* Rename refrigerator() to __refrigerator() and make it return bool
  indicating whether it scheduled out for freezing.

* Update try_to_freeze() to return bool and relay the return value of
  __refrigerator() if freezing().

* Convert all refrigerator() users to try_to_freeze().

* Update documentation accordingly.

* While at it, add might_sleep() to try_to_freeze().

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Samuel Ortiz <samuel@sortiz.org>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jan Kara <jack@suse.cz>
Cc: KONISHI Ryusuke <konishi.ryusuke@lab.ntt.co.jp>
Cc: Christoph Hellwig <hch@infradead.org>
---
 kernel/freezer.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/freezer.c b/kernel/freezer.c
index 3f460104a9d6..732f14f5944f 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -23,10 +23,11 @@ static inline void frozen_process(void)
 }
 
 /* Refrigerator is place where frozen processes are stored :-). */
-void refrigerator(void)
+bool __refrigerator(void)
 {
 	/* Hmm, should we be allowed to suspend when there are realtime
 	   processes around? */
+	bool was_frozen = false;
 	long save;
 
 	task_lock(current);
@@ -35,7 +36,7 @@ void refrigerator(void)
 		task_unlock(current);
 	} else {
 		task_unlock(current);
-		return;
+		return was_frozen;
 	}
 	save = current->state;
 	pr_debug("%s entered refrigerator\n", current->comm);
@@ -51,6 +52,7 @@ void refrigerator(void)
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		if (!frozen(current))
 			break;
+		was_frozen = true;
 		schedule();
 	}
 
@@ -65,8 +67,10 @@ void refrigerator(void)
 	 * synchronization which depends on ordered task state change.
 	 */
 	set_current_state(save);
+
+	return was_frozen;
 }
-EXPORT_SYMBOL(refrigerator);
+EXPORT_SYMBOL(__refrigerator);
 
 static void fake_signal_wake_up(struct task_struct *p)
 {
-- 
cgit 


From 8a32c441c1609f80e55df75422324a1151208f40 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:23 -0800
Subject: freezer: implement and use kthread_freezable_should_stop()

Writeback and thinkpad_acpi have been using thaw_process() to prevent
deadlock between the freezer and kthread_stop(); unfortunately, this
is inherently racy - nothing prevents freezing from happening between
thaw_process() and kthread_stop().

This patch implements kthread_freezable_should_stop() which enters
refrigerator if necessary but is guaranteed to return if
kthread_stop() is invoked.  Both thaw_process() users are converted to
use the new function.

Note that this deadlock condition exists for many of freezable
kthreads.  They need to be converted to use the new should_stop or
freezable workqueue.

Tested with synthetic test case.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Henrique de Moraes Holschuh <ibm-acpi@hmh.eng.br>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Oleg Nesterov <oleg@redhat.com>
---
 kernel/freezer.c |  6 ++++--
 kernel/kthread.c | 25 +++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/freezer.c b/kernel/freezer.c
index 732f14f5944f..b83c30e9483a 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -9,6 +9,7 @@
 #include <linux/export.h>
 #include <linux/syscalls.h>
 #include <linux/freezer.h>
+#include <linux/kthread.h>
 
 /*
  * freezing is complete, mark current process as frozen
@@ -23,7 +24,7 @@ static inline void frozen_process(void)
 }
 
 /* Refrigerator is place where frozen processes are stored :-). */
-bool __refrigerator(void)
+bool __refrigerator(bool check_kthr_stop)
 {
 	/* Hmm, should we be allowed to suspend when there are realtime
 	   processes around? */
@@ -50,7 +51,8 @@ bool __refrigerator(void)
 
 	for (;;) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (!frozen(current))
+		if (!frozen(current) ||
+		    (check_kthr_stop && kthread_should_stop()))
 			break;
 		was_frozen = true;
 		schedule();
diff --git a/kernel/kthread.c b/kernel/kthread.c
index b6d216a92639..1c36deaae2f1 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -58,6 +58,31 @@ int kthread_should_stop(void)
 }
 EXPORT_SYMBOL(kthread_should_stop);
 
+/**
+ * kthread_freezable_should_stop - should this freezable kthread return now?
+ * @was_frozen: optional out parameter, indicates whether %current was frozen
+ *
+ * kthread_should_stop() for freezable kthreads, which will enter
+ * refrigerator if necessary.  This function is safe from kthread_stop() /
+ * freezer deadlock and freezable kthreads should use this function instead
+ * of calling try_to_freeze() directly.
+ */
+bool kthread_freezable_should_stop(bool *was_frozen)
+{
+	bool frozen = false;
+
+	might_sleep();
+
+	if (unlikely(freezing(current)))
+		frozen = __refrigerator(true);
+
+	if (was_frozen)
+		*was_frozen = frozen;
+
+	return kthread_should_stop();
+}
+EXPORT_SYMBOL_GPL(kthread_freezable_should_stop);
+
 /**
  * kthread_data - return data value specified on kthread creation
  * @task: kthread task in question
-- 
cgit 


From a5be2d0d1a8746e7be5210e3d6b904455000443c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:23 -0800
Subject: freezer: rename thaw_process() to __thaw_task() and simplify the
 implementation

thaw_process() now has only internal users - system and cgroup
freezers.  Remove the unnecessary return value, rename, unexport and
collapse __thaw_process() into it.  This will help further updates to
the freezer code.

-v3: oom_kill grew a use of thaw_process() while this patch was
     pending.  Convert it to use __thaw_task() for now.  In the longer
     term, this should be handled by allowing tasks to die if killed
     even if it's frozen.

-v2: minor style update as suggested by Matt.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Paul Menage <menage@google.com>
Cc: Matt Helsley <matthltc@us.ibm.com>
---
 kernel/cgroup_freezer.c |  7 +++----
 kernel/freezer.c        | 31 ++++++++++++-------------------
 kernel/power/process.c  |  2 +-
 3 files changed, 16 insertions(+), 24 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 5e828a2ca8e6..a6d405a86ee0 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -130,7 +130,7 @@ struct cgroup_subsys freezer_subsys;
  *   write_lock css_set_lock (cgroup iterator start)
  *    task->alloc_lock
  *   read_lock css_set_lock (cgroup iterator start)
- *    task->alloc_lock (inside thaw_process(), prevents race with refrigerator())
+ *    task->alloc_lock (inside __thaw_task(), prevents race with refrigerator())
  *     sighand->siglock
  */
 static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
@@ -300,9 +300,8 @@ static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
 	struct task_struct *task;
 
 	cgroup_iter_start(cgroup, &it);
-	while ((task = cgroup_iter_next(cgroup, &it))) {
-		thaw_process(task);
-	}
+	while ((task = cgroup_iter_next(cgroup, &it)))
+		__thaw_task(task);
 	cgroup_iter_end(cgroup, &it);
 
 	freezer->state = CGROUP_THAWED;
diff --git a/kernel/freezer.c b/kernel/freezer.c
index b83c30e9483a..c851d588e29f 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -145,18 +145,8 @@ void cancel_freezing(struct task_struct *p)
 	}
 }
 
-static int __thaw_process(struct task_struct *p)
-{
-	if (frozen(p)) {
-		p->flags &= ~PF_FROZEN;
-		return 1;
-	}
-	clear_freeze_flag(p);
-	return 0;
-}
-
 /*
- * Wake up a frozen process
+ * Wake up a frozen task
  *
  * task_lock() is needed to prevent the race with refrigerator() which may
  * occur if the freezing of tasks fails.  Namely, without the lock, if the
@@ -164,15 +154,18 @@ static int __thaw_process(struct task_struct *p)
  * refrigerator() could call frozen_process(), in which case the task would be
  * frozen and no one would thaw it.
  */
-int thaw_process(struct task_struct *p)
+void __thaw_task(struct task_struct *p)
 {
+	bool was_frozen;
+
 	task_lock(p);
-	if (__thaw_process(p) == 1) {
-		task_unlock(p);
-		wake_up_process(p);
-		return 1;
-	}
+	was_frozen = frozen(p);
+	if (was_frozen)
+		p->flags &= ~PF_FROZEN;
+	else
+		clear_freeze_flag(p);
 	task_unlock(p);
-	return 0;
+
+	if (was_frozen)
+		wake_up_process(p);
 }
-EXPORT_SYMBOL(thaw_process);
diff --git a/kernel/power/process.c b/kernel/power/process.c
index addbbe5531bc..fe2787207f00 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -186,7 +186,7 @@ static void thaw_tasks(bool nosig_only)
 		if (cgroup_freezing_or_frozen(p))
 			continue;
 
-		thaw_process(p);
+		__thaw_task(p);
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
 }
-- 
cgit 


From a585042f7b933539a0b6bc63650c2d49ffb2e55d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:23 -0800
Subject: freezer: remove racy clear_freeze_flag() and set PF_NOFREEZE on dead
 tasks

clear_freeze_flag() in exit_mm() is racy.  Freezing can start
afterwards.  Remove it.  Skipping freezer for exiting task will be
properly implemented later.

Also, freezable() was testing exit_state directly to make system
freezer ignore dead tasks.  Let the exiting task set PF_NOFREEZE after
entering TASK_DEAD instead.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
---
 kernel/exit.c          | 3 +--
 kernel/power/process.c | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/exit.c b/kernel/exit.c
index d0b7d988f873..95a4141d07e7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -679,8 +679,6 @@ static void exit_mm(struct task_struct * tsk)
 	tsk->mm = NULL;
 	up_read(&mm->mmap_sem);
 	enter_lazy_tlb(mm, current);
-	/* We don't want this task to be frozen prematurely */
-	clear_freeze_flag(tsk);
 	task_unlock(tsk);
 	mm_update_next_owner(mm);
 	mmput(mm);
@@ -1040,6 +1038,7 @@ NORET_TYPE void do_exit(long code)
 	exit_rcu();
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;
+	tsk->flags |= PF_NOFREEZE;	/* tell freezer to ignore us */
 	schedule();
 	BUG();
 	/* Avoid "noreturn function does return".  */
diff --git a/kernel/power/process.c b/kernel/power/process.c
index fe2787207f00..23822dc14b6c 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -25,8 +25,7 @@
 static inline int freezable(struct task_struct * p)
 {
 	if ((p == current) ||
-	    (p->flags & PF_NOFREEZE) ||
-	    (p->exit_state != 0))
+	    (p->flags & PF_NOFREEZE))
 		return 0;
 	return 1;
 }
-- 
cgit 


From 6cd8dedcdd8e8de01391a7cf25f0b2afeb24f8f4 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:23 -0800
Subject: freezer: don't distinguish nosig tasks on thaw

There's no point in thawing nosig tasks before others.  There's no
ordering requirement between the two groups on thaw, which the staged
thawing can't guarantee anyway.  Simplify thaw_processes() by removing
the distinction and collapsing thaw_tasks() into thaw_processes().
This will help further updates to freezer.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/power/process.c | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/process.c b/kernel/power/process.c
index 23822dc14b6c..9db048fb0d70 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -170,34 +170,28 @@ int freeze_kernel_threads(void)
 	return error;
 }
 
-static void thaw_tasks(bool nosig_only)
+void thaw_processes(void)
 {
 	struct task_struct *g, *p;
 
+	oom_killer_enable();
+
+	printk("Restarting tasks ... ");
+
+	thaw_workqueues();
+
 	read_lock(&tasklist_lock);
 	do_each_thread(g, p) {
 		if (!freezable(p))
 			continue;
 
-		if (nosig_only && should_send_signal(p))
-			continue;
-
 		if (cgroup_freezing_or_frozen(p))
 			continue;
 
 		__thaw_task(p);
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
-}
-
-void thaw_processes(void)
-{
-	oom_killer_enable();
 
-	printk("Restarting tasks ... ");
-	thaw_workqueues();
-	thaw_tasks(true);
-	thaw_tasks(false);
 	schedule();
 	printk("done.\n");
 }
-- 
cgit 


From 0c9af09262864a2744091ee94c98c4a8fd60c98b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:24 -0800
Subject: freezer: use dedicated lock instead of task_lock() + memory barrier

Freezer synchronization is needlessly complicated - it's by no means a
hot path and the priority is staying unintrusive and safe.  This patch
makes it simply use a dedicated lock instead of piggy-backing on
task_lock() and playing with memory barriers.

On the failure path of try_to_freeze_tasks(), locking is moved from it
to cancel_freezing().  This makes the frozen() test racy but the race
here is a non-issue as the warning is printed for tasks which failed
to enter frozen for 20 seconds and race on PF_FROZEN at the last
moment doesn't change anything.

This simplifies freezer implementation and eases further changes
including some race fixes.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/freezer.c       | 84 ++++++++++++++++++++++----------------------------
 kernel/power/process.c |  2 --
 2 files changed, 37 insertions(+), 49 deletions(-)

(limited to 'kernel')

diff --git a/kernel/freezer.c b/kernel/freezer.c
index c851d588e29f..4130e48649bb 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -11,17 +11,8 @@
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 
-/*
- * freezing is complete, mark current process as frozen
- */
-static inline void frozen_process(void)
-{
-	if (!unlikely(current->flags & PF_NOFREEZE)) {
-		current->flags |= PF_FROZEN;
-		smp_wmb();
-	}
-	clear_freeze_flag(current);
-}
+/* protects freezing and frozen transitions */
+static DEFINE_SPINLOCK(freezer_lock);
 
 /* Refrigerator is place where frozen processes are stored :-). */
 bool __refrigerator(bool check_kthr_stop)
@@ -31,14 +22,16 @@ bool __refrigerator(bool check_kthr_stop)
 	bool was_frozen = false;
 	long save;
 
-	task_lock(current);
-	if (freezing(current)) {
-		frozen_process();
-		task_unlock(current);
-	} else {
-		task_unlock(current);
+	spin_lock_irq(&freezer_lock);
+	if (!freezing(current)) {
+		spin_unlock_irq(&freezer_lock);
 		return was_frozen;
 	}
+	if (!(current->flags & PF_NOFREEZE))
+		current->flags |= PF_FROZEN;
+	clear_freeze_flag(current);
+	spin_unlock_irq(&freezer_lock);
+
 	save = current->state;
 	pr_debug("%s entered refrigerator\n", current->comm);
 
@@ -99,21 +92,18 @@ static void fake_signal_wake_up(struct task_struct *p)
  */
 bool freeze_task(struct task_struct *p, bool sig_only)
 {
-	/*
-	 * We first check if the task is freezing and next if it has already
-	 * been frozen to avoid the race with frozen_process() which first marks
-	 * the task as frozen and next clears its TIF_FREEZE.
-	 */
-	if (!freezing(p)) {
-		smp_rmb();
-		if (frozen(p))
-			return false;
-
-		if (!sig_only || should_send_signal(p))
-			set_freeze_flag(p);
-		else
-			return false;
-	}
+	unsigned long flags;
+	bool ret = false;
+
+	spin_lock_irqsave(&freezer_lock, flags);
+
+	if (sig_only && !should_send_signal(p))
+		goto out_unlock;
+
+	if (frozen(p))
+		goto out_unlock;
+
+	set_freeze_flag(p);
 
 	if (should_send_signal(p)) {
 		fake_signal_wake_up(p);
@@ -123,26 +113,28 @@ bool freeze_task(struct task_struct *p, bool sig_only)
 		 * TASK_RUNNING transition can't race with task state
 		 * testing in try_to_freeze_tasks().
 		 */
-	} else if (sig_only) {
-		return false;
 	} else {
 		wake_up_state(p, TASK_INTERRUPTIBLE);
 	}
-
-	return true;
+	ret = true;
+out_unlock:
+	spin_unlock_irqrestore(&freezer_lock, flags);
+	return ret;
 }
 
 void cancel_freezing(struct task_struct *p)
 {
 	unsigned long flags;
 
+	spin_lock_irqsave(&freezer_lock, flags);
 	if (freezing(p)) {
 		pr_debug("  clean up: %s\n", p->comm);
 		clear_freeze_flag(p);
-		spin_lock_irqsave(&p->sighand->siglock, flags);
+		spin_lock(&p->sighand->siglock);
 		recalc_sigpending_and_wake(p);
-		spin_unlock_irqrestore(&p->sighand->siglock, flags);
+		spin_unlock(&p->sighand->siglock);
 	}
+	spin_unlock_irqrestore(&freezer_lock, flags);
 }
 
 /*
@@ -156,16 +148,14 @@ void cancel_freezing(struct task_struct *p)
  */
 void __thaw_task(struct task_struct *p)
 {
-	bool was_frozen;
+	unsigned long flags;
 
-	task_lock(p);
-	was_frozen = frozen(p);
-	if (was_frozen)
+	spin_lock_irqsave(&freezer_lock, flags);
+	if (frozen(p)) {
 		p->flags &= ~PF_FROZEN;
-	else
-		clear_freeze_flag(p);
-	task_unlock(p);
-
-	if (was_frozen)
 		wake_up_process(p);
+	} else {
+		clear_freeze_flag(p);
+	}
+	spin_unlock_irqrestore(&freezer_lock, flags);
 }
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 9db048fb0d70..bd420ca48261 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -118,11 +118,9 @@ static int try_to_freeze_tasks(bool sig_only)
 
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
-			task_lock(p);
 			if (!wakeup && freezing(p) && !freezer_should_skip(p))
 				sched_show_task(p);
 			cancel_freezing(p);
-			task_unlock(p);
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
 	} else {
-- 
cgit 


From 6907483b4e803a20f0b48cc9afa3817420ce61c5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:24 -0800
Subject: freezer: make freezing indicate freeze condition in effect

Currently freezing (TIF_FREEZE) and frozen (PF_FROZEN) states are
interlocked - freezing is set to request freeze and when the task
actually freezes, it clears freezing and sets frozen.

This interlocking makes things more complex than necessary - freezing
doesn't mean there's freezing condition in effect and frozen doesn't
match the task actually entering and leaving frozen state (it's
cleared by the thawing task).

This patch makes freezing indicate that freeze condition is in effect.
A task enters and stays frozen if freezing.  This makes PF_FROZEN
manipulation done only by the task itself and prevents wakeup from
__thaw_task() leaking outside of refrigerator.

The only place which needs to tell freezing && !frozen is
try_to_freeze_task() to whine about tasks which don't enter frozen.
It's updated to test the condition explicitly.

With the change, frozen() state my linger after __thaw_task() until
the task wakes up and exits fridge.  This can trigger BUG_ON() in
update_if_frozen().  Work it around by testing freezing() && frozen()
instead of frozen().

-v2: Oleg pointed out missing re-check of freezing() when trying to
     clear FROZEN and possible spurious BUG_ON() trigger in
     update_if_frozen().  Both fixed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paul Menage <paul@paulmenage.org>
---
 kernel/cgroup_freezer.c |  2 +-
 kernel/freezer.c        | 42 ++++++++++++++++++++++++------------------
 kernel/power/process.c  |  3 ++-
 3 files changed, 27 insertions(+), 20 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index a6d405a86ee0..cd27b0825560 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -231,7 +231,7 @@ static void update_if_frozen(struct cgroup *cgroup,
 	cgroup_iter_start(cgroup, &it);
 	while ((task = cgroup_iter_next(cgroup, &it))) {
 		ntotal++;
-		if (frozen(task))
+		if (freezing(task) && frozen(task))
 			nfrozen++;
 	}
 
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 4130e48649bb..a8822be43da0 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -22,14 +22,19 @@ bool __refrigerator(bool check_kthr_stop)
 	bool was_frozen = false;
 	long save;
 
+	/*
+	 * Enter FROZEN.  If NOFREEZE, schedule immediate thawing by
+	 * clearing freezing.
+	 */
 	spin_lock_irq(&freezer_lock);
+repeat:
 	if (!freezing(current)) {
 		spin_unlock_irq(&freezer_lock);
 		return was_frozen;
 	}
-	if (!(current->flags & PF_NOFREEZE))
-		current->flags |= PF_FROZEN;
-	clear_freeze_flag(current);
+	if (current->flags & PF_NOFREEZE)
+		clear_freeze_flag(current);
+	current->flags |= PF_FROZEN;
 	spin_unlock_irq(&freezer_lock);
 
 	save = current->state;
@@ -44,7 +49,7 @@ bool __refrigerator(bool check_kthr_stop)
 
 	for (;;) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (!frozen(current) ||
+		if (!freezing(current) ||
 		    (check_kthr_stop && kthread_should_stop()))
 			break;
 		was_frozen = true;
@@ -54,6 +59,13 @@ bool __refrigerator(bool check_kthr_stop)
 	/* Remove the accounting blocker */
 	current->flags &= ~PF_FREEZING;
 
+	/* leave FROZEN */
+	spin_lock_irq(&freezer_lock);
+	if (freezing(current))
+		goto repeat;
+	current->flags &= ~PF_FROZEN;
+	spin_unlock_irq(&freezer_lock);
+
 	pr_debug("%s left refrigerator\n", current->comm);
 
 	/*
@@ -137,25 +149,19 @@ void cancel_freezing(struct task_struct *p)
 	spin_unlock_irqrestore(&freezer_lock, flags);
 }
 
-/*
- * Wake up a frozen task
- *
- * task_lock() is needed to prevent the race with refrigerator() which may
- * occur if the freezing of tasks fails.  Namely, without the lock, if the
- * freezing of tasks failed, thaw_tasks() might have run before a task in
- * refrigerator() could call frozen_process(), in which case the task would be
- * frozen and no one would thaw it.
- */
 void __thaw_task(struct task_struct *p)
 {
 	unsigned long flags;
 
+	/*
+	 * Clear freezing and kick @p if FROZEN.  Clearing is guaranteed to
+	 * be visible to @p as waking up implies wmb.  Waking up inside
+	 * freezer_lock also prevents wakeups from leaking outside
+	 * refrigerator.
+	 */
 	spin_lock_irqsave(&freezer_lock, flags);
-	if (frozen(p)) {
-		p->flags &= ~PF_FROZEN;
+	clear_freeze_flag(p);
+	if (frozen(p))
 		wake_up_process(p);
-	} else {
-		clear_freeze_flag(p);
-	}
 	spin_unlock_irqrestore(&freezer_lock, flags);
 }
diff --git a/kernel/power/process.c b/kernel/power/process.c
index bd420ca48261..e6e2739190b5 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -118,7 +118,8 @@ static int try_to_freeze_tasks(bool sig_only)
 
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
-			if (!wakeup && freezing(p) && !freezer_should_skip(p))
+			if (!wakeup && !freezer_should_skip(p) &&
+			    freezing(p) && !frozen(p))
 				sched_show_task(p);
 			cancel_freezing(p);
 		} while_each_thread(g, p);
-- 
cgit 


From 85f1d476653f52c97ca75466b2494e67c1cbd25d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:24 -0800
Subject: freezer: test freezable conditions while holding freezer_lock

try_to_freeze_tasks() and thaw_processes() use freezable() and
frozen() as preliminary tests before initiating operations on a task.
These are done without any synchronization and hinder with
synchronization cleanup without any real performance benefits.

In try_to_freeze_tasks(), open code self test and move PF_NOFREEZE and
frozen() tests inside freezer_lock in freeze_task().

thaw_processes() can simply drop freezable() test as frozen() test in
__thaw_task() is enough.

Note: This used to be a part of larger patch to fix set_freezable()
      race.  Separated out to satisfy ordering among dependent fixes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
---
 kernel/freezer.c       |  3 ++-
 kernel/power/process.c | 16 +---------------
 2 files changed, 3 insertions(+), 16 deletions(-)

(limited to 'kernel')

diff --git a/kernel/freezer.c b/kernel/freezer.c
index a8822be43da0..a257ecd37c48 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -109,7 +109,8 @@ bool freeze_task(struct task_struct *p, bool sig_only)
 
 	spin_lock_irqsave(&freezer_lock, flags);
 
-	if (sig_only && !should_send_signal(p))
+	if ((p->flags & PF_NOFREEZE) ||
+	    (sig_only && !should_send_signal(p)))
 		goto out_unlock;
 
 	if (frozen(p))
diff --git a/kernel/power/process.c b/kernel/power/process.c
index e6e2739190b5..e59676f5811d 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -22,14 +22,6 @@
  */
 #define TIMEOUT	(20 * HZ)
 
-static inline int freezable(struct task_struct * p)
-{
-	if ((p == current) ||
-	    (p->flags & PF_NOFREEZE))
-		return 0;
-	return 1;
-}
-
 static int try_to_freeze_tasks(bool sig_only)
 {
 	struct task_struct *g, *p;
@@ -52,10 +44,7 @@ static int try_to_freeze_tasks(bool sig_only)
 		todo = 0;
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
-			if (frozen(p) || !freezable(p))
-				continue;
-
-			if (!freeze_task(p, sig_only))
+			if (p == current || !freeze_task(p, sig_only))
 				continue;
 
 			/*
@@ -181,9 +170,6 @@ void thaw_processes(void)
 
 	read_lock(&tasklist_lock);
 	do_each_thread(g, p) {
-		if (!freezable(p))
-			continue;
-
 		if (cgroup_freezing_or_frozen(p))
 			continue;
 
-- 
cgit 


From 376fede80e74d98b49d1ba9ac18f23c9fd026ddd Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:24 -0800
Subject: freezer: kill PF_FREEZING

With the previous changes, there's no meaningful difference between
PF_FREEZING and PF_FROZEN.  Remove PF_FREEZING and use PF_FROZEN
instead in task_contributes_to_load().

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/freezer.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'kernel')

diff --git a/kernel/freezer.c b/kernel/freezer.c
index a257ecd37c48..b8b562124ba9 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -44,9 +44,6 @@ repeat:
 	recalc_sigpending(); /* We sent fake signal, clean it up */
 	spin_unlock_irq(&current->sighand->siglock);
 
-	/* prevent accounting of that task to load */
-	current->flags |= PF_FREEZING;
-
 	for (;;) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		if (!freezing(current) ||
@@ -56,9 +53,6 @@ repeat:
 		schedule();
 	}
 
-	/* Remove the accounting blocker */
-	current->flags &= ~PF_FREEZING;
-
 	/* leave FROZEN */
 	spin_lock_irq(&freezer_lock);
 	if (freezing(current))
-- 
cgit 


From 03afed8bc296fa70186ba832c1126228bb992465 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:24 -0800
Subject: freezer: clean up freeze_processes() failure path

freeze_processes() failure path is rather messy.  Freezing is canceled
for workqueues and tasks which aren't frozen yet but frozen tasks are
left alone and should be thawed by the caller and of course some
callers (xen and kexec) didn't do it.

This patch updates __thaw_task() to handle cancelation correctly and
makes freeze_processes() and freeze_kernel_threads() call
thaw_processes() on failure instead so that the system is fully thawed
on failure.  Unnecessary [suspend_]thaw_processes() calls are removed
from kernel/power/hibernate.c, suspend.c and user.c.

While at it, restructure error checking if clause in suspend_prepare()
to be less weird.

-v2: Srivatsa spotted missing removal of suspend_thaw_processes() in
     suspend_prepare() and error in commit message.  Updated.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---
 kernel/freezer.c         | 25 +++++++++----------------
 kernel/power/hibernate.c | 15 ++-------------
 kernel/power/process.c   | 16 ++++++++--------
 kernel/power/suspend.c   |  8 +++-----
 kernel/power/user.c      |  4 +---
 5 files changed, 23 insertions(+), 45 deletions(-)

(limited to 'kernel')

diff --git a/kernel/freezer.c b/kernel/freezer.c
index b8b562124ba9..11e32d419dec 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -129,21 +129,6 @@ out_unlock:
 	return ret;
 }
 
-void cancel_freezing(struct task_struct *p)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&freezer_lock, flags);
-	if (freezing(p)) {
-		pr_debug("  clean up: %s\n", p->comm);
-		clear_freeze_flag(p);
-		spin_lock(&p->sighand->siglock);
-		recalc_sigpending_and_wake(p);
-		spin_unlock(&p->sighand->siglock);
-	}
-	spin_unlock_irqrestore(&freezer_lock, flags);
-}
-
 void __thaw_task(struct task_struct *p)
 {
 	unsigned long flags;
@@ -153,10 +138,18 @@ void __thaw_task(struct task_struct *p)
 	 * be visible to @p as waking up implies wmb.  Waking up inside
 	 * freezer_lock also prevents wakeups from leaking outside
 	 * refrigerator.
+	 *
+	 * If !FROZEN, @p hasn't reached refrigerator, recalc sigpending to
+	 * avoid leaving dangling TIF_SIGPENDING behind.
 	 */
 	spin_lock_irqsave(&freezer_lock, flags);
 	clear_freeze_flag(p);
-	if (frozen(p))
+	if (frozen(p)) {
 		wake_up_process(p);
+	} else {
+		spin_lock(&p->sighand->siglock);
+		recalc_sigpending_and_wake(p);
+		spin_unlock(&p->sighand->siglock);
+	}
 	spin_unlock_irqrestore(&freezer_lock, flags);
 }
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 196c01268ebd..ba2319ffc860 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -607,17 +607,6 @@ static void power_down(void)
 	while(1);
 }
 
-static int prepare_processes(void)
-{
-	int error = 0;
-
-	if (freeze_processes()) {
-		error = -EBUSY;
-		thaw_processes();
-	}
-	return error;
-}
-
 /**
  * hibernate - Carry out system hibernation, including saving the image.
  */
@@ -650,7 +639,7 @@ int hibernate(void)
 	sys_sync();
 	printk("done.\n");
 
-	error = prepare_processes();
+	error = freeze_processes();
 	if (error)
 		goto Finish;
 
@@ -811,7 +800,7 @@ static int software_resume(void)
 		goto close_finish;
 
 	pr_debug("PM: Preparing processes for restore.\n");
-	error = prepare_processes();
+	error = freeze_processes();
 	if (error) {
 		swsusp_close(FMODE_READ);
 		goto Done;
diff --git a/kernel/power/process.c b/kernel/power/process.c
index e59676f5811d..ce643838a00c 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -91,11 +91,6 @@ static int try_to_freeze_tasks(bool sig_only)
 	elapsed_csecs = elapsed_csecs64;
 
 	if (todo) {
-		/* This does not unfreeze processes that are already frozen
-		 * (we have slightly ugly calling convention in that respect,
-		 * and caller must call thaw_processes() if something fails),
-		 * but it cleans up leftover PF_FREEZE requests.
-		 */
 		printk("\n");
 		printk(KERN_ERR "Freezing of tasks %s after %d.%02d seconds "
 		       "(%d tasks refusing to freeze, wq_busy=%d):\n",
@@ -103,14 +98,11 @@ static int try_to_freeze_tasks(bool sig_only)
 		       elapsed_csecs / 100, elapsed_csecs % 100,
 		       todo - wq_busy, wq_busy);
 
-		thaw_workqueues();
-
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
 			if (!wakeup && !freezer_should_skip(p) &&
 			    freezing(p) && !frozen(p))
 				sched_show_task(p);
-			cancel_freezing(p);
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
 	} else {
@@ -123,6 +115,8 @@ static int try_to_freeze_tasks(bool sig_only)
 
 /**
  * freeze_processes - Signal user space processes to enter the refrigerator.
+ *
+ * On success, returns 0.  On failure, -errno and system is fully thawed.
  */
 int freeze_processes(void)
 {
@@ -137,11 +131,15 @@ int freeze_processes(void)
 	printk("\n");
 	BUG_ON(in_atomic());
 
+	if (error)
+		thaw_processes();
 	return error;
 }
 
 /**
  * freeze_kernel_threads - Make freezable kernel threads go to the refrigerator.
+ *
+ * On success, returns 0.  On failure, -errno and system is fully thawed.
  */
 int freeze_kernel_threads(void)
 {
@@ -155,6 +153,8 @@ int freeze_kernel_threads(void)
 	printk("\n");
 	BUG_ON(in_atomic());
 
+	if (error)
+		thaw_processes();
 	return error;
 }
 
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 4953dc054c53..d336b27d1104 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -106,13 +106,11 @@ static int suspend_prepare(void)
 		goto Finish;
 
 	error = suspend_freeze_processes();
-	if (error) {
-		suspend_stats.failed_freeze++;
-		dpm_save_failed_step(SUSPEND_FREEZE);
-	} else
+	if (!error)
 		return 0;
 
-	suspend_thaw_processes();
+	suspend_stats.failed_freeze++;
+	dpm_save_failed_step(SUSPEND_FREEZE);
 	usermodehelper_enable();
  Finish:
 	pm_notifier_call_chain(PM_POST_SUSPEND);
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 6d8f535c2b88..7cc3f5bc5c24 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -257,10 +257,8 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 			break;
 
 		error = freeze_processes();
-		if (error) {
-			thaw_processes();
+		if (error)
 			usermodehelper_enable();
-		}
 		if (!error)
 			data->frozen = 1;
 		break;
-- 
cgit 


From 22b4e111fa01a1147aa562ceaf18a752a928ef4e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:25 -0800
Subject: cgroup_freezer: prepare for removal of TIF_FREEZE

TIF_FREEZE will be removed soon and freezing() will directly test
whether any freezing condition is in effect.  Make the following
changes in preparation.

* Rename cgroup_freezing_or_frozen() to cgroup_freezing() and make it
  return bool.

* Make cgroup_freezing() access task_freezer() under rcu read lock
  instead of task_lock().  This makes the state dereferencing racy
  against task moving to another cgroup; however, it was already racy
  without this change as ->state dereference wasn't synchronized.
  This will be later dealt with using attach hooks.

* freezer->state is now set before trying to push tasks into the
  target state.

-v2: Oleg pointed out that freeze_change_state() was setting
     freeze->state incorrectly to CGROUP_FROZEN instead of
     CGROUP_FREEZING.  Fixed.

-v3: Matt pointed out that setting CGROUP_FROZEN used to always invoke
     try_to_freeze_cgroup() regardless of the current state.  Patch
     updated such that the actual freeze/thaw operations are always
     performed on invocation.  This shouldn't make any difference
     unless something is broken.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Paul Menage <paul@paulmenage.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Oleg Nesterov <oleg@redhat.com>
---
 kernel/cgroup_freezer.c | 40 +++++++++++++---------------------------
 kernel/power/process.c  |  2 +-
 2 files changed, 14 insertions(+), 28 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index cd27b0825560..e6a1b8d1b8bc 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -48,19 +48,17 @@ static inline struct freezer *task_freezer(struct task_struct *task)
 			    struct freezer, css);
 }
 
-static inline int __cgroup_freezing_or_frozen(struct task_struct *task)
+bool cgroup_freezing(struct task_struct *task)
 {
-	enum freezer_state state = task_freezer(task)->state;
-	return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN);
-}
+	enum freezer_state state;
+	bool ret;
 
-int cgroup_freezing_or_frozen(struct task_struct *task)
-{
-	int result;
-	task_lock(task);
-	result = __cgroup_freezing_or_frozen(task);
-	task_unlock(task);
-	return result;
+	rcu_read_lock();
+	state = task_freezer(task)->state;
+	ret = state == CGROUP_FREEZING || state == CGROUP_FROZEN;
+	rcu_read_unlock();
+
+	return ret;
 }
 
 /*
@@ -102,9 +100,6 @@ struct cgroup_subsys freezer_subsys;
  * freezer_can_attach():
  * cgroup_mutex (held by caller of can_attach)
  *
- * cgroup_freezing_or_frozen():
- * task->alloc_lock (to get task's cgroup)
- *
  * freezer_fork() (preserving fork() performance means can't take cgroup_mutex):
  * freezer->lock
  *  sighand->siglock (if the cgroup is freezing)
@@ -177,13 +172,7 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
 
 static int freezer_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
-	rcu_read_lock();
-	if (__cgroup_freezing_or_frozen(tsk)) {
-		rcu_read_unlock();
-		return -EBUSY;
-	}
-	rcu_read_unlock();
-	return 0;
+	return cgroup_freezing(tsk) ? -EBUSY : 0;
 }
 
 static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
@@ -279,7 +268,6 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
 	struct task_struct *task;
 	unsigned int num_cant_freeze_now = 0;
 
-	freezer->state = CGROUP_FREEZING;
 	cgroup_iter_start(cgroup, &it);
 	while ((task = cgroup_iter_next(cgroup, &it))) {
 		if (!freeze_task(task, true))
@@ -303,8 +291,6 @@ static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
 	while ((task = cgroup_iter_next(cgroup, &it)))
 		__thaw_task(task);
 	cgroup_iter_end(cgroup, &it);
-
-	freezer->state = CGROUP_THAWED;
 }
 
 static int freezer_change_state(struct cgroup *cgroup,
@@ -318,20 +304,20 @@ static int freezer_change_state(struct cgroup *cgroup,
 	spin_lock_irq(&freezer->lock);
 
 	update_if_frozen(cgroup, freezer);
-	if (goal_state == freezer->state)
-		goto out;
 
 	switch (goal_state) {
 	case CGROUP_THAWED:
+		freezer->state = CGROUP_THAWED;
 		unfreeze_cgroup(cgroup, freezer);
 		break;
 	case CGROUP_FROZEN:
+		freezer->state = CGROUP_FREEZING;
 		retval = try_to_freeze_cgroup(cgroup, freezer);
 		break;
 	default:
 		BUG();
 	}
-out:
+
 	spin_unlock_irq(&freezer->lock);
 
 	return retval;
diff --git a/kernel/power/process.c b/kernel/power/process.c
index ce643838a00c..9f6f5c755cfa 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -170,7 +170,7 @@ void thaw_processes(void)
 
 	read_lock(&tasklist_lock);
 	do_each_thread(g, p) {
-		if (cgroup_freezing_or_frozen(p))
+		if (cgroup_freezing(p))
 			continue;
 
 		__thaw_task(p);
-- 
cgit 


From a3201227f803ad7fd43180c5195dbe5a2bf998aa Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:25 -0800
Subject: freezer: make freezing() test freeze conditions in effect instead of
 TIF_FREEZE

Using TIF_FREEZE for freezing worked when there was only single
freezing condition (the PM one); however, now there is also the
cgroup_freezer and single bit flag is getting clumsy.
thaw_processes() is already testing whether cgroup freezing in in
effect to avoid thawing tasks which were frozen by both PM and cgroup
freezers.

This is racy (nothing prevents race against cgroup freezing) and
fragile.  A much simpler way is to test actual freeze conditions from
freezing() - ie. directly test whether PM or cgroup freezing is in
effect.

This patch adds variables to indicate whether and what type of
freezing conditions are in effect and reimplements freezing() such
that it directly tests whether any of the two freezing conditions is
active and the task should freeze.  On fast path, freezing() is still
very cheap - it only tests system_freezing_cnt.

This makes the clumsy dancing aroung TIF_FREEZE unnecessary and
freeze/thaw operations more usual - updating state variables for the
new state and nudging target tasks so that they notice the new state
and comply.  As long as the nudging happens after state update, it's
race-free.

* This allows use of freezing() in freeze_task().  Replace the open
  coded tests with freezing().

* p != current test is added to warning printing conditions in
  try_to_freeze_tasks() failure path.  This is necessary as freezing()
  is now true for the task which initiated freezing too.

-v2: Oleg pointed out that re-freezing FROZEN cgroup could increment
     system_freezing_cnt.  Fixed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Paul Menage <paul@paulmenage.org>  (for the cgroup portions)
---
 kernel/cgroup_freezer.c | 10 +++++++-
 kernel/fork.c           |  1 -
 kernel/freezer.c        | 62 +++++++++++++++++++++++++++++++------------------
 kernel/power/process.c  | 15 ++++++++----
 4 files changed, 60 insertions(+), 28 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index e6a1b8d1b8bc..2327ad11725f 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -145,7 +145,11 @@ static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
 static void freezer_destroy(struct cgroup_subsys *ss,
 			    struct cgroup *cgroup)
 {
-	kfree(cgroup_freezer(cgroup));
+	struct freezer *freezer = cgroup_freezer(cgroup);
+
+	if (freezer->state != CGROUP_THAWED)
+		atomic_dec(&system_freezing_cnt);
+	kfree(freezer);
 }
 
 /*
@@ -307,10 +311,14 @@ static int freezer_change_state(struct cgroup *cgroup,
 
 	switch (goal_state) {
 	case CGROUP_THAWED:
+		if (freezer->state != CGROUP_THAWED)
+			atomic_dec(&system_freezing_cnt);
 		freezer->state = CGROUP_THAWED;
 		unfreeze_cgroup(cgroup, freezer);
 		break;
 	case CGROUP_FROZEN:
+		if (freezer->state == CGROUP_THAWED)
+			atomic_inc(&system_freezing_cnt);
 		freezer->state = CGROUP_FREEZING;
 		retval = try_to_freeze_cgroup(cgroup, freezer);
 		break;
diff --git a/kernel/fork.c b/kernel/fork.c
index ba0d17261329..d53316e88d9d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -997,7 +997,6 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 	new_flags |= PF_FORKNOEXEC;
 	new_flags |= PF_STARTING;
 	p->flags = new_flags;
-	clear_freeze_flag(p);
 }
 
 SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 11e32d419dec..f53cd5aa5b2e 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -11,9 +11,41 @@
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 
+/* total number of freezing conditions in effect */
+atomic_t system_freezing_cnt = ATOMIC_INIT(0);
+EXPORT_SYMBOL(system_freezing_cnt);
+
+/* indicate whether PM freezing is in effect, protected by pm_mutex */
+bool pm_freezing;
+bool pm_nosig_freezing;
+
 /* protects freezing and frozen transitions */
 static DEFINE_SPINLOCK(freezer_lock);
 
+/**
+ * freezing_slow_path - slow path for testing whether a task needs to be frozen
+ * @p: task to be tested
+ *
+ * This function is called by freezing() if system_freezing_cnt isn't zero
+ * and tests whether @p needs to enter and stay in frozen state.  Can be
+ * called under any context.  The freezers are responsible for ensuring the
+ * target tasks see the updated state.
+ */
+bool freezing_slow_path(struct task_struct *p)
+{
+	if (p->flags & PF_NOFREEZE)
+		return false;
+
+	if (pm_nosig_freezing || cgroup_freezing(p))
+		return true;
+
+	if (pm_freezing && !(p->flags & PF_FREEZER_NOSIG))
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL(freezing_slow_path);
+
 /* Refrigerator is place where frozen processes are stored :-). */
 bool __refrigerator(bool check_kthr_stop)
 {
@@ -23,17 +55,11 @@ bool __refrigerator(bool check_kthr_stop)
 	long save;
 
 	/*
-	 * Enter FROZEN.  If NOFREEZE, schedule immediate thawing by
-	 * clearing freezing.
+	 * No point in checking freezing() again - the caller already did.
+	 * Proceed to enter FROZEN.
 	 */
 	spin_lock_irq(&freezer_lock);
 repeat:
-	if (!freezing(current)) {
-		spin_unlock_irq(&freezer_lock);
-		return was_frozen;
-	}
-	if (current->flags & PF_NOFREEZE)
-		clear_freeze_flag(current);
 	current->flags |= PF_FROZEN;
 	spin_unlock_irq(&freezer_lock);
 
@@ -99,18 +125,12 @@ static void fake_signal_wake_up(struct task_struct *p)
 bool freeze_task(struct task_struct *p, bool sig_only)
 {
 	unsigned long flags;
-	bool ret = false;
 
 	spin_lock_irqsave(&freezer_lock, flags);
-
-	if ((p->flags & PF_NOFREEZE) ||
-	    (sig_only && !should_send_signal(p)))
-		goto out_unlock;
-
-	if (frozen(p))
-		goto out_unlock;
-
-	set_freeze_flag(p);
+	if (!freezing(p) || frozen(p)) {
+		spin_unlock_irqrestore(&freezer_lock, flags);
+		return false;
+	}
 
 	if (should_send_signal(p)) {
 		fake_signal_wake_up(p);
@@ -123,10 +143,9 @@ bool freeze_task(struct task_struct *p, bool sig_only)
 	} else {
 		wake_up_state(p, TASK_INTERRUPTIBLE);
 	}
-	ret = true;
-out_unlock:
+
 	spin_unlock_irqrestore(&freezer_lock, flags);
-	return ret;
+	return true;
 }
 
 void __thaw_task(struct task_struct *p)
@@ -143,7 +162,6 @@ void __thaw_task(struct task_struct *p)
 	 * avoid leaving dangling TIF_SIGPENDING behind.
 	 */
 	spin_lock_irqsave(&freezer_lock, flags);
-	clear_freeze_flag(p);
 	if (frozen(p)) {
 		wake_up_process(p);
 	} else {
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 9f6f5c755cfa..0beb51e1dec9 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -101,7 +101,7 @@ static int try_to_freeze_tasks(bool sig_only)
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
 			if (!wakeup && !freezer_should_skip(p) &&
-			    freezing(p) && !frozen(p))
+			    p != current && freezing(p) && !frozen(p))
 				sched_show_task(p);
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
@@ -122,7 +122,11 @@ int freeze_processes(void)
 {
 	int error;
 
+	if (!pm_freezing)
+		atomic_inc(&system_freezing_cnt);
+
 	printk("Freezing user space processes ... ");
+	pm_freezing = true;
 	error = try_to_freeze_tasks(true);
 	if (!error) {
 		printk("done.");
@@ -146,6 +150,7 @@ int freeze_kernel_threads(void)
 	int error;
 
 	printk("Freezing remaining freezable tasks ... ");
+	pm_nosig_freezing = true;
 	error = try_to_freeze_tasks(false);
 	if (!error)
 		printk("done.");
@@ -162,6 +167,11 @@ void thaw_processes(void)
 {
 	struct task_struct *g, *p;
 
+	if (pm_freezing)
+		atomic_dec(&system_freezing_cnt);
+	pm_freezing = false;
+	pm_nosig_freezing = false;
+
 	oom_killer_enable();
 
 	printk("Restarting tasks ... ");
@@ -170,9 +180,6 @@ void thaw_processes(void)
 
 	read_lock(&tasklist_lock);
 	do_each_thread(g, p) {
-		if (cgroup_freezing(p))
-			continue;
-
 		__thaw_task(p);
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
-- 
cgit 


From 948246f70a811c872b9d93bb4a8ab5823c4c79e0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:25 -0800
Subject: freezer: remove should_send_signal() and update frozen()

should_send_signal() is only used in freezer.c.  Exporting them only
increases chance of abuse.  Open code the two users and remove it.

Update frozen() to return bool.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/freezer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/freezer.c b/kernel/freezer.c
index f53cd5aa5b2e..95a123844241 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -132,7 +132,7 @@ bool freeze_task(struct task_struct *p, bool sig_only)
 		return false;
 	}
 
-	if (should_send_signal(p)) {
+	if (!(p->flags & PF_FREEZER_NOSIG)) {
 		fake_signal_wake_up(p);
 		/*
 		 * fake_signal_wake_up() goes through p's scheduler
-- 
cgit 


From 96ee6d8539c9fc6742908d85eb9723abb5c91854 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:25 -0800
Subject: freezer: fix set_freezable[_with_signal]() race

A kthread doing set_freezable*() may race with on-going PM freeze and
the freezer might think all tasks are frozen while the new freezable
kthread is merrily proceeding to execute code paths which aren't
supposed to be executing during PM freeze.

Reimplement set_freezable[_with_signal]() using __set_freezable() such
that freezable PF flags are modified under freezer_lock and
try_to_freeze() is called afterwards.  This eliminates race condition
against freezing.

Note: Separated out from larger patch to resolve fix order dependency
      Oleg pointed out.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
---
 kernel/freezer.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'kernel')

diff --git a/kernel/freezer.c b/kernel/freezer.c
index 95a123844241..b1e7a7b3d2cd 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -171,3 +171,28 @@ void __thaw_task(struct task_struct *p)
 	}
 	spin_unlock_irqrestore(&freezer_lock, flags);
 }
+
+/**
+ * __set_freezable - make %current freezable
+ * @with_signal: do we want %TIF_SIGPENDING for notification too?
+ *
+ * Mark %current freezable and enter refrigerator if necessary.
+ */
+bool __set_freezable(bool with_signal)
+{
+	might_sleep();
+
+	/*
+	 * Modify flags while holding freezer_lock.  This ensures the
+	 * freezer notices that we aren't frozen yet or the freezing
+	 * condition is visible to try_to_freeze() below.
+	 */
+	spin_lock_irq(&freezer_lock);
+	current->flags &= ~PF_NOFREEZE;
+	if (with_signal)
+		current->flags &= ~PF_FREEZER_NOSIG;
+	spin_unlock_irq(&freezer_lock);
+
+	return try_to_freeze();
+}
+EXPORT_SYMBOL(__set_freezable);
-- 
cgit 


From 5ece3eae4cdb968c269e0bc7e2c0e2b223552025 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:26 -0800
Subject: freezer: restructure __refrigerator()

If another freeze happens before all tasks leave FROZEN state after
being thawed, the freezer can see the existing FROZEN and consider the
tasks to be frozen but they can clear FROZEN without checking the new
freezing().

Oleg suggested restructuring __refrigerator() such that there's single
condition check section inside freezer_lock and sigpending is cleared
afterwards, which fixes the problem and simplifies the code.
Restructure accordingly.

-v2: Frozen loop exited without releasing freezer_lock.  Fixed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
---
 kernel/freezer.c | 32 +++++++++++---------------------
 1 file changed, 11 insertions(+), 21 deletions(-)

(limited to 'kernel')

diff --git a/kernel/freezer.c b/kernel/freezer.c
index b1e7a7b3d2cd..c3496c6a91b2 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -52,39 +52,29 @@ bool __refrigerator(bool check_kthr_stop)
 	/* Hmm, should we be allowed to suspend when there are realtime
 	   processes around? */
 	bool was_frozen = false;
-	long save;
+	long save = current->state;
 
-	/*
-	 * No point in checking freezing() again - the caller already did.
-	 * Proceed to enter FROZEN.
-	 */
-	spin_lock_irq(&freezer_lock);
-repeat:
-	current->flags |= PF_FROZEN;
-	spin_unlock_irq(&freezer_lock);
-
-	save = current->state;
 	pr_debug("%s entered refrigerator\n", current->comm);
 
-	spin_lock_irq(&current->sighand->siglock);
-	recalc_sigpending(); /* We sent fake signal, clean it up */
-	spin_unlock_irq(&current->sighand->siglock);
-
 	for (;;) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
+
+		spin_lock_irq(&freezer_lock);
+		current->flags |= PF_FROZEN;
 		if (!freezing(current) ||
 		    (check_kthr_stop && kthread_should_stop()))
+			current->flags &= ~PF_FROZEN;
+		spin_unlock_irq(&freezer_lock);
+
+		if (!(current->flags & PF_FROZEN))
 			break;
 		was_frozen = true;
 		schedule();
 	}
 
-	/* leave FROZEN */
-	spin_lock_irq(&freezer_lock);
-	if (freezing(current))
-		goto repeat;
-	current->flags &= ~PF_FROZEN;
-	spin_unlock_irq(&freezer_lock);
+	spin_lock_irq(&current->sighand->siglock);
+	recalc_sigpending(); /* We sent fake signal, clean it up */
+	spin_unlock_irq(&current->sighand->siglock);
 
 	pr_debug("%s left refrigerator\n", current->comm);
 
-- 
cgit 


From 37ad8aca94a1da2112a7c56151390914e80d1113 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:26 -0800
Subject: freezer: use lock_task_sighand() in fake_signal_wake_up()

cgroup_freezer calls freeze_task() without holding tasklist_lock and,
if the task is exiting, its ->sighand may be gone by the time
fake_signal_wake_up() is called.  Use lock_task_sighand() instead of
accessing ->sighand directly.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Paul Menage <paul@paulmenage.org>
---
 kernel/freezer.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/freezer.c b/kernel/freezer.c
index c3496c6a91b2..389549f0a94e 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -93,9 +93,10 @@ static void fake_signal_wake_up(struct task_struct *p)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&p->sighand->siglock, flags);
-	signal_wake_up(p, 0);
-	spin_unlock_irqrestore(&p->sighand->siglock, flags);
+	if (lock_task_sighand(p, &flags)) {
+		signal_wake_up(p, 0);
+		unlock_task_sighand(p, &flags);
+	}
 }
 
 /**
-- 
cgit 


From 839e3407d90a810318d17c17ceb3d5928a910704 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:26 -0800
Subject: freezer: remove unused @sig_only from freeze_task()

After "freezer: make freezing() test freeze conditions in effect
instead of TIF_FREEZE", freezing() returns authoritative answer on
whether the current task should freeze or not and freeze_task()
doesn't need or use @sig_only.  Remove it.

While at it, rewrite function comment for freeze_task() and rename
@sig_only to @user_only in try_to_freeze_tasks().

This patch doesn't cause any functional change.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Oleg Nesterov <oleg@redhat.com>
---
 kernel/cgroup_freezer.c |  4 ++--
 kernel/freezer.c        | 21 +++++++++------------
 kernel/power/process.c  |  8 ++++----
 3 files changed, 15 insertions(+), 18 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 2327ad11725f..e411a60cc2c8 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -206,7 +206,7 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
 
 	/* Locking avoids race with FREEZING -> THAWED transitions. */
 	if (freezer->state == CGROUP_FREEZING)
-		freeze_task(task, true);
+		freeze_task(task);
 	spin_unlock_irq(&freezer->lock);
 }
 
@@ -274,7 +274,7 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
 
 	cgroup_iter_start(cgroup, &it);
 	while ((task = cgroup_iter_next(cgroup, &it))) {
-		if (!freeze_task(task, true))
+		if (!freeze_task(task))
 			continue;
 		if (frozen(task))
 			continue;
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 389549f0a94e..2589a61de44c 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -100,20 +100,17 @@ static void fake_signal_wake_up(struct task_struct *p)
 }
 
 /**
- *	freeze_task - send a freeze request to given task
- *	@p: task to send the request to
- *	@sig_only: if set, the request will only be sent if the task has the
- *		PF_FREEZER_NOSIG flag unset
- *	Return value: 'false', if @sig_only is set and the task has
- *		PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise
+ * freeze_task - send a freeze request to given task
+ * @p: task to send the request to
  *
- *	The freeze request is sent by setting the tasks's TIF_FREEZE flag and
- *	either sending a fake signal to it or waking it up, depending on whether
- *	or not it has PF_FREEZER_NOSIG set.  If @sig_only is set and the task
- *	has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its
- *	TIF_FREEZE flag will not be set.
+ * If @p is freezing, the freeze request is sent by setting %TIF_FREEZE
+ * flag and either sending a fake signal to it or waking it up, depending
+ * on whether it has %PF_FREEZER_NOSIG set.
+ *
+ * RETURNS:
+ * %false, if @p is not freezing or already frozen; %true, otherwise
  */
-bool freeze_task(struct task_struct *p, bool sig_only)
+bool freeze_task(struct task_struct *p)
 {
 	unsigned long flags;
 
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 0beb51e1dec9..77274c9ba2f1 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -22,7 +22,7 @@
  */
 #define TIMEOUT	(20 * HZ)
 
-static int try_to_freeze_tasks(bool sig_only)
+static int try_to_freeze_tasks(bool user_only)
 {
 	struct task_struct *g, *p;
 	unsigned long end_time;
@@ -37,14 +37,14 @@ static int try_to_freeze_tasks(bool sig_only)
 
 	end_time = jiffies + TIMEOUT;
 
-	if (!sig_only)
+	if (!user_only)
 		freeze_workqueues_begin();
 
 	while (true) {
 		todo = 0;
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
-			if (p == current || !freeze_task(p, sig_only))
+			if (p == current || !freeze_task(p))
 				continue;
 
 			/*
@@ -65,7 +65,7 @@ static int try_to_freeze_tasks(bool sig_only)
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
 
-		if (!sig_only) {
+		if (!user_only) {
 			wq_busy = freeze_workqueues_busy();
 			todo += wq_busy;
 		}
-- 
cgit 


From 34b087e48367c252e343c2f8de65676a78af1e4a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 23 Nov 2011 09:28:17 -0800
Subject: freezer: kill unused set_freezable_with_signal()

There's no in-kernel user of set_freezable_with_signal() left.  Mixing
TIF_SIGPENDING with kernel threads can lead to nasty corner cases as
kernel threads never travel signal delivery path on their own.

e.g. the current implementation is buggy in the cancelation path of
__thaw_task().  It calls recalc_sigpending_and_wake() in an attempt to
clear TIF_SIGPENDING but the function never clears it regardless of
sigpending state.  This means that signallable freezable kthreads may
continue executing with !freezing() && stuck TIF_SIGPENDING, which can
be troublesome.

This patch removes set_freezable_with_signal() along with
PF_FREEZER_NOSIG and recalc_sigpending*() calls in freezer.  User
tasks get TIF_SIGPENDING, kernel tasks get woken up and the spurious
sigpending is dealt with in the usual signal delivery path.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Oleg Nesterov <oleg@redhat.com>
---
 kernel/freezer.c | 27 ++++++---------------------
 kernel/kthread.c |  2 +-
 2 files changed, 7 insertions(+), 22 deletions(-)

(limited to 'kernel')

diff --git a/kernel/freezer.c b/kernel/freezer.c
index 2589a61de44c..9815b8d1eed5 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -39,7 +39,7 @@ bool freezing_slow_path(struct task_struct *p)
 	if (pm_nosig_freezing || cgroup_freezing(p))
 		return true;
 
-	if (pm_freezing && !(p->flags & PF_FREEZER_NOSIG))
+	if (pm_freezing && !(p->flags & PF_KTHREAD))
 		return true;
 
 	return false;
@@ -72,10 +72,6 @@ bool __refrigerator(bool check_kthr_stop)
 		schedule();
 	}
 
-	spin_lock_irq(&current->sighand->siglock);
-	recalc_sigpending(); /* We sent fake signal, clean it up */
-	spin_unlock_irq(&current->sighand->siglock);
-
 	pr_debug("%s left refrigerator\n", current->comm);
 
 	/*
@@ -120,7 +116,7 @@ bool freeze_task(struct task_struct *p)
 		return false;
 	}
 
-	if (!(p->flags & PF_FREEZER_NOSIG)) {
+	if (!(p->flags & PF_KTHREAD)) {
 		fake_signal_wake_up(p);
 		/*
 		 * fake_signal_wake_up() goes through p's scheduler
@@ -145,28 +141,19 @@ void __thaw_task(struct task_struct *p)
 	 * be visible to @p as waking up implies wmb.  Waking up inside
 	 * freezer_lock also prevents wakeups from leaking outside
 	 * refrigerator.
-	 *
-	 * If !FROZEN, @p hasn't reached refrigerator, recalc sigpending to
-	 * avoid leaving dangling TIF_SIGPENDING behind.
 	 */
 	spin_lock_irqsave(&freezer_lock, flags);
-	if (frozen(p)) {
+	if (frozen(p))
 		wake_up_process(p);
-	} else {
-		spin_lock(&p->sighand->siglock);
-		recalc_sigpending_and_wake(p);
-		spin_unlock(&p->sighand->siglock);
-	}
 	spin_unlock_irqrestore(&freezer_lock, flags);
 }
 
 /**
- * __set_freezable - make %current freezable
- * @with_signal: do we want %TIF_SIGPENDING for notification too?
+ * set_freezable - make %current freezable
  *
  * Mark %current freezable and enter refrigerator if necessary.
  */
-bool __set_freezable(bool with_signal)
+bool set_freezable(void)
 {
 	might_sleep();
 
@@ -177,10 +164,8 @@ bool __set_freezable(bool with_signal)
 	 */
 	spin_lock_irq(&freezer_lock);
 	current->flags &= ~PF_NOFREEZE;
-	if (with_signal)
-		current->flags &= ~PF_FREEZER_NOSIG;
 	spin_unlock_irq(&freezer_lock);
 
 	return try_to_freeze();
 }
-EXPORT_SYMBOL(__set_freezable);
+EXPORT_SYMBOL(set_freezable);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 1c36deaae2f1..3d3de633702e 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -282,7 +282,7 @@ int kthreadd(void *unused)
 	set_cpus_allowed_ptr(tsk, cpu_all_mask);
 	set_mems_allowed(node_states[N_HIGH_MEMORY]);
 
-	current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG;
+	current->flags |= PF_NOFREEZE;
 
 	for (;;) {
 		set_current_state(TASK_INTERRUPTIBLE);
-- 
cgit 


From 341d4166175e9b7911444f5a33b1c9efb8f15c85 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Sat, 19 Nov 2011 14:39:01 +0100
Subject: PM: Fix indentation and remove extraneous whitespaces in
 kernel/power/main.c

Lack of proper indentation of the goto statement decreases the readability
of code significantly. In fact, this made me look twice at the code to check
whether it really does what it should be doing. Fix this.

And in the same file, there are some extra whitespaces. Get rid of them too.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 kernel/power/main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/main.c b/kernel/power/main.c
index 36e0f0903c32..7d36fb31e4c4 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2003 Patrick Mochel
  * Copyright (c) 2003 Open Source Development Lab
- * 
+ *
  * This file is released under the GPLv2
  *
  */
@@ -240,7 +240,7 @@ struct kobject *power_kobj;
  *	'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and
  *	'disk' (Suspend-to-Disk).
  *
- *	store() accepts one of those strings, translates it into the 
+ *	store() accepts one of those strings, translates it into the
  *	proper enumerated value, and initiates a suspend transition.
  */
 static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
@@ -282,7 +282,7 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
 	/* First, check if we are requested to hibernate */
 	if (len == 4 && !strncmp(buf, "disk", len)) {
 		error = hibernate();
-  goto Exit;
+		goto Exit;
 	}
 
 #ifdef CONFIG_SUSPEND
-- 
cgit 


From 953a206393b1533ceb0e7d725cc5a8c8d7ed97dd Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Tue, 22 Nov 2011 23:20:31 +0100
Subject: PM / Hibernate: Refactor and simplify hibernation_snapshot() code

The goto statements in hibernation_snapshot() are a bit complex.
Refactor the code to remove some of them, thereby simplifying the
implementation.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 kernel/power/hibernate.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index a6b0503574ee..ebf62c3bc9f7 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -333,7 +333,7 @@ static int create_image(int platform_mode)
  */
 int hibernation_snapshot(int platform_mode)
 {
-	pm_message_t msg = PMSG_RECOVER;
+	pm_message_t msg;
 	int error;
 
 	error = platform_begin(platform_mode);
@@ -362,26 +362,26 @@ int hibernation_snapshot(int platform_mode)
 
 	error = dpm_prepare(PMSG_FREEZE);
 	if (error) {
-		dpm_complete(msg);
+		dpm_complete(PMSG_RECOVER);
 		goto Cleanup;
 	}
 
 	suspend_console();
 	pm_restrict_gfp_mask();
+
 	error = dpm_suspend(PMSG_FREEZE);
-	if (error)
-		goto Recover_platform;
 
-	if (hibernation_test(TEST_DEVICES))
-		goto Recover_platform;
+	if (error || hibernation_test(TEST_DEVICES))
+		platform_recover(platform_mode);
+	else
+		error = create_image(platform_mode);
 
-	error = create_image(platform_mode);
 	/*
-	 * Control returns here (1) after the image has been created or the
+	 * In the case that we call create_image() above, the control
+	 * returns here (1) after the image has been created or the
 	 * image creation has failed and (2) after a successful restore.
 	 */
 
- Resume_devices:
 	/* We may need to release the preallocated image pages here. */
 	if (error || !in_suspend)
 		swsusp_free();
@@ -399,10 +399,6 @@ int hibernation_snapshot(int platform_mode)
 	platform_end(platform_mode);
 	return error;
 
- Recover_platform:
-	platform_recover(platform_mode);
-	goto Resume_devices;
-
  Cleanup:
 	swsusp_free();
 	goto Close;
-- 
cgit 


From 0118521cc7acb3ccbc1a01d6144ac32be9d56a4c Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Thu, 1 Dec 2011 22:32:43 +0100
Subject: PM / Hibernate: Enable usermodehelpers in software_resume() error
 path

In the software_resume() function defined in kernel/power/hibernate.c,
if the call to create_basic_memory_bitmaps() fails, the usermodehelpers
are not enabled (which had been disabled in the previous step). Fix it.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 kernel/power/hibernate.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index ebf62c3bc9f7..1fcf9de4506d 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -807,8 +807,10 @@ static int software_resume(void)
 		goto close_finish;
 
 	error = create_basic_memory_bitmaps();
-	if (error)
+	if (error) {
+		usermodehelper_enable();
 		goto close_finish;
+	}
 
 	pr_debug("PM: Preparing processes for restore.\n");
 	error = prepare_processes();
-- 
cgit 


From 97819a26224f019e73d88bb2fd4eb5a614860461 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Thu, 1 Dec 2011 22:33:10 +0100
Subject: PM / Hibernate: Thaw processes in SNAPSHOT_CREATE_IMAGE ioctl test
 path

Commit 2aede851ddf08666f68ffc17be446420e9d2a056 (PM / Hibernate: Freeze
kernel threads after preallocating memory) moved the freezing of kernel
threads to hibernation_snapshot() function.

So now, if the call to hibernation_snapshot() returns early due to a
successful hibernation test, the caller has to thaw processes to ensure
that the system gets back to its original state.

But in SNAPSHOT_CREATE_IMAGE hibernation ioctl, the caller does not thaw
processes in case hibernation_snapshot() returned due to a successful
freezer test. Fix this issue. But note we still send the value of 'in_suspend'
(which is now 0) to userspace, because we are not in an error path per-se,
and moreover, the value of in_suspend correctly depicts the situation here.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 kernel/power/hibernate.c |  2 +-
 kernel/power/power.h     |  2 ++
 kernel/power/user.c      | 11 ++++++++---
 3 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 1fcf9de4506d..c10cb0f916e2 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -55,7 +55,7 @@ enum {
 
 static int hibernation_mode = HIBERNATION_SHUTDOWN;
 
-static bool freezer_test_done;
+bool freezer_test_done;
 
 static const struct platform_hibernation_ops *hibernation_ops;
 
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 23a2db1ec442..0c4defe6d3b8 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -50,6 +50,8 @@ static inline char *check_image_kernel(struct swsusp_info *info)
 #define SPARE_PAGES	((1024 * 1024) >> PAGE_SHIFT)
 
 /* kernel/power/hibernate.c */
+extern bool freezer_test_done;
+
 extern int hibernation_snapshot(int platform_mode);
 extern int hibernation_restore(int platform_mode);
 extern int hibernation_platform_enter(void);
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 6d8f535c2b88..e2aff0fc2697 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -283,10 +283,15 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 		}
 		pm_restore_gfp_mask();
 		error = hibernation_snapshot(data->platform_support);
-		if (!error)
+		if (!error) {
 			error = put_user(in_suspend, (int __user *)arg);
-		if (!error)
-			data->ready = 1;
+			if (!error && !freezer_test_done)
+				data->ready = 1;
+			if (freezer_test_done) {
+				freezer_test_done = false;
+				thaw_processes();
+			}
+		}
 		break;
 
 	case SNAPSHOT_ATOMIC_RESTORE:
-- 
cgit 


From 48580ab8729865c81e148d59159fbe2aa7865511 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Thu, 1 Dec 2011 22:33:20 +0100
Subject: PM / Hibernate: Remove deprecated hibernation test modes

The hibernation test modes 'test' and 'testproc' are deprecated, because
the 'pm_test' framework offers much more fine-grained control for debugging
suspend and hibernation related problems.

So, remove the deprecated 'test' and 'testproc' hibernation test modes.

Suggested-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 kernel/power/hibernate.c | 33 ++++-----------------------------
 1 file changed, 4 insertions(+), 29 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index c10cb0f916e2..5314a94a92c1 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -43,8 +43,6 @@ int in_suspend __nosavedata;
 enum {
 	HIBERNATION_INVALID,
 	HIBERNATION_PLATFORM,
-	HIBERNATION_TEST,
-	HIBERNATION_TESTPROC,
 	HIBERNATION_SHUTDOWN,
 	HIBERNATION_REBOOT,
 	/* keep last */
@@ -96,15 +94,6 @@ static void hibernation_debug_sleep(void)
 	mdelay(5000);
 }
 
-static int hibernation_testmode(int mode)
-{
-	if (hibernation_mode == mode) {
-		hibernation_debug_sleep();
-		return 1;
-	}
-	return 0;
-}
-
 static int hibernation_test(int level)
 {
 	if (pm_test_level == level) {
@@ -114,7 +103,6 @@ static int hibernation_test(int level)
 	return 0;
 }
 #else /* !CONFIG_PM_DEBUG */
-static int hibernation_testmode(int mode) { return 0; }
 static int hibernation_test(int level) { return 0; }
 #endif /* !CONFIG_PM_DEBUG */
 
@@ -278,8 +266,7 @@ static int create_image(int platform_mode)
 		goto Platform_finish;
 
 	error = disable_nonboot_cpus();
-	if (error || hibernation_test(TEST_CPUS)
-	    || hibernation_testmode(HIBERNATION_TEST))
+	if (error || hibernation_test(TEST_CPUS))
 		goto Enable_cpus;
 
 	local_irq_disable();
@@ -349,8 +336,7 @@ int hibernation_snapshot(int platform_mode)
 	if (error)
 		goto Cleanup;
 
-	if (hibernation_test(TEST_FREEZER) ||
-		hibernation_testmode(HIBERNATION_TESTPROC)) {
+	if (hibernation_test(TEST_FREEZER)) {
 
 		/*
 		 * Indicate to the caller that we are returning due to a
@@ -586,9 +572,6 @@ int hibernation_platform_enter(void)
 static void power_down(void)
 {
 	switch (hibernation_mode) {
-	case HIBERNATION_TEST:
-	case HIBERNATION_TESTPROC:
-		break;
 	case HIBERNATION_REBOOT:
 		kernel_restart(NULL);
 		break;
@@ -853,8 +836,6 @@ static const char * const hibernation_modes[] = {
 	[HIBERNATION_PLATFORM]	= "platform",
 	[HIBERNATION_SHUTDOWN]	= "shutdown",
 	[HIBERNATION_REBOOT]	= "reboot",
-	[HIBERNATION_TEST]	= "test",
-	[HIBERNATION_TESTPROC]	= "testproc",
 };
 
 /*
@@ -863,17 +844,15 @@ static const char * const hibernation_modes[] = {
  * Hibernation can be handled in several ways.  There are a few different ways
  * to put the system into the sleep state: using the platform driver (e.g. ACPI
  * or other hibernation_ops), powering it off or rebooting it (for testing
- * mostly), or using one of the two available test modes.
+ * mostly).
  *
  * The sysfs file /sys/power/disk provides an interface for selecting the
  * hibernation mode to use.  Reading from this file causes the available modes
- * to be printed.  There are 5 modes that can be supported:
+ * to be printed.  There are 3 modes that can be supported:
  *
  *	'platform'
  *	'shutdown'
  *	'reboot'
- *	'test'
- *	'testproc'
  *
  * If a platform hibernation driver is in use, 'platform' will be supported
  * and will be used by default.  Otherwise, 'shutdown' will be used by default.
@@ -897,8 +876,6 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr,
 		switch (i) {
 		case HIBERNATION_SHUTDOWN:
 		case HIBERNATION_REBOOT:
-		case HIBERNATION_TEST:
-		case HIBERNATION_TESTPROC:
 			break;
 		case HIBERNATION_PLATFORM:
 			if (hibernation_ops)
@@ -939,8 +916,6 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
 		switch (mode) {
 		case HIBERNATION_SHUTDOWN:
 		case HIBERNATION_REBOOT:
-		case HIBERNATION_TEST:
-		case HIBERNATION_TESTPROC:
 			hibernation_mode = mode;
 			break;
 		case HIBERNATION_PLATFORM:
-- 
cgit 


From e5b16746f0f2d6883c226af52d90904ce0f7eee8 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Sat, 3 Dec 2011 00:20:30 +0100
Subject: PM / Hibernate: Replace unintuitive 'if' condition in
 kernel/power/user.c with 'else'

In the snapshot_ioctl() function, under SNAPSHOT_FREEZE, the code below
freeze_processes() is a bit unintuitive. Improve it by replacing the
second 'if' condition with an 'else' clause.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 kernel/power/user.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/power/user.c b/kernel/power/user.c
index c202e2e1a2d5..06ea33df8560 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -259,7 +259,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 		error = freeze_processes();
 		if (error)
 			usermodehelper_enable();
-		if (!error)
+		else
 			data->frozen = 1;
 		break;
 
-- 
cgit 


From bcda53faf5814c0c6025a0bd47108adfcbe9f199 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Wed, 7 Dec 2011 22:29:54 +0100
Subject: PM / Sleep: Replace mutex_[un]lock(&pm_mutex) with
 [un]lock_system_sleep()

Using [un]lock_system_sleep() is safer than directly using mutex_[un]lock()
on 'pm_mutex', since the latter could lead to freezing failures. Hence convert
all the present users of mutex_[un]lock(&pm_mutex) to use these safe APIs
instead.

Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Reviewed-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 kernel/kexec.c           |  4 ++--
 kernel/power/hibernate.c | 16 ++++++++--------
 kernel/power/main.c      |  4 ++--
 kernel/power/suspend.c   |  4 ++--
 kernel/power/user.c      | 16 ++++++++--------
 5 files changed, 22 insertions(+), 22 deletions(-)

(limited to 'kernel')

diff --git a/kernel/kexec.c b/kernel/kexec.c
index dc7bc0829286..090ee10d9604 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1523,7 +1523,7 @@ int kernel_kexec(void)
 
 #ifdef CONFIG_KEXEC_JUMP
 	if (kexec_image->preserve_context) {
-		mutex_lock(&pm_mutex);
+		lock_system_sleep();
 		pm_prepare_console();
 		error = freeze_processes();
 		if (error) {
@@ -1576,7 +1576,7 @@ int kernel_kexec(void)
 		thaw_processes();
  Restore_console:
 		pm_restore_console();
-		mutex_unlock(&pm_mutex);
+		unlock_system_sleep();
 	}
 #endif
 
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 605149a6d219..6d6d28870335 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -69,14 +69,14 @@ void hibernation_set_ops(const struct platform_hibernation_ops *ops)
 		WARN_ON(1);
 		return;
 	}
-	mutex_lock(&pm_mutex);
+	lock_system_sleep();
 	hibernation_ops = ops;
 	if (ops)
 		hibernation_mode = HIBERNATION_PLATFORM;
 	else if (hibernation_mode == HIBERNATION_PLATFORM)
 		hibernation_mode = HIBERNATION_SHUTDOWN;
 
-	mutex_unlock(&pm_mutex);
+	unlock_system_sleep();
 }
 
 static bool entering_platform_hibernation;
@@ -597,7 +597,7 @@ int hibernate(void)
 {
 	int error;
 
-	mutex_lock(&pm_mutex);
+	lock_system_sleep();
 	/* The snapshot device should not be opened while we're running */
 	if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
 		error = -EBUSY;
@@ -665,7 +665,7 @@ int hibernate(void)
 	pm_restore_console();
 	atomic_inc(&snapshot_device_available);
  Unlock:
-	mutex_unlock(&pm_mutex);
+	unlock_system_sleep();
 	return error;
 }
 
@@ -893,7 +893,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
 	p = memchr(buf, '\n', n);
 	len = p ? p - buf : n;
 
-	mutex_lock(&pm_mutex);
+	lock_system_sleep();
 	for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
 		if (len == strlen(hibernation_modes[i])
 		    && !strncmp(buf, hibernation_modes[i], len)) {
@@ -919,7 +919,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
 	if (!error)
 		pr_debug("PM: Hibernation mode set to '%s'\n",
 			 hibernation_modes[mode]);
-	mutex_unlock(&pm_mutex);
+	unlock_system_sleep();
 	return error ? error : n;
 }
 
@@ -946,9 +946,9 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
 	if (maj != MAJOR(res) || min != MINOR(res))
 		goto out;
 
-	mutex_lock(&pm_mutex);
+	lock_system_sleep();
 	swsusp_resume_device = res;
-	mutex_unlock(&pm_mutex);
+	unlock_system_sleep();
 	printk(KERN_INFO "PM: Starting manual resume from disk\n");
 	noresume = 0;
 	software_resume();
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 7d36fb31e4c4..9824b41e5a18 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -116,7 +116,7 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr,
 	p = memchr(buf, '\n', n);
 	len = p ? p - buf : n;
 
-	mutex_lock(&pm_mutex);
+	lock_system_sleep();
 
 	level = TEST_FIRST;
 	for (s = &pm_tests[level]; level <= TEST_MAX; s++, level++)
@@ -126,7 +126,7 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr,
 			break;
 		}
 
-	mutex_unlock(&pm_mutex);
+	unlock_system_sleep();
 
 	return error ? error : n;
 }
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index d336b27d1104..4fd51beed879 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -42,9 +42,9 @@ static const struct platform_suspend_ops *suspend_ops;
  */
 void suspend_set_ops(const struct platform_suspend_ops *ops)
 {
-	mutex_lock(&pm_mutex);
+	lock_system_sleep();
 	suspend_ops = ops;
-	mutex_unlock(&pm_mutex);
+	unlock_system_sleep();
 }
 EXPORT_SYMBOL_GPL(suspend_set_ops);
 
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 06ea33df8560..98ade217da6c 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -71,7 +71,7 @@ static int snapshot_open(struct inode *inode, struct file *filp)
 	struct snapshot_data *data;
 	int error;
 
-	mutex_lock(&pm_mutex);
+	lock_system_sleep();
 
 	if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
 		error = -EBUSY;
@@ -123,7 +123,7 @@ static int snapshot_open(struct inode *inode, struct file *filp)
 	data->platform_support = 0;
 
  Unlock:
-	mutex_unlock(&pm_mutex);
+	unlock_system_sleep();
 
 	return error;
 }
@@ -132,7 +132,7 @@ static int snapshot_release(struct inode *inode, struct file *filp)
 {
 	struct snapshot_data *data;
 
-	mutex_lock(&pm_mutex);
+	lock_system_sleep();
 
 	swsusp_free();
 	free_basic_memory_bitmaps();
@@ -146,7 +146,7 @@ static int snapshot_release(struct inode *inode, struct file *filp)
 			PM_POST_HIBERNATION : PM_POST_RESTORE);
 	atomic_inc(&snapshot_device_available);
 
-	mutex_unlock(&pm_mutex);
+	unlock_system_sleep();
 
 	return 0;
 }
@@ -158,7 +158,7 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
 	ssize_t res;
 	loff_t pg_offp = *offp & ~PAGE_MASK;
 
-	mutex_lock(&pm_mutex);
+	lock_system_sleep();
 
 	data = filp->private_data;
 	if (!data->ready) {
@@ -179,7 +179,7 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
 		*offp += res;
 
  Unlock:
-	mutex_unlock(&pm_mutex);
+	unlock_system_sleep();
 
 	return res;
 }
@@ -191,7 +191,7 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
 	ssize_t res;
 	loff_t pg_offp = *offp & ~PAGE_MASK;
 
-	mutex_lock(&pm_mutex);
+	lock_system_sleep();
 
 	data = filp->private_data;
 
@@ -208,7 +208,7 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
 	if (res > 0)
 		*offp += res;
 unlock:
-	mutex_unlock(&pm_mutex);
+	unlock_system_sleep();
 
 	return res;
 }
-- 
cgit 


From b298d289c79211508f11cb50749b0d1d54eb244a Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Fri, 9 Dec 2011 23:36:36 +0100
Subject: PM / Sleep: Fix freezer failures due to racy
 usermodehelper_is_disabled()

Commit a144c6a (PM: Print a warning if firmware is requested when tasks
are frozen) introduced usermodehelper_is_disabled() to warn and exit
immediately if firmware is requested when usermodehelpers are disabled.

However, it is racy. Consider the following scenario, currently used in
drivers/base/firmware_class.c:

...
if (usermodehelper_is_disabled())
        goto out;

/* Do actual work */
...

out:
        return err;

Nothing prevents someone from disabling usermodehelpers just after the check
in the 'if' condition, which means that it is quite possible to try doing the
"actual work" with usermodehelpers disabled, leading to undesirable
consequences.

In particular, this race condition in _request_firmware() causes task freezing
failures whenever suspend/hibernation is in progress because, it wrongly waits
to get the firmware/microcode image from userspace when actually the
usermodehelpers are disabled or userspace has been frozen.
Some of the example scenarios that cause freezing failures due to this race
are those that depend on userspace via request_firmware(), such as x86
microcode module initialization and microcode image reload.

Previous discussions about this issue can be found at:
http://thread.gmane.org/gmane.linux.kernel/1198291/focus=1200591

This patch adds proper synchronization to fix this issue.

It is to be noted that this patchset fixes the freezing failures but doesn't
remove the warnings. IOW, it does not attempt to add explicit synchronization
to x86 microcode driver to avoid requesting microcode image at inopportune
moments. Because, the warnings were introduced to highlight such cases, in the
first place. And we need not silence the warnings, since we take care of the
*real* problem (freezing failure) and hence, after that, the warnings are
pretty harmless anyway.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 kernel/kmod.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/kmod.c b/kernel/kmod.c
index a4bea97c75b6..81b4a27261b2 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -36,6 +36,7 @@
 #include <linux/resource.h>
 #include <linux/notifier.h>
 #include <linux/suspend.h>
+#include <linux/rwsem.h>
 #include <asm/uaccess.h>
 
 #include <trace/events/module.h>
@@ -50,6 +51,7 @@ static struct workqueue_struct *khelper_wq;
 static kernel_cap_t usermodehelper_bset = CAP_FULL_SET;
 static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET;
 static DEFINE_SPINLOCK(umh_sysctl_lock);
+static DECLARE_RWSEM(umhelper_sem);
 
 #ifdef CONFIG_MODULES
 
@@ -275,6 +277,7 @@ static void __call_usermodehelper(struct work_struct *work)
  * If set, call_usermodehelper_exec() will exit immediately returning -EBUSY
  * (used for preventing user land processes from being created after the user
  * land has been frozen during a system-wide hibernation or suspend operation).
+ * Should always be manipulated under umhelper_sem acquired for write.
  */
 static int usermodehelper_disabled = 1;
 
@@ -293,6 +296,18 @@ static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq);
  */
 #define RUNNING_HELPERS_TIMEOUT	(5 * HZ)
 
+void read_lock_usermodehelper(void)
+{
+	down_read(&umhelper_sem);
+}
+EXPORT_SYMBOL_GPL(read_lock_usermodehelper);
+
+void read_unlock_usermodehelper(void)
+{
+	up_read(&umhelper_sem);
+}
+EXPORT_SYMBOL_GPL(read_unlock_usermodehelper);
+
 /**
  * usermodehelper_disable - prevent new helpers from being started
  */
@@ -300,8 +315,10 @@ int usermodehelper_disable(void)
 {
 	long retval;
 
+	down_write(&umhelper_sem);
 	usermodehelper_disabled = 1;
-	smp_mb();
+	up_write(&umhelper_sem);
+
 	/*
 	 * From now on call_usermodehelper_exec() won't start any new
 	 * helpers, so it is sufficient if running_helpers turns out to
@@ -314,7 +331,9 @@ int usermodehelper_disable(void)
 	if (retval)
 		return 0;
 
+	down_write(&umhelper_sem);
 	usermodehelper_disabled = 0;
+	up_write(&umhelper_sem);
 	return -EAGAIN;
 }
 
@@ -323,7 +342,9 @@ int usermodehelper_disable(void)
  */
 void usermodehelper_enable(void)
 {
+	down_write(&umhelper_sem);
 	usermodehelper_disabled = 0;
+	up_write(&umhelper_sem);
 }
 
 /**
-- 
cgit 


From cf007e3526a785a95a738d5a8fba44f1f4fe33e0 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Thu, 8 Dec 2011 23:42:53 +0100
Subject: PM / Hibernate: Remove deprecated hibernation snapshot ioctls

Several snapshot ioctls were marked for removal quite some time ago,
since they were deprecated. Remove them.

Suggested-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 kernel/power/user.c | 87 -----------------------------------------------------
 1 file changed, 87 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/user.c b/kernel/power/user.c
index 98ade217da6c..78bdb4404aab 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -30,28 +30,6 @@
 
 #include "power.h"
 
-/*
- * NOTE: The SNAPSHOT_SET_SWAP_FILE and SNAPSHOT_PMOPS ioctls are obsolete and
- * will be removed in the future.  They are only preserved here for
- * compatibility with existing userland utilities.
- */
-#define SNAPSHOT_SET_SWAP_FILE	_IOW(SNAPSHOT_IOC_MAGIC, 10, unsigned int)
-#define SNAPSHOT_PMOPS		_IOW(SNAPSHOT_IOC_MAGIC, 12, unsigned int)
-
-#define PMOPS_PREPARE	1
-#define PMOPS_ENTER	2
-#define PMOPS_FINISH	3
-
-/*
- * NOTE: The following ioctl definitions are wrong and have been replaced with
- * correct ones.  They are only preserved here for compatibility with existing
- * userland utilities and will be removed in the future.
- */
-#define SNAPSHOT_ATOMIC_SNAPSHOT	_IOW(SNAPSHOT_IOC_MAGIC, 3, void *)
-#define SNAPSHOT_SET_IMAGE_SIZE		_IOW(SNAPSHOT_IOC_MAGIC, 6, unsigned long)
-#define SNAPSHOT_AVAIL_SWAP		_IOR(SNAPSHOT_IOC_MAGIC, 7, void *)
-#define SNAPSHOT_GET_SWAP_PAGE		_IOR(SNAPSHOT_IOC_MAGIC, 8, void *)
-
 
 #define SNAPSHOT_MINOR	231
 
@@ -213,15 +191,6 @@ unlock:
 	return res;
 }
 
-static void snapshot_deprecated_ioctl(unsigned int cmd)
-{
-	if (printk_ratelimit())
-		printk(KERN_NOTICE "%pf: ioctl '%.8x' is deprecated and will "
-				"be removed soon, update your suspend-to-disk "
-				"utilities\n",
-				__builtin_return_address(0), cmd);
-}
-
 static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 							unsigned long arg)
 {
@@ -272,8 +241,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 		data->frozen = 0;
 		break;
 
-	case SNAPSHOT_ATOMIC_SNAPSHOT:
-		snapshot_deprecated_ioctl(cmd);
 	case SNAPSHOT_CREATE_IMAGE:
 		if (data->mode != O_RDONLY || !data->frozen  || data->ready) {
 			error = -EPERM;
@@ -308,8 +275,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 		data->ready = 0;
 		break;
 
-	case SNAPSHOT_SET_IMAGE_SIZE:
-		snapshot_deprecated_ioctl(cmd);
 	case SNAPSHOT_PREF_IMAGE_SIZE:
 		image_size = arg;
 		break;
@@ -324,16 +289,12 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 		error = put_user(size, (loff_t __user *)arg);
 		break;
 
-	case SNAPSHOT_AVAIL_SWAP:
-		snapshot_deprecated_ioctl(cmd);
 	case SNAPSHOT_AVAIL_SWAP_SIZE:
 		size = count_swap_pages(data->swap, 1);
 		size <<= PAGE_SHIFT;
 		error = put_user(size, (loff_t __user *)arg);
 		break;
 
-	case SNAPSHOT_GET_SWAP_PAGE:
-		snapshot_deprecated_ioctl(cmd);
 	case SNAPSHOT_ALLOC_SWAP_PAGE:
 		if (data->swap < 0 || data->swap >= MAX_SWAPFILES) {
 			error = -ENODEV;
@@ -356,27 +317,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 		free_all_swap_pages(data->swap);
 		break;
 
-	case SNAPSHOT_SET_SWAP_FILE: /* This ioctl is deprecated */
-		snapshot_deprecated_ioctl(cmd);
-		if (!swsusp_swap_in_use()) {
-			/*
-			 * User space encodes device types as two-byte values,
-			 * so we need to recode them
-			 */
-			if (old_decode_dev(arg)) {
-				data->swap = swap_type_of(old_decode_dev(arg),
-							0, NULL);
-				if (data->swap < 0)
-					error = -ENODEV;
-			} else {
-				data->swap = -1;
-				error = -EINVAL;
-			}
-		} else {
-			error = -EPERM;
-		}
-		break;
-
 	case SNAPSHOT_S2RAM:
 		if (!data->frozen) {
 			error = -EPERM;
@@ -399,33 +339,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 			error = hibernation_platform_enter();
 		break;
 
-	case SNAPSHOT_PMOPS: /* This ioctl is deprecated */
-		snapshot_deprecated_ioctl(cmd);
-		error = -EINVAL;
-
-		switch (arg) {
-
-		case PMOPS_PREPARE:
-			data->platform_support = 1;
-			error = 0;
-			break;
-
-		case PMOPS_ENTER:
-			if (data->platform_support)
-				error = hibernation_platform_enter();
-			break;
-
-		case PMOPS_FINISH:
-			if (data->platform_support)
-				error = 0;
-			break;
-
-		default:
-			printk(KERN_ERR "SNAPSHOT_PMOPS: invalid argument %ld\n", arg);
-
-		}
-		break;
-
 	case SNAPSHOT_SET_SWAP_AREA:
 		if (swsusp_swap_in_use()) {
 			error = -EPERM;
-- 
cgit