Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile              |   5
-rw-r--r--  kernel/async.c               |   4
-rw-r--r--  kernel/cgroup.c              |  18
-rw-r--r--  kernel/cred.c                |  24
-rw-r--r--  kernel/events/core.c         |  67
-rw-r--r--  kernel/fork.c                |   1
-rw-r--r--  kernel/freezer.c             |   2
-rw-r--r--  kernel/futex.c               |  10
-rw-r--r--  kernel/irq/chip.c            |   2
-rw-r--r--  kernel/irq/generic-chip.c    |   4
-rw-r--r--  kernel/irq/irqdesc.c         |  37
-rw-r--r--  kernel/irq/irqdomain.c       |   6
-rw-r--r--  kernel/irq/manage.c          |  17
-rw-r--r--  kernel/kmod.c                |   4
-rw-r--r--  kernel/kprobes.c             |  34
-rw-r--r--  kernel/latencytop.c          |  14
-rw-r--r--  kernel/lockdep.c             | 164
-rw-r--r--  kernel/params.c              |  21
-rw-r--r--  kernel/posix-cpu-timers.c    |  22
-rw-r--r--  kernel/power/Kconfig         |   8
-rw-r--r--  kernel/power/Makefile        |   4
-rw-r--r--  kernel/power/console.c       |   4
-rw-r--r--  kernel/power/hibernate.c     |  53
-rw-r--r--  kernel/power/main.c          | 102
-rw-r--r--  kernel/power/power.h         |   4
-rw-r--r--  kernel/power/process.c       |  30
-rw-r--r--  kernel/power/qos.c (renamed from kernel/pm_qos_params.c) | 273
-rw-r--r--  kernel/power/snapshot.c      |  18
-rw-r--r--  kernel/power/suspend.c       |  17
-rw-r--r--  kernel/power/swap.c          | 818
-rw-r--r--  kernel/printk.c              |  54
-rw-r--r--  kernel/ptrace.c              |  23
-rw-r--r--  kernel/resource.c            |   7
-rw-r--r--  kernel/rtmutex-debug.c       |  77
-rw-r--r--  kernel/sched.c               |  69
-rw-r--r--  kernel/sched_rt.c            |   4
-rw-r--r--  kernel/sched_stats.h         |  12
-rw-r--r--  kernel/semaphore.c           |  28
-rw-r--r--  kernel/signal.c              |  24
-rw-r--r--  kernel/sys.c                 |  54
-rw-r--r--  kernel/sys_ni.c              |   1
-rw-r--r--  kernel/sysctl_binary.c       |   4
-rw-r--r--  kernel/sysctl_check.c        |   2
-rw-r--r--  kernel/taskstats.c           |   1
-rw-r--r--  kernel/time/alarmtimer.c     |  18
-rw-r--r--  kernel/time/timer_stats.c    |   6
-rw-r--r--  kernel/trace/Kconfig         |   2
-rw-r--r--  kernel/trace/Makefile        |   3
-rw-r--r--  kernel/trace/blktrace.c      |  21
-rw-r--r--  kernel/trace/ring_buffer.c   |  52
-rw-r--r--  kernel/trace/rpm-traces.c    |  20
-rw-r--r--  kernel/trace/trace.c         |  10
-rw-r--r--  kernel/trace/trace_irqsoff.c |   6
-rw-r--r--  kernel/tsacct.c              |  15
-rw-r--r--  kernel/workqueue.c           |   7
55 files changed, 1621 insertions(+), 686 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index d06467fc8f7c..2da48d3515eb 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,8 +9,8 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
- notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
- async.o range.o jump_label.o
+ notifier.o ksysfs.o sched_clock.o cred.o \
+ async.o range.o
obj-y += groups.o
ifdef CONFIG_FUNCTION_TRACER
@@ -107,6 +107,7 @@ obj-$(CONFIG_PERF_EVENTS) += events/
obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
obj-$(CONFIG_PADATA) += padata.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
+obj-$(CONFIG_JUMP_LABEL) += jump_label.o
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/async.c b/kernel/async.c
index d5fe7af0de2e..4c2843c0043e 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -120,7 +120,7 @@ static void async_run_entry_fn(struct work_struct *work)
struct async_entry *entry =
container_of(work, struct async_entry, work);
unsigned long flags;
- ktime_t calltime, delta, rettime;
+ ktime_t uninitialized_var(calltime), delta, rettime;
/* 1) move self to the running queue */
spin_lock_irqsave(&async_lock, flags);
@@ -269,7 +269,7 @@ EXPORT_SYMBOL_GPL(async_synchronize_full_domain);
void async_synchronize_cookie_domain(async_cookie_t cookie,
struct list_head *running)
{
- ktime_t starttime, delta, endtime;
+ ktime_t uninitialized_var(starttime), delta, endtime;
if (initcall_debug && system_state == SYSTEM_BOOTING) {
printk(KERN_DEBUG "async_waiting @ %i\n", task_pid_nr(current));
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 1d2b6ceea95d..453100a4159d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -265,7 +265,7 @@ list_for_each_entry(_root, &roots, root_list)
/* the list of cgroups eligible for automatic release. Protected by
* release_list_lock */
static LIST_HEAD(release_list);
-static DEFINE_SPINLOCK(release_list_lock);
+static DEFINE_RAW_SPINLOCK(release_list_lock);
static void cgroup_release_agent(struct work_struct *work);
static DECLARE_WORK(release_agent_work, cgroup_release_agent);
static void check_for_release(struct cgroup *cgrp);
@@ -4014,11 +4014,11 @@ again:
finish_wait(&cgroup_rmdir_waitq, &wait);
clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
- spin_lock(&release_list_lock);
+ raw_spin_lock(&release_list_lock);
set_bit(CGRP_REMOVED, &cgrp->flags);
if (!list_empty(&cgrp->release_list))
list_del_init(&cgrp->release_list);
- spin_unlock(&release_list_lock);
+ raw_spin_unlock(&release_list_lock);
cgroup_lock_hierarchy(cgrp->root);
/* delete this cgroup from parent->children */
@@ -4671,13 +4671,13 @@ static void check_for_release(struct cgroup *cgrp)
* already queued for a userspace notification, queue
* it now */
int need_schedule_work = 0;
- spin_lock(&release_list_lock);
+ raw_spin_lock(&release_list_lock);
if (!cgroup_is_removed(cgrp) &&
list_empty(&cgrp->release_list)) {
list_add(&cgrp->release_list, &release_list);
need_schedule_work = 1;
}
- spin_unlock(&release_list_lock);
+ raw_spin_unlock(&release_list_lock);
if (need_schedule_work)
schedule_work(&release_agent_work);
}
@@ -4729,7 +4729,7 @@ static void cgroup_release_agent(struct work_struct *work)
{
BUG_ON(work != &release_agent_work);
mutex_lock(&cgroup_mutex);
- spin_lock(&release_list_lock);
+ raw_spin_lock(&release_list_lock);
while (!list_empty(&release_list)) {
char *argv[3], *envp[3];
int i;
@@ -4738,7 +4738,7 @@ static void cgroup_release_agent(struct work_struct *work)
struct cgroup,
release_list);
list_del_init(&cgrp->release_list);
- spin_unlock(&release_list_lock);
+ raw_spin_unlock(&release_list_lock);
pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!pathbuf)
goto continue_free;
@@ -4768,9 +4768,9 @@ static void cgroup_release_agent(struct work_struct *work)
continue_free:
kfree(pathbuf);
kfree(agentbuf);
- spin_lock(&release_list_lock);
+ raw_spin_lock(&release_list_lock);
}
- spin_unlock(&release_list_lock);
+ raw_spin_unlock(&release_list_lock);
mutex_unlock(&cgroup_mutex);
}
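[Editor's note] The spinlock_t to raw_spinlock_t conversions in this series (here, and in kprobes.c, latencytop.c and posix-cpu-timers.c below) presumably prepare for PREEMPT_RT, where a plain spinlock_t becomes a sleeping lock; a raw_spinlock_t always truly spins, so it stays usable in contexts that must not sleep. A minimal sketch of the converted pattern, under that assumption (hypothetical caller):

	#include <linux/spinlock.h>

	static DEFINE_RAW_SPINLOCK(release_list_lock);

	static void touch_release_list(void)
	{
		raw_spin_lock(&release_list_lock);
		/* short, non-sleeping critical section; valid even on PREEMPT_RT */
		raw_spin_unlock(&release_list_lock);
	}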
diff --git a/kernel/cred.c b/kernel/cred.c
index 174fa84eca30..bb55d052d858 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -508,10 +508,8 @@ int commit_creds(struct cred *new)
key_fsgid_changed(task);
/* do it
- * - What if a process setreuid()'s and this brings the
- * new uid over his NPROC rlimit? We can check this now
- * cheaply with the new uid cache, so if it matters
- * we should be checking for it. -DaveM
+ * RLIMIT_NPROC limits on user->processes have already been checked
+ * in set_user().
*/
alter_cred_subscribers(new, 2);
if (new->user != old->user)
@@ -646,6 +644,9 @@ void __init cred_init(void)
*/
struct cred *prepare_kernel_cred(struct task_struct *daemon)
{
+#ifdef CONFIG_KEYS
+ struct thread_group_cred *tgcred;
+#endif
const struct cred *old;
struct cred *new;
@@ -653,6 +654,14 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
if (!new)
return NULL;
+#ifdef CONFIG_KEYS
+ tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL);
+ if (!tgcred) {
+ kmem_cache_free(cred_jar, new);
+ return NULL;
+ }
+#endif
+
kdebug("prepare_kernel_cred() alloc %p", new);
if (daemon)
@@ -669,8 +678,11 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
get_group_info(new->group_info);
#ifdef CONFIG_KEYS
- atomic_inc(&init_tgcred.usage);
- new->tgcred = &init_tgcred;
+ atomic_set(&tgcred->usage, 1);
+ spin_lock_init(&tgcred->lock);
+ tgcred->process_keyring = NULL;
+ tgcred->session_keyring = NULL;
+ new->tgcred = tgcred;
new->request_key_auth = NULL;
new->thread_keyring = NULL;
new->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b8785e26ee1c..0f857782d06f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -399,14 +399,54 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
local_irq_restore(flags);
}
-static inline void perf_cgroup_sched_out(struct task_struct *task)
+static inline void perf_cgroup_sched_out(struct task_struct *task,
+ struct task_struct *next)
{
- perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
+ struct perf_cgroup *cgrp1;
+ struct perf_cgroup *cgrp2 = NULL;
+
+ /*
+ * we come here when we know perf_cgroup_events > 0
+ */
+ cgrp1 = perf_cgroup_from_task(task);
+
+ /*
+ * next is NULL when called from perf_event_enable_on_exec()
+ * that will systematically cause a cgroup_switch()
+ */
+ if (next)
+ cgrp2 = perf_cgroup_from_task(next);
+
+ /*
+ * only schedule out current cgroup events if we know
+ * that we are switching to a different cgroup. Otherwise,
+ * do no touch the cgroup events.
+ */
+ if (cgrp1 != cgrp2)
+ perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
}
-static inline void perf_cgroup_sched_in(struct task_struct *task)
+static inline void perf_cgroup_sched_in(struct task_struct *prev,
+ struct task_struct *task)
{
- perf_cgroup_switch(task, PERF_CGROUP_SWIN);
+ struct perf_cgroup *cgrp1;
+ struct perf_cgroup *cgrp2 = NULL;
+
+ /*
+ * we come here when we know perf_cgroup_events > 0
+ */
+ cgrp1 = perf_cgroup_from_task(task);
+
+ /* prev can never be NULL */
+ cgrp2 = perf_cgroup_from_task(prev);
+
+ /*
+ * only need to schedule in cgroup events if we are changing
+ * cgroup during ctxsw. Cgroup events were not scheduled
+ * out of ctxsw out if that was not the case.
+ */
+ if (cgrp1 != cgrp2)
+ perf_cgroup_switch(task, PERF_CGROUP_SWIN);
}
static inline int perf_cgroup_connect(int fd, struct perf_event *event,
@@ -518,11 +558,13 @@ static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
{
}
-static inline void perf_cgroup_sched_out(struct task_struct *task)
+static inline void perf_cgroup_sched_out(struct task_struct *task,
+ struct task_struct *next)
{
}
-static inline void perf_cgroup_sched_in(struct task_struct *task)
+static inline void perf_cgroup_sched_in(struct task_struct *prev,
+ struct task_struct *task)
{
}
@@ -1988,7 +2030,7 @@ void __perf_event_task_sched_out(struct task_struct *task,
* cgroup event are system-wide mode only
*/
if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
- perf_cgroup_sched_out(task);
+ perf_cgroup_sched_out(task, next);
}
static void task_ctx_sched_out(struct perf_event_context *ctx)
@@ -2153,7 +2195,8 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
* accessing the event control register. If a NMI hits, then it will
* keep the event running.
*/
-void __perf_event_task_sched_in(struct task_struct *task)
+void __perf_event_task_sched_in(struct task_struct *prev,
+ struct task_struct *task)
{
struct perf_event_context *ctx;
int ctxn;
@@ -2171,7 +2214,7 @@ void __perf_event_task_sched_in(struct task_struct *task)
* cgroup event are system-wide mode only
*/
if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
- perf_cgroup_sched_in(task);
+ perf_cgroup_sched_in(prev, task);
}
static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
@@ -2427,7 +2470,7 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx)
* ctxswin cgroup events which are already scheduled
* in.
*/
- perf_cgroup_sched_out(current);
+ perf_cgroup_sched_out(current, NULL);
raw_spin_lock(&ctx->lock);
task_ctx_sched_out(ctx);
@@ -3353,8 +3396,8 @@ static int perf_event_index(struct perf_event *event)
}
static void calc_timer_values(struct perf_event *event,
- u64 *running,
- u64 *enabled)
+ u64 *enabled,
+ u64 *running)
{
u64 now, ctx_time;
diff --git a/kernel/fork.c b/kernel/fork.c
index e7ceaca89609..8e6b6f4fb272 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1111,6 +1111,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->real_cred->user != INIT_USER)
goto bad_fork_free;
}
+ current->flags &= ~PF_NPROC_EXCEEDED;
retval = copy_creds(p, clone_flags);
if (retval < 0)
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 7b01de98bb6a..66a594e8ad2f 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -67,7 +67,7 @@ static void fake_signal_wake_up(struct task_struct *p)
unsigned long flags;
spin_lock_irqsave(&p->sighand->siglock, flags);
- signal_wake_up(p, 0);
+ signal_wake_up(p, 1);
spin_unlock_irqrestore(&p->sighand->siglock, flags);
}
diff --git a/kernel/futex.c b/kernel/futex.c
index 11cbe052b2e8..1511dff0cfd6 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -854,7 +854,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
{
struct task_struct *new_owner;
struct futex_pi_state *pi_state = this->pi_state;
- u32 curval, newval;
+ u32 uninitialized_var(curval), newval;
if (!pi_state)
return -EINVAL;
@@ -916,7 +916,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
{
- u32 oldval;
+ u32 uninitialized_var(oldval);
/*
* There is no waiter, so we unlock the futex. The owner died
@@ -1576,7 +1576,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
struct futex_pi_state *pi_state = q->pi_state;
struct task_struct *oldowner = pi_state->owner;
- u32 uval, curval, newval;
+ u32 uval, uninitialized_var(curval), newval;
int ret;
/* Owner died? */
@@ -1793,7 +1793,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
*
* Returns:
* 0 - uaddr contains val and hb has been locked
- * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlcoked
+ * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
*/
static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
struct futex_q *q, struct futex_hash_bucket **hb)
@@ -2481,7 +2481,7 @@ err_unlock:
*/
int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
{
- u32 uval, nval, mval;
+ u32 uval, uninitialized_var(nval), mval;
retry:
if (get_user(uval, uaddr))
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index d5a3009da71a..dc5114b4c16c 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -178,7 +178,7 @@ void irq_shutdown(struct irq_desc *desc)
desc->depth = 1;
if (desc->irq_data.chip->irq_shutdown)
desc->irq_data.chip->irq_shutdown(&desc->irq_data);
- if (desc->irq_data.chip->irq_disable)
+ else if (desc->irq_data.chip->irq_disable)
desc->irq_data.chip->irq_disable(&desc->irq_data);
else
desc->irq_data.chip->irq_mask(&desc->irq_data);
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index 3a2cab407b93..e38544dddb18 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -246,7 +246,7 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk,
gc->mask_cache = irq_reg_readl(gc->reg_base + ct->regs.mask);
for (i = gc->irq_base; msk; msk >>= 1, i++) {
- if (!msk & 0x01)
+ if (!(msk & 0x01))
continue;
if (flags & IRQ_GC_INIT_NESTED_LOCK)
@@ -301,7 +301,7 @@ void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk,
raw_spin_unlock(&gc_lock);
for (; msk; msk >>= 1, i++) {
- if (!msk & 0x01)
+ if (!(msk & 0x01))
continue;
/* Remove handler first. That will mask the irq line */
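[Editor's note] Both generic-chip.c hunks fix the same operator-precedence bug: ! binds tighter than &, so "!msk & 0x01" parses as "(!msk) & 0x01", which is 0 for every nonzero msk; the continue never fired and bits with a clear mask bit were set up anyway. A standalone illustration with a hypothetical value:

	#include <stdio.h>

	int main(void)
	{
		unsigned int msk = 0x2;		/* bit 0 clear, bit 1 set */

		printf("%u\n", !msk & 0x01);	/* old test: (!msk) & 1 == 0, never skips */
		printf("%u\n", !(msk & 0x01));	/* fixed test: 1, bit is skipped */
		return 0;
	}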
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 4c60a50e66b2..039b889ea053 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -70,7 +70,8 @@ static inline void desc_smp_init(struct irq_desc *desc, int node) { }
static inline int desc_node(struct irq_desc *desc) { return 0; }
#endif
-static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
+static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
+ struct module *owner)
{
int cpu;
@@ -86,6 +87,7 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
desc->irq_count = 0;
desc->irqs_unhandled = 0;
desc->name = NULL;
+ desc->owner = owner;
for_each_possible_cpu(cpu)
*per_cpu_ptr(desc->kstat_irqs, cpu) = 0;
desc_smp_init(desc, node);
@@ -128,7 +130,7 @@ static void free_masks(struct irq_desc *desc)
static inline void free_masks(struct irq_desc *desc) { }
#endif
-static struct irq_desc *alloc_desc(int irq, int node)
+static struct irq_desc *alloc_desc(int irq, int node, struct module *owner)
{
struct irq_desc *desc;
gfp_t gfp = GFP_KERNEL;
@@ -147,7 +149,7 @@ static struct irq_desc *alloc_desc(int irq, int node)
raw_spin_lock_init(&desc->lock);
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
- desc_set_defaults(irq, desc, node);
+ desc_set_defaults(irq, desc, node, owner);
return desc;
@@ -173,13 +175,14 @@ static void free_desc(unsigned int irq)
kfree(desc);
}
-static int alloc_descs(unsigned int start, unsigned int cnt, int node)
+static int alloc_descs(unsigned int start, unsigned int cnt, int node,
+ struct module *owner)
{
struct irq_desc *desc;
int i;
for (i = 0; i < cnt; i++) {
- desc = alloc_desc(start + i, node);
+ desc = alloc_desc(start + i, node, owner);
if (!desc)
goto err;
mutex_lock(&sparse_irq_lock);
@@ -227,7 +230,7 @@ int __init early_irq_init(void)
nr_irqs = initcnt;
for (i = 0; i < initcnt; i++) {
- desc = alloc_desc(i, node);
+ desc = alloc_desc(i, node, NULL);
set_bit(i, allocated_irqs);
irq_insert_desc(i, desc);
}
@@ -261,7 +264,7 @@ int __init early_irq_init(void)
alloc_masks(&desc[i], GFP_KERNEL, node);
raw_spin_lock_init(&desc[i].lock);
lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
- desc_set_defaults(i, &desc[i], node);
+ desc_set_defaults(i, &desc[i], node, NULL);
}
return arch_early_irq_init();
}
@@ -276,8 +279,16 @@ static void free_desc(unsigned int irq)
dynamic_irq_cleanup(irq);
}
-static inline int alloc_descs(unsigned int start, unsigned int cnt, int node)
+static inline int alloc_descs(unsigned int start, unsigned int cnt, int node,
+ struct module *owner)
{
+ u32 i;
+
+ for (i = 0; i < cnt; i++) {
+ struct irq_desc *desc = irq_to_desc(start + i);
+
+ desc->owner = owner;
+ }
return start;
}
@@ -333,11 +344,13 @@ EXPORT_SYMBOL_GPL(irq_free_descs);
* @from: Start the search from this irq number
* @cnt: Number of consecutive irqs to allocate.
* @node: Preferred node on which the irq descriptor should be allocated
+ * @owner: Owning module (can be NULL)
*
* Returns the first irq number or error code
*/
int __ref
-irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node)
+__irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
+ struct module *owner)
{
int start, ret;
@@ -366,13 +379,13 @@ irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node)
bitmap_set(allocated_irqs, start, cnt);
mutex_unlock(&sparse_irq_lock);
- return alloc_descs(start, cnt, node);
+ return alloc_descs(start, cnt, node, owner);
err:
mutex_unlock(&sparse_irq_lock);
return ret;
}
-EXPORT_SYMBOL_GPL(irq_alloc_descs);
+EXPORT_SYMBOL_GPL(__irq_alloc_descs);
/**
* irq_reserve_irqs - mark irqs allocated
@@ -440,7 +453,7 @@ void dynamic_irq_cleanup(unsigned int irq)
unsigned long flags;
raw_spin_lock_irqsave(&desc->lock, flags);
- desc_set_defaults(irq, desc, desc_node(desc));
+ desc_set_defaults(irq, desc, desc_node(desc), NULL);
raw_spin_unlock_irqrestore(&desc->lock, flags);
}
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index d5828da3fd38..b57a3776de44 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -29,7 +29,11 @@ void irq_domain_add(struct irq_domain *domain)
*/
for (hwirq = 0; hwirq < domain->nr_irq; hwirq++) {
d = irq_get_irq_data(irq_domain_to_irq(domain, hwirq));
- if (d || d->domain) {
+ if (!d) {
+ WARN(1, "error: assigning domain to non existant irq_desc");
+ return;
+ }
+ if (d->domain) {
/* things are broken; just report, don't clean up */
WARN(1, "error: irq_desc already assigned to a domain");
return;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0a7840aeb0fb..9b956fa20308 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -883,6 +883,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
if (desc->irq_data.chip == &no_irq_chip)
return -ENOSYS;
+ if (!try_module_get(desc->owner))
+ return -ENODEV;
/*
* Some drivers like serial.c use request_irq() heavily,
* so we have to be careful not to interfere with a
@@ -906,8 +908,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
*/
nested = irq_settings_is_nested_thread(desc);
if (nested) {
- if (!new->thread_fn)
- return -EINVAL;
+ if (!new->thread_fn) {
+ ret = -EINVAL;
+ goto out_mput;
+ }
/*
* Replace the primary handler which was provided from
* the driver for non nested interrupt handling by the
@@ -929,8 +933,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
new->name);
- if (IS_ERR(t))
- return PTR_ERR(t);
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
+ goto out_mput;
+ }
/*
* We keep the reference to the task struct even if
* the thread dies to avoid that the interrupt code
@@ -1095,6 +1101,8 @@ out_thread:
kthread_stop(t);
put_task_struct(t);
}
+out_mput:
+ module_put(desc->owner);
return ret;
}
@@ -1203,6 +1211,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
put_task_struct(action->thread);
}
+ module_put(desc->owner);
return action;
}
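[Editor's note] Together with the irqdesc.c change that records an owner module per descriptor, __setup_irq() now pins the owning module for the lifetime of the irqaction and __free_irq() drops the reference, so an irq-chip module can no longer be unloaded while its interrupts can still be requested or fire. The reference-counting shape, sketched on a hypothetical structure:

	#include <linux/module.h>

	/* Hypothetical resource carrying an owner, mirroring desc->owner. */
	struct my_desc {
		struct module *owner;
	};

	static int my_claim(struct my_desc *d)
	{
		/* Fails once the owner has begun unloading. */
		if (!try_module_get(d->owner))
			return -ENODEV;
		/* ... setup; every error path must undo this with module_put() ... */
		return 0;
	}

	static void my_release(struct my_desc *d)
	{
		module_put(d->owner);
	}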
diff --git a/kernel/kmod.c b/kernel/kmod.c
index ddc7644c1305..a4bea97c75b6 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -114,10 +114,12 @@ int __request_module(bool wait, const char *fmt, ...)
atomic_inc(&kmod_concurrent);
if (atomic_read(&kmod_concurrent) > max_modprobes) {
/* We may be blaming an innocent here, but unlikely */
- if (kmod_loop_msg++ < 5)
+ if (kmod_loop_msg < 5) {
printk(KERN_ERR
"request_module: runaway loop modprobe %s\n",
module_name);
+ kmod_loop_msg++;
+ }
atomic_dec(&kmod_concurrent);
return -ENOMEM;
}
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index b30fd54eb985..2f193d0ba7f2 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -78,10 +78,10 @@ static bool kprobes_all_disarmed;
static DEFINE_MUTEX(kprobe_mutex);
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
static struct {
- spinlock_t lock ____cacheline_aligned_in_smp;
+ raw_spinlock_t lock ____cacheline_aligned_in_smp;
} kretprobe_table_locks[KPROBE_TABLE_SIZE];
-static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
+static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
{
return &(kretprobe_table_locks[hash].lock);
}
@@ -1013,9 +1013,9 @@ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
hlist_del(&ri->hlist);
INIT_HLIST_NODE(&ri->hlist);
if (likely(rp)) {
- spin_lock(&rp->lock);
+ raw_spin_lock(&rp->lock);
hlist_add_head(&ri->hlist, &rp->free_instances);
- spin_unlock(&rp->lock);
+ raw_spin_unlock(&rp->lock);
} else
/* Unregistering */
hlist_add_head(&ri->hlist, head);
@@ -1026,19 +1026,19 @@ void __kprobes kretprobe_hash_lock(struct task_struct *tsk,
__acquires(hlist_lock)
{
unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
- spinlock_t *hlist_lock;
+ raw_spinlock_t *hlist_lock;
*head = &kretprobe_inst_table[hash];
hlist_lock = kretprobe_table_lock_ptr(hash);
- spin_lock_irqsave(hlist_lock, *flags);
+ raw_spin_lock_irqsave(hlist_lock, *flags);
}
static void __kprobes kretprobe_table_lock(unsigned long hash,
unsigned long *flags)
__acquires(hlist_lock)
{
- spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
- spin_lock_irqsave(hlist_lock, *flags);
+ raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
+ raw_spin_lock_irqsave(hlist_lock, *flags);
}
void __kprobes kretprobe_hash_unlock(struct task_struct *tsk,
@@ -1046,18 +1046,18 @@ void __kprobes kretprobe_hash_unlock(struct task_struct *tsk,
__releases(hlist_lock)
{
unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
- spinlock_t *hlist_lock;
+ raw_spinlock_t *hlist_lock;
hlist_lock = kretprobe_table_lock_ptr(hash);
- spin_unlock_irqrestore(hlist_lock, *flags);
+ raw_spin_unlock_irqrestore(hlist_lock, *flags);
}
static void __kprobes kretprobe_table_unlock(unsigned long hash,
unsigned long *flags)
__releases(hlist_lock)
{
- spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
- spin_unlock_irqrestore(hlist_lock, *flags);
+ raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
+ raw_spin_unlock_irqrestore(hlist_lock, *flags);
}
/*
@@ -1663,12 +1663,12 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
/*TODO: consider to only swap the RA after the last pre_handler fired */
hash = hash_ptr(current, KPROBE_HASH_BITS);
- spin_lock_irqsave(&rp->lock, flags);
+ raw_spin_lock_irqsave(&rp->lock, flags);
if (!hlist_empty(&rp->free_instances)) {
ri = hlist_entry(rp->free_instances.first,
struct kretprobe_instance, hlist);
hlist_del(&ri->hlist);
- spin_unlock_irqrestore(&rp->lock, flags);
+ raw_spin_unlock_irqrestore(&rp->lock, flags);
ri->rp = rp;
ri->task = current;
@@ -1685,7 +1685,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
kretprobe_table_unlock(hash, &flags);
} else {
rp->nmissed++;
- spin_unlock_irqrestore(&rp->lock, flags);
+ raw_spin_unlock_irqrestore(&rp->lock, flags);
}
return 0;
}
@@ -1721,7 +1721,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
rp->maxactive = num_possible_cpus();
#endif
}
- spin_lock_init(&rp->lock);
+ raw_spin_lock_init(&rp->lock);
INIT_HLIST_HEAD(&rp->free_instances);
for (i = 0; i < rp->maxactive; i++) {
inst = kmalloc(sizeof(struct kretprobe_instance) +
@@ -1959,7 +1959,7 @@ static int __init init_kprobes(void)
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
INIT_HLIST_HEAD(&kprobe_table[i]);
INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
- spin_lock_init(&(kretprobe_table_locks[i].lock));
+ raw_spin_lock_init(&(kretprobe_table_locks[i].lock));
}
/*
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index 376066e10413..4ac8ebfcab59 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -58,7 +58,7 @@
#include <linux/list.h>
#include <linux/stacktrace.h>
-static DEFINE_SPINLOCK(latency_lock);
+static DEFINE_RAW_SPINLOCK(latency_lock);
#define MAXLR 128
static struct latency_record latency_record[MAXLR];
@@ -72,19 +72,19 @@ void clear_all_latency_tracing(struct task_struct *p)
if (!latencytop_enabled)
return;
- spin_lock_irqsave(&latency_lock, flags);
+ raw_spin_lock_irqsave(&latency_lock, flags);
memset(&p->latency_record, 0, sizeof(p->latency_record));
p->latency_record_count = 0;
- spin_unlock_irqrestore(&latency_lock, flags);
+ raw_spin_unlock_irqrestore(&latency_lock, flags);
}
static void clear_global_latency_tracing(void)
{
unsigned long flags;
- spin_lock_irqsave(&latency_lock, flags);
+ raw_spin_lock_irqsave(&latency_lock, flags);
memset(&latency_record, 0, sizeof(latency_record));
- spin_unlock_irqrestore(&latency_lock, flags);
+ raw_spin_unlock_irqrestore(&latency_lock, flags);
}
static void __sched
@@ -190,7 +190,7 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
lat.max = usecs;
store_stacktrace(tsk, &lat);
- spin_lock_irqsave(&latency_lock, flags);
+ raw_spin_lock_irqsave(&latency_lock, flags);
account_global_scheduler_latency(tsk, &lat);
@@ -231,7 +231,7 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record));
out_unlock:
- spin_unlock_irqrestore(&latency_lock, flags);
+ raw_spin_unlock_irqrestore(&latency_lock, flags);
}
static int lstats_show(struct seq_file *m, void *v)
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 8c24294e477f..c081fa967c8f 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -96,8 +96,13 @@ static int graph_lock(void)
static inline int graph_unlock(void)
{
- if (debug_locks && !arch_spin_is_locked(&lockdep_lock))
+ if (debug_locks && !arch_spin_is_locked(&lockdep_lock)) {
+ /*
+ * The lockdep graph lock isn't locked while we expect it to
+ * be, we're confused now, bye!
+ */
return DEBUG_LOCKS_WARN_ON(1);
+ }
current->lockdep_recursion--;
arch_spin_unlock(&lockdep_lock);
@@ -134,6 +139,9 @@ static struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
static inline struct lock_class *hlock_class(struct held_lock *hlock)
{
if (!hlock->class_idx) {
+ /*
+ * Someone passed in garbage, we give up.
+ */
DEBUG_LOCKS_WARN_ON(1);
return NULL;
}
@@ -687,6 +695,10 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
*/
list_for_each_entry(class, hash_head, hash_entry) {
if (class->key == key) {
+ /*
+ * Huh! same key, different name? Did someone trample
+ * on some memory? We're most confused.
+ */
WARN_ON_ONCE(class->name != lock->name);
return class;
}
@@ -800,6 +812,10 @@ out_unlock_set:
else if (subclass < NR_LOCKDEP_CACHING_CLASSES)
lock->class_cache[subclass] = class;
+ /*
+ * Hash collision, did we smoke some? We found a class with a matching
+ * hash but the subclass -- which is hashed in -- didn't match.
+ */
if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass))
return NULL;
@@ -926,7 +942,7 @@ static inline void mark_lock_accessed(struct lock_list *lock,
unsigned long nr;
nr = lock - list_entries;
- WARN_ON(nr >= nr_list_entries);
+ WARN_ON(nr >= nr_list_entries); /* Out-of-bounds, input fail */
lock->parent = parent;
lock->class->dep_gen_id = lockdep_dependency_gen_id;
}
@@ -936,7 +952,7 @@ static inline unsigned long lock_accessed(struct lock_list *lock)
unsigned long nr;
nr = lock - list_entries;
- WARN_ON(nr >= nr_list_entries);
+ WARN_ON(nr >= nr_list_entries); /* Out-of-bounds, input fail */
return lock->class->dep_gen_id == lockdep_dependency_gen_id;
}
@@ -1196,6 +1212,9 @@ static noinline int print_bfs_bug(int ret)
if (!debug_locks_off_graph_unlock())
return 0;
+ /*
+ * Breadth-first-search failed, graph got corrupted?
+ */
WARN(1, "lockdep bfs error:%d\n", ret);
return 0;
@@ -1944,6 +1963,11 @@ out_bug:
if (!debug_locks_off_graph_unlock())
return 0;
+ /*
+ * Clearly we all shouldn't be here, but since we made it we
+ * can reliable say we messed up our state. See the above two
+ * gotos for reasons why we could possibly end up here.
+ */
WARN_ON(1);
return 0;
@@ -1975,6 +1999,11 @@ static inline int lookup_chain_cache(struct task_struct *curr,
struct held_lock *hlock_curr, *hlock_next;
int i, j;
+ /*
+ * We might need to take the graph lock, ensure we've got IRQs
+ * disabled to make this an IRQ-safe lock.. for recursion reasons
+ * lockdep won't complain about its own locking errors.
+ */
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return 0;
/*
@@ -2126,6 +2155,10 @@ static void check_chain_key(struct task_struct *curr)
hlock = curr->held_locks + i;
if (chain_key != hlock->prev_chain_key) {
debug_locks_off();
+ /*
+ * We got mighty confused, our chain keys don't match
+ * with what we expect, someone trample on our task state?
+ */
WARN(1, "hm#1, depth: %u [%u], %016Lx != %016Lx\n",
curr->lockdep_depth, i,
(unsigned long long)chain_key,
@@ -2133,6 +2166,9 @@ static void check_chain_key(struct task_struct *curr)
return;
}
id = hlock->class_idx - 1;
+ /*
+ * Whoops ran out of static storage again?
+ */
if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
return;
@@ -2144,6 +2180,10 @@ static void check_chain_key(struct task_struct *curr)
}
if (chain_key != curr->curr_chain_key) {
debug_locks_off();
+ /*
+ * More smoking hash instead of calculating it, damn see these
+ * numbers float.. I bet that a pink elephant stepped on my memory.
+ */
WARN(1, "hm#2, depth: %u [%u], %016Lx != %016Lx\n",
curr->lockdep_depth, i,
(unsigned long long)chain_key,
@@ -2525,12 +2565,24 @@ void trace_hardirqs_on_caller(unsigned long ip)
return;
}
+ /*
+ * We're enabling irqs and according to our state above irqs weren't
+ * already enabled, yet we find the hardware thinks they are in fact
+ * enabled.. someone messed up their IRQ state tracing.
+ */
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return;
+ /*
+ * See the fine text that goes along with this variable definition.
+ */
if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled)))
return;
+ /*
+ * Can't allow enabling interrupts while in an interrupt handler,
+ * that's general bad form and such. Recursion, limited stack etc..
+ */
if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
return;
@@ -2558,6 +2610,10 @@ void trace_hardirqs_off_caller(unsigned long ip)
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
+ /*
+ * So we're supposed to get called after you mask local IRQs, but for
+ * some reason the hardware doesn't quite think you did a proper job.
+ */
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return;
@@ -2590,6 +2646,10 @@ void trace_softirqs_on(unsigned long ip)
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
+ /*
+ * We fancy IRQs being disabled here, see softirq.c, avoids
+ * funny state and nesting things.
+ */
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return;
@@ -2626,6 +2686,9 @@ void trace_softirqs_off(unsigned long ip)
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
+ /*
+ * We fancy IRQs being disabled here, see softirq.c
+ */
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return;
@@ -2637,6 +2700,9 @@ void trace_softirqs_off(unsigned long ip)
curr->softirq_disable_ip = ip;
curr->softirq_disable_event = ++curr->irq_events;
debug_atomic_inc(softirqs_off_events);
+ /*
+ * Whoops, we wanted softirqs off, so why aren't they?
+ */
DEBUG_LOCKS_WARN_ON(!softirq_count());
} else
debug_atomic_inc(redundant_softirqs_off);
@@ -2661,6 +2727,9 @@ static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
if (!(gfp_mask & __GFP_FS))
return;
+ /*
+ * Oi! Can't be having __GFP_FS allocations with IRQs disabled.
+ */
if (DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags)))
return;
@@ -2773,13 +2842,13 @@ static int separate_irq_context(struct task_struct *curr,
return 0;
}
-#else
+#else /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
static inline
int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
enum lock_usage_bit new_bit)
{
- WARN_ON(1);
+ WARN_ON(1); /* Impossible innit? when we don't have TRACE_IRQFLAG */
return 1;
}
@@ -2799,7 +2868,7 @@ void lockdep_trace_alloc(gfp_t gfp_mask)
{
}
-#endif
+#endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
/*
* Mark a lock with a usage bit, and validate the state transition:
@@ -2880,6 +2949,9 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
lock->cpu = raw_smp_processor_id();
#endif
+ /*
+ * Can't be having no nameless bastards around this place!
+ */
if (DEBUG_LOCKS_WARN_ON(!name)) {
lock->name = "NULL";
return;
@@ -2887,6 +2959,9 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
lock->name = name;
+ /*
+ * No key, no joy, we need to hash something.
+ */
if (DEBUG_LOCKS_WARN_ON(!key))
return;
/*
@@ -2894,6 +2969,9 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
*/
if (!static_obj(key)) {
printk("BUG: key %p not in .data!\n", key);
+ /*
+ * What it says above ^^^^^, I suggest you read it.
+ */
DEBUG_LOCKS_WARN_ON(1);
return;
}
@@ -2932,6 +3010,11 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
if (unlikely(!debug_locks))
return 0;
+ /*
+ * Lockdep should run with IRQs disabled, otherwise we could
+ * get an interrupt which would want to take locks, which would
+ * end up in lockdep and have you got a head-ache already?
+ */
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return 0;
@@ -2963,6 +3046,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
* dependency checks are done)
*/
depth = curr->lockdep_depth;
+ /*
+ * Ran out of static storage for our per-task lock stack again have we?
+ */
if (DEBUG_LOCKS_WARN_ON(depth >= MAX_LOCK_DEPTH))
return 0;
@@ -2981,6 +3067,10 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
}
hlock = curr->held_locks + depth;
+ /*
+ * Plain impossible, we just registered it and checked it weren't no
+ * NULL like.. I bet this mushroom I ate was good!
+ */
if (DEBUG_LOCKS_WARN_ON(!class))
return 0;
hlock->class_idx = class_idx;
@@ -3015,11 +3105,17 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
* the hash, not class->key.
*/
id = class - lock_classes;
+ /*
+ * Whoops, we did it again.. ran straight out of our static allocation.
+ */
if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
return 0;
chain_key = curr->curr_chain_key;
if (!depth) {
+ /*
+ * How can we have a chain hash when we ain't got no keys?!
+ */
if (DEBUG_LOCKS_WARN_ON(chain_key != 0))
return 0;
chain_head = 1;
@@ -3091,6 +3187,9 @@ static int check_unlock(struct task_struct *curr, struct lockdep_map *lock,
{
if (unlikely(!debug_locks))
return 0;
+ /*
+ * Lockdep should run with IRQs disabled, recursion, head-ache, etc..
+ */
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return 0;
@@ -3111,9 +3210,20 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock)
if (!class)
class = look_up_lock_class(lock, 0);
- if (DEBUG_LOCKS_WARN_ON(!class))
+ /*
+ * If look_up_lock_class() failed to find a class, we're trying
+ * to test if we hold a lock that has never yet been acquired.
+ * Clearly if the lock hasn't been acquired _ever_, we're not
+ * holding it either, so report failure.
+ */
+ if (!class)
return 0;
+ /*
+ * References, but not a lock we're actually ref-counting?
+ * State got messed up, follow the sites that change ->references
+ * and try to make sense of it.
+ */
if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock))
return 0;
@@ -3136,6 +3246,10 @@ __lock_set_class(struct lockdep_map *lock, const char *name,
int i;
depth = curr->lockdep_depth;
+ /*
+ * This function is about (re)setting the class of a held lock,
+ * yet we're not actually holding any locks. Naughty user!
+ */
if (DEBUG_LOCKS_WARN_ON(!depth))
return 0;
@@ -3171,6 +3285,10 @@ found_it:
return 0;
}
+ /*
+ * I took it apart and put it back together again, except now I have
+ * these 'spare' parts.. where shall I put them.
+ */
if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth))
return 0;
return 1;
@@ -3195,6 +3313,10 @@ lock_release_non_nested(struct task_struct *curr,
* of held locks:
*/
depth = curr->lockdep_depth;
+ /*
+ * So we're all set to release this lock.. wait what lock? We don't
+ * own any locks, you've been drinking again?
+ */
if (DEBUG_LOCKS_WARN_ON(!depth))
return 0;
@@ -3247,6 +3369,10 @@ found_it:
return 0;
}
+ /*
+ * We had N bottles of beer on the wall, we drank one, but now
+ * there's not N-1 bottles of beer left on the wall...
+ */
if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - 1))
return 0;
return 1;
@@ -3277,6 +3403,9 @@ static int lock_release_nested(struct task_struct *curr,
return lock_release_non_nested(curr, lock, ip);
curr->lockdep_depth--;
+ /*
+ * No more locks, but somehow we've got hash left over, who left it?
+ */
if (DEBUG_LOCKS_WARN_ON(!depth && (hlock->prev_chain_key != 0)))
return 0;
@@ -3359,10 +3488,13 @@ static void check_flags(unsigned long flags)
* check if not in hardirq contexts:
*/
if (!hardirq_count()) {
- if (softirq_count())
+ if (softirq_count()) {
+ /* like the above, but with softirqs */
DEBUG_LOCKS_WARN_ON(current->softirqs_enabled);
- else
+ } else {
+ /* lick the above, does it taste good? */
DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
+ }
}
if (!debug_locks)
@@ -3500,6 +3632,10 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
int i, contention_point, contending_point;
depth = curr->lockdep_depth;
+ /*
+ * Whee, we contended on this lock, except it seems we're not
+ * actually trying to acquire anything much at all..
+ */
if (DEBUG_LOCKS_WARN_ON(!depth))
return;
@@ -3549,6 +3685,10 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip)
int i, cpu;
depth = curr->lockdep_depth;
+ /*
+ * Yay, we acquired ownership of this lock we didn't try to
+ * acquire, how the heck did that happen?
+ */
if (DEBUG_LOCKS_WARN_ON(!depth))
return;
@@ -3753,8 +3893,12 @@ void lockdep_reset_lock(struct lockdep_map *lock)
match |= class == lock->class_cache[j];
if (unlikely(match)) {
- if (debug_locks_off_graph_unlock())
+ if (debug_locks_off_graph_unlock()) {
+ /*
+ * We all just reset everything, how did it match?
+ */
WARN_ON(1);
+ }
goto out_restore;
}
}
diff --git a/kernel/params.c b/kernel/params.c
index 22df3e0d142a..821788947e40 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -67,20 +67,27 @@ static void maybe_kfree_parameter(void *param)
}
}
-static inline char dash2underscore(char c)
+static char dash2underscore(char c)
{
if (c == '-')
return '_';
return c;
}
-static inline int parameq(const char *input, const char *paramname)
+bool parameqn(const char *a, const char *b, size_t n)
{
- unsigned int i;
- for (i = 0; dash2underscore(input[i]) == paramname[i]; i++)
- if (input[i] == '\0')
- return 1;
- return 0;
+ size_t i;
+
+ for (i = 0; i < n; i++) {
+ if (dash2underscore(a[i]) != dash2underscore(b[i]))
+ return false;
+ }
+ return true;
+}
+
+bool parameq(const char *a, const char *b)
+{
+ return parameqn(a, b, strlen(a)+1);
}
static int parse_one(char *param,
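[Editor's note] There is a behavioral fix hiding in this refactor: the old parameq() ran dash2underscore() on the first argument only, so "foo-bar" matched "foo_bar" but not the reverse; the new helpers normalize both sides, and parameqn() adds a length-bounded variant. Illustrative calls (hypothetical strings):

	/* Both return true with the new helpers: */
	parameq("foo-bar", "foo_bar");		/* '-' and '_' interchangeable */
	parameqn("foo-bar=1", "foo_bar", 7);	/* compare only the 7-char name */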
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 58f405b581e7..e7cb76dc18f5 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -250,7 +250,7 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
do {
times->utime = cputime_add(times->utime, t->utime);
times->stime = cputime_add(times->stime, t->stime);
- times->sum_exec_runtime += t->se.sum_exec_runtime;
+ times->sum_exec_runtime += task_sched_runtime(t);
} while_each_thread(tsk, t);
out:
rcu_read_unlock();
@@ -274,9 +274,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
struct task_cputime sum;
unsigned long flags;
- spin_lock_irqsave(&cputimer->lock, flags);
if (!cputimer->running) {
- cputimer->running = 1;
/*
* The POSIX timer interface allows for absolute time expiry
* values through the TIMER_ABSTIME flag, therefore we have
@@ -284,10 +282,13 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
* it.
*/
thread_group_cputime(tsk, &sum);
+ raw_spin_lock_irqsave(&cputimer->lock, flags);
+ cputimer->running = 1;
update_gt_cputime(&cputimer->cputime, &sum);
- }
+ } else
+ raw_spin_lock_irqsave(&cputimer->lock, flags);
*times = cputimer->cputime;
- spin_unlock_irqrestore(&cputimer->lock, flags);
+ raw_spin_unlock_irqrestore(&cputimer->lock, flags);
}
/*
@@ -312,7 +313,8 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
cpu->cpu = cputime.utime;
break;
case CPUCLOCK_SCHED:
- cpu->sched = thread_group_sched_runtime(p);
+ thread_group_cputime(p, &cputime);
+ cpu->sched = cputime.sum_exec_runtime;
break;
}
return 0;
@@ -997,9 +999,9 @@ static void stop_process_timers(struct signal_struct *sig)
struct thread_group_cputimer *cputimer = &sig->cputimer;
unsigned long flags;
- spin_lock_irqsave(&cputimer->lock, flags);
+ raw_spin_lock_irqsave(&cputimer->lock, flags);
cputimer->running = 0;
- spin_unlock_irqrestore(&cputimer->lock, flags);
+ raw_spin_unlock_irqrestore(&cputimer->lock, flags);
}
static u32 onecputick;
@@ -1289,9 +1291,9 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
if (sig->cputimer.running) {
struct task_cputime group_sample;
- spin_lock(&sig->cputimer.lock);
+ raw_spin_lock(&sig->cputimer.lock);
group_sample = sig->cputimer.cputime;
- spin_unlock(&sig->cputimer.lock);
+ raw_spin_unlock(&sig->cputimer.lock);
if (task_cputime_expired(&group_sample, &sig->cputime_expires))
return 1;
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index b1914cb9095c..cedd9982306a 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -27,6 +27,7 @@ config HIBERNATION
select HIBERNATE_CALLBACKS
select LZO_COMPRESS
select LZO_DECOMPRESS
+ select CRC32
---help---
Enable the suspend to disk (STD) functionality, which is usually
called "hibernation" in user interfaces. STD checkpoints the
@@ -65,6 +66,9 @@ config HIBERNATION
For more information take a look at <file:Documentation/power/swsusp.txt>.
+config ARCH_SAVE_PAGE_KEYS
+ bool
+
config PM_STD_PARTITION
string "Default resume partition"
depends on HIBERNATION
@@ -231,3 +235,7 @@ config PM_CLK
config PM_GENERIC_DOMAINS
bool
depends on PM
+
+config PM_GENERIC_DOMAINS_RUNTIME
+ def_bool y
+ depends on PM_RUNTIME && PM_GENERIC_DOMAINS
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index c5ebc6a90643..07e0e28ffba7 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -1,8 +1,8 @@
ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG
-obj-$(CONFIG_PM) += main.o
-obj-$(CONFIG_PM_SLEEP) += console.o
+obj-$(CONFIG_PM) += main.o qos.o
+obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o
obj-$(CONFIG_FREEZER) += process.o
obj-$(CONFIG_SUSPEND) += suspend.o
obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o
diff --git a/kernel/power/console.c b/kernel/power/console.c
index 218e5af90156..b1dc456474b5 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -1,5 +1,5 @@
/*
- * drivers/power/process.c - Functions for saving/restoring console.
+ * Functions for saving/restoring console.
*
* Originally from swsusp.
*/
@@ -10,7 +10,6 @@
#include <linux/module.h>
#include "power.h"
-#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1)
static int orig_fgconsole, orig_kmsg;
@@ -32,4 +31,3 @@ void pm_restore_console(void)
vt_kmsg_redirect(orig_kmsg);
}
}
-#endif
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 8f7b1db1ece1..1c53f7fad5f7 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -14,6 +14,7 @@
#include <linux/reboot.h>
#include <linux/string.h>
#include <linux/device.h>
+#include <linux/async.h>
#include <linux/kmod.h>
#include <linux/delay.h>
#include <linux/fs.h>
@@ -29,12 +30,14 @@
#include "power.h"
-static int nocompress = 0;
-static int noresume = 0;
+static int nocompress;
+static int noresume;
+static int resume_wait;
+static int resume_delay;
static char resume_file[256] = CONFIG_PM_STD_PARTITION;
dev_t swsusp_resume_device;
sector_t swsusp_resume_block;
-int in_suspend __nosavedata = 0;
+int in_suspend __nosavedata;
enum {
HIBERNATION_INVALID,
@@ -334,13 +337,17 @@ int hibernation_snapshot(int platform_mode)
if (error)
goto Close;
- error = dpm_prepare(PMSG_FREEZE);
- if (error)
- goto Complete_devices;
-
/* Preallocate image memory before shutting down devices. */
error = hibernate_preallocate_memory();
if (error)
+ goto Close;
+
+ error = freeze_kernel_threads();
+ if (error)
+ goto Close;
+
+ error = dpm_prepare(PMSG_FREEZE);
+ if (error)
goto Complete_devices;
suspend_console();
@@ -463,7 +470,7 @@ static int resume_target_kernel(bool platform_mode)
* @platform_mode: If set, use platform driver to prepare for the transition.
*
* This routine must be called with pm_mutex held. If it is successful, control
- * reappears in the restored target kernel in hibernation_snaphot().
+ * reappears in the restored target kernel in hibernation_snapshot().
*/
int hibernation_restore(int platform_mode)
{
@@ -650,6 +657,9 @@ int hibernate(void)
flags |= SF_PLATFORM_MODE;
if (nocompress)
flags |= SF_NOCOMPRESS_MODE;
+ else
+ flags |= SF_CRC32_MODE;
+
pr_debug("PM: writing image.\n");
error = swsusp_write(flags);
swsusp_free();
@@ -724,6 +734,12 @@ static int software_resume(void)
pr_debug("PM: Checking hibernation image partition %s\n", resume_file);
+ if (resume_delay) {
+ printk(KERN_INFO "Waiting %dsec before reading resume device...\n",
+ resume_delay);
+ ssleep(resume_delay);
+ }
+
/* Check if the device is there */
swsusp_resume_device = name_to_dev_t(resume_file);
if (!swsusp_resume_device) {
@@ -732,6 +748,13 @@ static int software_resume(void)
* to wait for this to finish.
*/
wait_for_device_probe();
+
+ if (resume_wait) {
+ while ((swsusp_resume_device = name_to_dev_t(resume_file)) == 0)
+ msleep(10);
+ async_synchronize_full();
+ }
+
/*
* We can't depend on SCSI devices being available after loading
* one of their modules until scsi_complete_async_scans() is
@@ -1060,7 +1083,21 @@ static int __init noresume_setup(char *str)
return 1;
}
+static int __init resumewait_setup(char *str)
+{
+ resume_wait = 1;
+ return 1;
+}
+
+static int __init resumedelay_setup(char *str)
+{
+ resume_delay = simple_strtoul(str, NULL, 0);
+ return 1;
+}
+
__setup("noresume", noresume_setup);
__setup("resume_offset=", resume_offset_setup);
__setup("resume=", resume_setup);
__setup("hibernate=", hibernate_setup);
+__setup("resumewait", resumewait_setup);
+__setup("resumedelay=", resumedelay_setup);
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 6c601f871964..a52e88425a31 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -12,6 +12,8 @@
#include <linux/string.h>
#include <linux/resume-trace.h>
#include <linux/workqueue.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
#include "power.h"
@@ -131,6 +133,101 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr,
power_attr(pm_test);
#endif /* CONFIG_PM_DEBUG */
+#ifdef CONFIG_DEBUG_FS
+static char *suspend_step_name(enum suspend_stat_step step)
+{
+ switch (step) {
+ case SUSPEND_FREEZE:
+ return "freeze";
+ case SUSPEND_PREPARE:
+ return "prepare";
+ case SUSPEND_SUSPEND:
+ return "suspend";
+ case SUSPEND_SUSPEND_NOIRQ:
+ return "suspend_noirq";
+ case SUSPEND_RESUME_NOIRQ:
+ return "resume_noirq";
+ case SUSPEND_RESUME:
+ return "resume";
+ default:
+ return "";
+ }
+}
+
+static int suspend_stats_show(struct seq_file *s, void *unused)
+{
+ int i, index, last_dev, last_errno, last_step;
+
+ last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1;
+ last_dev %= REC_FAILED_NUM;
+ last_errno = suspend_stats.last_failed_errno + REC_FAILED_NUM - 1;
+ last_errno %= REC_FAILED_NUM;
+ last_step = suspend_stats.last_failed_step + REC_FAILED_NUM - 1;
+ last_step %= REC_FAILED_NUM;
+ seq_printf(s, "%s: %d\n%s: %d\n%s: %d\n%s: %d\n"
+ "%s: %d\n%s: %d\n%s: %d\n%s: %d\n",
+ "success", suspend_stats.success,
+ "fail", suspend_stats.fail,
+ "failed_freeze", suspend_stats.failed_freeze,
+ "failed_prepare", suspend_stats.failed_prepare,
+ "failed_suspend", suspend_stats.failed_suspend,
+ "failed_suspend_noirq",
+ suspend_stats.failed_suspend_noirq,
+ "failed_resume", suspend_stats.failed_resume,
+ "failed_resume_noirq",
+ suspend_stats.failed_resume_noirq);
+ seq_printf(s, "failures:\n last_failed_dev:\t%-s\n",
+ suspend_stats.failed_devs[last_dev]);
+ for (i = 1; i < REC_FAILED_NUM; i++) {
+ index = last_dev + REC_FAILED_NUM - i;
+ index %= REC_FAILED_NUM;
+ seq_printf(s, "\t\t\t%-s\n",
+ suspend_stats.failed_devs[index]);
+ }
+ seq_printf(s, " last_failed_errno:\t%-d\n",
+ suspend_stats.errno[last_errno]);
+ for (i = 1; i < REC_FAILED_NUM; i++) {
+ index = last_errno + REC_FAILED_NUM - i;
+ index %= REC_FAILED_NUM;
+ seq_printf(s, "\t\t\t%-d\n",
+ suspend_stats.errno[index]);
+ }
+ seq_printf(s, " last_failed_step:\t%-s\n",
+ suspend_step_name(
+ suspend_stats.failed_steps[last_step]));
+ for (i = 1; i < REC_FAILED_NUM; i++) {
+ index = last_step + REC_FAILED_NUM - i;
+ index %= REC_FAILED_NUM;
+ seq_printf(s, "\t\t\t%-s\n",
+ suspend_step_name(
+ suspend_stats.failed_steps[index]));
+ }
+
+ return 0;
+}
+
+static int suspend_stats_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, suspend_stats_show, NULL);
+}
+
+static const struct file_operations suspend_stats_operations = {
+ .open = suspend_stats_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init pm_debugfs_init(void)
+{
+ debugfs_create_file("suspend_stats", S_IFREG | S_IRUGO,
+ NULL, NULL, &suspend_stats_operations);
+ return 0;
+}
+
+late_initcall(pm_debugfs_init);
+#endif /* CONFIG_DEBUG_FS */
+
#endif /* CONFIG_PM_SLEEP */
struct kobject *power_kobj;
@@ -194,6 +291,11 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
}
if (state < PM_SUSPEND_MAX && *s)
error = enter_state(state);
+ if (error) {
+ suspend_stats.fail++;
+ dpm_save_failed_errno(error);
+ } else
+ suspend_stats.success++;
#endif
Exit:
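[Editor's note] With CONFIG_DEBUG_FS enabled, the new counters are readable at runtime; assuming debugfs is mounted in the usual place:

	cat /sys/kernel/debug/suspend_stats

which prints the success/fail totals followed by the last REC_FAILED_NUM failing devices, errnos and steps, per suspend_stats_show() above.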
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 9a00a0a26280..23a2db1ec442 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -146,6 +146,7 @@ extern int swsusp_swap_in_use(void);
*/
#define SF_PLATFORM_MODE 1
#define SF_NOCOMPRESS_MODE 2
+#define SF_CRC32_MODE 4
/* kernel/power/hibernate.c */
extern int swsusp_check(void);
@@ -228,7 +229,8 @@ extern int pm_test_level;
#ifdef CONFIG_SUSPEND_FREEZER
static inline int suspend_freeze_processes(void)
{
- return freeze_processes();
+ int error = freeze_processes();
+ return error ? : freeze_kernel_threads();
}
static inline void suspend_thaw_processes(void)
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 0cf3a27a6c9d..addbbe5531bc 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -135,7 +135,7 @@ static int try_to_freeze_tasks(bool sig_only)
}
/**
- * freeze_processes - tell processes to enter the refrigerator
+ * freeze_processes - Signal user space processes to enter the refrigerator.
*/
int freeze_processes(void)
{
@@ -143,20 +143,30 @@ int freeze_processes(void)
printk("Freezing user space processes ... ");
error = try_to_freeze_tasks(true);
- if (error)
- goto Exit;
- printk("done.\n");
+ if (!error) {
+ printk("done.");
+ oom_killer_disable();
+ }
+ printk("\n");
+ BUG_ON(in_atomic());
+
+ return error;
+}
+
+/**
+ * freeze_kernel_threads - Make freezable kernel threads go to the refrigerator.
+ */
+int freeze_kernel_threads(void)
+{
+ int error;
printk("Freezing remaining freezable tasks ... ");
error = try_to_freeze_tasks(false);
- if (error)
- goto Exit;
- printk("done.");
+ if (!error)
+ printk("done.");
- oom_killer_disable();
- Exit:
- BUG_ON(in_atomic());
printk("\n");
+ BUG_ON(in_atomic());
return error;
}
diff --git a/kernel/pm_qos_params.c b/kernel/power/qos.c
index 37f05d0f0793..1c1797dd1d1d 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/power/qos.c
@@ -29,7 +29,7 @@
/*#define DEBUG*/
-#include <linux/pm_qos_params.h>
+#include <linux/pm_qos.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
@@ -45,62 +45,57 @@
#include <linux/uaccess.h>
/*
- * locking rule: all changes to requests or notifiers lists
+ * locking rule: all changes to constraints or notifiers lists
* or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock
* held, taken with _irqsave. One lock to rule them all
*/
-enum pm_qos_type {
- PM_QOS_MAX, /* return the largest value */
- PM_QOS_MIN /* return the smallest value */
-};
-
-/*
- * Note: The lockless read path depends on the CPU accessing
- * target_value atomically. Atomic access is only guaranteed on all CPU
- * types linux supports for 32 bit quantites
- */
struct pm_qos_object {
- struct plist_head requests;
- struct blocking_notifier_head *notifiers;
+ struct pm_qos_constraints *constraints;
struct miscdevice pm_qos_power_miscdev;
char *name;
- s32 target_value; /* Do not change to 64 bit */
- s32 default_value;
- enum pm_qos_type type;
};
static DEFINE_SPINLOCK(pm_qos_lock);
static struct pm_qos_object null_pm_qos;
+
static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier);
-static struct pm_qos_object cpu_dma_pm_qos = {
- .requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests),
- .notifiers = &cpu_dma_lat_notifier,
- .name = "cpu_dma_latency",
+static struct pm_qos_constraints cpu_dma_constraints = {
+ .list = PLIST_HEAD_INIT(cpu_dma_constraints.list),
.target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE,
.default_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE,
.type = PM_QOS_MIN,
+ .notifiers = &cpu_dma_lat_notifier,
+};
+static struct pm_qos_object cpu_dma_pm_qos = {
+ .constraints = &cpu_dma_constraints,
};
static BLOCKING_NOTIFIER_HEAD(network_lat_notifier);
-static struct pm_qos_object network_lat_pm_qos = {
- .requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests),
- .notifiers = &network_lat_notifier,
- .name = "network_latency",
+static struct pm_qos_constraints network_lat_constraints = {
+ .list = PLIST_HEAD_INIT(network_lat_constraints.list),
.target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE,
.default_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE,
- .type = PM_QOS_MIN
+ .type = PM_QOS_MIN,
+ .notifiers = &network_lat_notifier,
+};
+static struct pm_qos_object network_lat_pm_qos = {
+ .constraints = &network_lat_constraints,
+ .name = "network_latency",
};
static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier);
-static struct pm_qos_object network_throughput_pm_qos = {
- .requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests),
- .notifiers = &network_throughput_notifier,
- .name = "network_throughput",
+static struct pm_qos_constraints network_tput_constraints = {
+ .list = PLIST_HEAD_INIT(network_tput_constraints.list),
.target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE,
.default_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE,
.type = PM_QOS_MAX,
+ .notifiers = &network_throughput_notifier,
+};
+static struct pm_qos_object network_throughput_pm_qos = {
+ .constraints = &network_tput_constraints,
+ .name = "network_throughput",
};
@@ -127,17 +122,17 @@ static const struct file_operations pm_qos_power_fops = {
};
/* unlocked internal variant */
-static inline int pm_qos_get_value(struct pm_qos_object *o)
+static inline int pm_qos_get_value(struct pm_qos_constraints *c)
{
- if (plist_head_empty(&o->requests))
- return o->default_value;
+ if (plist_head_empty(&c->list))
+ return c->default_value;
- switch (o->type) {
+ switch (c->type) {
case PM_QOS_MIN:
- return plist_first(&o->requests)->prio;
+ return plist_first(&c->list)->prio;
case PM_QOS_MAX:
- return plist_last(&o->requests)->prio;
+ return plist_last(&c->list)->prio;
default:
/* runtime check for not using enum */
@@ -145,69 +140,73 @@ static inline int pm_qos_get_value(struct pm_qos_object *o)
}
}
-static inline s32 pm_qos_read_value(struct pm_qos_object *o)
+s32 pm_qos_read_value(struct pm_qos_constraints *c)
{
- return o->target_value;
+ return c->target_value;
}
-static inline void pm_qos_set_value(struct pm_qos_object *o, s32 value)
+static inline void pm_qos_set_value(struct pm_qos_constraints *c, s32 value)
{
- o->target_value = value;
+ c->target_value = value;
}
-static void update_target(struct pm_qos_object *o, struct plist_node *node,
- int del, int value)
+/**
+ * pm_qos_update_target - manages the constraints list and calls the notifiers
+ * if needed
+ * @c: constraints data struct
+ * @node: request to add to the list, to update or to remove
+ * @action: action to take on the constraints list
+ * @value: value of the request to add or update
+ *
+ * This function returns 1 if the aggregated constraint value has changed, 0
+ * otherwise.
+ */
+int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node,
+ enum pm_qos_req_action action, int value)
{
unsigned long flags;
- int prev_value, curr_value;
+ int prev_value, curr_value, new_value;
spin_lock_irqsave(&pm_qos_lock, flags);
- prev_value = pm_qos_get_value(o);
- /* PM_QOS_DEFAULT_VALUE is a signal that the value is unchanged */
- if (value != PM_QOS_DEFAULT_VALUE) {
+ prev_value = pm_qos_get_value(c);
+ if (value == PM_QOS_DEFAULT_VALUE)
+ new_value = c->default_value;
+ else
+ new_value = value;
+
+ switch (action) {
+ case PM_QOS_REMOVE_REQ:
+ plist_del(node, &c->list);
+ break;
+ case PM_QOS_UPDATE_REQ:
/*
* to change the list, we atomically remove, reinit
* with new value and add, then see if the extremal
* changed
*/
- plist_del(node, &o->requests);
- plist_node_init(node, value);
- plist_add(node, &o->requests);
- } else if (del) {
- plist_del(node, &o->requests);
- } else {
- plist_add(node, &o->requests);
+ plist_del(node, &c->list);
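+		/* fall through to re-add the node with its new value */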
+ case PM_QOS_ADD_REQ:
+ plist_node_init(node, new_value);
+ plist_add(node, &c->list);
+ break;
+ default:
+ /* no action */
+ ;
}
- curr_value = pm_qos_get_value(o);
- pm_qos_set_value(o, curr_value);
+
+ curr_value = pm_qos_get_value(c);
+ pm_qos_set_value(c, curr_value);
+
spin_unlock_irqrestore(&pm_qos_lock, flags);
- if (prev_value != curr_value)
- blocking_notifier_call_chain(o->notifiers,
+ if (prev_value != curr_value) {
+ blocking_notifier_call_chain(c->notifiers,
(unsigned long)curr_value,
NULL);
-}
-
-static int register_pm_qos_misc(struct pm_qos_object *qos)
-{
- qos->pm_qos_power_miscdev.minor = MISC_DYNAMIC_MINOR;
- qos->pm_qos_power_miscdev.name = qos->name;
- qos->pm_qos_power_miscdev.fops = &pm_qos_power_fops;
-
- return misc_register(&qos->pm_qos_power_miscdev);
-}
-
-static int find_pm_qos_object_by_minor(int minor)
-{
- int pm_qos_class;
-
- for (pm_qos_class = 0;
- pm_qos_class < PM_QOS_NUM_CLASSES; pm_qos_class++) {
- if (minor ==
- pm_qos_array[pm_qos_class]->pm_qos_power_miscdev.minor)
- return pm_qos_class;
+ return 1;
+ } else {
+ return 0;
}
- return -1;
}
/**
@@ -218,11 +217,11 @@ static int find_pm_qos_object_by_minor(int minor)
*/
int pm_qos_request(int pm_qos_class)
{
- return pm_qos_read_value(pm_qos_array[pm_qos_class]);
+ return pm_qos_read_value(pm_qos_array[pm_qos_class]->constraints);
}
EXPORT_SYMBOL_GPL(pm_qos_request);
-int pm_qos_request_active(struct pm_qos_request_list *req)
+int pm_qos_request_active(struct pm_qos_request *req)
{
return req->pm_qos_class != 0;
}
@@ -230,40 +229,36 @@ EXPORT_SYMBOL_GPL(pm_qos_request_active);
/**
* pm_qos_add_request - inserts new qos request into the list
- * @dep: pointer to a preallocated handle
+ * @req: pointer to a preallocated handle
* @pm_qos_class: identifies which list of qos request to use
* @value: defines the qos request
*
* This function inserts a new entry in the pm_qos_class list of requested qos
* performance characteristics. It recomputes the aggregate QoS expectations
- * for the pm_qos_class of parameters and initializes the pm_qos_request_list
+ * for the pm_qos_class of parameters and initializes the pm_qos_request
* handle. Caller needs to save this handle for later use in updates and
* removal.
*/
-void pm_qos_add_request(struct pm_qos_request_list *dep,
+void pm_qos_add_request(struct pm_qos_request *req,
int pm_qos_class, s32 value)
{
- struct pm_qos_object *o = pm_qos_array[pm_qos_class];
- int new_value;
+	if (!req) /* guard against callers passing in null */
+ return;
- if (pm_qos_request_active(dep)) {
+ if (pm_qos_request_active(req)) {
WARN(1, KERN_ERR "pm_qos_add_request() called for already added request\n");
return;
}
- if (value == PM_QOS_DEFAULT_VALUE)
- new_value = o->default_value;
- else
- new_value = value;
- plist_node_init(&dep->list, new_value);
- dep->pm_qos_class = pm_qos_class;
- update_target(o, &dep->list, 0, PM_QOS_DEFAULT_VALUE);
+ req->pm_qos_class = pm_qos_class;
+ pm_qos_update_target(pm_qos_array[pm_qos_class]->constraints,
+ &req->node, PM_QOS_ADD_REQ, value);
}
EXPORT_SYMBOL_GPL(pm_qos_add_request);
/**
* pm_qos_update_request - modifies an existing qos request
- * @pm_qos_req : handle to list element holding a pm_qos request to use
+ * @req : handle to list element holding a pm_qos request to use
* @value: defines the qos request
*
* Updates an existing qos request for the pm_qos_class of parameters along
@@ -271,56 +266,47 @@ EXPORT_SYMBOL_GPL(pm_qos_add_request);
*
* Attempts are made to make this code callable on hot code paths.
*/
-void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req,
+void pm_qos_update_request(struct pm_qos_request *req,
s32 new_value)
{
- s32 temp;
- struct pm_qos_object *o;
-
- if (!pm_qos_req) /*guard against callers passing in null */
+	if (!req) /* guard against callers passing in null */
return;
- if (!pm_qos_request_active(pm_qos_req)) {
+ if (!pm_qos_request_active(req)) {
WARN(1, KERN_ERR "pm_qos_update_request() called for unknown object\n");
return;
}
- o = pm_qos_array[pm_qos_req->pm_qos_class];
-
- if (new_value == PM_QOS_DEFAULT_VALUE)
- temp = o->default_value;
- else
- temp = new_value;
-
- if (temp != pm_qos_req->list.prio)
- update_target(o, &pm_qos_req->list, 0, temp);
+ if (new_value != req->node.prio)
+ pm_qos_update_target(
+ pm_qos_array[req->pm_qos_class]->constraints,
+ &req->node, PM_QOS_UPDATE_REQ, new_value);
}
EXPORT_SYMBOL_GPL(pm_qos_update_request);
/**
* pm_qos_remove_request - modifies an existing qos request
- * @pm_qos_req: handle to request list element
+ * @req: handle to request list element
*
- * Will remove pm qos request from the list of requests and
+ * Will remove pm qos request from the list of constraints and
* recompute the current target value for the pm_qos_class. Call this
* on slow code paths.
*/
-void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req)
+void pm_qos_remove_request(struct pm_qos_request *req)
{
- struct pm_qos_object *o;
-
- if (pm_qos_req == NULL)
+	if (!req) /* guard against callers passing in null */
return;
/* silent return to keep pcm code cleaner */
- if (!pm_qos_request_active(pm_qos_req)) {
+ if (!pm_qos_request_active(req)) {
WARN(1, KERN_ERR "pm_qos_remove_request() called for unknown object\n");
return;
}
- o = pm_qos_array[pm_qos_req->pm_qos_class];
- update_target(o, &pm_qos_req->list, 1, PM_QOS_DEFAULT_VALUE);
- memset(pm_qos_req, 0, sizeof(*pm_qos_req));
+ pm_qos_update_target(pm_qos_array[req->pm_qos_class]->constraints,
+ &req->node, PM_QOS_REMOVE_REQ,
+ PM_QOS_DEFAULT_VALUE);
+ memset(req, 0, sizeof(*req));
}
EXPORT_SYMBOL_GPL(pm_qos_remove_request);
@@ -337,7 +323,8 @@ int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier)
int retval;
retval = blocking_notifier_chain_register(
- pm_qos_array[pm_qos_class]->notifiers, notifier);
+ pm_qos_array[pm_qos_class]->constraints->notifiers,
+ notifier);
return retval;
}
@@ -356,19 +343,43 @@ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier)
int retval;
retval = blocking_notifier_chain_unregister(
- pm_qos_array[pm_qos_class]->notifiers, notifier);
+ pm_qos_array[pm_qos_class]->constraints->notifiers,
+ notifier);
return retval;
}
EXPORT_SYMBOL_GPL(pm_qos_remove_notifier);
+/* User space interface to PM QoS classes via misc devices */
+static int register_pm_qos_misc(struct pm_qos_object *qos)
+{
+ qos->pm_qos_power_miscdev.minor = MISC_DYNAMIC_MINOR;
+ qos->pm_qos_power_miscdev.name = qos->name;
+ qos->pm_qos_power_miscdev.fops = &pm_qos_power_fops;
+
+ return misc_register(&qos->pm_qos_power_miscdev);
+}
+
+static int find_pm_qos_object_by_minor(int minor)
+{
+ int pm_qos_class;
+
+ for (pm_qos_class = 0;
+ pm_qos_class < PM_QOS_NUM_CLASSES; pm_qos_class++) {
+ if (minor ==
+ pm_qos_array[pm_qos_class]->pm_qos_power_miscdev.minor)
+ return pm_qos_class;
+ }
+ return -1;
+}
+
static int pm_qos_power_open(struct inode *inode, struct file *filp)
{
long pm_qos_class;
pm_qos_class = find_pm_qos_object_by_minor(iminor(inode));
if (pm_qos_class >= 0) {
- struct pm_qos_request_list *req = kzalloc(sizeof(*req), GFP_KERNEL);
+ struct pm_qos_request *req = kzalloc(sizeof(*req), GFP_KERNEL);
if (!req)
return -ENOMEM;
@@ -383,7 +394,7 @@ static int pm_qos_power_open(struct inode *inode, struct file *filp)
static int pm_qos_power_release(struct inode *inode, struct file *filp)
{
- struct pm_qos_request_list *req;
+ struct pm_qos_request *req;
req = filp->private_data;
pm_qos_remove_request(req);
@@ -398,17 +409,15 @@ static ssize_t pm_qos_power_read(struct file *filp, char __user *buf,
{
s32 value;
unsigned long flags;
- struct pm_qos_object *o;
- struct pm_qos_request_list *pm_qos_req = filp->private_data;
+ struct pm_qos_request *req = filp->private_data;
- if (!pm_qos_req)
+ if (!req)
return -EINVAL;
- if (!pm_qos_request_active(pm_qos_req))
+ if (!pm_qos_request_active(req))
return -EINVAL;
- o = pm_qos_array[pm_qos_req->pm_qos_class];
spin_lock_irqsave(&pm_qos_lock, flags);
- value = pm_qos_get_value(o);
+ value = pm_qos_get_value(pm_qos_array[req->pm_qos_class]->constraints);
spin_unlock_irqrestore(&pm_qos_lock, flags);
return simple_read_from_buffer(buf, count, f_pos, &value, sizeof(s32));
@@ -418,7 +427,7 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
size_t count, loff_t *f_pos)
{
s32 value;
- struct pm_qos_request_list *pm_qos_req;
+ struct pm_qos_request *req;
if (count == sizeof(s32)) {
if (copy_from_user(&value, buf, sizeof(s32)))
@@ -449,8 +458,8 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
return -EINVAL;
}
- pm_qos_req = filp->private_data;
- pm_qos_update_request(pm_qos_req, value);
+ req = filp->private_data;
+ pm_qos_update_request(req, value);
return count;
}
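A minimal caller-side sketch of the reworked request API (the handle
name, the helper functions and the 20 us bound are hypothetical; only
the pm_qos_* calls and constants come from the code above):

#include <linux/pm_qos.h>

static struct pm_qos_request example_req;

static void example_start(void)
{
	/* Keep CPU/DMA latency at or below 20 us while we are active. */
	pm_qos_add_request(&example_req, PM_QOS_CPU_DMA_LATENCY, 20);
}

static void example_relax(void)
{
	/* Fall back to the class default value. */
	pm_qos_update_request(&example_req, PM_QOS_DEFAULT_VALUE);
}

static void example_stop(void)
{
	pm_qos_remove_request(&example_req);
}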
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 06efa54f93d6..cbe2c1441392 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1339,6 +1339,9 @@ int hibernate_preallocate_memory(void)
count += highmem;
count -= totalreserve_pages;
+ /* Add number of pages required for page keys (s390 only). */
+ size += page_key_additional_pages(saveable);
+
/* Compute the maximum number of saveable pages to leave in memory. */
max_size = (count - (size + PAGES_FOR_IO)) / 2
- 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE);
@@ -1662,6 +1665,8 @@ pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
buf[j] = memory_bm_next_pfn(bm);
if (unlikely(buf[j] == BM_END_OF_MAP))
break;
+ /* Save page key for data page (s390 only). */
+ page_key_read(buf + j);
}
}
@@ -1821,6 +1826,9 @@ static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
if (unlikely(buf[j] == BM_END_OF_MAP))
break;
+ /* Extract and buffer page key for data page (s390 only). */
+ page_key_memorize(buf + j);
+
if (memory_bm_pfn_present(bm, buf[j]))
memory_bm_set_bit(bm, buf[j]);
else
@@ -2223,6 +2231,11 @@ int snapshot_write_next(struct snapshot_handle *handle)
if (error)
return error;
+ /* Allocate buffer for page keys. */
+ error = page_key_alloc(nr_copy_pages);
+ if (error)
+ return error;
+
} else if (handle->cur <= nr_meta_pages + 1) {
error = unpack_orig_pfns(buffer, &copy_bm);
if (error)
@@ -2243,6 +2256,8 @@ int snapshot_write_next(struct snapshot_handle *handle)
}
} else {
copy_last_highmem_page();
+ /* Restore page key for data page (s390 only). */
+ page_key_write(handle->buffer);
handle->buffer = get_buffer(&orig_bm, &ca);
if (IS_ERR(handle->buffer))
return PTR_ERR(handle->buffer);
@@ -2264,6 +2279,9 @@ int snapshot_write_next(struct snapshot_handle *handle)
void snapshot_write_finalize(struct snapshot_handle *handle)
{
copy_last_highmem_page();
+ /* Restore page key for data page (s390 only). */
+ page_key_write(handle->buffer);
+ page_key_free();
/* Free only if we have loaded the image entirely */
if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index b6b71ad2208f..fdd4263b995d 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -104,7 +104,10 @@ static int suspend_prepare(void)
goto Finish;
error = suspend_freeze_processes();
- if (!error)
+ if (error) {
+ suspend_stats.failed_freeze++;
+ dpm_save_failed_step(SUSPEND_FREEZE);
+ } else
return 0;
suspend_thaw_processes();
@@ -315,8 +318,16 @@ int enter_state(suspend_state_t state)
*/
int pm_suspend(suspend_state_t state)
{
- if (state > PM_SUSPEND_ON && state <= PM_SUSPEND_MAX)
- return enter_state(state);
+ int ret;
+ if (state > PM_SUSPEND_ON && state < PM_SUSPEND_MAX) {
+ ret = enter_state(state);
+ if (ret) {
+ suspend_stats.fail++;
+ dpm_save_failed_errno(ret);
+ } else
+ suspend_stats.success++;
+ return ret;
+ }
return -EINVAL;
}
EXPORT_SYMBOL(pm_suspend);
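Caller-side sketch (the function is hypothetical): pm_suspend() now
updates suspend_stats itself, so callers only act on the return value:

#include <linux/kernel.h>
#include <linux/suspend.h>

static int example_try_suspend(void)
{
	int error = pm_suspend(PM_SUSPEND_MEM);

	if (error)
		pr_err("PM: suspend to RAM failed: %d\n", error);
	return error;
}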
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 7c97c3a0eee3..11a594c4ba25 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -27,6 +27,10 @@
#include <linux/slab.h>
#include <linux/lzo.h>
#include <linux/vmalloc.h>
+#include <linux/cpumask.h>
+#include <linux/atomic.h>
+#include <linux/kthread.h>
+#include <linux/crc32.h>
#include "power.h"
@@ -43,8 +47,7 @@
* allocated and populated one at a time, so we only need one memory
* page to set up the entire structure.
*
- * During resume we also only need to use one swap_map_page structure
- * at a time.
+ * During resume we read all swap_map_page structures into a list.
*/
#define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1)
@@ -54,6 +57,11 @@ struct swap_map_page {
sector_t next_swap;
};
+struct swap_map_page_list {
+ struct swap_map_page *map;
+ struct swap_map_page_list *next;
+};
+
/**
* The swap_map_handle structure is used for handling swap in
* a file-alike way
@@ -61,13 +69,18 @@ struct swap_map_page {
struct swap_map_handle {
struct swap_map_page *cur;
+ struct swap_map_page_list *maps;
sector_t cur_swap;
sector_t first_sector;
unsigned int k;
+ unsigned long nr_free_pages, written;
+ u32 crc32;
};
struct swsusp_header {
- char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int)];
+ char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int) -
+ sizeof(u32)];
+ u32 crc32;
sector_t image;
unsigned int flags; /* Flags to pass to the "boot" kernel */
char orig_sig[10];
@@ -199,6 +212,8 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
memcpy(swsusp_header->sig, HIBERNATE_SIG, 10);
swsusp_header->image = handle->first_sector;
swsusp_header->flags = flags;
+ if (flags & SF_CRC32_MODE)
+ swsusp_header->crc32 = handle->crc32;
error = hib_bio_write_page(swsusp_resume_block,
swsusp_header, NULL);
} else {
@@ -245,6 +260,7 @@ static int swsusp_swap_check(void)
static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
{
void *src;
+ int ret;
if (!offset)
return -ENOSPC;
@@ -254,9 +270,17 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
if (src) {
copy_page(src, buf);
} else {
- WARN_ON_ONCE(1);
- bio_chain = NULL; /* Go synchronous */
- src = buf;
+ ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */
+ if (ret)
+ return ret;
+ src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
+ if (src) {
+ copy_page(src, buf);
+ } else {
+ WARN_ON_ONCE(1);
+ bio_chain = NULL; /* Go synchronous */
+ src = buf;
+ }
}
} else {
src = buf;
@@ -293,6 +317,8 @@ static int get_swap_writer(struct swap_map_handle *handle)
goto err_rel;
}
handle->k = 0;
+ handle->nr_free_pages = nr_free_pages() >> 1;
+ handle->written = 0;
handle->first_sector = handle->cur_swap;
return 0;
err_rel:
@@ -316,20 +342,23 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
return error;
handle->cur->entries[handle->k++] = offset;
if (handle->k >= MAP_PAGE_ENTRIES) {
- error = hib_wait_on_bio_chain(bio_chain);
- if (error)
- goto out;
offset = alloc_swapdev_block(root_swap);
if (!offset)
return -ENOSPC;
handle->cur->next_swap = offset;
- error = write_page(handle->cur, handle->cur_swap, NULL);
+ error = write_page(handle->cur, handle->cur_swap, bio_chain);
if (error)
goto out;
clear_page(handle->cur);
handle->cur_swap = offset;
handle->k = 0;
}
+ if (bio_chain && ++handle->written > handle->nr_free_pages) {
+ error = hib_wait_on_bio_chain(bio_chain);
+ if (error)
+ goto out;
+ handle->written = 0;
+ }
out:
return error;
}
@@ -372,6 +401,13 @@ static int swap_writer_finish(struct swap_map_handle *handle,
LZO_HEADER, PAGE_SIZE)
#define LZO_CMP_SIZE (LZO_CMP_PAGES * PAGE_SIZE)
+/* Maximum number of threads for compression/decompression. */
+#define LZO_THREADS 3
+
+/* Maximum number of pages for read buffering. */
+#define LZO_READ_PAGES (MAP_PAGE_ENTRIES * 8)
+
+
/**
* save_image - save the suspend image data
*/
@@ -419,6 +455,92 @@ static int save_image(struct swap_map_handle *handle,
return ret;
}
+/**
+ * Structure used for CRC32.
+ */
+struct crc_data {
+ struct task_struct *thr; /* thread */
+ atomic_t ready; /* ready to start flag */
+ atomic_t stop; /* ready to stop flag */
+ unsigned run_threads; /* nr current threads */
+ wait_queue_head_t go; /* start crc update */
+ wait_queue_head_t done; /* crc update done */
+ u32 *crc32; /* points to handle's crc32 */
+ size_t *unc_len[LZO_THREADS]; /* uncompressed lengths */
+ unsigned char *unc[LZO_THREADS]; /* uncompressed data */
+};
+
+/**
+ * CRC32 update function that runs in its own thread.
+ */
+static int crc32_threadfn(void *data)
+{
+ struct crc_data *d = data;
+ unsigned i;
+
+ while (1) {
+ wait_event(d->go, atomic_read(&d->ready) ||
+ kthread_should_stop());
+ if (kthread_should_stop()) {
+ d->thr = NULL;
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ break;
+ }
+ atomic_set(&d->ready, 0);
+
+ for (i = 0; i < d->run_threads; i++)
+ *d->crc32 = crc32_le(*d->crc32,
+ d->unc[i], *d->unc_len[i]);
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ }
+ return 0;
+}
+/**
+ * Structure used for LZO data compression.
+ */
+struct cmp_data {
+ struct task_struct *thr; /* thread */
+ atomic_t ready; /* ready to start flag */
+ atomic_t stop; /* ready to stop flag */
+ int ret; /* return code */
+ wait_queue_head_t go; /* start compression */
+ wait_queue_head_t done; /* compression done */
+ size_t unc_len; /* uncompressed length */
+ size_t cmp_len; /* compressed length */
+ unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */
+ unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */
+ unsigned char wrk[LZO1X_1_MEM_COMPRESS]; /* compression workspace */
+};
+
+/**
+ * Compression function that runs in its own thread.
+ */
+static int lzo_compress_threadfn(void *data)
+{
+ struct cmp_data *d = data;
+
+ while (1) {
+ wait_event(d->go, atomic_read(&d->ready) ||
+ kthread_should_stop());
+ if (kthread_should_stop()) {
+ d->thr = NULL;
+ d->ret = -1;
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ break;
+ }
+ atomic_set(&d->ready, 0);
+
+ d->ret = lzo1x_1_compress(d->unc, d->unc_len,
+ d->cmp + LZO_HEADER, &d->cmp_len,
+ d->wrk);
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ }
+ return 0;
+}
/**
* save_image_lzo - Save the suspend image data compressed with LZO.
@@ -437,42 +559,93 @@ static int save_image_lzo(struct swap_map_handle *handle,
struct bio *bio;
struct timeval start;
struct timeval stop;
- size_t off, unc_len, cmp_len;
- unsigned char *unc, *cmp, *wrk, *page;
+ size_t off;
+ unsigned thr, run_threads, nr_threads;
+ unsigned char *page = NULL;
+ struct cmp_data *data = NULL;
+ struct crc_data *crc = NULL;
+
+ /*
+	 * We'll limit the number of compression threads to bound the
+	 * memory footprint.
+ */
+ nr_threads = num_online_cpus() - 1;
+ nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);
page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
if (!page) {
printk(KERN_ERR "PM: Failed to allocate LZO page\n");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out_clean;
}
- wrk = vmalloc(LZO1X_1_MEM_COMPRESS);
- if (!wrk) {
- printk(KERN_ERR "PM: Failed to allocate LZO workspace\n");
- free_page((unsigned long)page);
- return -ENOMEM;
+ data = vmalloc(sizeof(*data) * nr_threads);
+ if (!data) {
+ printk(KERN_ERR "PM: Failed to allocate LZO data\n");
+ ret = -ENOMEM;
+ goto out_clean;
}
+ for (thr = 0; thr < nr_threads; thr++)
+ memset(&data[thr], 0, offsetof(struct cmp_data, go));
- unc = vmalloc(LZO_UNC_SIZE);
- if (!unc) {
- printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
- vfree(wrk);
- free_page((unsigned long)page);
- return -ENOMEM;
+ crc = kmalloc(sizeof(*crc), GFP_KERNEL);
+ if (!crc) {
+ printk(KERN_ERR "PM: Failed to allocate crc\n");
+ ret = -ENOMEM;
+ goto out_clean;
+ }
+ memset(crc, 0, offsetof(struct crc_data, go));
+
+ /*
+ * Start the compression threads.
+ */
+ for (thr = 0; thr < nr_threads; thr++) {
+ init_waitqueue_head(&data[thr].go);
+ init_waitqueue_head(&data[thr].done);
+
+ data[thr].thr = kthread_run(lzo_compress_threadfn,
+ &data[thr],
+ "image_compress/%u", thr);
+ if (IS_ERR(data[thr].thr)) {
+ data[thr].thr = NULL;
+ printk(KERN_ERR
+ "PM: Cannot start compression threads\n");
+ ret = -ENOMEM;
+ goto out_clean;
+ }
}
- cmp = vmalloc(LZO_CMP_SIZE);
- if (!cmp) {
- printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
- vfree(unc);
- vfree(wrk);
- free_page((unsigned long)page);
- return -ENOMEM;
+ /*
+ * Adjust number of free pages after all allocations have been done.
+ * We don't want to run out of pages when writing.
+ */
+ handle->nr_free_pages = nr_free_pages() >> 1;
+
+ /*
+ * Start the CRC32 thread.
+ */
+ init_waitqueue_head(&crc->go);
+ init_waitqueue_head(&crc->done);
+
+ handle->crc32 = 0;
+ crc->crc32 = &handle->crc32;
+ for (thr = 0; thr < nr_threads; thr++) {
+ crc->unc[thr] = data[thr].unc;
+ crc->unc_len[thr] = &data[thr].unc_len;
+ }
+
+ crc->thr = kthread_run(crc32_threadfn, crc, "image_crc32");
+ if (IS_ERR(crc->thr)) {
+ crc->thr = NULL;
+ printk(KERN_ERR "PM: Cannot start CRC32 thread\n");
+ ret = -ENOMEM;
+ goto out_clean;
}
printk(KERN_INFO
+ "PM: Using %u thread(s) for compression.\n"
"PM: Compressing and saving image data (%u pages) ... ",
- nr_to_write);
+ nr_threads, nr_to_write);
m = nr_to_write / 100;
if (!m)
m = 1;
@@ -480,55 +653,83 @@ static int save_image_lzo(struct swap_map_handle *handle,
bio = NULL;
do_gettimeofday(&start);
for (;;) {
- for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
- ret = snapshot_read_next(snapshot);
- if (ret < 0)
- goto out_finish;
-
- if (!ret)
+ for (thr = 0; thr < nr_threads; thr++) {
+ for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
+ ret = snapshot_read_next(snapshot);
+ if (ret < 0)
+ goto out_finish;
+
+ if (!ret)
+ break;
+
+ memcpy(data[thr].unc + off,
+ data_of(*snapshot), PAGE_SIZE);
+
+ if (!(nr_pages % m))
+ printk(KERN_CONT "\b\b\b\b%3d%%",
+ nr_pages / m);
+ nr_pages++;
+ }
+ if (!off)
break;
- memcpy(unc + off, data_of(*snapshot), PAGE_SIZE);
+ data[thr].unc_len = off;
- if (!(nr_pages % m))
- printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m);
- nr_pages++;
+ atomic_set(&data[thr].ready, 1);
+ wake_up(&data[thr].go);
}
- if (!off)
+ if (!thr)
break;
- unc_len = off;
- ret = lzo1x_1_compress(unc, unc_len,
- cmp + LZO_HEADER, &cmp_len, wrk);
- if (ret < 0) {
- printk(KERN_ERR "PM: LZO compression failed\n");
- break;
- }
+ crc->run_threads = thr;
+ atomic_set(&crc->ready, 1);
+ wake_up(&crc->go);
- if (unlikely(!cmp_len ||
- cmp_len > lzo1x_worst_compress(unc_len))) {
- printk(KERN_ERR "PM: Invalid LZO compressed length\n");
- ret = -1;
- break;
- }
+ for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
+ wait_event(data[thr].done,
+ atomic_read(&data[thr].stop));
+ atomic_set(&data[thr].stop, 0);
- *(size_t *)cmp = cmp_len;
+ ret = data[thr].ret;
- /*
- * Given we are writing one page at a time to disk, we copy
- * that much from the buffer, although the last bit will likely
- * be smaller than full page. This is OK - we saved the length
- * of the compressed data, so any garbage at the end will be
- * discarded when we read it.
- */
- for (off = 0; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) {
- memcpy(page, cmp + off, PAGE_SIZE);
+ if (ret < 0) {
+ printk(KERN_ERR "PM: LZO compression failed\n");
+ goto out_finish;
+ }
- ret = swap_write_page(handle, page, &bio);
- if (ret)
+ if (unlikely(!data[thr].cmp_len ||
+ data[thr].cmp_len >
+ lzo1x_worst_compress(data[thr].unc_len))) {
+ printk(KERN_ERR
+ "PM: Invalid LZO compressed length\n");
+ ret = -1;
goto out_finish;
+ }
+
+ *(size_t *)data[thr].cmp = data[thr].cmp_len;
+
+ /*
+ * Given we are writing one page at a time to disk, we
+ * copy that much from the buffer, although the last
+ * bit will likely be smaller than full page. This is
+ * OK - we saved the length of the compressed data, so
+ * any garbage at the end will be discarded when we
+ * read it.
+ */
+ for (off = 0;
+ off < LZO_HEADER + data[thr].cmp_len;
+ off += PAGE_SIZE) {
+ memcpy(page, data[thr].cmp + off, PAGE_SIZE);
+
+ ret = swap_write_page(handle, page, &bio);
+ if (ret)
+ goto out_finish;
+ }
}
+
+ wait_event(crc->done, atomic_read(&crc->stop));
+ atomic_set(&crc->stop, 0);
}
out_finish:
@@ -536,16 +737,25 @@ out_finish:
do_gettimeofday(&stop);
if (!ret)
ret = err2;
- if (!ret)
+ if (!ret) {
printk(KERN_CONT "\b\b\b\bdone\n");
- else
+ } else {
printk(KERN_CONT "\n");
+ }
swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
-
- vfree(cmp);
- vfree(unc);
- vfree(wrk);
- free_page((unsigned long)page);
+out_clean:
+ if (crc) {
+ if (crc->thr)
+ kthread_stop(crc->thr);
+ kfree(crc);
+ }
+ if (data) {
+ for (thr = 0; thr < nr_threads; thr++)
+ if (data[thr].thr)
+ kthread_stop(data[thr].thr);
+ vfree(data);
+ }
+	if (page)
+		free_page((unsigned long)page);
return ret;
}
@@ -625,8 +835,15 @@ out_finish:
static void release_swap_reader(struct swap_map_handle *handle)
{
- if (handle->cur)
- free_page((unsigned long)handle->cur);
+ struct swap_map_page_list *tmp;
+
+ while (handle->maps) {
+ if (handle->maps->map)
+ free_page((unsigned long)handle->maps->map);
+ tmp = handle->maps;
+ handle->maps = handle->maps->next;
+ kfree(tmp);
+ }
handle->cur = NULL;
}
@@ -634,22 +851,46 @@ static int get_swap_reader(struct swap_map_handle *handle,
unsigned int *flags_p)
{
int error;
+ struct swap_map_page_list *tmp, *last;
+ sector_t offset;
*flags_p = swsusp_header->flags;
if (!swsusp_header->image) /* how can this happen? */
return -EINVAL;
- handle->cur = (struct swap_map_page *)get_zeroed_page(__GFP_WAIT | __GFP_HIGH);
- if (!handle->cur)
- return -ENOMEM;
+ handle->cur = NULL;
+ last = handle->maps = NULL;
+ offset = swsusp_header->image;
+ while (offset) {
+ tmp = kmalloc(sizeof(*handle->maps), GFP_KERNEL);
+ if (!tmp) {
+ release_swap_reader(handle);
+ return -ENOMEM;
+ }
+ memset(tmp, 0, sizeof(*tmp));
+ if (!handle->maps)
+ handle->maps = tmp;
+ if (last)
+ last->next = tmp;
+ last = tmp;
+
+ tmp->map = (struct swap_map_page *)
+ __get_free_page(__GFP_WAIT | __GFP_HIGH);
+ if (!tmp->map) {
+ release_swap_reader(handle);
+ return -ENOMEM;
+ }
- error = hib_bio_read_page(swsusp_header->image, handle->cur, NULL);
- if (error) {
- release_swap_reader(handle);
- return error;
+ error = hib_bio_read_page(offset, tmp->map, NULL);
+ if (error) {
+ release_swap_reader(handle);
+ return error;
+ }
+ offset = tmp->map->next_swap;
}
handle->k = 0;
+ handle->cur = handle->maps->map;
return 0;
}
@@ -658,6 +899,7 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf,
{
sector_t offset;
int error;
+ struct swap_map_page_list *tmp;
if (!handle->cur)
return -EINVAL;
@@ -668,13 +910,15 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf,
if (error)
return error;
if (++handle->k >= MAP_PAGE_ENTRIES) {
- error = hib_wait_on_bio_chain(bio_chain);
handle->k = 0;
- offset = handle->cur->next_swap;
- if (!offset)
+ free_page((unsigned long)handle->maps->map);
+ tmp = handle->maps;
+ handle->maps = handle->maps->next;
+ kfree(tmp);
+ if (!handle->maps)
release_swap_reader(handle);
- else if (!error)
- error = hib_bio_read_page(offset, handle->cur, NULL);
+ else
+ handle->cur = handle->maps->map;
}
return error;
}
@@ -697,7 +941,7 @@ static int load_image(struct swap_map_handle *handle,
unsigned int nr_to_read)
{
unsigned int m;
- int error = 0;
+ int ret = 0;
struct timeval start;
struct timeval stop;
struct bio *bio;
@@ -713,15 +957,15 @@ static int load_image(struct swap_map_handle *handle,
bio = NULL;
do_gettimeofday(&start);
for ( ; ; ) {
- error = snapshot_write_next(snapshot);
- if (error <= 0)
+ ret = snapshot_write_next(snapshot);
+ if (ret <= 0)
break;
- error = swap_read_page(handle, data_of(*snapshot), &bio);
- if (error)
+ ret = swap_read_page(handle, data_of(*snapshot), &bio);
+ if (ret)
break;
if (snapshot->sync_read)
- error = hib_wait_on_bio_chain(&bio);
- if (error)
+ ret = hib_wait_on_bio_chain(&bio);
+ if (ret)
break;
if (!(nr_pages % m))
printk("\b\b\b\b%3d%%", nr_pages / m);
@@ -729,17 +973,61 @@ static int load_image(struct swap_map_handle *handle,
}
err2 = hib_wait_on_bio_chain(&bio);
do_gettimeofday(&stop);
- if (!error)
- error = err2;
- if (!error) {
+ if (!ret)
+ ret = err2;
+ if (!ret) {
printk("\b\b\b\bdone\n");
snapshot_write_finalize(snapshot);
if (!snapshot_image_loaded(snapshot))
- error = -ENODATA;
+ ret = -ENODATA;
} else
printk("\n");
swsusp_show_speed(&start, &stop, nr_to_read, "Read");
- return error;
+ return ret;
+}
+
+/**
+ * Structure used for LZO data decompression.
+ */
+struct dec_data {
+ struct task_struct *thr; /* thread */
+ atomic_t ready; /* ready to start flag */
+ atomic_t stop; /* ready to stop flag */
+ int ret; /* return code */
+ wait_queue_head_t go; /* start decompression */
+ wait_queue_head_t done; /* decompression done */
+ size_t unc_len; /* uncompressed length */
+ size_t cmp_len; /* compressed length */
+ unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */
+ unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */
+};
+
+/**
+ * Decompression function that runs in its own thread.
+ */
+static int lzo_decompress_threadfn(void *data)
+{
+ struct dec_data *d = data;
+
+ while (1) {
+ wait_event(d->go, atomic_read(&d->ready) ||
+ kthread_should_stop());
+ if (kthread_should_stop()) {
+ d->thr = NULL;
+ d->ret = -1;
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ break;
+ }
+ atomic_set(&d->ready, 0);
+
+ d->unc_len = LZO_UNC_SIZE;
+ d->ret = lzo1x_decompress_safe(d->cmp + LZO_HEADER, d->cmp_len,
+ d->unc, &d->unc_len);
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ }
+ return 0;
}
/**
@@ -753,50 +1041,120 @@ static int load_image_lzo(struct swap_map_handle *handle,
unsigned int nr_to_read)
{
unsigned int m;
- int error = 0;
+ int ret = 0;
+ int eof = 0;
struct bio *bio;
struct timeval start;
struct timeval stop;
unsigned nr_pages;
- size_t i, off, unc_len, cmp_len;
- unsigned char *unc, *cmp, *page[LZO_CMP_PAGES];
-
- for (i = 0; i < LZO_CMP_PAGES; i++) {
- page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
- if (!page[i]) {
- printk(KERN_ERR "PM: Failed to allocate LZO page\n");
+ size_t off;
+ unsigned i, thr, run_threads, nr_threads;
+ unsigned ring = 0, pg = 0, ring_size = 0,
+ have = 0, want, need, asked = 0;
+ unsigned long read_pages;
+ unsigned char **page = NULL;
+ struct dec_data *data = NULL;
+ struct crc_data *crc = NULL;
+
+ /*
+	 * We'll limit the number of decompression threads to bound the
+	 * memory footprint.
+ */
+ nr_threads = num_online_cpus() - 1;
+ nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);
+
+ page = vmalloc(sizeof(*page) * LZO_READ_PAGES);
+ if (!page) {
+ printk(KERN_ERR "PM: Failed to allocate LZO page\n");
+ ret = -ENOMEM;
+ goto out_clean;
+ }
- while (i)
- free_page((unsigned long)page[--i]);
+ data = vmalloc(sizeof(*data) * nr_threads);
+ if (!data) {
+ printk(KERN_ERR "PM: Failed to allocate LZO data\n");
+ ret = -ENOMEM;
+ goto out_clean;
+ }
+ for (thr = 0; thr < nr_threads; thr++)
+ memset(&data[thr], 0, offsetof(struct dec_data, go));
- return -ENOMEM;
+ crc = kmalloc(sizeof(*crc), GFP_KERNEL);
+ if (!crc) {
+ printk(KERN_ERR "PM: Failed to allocate crc\n");
+ ret = -ENOMEM;
+ goto out_clean;
+ }
+ memset(crc, 0, offsetof(struct crc_data, go));
+
+ /*
+ * Start the decompression threads.
+ */
+ for (thr = 0; thr < nr_threads; thr++) {
+ init_waitqueue_head(&data[thr].go);
+ init_waitqueue_head(&data[thr].done);
+
+ data[thr].thr = kthread_run(lzo_decompress_threadfn,
+ &data[thr],
+ "image_decompress/%u", thr);
+ if (IS_ERR(data[thr].thr)) {
+ data[thr].thr = NULL;
+ printk(KERN_ERR
+ "PM: Cannot start decompression threads\n");
+ ret = -ENOMEM;
+ goto out_clean;
}
}
- unc = vmalloc(LZO_UNC_SIZE);
- if (!unc) {
- printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
-
- for (i = 0; i < LZO_CMP_PAGES; i++)
- free_page((unsigned long)page[i]);
-
- return -ENOMEM;
+ /*
+ * Start the CRC32 thread.
+ */
+ init_waitqueue_head(&crc->go);
+ init_waitqueue_head(&crc->done);
+
+ handle->crc32 = 0;
+ crc->crc32 = &handle->crc32;
+ for (thr = 0; thr < nr_threads; thr++) {
+ crc->unc[thr] = data[thr].unc;
+ crc->unc_len[thr] = &data[thr].unc_len;
}
- cmp = vmalloc(LZO_CMP_SIZE);
- if (!cmp) {
- printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
+ crc->thr = kthread_run(crc32_threadfn, crc, "image_crc32");
+ if (IS_ERR(crc->thr)) {
+ crc->thr = NULL;
+ printk(KERN_ERR "PM: Cannot start CRC32 thread\n");
+ ret = -ENOMEM;
+ goto out_clean;
+ }
- vfree(unc);
- for (i = 0; i < LZO_CMP_PAGES; i++)
- free_page((unsigned long)page[i]);
+ /*
+ * Adjust number of pages for read buffering, in case we are short.
+ */
+ read_pages = (nr_free_pages() - snapshot_get_image_size()) >> 1;
+ read_pages = clamp_val(read_pages, LZO_CMP_PAGES, LZO_READ_PAGES);
- return -ENOMEM;
+ for (i = 0; i < read_pages; i++) {
+ page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ?
+ __GFP_WAIT | __GFP_HIGH :
+ __GFP_WAIT);
+ if (!page[i]) {
+ if (i < LZO_CMP_PAGES) {
+ ring_size = i;
+ printk(KERN_ERR
+ "PM: Failed to allocate LZO pages\n");
+ ret = -ENOMEM;
+ goto out_clean;
+ } else {
+ break;
+ }
+ }
}
+ want = ring_size = i;
printk(KERN_INFO
+ "PM: Using %u thread(s) for decompression.\n"
"PM: Loading and decompressing image data (%u pages) ... ",
- nr_to_read);
+ nr_threads, nr_to_read);
m = nr_to_read / 100;
if (!m)
m = 1;
@@ -804,85 +1162,189 @@ static int load_image_lzo(struct swap_map_handle *handle,
bio = NULL;
do_gettimeofday(&start);
- error = snapshot_write_next(snapshot);
- if (error <= 0)
+ ret = snapshot_write_next(snapshot);
+ if (ret <= 0)
goto out_finish;
- for (;;) {
- error = swap_read_page(handle, page[0], NULL); /* sync */
- if (error)
- break;
-
- cmp_len = *(size_t *)page[0];
- if (unlikely(!cmp_len ||
- cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) {
- printk(KERN_ERR "PM: Invalid LZO compressed length\n");
- error = -1;
- break;
+	for (;;) {
+ for (i = 0; !eof && i < want; i++) {
+ ret = swap_read_page(handle, page[ring], &bio);
+ if (ret) {
+ /*
+ * On real read error, finish. On end of data,
+ * set EOF flag and just exit the read loop.
+ */
+ if (handle->cur &&
+ handle->cur->entries[handle->k]) {
+ goto out_finish;
+ } else {
+ eof = 1;
+ break;
+ }
+ }
+ if (++ring >= ring_size)
+ ring = 0;
}
+ asked += i;
+ want -= i;
- for (off = PAGE_SIZE, i = 1;
- off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
- error = swap_read_page(handle, page[i], &bio);
- if (error)
+ /*
+ * We are out of data, wait for some more.
+ */
+ if (!have) {
+ if (!asked)
+ break;
+
+ ret = hib_wait_on_bio_chain(&bio);
+ if (ret)
goto out_finish;
+ have += asked;
+ asked = 0;
+ if (eof)
+ eof = 2;
}
- error = hib_wait_on_bio_chain(&bio); /* need all data now */
- if (error)
- goto out_finish;
-
- for (off = 0, i = 0;
- off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) {
- memcpy(cmp + off, page[i], PAGE_SIZE);
+ if (crc->run_threads) {
+ wait_event(crc->done, atomic_read(&crc->stop));
+ atomic_set(&crc->stop, 0);
+ crc->run_threads = 0;
}
- unc_len = LZO_UNC_SIZE;
- error = lzo1x_decompress_safe(cmp + LZO_HEADER, cmp_len,
- unc, &unc_len);
- if (error < 0) {
- printk(KERN_ERR "PM: LZO decompression failed\n");
- break;
+ for (thr = 0; have && thr < nr_threads; thr++) {
+ data[thr].cmp_len = *(size_t *)page[pg];
+ if (unlikely(!data[thr].cmp_len ||
+ data[thr].cmp_len >
+ lzo1x_worst_compress(LZO_UNC_SIZE))) {
+ printk(KERN_ERR
+ "PM: Invalid LZO compressed length\n");
+ ret = -1;
+ goto out_finish;
+ }
+
+ need = DIV_ROUND_UP(data[thr].cmp_len + LZO_HEADER,
+ PAGE_SIZE);
+ if (need > have) {
+ if (eof > 1) {
+ ret = -1;
+ goto out_finish;
+ }
+ break;
+ }
+
+ for (off = 0;
+ off < LZO_HEADER + data[thr].cmp_len;
+ off += PAGE_SIZE) {
+ memcpy(data[thr].cmp + off,
+ page[pg], PAGE_SIZE);
+ have--;
+ want++;
+ if (++pg >= ring_size)
+ pg = 0;
+ }
+
+ atomic_set(&data[thr].ready, 1);
+ wake_up(&data[thr].go);
}
- if (unlikely(!unc_len ||
- unc_len > LZO_UNC_SIZE ||
- unc_len & (PAGE_SIZE - 1))) {
- printk(KERN_ERR "PM: Invalid LZO uncompressed length\n");
- error = -1;
- break;
+ /*
+ * Wait for more data while we are decompressing.
+ */
+ if (have < LZO_CMP_PAGES && asked) {
+ ret = hib_wait_on_bio_chain(&bio);
+ if (ret)
+ goto out_finish;
+ have += asked;
+ asked = 0;
+ if (eof)
+ eof = 2;
}
- for (off = 0; off < unc_len; off += PAGE_SIZE) {
- memcpy(data_of(*snapshot), unc + off, PAGE_SIZE);
+ for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
+ wait_event(data[thr].done,
+ atomic_read(&data[thr].stop));
+ atomic_set(&data[thr].stop, 0);
+
+ ret = data[thr].ret;
- if (!(nr_pages % m))
- printk("\b\b\b\b%3d%%", nr_pages / m);
- nr_pages++;
+ if (ret < 0) {
+ printk(KERN_ERR
+ "PM: LZO decompression failed\n");
+ goto out_finish;
+ }
- error = snapshot_write_next(snapshot);
- if (error <= 0)
+ if (unlikely(!data[thr].unc_len ||
+ data[thr].unc_len > LZO_UNC_SIZE ||
+ data[thr].unc_len & (PAGE_SIZE - 1))) {
+ printk(KERN_ERR
+ "PM: Invalid LZO uncompressed length\n");
+ ret = -1;
goto out_finish;
+ }
+
+ for (off = 0;
+ off < data[thr].unc_len; off += PAGE_SIZE) {
+ memcpy(data_of(*snapshot),
+ data[thr].unc + off, PAGE_SIZE);
+
+ if (!(nr_pages % m))
+ printk("\b\b\b\b%3d%%", nr_pages / m);
+ nr_pages++;
+
+ ret = snapshot_write_next(snapshot);
+ if (ret <= 0) {
+ crc->run_threads = thr + 1;
+ atomic_set(&crc->ready, 1);
+ wake_up(&crc->go);
+ goto out_finish;
+ }
+ }
}
+
+ crc->run_threads = thr;
+ atomic_set(&crc->ready, 1);
+ wake_up(&crc->go);
}
out_finish:
+ if (crc->run_threads) {
+ wait_event(crc->done, atomic_read(&crc->stop));
+ atomic_set(&crc->stop, 0);
+ }
do_gettimeofday(&stop);
- if (!error) {
+ if (!ret) {
printk("\b\b\b\bdone\n");
snapshot_write_finalize(snapshot);
if (!snapshot_image_loaded(snapshot))
- error = -ENODATA;
+ ret = -ENODATA;
+ if (!ret) {
+ if (swsusp_header->flags & SF_CRC32_MODE) {
+				if (handle->crc32 != swsusp_header->crc32) {
+ printk(KERN_ERR
+ "PM: Invalid image CRC32!\n");
+ ret = -ENODATA;
+ }
+ }
+ }
} else
printk("\n");
swsusp_show_speed(&start, &stop, nr_to_read, "Read");
-
- vfree(cmp);
- vfree(unc);
- for (i = 0; i < LZO_CMP_PAGES; i++)
+out_clean:
+ for (i = 0; i < ring_size; i++)
free_page((unsigned long)page[i]);
+ if (crc) {
+ if (crc->thr)
+ kthread_stop(crc->thr);
+ kfree(crc);
+ }
+ if (data) {
+ for (thr = 0; thr < nr_threads; thr++)
+ if (data[thr].thr)
+ kthread_stop(data[thr].thr);
+ vfree(data);
+ }
+	if (page)
+		vfree(page);
- return error;
+ return ret;
}
/**
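The compression, decompression and CRC32 threads above all share one
handshake. A condensed sketch, with hypothetical struct and function
names (the kthread/wait_event/atomic calls are the ones the patch
uses); error handling for kthread_run() is omitted:

#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/atomic.h>

struct worker {
	struct task_struct *thr;
	atomic_t ready, stop;
	wait_queue_head_t go, done;
};

static int worker_threadfn(void *arg)
{
	struct worker *w = arg;

	while (1) {
		wait_event(w->go, atomic_read(&w->ready) ||
				  kthread_should_stop());
		if (kthread_should_stop())
			break;
		atomic_set(&w->ready, 0);
		/* ... process one batch of shared buffers ... */
		atomic_set(&w->stop, 1);
		wake_up(&w->done);
	}
	return 0;
}

static void worker_start(struct worker *w)
{
	init_waitqueue_head(&w->go);
	init_waitqueue_head(&w->done);
	w->thr = kthread_run(worker_threadfn, w, "example_worker");
}

/* Producer side: hand off a batch, do other work, then collect it. */
static void worker_kick(struct worker *w)
{
	atomic_set(&w->ready, 1);
	wake_up(&w->go);
}

static void worker_wait(struct worker *w)
{
	wait_event(w->done, atomic_read(&w->stop));
	atomic_set(&w->stop, 0);
}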
diff --git a/kernel/printk.c b/kernel/printk.c
index 37dff3429adb..b7da18391c38 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -100,7 +100,7 @@ static int console_locked, console_suspended;
* It is also used in interesting ways to provide interlocking in
* console_unlock();.
*/
-static DEFINE_SPINLOCK(logbuf_lock);
+static DEFINE_RAW_SPINLOCK(logbuf_lock);
#define LOG_BUF_MASK (log_buf_len-1)
#define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK])
@@ -212,7 +212,7 @@ void __init setup_log_buf(int early)
return;
}
- spin_lock_irqsave(&logbuf_lock, flags);
+ raw_spin_lock_irqsave(&logbuf_lock, flags);
log_buf_len = new_log_buf_len;
log_buf = new_log_buf;
new_log_buf_len = 0;
@@ -230,7 +230,7 @@ void __init setup_log_buf(int early)
log_start -= offset;
con_start -= offset;
log_end -= offset;
- spin_unlock_irqrestore(&logbuf_lock, flags);
+ raw_spin_unlock_irqrestore(&logbuf_lock, flags);
pr_info("log_buf_len: %d\n", log_buf_len);
pr_info("early log buf free: %d(%d%%)\n",
@@ -318,8 +318,10 @@ static int check_syslog_permissions(int type, bool from_file)
return 0;
/* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */
if (capable(CAP_SYS_ADMIN)) {
- WARN_ONCE(1, "Attempt to access syslog with CAP_SYS_ADMIN "
- "but no CAP_SYSLOG (deprecated).\n");
+ printk_once(KERN_WARNING "%s (%d): "
+ "Attempt to access syslog with CAP_SYS_ADMIN "
+ "but no CAP_SYSLOG (deprecated).\n",
+ current->comm, task_pid_nr(current));
return 0;
}
return -EPERM;
@@ -363,18 +365,18 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
if (error)
goto out;
i = 0;
- spin_lock_irq(&logbuf_lock);
+ raw_spin_lock_irq(&logbuf_lock);
while (!error && (log_start != log_end) && i < len) {
c = LOG_BUF(log_start);
log_start++;
- spin_unlock_irq(&logbuf_lock);
+ raw_spin_unlock_irq(&logbuf_lock);
error = __put_user(c,buf);
buf++;
i++;
cond_resched();
- spin_lock_irq(&logbuf_lock);
+ raw_spin_lock_irq(&logbuf_lock);
}
- spin_unlock_irq(&logbuf_lock);
+ raw_spin_unlock_irq(&logbuf_lock);
if (!error)
error = i;
break;
@@ -397,7 +399,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
count = len;
if (count > log_buf_len)
count = log_buf_len;
- spin_lock_irq(&logbuf_lock);
+ raw_spin_lock_irq(&logbuf_lock);
if (count > logged_chars)
count = logged_chars;
if (do_clear)
@@ -414,12 +416,12 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
if (j + log_buf_len < log_end)
break;
c = LOG_BUF(j);
- spin_unlock_irq(&logbuf_lock);
+ raw_spin_unlock_irq(&logbuf_lock);
error = __put_user(c,&buf[count-1-i]);
cond_resched();
- spin_lock_irq(&logbuf_lock);
+ raw_spin_lock_irq(&logbuf_lock);
}
- spin_unlock_irq(&logbuf_lock);
+ raw_spin_unlock_irq(&logbuf_lock);
if (error)
break;
error = i;
@@ -687,7 +689,7 @@ static void zap_locks(void)
oops_timestamp = jiffies;
/* If a crash is occurring, make sure we can't deadlock */
- spin_lock_init(&logbuf_lock);
+ raw_spin_lock_init(&logbuf_lock);
/* And make sure that we print immediately */
sema_init(&console_sem, 1);
}
@@ -800,9 +802,9 @@ static int console_trylock_for_printk(unsigned int cpu)
}
}
printk_cpu = UINT_MAX;
- spin_unlock(&logbuf_lock);
if (wake)
up(&console_sem);
+ raw_spin_unlock(&logbuf_lock);
return retval;
}
static const char recursion_bug_msg [] =
@@ -862,7 +864,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
}
lockdep_off();
- spin_lock(&logbuf_lock);
+ raw_spin_lock(&logbuf_lock);
printk_cpu = this_cpu;
if (recursion_bug) {
@@ -1255,14 +1257,14 @@ void console_unlock(void)
again:
for ( ; ; ) {
- spin_lock_irqsave(&logbuf_lock, flags);
+ raw_spin_lock_irqsave(&logbuf_lock, flags);
wake_klogd |= log_start - log_end;
if (con_start == log_end)
break; /* Nothing to print */
_con_start = con_start;
_log_end = log_end;
con_start = log_end; /* Flush */
- spin_unlock(&logbuf_lock);
+ raw_spin_unlock(&logbuf_lock);
stop_critical_timings(); /* don't trace print latency */
call_console_drivers(_con_start, _log_end);
start_critical_timings();
@@ -1274,7 +1276,7 @@ again:
if (unlikely(exclusive_console))
exclusive_console = NULL;
- spin_unlock(&logbuf_lock);
+ raw_spin_unlock(&logbuf_lock);
up(&console_sem);
@@ -1284,13 +1286,13 @@ again:
* there's a new owner and the console_unlock() from them will do the
* flush, no worries.
*/
- spin_lock(&logbuf_lock);
+ raw_spin_lock(&logbuf_lock);
if (con_start != log_end)
retry = 1;
- spin_unlock_irqrestore(&logbuf_lock, flags);
if (retry && console_trylock())
goto again;
+ raw_spin_unlock_irqrestore(&logbuf_lock, flags);
if (wake_klogd)
wake_up_klogd();
}
@@ -1520,9 +1522,9 @@ void register_console(struct console *newcon)
* console_unlock(); will print out the buffered messages
* for us.
*/
- spin_lock_irqsave(&logbuf_lock, flags);
+ raw_spin_lock_irqsave(&logbuf_lock, flags);
con_start = log_start;
- spin_unlock_irqrestore(&logbuf_lock, flags);
+ raw_spin_unlock_irqrestore(&logbuf_lock, flags);
/*
* We're about to replay the log buffer. Only do this to the
* just-registered console to avoid excessive message spam to
@@ -1602,7 +1604,7 @@ static int __init printk_late_init(void)
struct console *con;
for_each_console(con) {
- if (con->flags & CON_BOOT) {
+ if (!keep_bootcon && con->flags & CON_BOOT) {
printk(KERN_INFO "turn off boot console %s%d\n",
con->name, con->index);
unregister_console(con);
@@ -1729,10 +1731,10 @@ void kmsg_dump(enum kmsg_dump_reason reason)
/* Theoretically, the log could move on after we do this, but
there's not a lot we can do about that. The new messages
will overwrite the start of what we dump. */
- spin_lock_irqsave(&logbuf_lock, flags);
+ raw_spin_lock_irqsave(&logbuf_lock, flags);
end = log_end & LOG_BUF_MASK;
chars = logged_chars;
- spin_unlock_irqrestore(&logbuf_lock, flags);
+ raw_spin_unlock_irqrestore(&logbuf_lock, flags);
if (chars > end) {
s1 = log_buf + log_buf_len - chars + end;
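The conversion pattern in a nutshell (a sketch, not part of the patch;
the lock name is hypothetical): raw_spinlock_t always spins, whereas
spinlock_t can be substituted by a sleeping lock on preempt-rt trees,
so locks taken from the printk/console paths must be raw:

#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(example_lock);

static void example_critical_section(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&example_lock, flags);
	/* ... data that may be touched from any context ... */
	raw_spin_unlock_irqrestore(&example_lock, flags);
}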
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 9de3ecfd20f9..a70d2a5d8c7b 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -744,20 +744,17 @@ int ptrace_request(struct task_struct *child, long request,
break;
si = child->last_siginfo;
- if (unlikely(!si || si->si_code >> 8 != PTRACE_EVENT_STOP))
- break;
-
- child->jobctl |= JOBCTL_LISTENING;
-
- /*
- * If NOTIFY is set, it means event happened between start
- * of this trap and now. Trigger re-trap immediately.
- */
- if (child->jobctl & JOBCTL_TRAP_NOTIFY)
- signal_wake_up(child, true);
-
+ if (likely(si && (si->si_code >> 8) == PTRACE_EVENT_STOP)) {
+ child->jobctl |= JOBCTL_LISTENING;
+ /*
+ * If NOTIFY is set, it means event happened between
+ * start of this trap and now. Trigger re-trap.
+ */
+ if (child->jobctl & JOBCTL_TRAP_NOTIFY)
+ signal_wake_up(child, true);
+ ret = 0;
+ }
unlock_task_sighand(child, &flags);
- ret = 0;
break;
case PTRACE_DETACH: /* detach a process that was attached. */
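Tracer-side sketch (hypothetical userspace snippet): PTRACE_LISTEN is
only accepted while the tracee sits in a PTRACE_EVENT_STOP group-stop,
which is exactly what the rewritten check enforces before setting
JOBCTL_LISTENING:

#include <sys/types.h>
#include <sys/ptrace.h>
#include <stdio.h>

#ifndef PTRACE_LISTEN
#define PTRACE_LISTEN 0x4208	/* from <linux/ptrace.h> */
#endif

/* pid has just reported a group-stop with PTRACE_EVENT_STOP. */
static void listen_to(pid_t pid)
{
	if (ptrace(PTRACE_LISTEN, pid, 0, 0) != 0)
		perror("PTRACE_LISTEN");
}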
diff --git a/kernel/resource.c b/kernel/resource.c
index 3b3cedc52592..c8dc249da5ce 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -419,6 +419,9 @@ static int __find_resource(struct resource *root, struct resource *old,
else
tmp.end = root->end;
+ if (tmp.end < tmp.start)
+ goto next;
+
resource_clip(&tmp, constraint->min, constraint->max);
arch_remove_reservations(&tmp);
@@ -436,8 +439,10 @@ static int __find_resource(struct resource *root, struct resource *old,
return 0;
}
}
- if (!this)
+
+next: if (!this || this->end == root->end)
break;
+
if (this != old)
tmp.start = this->end + 1;
this = this->sibling;
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index 3c7cbc2c33be..a2e7e7210f3e 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -29,61 +29,6 @@
#include "rtmutex_common.h"
-# define TRACE_WARN_ON(x) WARN_ON(x)
-# define TRACE_BUG_ON(x) BUG_ON(x)
-
-# define TRACE_OFF() \
-do { \
- if (rt_trace_on) { \
- rt_trace_on = 0; \
- console_verbose(); \
- if (raw_spin_is_locked(&current->pi_lock)) \
- raw_spin_unlock(&current->pi_lock); \
- } \
-} while (0)
-
-# define TRACE_OFF_NOLOCK() \
-do { \
- if (rt_trace_on) { \
- rt_trace_on = 0; \
- console_verbose(); \
- } \
-} while (0)
-
-# define TRACE_BUG_LOCKED() \
-do { \
- TRACE_OFF(); \
- BUG(); \
-} while (0)
-
-# define TRACE_WARN_ON_LOCKED(c) \
-do { \
- if (unlikely(c)) { \
- TRACE_OFF(); \
- WARN_ON(1); \
- } \
-} while (0)
-
-# define TRACE_BUG_ON_LOCKED(c) \
-do { \
- if (unlikely(c)) \
- TRACE_BUG_LOCKED(); \
-} while (0)
-
-#ifdef CONFIG_SMP
-# define SMP_TRACE_BUG_ON_LOCKED(c) TRACE_BUG_ON_LOCKED(c)
-#else
-# define SMP_TRACE_BUG_ON_LOCKED(c) do { } while (0)
-#endif
-
-/*
- * deadlock detection flag. We turn it off when we detect
- * the first problem because we dont want to recurse back
- * into the tracing code when doing error printk or
- * executing a BUG():
- */
-static int rt_trace_on = 1;
-
static void printk_task(struct task_struct *p)
{
if (p)
@@ -111,8 +56,8 @@ static void printk_lock(struct rt_mutex *lock, int print_owner)
void rt_mutex_debug_task_free(struct task_struct *task)
{
- WARN_ON(!plist_head_empty(&task->pi_waiters));
- WARN_ON(task->pi_blocked_on);
+ DEBUG_LOCKS_WARN_ON(!plist_head_empty(&task->pi_waiters));
+ DEBUG_LOCKS_WARN_ON(task->pi_blocked_on);
}
/*
@@ -125,7 +70,7 @@ void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *act_waiter,
{
struct task_struct *task;
- if (!rt_trace_on || detect || !act_waiter)
+ if (!debug_locks || detect || !act_waiter)
return;
task = rt_mutex_owner(act_waiter->lock);
@@ -139,7 +84,7 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter)
{
struct task_struct *task;
- if (!waiter->deadlock_lock || !rt_trace_on)
+ if (!waiter->deadlock_lock || !debug_locks)
return;
rcu_read_lock();
@@ -149,7 +94,10 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter)
return;
}
- TRACE_OFF_NOLOCK();
+ if (!debug_locks_off()) {
+ rcu_read_unlock();
+ return;
+ }
printk("\n============================================\n");
printk( "[ BUG: circular locking deadlock detected! ]\n");
@@ -180,7 +128,6 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter)
printk("[ turning off deadlock detection."
"Please report this trace. ]\n\n");
- local_irq_disable();
}
void debug_rt_mutex_lock(struct rt_mutex *lock)
@@ -189,7 +136,7 @@ void debug_rt_mutex_lock(struct rt_mutex *lock)
void debug_rt_mutex_unlock(struct rt_mutex *lock)
{
- TRACE_WARN_ON_LOCKED(rt_mutex_owner(lock) != current);
+ DEBUG_LOCKS_WARN_ON(rt_mutex_owner(lock) != current);
}
void
@@ -199,7 +146,7 @@ debug_rt_mutex_proxy_lock(struct rt_mutex *lock, struct task_struct *powner)
void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock)
{
- TRACE_WARN_ON_LOCKED(!rt_mutex_owner(lock));
+ DEBUG_LOCKS_WARN_ON(!rt_mutex_owner(lock));
}
void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
@@ -213,8 +160,8 @@ void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
{
put_pid(waiter->deadlock_task_pid);
- TRACE_WARN_ON(!plist_node_empty(&waiter->list_entry));
- TRACE_WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
+ DEBUG_LOCKS_WARN_ON(!plist_node_empty(&waiter->list_entry));
+ DEBUG_LOCKS_WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
memset(waiter, 0x22, sizeof(*waiter));
}
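The idiom this file now shares with the rest of the lock debugging
code, in a condensed sketch (the reporting function is hypothetical):
debug_locks_off() returns nonzero only for the first caller, so one
report is printed and every later check stays quiet instead of
recursing into the debug code:

#include <linux/kernel.h>
#include <linux/debug_locks.h>

static void example_report_once(void)
{
	if (!debug_locks_off())
		return;	/* debugging already disabled, stay silent */
	printk(KERN_ERR "example: inconsistent rt_mutex state\n");
}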
diff --git a/kernel/sched.c b/kernel/sched.c
index ccacdbdecf45..8aa00803c1ec 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1739,7 +1739,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
#ifdef CONFIG_SMP
/*
* After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
- * successfuly executed on another CPU. We must ensure that updates of
+ * successfully executed on another CPU. We must ensure that updates of
* per-task data have been completed by this moment.
*/
smp_wmb();
@@ -3065,7 +3065,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
local_irq_disable();
#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
- perf_event_task_sched_in(current);
+ perf_event_task_sched_in(prev, current);
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
local_irq_enable();
#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
@@ -3725,30 +3725,6 @@ unsigned long long task_sched_runtime(struct task_struct *p)
}
/*
- * Return sum_exec_runtime for the thread group.
- * In case the task is currently running, return the sum plus current's
- * pending runtime that have not been accounted yet.
- *
- * Note that the thread group might have other running tasks as well,
- * so the return value not includes other pending runtime that other
- * running tasks might have.
- */
-unsigned long long thread_group_sched_runtime(struct task_struct *p)
-{
- struct task_cputime totals;
- unsigned long flags;
- struct rq *rq;
- u64 ns;
-
- rq = task_rq_lock(p, &flags);
- thread_group_cputime(p, &totals);
- ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
- task_rq_unlock(rq, p, &flags);
-
- return ns;
-}
-
-/*
* Account user cpu time to a process.
* @p: the process that the cpu time gets accounted to
* @cputime: the cpu time spent in user space since the last update
@@ -4279,9 +4255,9 @@ pick_next_task(struct rq *rq)
}
/*
- * schedule() is the main scheduler function.
+ * __schedule() is the main scheduler function.
*/
-asmlinkage void __sched schedule(void)
+static void __sched __schedule(void)
{
struct task_struct *prev, *next;
unsigned long *switch_count;
@@ -4322,16 +4298,6 @@ need_resched:
if (to_wakeup)
try_to_wake_up_local(to_wakeup);
}
-
- /*
- * If we are going to sleep and we have plugged IO
- * queued, make sure to submit it to avoid deadlocks.
- */
- if (blk_needs_flush_plug(prev)) {
- raw_spin_unlock(&rq->lock);
- blk_schedule_flush_plug(prev);
- raw_spin_lock(&rq->lock);
- }
}
switch_count = &prev->nvcsw;
}
@@ -4369,6 +4335,26 @@ need_resched:
if (need_resched())
goto need_resched;
}
+
+static inline void sched_submit_work(struct task_struct *tsk)
+{
+ if (!tsk->state)
+ return;
+ /*
+ * If we are going to sleep and we have plugged IO queued,
+ * make sure to submit it to avoid deadlocks.
+ */
+ if (blk_needs_flush_plug(tsk))
+ blk_schedule_flush_plug(tsk);
+}
+
+asmlinkage void __sched schedule(void)
+{
+ struct task_struct *tsk = current;
+
+ sched_submit_work(tsk);
+ __schedule();
+}
EXPORT_SYMBOL(schedule);
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
@@ -4435,7 +4421,7 @@ asmlinkage void __sched notrace preempt_schedule(void)
do {
add_preempt_count_notrace(PREEMPT_ACTIVE);
- schedule();
+ __schedule();
sub_preempt_count_notrace(PREEMPT_ACTIVE);
/*
@@ -4463,7 +4449,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
do {
add_preempt_count(PREEMPT_ACTIVE);
local_irq_enable();
- schedule();
+ __schedule();
local_irq_disable();
sub_preempt_count(PREEMPT_ACTIVE);
@@ -5588,7 +5574,7 @@ static inline int should_resched(void)
static void __cond_resched(void)
{
add_preempt_count(PREEMPT_ACTIVE);
- schedule();
+ __schedule();
sub_preempt_count(PREEMPT_ACTIVE);
}
@@ -7443,6 +7429,7 @@ static void __sdt_free(const struct cpumask *cpu_map)
struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j);
if (sd && (sd->flags & SD_OVERLAP))
free_sched_groups(sd->groups, 0);
+ kfree(*per_cpu_ptr(sdd->sd, j));
kfree(*per_cpu_ptr(sdd->sg, j));
kfree(*per_cpu_ptr(sdd->sgp, j));
}
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 97540f0c9e47..af1177858be3 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1050,7 +1050,7 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
*/
if (curr && unlikely(rt_task(curr)) &&
(curr->rt.nr_cpus_allowed < 2 ||
- curr->prio < p->prio) &&
+ curr->prio <= p->prio) &&
(p->rt.nr_cpus_allowed > 1)) {
int target = find_lowest_rq(p);
@@ -1581,7 +1581,7 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p)
p->rt.nr_cpus_allowed > 1 &&
rt_task(rq->curr) &&
(rq->curr->rt.nr_cpus_allowed < 2 ||
- rq->curr->prio < p->prio))
+ rq->curr->prio <= p->prio))
push_rt_tasks(rq);
}
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 331e01bcd026..87f9e36ea56e 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -282,10 +282,10 @@ static inline void account_group_user_time(struct task_struct *tsk,
if (!cputimer->running)
return;
- spin_lock(&cputimer->lock);
+ raw_spin_lock(&cputimer->lock);
cputimer->cputime.utime =
cputime_add(cputimer->cputime.utime, cputime);
- spin_unlock(&cputimer->lock);
+ raw_spin_unlock(&cputimer->lock);
}
/**
@@ -306,10 +306,10 @@ static inline void account_group_system_time(struct task_struct *tsk,
if (!cputimer->running)
return;
- spin_lock(&cputimer->lock);
+ raw_spin_lock(&cputimer->lock);
cputimer->cputime.stime =
cputime_add(cputimer->cputime.stime, cputime);
- spin_unlock(&cputimer->lock);
+ raw_spin_unlock(&cputimer->lock);
}
/**
@@ -330,7 +330,7 @@ static inline void account_group_exec_runtime(struct task_struct *tsk,
if (!cputimer->running)
return;
- spin_lock(&cputimer->lock);
+ raw_spin_lock(&cputimer->lock);
cputimer->cputime.sum_exec_runtime += ns;
- spin_unlock(&cputimer->lock);
+ raw_spin_unlock(&cputimer->lock);
}
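
The spin_lock to raw_spin_lock conversions here, and the matching ones in
semaphore.c, timer_stats.c, ring_buffer.c, trace.c and trace_irqsoff.c
below, matter mainly on PREEMPT_RT, where plain spinlock_t becomes a
sleeping lock; raw_spinlock_t keeps true busy-wait semantics for these
short, scheduler-adjacent critical sections. The calling pattern is
unchanged. A minimal userspace sketch, using a pthread spinlock as an
assumed stand-in for raw_spinlock_t (build with -pthread):

#include <pthread.h>
#include <stdio.h>

static pthread_spinlock_t lock;
static unsigned long long sum_exec_runtime;

static void account_runtime(unsigned long long ns)
{
	pthread_spin_lock(&lock);	/* busy-waits, never sleeps */
	sum_exec_runtime += ns;		/* keep the section this short */
	pthread_spin_unlock(&lock);
}

int main(void)
{
	pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);
	account_runtime(1000);
	printf("%llu\n", sum_exec_runtime);
	pthread_spin_destroy(&lock);
	return 0;
}
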
diff --git a/kernel/semaphore.c b/kernel/semaphore.c
index 94a62c0d4ade..d831841e55a7 100644
--- a/kernel/semaphore.c
+++ b/kernel/semaphore.c
@@ -54,12 +54,12 @@ void down(struct semaphore *sem)
{
unsigned long flags;
- spin_lock_irqsave(&sem->lock, flags);
+ raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(sem->count > 0))
sem->count--;
else
__down(sem);
- spin_unlock_irqrestore(&sem->lock, flags);
+ raw_spin_unlock_irqrestore(&sem->lock, flags);
}
EXPORT_SYMBOL(down);
@@ -77,12 +77,12 @@ int down_interruptible(struct semaphore *sem)
unsigned long flags;
int result = 0;
- spin_lock_irqsave(&sem->lock, flags);
+ raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(sem->count > 0))
sem->count--;
else
result = __down_interruptible(sem);
- spin_unlock_irqrestore(&sem->lock, flags);
+ raw_spin_unlock_irqrestore(&sem->lock, flags);
return result;
}
@@ -103,12 +103,12 @@ int down_killable(struct semaphore *sem)
unsigned long flags;
int result = 0;
- spin_lock_irqsave(&sem->lock, flags);
+ raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(sem->count > 0))
sem->count--;
else
result = __down_killable(sem);
- spin_unlock_irqrestore(&sem->lock, flags);
+ raw_spin_unlock_irqrestore(&sem->lock, flags);
return result;
}
@@ -132,11 +132,11 @@ int down_trylock(struct semaphore *sem)
unsigned long flags;
int count;
- spin_lock_irqsave(&sem->lock, flags);
+ raw_spin_lock_irqsave(&sem->lock, flags);
count = sem->count - 1;
if (likely(count >= 0))
sem->count = count;
- spin_unlock_irqrestore(&sem->lock, flags);
+ raw_spin_unlock_irqrestore(&sem->lock, flags);
return (count < 0);
}
@@ -157,12 +157,12 @@ int down_timeout(struct semaphore *sem, long jiffies)
unsigned long flags;
int result = 0;
- spin_lock_irqsave(&sem->lock, flags);
+ raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(sem->count > 0))
sem->count--;
else
result = __down_timeout(sem, jiffies);
- spin_unlock_irqrestore(&sem->lock, flags);
+ raw_spin_unlock_irqrestore(&sem->lock, flags);
return result;
}
@@ -179,12 +179,12 @@ void up(struct semaphore *sem)
{
unsigned long flags;
- spin_lock_irqsave(&sem->lock, flags);
+ raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(list_empty(&sem->wait_list)))
sem->count++;
else
__up(sem);
- spin_unlock_irqrestore(&sem->lock, flags);
+ raw_spin_unlock_irqrestore(&sem->lock, flags);
}
EXPORT_SYMBOL(up);
@@ -217,9 +217,9 @@ static inline int __sched __down_common(struct semaphore *sem, long state,
if (timeout <= 0)
goto timed_out;
__set_task_state(task, state);
- spin_unlock_irq(&sem->lock);
+ raw_spin_unlock_irq(&sem->lock);
timeout = schedule_timeout(timeout);
- spin_lock_irq(&sem->lock);
+ raw_spin_lock_irq(&sem->lock);
if (waiter.up)
return 0;
}
diff --git a/kernel/signal.c b/kernel/signal.c
index 291c9700be75..d252be2d3de5 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1344,13 +1344,24 @@ int kill_proc_info(int sig, struct siginfo *info, pid_t pid)
return error;
}
+static int kill_as_cred_perm(const struct cred *cred,
+ struct task_struct *target)
+{
+ const struct cred *pcred = __task_cred(target);
+ if (cred->user_ns != pcred->user_ns)
+ return 0;
+ if (cred->euid != pcred->suid && cred->euid != pcred->uid &&
+ cred->uid != pcred->suid && cred->uid != pcred->uid)
+ return 0;
+ return 1;
+}
+
/* like kill_pid_info(), but doesn't use uid/euid of "current" */
-int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
- uid_t uid, uid_t euid, u32 secid)
+int kill_pid_info_as_cred(int sig, struct siginfo *info, struct pid *pid,
+ const struct cred *cred, u32 secid)
{
int ret = -EINVAL;
struct task_struct *p;
- const struct cred *pcred;
unsigned long flags;
if (!valid_signal(sig))
@@ -1362,10 +1373,7 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
ret = -ESRCH;
goto out_unlock;
}
- pcred = __task_cred(p);
- if (si_fromuser(info) &&
- euid != pcred->suid && euid != pcred->uid &&
- uid != pcred->suid && uid != pcred->uid) {
+ if (si_fromuser(info) && !kill_as_cred_perm(cred, p)) {
ret = -EPERM;
goto out_unlock;
}
@@ -1384,7 +1392,7 @@ out_unlock:
rcu_read_unlock();
return ret;
}
-EXPORT_SYMBOL_GPL(kill_pid_info_as_uid);
+EXPORT_SYMBOL_GPL(kill_pid_info_as_cred);
/*
* kill_something_info() interprets pid in interesting ways just like kill(2).
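
kill_as_cred_perm() above factors out the kill() permission rule: the
sender must share the target's user namespace, and the sender's uid or
euid must match the target's uid or saved uid. A self-contained sketch
of the rule; the struct below is a simplified stand-in for the kernel's
struct cred:

#include <stdio.h>

struct fake_cred { int user_ns; unsigned uid, euid, suid; };

static int kill_perm(const struct fake_cred *c, const struct fake_cred *t)
{
	if (c->user_ns != t->user_ns)
		return 0;
	if (c->euid != t->suid && c->euid != t->uid &&
	    c->uid  != t->suid && c->uid  != t->uid)
		return 0;
	return 1;
}

int main(void)
{
	struct fake_cred sender = { 0, 1000, 1000, 1000 };
	struct fake_cred target = { 0, 1000, 0, 0 };	/* setuid-root, real uid 1000 */

	/* allowed: sender's uid matches the target's real uid */
	printf("%d\n", kill_perm(&sender, &target));	/* 1 */
	target.uid = 0;					/* now fully root */
	printf("%d\n", kill_perm(&sender, &target));	/* 0 */
	return 0;
}
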
diff --git a/kernel/sys.c b/kernel/sys.c
index a101ba36c444..58459509b14c 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -37,6 +37,8 @@
#include <linux/fs_struct.h>
#include <linux/gfp.h>
#include <linux/syscore_ops.h>
+#include <linux/version.h>
+#include <linux/ctype.h>
#include <linux/compat.h>
#include <linux/syscalls.h>
@@ -44,6 +46,8 @@
#include <linux/user_namespace.h>
#include <linux/kmsg_dump.h>
+/* Pulls in UTS_RELEASE for override_release(); move somewhere else to avoid recompiling? */
+#include <generated/utsrelease.h>
#include <asm/uaccess.h>
#include <asm/io.h>
@@ -621,11 +625,18 @@ static int set_user(struct cred *new)
if (!new_user)
return -EAGAIN;
+ /*
+ * We don't fail here when the NPROC limit is exceeded, because too
+ * many poorly written programs don't check the set*uid() return code
+ * and assume it never fails if called by root. We may still enforce
+ * the NPROC limit for programs doing set*uid()+execve() by harmlessly
+ * deferring the failure to the execve() stage.
+ */
if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) &&
- new_user != INIT_USER) {
- free_uid(new_user);
- return -EAGAIN;
- }
+ new_user != INIT_USER)
+ current->flags |= PF_NPROC_EXCEEDED;
+ else
+ current->flags &= ~PF_NPROC_EXCEEDED;
free_uid(new->user);
new->user = new_user;
@@ -1154,6 +1165,34 @@ DECLARE_RWSEM(uts_sem);
#define override_architecture(name) 0
#endif
+/*
+ * Work around broken programs that cannot handle "Linux 3.0".
+ * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40.
+ */
+static int override_release(char __user *release, int len)
+{
+ int ret = 0;
+ char buf[65];
+
+ if (current->personality & UNAME26) {
+ char *rest = UTS_RELEASE;
+ int ndots = 0;
+ unsigned v;
+
+ while (*rest) {
+ if (*rest == '.' && ++ndots >= 3)
+ break;
+ if (!isdigit(*rest) && *rest != '.')
+ break;
+ rest++;
+ }
+ v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40;
+ snprintf(buf, len, "2.6.%u%s", v, rest);
+ ret = copy_to_user(release, buf, len);
+ }
+ return ret;
+}
+
SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
{
int errno = 0;
@@ -1163,6 +1202,8 @@ SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
errno = -EFAULT;
up_read(&uts_sem);
+ if (!errno && override_release(name->release, sizeof(name->release)))
+ errno = -EFAULT;
if (!errno && override_architecture(name))
errno = -EFAULT;
return errno;
@@ -1184,6 +1225,8 @@ SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
error = -EFAULT;
up_read(&uts_sem);
+ if (!error && override_release(name->release, sizeof(name->release)))
+ error = -EFAULT;
if (!error && override_architecture(name))
error = -EFAULT;
return error;
@@ -1218,6 +1261,8 @@ SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
if (!error && override_architecture(name))
error = -EFAULT;
+ if (!error && override_release(name->release, sizeof(name->release)))
+ error = -EFAULT;
return error ? -EFAULT : 0;
}
#endif
@@ -1714,6 +1759,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
sizeof(me->comm) - 1) < 0)
return -EFAULT;
set_task_comm(me, comm);
+ proc_comm_connector(me);
return 0;
case PR_GET_NAME:
get_task_comm(comm, me);
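
override_release() above skips the leading major.minor digits of
UTS_RELEASE, keeps the remainder as a suffix, and reports the release as
2.6.(minor+40), so a 3.1 kernel identifies itself as 2.6.41 to UNAME26
tasks. A worked userspace example; UTS_RELEASE and the version code are
hard-coded stand-ins for the generated kernel values:

#include <stdio.h>
#include <ctype.h>

#define FAKE_UTS_RELEASE  "3.1.0-rc4"
#define FAKE_VERSION_CODE ((3 << 16) + (1 << 8) + 0)	/* KERNEL_VERSION(3,1,0) */

int main(void)
{
	const char *rest = FAKE_UTS_RELEASE;
	int ndots = 0;
	unsigned v;
	char buf[65];

	while (*rest) {
		if (*rest == '.' && ++ndots >= 3)
			break;
		if (!isdigit((unsigned char)*rest) && *rest != '.')
			break;
		rest++;
	}
	v = ((FAKE_VERSION_CODE >> 8) & 0xff) + 40;	/* 1 + 40 */
	snprintf(buf, sizeof(buf), "2.6.%u%s", v, rest);
	printf("%s\n", buf);				/* "2.6.41-rc4" */
	return 0;
}
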
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 62cbc8877fef..a9a5de07c4f1 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -16,7 +16,6 @@ asmlinkage long sys_ni_syscall(void)
return -ENOSYS;
}
-cond_syscall(sys_nfsservctl);
cond_syscall(sys_quotactl);
cond_syscall(sys32_quotactl);
cond_syscall(sys_acct);
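
The removed line is the stub registration for the deleted nfsservctl
syscall. cond_syscall() makes each listed entry a weak alias for
sys_ni_syscall(), so a syscall that is configured out still links and
returns -ENOSYS. A sketch of the mechanism, simplified from the kernel's
definition (GCC-style weak aliases, ELF targets):

#include <errno.h>
#include <stdio.h>

long sys_ni_syscall(void)
{
	return -ENOSYS;
}

/* cond_syscall(sys_quotactl) expands to roughly this: */
long sys_quotactl(void) __attribute__((weak, alias("sys_ni_syscall")));

int main(void)
{
	/* no strong definition was linked in, so the weak alias answers */
	printf("%ld\n", sys_quotactl());	/* -38 on Linux */
	return 0;
}
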
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 3b8e028b9601..6318b511afa1 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1,6 +1,6 @@
#include <linux/stat.h>
#include <linux/sysctl.h>
-#include "../fs/xfs/linux-2.6/xfs_sysctl.h"
+#include "../fs/xfs/xfs_sysctl.h"
#include <linux/sunrpc/debug.h>
#include <linux/string.h>
#include <net/ip_vs.h>
@@ -214,7 +214,7 @@ static const struct bin_table bin_net_ipv4_route_table[] = {
{ CTL_INT, NET_IPV4_ROUTE_GC_MIN_INTERVAL, "gc_min_interval" },
{ CTL_INT, NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS, "gc_min_interval_ms" },
{ CTL_INT, NET_IPV4_ROUTE_GC_TIMEOUT, "gc_timeout" },
- { CTL_INT, NET_IPV4_ROUTE_GC_INTERVAL, "gc_interval" },
+ /* NET_IPV4_ROUTE_GC_INTERVAL "gc_interval" no longer used */
{ CTL_INT, NET_IPV4_ROUTE_REDIRECT_LOAD, "redirect_load" },
{ CTL_INT, NET_IPV4_ROUTE_REDIRECT_NUMBER, "redirect_number" },
{ CTL_INT, NET_IPV4_ROUTE_REDIRECT_SILENCE, "redirect_silence" },
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
index 4e4932a7b360..362da653813d 100644
--- a/kernel/sysctl_check.c
+++ b/kernel/sysctl_check.c
@@ -1,6 +1,6 @@
#include <linux/stat.h>
#include <linux/sysctl.h>
-#include "../fs/xfs/linux-2.6/xfs_sysctl.h"
+#include "../fs/xfs/xfs_sysctl.h"
#include <linux/sunrpc/debug.h>
#include <linux/string.h>
#include <net/ip_vs.h>
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index e19ce1454ee1..e66046456f4f 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -655,6 +655,7 @@ static struct genl_ops taskstats_ops = {
.cmd = TASKSTATS_CMD_GET,
.doit = taskstats_user_cmd,
.policy = taskstats_cmd_get_policy,
+ .flags = GENL_ADMIN_PERM,
};
static struct genl_ops cgroupstats_ops = {
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 59f369f98a04..ea5e1a928d5b 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -441,6 +441,8 @@ static int alarm_timer_create(struct k_itimer *new_timer)
static void alarm_timer_get(struct k_itimer *timr,
struct itimerspec *cur_setting)
{
+ memset(cur_setting, 0, sizeof(struct itimerspec));
+
cur_setting->it_interval =
ktime_to_timespec(timr->it.alarmtimer.period);
cur_setting->it_value =
@@ -479,11 +481,17 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
if (!rtcdev)
return -ENOTSUPP;
- /* Save old values */
- old_setting->it_interval =
- ktime_to_timespec(timr->it.alarmtimer.period);
- old_setting->it_value =
- ktime_to_timespec(timr->it.alarmtimer.node.expires);
+ /*
+ * XXX HACK! A too-small interval period on alarmtimers can
+ * currently be used to DoS a system. Cap the interval here
+ * to 100us and solve this properly in a future patch! -jstultz
+ */
+ if ((new_setting->it_interval.tv_sec == 0) &&
+ (new_setting->it_interval.tv_nsec < 100000))
+ new_setting->it_interval.tv_nsec = 100000;
+
+ if (old_setting)
+ alarm_timer_get(timr, old_setting);
/* If the timer was already set, cancel it */
alarm_cancel(&timr->it.alarmtimer);
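
The clamp fires only for pure sub-100us periods: an interval with
tv_sec == 0 and tv_nsec below 100000 is raised to exactly 100us, so a
1ns periodic alarmtimer can no longer flood the system with expirations.
A small sketch of the clamp, using plain struct timespec in place of the
kernel's itimerspec fields:

#include <stdio.h>
#include <time.h>

static void clamp_interval(struct timespec *it)
{
	if (it->tv_sec == 0 && it->tv_nsec < 100000)
		it->tv_nsec = 100000;	/* 100us floor */
}

int main(void)
{
	struct timespec evil = { 0, 1 };	/* 1ns period: DoS-capable */
	struct timespec fine = { 1, 0 };	/* 1s period: untouched */

	clamp_interval(&evil);
	clamp_interval(&fine);
	printf("evil -> %ld.%09ld\n", (long)evil.tv_sec, evil.tv_nsec);	/* 0.000100000 */
	printf("fine -> %ld.%09ld\n", (long)fine.tv_sec, fine.tv_nsec);	/* 1.000000000 */
	return 0;
}
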
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
index a5d0a3a85dd8..0b537f27b559 100644
--- a/kernel/time/timer_stats.c
+++ b/kernel/time/timer_stats.c
@@ -81,7 +81,7 @@ struct entry {
/*
* Spinlock protecting the tables - not taken during lookup:
*/
-static DEFINE_SPINLOCK(table_lock);
+static DEFINE_RAW_SPINLOCK(table_lock);
/*
* Per-CPU lookup locks for fast hash lookup:
@@ -188,7 +188,7 @@ static struct entry *tstat_lookup(struct entry *entry, char *comm)
prev = NULL;
curr = *head;
- spin_lock(&table_lock);
+ raw_spin_lock(&table_lock);
/*
* Make sure we have not raced with another CPU:
*/
@@ -215,7 +215,7 @@ static struct entry *tstat_lookup(struct entry *entry, char *comm)
*head = curr;
}
out_unlock:
- spin_unlock(&table_lock);
+ raw_spin_unlock(&table_lock);
return curr;
}
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 2ad39e556cb4..cd3134510f3d 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -82,7 +82,7 @@ config EVENT_POWER_TRACING_DEPRECATED
power:power_frequency
This is for userspace compatibility
and will vanish after 5 kernel iterations,
- namely 2.6.41.
+ namely 3.1.
config CONTEXT_SWITCH_TRACER
bool
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 761c510a06c5..f49405f842f4 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -53,6 +53,9 @@ endif
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
obj-$(CONFIG_TRACEPOINTS) += power-traces.o
+ifeq ($(CONFIG_PM_RUNTIME),y)
+obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o
+endif
ifeq ($(CONFIG_TRACING),y)
obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
endif
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 6957aa298dfa..7c910a5593a6 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -206,6 +206,8 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
what |= MASK_TC_BIT(rw, RAHEAD);
what |= MASK_TC_BIT(rw, META);
what |= MASK_TC_BIT(rw, DISCARD);
+ what |= MASK_TC_BIT(rw, FLUSH);
+ what |= MASK_TC_BIT(rw, FUA);
pid = tsk->pid;
if (act_log_check(bt, what, sector, pid))
@@ -1054,6 +1056,9 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
goto out;
}
+ if (tc & BLK_TC_FLUSH)
+ rwbs[i++] = 'F';
+
if (tc & BLK_TC_DISCARD)
rwbs[i++] = 'D';
else if (tc & BLK_TC_WRITE)
@@ -1063,10 +1068,10 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
else
rwbs[i++] = 'N';
+ if (tc & BLK_TC_FUA)
+ rwbs[i++] = 'F';
if (tc & BLK_TC_AHEAD)
rwbs[i++] = 'A';
- if (tc & BLK_TC_BARRIER)
- rwbs[i++] = 'B';
if (tc & BLK_TC_SYNC)
rwbs[i++] = 'S';
if (tc & BLK_TC_META)
@@ -1132,7 +1137,7 @@ typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act);
static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
{
- char rwbs[6];
+ char rwbs[RWBS_LEN];
unsigned long long ts = iter->ts;
unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC);
unsigned secs = (unsigned long)ts;
@@ -1148,7 +1153,7 @@ static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
static int blk_log_action(struct trace_iterator *iter, const char *act)
{
- char rwbs[6];
+ char rwbs[RWBS_LEN];
const struct blk_io_trace *t = te_blk_io_trace(iter->ent);
fill_rwbs(rwbs, t);
@@ -1561,7 +1566,7 @@ static const struct {
} mask_maps[] = {
{ BLK_TC_READ, "read" },
{ BLK_TC_WRITE, "write" },
- { BLK_TC_BARRIER, "barrier" },
+ { BLK_TC_FLUSH, "flush" },
{ BLK_TC_SYNC, "sync" },
{ BLK_TC_QUEUE, "queue" },
{ BLK_TC_REQUEUE, "requeue" },
@@ -1573,6 +1578,7 @@ static const struct {
{ BLK_TC_META, "meta" },
{ BLK_TC_DISCARD, "discard" },
{ BLK_TC_DRV_DATA, "drv_data" },
+ { BLK_TC_FUA, "fua" },
};
static int blk_trace_str2mask(const char *str)
@@ -1788,6 +1794,9 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
{
int i = 0;
+ if (rw & REQ_FLUSH)
+ rwbs[i++] = 'F';
+
if (rw & WRITE)
rwbs[i++] = 'W';
else if (rw & REQ_DISCARD)
@@ -1797,6 +1806,8 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
else
rwbs[i++] = 'N';
+ if (rw & REQ_FUA)
+ rwbs[i++] = 'F';
if (rw & REQ_RAHEAD)
rwbs[i++] = 'A';
if (rw & REQ_SYNC)
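
With FLUSH and FUA added, an rwbs string can now carry two F markers, a
leading one for FLUSH and one after the direction letter for FUA, which
is why the fixed char rwbs[6] buffers above grow to RWBS_LEN. A sketch
of the resulting fill order; the flag bits below are illustrative, not
the kernel's REQ_* values:

#include <stdio.h>

#define F_FLUSH   (1 << 0)
#define F_WRITE   (1 << 1)
#define F_DISCARD (1 << 2)
#define F_FUA     (1 << 3)
#define F_RAHEAD  (1 << 4)
#define F_SYNC    (1 << 5)

static void fill_rwbs(char *rwbs, unsigned rw)
{
	int i = 0;

	if (rw & F_FLUSH)
		rwbs[i++] = 'F';	/* flush comes first */
	if (rw & F_WRITE)
		rwbs[i++] = 'W';
	else if (rw & F_DISCARD)
		rwbs[i++] = 'D';
	else
		rwbs[i++] = 'R';
	if (rw & F_FUA)
		rwbs[i++] = 'F';	/* fua follows the direction */
	if (rw & F_RAHEAD)
		rwbs[i++] = 'A';
	if (rw & F_SYNC)
		rwbs[i++] = 'S';
	rwbs[i] = '\0';
}

int main(void)
{
	char rwbs[8];	/* the two possible 'F's are why rwbs[6] was outgrown */

	fill_rwbs(rwbs, F_FLUSH | F_WRITE | F_FUA | F_SYNC);
	printf("%s\n", rwbs);	/* "FWFS" */
	return 0;
}
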
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 731201bf4acc..f2f821acc597 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -478,7 +478,7 @@ struct ring_buffer_per_cpu {
int cpu;
atomic_t record_disabled;
struct ring_buffer *buffer;
- spinlock_t reader_lock; /* serialize readers */
+ raw_spinlock_t reader_lock; /* serialize readers */
arch_spinlock_t lock;
struct lock_class_key lock_key;
struct list_head *pages;
@@ -1062,7 +1062,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
cpu_buffer->cpu = cpu;
cpu_buffer->buffer = buffer;
- spin_lock_init(&cpu_buffer->reader_lock);
+ raw_spin_lock_init(&cpu_buffer->reader_lock);
lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
@@ -1259,7 +1259,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
struct list_head *p;
unsigned i;
- spin_lock_irq(&cpu_buffer->reader_lock);
+ raw_spin_lock_irq(&cpu_buffer->reader_lock);
rb_head_page_deactivate(cpu_buffer);
for (i = 0; i < nr_pages; i++) {
@@ -1277,7 +1277,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
rb_check_pages(cpu_buffer);
out:
- spin_unlock_irq(&cpu_buffer->reader_lock);
+ raw_spin_unlock_irq(&cpu_buffer->reader_lock);
}
static void
@@ -1288,7 +1288,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
struct list_head *p;
unsigned i;
- spin_lock_irq(&cpu_buffer->reader_lock);
+ raw_spin_lock_irq(&cpu_buffer->reader_lock);
rb_head_page_deactivate(cpu_buffer);
for (i = 0; i < nr_pages; i++) {
@@ -1303,7 +1303,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
rb_check_pages(cpu_buffer);
out:
- spin_unlock_irq(&cpu_buffer->reader_lock);
+ raw_spin_unlock_irq(&cpu_buffer->reader_lock);
}
/**
@@ -2804,9 +2804,9 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
cpu_buffer = iter->cpu_buffer;
- spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
rb_iter_reset(iter);
- spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
}
EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
@@ -3265,12 +3265,12 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
again:
local_irq_save(flags);
if (dolock)
- spin_lock(&cpu_buffer->reader_lock);
+ raw_spin_lock(&cpu_buffer->reader_lock);
event = rb_buffer_peek(cpu_buffer, ts, lost_events);
if (event && event->type_len == RINGBUF_TYPE_PADDING)
rb_advance_reader(cpu_buffer);
if (dolock)
- spin_unlock(&cpu_buffer->reader_lock);
+ raw_spin_unlock(&cpu_buffer->reader_lock);
local_irq_restore(flags);
if (event && event->type_len == RINGBUF_TYPE_PADDING)
@@ -3295,9 +3295,9 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
unsigned long flags;
again:
- spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
event = rb_iter_peek(iter, ts);
- spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
if (event && event->type_len == RINGBUF_TYPE_PADDING)
goto again;
@@ -3337,7 +3337,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
cpu_buffer = buffer->buffers[cpu];
local_irq_save(flags);
if (dolock)
- spin_lock(&cpu_buffer->reader_lock);
+ raw_spin_lock(&cpu_buffer->reader_lock);
event = rb_buffer_peek(cpu_buffer, ts, lost_events);
if (event) {
@@ -3346,7 +3346,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
}
if (dolock)
- spin_unlock(&cpu_buffer->reader_lock);
+ raw_spin_unlock(&cpu_buffer->reader_lock);
local_irq_restore(flags);
out:
@@ -3438,11 +3438,11 @@ ring_buffer_read_start(struct ring_buffer_iter *iter)
cpu_buffer = iter->cpu_buffer;
- spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
arch_spin_lock(&cpu_buffer->lock);
rb_iter_reset(iter);
arch_spin_unlock(&cpu_buffer->lock);
- spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
}
EXPORT_SYMBOL_GPL(ring_buffer_read_start);
@@ -3477,7 +3477,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
unsigned long flags;
- spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
again:
event = rb_iter_peek(iter, ts);
if (!event)
@@ -3488,7 +3488,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
rb_advance_iter(iter);
out:
- spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
return event;
}
@@ -3557,7 +3557,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
atomic_inc(&cpu_buffer->record_disabled);
- spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
goto out;
@@ -3569,7 +3569,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
arch_spin_unlock(&cpu_buffer->lock);
out:
- spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
atomic_dec(&cpu_buffer->record_disabled);
}
@@ -3607,10 +3607,10 @@ int ring_buffer_empty(struct ring_buffer *buffer)
cpu_buffer = buffer->buffers[cpu];
local_irq_save(flags);
if (dolock)
- spin_lock(&cpu_buffer->reader_lock);
+ raw_spin_lock(&cpu_buffer->reader_lock);
ret = rb_per_cpu_empty(cpu_buffer);
if (dolock)
- spin_unlock(&cpu_buffer->reader_lock);
+ raw_spin_unlock(&cpu_buffer->reader_lock);
local_irq_restore(flags);
if (!ret)
@@ -3641,10 +3641,10 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
cpu_buffer = buffer->buffers[cpu];
local_irq_save(flags);
if (dolock)
- spin_lock(&cpu_buffer->reader_lock);
+ raw_spin_lock(&cpu_buffer->reader_lock);
ret = rb_per_cpu_empty(cpu_buffer);
if (dolock)
- spin_unlock(&cpu_buffer->reader_lock);
+ raw_spin_unlock(&cpu_buffer->reader_lock);
local_irq_restore(flags);
return ret;
@@ -3841,7 +3841,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
if (!bpage)
goto out;
- spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
reader = rb_get_reader_page(cpu_buffer);
if (!reader)
@@ -3964,7 +3964,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit);
out_unlock:
- spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
out:
return ret;
diff --git a/kernel/trace/rpm-traces.c b/kernel/trace/rpm-traces.c
new file mode 100644
index 000000000000..4b3b5eaf94d1
--- /dev/null
+++ b/kernel/trace/rpm-traces.c
@@ -0,0 +1,20 @@
+/*
+ * Runtime PM trace points
+ *
+ * Copyright (C) 2009 Ming Lei <ming.lei@canonical.com>
+ */
+
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/usb.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/rpm.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_return_int);
+EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_idle);
+EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_suspend);
+EXPORT_TRACEPOINT_SYMBOL_GPL(rpm_resume);
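
rpm-traces.c uses the usual one-definition tracepoint pattern: every
user includes trace/events/rpm.h for the declarations, and exactly one
translation unit defines CREATE_TRACE_POINTS before that include so the
tracepoint bodies are emitted once. A collapsed single-file sketch of
the pattern; the names are invented for illustration and this is not the
kernel's tracepoint machinery:

#include <stdio.h>

#define CREATE_TRACE_POINTS	/* only the rpm-traces.c equivalent sets this */

/* ---- stand-in for trace/events/rpm.h ---- */
void trace_rpm_idle(const char *dev);	/* declaration seen by everyone */

#ifdef CREATE_TRACE_POINTS		/* definition emitted only here */
void trace_rpm_idle(const char *dev)
{
	printf("rpm_idle: %s\n", dev);
}
#endif
/* ---- end header stand-in ---- */

int main(void)
{
	trace_rpm_idle("usb1");
	return 0;
}
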
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index e5df02c69b1d..0c8bdeeb358b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -341,7 +341,7 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE;
static int trace_stop_count;
-static DEFINE_SPINLOCK(tracing_start_lock);
+static DEFINE_RAW_SPINLOCK(tracing_start_lock);
static void wakeup_work_handler(struct work_struct *work)
{
@@ -960,7 +960,7 @@ void tracing_start(void)
if (tracing_disabled)
return;
- spin_lock_irqsave(&tracing_start_lock, flags);
+ raw_spin_lock_irqsave(&tracing_start_lock, flags);
if (--trace_stop_count) {
if (trace_stop_count < 0) {
/* Someone screwed up their debugging */
@@ -985,7 +985,7 @@ void tracing_start(void)
ftrace_start();
out:
- spin_unlock_irqrestore(&tracing_start_lock, flags);
+ raw_spin_unlock_irqrestore(&tracing_start_lock, flags);
}
/**
@@ -1000,7 +1000,7 @@ void tracing_stop(void)
unsigned long flags;
ftrace_stop();
- spin_lock_irqsave(&tracing_start_lock, flags);
+ raw_spin_lock_irqsave(&tracing_start_lock, flags);
if (trace_stop_count++)
goto out;
@@ -1018,7 +1018,7 @@ void tracing_stop(void)
arch_spin_unlock(&ftrace_max_lock);
out:
- spin_unlock_irqrestore(&tracing_start_lock, flags);
+ raw_spin_unlock_irqrestore(&tracing_start_lock, flags);
}
void trace_stop_cmdline_recording(void);
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 667aa8cc0cfc..11186212068c 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -23,7 +23,7 @@ static int tracer_enabled __read_mostly;
static DEFINE_PER_CPU(int, tracing_cpu);
-static DEFINE_SPINLOCK(max_trace_lock);
+static DEFINE_RAW_SPINLOCK(max_trace_lock);
enum {
TRACER_IRQS_OFF = (1 << 1),
@@ -321,7 +321,7 @@ check_critical_timing(struct trace_array *tr,
if (!report_latency(delta))
goto out;
- spin_lock_irqsave(&max_trace_lock, flags);
+ raw_spin_lock_irqsave(&max_trace_lock, flags);
/* check if we are still the max latency */
if (!report_latency(delta))
@@ -344,7 +344,7 @@ check_critical_timing(struct trace_array *tr,
max_sequence++;
out_unlock:
- spin_unlock_irqrestore(&max_trace_lock, flags);
+ raw_spin_unlock_irqrestore(&max_trace_lock, flags);
out:
data->critical_sequence = max_sequence;
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 24dc60d9fa1f..5bbfac85866e 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -78,6 +78,7 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
#define KB 1024
#define MB (1024*KB)
+#define KB_MASK (~(KB-1))
/*
* fill in extended accounting fields
*/
@@ -95,14 +96,14 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
stats->hiwater_vm = get_mm_hiwater_vm(mm) * PAGE_SIZE / KB;
mmput(mm);
}
- stats->read_char = p->ioac.rchar;
- stats->write_char = p->ioac.wchar;
- stats->read_syscalls = p->ioac.syscr;
- stats->write_syscalls = p->ioac.syscw;
+ stats->read_char = p->ioac.rchar & KB_MASK;
+ stats->write_char = p->ioac.wchar & KB_MASK;
+ stats->read_syscalls = p->ioac.syscr & KB_MASK;
+ stats->write_syscalls = p->ioac.syscw & KB_MASK;
#ifdef CONFIG_TASK_IO_ACCOUNTING
- stats->read_bytes = p->ioac.read_bytes;
- stats->write_bytes = p->ioac.write_bytes;
- stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes;
+ stats->read_bytes = p->ioac.read_bytes & KB_MASK;
+ stats->write_bytes = p->ioac.write_bytes & KB_MASK;
+ stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes & KB_MASK;
#else
stats->read_bytes = 0;
stats->write_bytes = 0;
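
KB_MASK clears the low 10 bits, so each counter is rounded down to a
whole multiple of 1024 before being reported; note that the same mask is
applied to the syscall counts as to the byte counts. A one-line demo of
the rounding, with an illustrative value:

#include <stdio.h>

#define KB      1024
#define KB_MASK (~(unsigned long long)(KB - 1))

int main(void)
{
	unsigned long long rchar = 5000;	/* bytes read, say */

	printf("%llu -> %llu\n", rchar, rchar & KB_MASK);	/* 5000 -> 4096 */
	return 0;
}
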
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 25fb1b0e53fa..1783aabc6128 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2412,8 +2412,13 @@ reflush:
for_each_cwq_cpu(cpu, wq) {
struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+ bool drained;
- if (!cwq->nr_active && list_empty(&cwq->delayed_works))
+ spin_lock_irq(&cwq->gcwq->lock);
+ drained = !cwq->nr_active && list_empty(&cwq->delayed_works);
+ spin_unlock_irq(&cwq->gcwq->lock);
+
+ if (drained)
continue;
if (++flush_cnt == 10 ||