Diffstat (limited to 'kernel')
 -rw-r--r--  kernel/debug/kdb/kdb_main.c |  21
 -rw-r--r--  kernel/exit.c               |   9
 -rw-r--r--  kernel/futex.c              |   3
 -rw-r--r--  kernel/futex_compat.c       |   3
 -rw-r--r--  kernel/hw_breakpoint.c      |   3
 -rw-r--r--  kernel/irq_work.c           |   4
 -rw-r--r--  kernel/module.c             |  12
 -rw-r--r--  kernel/perf_event.c         |  93
 -rw-r--r--  kernel/pm_qos_params.c      |   4
 -rw-r--r--  kernel/posix-cpu-timers.c   |  12
 -rw-r--r--  kernel/power/Kconfig        |   4
 -rw-r--r--  kernel/power/hibernate.c    |  22
 -rw-r--r--  kernel/power/suspend.c      |   5
 -rw-r--r--  kernel/power/swap.c         |  53
 -rw-r--r--  kernel/power/user.c         |   2
 -rw-r--r--  kernel/sched.c              |  39
 -rw-r--r--  kernel/sched_fair.c         |  48
 -rw-r--r--  kernel/sched_stoptask.c     |   4
 -rw-r--r--  kernel/sysctl.c             |   2
 -rw-r--r--  kernel/trace/Kconfig        |   2
 -rw-r--r--  kernel/trace/trace.c        |  20
21 files changed, 268 insertions, 97 deletions
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 37755d621924..a6e729766821 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -82,7 +82,7 @@ static kdbtab_t kdb_base_commands[50];  #define for_each_kdbcmd(cmd, num)					\  	for ((cmd) = kdb_base_commands, (num) = 0;			\  	     num < kdb_max_commands;					\ -	     num == KDB_BASE_CMD_MAX ? cmd = kdb_commands : cmd++, num++) +	     num++, num == KDB_BASE_CMD_MAX ? cmd = kdb_commands : cmd++)  typedef struct _kdbmsg {  	int	km_diag;	/* kdb diagnostic */ @@ -646,7 +646,7 @@ static int kdb_defcmd2(const char *cmdstr, const char *argv0)  	}  	if (!s->usable)  		return KDB_NOTIMP; -	s->command = kmalloc((s->count + 1) * sizeof(*(s->command)), GFP_KDB); +	s->command = kzalloc((s->count + 1) * sizeof(*(s->command)), GFP_KDB);  	if (!s->command) {  		kdb_printf("Could not allocate new kdb_defcmd table for %s\n",  			   cmdstr); @@ -2361,7 +2361,7 @@ static int kdb_pid(int argc, const char **argv)   */  static int kdb_ll(int argc, const char **argv)  { -	int diag; +	int diag = 0;  	unsigned long addr;  	long offset = 0;  	unsigned long va; @@ -2400,20 +2400,21 @@ static int kdb_ll(int argc, const char **argv)  		char buf[80];  		if (KDB_FLAG(CMD_INTERRUPT)) -			return 0; +			goto out;  		sprintf(buf, "%s " kdb_machreg_fmt "\n", command, va);  		diag = kdb_parse(buf);  		if (diag) -			return diag; +			goto out;  		addr = va + linkoffset;  		if (kdb_getword(&va, addr, sizeof(va))) -			return 0; +			goto out;  	} -	kfree(command); -	return 0; +out: +	kfree(command); +	return diag;  }  static int kdb_kgdb(int argc, const char **argv) @@ -2739,13 +2740,13 @@ int kdb_register_repeat(char *cmd,  		}  		if (kdb_commands) {  			memcpy(new, kdb_commands, -			       kdb_max_commands * sizeof(*new)); +			  (kdb_max_commands - KDB_BASE_CMD_MAX) * sizeof(*new));  			kfree(kdb_commands);  		}  		memset(new + kdb_max_commands, 0,  		       kdb_command_extend * sizeof(*new));  		kdb_commands = new; -		kp = kdb_commands + kdb_max_commands; +		kp = kdb_commands + kdb_max_commands - KDB_BASE_CMD_MAX;  		kdb_max_commands += kdb_command_extend;  	} diff --git a/kernel/exit.c b/kernel/exit.c index 21aa7b3001fb..676149a4ac5f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -914,6 +914,15 @@ NORET_TYPE void do_exit(long code)  	if (unlikely(!tsk->pid))  		panic("Attempted to kill the idle task!"); +	/* +	 * If do_exit is called because this processes oopsed, it's possible +	 * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before +	 * continuing. Amongst other possible reasons, this is to prevent +	 * mm_release()->clear_child_tid() from writing to a user-controlled +	 * kernel address. 
+	 */ +	set_fs(USER_DS); +  	tracehook_report_exit(&code);  	validate_creds_for_do_exit(tsk); diff --git a/kernel/futex.c b/kernel/futex.c index 6c683b37f2ce..40a8777a27d0 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2489,7 +2489,8 @@ void exit_robust_list(struct task_struct *curr)  {  	struct robust_list_head __user *head = curr->robust_list;  	struct robust_list __user *entry, *next_entry, *pending; -	unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip; +	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; +	unsigned int uninitialized_var(next_pi);  	unsigned long futex_offset;  	int rc; diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 06da4dfc339b..a7934ac75e5b 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -49,7 +49,8 @@ void compat_exit_robust_list(struct task_struct *curr)  {  	struct compat_robust_list_head __user *head = curr->compat_robust_list;  	struct robust_list __user *entry, *next_entry, *pending; -	unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip; +	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; +	unsigned int uninitialized_var(next_pi);  	compat_uptr_t uentry, next_uentry, upending;  	compat_long_t futex_offset;  	int rc; diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 2c9120f0afca..e5325825aeb6 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -620,7 +620,7 @@ static struct pmu perf_breakpoint = {  	.read		= hw_breakpoint_pmu_read,  }; -static int __init init_hw_breakpoint(void) +int __init init_hw_breakpoint(void)  {  	unsigned int **task_bp_pinned;  	int cpu, err_cpu; @@ -655,6 +655,5 @@ static int __init init_hw_breakpoint(void)  	return -ENOMEM;  } -core_initcall(init_hw_breakpoint); diff --git a/kernel/irq_work.c b/kernel/irq_work.c index f16763ff8481..90f881904bb1 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -145,7 +145,9 @@ void irq_work_run(void)  		 * Clear the BUSY bit and return to the free state if  		 * no-one else claimed it meanwhile.  		 */ -		cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL); +		(void)cmpxchg(&entry->next, +			      next_flags(NULL, IRQ_WORK_BUSY), +			      NULL);  	}  }  EXPORT_SYMBOL_GPL(irq_work_run); diff --git a/kernel/module.c b/kernel/module.c index 437a74a7524a..d190664f25ff 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2326,6 +2326,18 @@ static void find_module_sections(struct module *mod, struct load_info *info)  	kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) *  			   mod->num_trace_events, GFP_KERNEL);  #endif +#ifdef CONFIG_TRACING +	mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt", +					 sizeof(*mod->trace_bprintk_fmt_start), +					 &mod->num_trace_bprintk_fmt); +	/* +	 * This section contains pointers to allocated objects in the trace +	 * code and not scanning it leads to false positives. 
+	 */ +	kmemleak_scan_area(mod->trace_bprintk_fmt_start, +			   sizeof(*mod->trace_bprintk_fmt_start) * +			   mod->num_trace_bprintk_fmt, GFP_KERNEL); +#endif  #ifdef CONFIG_FTRACE_MCOUNT_RECORD  	/* sechdrs[0].sh_size is always zero */  	mod->ftrace_callsites = section_objs(info, "__mcount_loc", diff --git a/kernel/perf_event.c b/kernel/perf_event.c index cb6c0d2af68f..eac7e3364335 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -31,6 +31,7 @@  #include <linux/kernel_stat.h>  #include <linux/perf_event.h>  #include <linux/ftrace_event.h> +#include <linux/hw_breakpoint.h>  #include <asm/irq_regs.h> @@ -1286,8 +1287,6 @@ void __perf_event_task_sched_out(struct task_struct *task,  {  	int ctxn; -	perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); -  	for_each_task_context_nr(ctxn)  		perf_event_context_sched_out(task, ctxn, next);  } @@ -1621,8 +1620,12 @@ static void rotate_ctx(struct perf_event_context *ctx)  {  	raw_spin_lock(&ctx->lock); -	/* Rotate the first entry last of non-pinned groups */ -	list_rotate_left(&ctx->flexible_groups); +	/* +	 * Rotate the first entry last of non-pinned groups. Rotation might be +	 * disabled by the inheritance code. +	 */ +	if (!ctx->rotate_disable) +		list_rotate_left(&ctx->flexible_groups);  	raw_spin_unlock(&ctx->lock);  } @@ -2234,11 +2237,6 @@ int perf_event_release_kernel(struct perf_event *event)  	raw_spin_unlock_irq(&ctx->lock);  	mutex_unlock(&ctx->mutex); -	mutex_lock(&event->owner->perf_event_mutex); -	list_del_init(&event->owner_entry); -	mutex_unlock(&event->owner->perf_event_mutex); -	put_task_struct(event->owner); -  	free_event(event);  	return 0; @@ -2251,9 +2249,43 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);  static int perf_release(struct inode *inode, struct file *file)  {  	struct perf_event *event = file->private_data; +	struct task_struct *owner;  	file->private_data = NULL; +	rcu_read_lock(); +	owner = ACCESS_ONCE(event->owner); +	/* +	 * Matches the smp_wmb() in perf_event_exit_task(). If we observe +	 * !owner it means the list deletion is complete and we can indeed +	 * free this event, otherwise we need to serialize on +	 * owner->perf_event_mutex. +	 */ +	smp_read_barrier_depends(); +	if (owner) { +		/* +		 * Since delayed_put_task_struct() also drops the last +		 * task reference we can safely take a new reference +		 * while holding the rcu_read_lock(). +		 */ +		get_task_struct(owner); +	} +	rcu_read_unlock(); + +	if (owner) { +		mutex_lock(&owner->perf_event_mutex); +		/* +		 * We have to re-check the event->owner field, if it is cleared +		 * we raced with perf_event_exit_task(), acquiring the mutex +		 * ensured they're done, and we can proceed with freeing the +		 * event. 
+		 */ +		if (event->owner) +			list_del_init(&event->owner_entry); +		mutex_unlock(&owner->perf_event_mutex); +		put_task_struct(owner); +	} +  	return perf_event_release_kernel(event);  } @@ -5677,7 +5709,7 @@ SYSCALL_DEFINE5(perf_event_open,  	mutex_unlock(&ctx->mutex);  	event->owner = current; -	get_task_struct(current); +  	mutex_lock(¤t->perf_event_mutex);  	list_add_tail(&event->owner_entry, ¤t->perf_event_list);  	mutex_unlock(¤t->perf_event_mutex); @@ -5745,12 +5777,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,  	++ctx->generation;  	mutex_unlock(&ctx->mutex); -	event->owner = current; -	get_task_struct(current); -	mutex_lock(¤t->perf_event_mutex); -	list_add_tail(&event->owner_entry, ¤t->perf_event_list); -	mutex_unlock(¤t->perf_event_mutex); -  	return event;  err_free: @@ -5901,8 +5927,24 @@ again:   */  void perf_event_exit_task(struct task_struct *child)  { +	struct perf_event *event, *tmp;  	int ctxn; +	mutex_lock(&child->perf_event_mutex); +	list_for_each_entry_safe(event, tmp, &child->perf_event_list, +				 owner_entry) { +		list_del_init(&event->owner_entry); + +		/* +		 * Ensure the list deletion is visible before we clear +		 * the owner, closes a race against perf_release() where +		 * we need to serialize on the owner->perf_event_mutex. +		 */ +		smp_wmb(); +		event->owner = NULL; +	} +	mutex_unlock(&child->perf_event_mutex); +  	for_each_task_context_nr(ctxn)  		perf_event_exit_task_context(child, ctxn);  } @@ -6122,6 +6164,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn)  	struct perf_event *event;  	struct task_struct *parent = current;  	int inherited_all = 1; +	unsigned long flags;  	int ret = 0;  	child->perf_event_ctxp[ctxn] = NULL; @@ -6162,6 +6205,15 @@ int perf_event_init_context(struct task_struct *child, int ctxn)  			break;  	} +	/* +	 * We can't hold ctx->lock when iterating the ->flexible_group list due +	 * to allocations, but we need to prevent rotation because +	 * rotate_ctx() will change the list from interrupt context. 
+	 */ +	raw_spin_lock_irqsave(&parent_ctx->lock, flags); +	parent_ctx->rotate_disable = 1; +	raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); +  	list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {  		ret = inherit_task_group(event, parent, parent_ctx,  					 child, ctxn, &inherited_all); @@ -6169,6 +6221,10 @@ int perf_event_init_context(struct task_struct *child, int ctxn)  			break;  	} +	raw_spin_lock_irqsave(&parent_ctx->lock, flags); +	parent_ctx->rotate_disable = 0; +	raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); +  	child_ctx = child->perf_event_ctxp[ctxn];  	if (child_ctx && inherited_all) { @@ -6321,6 +6377,8 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)  void __init perf_event_init(void)  { +	int ret; +  	perf_event_init_all_cpus();  	init_srcu_struct(&pmus_srcu);  	perf_pmu_register(&perf_swevent); @@ -6328,4 +6386,7 @@ void __init perf_event_init(void)  	perf_pmu_register(&perf_task_clock);  	perf_tp_register();  	perf_cpu_notifier(perf_cpu_notify); + +	ret = init_hw_breakpoint(); +	WARN(ret, "hw_breakpoint initialization failed with: %d", ret);  } diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index c7a8f453919e..aeaa7f846821 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c @@ -121,10 +121,10 @@ static inline int pm_qos_get_value(struct pm_qos_object *o)  	switch (o->type) {  	case PM_QOS_MIN: -		return plist_last(&o->requests)->prio; +		return plist_first(&o->requests)->prio;  	case PM_QOS_MAX: -		return plist_first(&o->requests)->prio; +		return plist_last(&o->requests)->prio;  	default:  		/* runtime check for not using enum */ diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 6842eeba5879..05bb7173850e 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -37,13 +37,13 @@ static int check_clock(const clockid_t which_clock)  	if (pid == 0)  		return 0; -	read_lock(&tasklist_lock); +	rcu_read_lock();  	p = find_task_by_vpid(pid);  	if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ? -		   same_thread_group(p, current) : thread_group_leader(p))) { +		   same_thread_group(p, current) : has_group_leader_pid(p))) {  		error = -EINVAL;  	} -	read_unlock(&tasklist_lock); +	rcu_read_unlock();  	return error;  } @@ -390,7 +390,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)  	INIT_LIST_HEAD(&new_timer->it.cpu.entry); -	read_lock(&tasklist_lock); +	rcu_read_lock();  	if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) {  		if (pid == 0) {  			p = current; @@ -404,7 +404,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)  			p = current->group_leader;  		} else {  			p = find_task_by_vpid(pid); -			if (p && !thread_group_leader(p)) +			if (p && !has_group_leader_pid(p))  				p = NULL;  		}  	} @@ -414,7 +414,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)  	} else {  		ret = -EINVAL;  	} -	read_unlock(&tasklist_lock); +	rcu_read_unlock();  	return ret;  } diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 29bff6117abc..a5aff3ebad38 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -246,9 +246,13 @@ config PM_OPS  	depends on PM_SLEEP || PM_RUNTIME  	default y +config ARCH_HAS_OPP +	bool +  config PM_OPP  	bool "Operating Performance Point (OPP) Layer library"  	depends on PM +	depends on ARCH_HAS_OPP  	---help---  	  SOCs have a standard set of tuples consisting of frequency and  	  voltage pairs that the device will support per voltage domain. 
This diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 657272e91d0a..048d0b514831 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -327,7 +327,6 @@ static int create_image(int platform_mode)  int hibernation_snapshot(int platform_mode)  {  	int error; -	gfp_t saved_mask;  	error = platform_begin(platform_mode);  	if (error) @@ -339,7 +338,7 @@ int hibernation_snapshot(int platform_mode)  		goto Close;  	suspend_console(); -	saved_mask = clear_gfp_allowed_mask(GFP_IOFS); +	pm_restrict_gfp_mask();  	error = dpm_suspend_start(PMSG_FREEZE);  	if (error)  		goto Recover_platform; @@ -348,7 +347,10 @@ int hibernation_snapshot(int platform_mode)  		goto Recover_platform;  	error = create_image(platform_mode); -	/* Control returns here after successful restore */ +	/* +	 * Control returns here (1) after the image has been created or the +	 * image creation has failed and (2) after a successful restore. +	 */   Resume_devices:  	/* We may need to release the preallocated image pages here. */ @@ -357,7 +359,10 @@ int hibernation_snapshot(int platform_mode)  	dpm_resume_end(in_suspend ?  		(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); -	set_gfp_allowed_mask(saved_mask); + +	if (error || !in_suspend) +		pm_restore_gfp_mask(); +  	resume_console();   Close:  	platform_end(platform_mode); @@ -452,17 +457,16 @@ static int resume_target_kernel(bool platform_mode)  int hibernation_restore(int platform_mode)  {  	int error; -	gfp_t saved_mask;  	pm_prepare_console();  	suspend_console(); -	saved_mask = clear_gfp_allowed_mask(GFP_IOFS); +	pm_restrict_gfp_mask();  	error = dpm_suspend_start(PMSG_QUIESCE);  	if (!error) {  		error = resume_target_kernel(platform_mode);  		dpm_resume_end(PMSG_RECOVER);  	} -	set_gfp_allowed_mask(saved_mask); +	pm_restore_gfp_mask();  	resume_console();  	pm_restore_console();  	return error; @@ -476,7 +480,6 @@ int hibernation_restore(int platform_mode)  int hibernation_platform_enter(void)  {  	int error; -	gfp_t saved_mask;  	if (!hibernation_ops)  		return -ENOSYS; @@ -492,7 +495,6 @@ int hibernation_platform_enter(void)  	entering_platform_hibernation = true;  	suspend_console(); -	saved_mask = clear_gfp_allowed_mask(GFP_IOFS);  	error = dpm_suspend_start(PMSG_HIBERNATE);  	if (error) {  		if (hibernation_ops->recover) @@ -536,7 +538,6 @@ int hibernation_platform_enter(void)   Resume_devices:  	entering_platform_hibernation = false;  	dpm_resume_end(PMSG_RESTORE); -	set_gfp_allowed_mask(saved_mask);  	resume_console();   Close: @@ -646,6 +647,7 @@ int hibernate(void)  		swsusp_free();  		if (!error)  			power_down(); +		pm_restore_gfp_mask();  	} else {  		pr_debug("PM: Image restored successfully.\n");  	} diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 7335952ee473..ecf770509d0d 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -197,7 +197,6 @@ static int suspend_enter(suspend_state_t state)  int suspend_devices_and_enter(suspend_state_t state)  {  	int error; -	gfp_t saved_mask;  	if (!suspend_ops)  		return -ENOSYS; @@ -208,7 +207,7 @@ int suspend_devices_and_enter(suspend_state_t state)  			goto Close;  	}  	suspend_console(); -	saved_mask = clear_gfp_allowed_mask(GFP_IOFS); +	pm_restrict_gfp_mask();  	suspend_test_start();  	error = dpm_suspend_start(PMSG_SUSPEND);  	if (error) { @@ -225,7 +224,7 @@ int suspend_devices_and_enter(suspend_state_t state)  	suspend_test_start();  	dpm_resume_end(PMSG_RESUME);  	suspend_test_finish("resume devices"); -	set_gfp_allowed_mask(saved_mask); +	
pm_restore_gfp_mask();  	resume_console();   Close:  	if (suspend_ops->end) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index a0e4a86ccf94..baf667bb2794 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -6,6 +6,7 @@   *   * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz>   * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> + * Copyright (C) 2010 Bojan Smojver <bojan@rexursive.com>   *   * This file is released under the GPLv2.   * @@ -753,30 +754,43 @@ static int load_image_lzo(struct swap_map_handle *handle,  {  	unsigned int m;  	int error = 0; +	struct bio *bio;  	struct timeval start;  	struct timeval stop;  	unsigned nr_pages; -	size_t off, unc_len, cmp_len; -	unsigned char *unc, *cmp, *page; +	size_t i, off, unc_len, cmp_len; +	unsigned char *unc, *cmp, *page[LZO_CMP_PAGES]; -	page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); -	if (!page) { -		printk(KERN_ERR "PM: Failed to allocate LZO page\n"); -		return -ENOMEM; +	for (i = 0; i < LZO_CMP_PAGES; i++) { +		page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); +		if (!page[i]) { +			printk(KERN_ERR "PM: Failed to allocate LZO page\n"); + +			while (i) +				free_page((unsigned long)page[--i]); + +			return -ENOMEM; +		}  	}  	unc = vmalloc(LZO_UNC_SIZE);  	if (!unc) {  		printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n"); -		free_page((unsigned long)page); + +		for (i = 0; i < LZO_CMP_PAGES; i++) +			free_page((unsigned long)page[i]); +  		return -ENOMEM;  	}  	cmp = vmalloc(LZO_CMP_SIZE);  	if (!cmp) {  		printk(KERN_ERR "PM: Failed to allocate LZO compressed\n"); +  		vfree(unc); -		free_page((unsigned long)page); +		for (i = 0; i < LZO_CMP_PAGES; i++) +			free_page((unsigned long)page[i]); +  		return -ENOMEM;  	} @@ -787,6 +801,7 @@ static int load_image_lzo(struct swap_map_handle *handle,  	if (!m)  		m = 1;  	nr_pages = 0; +	bio = NULL;  	do_gettimeofday(&start);  	error = snapshot_write_next(snapshot); @@ -794,11 +809,11 @@ static int load_image_lzo(struct swap_map_handle *handle,  		goto out_finish;  	for (;;) { -		error = swap_read_page(handle, page, NULL); /* sync */ +		error = swap_read_page(handle, page[0], NULL); /* sync */  		if (error)  			break; -		cmp_len = *(size_t *)page; +		cmp_len = *(size_t *)page[0];  		if (unlikely(!cmp_len ||  		             cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) {  			printk(KERN_ERR "PM: Invalid LZO compressed length\n"); @@ -806,13 +821,20 @@ static int load_image_lzo(struct swap_map_handle *handle,  			break;  		} -		memcpy(cmp, page, PAGE_SIZE); -		for (off = PAGE_SIZE; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) { -			error = swap_read_page(handle, page, NULL); /* sync */ +		for (off = PAGE_SIZE, i = 1; +		     off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) { +			error = swap_read_page(handle, page[i], &bio);  			if (error)  				goto out_finish; +		} -			memcpy(cmp + off, page, PAGE_SIZE); +		error = hib_wait_on_bio_chain(&bio); /* need all data now */ +		if (error) +			goto out_finish; + +		for (off = 0, i = 0; +		     off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) { +			memcpy(cmp + off, page[i], PAGE_SIZE);  		}  		unc_len = LZO_UNC_SIZE; @@ -857,7 +879,8 @@ out_finish:  	vfree(cmp);  	vfree(unc); -	free_page((unsigned long)page); +	for (i = 0; i < LZO_CMP_PAGES; i++) +		free_page((unsigned long)page[i]);  	return error;  } diff --git a/kernel/power/user.c b/kernel/power/user.c index e819e17877ca..1b2ea31e6bd8 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -263,6 +263,7 @@ static long 
snapshot_ioctl(struct file *filp, unsigned int cmd,  	case SNAPSHOT_UNFREEZE:  		if (!data->frozen || data->ready)  			break; +		pm_restore_gfp_mask();  		thaw_processes();  		usermodehelper_enable();  		data->frozen = 0; @@ -275,6 +276,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,  			error = -EPERM;  			break;  		} +		pm_restore_gfp_mask();  		error = hibernation_snapshot(data->platform_support);  		if (!error)  			error = put_user(in_suspend, (int __user *)arg); diff --git a/kernel/sched.c b/kernel/sched.c index aa14a56f9d03..dc91a4d09ac3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -560,18 +560,8 @@ struct rq {  static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -static inline -void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) -{ -	rq->curr->sched_class->check_preempt_curr(rq, p, flags); -	/* -	 * A queue event has occurred, and we're going to schedule.  In -	 * this case, we can save a useless back to back clock update. -	 */ -	if (test_tsk_need_resched(p)) -		rq->skip_clock_update = 1; -} +static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);  static inline int cpu_of(struct rq *rq)  { @@ -2118,6 +2108,31 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,  		p->sched_class->prio_changed(rq, p, oldprio, running);  } +static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) +{ +	const struct sched_class *class; + +	if (p->sched_class == rq->curr->sched_class) { +		rq->curr->sched_class->check_preempt_curr(rq, p, flags); +	} else { +		for_each_class(class) { +			if (class == rq->curr->sched_class) +				break; +			if (class == p->sched_class) { +				resched_task(rq->curr); +				break; +			} +		} +	} + +	/* +	 * A queue event has occurred, and we're going to schedule.  In +	 * this case, we can save a useless back to back clock update. 
+	 */ +	if (test_tsk_need_resched(rq->curr)) +		rq->skip_clock_update = 1; +} +  #ifdef CONFIG_SMP  /*   * Is this task likely cache-hot: @@ -6960,6 +6975,8 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)  	if (cpu != group_first_cpu(sd->groups))  		return; +	sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups)); +  	child = sd->child;  	sd->groups->cpu_power = 0; diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index f4f6a8326dd0..00ebd7686676 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1654,12 +1654,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_  	struct cfs_rq *cfs_rq = task_cfs_rq(curr);  	int scale = cfs_rq->nr_running >= sched_nr_latency; -	if (unlikely(rt_prio(p->prio))) -		goto preempt; - -	if (unlikely(p->sched_class != &fair_sched_class)) -		return; -  	if (unlikely(se == pse))  		return; @@ -1764,10 +1758,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p,  	set_task_cpu(p, this_cpu);  	activate_task(this_rq, p, 0);  	check_preempt_curr(this_rq, p, 0); - -	/* re-arm NEWIDLE balancing when moving tasks */ -	src_rq->avg_idle = this_rq->avg_idle = 2*sysctl_sched_migration_cost; -	this_rq->idle_stamp = 0;  }  /* @@ -2035,13 +2025,16 @@ struct sd_lb_stats {  	unsigned long this_load_per_task;  	unsigned long this_nr_running;  	unsigned long this_has_capacity; +	unsigned int  this_idle_cpus;  	/* Statistics of the busiest group */ +	unsigned int  busiest_idle_cpus;  	unsigned long max_load;  	unsigned long busiest_load_per_task;  	unsigned long busiest_nr_running;  	unsigned long busiest_group_capacity;  	unsigned long busiest_has_capacity; +	unsigned int  busiest_group_weight;  	int group_imb; /* Is there imbalance in this sd */  #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) @@ -2063,6 +2056,8 @@ struct sg_lb_stats {  	unsigned long sum_nr_running; /* Nr tasks running in the group */  	unsigned long sum_weighted_load; /* Weighted load of group's tasks */  	unsigned long group_capacity; +	unsigned long idle_cpus; +	unsigned long group_weight;  	int group_imb; /* Is there an imbalance in the group ? */  	int group_has_capacity; /* Is there extra capacity in the group? 
*/  }; @@ -2431,7 +2426,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,  		sgs->group_load += load;  		sgs->sum_nr_running += rq->nr_running;  		sgs->sum_weighted_load += weighted_cpuload(i); - +		if (idle_cpu(i)) +			sgs->idle_cpus++;  	}  	/* @@ -2469,6 +2465,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,  	sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);  	if (!sgs->group_capacity)  		sgs->group_capacity = fix_small_capacity(sd, group); +	sgs->group_weight = group->group_weight;  	if (sgs->group_capacity > sgs->sum_nr_running)  		sgs->group_has_capacity = 1; @@ -2576,13 +2573,16 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,  			sds->this_nr_running = sgs.sum_nr_running;  			sds->this_load_per_task = sgs.sum_weighted_load;  			sds->this_has_capacity = sgs.group_has_capacity; +			sds->this_idle_cpus = sgs.idle_cpus;  		} else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) {  			sds->max_load = sgs.avg_load;  			sds->busiest = sg;  			sds->busiest_nr_running = sgs.sum_nr_running; +			sds->busiest_idle_cpus = sgs.idle_cpus;  			sds->busiest_group_capacity = sgs.group_capacity;  			sds->busiest_load_per_task = sgs.sum_weighted_load;  			sds->busiest_has_capacity = sgs.group_has_capacity; +			sds->busiest_group_weight = sgs.group_weight;  			sds->group_imb = sgs.group_imb;  		} @@ -2860,8 +2860,26 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,  	if (sds.this_load >= sds.avg_load)  		goto out_balanced; -	if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) -		goto out_balanced; +	/* +	 * In the CPU_NEWLY_IDLE, use imbalance_pct to be conservative. +	 * And to check for busy balance use !idle_cpu instead of +	 * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE +	 * even when they are idle. +	 */ +	if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) { +		if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) +			goto out_balanced; +	} else { +		/* +		 * This cpu is idle. If the busiest group load doesn't +		 * have more tasks than the number of available cpu's and +		 * there is no imbalance between this and busiest group +		 * wrt to idle cpu's, it is balanced. +		 */ +		if ((sds.this_idle_cpus  <= sds.busiest_idle_cpus + 1) && +		    sds.busiest_nr_running <= sds.busiest_group_weight) +			goto out_balanced; +	}  force_balance:  	/* Looks like there is an imbalance. 
Compute it */ @@ -3197,8 +3215,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq)  		interval = msecs_to_jiffies(sd->balance_interval);  		if (time_after(next_balance, sd->last_balance + interval))  			next_balance = sd->last_balance + interval; -		if (pulled_task) +		if (pulled_task) { +			this_rq->idle_stamp = 0;  			break; +		}  	}  	raw_spin_lock(&this_rq->lock); diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c index 45bddc0c1048..2bf6b47058c1 100644 --- a/kernel/sched_stoptask.c +++ b/kernel/sched_stoptask.c @@ -19,14 +19,14 @@ select_task_rq_stop(struct rq *rq, struct task_struct *p,  static void  check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags)  { -	resched_task(rq->curr); /* we preempt everything */ +	/* we're never preempted */  }  static struct task_struct *pick_next_task_stop(struct rq *rq)  {  	struct task_struct *stop = rq->stop; -	if (stop && stop->state == TASK_RUNNING) +	if (stop && stop->se.on_rq)  		return stop;  	return NULL; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b65bf634035e..5abfa1518554 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -702,7 +702,6 @@ static struct ctl_table kern_table[] = {  		.extra1		= &zero,  		.extra2		= &ten_thousand,  	}, -#endif  	{  		.procname	= "dmesg_restrict",  		.data		= &dmesg_restrict, @@ -712,6 +711,7 @@ static struct ctl_table kern_table[] = {  		.extra1		= &zero,  		.extra2		= &one,  	}, +#endif  	{  		.procname	= "ngroups_max",  		.data		= &ngroups_max, diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index e04b8bcdef88..ea37e2ff4164 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -126,7 +126,7 @@ if FTRACE  config FUNCTION_TRACER  	bool "Kernel Function Tracer"  	depends on HAVE_FUNCTION_TRACER -	select FRAME_POINTER if (!ARM_UNWIND) +	select FRAME_POINTER if !ARM_UNWIND && !S390  	select KALLSYMS  	select GENERIC_TRACER  	select CONTEXT_SWITCH_TRACER diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 82d9b8106cd0..c380612273bf 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -17,7 +17,6 @@  #include <linux/writeback.h>  #include <linux/kallsyms.h>  #include <linux/seq_file.h> -#include <linux/smp_lock.h>  #include <linux/notifier.h>  #include <linux/irqflags.h>  #include <linux/debugfs.h> @@ -1284,6 +1283,8 @@ void trace_dump_stack(void)  	__ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());  } +static DEFINE_PER_CPU(int, user_stack_count); +  void  ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)  { @@ -1302,6 +1303,18 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)  	if (unlikely(in_nmi()))  		return; +	/* +	 * prevent recursion, since the user stack tracing may +	 * trigger other kernel events. +	 */ +	preempt_disable(); +	if (__this_cpu_read(user_stack_count)) +		goto out; + +	__this_cpu_inc(user_stack_count); + + +  	event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,  					  sizeof(*entry), flags, pc);  	if (!event) @@ -1319,6 +1332,11 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)  	save_stack_trace_user(&trace);  	if (!filter_check_discard(call, entry, buffer, event))  		ring_buffer_unlock_commit(buffer, event); + +	__this_cpu_dec(user_stack_count); + + out: +	preempt_enable();  }  #ifdef UNUSED  | 
