| field | value | date |
|---|---|---|
| author | Ingo Molnar <mingo@kernel.org> | 2012-12-07 12:15:33 +0100 |
| committer | Ingo Molnar <mingo@kernel.org> | 2012-12-07 12:15:33 +0100 |
| commit | 222e82bef4bd520a31d48c31ab24e49dd46daa46 | |
| tree | b6e73cad8e0b3a1c3e1acc537789e97aadaefa92 /kernel | |
| parent | 38ca9c927c7d3db61f57e3d3a9334958c3af6e9a | |
| parent | 18a2f371f5edf41810f6469cb9be39931ef9deb9 | |
Merge branch 'linus' into sched/core
Pick up the autogroups fix and other fixes.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | kernel/Makefile | 6 |
| -rw-r--r-- | kernel/cgroup.c | 41 |
| -rw-r--r-- | kernel/events/hw_breakpoint.c | 12 |
| -rw-r--r-- | kernel/futex.c | 59 |
| -rw-r--r-- | kernel/modsign_pubkey.c | 4 |
| -rw-r--r-- | kernel/module.c | 27 |
| -rw-r--r-- | kernel/module_signing.c | 14 |
| -rw-r--r-- | kernel/pid_namespace.c | 12 |
| -rw-r--r-- | kernel/sched/auto_group.c | 4 |
| -rw-r--r-- | kernel/sched/auto_group.h | 5 |
| -rw-r--r-- | kernel/watchdog.c | 7 |
| -rw-r--r-- | kernel/workqueue.c | 24 |
12 files changed, 116 insertions, 99 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 0dfeca4324ee..86e3285ae7e5 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -174,10 +174,8 @@ signing_key.priv signing_key.x509: x509.genkey
 	@echo "###"
 	@echo "### If this takes a long time, you might wish to run rngd in the"
 	@echo "### background to keep the supply of entropy topped up.  It"
-	@echo "### needs to be run as root, and should use a hardware random"
-	@echo "### number generator if one is available, eg:"
-	@echo "###"
-	@echo "###     rngd -r /dev/hwrandom"
+	@echo "### needs to be run as root, and uses a hardware random"
+	@echo "### number generator if one is available."
 	@echo "###"
 	openssl req -new -nodes -utf8 $(sign_key_with_hash) -days 36500 -batch \
 		-x509 -config x509.genkey \
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 13774b3b39aa..f24f724620dd 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1962,9 +1962,8 @@ static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
 	 * trading it for newcg is protected by cgroup_mutex, we're safe to drop
 	 * it here; it will be freed under RCU.
 	 */
-	put_css_set(oldcg);
-
 	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
+	put_css_set(oldcg);
 }
 
 /**
@@ -4815,31 +4814,20 @@ static const struct file_operations proc_cgroupstats_operations = {
  *
  * A pointer to the shared css_set was automatically copied in
  * fork.c by dup_task_struct().  However, we ignore that copy, since
- * it was not made under the protection of RCU, cgroup_mutex or
- * threadgroup_change_begin(), so it might no longer be a valid
- * cgroup pointer.  cgroup_attach_task() might have already changed
- * current->cgroups, allowing the previously referenced cgroup
- * group to be removed and freed.
- *
- * Outside the pointer validity we also need to process the css_set
- * inheritance between threadgoup_change_begin() and
- * threadgoup_change_end(), this way there is no leak in any process
- * wide migration performed by cgroup_attach_proc() that could otherwise
- * miss a thread because it is too early or too late in the fork stage.
+ * it was not made under the protection of RCU or cgroup_mutex, so
+ * might no longer be a valid cgroup pointer.  cgroup_attach_task() might
+ * have already changed current->cgroups, allowing the previously
+ * referenced cgroup group to be removed and freed.
  *
  * At the point that cgroup_fork() is called, 'current' is the parent
  * task, and the passed argument 'child' points to the child task.
  */
 void cgroup_fork(struct task_struct *child)
 {
-	/*
-	 * We don't need to task_lock() current because current->cgroups
-	 * can't be changed concurrently here. The parent obviously hasn't
-	 * exited and called cgroup_exit(), and we are synchronized against
-	 * cgroup migration through threadgroup_change_begin().
-	 */
+	task_lock(current);
 	child->cgroups = current->cgroups;
 	get_css_set(child->cgroups);
+	task_unlock(current);
 	INIT_LIST_HEAD(&child->cg_list);
 }
 
@@ -4895,19 +4883,10 @@ void cgroup_post_fork(struct task_struct *child)
 	 */
 	if (use_task_css_set_links) {
 		write_lock(&css_set_lock);
-		if (list_empty(&child->cg_list)) {
-			/*
-			 * It's safe to use child->cgroups without task_lock()
-			 * here because we are protected through
-			 * threadgroup_change_begin() against concurrent
-			 * css_set change in cgroup_task_migrate(). Also
-			 * the task can't exit at that point until
-			 * wake_up_new_task() is called, so we are protected
-			 * against cgroup_exit() setting child->cgroup to
-			 * init_css_set.
-			 */
+		task_lock(child);
+		if (list_empty(&child->cg_list))
 			list_add(&child->cg_list, &child->cgroups->tasks);
-		}
+		task_unlock(child);
 		write_unlock(&css_set_lock);
 	}
 }
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 9a7b487c6fe2..fe8a916507ed 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -111,14 +111,16 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
  * Count the number of breakpoints of the same type and same task.
  * The given event must be not on the list.
  */
-static int task_bp_pinned(struct perf_event *bp, enum bp_type_idx type)
+static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
 {
 	struct task_struct *tsk = bp->hw.bp_target;
 	struct perf_event *iter;
 	int count = 0;
 
 	list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
-		if (iter->hw.bp_target == tsk && find_slot_idx(iter) == type)
+		if (iter->hw.bp_target == tsk &&
+		    find_slot_idx(iter) == type &&
+		    cpu == iter->cpu)
 			count += hw_breakpoint_weight(iter);
 	}
 
@@ -141,7 +143,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
 		if (!tsk)
 			slots->pinned += max_task_bp_pinned(cpu, type);
 		else
-			slots->pinned += task_bp_pinned(bp, type);
+			slots->pinned += task_bp_pinned(cpu, bp, type);
 		slots->flexible = per_cpu(nr_bp_flexible[type], cpu);
 
 		return;
@@ -154,7 +156,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
 		if (!tsk)
 			nr += max_task_bp_pinned(cpu, type);
 		else
-			nr += task_bp_pinned(bp, type);
+			nr += task_bp_pinned(cpu, bp, type);
 
 		if (nr > slots->pinned)
 			slots->pinned = nr;
@@ -188,7 +190,7 @@ static void toggle_bp_task_slot(struct perf_event *bp, int cpu, bool enable,
 	int old_idx = 0;
 	int idx = 0;
 
-	old_count = task_bp_pinned(bp, type);
+	old_count = task_bp_pinned(cpu, bp, type);
 	old_idx = old_count - 1;
 	idx = old_idx + weight;
 
diff --git a/kernel/futex.c b/kernel/futex.c
index 3717e7b306e0..19eb089ca003 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -716,7 +716,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
 				struct futex_pi_state **ps,
 				struct task_struct *task, int set_waiters)
 {
-	int lock_taken, ret, ownerdied = 0;
+	int lock_taken, ret, force_take = 0;
 	u32 uval, newval, curval, vpid = task_pid_vnr(task);
 
 retry:
@@ -755,17 +755,15 @@ retry:
 	newval = curval | FUTEX_WAITERS;
 
 	/*
-	 * There are two cases, where a futex might have no owner (the
-	 * owner TID is 0): OWNER_DIED. We take over the futex in this
-	 * case. We also do an unconditional take over, when the owner
-	 * of the futex died.
-	 *
-	 * This is safe as we are protected by the hash bucket lock !
+	 * Should we force take the futex? See below.
 	 */
-	if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
-		/* Keep the OWNER_DIED bit */
+	if (unlikely(force_take)) {
+		/*
+		 * Keep the OWNER_DIED and the WAITERS bit and set the
+		 * new TID value.
+		 */
 		newval = (curval & ~FUTEX_TID_MASK) | vpid;
-		ownerdied = 0;
+		force_take = 0;
 		lock_taken = 1;
 	}
 
@@ -775,7 +773,7 @@ retry:
 		goto retry;
 
 	/*
-	 * We took the lock due to owner died take over.
+	 * We took the lock due to forced take over.
 	 */
 	if (unlikely(lock_taken))
 		return 1;
@@ -790,20 +788,25 @@ retry:
 		switch (ret) {
 		case -ESRCH:
 			/*
-			 * No owner found for this futex. Check if the
-			 * OWNER_DIED bit is set to figure out whether
-			 * this is a robust futex or not.
+			 * We failed to find an owner for this
+			 * futex. So we have no pi_state to block
+			 * on. This can happen in two cases:
+			 *
+			 * 1) The owner died
+			 * 2) A stale FUTEX_WAITERS bit
+			 *
+			 * Re-read the futex value.
 			 */
 			if (get_futex_value_locked(&curval, uaddr))
 				return -EFAULT;
 
 			/*
-			 * We simply start over in case of a robust
-			 * futex. The code above will take the futex
-			 * and return happy.
+			 * If the owner died or we have a stale
+			 * WAITERS bit the owner TID in the user space
+			 * futex is 0.
 			 */
-			if (curval & FUTEX_OWNER_DIED) {
-				ownerdied = 1;
+			if (!(curval & FUTEX_TID_MASK)) {
+				force_take = 1;
 				goto retry;
 			}
 		default:
@@ -840,6 +843,9 @@ static void wake_futex(struct futex_q *q)
 {
 	struct task_struct *p = q->task;
 
+	if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
+		return;
+
 	/*
 	 * We set q->lock_ptr = NULL _before_ we wake up the task. If
 	 * a non-futex wake up happens on another CPU then the task
@@ -1075,6 +1081,10 @@ retry_private:
 
 	plist_for_each_entry_safe(this, next, head, list) {
 		if (match_futex (&this->key, &key1)) {
+			if (this->pi_state || this->rt_waiter) {
+				ret = -EINVAL;
+				goto out_unlock;
+			}
 			wake_futex(this);
 			if (++ret >= nr_wake)
 				break;
@@ -1087,6 +1097,10 @@ retry_private:
 		op_ret = 0;
 		plist_for_each_entry_safe(this, next, head, list) {
 			if (match_futex (&this->key, &key2)) {
+				if (this->pi_state || this->rt_waiter) {
+					ret = -EINVAL;
+					goto out_unlock;
+				}
 				wake_futex(this);
 				if (++op_ret >= nr_wake2)
 					break;
@@ -1095,6 +1109,7 @@ retry_private:
 		ret += op_ret;
 	}
 
+out_unlock:
 	double_unlock_hb(hb1, hb2);
 out_put_keys:
 	put_futex_key(&key2);
@@ -1384,9 +1399,13 @@ retry_private:
 		/*
 		 * FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always
 		 * be paired with each other and no other futex ops.
+		 *
+		 * We should never be requeueing a futex_q with a pi_state,
+		 * which is awaiting a futex_unlock_pi().
 		 */
 		if ((requeue_pi && !this->rt_waiter) ||
-		    (!requeue_pi && this->rt_waiter)) {
+		    (!requeue_pi && this->rt_waiter) ||
+		    this->pi_state) {
 			ret = -EINVAL;
 			break;
 		}
diff --git a/kernel/modsign_pubkey.c b/kernel/modsign_pubkey.c
index 4646eb2c3820..767e559dfb10 100644
--- a/kernel/modsign_pubkey.c
+++ b/kernel/modsign_pubkey.c
@@ -21,10 +21,10 @@ struct key *modsign_keyring;
 extern __initdata const u8 modsign_certificate_list[];
 extern __initdata const u8 modsign_certificate_list_end[];
 asm(".section .init.data,\"aw\"\n"
-    "modsign_certificate_list:\n"
+    SYMBOL_PREFIX "modsign_certificate_list:\n"
     ".incbin \"signing_key.x509\"\n"
     ".incbin \"extra_certificates\"\n"
-    "modsign_certificate_list_end:"
+    SYMBOL_PREFIX "modsign_certificate_list_end:"
     );
 
 /*
diff --git a/kernel/module.c b/kernel/module.c
index 6085f5ef88ea..6e48c3a43599 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2293,12 +2293,17 @@ static void layout_symtab(struct module *mod, struct load_info *info)
 	src = (void *)info->hdr + symsect->sh_offset;
 	nsrc = symsect->sh_size / sizeof(*src);
 
+	/* strtab always starts with a nul, so offset 0 is the empty string. */
+	strtab_size = 1;
+
 	/* Compute total space required for the core symbols' strtab. */
-	for (ndst = i = strtab_size = 1; i < nsrc; ++i, ++src)
-		if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) {
-			strtab_size += strlen(&info->strtab[src->st_name]) + 1;
+	for (ndst = i = 0; i < nsrc; i++) {
+		if (i == 0 ||
+		    is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
+			strtab_size += strlen(&info->strtab[src[i].st_name])+1;
 			ndst++;
 		}
+	}
 
 	/* Append room for core symbols at end of core part. */
 	info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
@@ -2332,15 +2337,15 @@ static void add_kallsyms(struct module *mod, const struct load_info *info)
 	mod->core_symtab = dst = mod->module_core + info->symoffs;
 	mod->core_strtab = s = mod->module_core + info->stroffs;
 	src = mod->symtab;
-	*dst = *src;
 	*s++ = 0;
-	for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) {
-		if (!is_core_symbol(src, info->sechdrs, info->hdr->e_shnum))
-			continue;
-
-		dst[ndst] = *src;
-		dst[ndst++].st_name = s - mod->core_strtab;
-		s += strlcpy(s, &mod->strtab[src->st_name], KSYM_NAME_LEN) + 1;
+	for (ndst = i = 0; i < mod->num_symtab; i++) {
+		if (i == 0 ||
+		    is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
+			dst[ndst] = src[i];
+			dst[ndst++].st_name = s - mod->core_strtab;
+			s += strlcpy(s, &mod->strtab[src[i].st_name],
+				     KSYM_NAME_LEN) + 1;
+		}
 	}
 	mod->core_num_syms = ndst;
 }
diff --git a/kernel/module_signing.c b/kernel/module_signing.c
index ea1b1df5dbb0..f2970bddc5ea 100644
--- a/kernel/module_signing.c
+++ b/kernel/module_signing.c
@@ -27,13 +27,13 @@
  *	- Information block
  */
 struct module_signature {
-	enum pkey_algo		algo : 8;	/* Public-key crypto algorithm */
-	enum pkey_hash_algo	hash : 8;	/* Digest algorithm */
-	enum pkey_id_type	id_type : 8;	/* Key identifier type */
-	u8			signer_len;	/* Length of signer's name */
-	u8			key_id_len;	/* Length of key identifier */
-	u8			__pad[3];
-	__be32			sig_len;	/* Length of signature data */
+	u8	algo;		/* Public-key crypto algorithm [enum pkey_algo] */
+	u8	hash;		/* Digest algorithm [enum pkey_hash_algo] */
+	u8	id_type;	/* Key identifier type [enum pkey_id_type] */
+	u8	signer_len;	/* Length of signer's name */
+	u8	key_id_len;	/* Length of key identifier */
+	u8	__pad[3];
+	__be32	sig_len;	/* Length of signature data */
 };
 
 /*
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index eb00be205811..7b07cc0dfb75 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -71,12 +71,22 @@ err_alloc:
 	return NULL;
 }
 
+/* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */
+#define MAX_PID_NS_LEVEL 32
+
 static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns)
 {
 	struct pid_namespace *ns;
 	unsigned int level = parent_pid_ns->level + 1;
-	int i, err = -ENOMEM;
+	int i;
+	int err;
+
+	if (level > MAX_PID_NS_LEVEL) {
+		err = -EINVAL;
+		goto out;
+	}
 
+	err = -ENOMEM;
 	ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
 	if (ns == NULL)
 		goto out;
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
index 0984a21076a3..15f60d01198b 100644
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -143,15 +143,11 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag)
 
 	p->signal->autogroup = autogroup_kref_get(ag);
 
-	if (!ACCESS_ONCE(sysctl_sched_autogroup_enabled))
-		goto out;
-
 	t = p;
 	do {
 		sched_move_task(t);
 	} while_each_thread(p, t);
 
-out:
 	unlock_task_sighand(p, &flags);
 	autogroup_kref_put(prev);
 }
diff --git a/kernel/sched/auto_group.h b/kernel/sched/auto_group.h
index 8bd047142816..443232ebbb53 100644
--- a/kernel/sched/auto_group.h
+++ b/kernel/sched/auto_group.h
@@ -4,11 +4,6 @@
 #include <linux/rwsem.h>
 
 struct autogroup {
-	/*
-	 * reference doesn't mean how many thread attach to this
-	 * autogroup now. It just stands for the number of task
-	 * could use this autogroup.
-	 */
 	struct kref		kref;
 	struct task_group	*tg;
 	struct rw_semaphore	lock;
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 9d4c8d5a1f53..c8c21be11ab4 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -116,7 +116,7 @@ static unsigned long get_timestamp(int this_cpu)
 	return cpu_clock(this_cpu) >> 30LL;  /* 2^30 ~= 10^9 */
 }
 
-static unsigned long get_sample_period(void)
+static u64 get_sample_period(void)
 {
 	/*
 	 * convert watchdog_thresh from seconds to ns
@@ -125,7 +125,7 @@ static unsigned long get_sample_period(void)
 	 * and hard thresholds) to increment before the
 	 * hardlockup detector generates a warning
 	 */
-	return get_softlockup_thresh() * (NSEC_PER_SEC / 5);
+	return get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
 }
 
 /* Commands for resetting the watchdog */
@@ -368,6 +368,9 @@ static void watchdog_disable(unsigned int cpu)
 {
 	struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
 
+	if (!watchdog_enabled)
+		return;
+
 	watchdog_set_prio(SCHED_NORMAL, 0);
 	hrtimer_cancel(hrtimer);
 	/* disable the perf event */
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index d951daa0ca9a..1dae900df798 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1361,8 +1361,19 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
 	WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
 		     timer->data != (unsigned long)dwork);
-	BUG_ON(timer_pending(timer));
-	BUG_ON(!list_empty(&work->entry));
+	WARN_ON_ONCE(timer_pending(timer));
+	WARN_ON_ONCE(!list_empty(&work->entry));
+
+	/*
+	 * If @delay is 0, queue @dwork->work immediately.  This is for
+	 * both optimization and correctness.  The earliest @timer can
+	 * expire is on the closest next tick and delayed_work users depend
+	 * on that there's no such delay when @delay is 0.
+	 */
+	if (!delay) {
+		__queue_work(cpu, wq, &dwork->work);
+		return;
+	}
 
 	timer_stats_timer_set_start_info(&dwork->timer);
@@ -1417,9 +1428,6 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 	bool ret = false;
 	unsigned long flags;
 
-	if (!delay)
-		return queue_work_on(cpu, wq, &dwork->work);
-
 	/* read the comment in __queue_work() */
 	local_irq_save(flags);
@@ -2407,8 +2415,10 @@ static int rescuer_thread(void *__wq)
 repeat:
 	set_current_state(TASK_INTERRUPTIBLE);
 
-	if (kthread_should_stop())
+	if (kthread_should_stop()) {
+		__set_current_state(TASK_RUNNING);
 		return 0;
+	}
 
 	/*
 	 * See whether any cpu is asking for help.  Unbounded
@@ -2982,7 +2992,7 @@ bool cancel_delayed_work(struct delayed_work *dwork)
 	set_work_cpu_and_clear_pending(&dwork->work, work_cpu(&dwork->work));
 	local_irq_restore(flags);
-	return true;
+	return ret;
 }
 EXPORT_SYMBOL(cancel_delayed_work);