| author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2020-05-25 09:11:19 +0200 | 
|---|---|---|
| committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2020-05-25 09:11:19 +0200 | 
| commit | c2312ff575fcaed96889d5bb5392afcc604c9442 (patch) | |
| tree | 475d63360ee17732fac41c7c1e4a1636ed92e7cb /kernel | |
| parent | a3975dea1696b7c81319dc4b66e3c378dd47ccfb (diff) | |
| parent | 9cb1fd0efd195590b828b9b865421ad345a4a145 (diff) | |
Merge 5.7-rc7 into staging-next
We need the staging/iio fixes in here as well.
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'kernel')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | kernel/bpf/arraymap.c | 7 |
| -rw-r--r-- | kernel/bpf/syscall.c | 21 |
| -rw-r--r-- | kernel/bpf/verifier.c | 21 |
| -rw-r--r-- | kernel/fork.c | 13 |
| -rw-r--r-- | kernel/sched/debug.c | 4 |
| -rw-r--r-- | kernel/sched/fair.c | 49 |
| -rw-r--r-- | kernel/trace/bpf_trace.c | 100 |
| -rw-r--r-- | kernel/trace/ftrace_internal.h | 22 |
| -rw-r--r-- | kernel/trace/preemptirq_delay_test.c | 12 |
| -rw-r--r-- | kernel/trace/ring_buffer.c | 34 |
| -rw-r--r-- | kernel/umh.c | 6 |
11 files changed, 184 insertions, 105 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 95d77770353c..1d6120fd5ba6 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -486,7 +486,12 @@ static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
 	if (!(map->map_flags & BPF_F_MMAPABLE))
 		return -EINVAL;
-	return remap_vmalloc_range(vma, array_map_vmalloc_addr(array), pgoff);
+	if (vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start) >
+	    PAGE_ALIGN((u64)array->map.max_entries * array->elem_size))
+		return -EINVAL;
+
+	return remap_vmalloc_range(vma, array_map_vmalloc_addr(array),
+				   vma->vm_pgoff + pgoff);
 }
 
 const struct bpf_map_ops array_map_ops = {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7626b8024471..4e6dee19a668 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -623,9 +623,20 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
 	mutex_lock(&map->freeze_mutex);
 
-	if ((vma->vm_flags & VM_WRITE) && map->frozen) {
-		err = -EPERM;
-		goto out;
+	if (vma->vm_flags & VM_WRITE) {
+		if (map->frozen) {
+			err = -EPERM;
+			goto out;
+		}
+		/* map is meant to be read-only, so do not allow mapping as
+		 * writable, because it's possible to leak a writable page
+		 * reference and allows user-space to still modify it after
+		 * freezing, while verifier will assume contents do not change
+		 */
+		if (map->map_flags & BPF_F_RDONLY_PROG) {
+			err = -EACCES;
+			goto out;
+		}
 	}
 
 	/* set default open/close callbacks */
@@ -1485,8 +1496,10 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
 	if (err)
 		goto free_value;
 
-	if (copy_to_user(uvalue, value, value_size) != 0)
+	if (copy_to_user(uvalue, value, value_size) != 0) {
+		err = -EFAULT;
 		goto free_value;
+	}
 
 	err = 0;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index fa1d8245b925..8d7ee40e2748 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4340,7 +4340,9 @@ static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
 	if (ret_type != RET_INTEGER ||
 	    (func_id != BPF_FUNC_get_stack &&
-	     func_id != BPF_FUNC_probe_read_str))
+	     func_id != BPF_FUNC_probe_read_str &&
+	     func_id != BPF_FUNC_probe_read_kernel_str &&
+	     func_id != BPF_FUNC_probe_read_user_str))
 		return;
 
 	ret_reg->smax_value = meta->msize_max_value;
@@ -7059,6 +7061,23 @@ static int check_return_code(struct bpf_verifier_env *env)
 			return 0;
 		range = tnum_const(0);
 		break;
+	case BPF_PROG_TYPE_TRACING:
+		switch (env->prog->expected_attach_type) {
+		case BPF_TRACE_FENTRY:
+		case BPF_TRACE_FEXIT:
+			range = tnum_const(0);
+			break;
+		case BPF_TRACE_RAW_TP:
+		case BPF_MODIFY_RETURN:
+			return 0;
+		default:
+			return -ENOTSUPP;
+		}
+		break;
+	case BPF_PROG_TYPE_EXT:
+		/* freplace program can return anything as its return value
+		 * depends on the to-be-replaced kernel func or bpf program.
+		 */
 	default:
 		return 0;
 	}
diff --git a/kernel/fork.c b/kernel/fork.c
index 8c700f881d92..48ed22774efa 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2486,11 +2486,11 @@ long do_fork(unsigned long clone_flags,
 	      int __user *child_tidptr)
 {
 	struct kernel_clone_args args = {
-		.flags		= (clone_flags & ~CSIGNAL),
+		.flags		= (lower_32_bits(clone_flags) & ~CSIGNAL),
 		.pidfd		= parent_tidptr,
 		.child_tid	= child_tidptr,
 		.parent_tid	= parent_tidptr,
-		.exit_signal	= (clone_flags & CSIGNAL),
+		.exit_signal	= (lower_32_bits(clone_flags) & CSIGNAL),
 		.stack		= stack_start,
 		.stack_size	= stack_size,
 	};
@@ -2508,8 +2508,9 @@ long do_fork(unsigned long clone_flags,
 pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 {
 	struct kernel_clone_args args = {
-		.flags		= ((flags | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL),
-		.exit_signal	= (flags & CSIGNAL),
+		.flags		= ((lower_32_bits(flags) | CLONE_VM |
+				    CLONE_UNTRACED) & ~CSIGNAL),
+		.exit_signal	= (lower_32_bits(flags) & CSIGNAL),
 		.stack		= (unsigned long)fn,
 		.stack_size	= (unsigned long)arg,
 	};
@@ -2570,11 +2571,11 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
 #endif
 {
 	struct kernel_clone_args args = {
-		.flags		= (clone_flags & ~CSIGNAL),
+		.flags		= (lower_32_bits(clone_flags) & ~CSIGNAL),
 		.pidfd		= parent_tidptr,
 		.child_tid	= child_tidptr,
 		.parent_tid	= parent_tidptr,
-		.exit_signal	= (clone_flags & CSIGNAL),
+		.exit_signal	= (lower_32_bits(clone_flags) & CSIGNAL),
 		.stack		= newsp,
 		.tls		= tls,
 	};
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index a562df57a86e..239970b991c0 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -948,8 +948,8 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
 	P(se.avg.util_est.enqueued);
 #endif
 #ifdef CONFIG_UCLAMP_TASK
-	__PS("uclamp.min", p->uclamp[UCLAMP_MIN].value);
-	__PS("uclamp.max", p->uclamp[UCLAMP_MAX].value);
+	__PS("uclamp.min", p->uclamp_req[UCLAMP_MIN].value);
+	__PS("uclamp.max", p->uclamp_req[UCLAMP_MAX].value);
 	__PS("effective uclamp.min", uclamp_eff_value(p, UCLAMP_MIN));
 	__PS("effective uclamp.max", uclamp_eff_value(p, UCLAMP_MAX));
 #endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 02f323b85b6d..538ba5d94e99 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4774,7 +4774,6 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 	struct rq *rq = rq_of(cfs_rq);
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
 	struct sched_entity *se;
-	int enqueue = 1;
 	long task_delta, idle_task_delta;
 
 	se = cfs_rq->tg->se[cpu_of(rq)];
@@ -4798,26 +4797,44 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 	idle_task_delta = cfs_rq->idle_h_nr_running;
 	for_each_sched_entity(se) {
 		if (se->on_rq)
-			enqueue = 0;
+			break;
+		cfs_rq = cfs_rq_of(se);
+		enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
+
+		cfs_rq->h_nr_running += task_delta;
+		cfs_rq->idle_h_nr_running += idle_task_delta;
+
+		/* end evaluation on encountering a throttled cfs_rq */
+		if (cfs_rq_throttled(cfs_rq))
+			goto unthrottle_throttle;
+	}
 
+	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
-		if (enqueue) {
-			enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
-		} else {
-			update_load_avg(cfs_rq, se, 0);
-			se_update_runnable(se);
-		}
+
+		update_load_avg(cfs_rq, se, UPDATE_TG);
+		se_update_runnable(se);
 
 		cfs_rq->h_nr_running += task_delta;
 		cfs_rq->idle_h_nr_running += idle_task_delta;
+
+		/* end evaluation on encountering a throttled cfs_rq */
 		if (cfs_rq_throttled(cfs_rq))
-			break;
+			goto unthrottle_throttle;
+
+		/*
+		 * One parent has been throttled and cfs_rq removed from the
+		 * list. Add it back to not break the leaf list.
+		 */
+		if (throttled_hierarchy(cfs_rq))
+			list_add_leaf_cfs_rq(cfs_rq);
 	}
 
-	if (!se)
-		add_nr_running(rq, task_delta);
+	/* At this point se is NULL and we are at root level*/
+	add_nr_running(rq, task_delta);
 
+unthrottle_throttle:
 	/*
 	 * The cfs_rq_throttled() breaks in the above iteration can result in
 	 * incomplete leaf list maintenance, resulting in triggering the
@@ -4826,7 +4843,8 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
 
-		list_add_leaf_cfs_rq(cfs_rq);
+		if (list_add_leaf_cfs_rq(cfs_rq))
+			break;
 	}
 
 	assert_list_leaf_cfs_rq(rq);
@@ -5479,6 +5497,13 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		/* end evaluation on encountering a throttled cfs_rq */
 		if (cfs_rq_throttled(cfs_rq))
 			goto enqueue_throttle;
+
+               /*
+                * One parent has been throttled and cfs_rq removed from the
+                * list. Add it back to not break the leaf list.
+                */
+               if (throttled_hierarchy(cfs_rq))
+                       list_add_leaf_cfs_rq(cfs_rq);
 	}
 
 enqueue_throttle:
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index ca1796747a77..a010edc37ee0 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -323,17 +323,15 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
 /*
  * Only limited trace_printk() conversion specifiers allowed:
- * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s
+ * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pks %pus %s
  */
 BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
 	   u64, arg2, u64, arg3)
 {
+	int i, mod[3] = {}, fmt_cnt = 0;
+	char buf[64], fmt_ptype;
+	void *unsafe_ptr = NULL;
 	bool str_seen = false;
-	int mod[3] = {};
-	int fmt_cnt = 0;
-	u64 unsafe_addr;
-	char buf[64];
-	int i;
 
 	/*
 	 * bpf_check()->check_func_arg()->check_stack_boundary()
@@ -359,40 +357,71 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
 		if (fmt[i] == 'l') {
 			mod[fmt_cnt]++;
 			i++;
-		} else if (fmt[i] == 'p' || fmt[i] == 's') {
+		} else if (fmt[i] == 'p') {
 			mod[fmt_cnt]++;
+			if ((fmt[i + 1] == 'k' ||
+			     fmt[i + 1] == 'u') &&
+			    fmt[i + 2] == 's') {
+				fmt_ptype = fmt[i + 1];
+				i += 2;
+				goto fmt_str;
+			}
+
 			/* disallow any further format extensions */
 			if (fmt[i + 1] != 0 &&
 			    !isspace(fmt[i + 1]) &&
 			    !ispunct(fmt[i + 1]))
 				return -EINVAL;
-			fmt_cnt++;
-			if (fmt[i] == 's') {
-				if (str_seen)
-					/* allow only one '%s' per fmt string */
-					return -EINVAL;
-				str_seen = true;
-
-				switch (fmt_cnt) {
-				case 1:
-					unsafe_addr = arg1;
-					arg1 = (long) buf;
-					break;
-				case 2:
-					unsafe_addr = arg2;
-					arg2 = (long) buf;
-					break;
-				case 3:
-					unsafe_addr = arg3;
-					arg3 = (long) buf;
-					break;
-				}
-				buf[0] = 0;
-				strncpy_from_unsafe(buf,
-						    (void *) (long) unsafe_addr,
+
+			goto fmt_next;
+		} else if (fmt[i] == 's') {
+			mod[fmt_cnt]++;
+			fmt_ptype = fmt[i];
+fmt_str:
+			if (str_seen)
+				/* allow only one '%s' per fmt string */
+				return -EINVAL;
+			str_seen = true;
+
+			if (fmt[i + 1] != 0 &&
+			    !isspace(fmt[i + 1]) &&
+			    !ispunct(fmt[i + 1]))
+				return -EINVAL;
+
+			switch (fmt_cnt) {
+			case 0:
+				unsafe_ptr = (void *)(long)arg1;
+				arg1 = (long)buf;
+				break;
+			case 1:
+				unsafe_ptr = (void *)(long)arg2;
+				arg2 = (long)buf;
+				break;
+			case 2:
+				unsafe_ptr = (void *)(long)arg3;
+				arg3 = (long)buf;
+				break;
+			}
+
+			buf[0] = 0;
+			switch (fmt_ptype) {
+			case 's':
+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+				strncpy_from_unsafe(buf, unsafe_ptr,
 						    sizeof(buf));
+				break;
+#endif
+			case 'k':
+				strncpy_from_unsafe_strict(buf, unsafe_ptr,
+							   sizeof(buf));
+				break;
+			case 'u':
+				strncpy_from_unsafe_user(buf,
+					(__force void __user *)unsafe_ptr,
+							 sizeof(buf));
+				break;
 			}
-			continue;
+			goto fmt_next;
 		}
 
 		if (fmt[i] == 'l') {
@@ -403,6 +432,7 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
 		if (fmt[i] != 'i' && fmt[i] != 'd' &&
 		    fmt[i] != 'u' && fmt[i] != 'x')
 			return -EINVAL;
+fmt_next:
 		fmt_cnt++;
 	}
@@ -825,14 +855,16 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_probe_read_user_proto;
 	case BPF_FUNC_probe_read_kernel:
 		return &bpf_probe_read_kernel_proto;
-	case BPF_FUNC_probe_read:
-		return &bpf_probe_read_compat_proto;
 	case BPF_FUNC_probe_read_user_str:
 		return &bpf_probe_read_user_str_proto;
 	case BPF_FUNC_probe_read_kernel_str:
 		return &bpf_probe_read_kernel_str_proto;
+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+	case BPF_FUNC_probe_read:
+		return &bpf_probe_read_compat_proto;
 	case BPF_FUNC_probe_read_str:
 		return &bpf_probe_read_compat_str_proto;
+#endif
 #ifdef CONFIG_CGROUPS
 	case BPF_FUNC_get_current_cgroup_id:
 		return &bpf_get_current_cgroup_id_proto;
diff --git a/kernel/trace/ftrace_internal.h b/kernel/trace/ftrace_internal.h
index 0456e0a3dab1..382775edf690 100644
--- a/kernel/trace/ftrace_internal.h
+++ b/kernel/trace/ftrace_internal.h
@@ -4,28 +4,6 @@
 #ifdef CONFIG_FUNCTION_TRACER
 
-/*
- * Traverse the ftrace_global_list, invoking all entries.  The reason that we
- * can use rcu_dereference_raw_check() is that elements removed from this list
- * are simply leaked, so there is no need to interact with a grace-period
- * mechanism.  The rcu_dereference_raw_check() calls are needed to handle
- * concurrent insertions into the ftrace_global_list.
- *
- * Silly Alpha and silly pointer-speculation compiler optimizations!
- */
-#define do_for_each_ftrace_op(op, list)			\
-	op = rcu_dereference_raw_check(list);			\
-	do
-
-/*
- * Optimized for just a single item in the list (as that is the normal case).
- */
-#define while_for_each_ftrace_op(op)				\
-	while (likely(op = rcu_dereference_raw_check((op)->next)) &&	\
-	       unlikely((op) != &ftrace_list_end))
-
-extern struct ftrace_ops __rcu *ftrace_ops_list;
-extern struct ftrace_ops ftrace_list_end;
 extern struct mutex ftrace_lock;
 extern struct ftrace_ops global_ops;
diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c
index c4c86de63cf9..312d1a0ca3b6 100644
--- a/kernel/trace/preemptirq_delay_test.c
+++ b/kernel/trace/preemptirq_delay_test.c
@@ -16,6 +16,7 @@
 #include <linux/printk.h>
 #include <linux/string.h>
 #include <linux/sysfs.h>
+#include <linux/completion.h>
 
 static ulong delay = 100;
 static char test_mode[12] = "irq";
@@ -28,6 +29,8 @@ MODULE_PARM_DESC(delay, "Period in microseconds (100 us default)");
 MODULE_PARM_DESC(test_mode, "Mode of the test such as preempt, irq, or alternate (default irq)");
 MODULE_PARM_DESC(burst_size, "The size of a burst (default 1)");
 
+static struct completion done;
+
 #define MIN(x, y) ((x) < (y) ? (x) : (y))
 
 static void busy_wait(ulong time)
@@ -114,6 +117,8 @@ static int preemptirq_delay_run(void *data)
 	for (i = 0; i < s; i++)
 		(testfuncs[i])(i);
 
+	complete(&done);
+
 	set_current_state(TASK_INTERRUPTIBLE);
 	while (!kthread_should_stop()) {
 		schedule();
@@ -128,15 +133,18 @@ static int preemptirq_delay_run(void *data)
 static int preemptirq_run_test(void)
 {
 	struct task_struct *task;
-
 	char task_name[50];
 
+	init_completion(&done);
+
 	snprintf(task_name, sizeof(task_name), "%s_test", test_mode);
 	task =  kthread_run(preemptirq_delay_run, NULL, task_name);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
 
-	if (task)
+	if (task) {
+		wait_for_completion(&done);
 		kthread_stop(task);
+	}
 	return 0;
 }
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 6f0b42ceeb00..b8e1ca48be50 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -193,7 +193,7 @@ rb_event_length(struct ring_buffer_event *event)
 	case RINGBUF_TYPE_DATA:
 		return rb_event_data_length(event);
 	default:
-		BUG();
+		WARN_ON_ONCE(1);
 	}
 	/* not hit */
 	return 0;
@@ -249,7 +249,7 @@ rb_event_data(struct ring_buffer_event *event)
 {
 	if (extended_time(event))
 		event = skip_time_extend(event);
-	BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
+	WARN_ON_ONCE(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
 	/* If length is in len field, then array[0] has the data */
 	if (event->type_len)
 		return (void *)&event->array[0];
@@ -3727,7 +3727,7 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 		return;
 
 	default:
-		BUG();
+		RB_WARN_ON(cpu_buffer, 1);
 	}
 	return;
 }
@@ -3757,7 +3757,7 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
 		return;
 
 	default:
-		BUG();
+		RB_WARN_ON(iter->cpu_buffer, 1);
 	}
 	return;
 }
@@ -4020,7 +4020,7 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
 		return event;
 
 	default:
-		BUG();
+		RB_WARN_ON(cpu_buffer, 1);
 	}
 
 	return NULL;
@@ -4034,7 +4034,6 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
 	int nr_loops = 0;
-	bool failed = false;
 
 	if (ts)
 		*ts = 0;
@@ -4056,19 +4055,14 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 		return NULL;
 
 	/*
-	 * We repeat when a time extend is encountered or we hit
-	 * the end of the page. Since the time extend is always attached
-	 * to a data event, we should never loop more than three times.
-	 * Once for going to next page, once on time extend, and
-	 * finally once to get the event.
-	 * We should never hit the following condition more than thrice,
-	 * unless the buffer is very small, and there's a writer
-	 * that is causing the reader to fail getting an event.
+	 * As the writer can mess with what the iterator is trying
+	 * to read, just give up if we fail to get an event after
+	 * three tries. The iterator is not as reliable when reading
+	 * the ring buffer with an active write as the consumer is.
+	 * Do not warn if the three failures is reached.
 	 */
-	if (++nr_loops > 3) {
-		RB_WARN_ON(cpu_buffer, !failed);
+	if (++nr_loops > 3)
 		return NULL;
-	}
 
 	if (rb_per_cpu_empty(cpu_buffer))
 		return NULL;
@@ -4079,10 +4073,8 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 	}
 
 	event = rb_iter_head_event(iter);
-	if (!event) {
-		failed = true;
+	if (!event)
 		goto again;
-	}
 
 	switch (event->type_len) {
 	case RINGBUF_TYPE_PADDING:
@@ -4117,7 +4109,7 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 		return event;
 
 	default:
-		BUG();
+		RB_WARN_ON(cpu_buffer, 1);
 	}
 
 	return NULL;
diff --git a/kernel/umh.c b/kernel/umh.c
index 11bf5eea474c..3474d6aa55d8 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -475,6 +475,12 @@ static void umh_clean_and_save_pid(struct subprocess_info *info)
 {
 	struct umh_info *umh_info = info->data;
 
+	/* cleanup if umh_pipe_setup() was successful but exec failed */
+	if (info->pid && info->retval) {
+		fput(umh_info->pipe_to_umh);
+		fput(umh_info->pipe_from_umh);
+	}
+
 	argv_free(info->argv);
 	umh_info->pid = info->pid;
 }
