diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/btf.c | 46 | ||||
-rw-r--r-- | kernel/bpf/devmap.c | 7 | ||||
-rw-r--r-- | kernel/bpf/hashtab.c | 16 | ||||
-rw-r--r-- | kernel/bpf/sockmap.c | 43 | ||||
-rw-r--r-- | kernel/bpf/syscall.c | 4 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 11 | ||||
-rw-r--r-- | kernel/fork.c | 33 | ||||
-rw-r--r-- | kernel/freezer.c | 4 | ||||
-rw-r--r-- | kernel/kthread.c | 8 | ||||
-rw-r--r-- | kernel/memremap.c | 22 | ||||
-rw-r--r-- | kernel/power/hibernate.c | 16 | ||||
-rw-r--r-- | kernel/power/main.c | 12 | ||||
-rw-r--r-- | kernel/power/suspend.c | 4 | ||||
-rw-r--r-- | kernel/power/swap.c | 4 | ||||
-rw-r--r-- | kernel/power/user.c | 4 | ||||
-rw-r--r-- | kernel/reboot.c | 6 | ||||
-rw-r--r-- | kernel/rseq.c | 41 | ||||
-rw-r--r-- | kernel/sched/deadline.c | 11 | ||||
-rw-r--r-- | kernel/softirq.c | 12 | ||||
-rw-r--r-- | kernel/stop_machine.c | 6 | ||||
-rw-r--r-- | kernel/time/tick-common.c | 3 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 16 | ||||
-rw-r--r-- | kernel/trace/trace.c | 14 | ||||
-rw-r--r-- | kernel/trace/trace_events_trigger.c | 18 | ||||
-rw-r--r-- | kernel/trace/trace_kprobe.c | 21 | ||||
-rw-r--r-- | kernel/trace/trace_output.c | 5 |
26 files changed, 269 insertions, 118 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 2d49d18b793a..9704934252b3 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -450,7 +450,7 @@ static const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) */ static bool btf_type_int_is_regular(const struct btf_type *t) { - u16 nr_bits, nr_bytes; + u8 nr_bits, nr_bytes; u32 int_data; int_data = btf_type_int(t); @@ -991,38 +991,38 @@ static void btf_int_bits_seq_show(const struct btf *btf, void *data, u8 bits_offset, struct seq_file *m) { + u16 left_shift_bits, right_shift_bits; u32 int_data = btf_type_int(t); - u16 nr_bits = BTF_INT_BITS(int_data); - u16 total_bits_offset; - u16 nr_copy_bytes; - u16 nr_copy_bits; - u8 nr_upper_bits; - union { - u64 u64_num; - u8 u8_nums[8]; - } print_num; + u8 nr_bits = BTF_INT_BITS(int_data); + u8 total_bits_offset; + u8 nr_copy_bytes; + u8 nr_copy_bits; + u64 print_num; + /* + * bits_offset is at most 7. + * BTF_INT_OFFSET() cannot exceed 64 bits. + */ total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data); data += BITS_ROUNDDOWN_BYTES(total_bits_offset); bits_offset = BITS_PER_BYTE_MASKED(total_bits_offset); nr_copy_bits = nr_bits + bits_offset; nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits); - print_num.u64_num = 0; - memcpy(&print_num.u64_num, data, nr_copy_bytes); + print_num = 0; + memcpy(&print_num, data, nr_copy_bytes); - /* Ditch the higher order bits */ - nr_upper_bits = BITS_PER_BYTE_MASKED(nr_copy_bits); - if (nr_upper_bits) { - /* We need to mask out some bits of the upper byte. */ - u8 mask = (1 << nr_upper_bits) - 1; - - print_num.u8_nums[nr_copy_bytes - 1] &= mask; - } +#ifdef __BIG_ENDIAN_BITFIELD + left_shift_bits = bits_offset; +#else + left_shift_bits = BITS_PER_U64 - nr_copy_bits; +#endif + right_shift_bits = BITS_PER_U64 - nr_bits; - print_num.u64_num >>= bits_offset; + print_num <<= left_shift_bits; + print_num >>= right_shift_bits; - seq_printf(m, "0x%llx", print_num.u64_num); + seq_printf(m, "0x%llx", print_num); } static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t, @@ -1032,7 +1032,7 @@ static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t, u32 int_data = btf_type_int(t); u8 encoding = BTF_INT_ENCODING(int_data); bool sign = encoding & BTF_INT_SIGNED; - u32 nr_bits = BTF_INT_BITS(int_data); + u8 nr_bits = BTF_INT_BITS(int_data); if (bits_offset || BTF_INT_OFFSET(int_data) || BITS_PER_BYTE_MASKED(nr_bits)) { diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 642c97f6d1b8..d361fc1e3bf3 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -334,10 +334,15 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, { struct net_device *dev = dst->dev; struct xdp_frame *xdpf; + int err; if (!dev->netdev_ops->ndo_xdp_xmit) return -EOPNOTSUPP; + err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data); + if (unlikely(err)) + return err; + xdpf = convert_to_xdp_frame(xdp); if (unlikely(!xdpf)) return -EOVERFLOW; @@ -350,7 +355,7 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, { int err; - err = __xdp_generic_ok_fwd_dev(skb, dst->dev); + err = xdp_ok_fwd_dev(dst->dev, skb->len); if (unlikely(err)) return err; skb->dev = dst->dev; diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 3ca2198a6d22..513d9dfcf4ee 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -747,13 +747,15 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, * old element will be freed immediately. * Otherwise return an error */ - atomic_dec(&htab->count); - return ERR_PTR(-E2BIG); + l_new = ERR_PTR(-E2BIG); + goto dec_count; } l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN, htab->map.numa_node); - if (!l_new) - return ERR_PTR(-ENOMEM); + if (!l_new) { + l_new = ERR_PTR(-ENOMEM); + goto dec_count; + } } memcpy(l_new->key, key, key_size); @@ -766,7 +768,8 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, GFP_ATOMIC | __GFP_NOWARN); if (!pptr) { kfree(l_new); - return ERR_PTR(-ENOMEM); + l_new = ERR_PTR(-ENOMEM); + goto dec_count; } } @@ -780,6 +783,9 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, l_new->hash = hash; return l_new; +dec_count: + atomic_dec(&htab->count); + return l_new; } static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old, diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index cf7b6a6dbd1f..98fb7938beea 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -312,10 +312,12 @@ static void bpf_tcp_close(struct sock *sk, long timeout) struct smap_psock *psock; struct sock *osk; + lock_sock(sk); rcu_read_lock(); psock = smap_psock_sk(sk); if (unlikely(!psock)) { rcu_read_unlock(); + release_sock(sk); return sk->sk_prot->close(sk, timeout); } @@ -371,6 +373,7 @@ static void bpf_tcp_close(struct sock *sk, long timeout) e = psock_map_pop(sk, psock); } rcu_read_unlock(); + release_sock(sk); close_fun(sk, timeout); } @@ -568,7 +571,8 @@ static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md) while (sg[i].length) { free += sg[i].length; sk_mem_uncharge(sk, sg[i].length); - put_page(sg_page(&sg[i])); + if (!md->skb) + put_page(sg_page(&sg[i])); sg[i].length = 0; sg[i].page_link = 0; sg[i].offset = 0; @@ -577,6 +581,8 @@ static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md) if (i == MAX_SKB_FRAGS) i = 0; } + if (md->skb) + consume_skb(md->skb); return free; } @@ -1230,7 +1236,7 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) */ TCP_SKB_CB(skb)->bpf.sk_redir = NULL; skb->sk = psock->sock; - bpf_compute_data_pointers(skb); + bpf_compute_data_end_sk_skb(skb); preempt_disable(); rc = (*prog->bpf_func)(skb, prog->insnsi); preempt_enable(); @@ -1485,7 +1491,7 @@ static int smap_parse_func_strparser(struct strparser *strp, * any socket yet. */ skb->sk = psock->sock; - bpf_compute_data_pointers(skb); + bpf_compute_data_end_sk_skb(skb); rc = (*prog->bpf_func)(skb, prog->insnsi); skb->sk = NULL; rcu_read_unlock(); @@ -1896,7 +1902,7 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map, e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN); if (!e) { err = -ENOMEM; - goto out_progs; + goto out_free; } } @@ -2069,7 +2075,13 @@ static int sock_map_update_elem(struct bpf_map *map, return -EOPNOTSUPP; } + lock_sock(skops.sk); + preempt_disable(); + rcu_read_lock(); err = sock_map_ctx_update_elem(&skops, map, key, flags); + rcu_read_unlock(); + preempt_enable(); + release_sock(skops.sk); fput(socket->file); return err; } @@ -2342,7 +2354,10 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops, if (err) goto err; - /* bpf_map_update_elem() can be called in_irq() */ + /* psock is valid here because otherwise above *ctx_update_elem would + * have thrown an error. It is safe to skip error check. + */ + psock = smap_psock_sk(sock); raw_spin_lock_bh(&b->lock); l_old = lookup_elem_raw(head, hash, key, key_size); if (l_old && map_flags == BPF_NOEXIST) { @@ -2360,12 +2375,6 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops, goto bucket_err; } - psock = smap_psock_sk(sock); - if (unlikely(!psock)) { - err = -EINVAL; - goto bucket_err; - } - rcu_assign_pointer(e->hash_link, l_new); rcu_assign_pointer(e->htab, container_of(map, struct bpf_htab, map)); @@ -2388,12 +2397,10 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops, raw_spin_unlock_bh(&b->lock); return 0; bucket_err: + smap_release_sock(psock, sock); raw_spin_unlock_bh(&b->lock); err: kfree(e); - psock = smap_psock_sk(sock); - if (psock) - smap_release_sock(psock, sock); return err; } @@ -2415,7 +2422,13 @@ static int sock_hash_update_elem(struct bpf_map *map, return -EINVAL; } + lock_sock(skops.sk); + preempt_disable(); + rcu_read_lock(); err = sock_hash_ctx_update_elem(&skops, map, key, flags); + rcu_read_unlock(); + preempt_enable(); + release_sock(skops.sk); fput(socket->file); return err; } @@ -2472,10 +2485,8 @@ struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key) b = __select_bucket(htab, hash); head = &b->head; - raw_spin_lock_bh(&b->lock); l = lookup_elem_raw(head, hash, key, key_size); sk = l ? l->sk : NULL; - raw_spin_unlock_bh(&b->lock); return sk; } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index d10ecd78105f..a31a1ba0f8ea 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -735,7 +735,9 @@ static int map_update_elem(union bpf_attr *attr) if (bpf_map_is_dev_bound(map)) { err = bpf_map_offload_update_elem(map, key, value, attr->flags); goto out; - } else if (map->map_type == BPF_MAP_TYPE_CPUMAP) { + } else if (map->map_type == BPF_MAP_TYPE_CPUMAP || + map->map_type == BPF_MAP_TYPE_SOCKHASH || + map->map_type == BPF_MAP_TYPE_SOCKMAP) { err = map->ops->map_update_elem(map, key, value, attr->flags); goto out; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9e2bf834f13a..63aaac52a265 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5430,6 +5430,10 @@ static int jit_subprogs(struct bpf_verifier_env *env) if (insn->code != (BPF_JMP | BPF_CALL) || insn->src_reg != BPF_PSEUDO_CALL) continue; + /* Upon error here we cannot fall back to interpreter but + * need a hard reject of the program. Thus -EFAULT is + * propagated in any case. + */ subprog = find_subprog(env, i + insn->imm + 1); if (subprog < 0) { WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", @@ -5450,7 +5454,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL); if (!func) - return -ENOMEM; + goto out_undo_insn; for (i = 0; i < env->subprog_cnt; i++) { subprog_start = subprog_end; @@ -5515,7 +5519,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) tmp = bpf_int_jit_compile(func[i]); if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) { verbose(env, "JIT doesn't support bpf-to-bpf calls\n"); - err = -EFAULT; + err = -ENOTSUPP; goto out_free; } cond_resched(); @@ -5552,6 +5556,7 @@ out_free: if (func[i]) bpf_jit_free(func[i]); kfree(func); +out_undo_insn: /* cleanup main prog to be interpreted */ prog->jit_requested = 0; for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { @@ -5578,6 +5583,8 @@ static int fixup_call_args(struct bpf_verifier_env *env) err = jit_subprogs(env); if (err == 0) return 0; + if (err == -EFAULT) + return err; } #ifndef CONFIG_BPF_JIT_ALWAYS_ON for (i = 0; i < prog->len; i++, insn++) { diff --git a/kernel/fork.c b/kernel/fork.c index 9440d61b925c..1b27babc4c78 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -303,11 +303,36 @@ struct kmem_cache *files_cachep; struct kmem_cache *fs_cachep; /* SLAB cache for vm_area_struct structures */ -struct kmem_cache *vm_area_cachep; +static struct kmem_cache *vm_area_cachep; /* SLAB cache for mm_struct structures (tsk->mm) */ static struct kmem_cache *mm_cachep; +struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) +{ + struct vm_area_struct *vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + + if (vma) + vma_init(vma, mm); + return vma; +} + +struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) +{ + struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); + + if (new) { + *new = *orig; + INIT_LIST_HEAD(&new->anon_vma_chain); + } + return new; +} + +void vm_area_free(struct vm_area_struct *vma) +{ + kmem_cache_free(vm_area_cachep, vma); +} + static void account_kernel_stack(struct task_struct *tsk, int account) { void *stack = task_stack_page(tsk); @@ -455,11 +480,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, goto fail_nomem; charge = len; } - tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); + tmp = vm_area_dup(mpnt); if (!tmp) goto fail_nomem; - *tmp = *mpnt; - INIT_LIST_HEAD(&tmp->anon_vma_chain); retval = vma_dup_policy(mpnt, tmp); if (retval) goto fail_nomem_policy; @@ -539,7 +562,7 @@ fail_uprobe_end: fail_nomem_anon_vma_fork: mpol_put(vma_policy(tmp)); fail_nomem_policy: - kmem_cache_free(vm_area_cachep, tmp); + vm_area_free(tmp); fail_nomem: retval = -ENOMEM; vm_unacct_memory(charge); diff --git a/kernel/freezer.c b/kernel/freezer.c index 6f56a9e219fa..b162b74611e4 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -15,7 +15,9 @@ atomic_t system_freezing_cnt = ATOMIC_INIT(0); EXPORT_SYMBOL(system_freezing_cnt); -/* indicate whether PM freezing is in effect, protected by pm_mutex */ +/* indicate whether PM freezing is in effect, protected by + * system_transition_mutex + */ bool pm_freezing; bool pm_nosig_freezing; diff --git a/kernel/kthread.c b/kernel/kthread.c index 750cb8082694..486dedbd9af5 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -325,8 +325,14 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data), task = create->result; if (!IS_ERR(task)) { static const struct sched_param param = { .sched_priority = 0 }; + char name[TASK_COMM_LEN]; - vsnprintf(task->comm, sizeof(task->comm), namefmt, args); + /* + * task is already visible to other tasks, so updating + * COMM must be protected. + */ + vsnprintf(name, sizeof(name), namefmt, args); + set_task_comm(task, name); /* * root may have changed our (kthreadd's) priority or CPU mask. * The kernel thread should not inherit these properties. diff --git a/kernel/memremap.c b/kernel/memremap.c index 5857267a4af5..38283363da06 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -176,10 +176,27 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) unsigned long pfn, pgoff, order; pgprot_t pgprot = PAGE_KERNEL; int error, nid, is_ram; + struct dev_pagemap *conflict_pgmap; align_start = res->start & ~(SECTION_SIZE - 1); align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) - align_start; + align_end = align_start + align_size - 1; + + conflict_pgmap = get_dev_pagemap(PHYS_PFN(align_start), NULL); + if (conflict_pgmap) { + dev_WARN(dev, "Conflicting mapping in same section\n"); + put_dev_pagemap(conflict_pgmap); + return ERR_PTR(-ENOMEM); + } + + conflict_pgmap = get_dev_pagemap(PHYS_PFN(align_end), NULL); + if (conflict_pgmap) { + dev_WARN(dev, "Conflicting mapping in same section\n"); + put_dev_pagemap(conflict_pgmap); + return ERR_PTR(-ENOMEM); + } + is_ram = region_intersects(align_start, align_size, IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE); @@ -199,7 +216,6 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) mutex_lock(&pgmap_lock); error = 0; - align_end = align_start + align_size - 1; foreach_order_pgoff(res, order, pgoff) { error = __radix_tree_insert(&pgmap_radix, @@ -305,7 +321,7 @@ EXPORT_SYMBOL_GPL(get_dev_pagemap); #ifdef CONFIG_DEV_PAGEMAP_OPS DEFINE_STATIC_KEY_FALSE(devmap_managed_key); -EXPORT_SYMBOL_GPL(devmap_managed_key); +EXPORT_SYMBOL(devmap_managed_key); static atomic_t devmap_enable; /* @@ -346,5 +362,5 @@ void __put_devmap_managed_page(struct page *page) } else if (!count) __put_page(page); } -EXPORT_SYMBOL_GPL(__put_devmap_managed_page); +EXPORT_SYMBOL(__put_devmap_managed_page); #endif /* CONFIG_DEV_PAGEMAP_OPS */ diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 9c85c7822383..abef759de7c8 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -338,7 +338,7 @@ static int create_image(int platform_mode) * hibernation_snapshot - Quiesce devices and create a hibernation image. * @platform_mode: If set, use platform driver to prepare for the transition. * - * This routine must be called with pm_mutex held. + * This routine must be called with system_transition_mutex held. */ int hibernation_snapshot(int platform_mode) { @@ -500,8 +500,9 @@ static int resume_target_kernel(bool platform_mode) * hibernation_restore - Quiesce devices and restore from a hibernation image. * @platform_mode: If set, use platform driver to prepare for the transition. * - * This routine must be called with pm_mutex held. If it is successful, control - * reappears in the restored target kernel in hibernation_snapshot(). + * This routine must be called with system_transition_mutex held. If it is + * successful, control reappears in the restored target kernel in + * hibernation_snapshot(). */ int hibernation_restore(int platform_mode) { @@ -638,6 +639,7 @@ static void power_down(void) break; case HIBERNATION_PLATFORM: hibernation_platform_enter(); + /* Fall through */ case HIBERNATION_SHUTDOWN: if (pm_power_off) kernel_power_off(); @@ -805,13 +807,13 @@ static int software_resume(void) * name_to_dev_t() below takes a sysfs buffer mutex when sysfs * is configured into the kernel. Since the regular hibernate * trigger path is via sysfs which takes a buffer mutex before - * calling hibernate functions (which take pm_mutex) this can - * cause lockdep to complain about a possible ABBA deadlock + * calling hibernate functions (which take system_transition_mutex) + * this can cause lockdep to complain about a possible ABBA deadlock * which cannot happen since we're in the boot code here and * sysfs can't be invoked yet. Therefore, we use a subclass * here to avoid lockdep complaining. */ - mutex_lock_nested(&pm_mutex, SINGLE_DEPTH_NESTING); + mutex_lock_nested(&system_transition_mutex, SINGLE_DEPTH_NESTING); if (swsusp_resume_device) goto Check_image; @@ -899,7 +901,7 @@ static int software_resume(void) atomic_inc(&snapshot_device_available); /* For success case, the suspend path will release the lock */ Unlock: - mutex_unlock(&pm_mutex); + mutex_unlock(&system_transition_mutex); pm_pr_dbg("Hibernation image not present or could not be loaded.\n"); return error; Close_Finish: diff --git a/kernel/power/main.c b/kernel/power/main.c index d9706da10930..35b50823d83b 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -15,17 +15,16 @@ #include <linux/workqueue.h> #include <linux/debugfs.h> #include <linux/seq_file.h> +#include <linux/suspend.h> #include "power.h" -DEFINE_MUTEX(pm_mutex); - #ifdef CONFIG_PM_SLEEP void lock_system_sleep(void) { current->flags |= PF_FREEZER_SKIP; - mutex_lock(&pm_mutex); + mutex_lock(&system_transition_mutex); } EXPORT_SYMBOL_GPL(lock_system_sleep); @@ -37,8 +36,9 @@ void unlock_system_sleep(void) * * Reason: * Fundamentally, we just don't need it, because freezing condition - * doesn't come into effect until we release the pm_mutex lock, - * since the freezer always works with pm_mutex held. + * doesn't come into effect until we release the + * system_transition_mutex lock, since the freezer always works with + * system_transition_mutex held. * * More importantly, in the case of hibernation, * unlock_system_sleep() gets called in snapshot_read() and @@ -47,7 +47,7 @@ void unlock_system_sleep(void) * enter the refrigerator, thus causing hibernation to lockup. */ current->flags &= ~PF_FREEZER_SKIP; - mutex_unlock(&pm_mutex); + mutex_unlock(&system_transition_mutex); } EXPORT_SYMBOL_GPL(unlock_system_sleep); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 87331565e505..9e13afe65a14 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -556,7 +556,7 @@ static int enter_state(suspend_state_t state) } else if (!valid_state(state)) { return -EINVAL; } - if (!mutex_trylock(&pm_mutex)) + if (!mutex_trylock(&system_transition_mutex)) return -EBUSY; if (state == PM_SUSPEND_TO_IDLE) @@ -590,7 +590,7 @@ static int enter_state(suspend_state_t state) pm_pr_dbg("Finishing wakeup.\n"); suspend_finish(); Unlock: - mutex_unlock(&pm_mutex); + mutex_unlock(&system_transition_mutex); return error; } diff --git a/kernel/power/swap.c b/kernel/power/swap.c index c2bcf97d24c8..d7f6c1a288d3 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -923,7 +923,7 @@ int swsusp_write(unsigned int flags) } memset(&snapshot, 0, sizeof(struct snapshot_handle)); error = snapshot_read_next(&snapshot); - if (error < PAGE_SIZE) { + if (error < (int)PAGE_SIZE) { if (error >= 0) error = -EFAULT; @@ -1483,7 +1483,7 @@ int swsusp_read(unsigned int *flags_p) memset(&snapshot, 0, sizeof(struct snapshot_handle)); error = snapshot_write_next(&snapshot); - if (error < PAGE_SIZE) + if (error < (int)PAGE_SIZE) return error < 0 ? error : -EFAULT; header = (struct swsusp_info *)data_of(snapshot); error = get_swap_reader(&handle, flags_p); diff --git a/kernel/power/user.c b/kernel/power/user.c index abd225550271..2d8b60a3c86b 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -216,7 +216,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!mutex_trylock(&pm_mutex)) + if (!mutex_trylock(&system_transition_mutex)) return -EBUSY; lock_device_hotplug(); @@ -394,7 +394,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, } unlock_device_hotplug(); - mutex_unlock(&pm_mutex); + mutex_unlock(&system_transition_mutex); return error; } diff --git a/kernel/reboot.c b/kernel/reboot.c index e4ced883d8de..8fb44dec9ad7 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -294,7 +294,7 @@ void kernel_power_off(void) } EXPORT_SYMBOL_GPL(kernel_power_off); -static DEFINE_MUTEX(reboot_mutex); +DEFINE_MUTEX(system_transition_mutex); /* * Reboot system call: for obvious reasons only root may call it, @@ -338,7 +338,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) cmd = LINUX_REBOOT_CMD_HALT; - mutex_lock(&reboot_mutex); + mutex_lock(&system_transition_mutex); switch (cmd) { case LINUX_REBOOT_CMD_RESTART: kernel_restart(NULL); @@ -389,7 +389,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, ret = -EINVAL; break; } - mutex_unlock(&reboot_mutex); + mutex_unlock(&system_transition_mutex); return ret; } diff --git a/kernel/rseq.c b/kernel/rseq.c index 22b6acf1ad63..c6242d8594dc 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -85,9 +85,9 @@ static int rseq_update_cpu_id(struct task_struct *t) { u32 cpu_id = raw_smp_processor_id(); - if (__put_user(cpu_id, &t->rseq->cpu_id_start)) + if (put_user(cpu_id, &t->rseq->cpu_id_start)) return -EFAULT; - if (__put_user(cpu_id, &t->rseq->cpu_id)) + if (put_user(cpu_id, &t->rseq->cpu_id)) return -EFAULT; trace_rseq_update(t); return 0; @@ -100,14 +100,14 @@ static int rseq_reset_rseq_cpu_id(struct task_struct *t) /* * Reset cpu_id_start to its initial state (0). */ - if (__put_user(cpu_id_start, &t->rseq->cpu_id_start)) + if (put_user(cpu_id_start, &t->rseq->cpu_id_start)) return -EFAULT; /* * Reset cpu_id to RSEQ_CPU_ID_UNINITIALIZED, so any user coming * in after unregistration can figure out that rseq needs to be * registered again. */ - if (__put_user(cpu_id, &t->rseq->cpu_id)) + if (put_user(cpu_id, &t->rseq->cpu_id)) return -EFAULT; return 0; } @@ -115,29 +115,36 @@ static int rseq_reset_rseq_cpu_id(struct task_struct *t) static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs) { struct rseq_cs __user *urseq_cs; - unsigned long ptr; + u64 ptr; u32 __user *usig; u32 sig; int ret; - ret = __get_user(ptr, &t->rseq->rseq_cs); - if (ret) - return ret; + if (copy_from_user(&ptr, &t->rseq->rseq_cs.ptr64, sizeof(ptr))) + return -EFAULT; if (!ptr) { memset(rseq_cs, 0, sizeof(*rseq_cs)); return 0; } - urseq_cs = (struct rseq_cs __user *)ptr; + if (ptr >= TASK_SIZE) + return -EINVAL; + urseq_cs = (struct rseq_cs __user *)(unsigned long)ptr; if (copy_from_user(rseq_cs, urseq_cs, sizeof(*rseq_cs))) return -EFAULT; - if (rseq_cs->version > 0) - return -EINVAL; + if (rseq_cs->start_ip >= TASK_SIZE || + rseq_cs->start_ip + rseq_cs->post_commit_offset >= TASK_SIZE || + rseq_cs->abort_ip >= TASK_SIZE || + rseq_cs->version > 0) + return -EINVAL; + /* Check for overflow. */ + if (rseq_cs->start_ip + rseq_cs->post_commit_offset < rseq_cs->start_ip) + return -EINVAL; /* Ensure that abort_ip is not in the critical section. */ if (rseq_cs->abort_ip - rseq_cs->start_ip < rseq_cs->post_commit_offset) return -EINVAL; - usig = (u32 __user *)(rseq_cs->abort_ip - sizeof(u32)); + usig = (u32 __user *)(unsigned long)(rseq_cs->abort_ip - sizeof(u32)); ret = get_user(sig, usig); if (ret) return ret; @@ -146,7 +153,7 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs) printk_ratelimited(KERN_WARNING "Possible attack attempt. Unexpected rseq signature 0x%x, expecting 0x%x (pid=%d, addr=%p).\n", sig, current->rseq_sig, current->pid, usig); - return -EPERM; + return -EINVAL; } return 0; } @@ -157,7 +164,7 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags) int ret; /* Get thread flags. */ - ret = __get_user(flags, &t->rseq->flags); + ret = get_user(flags, &t->rseq->flags); if (ret) return ret; @@ -195,9 +202,11 @@ static int clear_rseq_cs(struct task_struct *t) * of code outside of the rseq assembly block. This performs * a lazy clear of the rseq_cs field. * - * Set rseq_cs to NULL with single-copy atomicity. + * Set rseq_cs to NULL. */ - return __put_user(0UL, &t->rseq->rseq_cs); + if (clear_user(&t->rseq->rseq_cs.ptr64, sizeof(t->rseq->rseq_cs.ptr64))) + return -EFAULT; + return 0; } /* diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index fbfc3f1d368a..10c7b51c0d1f 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -2290,8 +2290,17 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p) if (task_on_rq_queued(p) && p->dl.dl_runtime) task_non_contending(p); - if (!task_on_rq_queued(p)) + if (!task_on_rq_queued(p)) { + /* + * Inactive timer is armed. However, p is leaving DEADLINE and + * might migrate away from this rq while continuing to run on + * some other class. We need to remove its contribution from + * this rq running_bw now, or sub_rq_bw (below) will complain. + */ + if (p->dl.dl_non_contending) + sub_running_bw(&p->dl, &rq->dl); sub_rq_bw(&p->dl, &rq->dl); + } /* * We cannot use inactive_task_timer() to invoke sub_running_bw() diff --git a/kernel/softirq.c b/kernel/softirq.c index 900dcfee542c..75ffc1d1a2e0 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -79,12 +79,16 @@ static void wakeup_softirqd(void) /* * If ksoftirqd is scheduled, we do not want to process pending softirqs - * right now. Let ksoftirqd handle this at its own rate, to get fairness. + * right now. Let ksoftirqd handle this at its own rate, to get fairness, + * unless we're doing some of the synchronous softirqs. */ -static bool ksoftirqd_running(void) +#define SOFTIRQ_NOW_MASK ((1 << HI_SOFTIRQ) | (1 << TASKLET_SOFTIRQ)) +static bool ksoftirqd_running(unsigned long pending) { struct task_struct *tsk = __this_cpu_read(ksoftirqd); + if (pending & SOFTIRQ_NOW_MASK) + return false; return tsk && (tsk->state == TASK_RUNNING); } @@ -328,7 +332,7 @@ asmlinkage __visible void do_softirq(void) pending = local_softirq_pending(); - if (pending && !ksoftirqd_running()) + if (pending && !ksoftirqd_running(pending)) do_softirq_own_stack(); local_irq_restore(flags); @@ -355,7 +359,7 @@ void irq_enter(void) static inline void invoke_softirq(void) { - if (ksoftirqd_running()) + if (ksoftirqd_running(local_softirq_pending())) return; if (!force_irqthreads) { diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index f89014a2c238..1ff523dae6e2 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -270,7 +270,11 @@ unlock: goto retry; } - wake_up_q(&wakeq); + if (!err) { + preempt_disable(); + wake_up_q(&wakeq); + preempt_enable(); + } return err; } diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index b7005dd21ec1..14de3727b18e 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -277,8 +277,7 @@ static bool tick_check_preferred(struct clock_event_device *curdev, */ return !curdev || newdev->rating > curdev->rating || - (!cpumask_equal(curdev->cpumask, newdev->cpumask) && - !tick_check_percpu(curdev, newdev, smp_processor_id())); + !cpumask_equal(curdev->cpumask, newdev->cpumask); } /* diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 6a46af21765c..0b0b688ea166 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3227,6 +3227,22 @@ int ring_buffer_record_is_on(struct ring_buffer *buffer) } /** + * ring_buffer_record_is_set_on - return true if the ring buffer is set writable + * @buffer: The ring buffer to see if write is set enabled + * + * Returns true if the ring buffer is set writable by ring_buffer_record_on(). + * Note that this does NOT mean it is in a writable state. + * + * It may return true when the ring buffer has been disabled by + * ring_buffer_record_disable(), as that is a temporary disabling of + * the ring buffer. + */ +int ring_buffer_record_is_set_on(struct ring_buffer *buffer) +{ + return !(atomic_read(&buffer->record_disabled) & RB_BUFFER_OFF); +} + +/** * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer * @buffer: The ring buffer to stop writes to. * @cpu: The CPU buffer to stop diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f054bd6a1c66..823687997b01 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1373,6 +1373,12 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) arch_spin_lock(&tr->max_lock); + /* Inherit the recordable setting from trace_buffer */ + if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer)) + ring_buffer_record_on(tr->max_buffer.buffer); + else + ring_buffer_record_off(tr->max_buffer.buffer); + swap(tr->trace_buffer.buffer, tr->max_buffer.buffer); __update_max_tr(tr, tsk, cpu); @@ -3365,8 +3371,8 @@ static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m, print_event_info(buf, m); - seq_printf(m, "# TASK-PID CPU# %s TIMESTAMP FUNCTION\n", tgid ? "TGID " : ""); - seq_printf(m, "# | | | %s | |\n", tgid ? " | " : ""); + seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? "TGID " : ""); + seq_printf(m, "# | | %s | | |\n", tgid ? " | " : ""); } static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m, @@ -3386,9 +3392,9 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file tgid ? tgid_space : space); seq_printf(m, "# %s||| / delay\n", tgid ? tgid_space : space); - seq_printf(m, "# TASK-PID CPU#%s|||| TIMESTAMP FUNCTION\n", + seq_printf(m, "# TASK-PID %sCPU# |||| TIMESTAMP FUNCTION\n", tgid ? " TGID " : space); - seq_printf(m, "# | | | %s|||| | |\n", + seq_printf(m, "# | | %s | |||| | |\n", tgid ? " | " : space); } diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index d18249683682..5dea177cef53 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -679,6 +679,8 @@ event_trigger_callback(struct event_command *cmd_ops, goto out_free; out_reg: + /* Up the trigger_data count to make sure reg doesn't free it on failure */ + event_trigger_init(trigger_ops, trigger_data); ret = cmd_ops->reg(glob, trigger_ops, trigger_data, file); /* * The above returns on success the # of functions enabled, @@ -686,11 +688,13 @@ event_trigger_callback(struct event_command *cmd_ops, * Consider no functions a failure too. */ if (!ret) { + cmd_ops->unreg(glob, trigger_ops, trigger_data, file); ret = -ENOENT; - goto out_free; - } else if (ret < 0) - goto out_free; - ret = 0; + } else if (ret > 0) + ret = 0; + + /* Down the counter of trigger_data or free it if not used anymore */ + event_trigger_free(trigger_ops, trigger_data); out: return ret; @@ -1416,6 +1420,9 @@ int event_enable_trigger_func(struct event_command *cmd_ops, goto out; } + /* Up the trigger_data count to make sure nothing frees it on failure */ + event_trigger_init(trigger_ops, trigger_data); + if (trigger) { number = strsep(&trigger, ":"); @@ -1466,6 +1473,7 @@ int event_enable_trigger_func(struct event_command *cmd_ops, goto out_disable; /* Just return zero, not the number of enabled functions */ ret = 0; + event_trigger_free(trigger_ops, trigger_data); out: return ret; @@ -1476,7 +1484,7 @@ int event_enable_trigger_func(struct event_command *cmd_ops, out_free: if (cmd_ops->set_filter) cmd_ops->set_filter(NULL, trigger_data, NULL); - kfree(trigger_data); + event_trigger_free(trigger_ops, trigger_data); kfree(enable_data); goto out; } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index daa81571b22a..6b71860f3998 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -400,11 +400,10 @@ static struct trace_kprobe *find_trace_kprobe(const char *event, static int enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) { + struct event_file_link *link = NULL; int ret = 0; if (file) { - struct event_file_link *link; - link = kmalloc(sizeof(*link), GFP_KERNEL); if (!link) { ret = -ENOMEM; @@ -424,6 +423,18 @@ enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) else ret = enable_kprobe(&tk->rp.kp); } + + if (ret) { + if (file) { + /* Notice the if is true on not WARN() */ + if (!WARN_ON_ONCE(!link)) + list_del_rcu(&link->list); + kfree(link); + tk->tp.flags &= ~TP_FLAG_TRACE; + } else { + tk->tp.flags &= ~TP_FLAG_PROFILE; + } + } out: return ret; } @@ -1480,8 +1491,10 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs, } ret = __register_trace_kprobe(tk); - if (ret < 0) + if (ret < 0) { + kfree(tk->tp.call.print_fmt); goto error; + } return &tk->tp.call; error: @@ -1501,6 +1514,8 @@ void destroy_local_trace_kprobe(struct trace_event_call *event_call) } __unregister_trace_kprobe(tk); + + kfree(tk->tp.call.print_fmt); free_trace_kprobe(tk); } #endif /* CONFIG_PERF_EVENTS */ diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 90db994ac900..1c8e30fda46a 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -594,8 +594,7 @@ int trace_print_context(struct trace_iterator *iter) trace_find_cmdline(entry->pid, comm); - trace_seq_printf(s, "%16s-%-5d [%03d] ", - comm, entry->pid, iter->cpu); + trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); if (tr->trace_flags & TRACE_ITER_RECORD_TGID) { unsigned int tgid = trace_find_tgid(entry->pid); @@ -606,6 +605,8 @@ int trace_print_context(struct trace_iterator *iter) trace_seq_printf(s, "(%5d) ", tgid); } + trace_seq_printf(s, "[%03d] ", iter->cpu); + if (tr->trace_flags & TRACE_ITER_IRQ_INFO) trace_print_lat_fmt(s, entry); |