diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/arraymap.c | 26 | ||||
-rw-r--r-- | kernel/bpf/core.c | 30 | ||||
-rw-r--r-- | kernel/bpf/hashtab.c | 4 | ||||
-rw-r--r-- | kernel/bpf/helpers.c | 7 | ||||
-rw-r--r-- | kernel/bpf/syscall.c | 83 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 111 | ||||
-rw-r--r-- | kernel/events/core.c | 129 | ||||
-rw-r--r-- | kernel/irq/handle.c | 2 | ||||
-rw-r--r-- | kernel/irq/manage.c | 9 | ||||
-rw-r--r-- | kernel/irq/msi.c | 6 | ||||
-rw-r--r-- | kernel/irq/proc.c | 19 | ||||
-rw-r--r-- | kernel/locking/lockdep.c | 10 | ||||
-rw-r--r-- | kernel/ptrace.c | 5 | ||||
-rw-r--r-- | kernel/rcu/tree.c | 5 | ||||
-rw-r--r-- | kernel/sched/core.c | 24 | ||||
-rw-r--r-- | kernel/sched/sched.h | 5 | ||||
-rw-r--r-- | kernel/seccomp.c | 78 | ||||
-rw-r--r-- | kernel/sysctl.c | 13 | ||||
-rw-r--r-- | kernel/time/clocksource.c | 2 | ||||
-rw-r--r-- | kernel/time/timekeeping.c | 2 | ||||
-rw-r--r-- | kernel/trace/bpf_trace.c | 55 | ||||
-rw-r--r-- | kernel/workqueue.c | 8 |
22 files changed, 527 insertions, 106 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 29ace107f236..3f4c99e06c6b 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -15,6 +15,7 @@ #include <linux/slab.h> #include <linux/mm.h> #include <linux/filter.h> +#include <linux/perf_event.h> /* Called from syscall */ static struct bpf_map *array_map_alloc(union bpf_attr *attr) @@ -48,7 +49,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) array->map.key_size = attr->key_size; array->map.value_size = attr->value_size; array->map.max_entries = attr->max_entries; - + array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT; array->elem_size = elem_size; return &array->map; @@ -291,14 +292,23 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd) attr = perf_event_attrs(event); if (IS_ERR(attr)) - return (void *)attr; + goto err; - if (attr->type != PERF_TYPE_RAW && - attr->type != PERF_TYPE_HARDWARE) { - perf_event_release_kernel(event); - return ERR_PTR(-EINVAL); - } - return event; + if (attr->inherit) + goto err; + + if (attr->type == PERF_TYPE_RAW) + return event; + + if (attr->type == PERF_TYPE_HARDWARE) + return event; + + if (attr->type == PERF_TYPE_SOFTWARE && + attr->config == PERF_COUNT_SW_BPF_OUTPUT) + return event; +err: + perf_event_release_kernel(event); + return ERR_PTR(-EINVAL); } static void perf_event_fd_array_put_ptr(void *ptr) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 67c380cfa9ca..80864712d2c4 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -82,6 +82,8 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) if (fp == NULL) return NULL; + kmemcheck_annotate_bitfield(fp, meta); + aux = kzalloc(sizeof(*aux), GFP_KERNEL | gfp_extra_flags); if (aux == NULL) { vfree(fp); @@ -110,6 +112,8 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, fp = __vmalloc(size, gfp_flags, PAGE_KERNEL); if (fp != NULL) { + kmemcheck_annotate_bitfield(fp, meta); + memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE); fp->pages = size / PAGE_SIZE; @@ -727,6 +731,32 @@ void bpf_prog_free(struct bpf_prog *fp) } EXPORT_SYMBOL_GPL(bpf_prog_free); +/* RNG for unpriviledged user space with separated state from prandom_u32(). */ +static DEFINE_PER_CPU(struct rnd_state, bpf_user_rnd_state); + +void bpf_user_rnd_init_once(void) +{ + prandom_init_once(&bpf_user_rnd_state); +} + +u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + /* Should someone ever have the rather unwise idea to use some + * of the registers passed into this function, then note that + * this function is called from native eBPF and classic-to-eBPF + * transformations. Register assignments from both sides are + * different, f.e. classic always sets fn(ctx, A, X) here. + */ + struct rnd_state *state; + u32 res; + + state = &get_cpu_var(bpf_user_rnd_state); + res = prandom_u32_state(state); + put_cpu_var(state); + + return res; +} + /* Weak definitions of helper functions in case we don't have bpf syscall. */ const struct bpf_func_proto bpf_map_lookup_elem_proto __weak; const struct bpf_func_proto bpf_map_update_elem_proto __weak; diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 83c209d9b17a..28592d79502b 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -88,6 +88,10 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) htab->elem_size = sizeof(struct htab_elem) + round_up(htab->map.key_size, 8) + htab->map.value_size; + + htab->map.pages = round_up(htab->n_buckets * sizeof(struct hlist_head) + + htab->elem_size * htab->map.max_entries, + PAGE_SIZE) >> PAGE_SHIFT; return &htab->map; free_htab: diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 1447ec09421e..4504ca66118d 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -93,13 +93,8 @@ const struct bpf_func_proto bpf_map_delete_elem_proto = { .arg2_type = ARG_PTR_TO_MAP_KEY, }; -static u64 bpf_get_prandom_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) -{ - return prandom_u32(); -} - const struct bpf_func_proto bpf_get_prandom_u32_proto = { - .func = bpf_get_prandom_u32, + .func = bpf_user_rnd_u32, .gpl_only = false, .ret_type = RET_INTEGER, }; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 35bac8e8b071..687dd6ca574d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -18,6 +18,8 @@ #include <linux/filter.h> #include <linux/version.h> +int sysctl_unprivileged_bpf_disabled __read_mostly; + static LIST_HEAD(bpf_map_types); static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) @@ -44,11 +46,38 @@ void bpf_register_map_type(struct bpf_map_type_list *tl) list_add(&tl->list_node, &bpf_map_types); } +static int bpf_map_charge_memlock(struct bpf_map *map) +{ + struct user_struct *user = get_current_user(); + unsigned long memlock_limit; + + memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + + atomic_long_add(map->pages, &user->locked_vm); + + if (atomic_long_read(&user->locked_vm) > memlock_limit) { + atomic_long_sub(map->pages, &user->locked_vm); + free_uid(user); + return -EPERM; + } + map->user = user; + return 0; +} + +static void bpf_map_uncharge_memlock(struct bpf_map *map) +{ + struct user_struct *user = map->user; + + atomic_long_sub(map->pages, &user->locked_vm); + free_uid(user); +} + /* called from workqueue */ static void bpf_map_free_deferred(struct work_struct *work) { struct bpf_map *map = container_of(work, struct bpf_map, work); + bpf_map_uncharge_memlock(map); /* implementation dependent freeing */ map->ops->map_free(map); } @@ -108,6 +137,10 @@ static int map_create(union bpf_attr *attr) atomic_set(&map->refcnt, 1); + err = bpf_map_charge_memlock(map); + if (err) + goto free_map; + err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC); if (err < 0) @@ -402,6 +435,10 @@ static void fixup_bpf_calls(struct bpf_prog *prog) */ BUG_ON(!prog->aux->ops->get_func_proto); + if (insn->imm == BPF_FUNC_get_route_realm) + prog->dst_needed = 1; + if (insn->imm == BPF_FUNC_get_prandom_u32) + bpf_user_rnd_init_once(); if (insn->imm == BPF_FUNC_tail_call) { /* mark bpf_tail_call as different opcode * to avoid conditional branch in @@ -436,11 +473,37 @@ static void free_used_maps(struct bpf_prog_aux *aux) kfree(aux->used_maps); } +static int bpf_prog_charge_memlock(struct bpf_prog *prog) +{ + struct user_struct *user = get_current_user(); + unsigned long memlock_limit; + + memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + + atomic_long_add(prog->pages, &user->locked_vm); + if (atomic_long_read(&user->locked_vm) > memlock_limit) { + atomic_long_sub(prog->pages, &user->locked_vm); + free_uid(user); + return -EPERM; + } + prog->aux->user = user; + return 0; +} + +static void bpf_prog_uncharge_memlock(struct bpf_prog *prog) +{ + struct user_struct *user = prog->aux->user; + + atomic_long_sub(prog->pages, &user->locked_vm); + free_uid(user); +} + static void __prog_put_rcu(struct rcu_head *rcu) { struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); free_used_maps(aux); + bpf_prog_uncharge_memlock(aux->prog); bpf_prog_free(aux->prog); } @@ -457,6 +520,7 @@ void bpf_prog_put(struct bpf_prog *prog) { if (atomic_dec_and_test(&prog->aux->refcnt)) { free_used_maps(prog->aux); + bpf_prog_uncharge_memlock(prog); bpf_prog_free(prog); } } @@ -540,11 +604,18 @@ static int bpf_prog_load(union bpf_attr *attr) attr->kern_version != LINUX_VERSION_CODE) return -EINVAL; + if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN)) + return -EPERM; + /* plain bpf_prog allocation */ prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); if (!prog) return -ENOMEM; + err = bpf_prog_charge_memlock(prog); + if (err) + goto free_prog_nouncharge; + prog->len = attr->insn_cnt; err = -EFAULT; @@ -553,10 +624,10 @@ static int bpf_prog_load(union bpf_attr *attr) goto free_prog; prog->orig_prog = NULL; - prog->jited = false; + prog->jited = 0; atomic_set(&prog->aux->refcnt, 1); - prog->gpl_compatible = is_gpl; + prog->gpl_compatible = is_gpl ? 1 : 0; /* find program type: socket_filter vs tracing_filter */ err = find_prog_type(type, prog); @@ -586,6 +657,8 @@ static int bpf_prog_load(union bpf_attr *attr) free_used_maps: free_used_maps(prog->aux); free_prog: + bpf_prog_uncharge_memlock(prog); +free_prog_nouncharge: bpf_prog_free(prog); return err; } @@ -595,11 +668,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz union bpf_attr attr = {}; int err; - /* the syscall is limited to root temporarily. This restriction will be - * lifted when security audit is clean. Note that eBPF+tracing must have - * this restriction, since it may pass kernel data to user space - */ - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled) return -EPERM; if (!access_ok(VERIFY_READ, uattr, 1)) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b074b23000d6..b56cf51f8d42 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -199,6 +199,7 @@ struct verifier_env { struct verifier_state_list **explored_states; /* search pruning optimization */ struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ u32 used_map_cnt; /* number of used maps */ + bool allow_ptr_leaks; }; /* verbose verifier prints what it's seeing @@ -244,6 +245,7 @@ static const struct { } func_limit[] = { {BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call}, {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read}, + {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output}, }; static void print_verifier_state(struct verifier_env *env) @@ -538,6 +540,21 @@ static int bpf_size_to_bytes(int bpf_size) return -EINVAL; } +static bool is_spillable_regtype(enum bpf_reg_type type) +{ + switch (type) { + case PTR_TO_MAP_VALUE: + case PTR_TO_MAP_VALUE_OR_NULL: + case PTR_TO_STACK: + case PTR_TO_CTX: + case FRAME_PTR: + case CONST_PTR_TO_MAP: + return true; + default: + return false; + } +} + /* check_stack_read/write functions track spill/fill of registers, * stack boundary and alignment are checked in check_mem_access() */ @@ -550,9 +567,7 @@ static int check_stack_write(struct verifier_state *state, int off, int size, */ if (value_regno >= 0 && - (state->regs[value_regno].type == PTR_TO_MAP_VALUE || - state->regs[value_regno].type == PTR_TO_STACK || - state->regs[value_regno].type == PTR_TO_CTX)) { + is_spillable_regtype(state->regs[value_regno].type)) { /* register containing pointer is being spilled into stack */ if (size != BPF_REG_SIZE) { @@ -643,6 +658,20 @@ static int check_ctx_access(struct verifier_env *env, int off, int size, return -EACCES; } +static bool is_pointer_value(struct verifier_env *env, int regno) +{ + if (env->allow_ptr_leaks) + return false; + + switch (env->cur_state.regs[regno].type) { + case UNKNOWN_VALUE: + case CONST_IMM: + return false; + default: + return true; + } +} + /* check whether memory at (regno + off) is accessible for t = (read | write) * if t==write, value_regno is a register which value is stored into memory * if t==read, value_regno is a register which will receive the value from memory @@ -669,11 +698,21 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, } if (state->regs[regno].type == PTR_TO_MAP_VALUE) { + if (t == BPF_WRITE && value_regno >= 0 && + is_pointer_value(env, value_regno)) { + verbose("R%d leaks addr into map\n", value_regno); + return -EACCES; + } err = check_map_access(env, regno, off, size); if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown_value(state->regs, value_regno); } else if (state->regs[regno].type == PTR_TO_CTX) { + if (t == BPF_WRITE && value_regno >= 0 && + is_pointer_value(env, value_regno)) { + verbose("R%d leaks addr into ctx\n", value_regno); + return -EACCES; + } err = check_ctx_access(env, off, size, t); if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown_value(state->regs, value_regno); @@ -684,10 +723,17 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, verbose("invalid stack off=%d size=%d\n", off, size); return -EACCES; } - if (t == BPF_WRITE) + if (t == BPF_WRITE) { + if (!env->allow_ptr_leaks && + state->stack_slot_type[MAX_BPF_STACK + off] == STACK_SPILL && + size != BPF_REG_SIZE) { + verbose("attempt to corrupt spilled pointer on stack\n"); + return -EACCES; + } err = check_stack_write(state, off, size, value_regno); - else + } else { err = check_stack_read(state, off, size, value_regno); + } } else { verbose("R%d invalid mem access '%s'\n", regno, reg_type_str[state->regs[regno].type]); @@ -775,8 +821,13 @@ static int check_func_arg(struct verifier_env *env, u32 regno, return -EACCES; } - if (arg_type == ARG_ANYTHING) + if (arg_type == ARG_ANYTHING) { + if (is_pointer_value(env, regno)) { + verbose("R%d leaks addr into helper function\n", regno); + return -EACCES; + } return 0; + } if (arg_type == ARG_PTR_TO_STACK || arg_type == ARG_PTR_TO_MAP_KEY || arg_type == ARG_PTR_TO_MAP_VALUE) { @@ -860,7 +911,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) * don't allow any other map type to be passed into * the special func; */ - if (bool_map != bool_func) + if (bool_func && bool_map != bool_func) return -EINVAL; } @@ -950,8 +1001,9 @@ static int check_call(struct verifier_env *env, int func_id) } /* check validity of 32-bit and 64-bit arithmetic operations */ -static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn) +static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) { + struct reg_state *regs = env->cur_state.regs; u8 opcode = BPF_OP(insn->code); int err; @@ -976,6 +1028,12 @@ static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn) if (err) return err; + if (is_pointer_value(env, insn->dst_reg)) { + verbose("R%d pointer arithmetic prohibited\n", + insn->dst_reg); + return -EACCES; + } + /* check dest operand */ err = check_reg_arg(regs, insn->dst_reg, DST_OP); if (err) @@ -1012,6 +1070,11 @@ static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn) */ regs[insn->dst_reg] = regs[insn->src_reg]; } else { + if (is_pointer_value(env, insn->src_reg)) { + verbose("R%d partial copy of pointer\n", + insn->src_reg); + return -EACCES; + } regs[insn->dst_reg].type = UNKNOWN_VALUE; regs[insn->dst_reg].map_ptr = NULL; } @@ -1061,8 +1124,18 @@ static int check_alu_op(struct reg_state *regs, struct bpf_insn *insn) /* pattern match 'bpf_add Rx, imm' instruction */ if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 && regs[insn->dst_reg].type == FRAME_PTR && - BPF_SRC(insn->code) == BPF_K) + BPF_SRC(insn->code) == BPF_K) { stack_relative = true; + } else if (is_pointer_value(env, insn->dst_reg)) { + verbose("R%d pointer arithmetic prohibited\n", + insn->dst_reg); + return -EACCES; + } else if (BPF_SRC(insn->code) == BPF_X && + is_pointer_value(env, insn->src_reg)) { + verbose("R%d pointer arithmetic prohibited\n", + insn->src_reg); + return -EACCES; + } /* check dest operand */ err = check_reg_arg(regs, insn->dst_reg, DST_OP); @@ -1101,6 +1174,12 @@ static int check_cond_jmp_op(struct verifier_env *env, err = check_reg_arg(regs, insn->src_reg, SRC_OP); if (err) return err; + + if (is_pointer_value(env, insn->src_reg)) { + verbose("R%d pointer comparison prohibited\n", + insn->src_reg); + return -EACCES; + } } else { if (insn->src_reg != BPF_REG_0) { verbose("BPF_JMP uses reserved fields\n"); @@ -1155,6 +1234,9 @@ static int check_cond_jmp_op(struct verifier_env *env, regs[insn->dst_reg].type = CONST_IMM; regs[insn->dst_reg].imm = 0; } + } else if (is_pointer_value(env, insn->dst_reg)) { + verbose("R%d pointer comparison prohibited\n", insn->dst_reg); + return -EACCES; } else if (BPF_SRC(insn->code) == BPF_K && (opcode == BPF_JEQ || opcode == BPF_JNE)) { @@ -1658,7 +1740,7 @@ static int do_check(struct verifier_env *env) } if (class == BPF_ALU || class == BPF_ALU64) { - err = check_alu_op(regs, insn); + err = check_alu_op(env, insn); if (err) return err; @@ -1816,6 +1898,11 @@ static int do_check(struct verifier_env *env) if (err) return err; + if (is_pointer_value(env, BPF_REG_0)) { + verbose("R0 leaks addr as return value\n"); + return -EACCES; + } + process_bpf_exit: insn_idx = pop_stack(env, &prev_insn_idx); if (insn_idx < 0) { @@ -2024,7 +2111,7 @@ static int convert_ctx_accesses(struct verifier_env *env) cnt = env->prog->aux->ops-> convert_ctx_access(type, insn->dst_reg, insn->src_reg, - insn->off, insn_buf); + insn->off, insn_buf, env->prog); if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { verbose("bpf verifier is misconfigured\n"); return -EINVAL; @@ -2144,6 +2231,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) if (ret < 0) goto skip_full_check; + env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); + ret = do_check(env); skip_full_check: diff --git a/kernel/events/core.c b/kernel/events/core.c index f548f69c4299..64754bfecd70 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1243,11 +1243,7 @@ static inline void perf_event__state_init(struct perf_event *event) PERF_EVENT_STATE_INACTIVE; } -/* - * Called at perf_event creation and when events are attached/detached from a - * group. - */ -static void perf_event__read_size(struct perf_event *event) +static void __perf_event_read_size(struct perf_event *event, int nr_siblings) { int entry = sizeof(u64); /* value */ int size = 0; @@ -1263,7 +1259,7 @@ static void perf_event__read_size(struct perf_event *event) entry += sizeof(u64); if (event->attr.read_format & PERF_FORMAT_GROUP) { - nr += event->group_leader->nr_siblings; + nr += nr_siblings; size += sizeof(u64); } @@ -1271,14 +1267,11 @@ static void perf_event__read_size(struct perf_event *event) event->read_size = size; } -static void perf_event__header_size(struct perf_event *event) +static void __perf_event_header_size(struct perf_event *event, u64 sample_type) { struct perf_sample_data *data; - u64 sample_type = event->attr.sample_type; u16 size = 0; - perf_event__read_size(event); - if (sample_type & PERF_SAMPLE_IP) size += sizeof(data->ip); @@ -1303,6 +1296,17 @@ static void perf_event__header_size(struct perf_event *event) event->header_size = size; } +/* + * Called at perf_event creation and when events are attached/detached from a + * group. + */ +static void perf_event__header_size(struct perf_event *event) +{ + __perf_event_read_size(event, + event->group_leader->nr_siblings); + __perf_event_header_size(event, event->attr.sample_type); +} + static void perf_event__id_header_size(struct perf_event *event) { struct perf_sample_data *data; @@ -1330,6 +1334,27 @@ static void perf_event__id_header_size(struct perf_event *event) event->id_header_size = size; } +static bool perf_event_validate_size(struct perf_event *event) +{ + /* + * The values computed here will be over-written when we actually + * attach the event. + */ + __perf_event_read_size(event, event->group_leader->nr_siblings + 1); + __perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ); + perf_event__id_header_size(event); + + /* + * Sum the lot; should not exceed the 64k limit we have on records. + * Conservative limit to allow for callchains and other variable fields. + */ + if (event->read_size + event->header_size + + event->id_header_size + sizeof(struct perf_event_header) >= 16*1024) + return false; + + return true; +} + static void perf_group_attach(struct perf_event *event) { struct perf_event *group_leader = event->group_leader, *pos; @@ -5261,9 +5286,15 @@ void perf_output_sample(struct perf_output_handle *handle, if (sample_type & PERF_SAMPLE_RAW) { if (data->raw) { - perf_output_put(handle, data->raw->size); - __output_copy(handle, data->raw->data, - data->raw->size); + u32 raw_size = data->raw->size; + u32 real_size = round_up(raw_size + sizeof(u32), + sizeof(u64)) - sizeof(u32); + u64 zero = 0; + + perf_output_put(handle, real_size); + __output_copy(handle, data->raw->data, raw_size); + if (real_size - raw_size) + __output_copy(handle, &zero, real_size - raw_size); } else { struct { u32 size; @@ -5395,8 +5426,7 @@ void perf_prepare_sample(struct perf_event_header *header, else size += sizeof(u32); - WARN_ON_ONCE(size & (sizeof(u64)-1)); - header->size += size; + header->size += round_up(size, sizeof(u64)); } if (sample_type & PERF_SAMPLE_BRANCH_STACK) { @@ -8297,13 +8327,35 @@ SYSCALL_DEFINE5(perf_event_open, if (move_group) { gctx = group_leader->ctx; + mutex_lock_double(&gctx->mutex, &ctx->mutex); + } else { + mutex_lock(&ctx->mutex); + } + + if (!perf_event_validate_size(event)) { + err = -E2BIG; + goto err_locked; + } + + /* + * Must be under the same ctx::mutex as perf_install_in_context(), + * because we need to serialize with concurrent event creation. + */ + if (!exclusive_event_installable(event, ctx)) { + /* exclusive and group stuff are assumed mutually exclusive */ + WARN_ON_ONCE(move_group); + err = -EBUSY; + goto err_locked; + } + + WARN_ON_ONCE(ctx->parent_ctx); + + if (move_group) { /* * See perf_event_ctx_lock() for comments on the details * of swizzling perf_event::ctx. */ - mutex_lock_double(&gctx->mutex, &ctx->mutex); - perf_remove_from_context(group_leader, false); list_for_each_entry(sibling, &group_leader->sibling_list, @@ -8311,13 +8363,7 @@ SYSCALL_DEFINE5(perf_event_open, perf_remove_from_context(sibling, false); put_ctx(gctx); } - } else { - mutex_lock(&ctx->mutex); - } - - WARN_ON_ONCE(ctx->parent_ctx); - if (move_group) { /* * Wait for everybody to stop referencing the events through * the old lists, before installing it on new lists. @@ -8349,22 +8395,29 @@ SYSCALL_DEFINE5(perf_event_open, perf_event__state_init(group_leader); perf_install_in_context(ctx, group_leader, group_leader->cpu); get_ctx(ctx); - } - if (!exclusive_event_installable(event, ctx)) { - err = -EBUSY; - mutex_unlock(&ctx->mutex); - fput(event_file); - goto err_context; + /* + * Now that all events are installed in @ctx, nothing + * references @gctx anymore, so drop the last reference we have + * on it. + */ + put_ctx(gctx); } + /* + * Precalculate sample_data sizes; do while holding ctx::mutex such + * that we're serialized against further additions and before + * perf_install_in_context() which is the point the event is active and + * can use these values. + */ + perf_event__header_size(event); + perf_event__id_header_size(event); + perf_install_in_context(ctx, event, event->cpu); perf_unpin_context(ctx); - if (move_group) { + if (move_group) mutex_unlock(&gctx->mutex); - put_ctx(gctx); - } mutex_unlock(&ctx->mutex); put_online_cpus(); @@ -8376,12 +8429,6 @@ SYSCALL_DEFINE5(perf_event_open, mutex_unlock(¤t->perf_event_mutex); /* - * Precalculate sample_data sizes - */ - perf_event__header_size(event); - perf_event__id_header_size(event); - - /* * Drop the reference on the group_event after placing the * new event on the sibling_list. This ensures destruction * of the group leader will find the pointer to itself in @@ -8391,6 +8438,12 @@ SYSCALL_DEFINE5(perf_event_open, fd_install(event_fd, event_file); return event_fd; +err_locked: + if (move_group) + mutex_unlock(&gctx->mutex); + mutex_unlock(&ctx->mutex); +/* err_file: */ + fput(event_file); err_context: perf_unpin_context(ctx); put_ctx(ctx); diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index de41a68fc038..e25a83b67cce 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -22,7 +22,6 @@ /** * handle_bad_irq - handle spurious and unhandled irqs - * @irq: the interrupt number * @desc: description of the interrupt * * Handles spurious and unhandled IRQ's. It also prints a debugmessage. @@ -35,6 +34,7 @@ void handle_bad_irq(struct irq_desc *desc) kstat_incr_irqs_this_cpu(desc); ack_bad_irq(irq); } +EXPORT_SYMBOL_GPL(handle_bad_irq); /* * Special, empty irq handler: diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index f9a59f6cabd2..4c213864ec45 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1761,6 +1761,7 @@ void free_percpu_irq(unsigned int irq, void __percpu *dev_id) kfree(__free_percpu_irq(irq, dev_id)); chip_bus_sync_unlock(desc); } +EXPORT_SYMBOL_GPL(free_percpu_irq); /** * setup_percpu_irq - setup a per-cpu interrupt @@ -1790,9 +1791,10 @@ int setup_percpu_irq(unsigned int irq, struct irqaction *act) * @devname: An ascii name for the claiming device * @dev_id: A percpu cookie passed back to the handler function * - * This call allocates interrupt resources, but doesn't - * automatically enable the interrupt. It has to be done on each - * CPU using enable_percpu_irq(). + * This call allocates interrupt resources and enables the + * interrupt on the local CPU. If the interrupt is supposed to be + * enabled on other CPUs, it has to be done on each CPU using + * enable_percpu_irq(). * * Dev_id must be globally unique. It is a per-cpu variable, and * the handler gets called with the interrupted CPU's instance of @@ -1831,6 +1833,7 @@ int request_percpu_irq(unsigned int irq, irq_handler_t handler, return retval; } +EXPORT_SYMBOL_GPL(request_percpu_irq); /** * irq_get_irqchip_state - returns the irqchip state of a interrupt. diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 7e6512b9dc1f..be9149f62eb8 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -228,11 +228,7 @@ static void msi_domain_update_chip_ops(struct msi_domain_info *info) { struct irq_chip *chip = info->chip; - BUG_ON(!chip); - if (!chip->irq_mask) - chip->irq_mask = pci_msi_mask_irq; - if (!chip->irq_unmask) - chip->irq_unmask = pci_msi_unmask_irq; + BUG_ON(!chip || !chip->irq_mask || !chip->irq_unmask); if (!chip->irq_set_affinity) chip->irq_set_affinity = msi_domain_set_affinity; } diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index e3a8c9577ba6..a50ddc9417ff 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -12,6 +12,7 @@ #include <linux/seq_file.h> #include <linux/interrupt.h> #include <linux/kernel_stat.h> +#include <linux/mutex.h> #include "internals.h" @@ -323,18 +324,29 @@ void register_handler_proc(unsigned int irq, struct irqaction *action) void register_irq_proc(unsigned int irq, struct irq_desc *desc) { + static DEFINE_MUTEX(register_lock); char name [MAX_NAMELEN]; - if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip) || desc->dir) + if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip)) return; + /* + * irq directories are registered only when a handler is + * added, not when the descriptor is created, so multiple + * tasks might try to register at the same time. + */ + mutex_lock(®ister_lock); + + if (desc->dir) + goto out_unlock; + memset(name, 0, MAX_NAMELEN); sprintf(name, "%d", irq); /* create /proc/irq/1234 */ desc->dir = proc_mkdir(name, root_irq_dir); if (!desc->dir) - return; + goto out_unlock; #ifdef CONFIG_SMP /* create /proc/irq/<irq>/smp_affinity */ @@ -355,6 +367,9 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) proc_create_data("spurious", 0444, desc->dir, &irq_spurious_proc_fops, (void *)(long)irq); + +out_unlock: + mutex_unlock(®ister_lock); } void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 8acfbf773e06..4e49cc4c9952 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3068,7 +3068,7 @@ static int __lock_is_held(struct lockdep_map *lock); static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, int trylock, int read, int check, int hardirqs_off, struct lockdep_map *nest_lock, unsigned long ip, - int references) + int references, int pin_count) { struct task_struct *curr = current; struct lock_class *class = NULL; @@ -3157,7 +3157,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, hlock->waittime_stamp = 0; hlock->holdtime_stamp = lockstat_clock(); #endif - hlock->pin_count = 0; + hlock->pin_count = pin_count; if (check && !mark_irqflags(curr, hlock)) return 0; @@ -3343,7 +3343,7 @@ found_it: hlock_class(hlock)->subclass, hlock->trylock, hlock->read, hlock->check, hlock->hardirqs_off, hlock->nest_lock, hlock->acquire_ip, - hlock->references)) + hlock->references, hlock->pin_count)) return 0; } @@ -3433,7 +3433,7 @@ found_it: hlock_class(hlock)->subclass, hlock->trylock, hlock->read, hlock->check, hlock->hardirqs_off, hlock->nest_lock, hlock->acquire_ip, - hlock->references)) + hlock->references, hlock->pin_count)) return 0; } @@ -3583,7 +3583,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, current->lockdep_recursion = 1; trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip); __lock_acquire(lock, subclass, trylock, read, check, - irqs_disabled_flags(flags), nest_lock, ip, 0); + irqs_disabled_flags(flags), nest_lock, ip, 0, 0); current->lockdep_recursion = 0; raw_local_irq_restore(flags); } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 787320de68e0..b760bae64cf1 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -1016,6 +1016,11 @@ int ptrace_request(struct task_struct *child, long request, break; } #endif + + case PTRACE_SECCOMP_GET_FILTER: + ret = seccomp_get_filter(child, addr, datavp); + break; + default: break; } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 9f75f25cc5d9..775d36cc0050 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3868,6 +3868,7 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf) static void __init rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) { + static struct lock_class_key rcu_exp_sched_rdp_class; unsigned long flags; struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp = rcu_get_root(rsp); @@ -3883,6 +3884,10 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) mutex_init(&rdp->exp_funnel_mutex); rcu_boot_init_nocb_percpu_data(rdp); raw_spin_unlock_irqrestore(&rnp->lock, flags); + if (rsp == &rcu_sched_state) + lockdep_set_class_and_name(&rdp->exp_funnel_mutex, + &rcu_exp_sched_rdp_class, + "rcu_data_exp_sched"); } /* diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2f9c92884817..10a8faa1b0d4 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2517,11 +2517,11 @@ static struct rq *finish_task_switch(struct task_struct *prev) * If a task dies, then it sets TASK_DEAD in tsk->state and calls * schedule one last time. The schedule call will never return, and * the scheduled task must drop that reference. - * The test for TASK_DEAD must occur while the runqueue locks are - * still held, otherwise prev could be scheduled on another cpu, die - * there before we look at prev->state, and then the reference would - * be dropped twice. - * Manfred Spraul <manfred@colorfullife.com> + * + * We must observe prev->state before clearing prev->on_cpu (in + * finish_lock_switch), otherwise a concurrent wakeup can get prev + * running on another CPU and we could rave with its RUNNING -> DEAD + * transition, resulting in a double drop. */ prev_state = prev->state; vtime_task_switch(prev); @@ -4934,7 +4934,15 @@ void init_idle(struct task_struct *idle, int cpu) idle->state = TASK_RUNNING; idle->se.exec_start = sched_clock(); - do_set_cpus_allowed(idle, cpumask_of(cpu)); +#ifdef CONFIG_SMP + /* + * Its possible that init_idle() gets called multiple times on a task, + * in that case do_set_cpus_allowed() will not do the right thing. + * + * And since this is boot we can forgo the serialization. + */ + set_cpus_allowed_common(idle, cpumask_of(cpu)); +#endif /* * We're having a chicken and egg problem, even though we are * holding rq->lock, the cpu isn't yet set to this cpu so the @@ -4951,7 +4959,7 @@ void init_idle(struct task_struct *idle, int cpu) rq->curr = rq->idle = idle; idle->on_rq = TASK_ON_RQ_QUEUED; -#if defined(CONFIG_SMP) +#ifdef CONFIG_SMP idle->on_cpu = 1; #endif raw_spin_unlock(&rq->lock); @@ -4966,7 +4974,7 @@ void init_idle(struct task_struct *idle, int cpu) idle->sched_class = &idle_sched_class; ftrace_graph_init_idle_task(idle, cpu); vtime_init_idle(idle, cpu); -#if defined(CONFIG_SMP) +#ifdef CONFIG_SMP sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); #endif } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 68cda117574c..6d2a119c7ad9 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1078,9 +1078,10 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) * After ->on_cpu is cleared, the task can be moved to a different CPU. * We must ensure this doesn't happen until the switch is completely * finished. + * + * Pairs with the control dependency and rmb in try_to_wake_up(). */ - smp_wmb(); - prev->on_cpu = 0; + smp_store_release(&prev->on_cpu, 0); #endif #ifdef CONFIG_DEBUG_SPINLOCK /* this is a valid case when another task releases the spinlock */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 5bd4779282df..580ac2d4024f 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -347,6 +347,7 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) { struct seccomp_filter *sfilter; int ret; + const bool save_orig = config_enabled(CONFIG_CHECKPOINT_RESTORE); if (fprog->len == 0 || fprog->len > BPF_MAXINSNS) return ERR_PTR(-EINVAL); @@ -370,7 +371,7 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) return ERR_PTR(-ENOMEM); ret = bpf_prog_create_from_user(&sfilter->prog, fprog, - seccomp_check_filter); + seccomp_check_filter, save_orig); if (ret < 0) { kfree(sfilter); return ERR_PTR(ret); @@ -469,7 +470,7 @@ void get_seccomp_filter(struct task_struct *tsk) static inline void seccomp_filter_free(struct seccomp_filter *filter) { if (filter) { - bpf_prog_free(filter->prog); + bpf_prog_destroy(filter->prog); kfree(filter); } } @@ -867,3 +868,76 @@ long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter) /* prctl interface doesn't have flags, so they are always zero. */ return do_seccomp(op, 0, uargs); } + +#if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE) +long seccomp_get_filter(struct task_struct *task, unsigned long filter_off, + void __user *data) +{ + struct seccomp_filter *filter; + struct sock_fprog_kern *fprog; + long ret; + unsigned long count = 0; + + if (!capable(CAP_SYS_ADMIN) || + current->seccomp.mode != SECCOMP_MODE_DISABLED) { + return -EACCES; + } + + spin_lock_irq(&task->sighand->siglock); + if (task->seccomp.mode != SECCOMP_MODE_FILTER) { + ret = -EINVAL; + goto out; + } + + filter = task->seccomp.filter; + while (filter) { + filter = filter->prev; + count++; + } + + if (filter_off >= count) { + ret = -ENOENT; + goto out; + } + count -= filter_off; + + filter = task->seccomp.filter; + while (filter && count > 1) { + filter = filter->prev; + count--; + } + + if (WARN_ON(count != 1 || !filter)) { + /* The filter tree shouldn't shrink while we're using it. */ + ret = -ENOENT; + goto out; + } + + fprog = filter->prog->orig_prog; + if (!fprog) { + /* This must be a new non-cBPF filter, since we save every + * every cBPF filter's orig_prog above when + * CONFIG_CHECKPOINT_RESTORE is enabled. + */ + ret = -EMEDIUMTYPE; + goto out; + } + + ret = fprog->len; + if (!data) + goto out; + + get_seccomp_filter(task); + spin_unlock_irq(&task->sighand->siglock); + + if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog))) + ret = -EFAULT; + + put_seccomp_filter(task); + return ret; + +out: + spin_unlock_irq(&task->sighand->siglock); + return ret; +} +#endif diff --git a/kernel/sysctl.c b/kernel/sysctl.c index e69201d8094e..96c856b04081 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -64,6 +64,7 @@ #include <linux/binfmts.h> #include <linux/sched/sysctl.h> #include <linux/kexec.h> +#include <linux/bpf.h> #include <asm/uaccess.h> #include <asm/processor.h> @@ -1139,6 +1140,18 @@ static struct ctl_table kern_table[] = { .proc_handler = timer_migration_handler, }, #endif +#ifdef CONFIG_BPF_SYSCALL + { + .procname = "unprivileged_bpf_disabled", + .data = &sysctl_unprivileged_bpf_disabled, + .maxlen = sizeof(sysctl_unprivileged_bpf_disabled), + .mode = 0644, + /* only handle a transition from default "0" to "1" */ + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &one, + }, +#endif { } }; diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 841b72f720e8..3a38775b50c2 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -217,7 +217,7 @@ static void clocksource_watchdog(unsigned long data) continue; /* Check the deviation from the watchdog clocksource. */ - if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) { + if (abs64(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable because the skew is too large:\n", cs->name); pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n", diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3739ac6aa473..44d2cc0436f4 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1251,7 +1251,7 @@ void __init timekeeping_init(void) set_normalized_timespec64(&tmp, -boot.tv_sec, -boot.tv_nsec); tk_set_wall_to_mono(tk, tmp); - timekeeping_update(tk, TK_MIRROR); + timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 0fe96c7c8803..4228fd3682c3 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -199,6 +199,11 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5) if (!event) return -ENOENT; + /* make sure event is local and doesn't have pmu::count */ + if (event->oncpu != smp_processor_id() || + event->pmu->count) + return -EINVAL; + /* * we don't know if the function is run successfully by the * return value. It can be judged in other places, such as @@ -207,14 +212,58 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5) return perf_event_read_local(event); } -const struct bpf_func_proto bpf_perf_event_read_proto = { +static const struct bpf_func_proto bpf_perf_event_read_proto = { .func = bpf_perf_event_read, - .gpl_only = false, + .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_ANYTHING, }; +static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size) +{ + struct pt_regs *regs = (struct pt_regs *) (long) r1; + struct bpf_map *map = (struct bpf_map *) (long) r2; + struct bpf_array *array = container_of(map, struct bpf_array, map); + void *data = (void *) (long) r4; + struct perf_sample_data sample_data; + struct perf_event *event; + struct perf_raw_record raw = { + .size = size, + .data = data, + }; + + if (unlikely(index >= array->map.max_entries)) + return -E2BIG; + + event = (struct perf_event *)array->ptrs[index]; + if (unlikely(!event)) + return -ENOENT; + + if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE || + event->attr.config != PERF_COUNT_SW_BPF_OUTPUT)) + return -EINVAL; + + if (unlikely(event->oncpu != smp_processor_id())) + return -EOPNOTSUPP; + + perf_sample_data_init(&sample_data, 0, 0); + sample_data.raw = &raw; + perf_event_output(event, &sample_data, regs); + return 0; +} + +static const struct bpf_func_proto bpf_perf_event_output_proto = { + .func = bpf_perf_event_output, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_STACK, + .arg5_type = ARG_CONST_STACK_SIZE, +}; + static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -242,6 +291,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_get_smp_processor_id_proto; case BPF_FUNC_perf_event_read: return &bpf_perf_event_read_proto; + case BPF_FUNC_perf_event_output: + return &bpf_perf_event_output_proto; default: return NULL; } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ca71582fcfab..bcb14cafe007 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1458,13 +1458,13 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, timer_stats_timer_set_start_info(&dwork->timer); dwork->wq = wq; + /* timer isn't guaranteed to run in this cpu, record earlier */ + if (cpu == WORK_CPU_UNBOUND) + cpu = raw_smp_processor_id(); dwork->cpu = cpu; timer->expires = jiffies + delay; - if (unlikely(cpu != WORK_CPU_UNBOUND)) - add_timer_on(timer, cpu); - else - add_timer(timer); + add_timer_on(timer, cpu); } /** |