diff options
Diffstat (limited to 'kernel/bpf/trampoline.c')
| -rw-r--r-- | kernel/bpf/trampoline.c | 217 |
1 files changed, 152 insertions, 65 deletions
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index f8302a5ca400..976d89011b15 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -115,10 +115,14 @@ bool bpf_prog_has_trampoline(const struct bpf_prog *prog) (ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC); } -void bpf_image_ksym_add(void *data, unsigned int size, struct bpf_ksym *ksym) +void bpf_image_ksym_init(void *data, unsigned int size, struct bpf_ksym *ksym) { ksym->start = (unsigned long) data; ksym->end = ksym->start + size; +} + +void bpf_image_ksym_add(struct bpf_ksym *ksym) +{ bpf_ksym_add(ksym); perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start, PAGE_SIZE, false, ksym->name); @@ -171,23 +175,42 @@ out: return tr; } -static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr) +static int bpf_trampoline_update_fentry(struct bpf_trampoline *tr, u32 orig_flags, + void *old_addr, void *new_addr) { + enum bpf_text_poke_type new_t = BPF_MOD_CALL, old_t = BPF_MOD_CALL; void *ip = tr->func.addr; + + if (!new_addr) + new_t = BPF_MOD_NOP; + else if (bpf_trampoline_use_jmp(tr->flags)) + new_t = BPF_MOD_JUMP; + + if (!old_addr) + old_t = BPF_MOD_NOP; + else if (bpf_trampoline_use_jmp(orig_flags)) + old_t = BPF_MOD_JUMP; + + return bpf_arch_text_poke(ip, old_t, new_t, old_addr, new_addr); +} + +static int unregister_fentry(struct bpf_trampoline *tr, u32 orig_flags, + void *old_addr) +{ int ret; if (tr->func.ftrace_managed) ret = unregister_ftrace_direct(tr->fops, (long)old_addr, false); else - ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL); + ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr, NULL); return ret; } -static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr, +static int modify_fentry(struct bpf_trampoline *tr, u32 orig_flags, + void *old_addr, void *new_addr, bool lock_direct_mutex) { - void *ip = tr->func.addr; int ret; if (tr->func.ftrace_managed) { @@ -196,7 +219,8 @@ static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_ad else ret = modify_ftrace_direct_nolock(tr->fops, (long)new_addr); } else { - ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr); + ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr, + new_addr); } return ret; } @@ -216,10 +240,12 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr) } if (tr->func.ftrace_managed) { - ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1); + ret = ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1); + if (ret) + return ret; ret = register_ftrace_direct(tr->fops, (long)new_addr); } else { - ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr); + ret = bpf_trampoline_update_fentry(tr, 0, NULL, new_addr); } return ret; @@ -330,8 +356,9 @@ static void bpf_tramp_image_put(struct bpf_tramp_image *im) * call_rcu_tasks() is not necessary. */ if (im->ip_after_call) { - int err = bpf_arch_text_poke(im->ip_after_call, BPF_MOD_JUMP, - NULL, im->ip_epilogue); + int err = bpf_arch_text_poke(im->ip_after_call, BPF_MOD_NOP, + BPF_MOD_JUMP, NULL, + im->ip_epilogue); WARN_ON(err); if (IS_ENABLED(CONFIG_TASKS_RCU)) call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu_tasks); @@ -377,7 +404,8 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, int size) ksym = &im->ksym; INIT_LIST_HEAD_RCU(&ksym->lnode); snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", key); - bpf_image_ksym_add(image, size, ksym); + bpf_image_ksym_init(image, size, ksym); + bpf_image_ksym_add(ksym); return im; out_free_image: @@ -403,7 +431,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut return PTR_ERR(tlinks); if (total == 0) { - err = unregister_fentry(tr, tr->cur_image->image); + err = unregister_fentry(tr, orig_flags, tr->cur_image->image); bpf_tramp_image_put(tr->cur_image); tr->cur_image = NULL; goto out; @@ -427,9 +455,20 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS again: - if ((tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY) && - (tr->flags & BPF_TRAMP_F_CALL_ORIG)) - tr->flags |= BPF_TRAMP_F_ORIG_STACK; + if (tr->flags & BPF_TRAMP_F_CALL_ORIG) { + if (tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY) { + /* The BPF_TRAMP_F_SKIP_FRAME can be cleared in the + * first try, reset it in the second try. + */ + tr->flags |= BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SKIP_FRAME; + } else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_JMP)) { + /* Use "jmp" instead of "call" for the trampoline + * in the origin call case, and we don't need to + * skip the frame. + */ + tr->flags &= ~BPF_TRAMP_F_SKIP_FRAME; + } + } #endif size = arch_bpf_trampoline_size(&tr->func.model, tr->flags, @@ -460,10 +499,18 @@ again: if (err) goto out_free; +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP + if (bpf_trampoline_use_jmp(tr->flags)) + tr->fops->flags |= FTRACE_OPS_FL_JMP; + else + tr->fops->flags &= ~FTRACE_OPS_FL_JMP; +#endif + WARN_ON(tr->cur_image && total == 0); if (tr->cur_image) /* progs already running at this address */ - err = modify_fentry(tr, tr->cur_image->image, im->image, lock_direct_mutex); + err = modify_fentry(tr, orig_flags, tr->cur_image->image, + im->image, lock_direct_mutex); else /* first time registering */ err = register_fentry(tr, im->image); @@ -474,11 +521,6 @@ again: * BPF_TRAMP_F_SHARE_IPMODIFY is set, we can generate the * trampoline again, and retry register. */ - /* reset fops->func and fops->trampoline for re-register */ - tr->fops->func = NULL; - tr->fops->trampoline = 0; - - /* free im memory and reallocate later */ bpf_tramp_image_free(im); goto again; } @@ -491,8 +533,15 @@ again: tr->cur_image = im; out: /* If any error happens, restore previous flags */ - if (err) + if (err) { tr->flags = orig_flags; +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP + if (bpf_trampoline_use_jmp(tr->flags)) + tr->fops->flags |= FTRACE_OPS_FL_JMP; + else + tr->fops->flags &= ~FTRACE_OPS_FL_JMP; +#endif + } kfree(tlinks); return err; @@ -523,7 +572,27 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog) } } -static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) +static int bpf_freplace_check_tgt_prog(struct bpf_prog *tgt_prog) +{ + struct bpf_prog_aux *aux = tgt_prog->aux; + + guard(mutex)(&aux->ext_mutex); + if (aux->prog_array_member_cnt) + /* Program extensions can not extend target prog when the target + * prog has been updated to any prog_array map as tail callee. + * It's to prevent a potential infinite loop like: + * tgt prog entry -> tgt prog subprog -> freplace prog entry + * --tailcall-> tgt prog entry. + */ + return -EBUSY; + + aux->is_extended = true; + return 0; +} + +static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog) { enum bpf_tramp_prog_type kind; struct bpf_tramp_link *link_exiting; @@ -544,8 +613,12 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_tr /* Cannot attach extension if fentry/fexit are in use. */ if (cnt) return -EBUSY; + err = bpf_freplace_check_tgt_prog(tgt_prog); + if (err) + return err; tr->extension_prog = link->link.prog; - return bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL, + return bpf_arch_text_poke(tr->func.addr, BPF_MOD_NOP, + BPF_MOD_JUMP, NULL, link->link.prog->bpf_func); } if (cnt >= BPF_MAX_TRAMP_LINKS) @@ -570,17 +643,21 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_tr return err; } -int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) +int bpf_trampoline_link_prog(struct bpf_tramp_link *link, + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog) { int err; mutex_lock(&tr->mutex); - err = __bpf_trampoline_link_prog(link, tr); + err = __bpf_trampoline_link_prog(link, tr, tgt_prog); mutex_unlock(&tr->mutex); return err; } -static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) +static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog) { enum bpf_tramp_prog_type kind; int err; @@ -589,8 +666,11 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_ if (kind == BPF_TRAMP_REPLACE) { WARN_ON_ONCE(!tr->extension_prog); err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, + BPF_MOD_NOP, tr->extension_prog->bpf_func, NULL); tr->extension_prog = NULL; + guard(mutex)(&tgt_prog->aux->ext_mutex); + tgt_prog->aux->is_extended = false; return err; } hlist_del_init(&link->tramp_hlist); @@ -599,12 +679,14 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_ } /* bpf_trampoline_unlink_prog() should never fail. */ -int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) +int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog) { int err; mutex_lock(&tr->mutex); - err = __bpf_trampoline_unlink_prog(link, tr); + err = __bpf_trampoline_unlink_prog(link, tr, tgt_prog); mutex_unlock(&tr->mutex); return err; } @@ -619,7 +701,7 @@ static void bpf_shim_tramp_link_release(struct bpf_link *link) if (!shim_link->trampoline) return; - WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline)); + WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline, NULL)); bpf_trampoline_put(shim_link->trampoline); } @@ -638,7 +720,8 @@ static const struct bpf_link_ops bpf_shim_tramp_link_lops = { static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog, bpf_func_t bpf_func, - int cgroup_atype) + int cgroup_atype, + enum bpf_attach_type attach_type) { struct bpf_shim_tramp_link *shim_link = NULL; struct bpf_prog *p; @@ -665,7 +748,7 @@ static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog p->expected_attach_type = BPF_LSM_MAC; bpf_prog_inc(p); bpf_link_init(&shim_link->link.link, BPF_LINK_TYPE_UNSPEC, - &bpf_shim_tramp_link_lops, p); + &bpf_shim_tramp_link_lops, p, attach_type); bpf_cgroup_atype_get(p->aux->attach_btf_id, cgroup_atype); return shim_link; @@ -690,7 +773,8 @@ static struct bpf_shim_tramp_link *cgroup_shim_find(struct bpf_trampoline *tr, } int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, - int cgroup_atype) + int cgroup_atype, + enum bpf_attach_type attach_type) { struct bpf_shim_tramp_link *shim_link = NULL; struct bpf_attach_target_info tgt_info = {}; @@ -727,13 +811,13 @@ int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, /* Allocate and install new shim. */ - shim_link = cgroup_shim_alloc(prog, bpf_func, cgroup_atype); + shim_link = cgroup_shim_alloc(prog, bpf_func, cgroup_atype, attach_type); if (!shim_link) { err = -ENOMEM; goto err; } - err = __bpf_trampoline_link_prog(&shim_link->link, tr); + err = __bpf_trampoline_link_prog(&shim_link->link, tr, NULL); if (err) goto err; @@ -861,39 +945,45 @@ static __always_inline u64 notrace bpf_prog_start_time(void) static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx) __acquires(RCU) { - rcu_read_lock(); - migrate_disable(); + rcu_read_lock_dont_migrate(); run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx); if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) { bpf_prog_inc_misses_counter(prog); + if (prog->aux->recursion_detected) + prog->aux->recursion_detected(prog); return 0; } return bpf_prog_start_time(); } -static void notrace update_prog_stats(struct bpf_prog *prog, - u64 start) +static void notrace __update_prog_stats(struct bpf_prog *prog, u64 start) { struct bpf_prog_stats *stats; + unsigned long flags; + u64 duration; - if (static_branch_unlikely(&bpf_stats_enabled_key) && - /* static_key could be enabled in __bpf_prog_enter* - * and disabled in __bpf_prog_exit*. - * And vice versa. - * Hence check that 'start' is valid. - */ - start > NO_START_TIME) { - u64 duration = sched_clock() - start; - unsigned long flags; - - stats = this_cpu_ptr(prog->stats); - flags = u64_stats_update_begin_irqsave(&stats->syncp); - u64_stats_inc(&stats->cnt); - u64_stats_add(&stats->nsecs, duration); - u64_stats_update_end_irqrestore(&stats->syncp, flags); - } + /* + * static_key could be enabled in __bpf_prog_enter* and disabled in + * __bpf_prog_exit*. And vice versa. Check that 'start' is valid. + */ + if (start <= NO_START_TIME) + return; + + duration = sched_clock() - start; + stats = this_cpu_ptr(prog->stats); + flags = u64_stats_update_begin_irqsave(&stats->syncp); + u64_stats_inc(&stats->cnt); + u64_stats_add(&stats->nsecs, duration); + u64_stats_update_end_irqrestore(&stats->syncp, flags); +} + +static __always_inline void notrace update_prog_stats(struct bpf_prog *prog, + u64 start) +{ + if (static_branch_unlikely(&bpf_stats_enabled_key)) + __update_prog_stats(prog, start); } static void notrace __bpf_prog_exit_recur(struct bpf_prog *prog, u64 start, @@ -904,8 +994,7 @@ static void notrace __bpf_prog_exit_recur(struct bpf_prog *prog, u64 start, update_prog_stats(prog, start); this_cpu_dec(*(prog->active)); - migrate_enable(); - rcu_read_unlock(); + rcu_read_unlock_migrate(); } static u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog, @@ -915,8 +1004,7 @@ static u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog, /* Runtime stats are exported via actual BPF_LSM_CGROUP * programs, not the shims. */ - rcu_read_lock(); - migrate_disable(); + rcu_read_lock_dont_migrate(); run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx); @@ -929,8 +1017,7 @@ static void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start, { bpf_reset_run_ctx(run_ctx->saved_run_ctx); - migrate_enable(); - rcu_read_unlock(); + rcu_read_unlock_migrate(); } u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog, @@ -944,6 +1031,8 @@ u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog, if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) { bpf_prog_inc_misses_counter(prog); + if (prog->aux->recursion_detected) + prog->aux->recursion_detected(prog); return 0; } return bpf_prog_start_time(); @@ -986,8 +1075,7 @@ static u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx) __acquires(RCU) { - rcu_read_lock(); - migrate_disable(); + rcu_read_lock_dont_migrate(); run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx); @@ -1001,8 +1089,7 @@ static void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, bpf_reset_run_ctx(run_ctx->saved_run_ctx); update_prog_stats(prog, start); - migrate_enable(); - rcu_read_unlock(); + rcu_read_unlock_migrate(); } void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr) |
