diff options
Diffstat (limited to 'net/bpf/test_run.c')
| -rw-r--r-- | net/bpf/test_run.c | 1027 |
1 files changed, 896 insertions, 131 deletions
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 46dd95755967..655efac6f133 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -5,25 +5,30 @@ #include <linux/btf.h> #include <linux/btf_ids.h> #include <linux/slab.h> +#include <linux/init.h> #include <linux/vmalloc.h> #include <linux/etherdevice.h> #include <linux/filter.h> #include <linux/rcupdate_trace.h> #include <linux/sched/signal.h> #include <net/bpf_sk_storage.h> +#include <net/hotdata.h> #include <net/sock.h> #include <net/tcp.h> #include <net/net_namespace.h> +#include <net/page_pool/helpers.h> #include <linux/error-injection.h> #include <linux/smp.h> #include <linux/sock_diag.h> +#include <linux/netfilter.h> +#include <net/netdev_rx_queue.h> #include <net/xdp.h> +#include <net/netfilter/nf_bpf_link.h> #define CREATE_TRACE_POINTS #include <trace/events/bpf_test_run.h> struct bpf_test_timer { - enum { NO_PREEMPT, NO_MIGRATE } mode; u32 i; u64 time_start, time_spent; }; @@ -31,12 +36,7 @@ struct bpf_test_timer { static void bpf_test_timer_enter(struct bpf_test_timer *t) __acquires(rcu) { - rcu_read_lock(); - if (t->mode == NO_PREEMPT) - preempt_disable(); - else - migrate_disable(); - + rcu_read_lock_dont_migrate(); t->time_start = ktime_get_ns(); } @@ -44,18 +44,14 @@ static void bpf_test_timer_leave(struct bpf_test_timer *t) __releases(rcu) { t->time_start = 0; - - if (t->mode == NO_PREEMPT) - preempt_enable(); - else - migrate_enable(); - rcu_read_unlock(); + rcu_read_unlock_migrate(); } -static bool bpf_test_timer_continue(struct bpf_test_timer *t, u32 repeat, int *err, u32 *duration) +static bool bpf_test_timer_continue(struct bpf_test_timer *t, int iterations, + u32 repeat, int *err, u32 *duration) __must_hold(rcu) { - t->i++; + t->i += iterations; if (t->i >= repeat) { /* We're done. */ t->time_spent += ktime_get_ns() - t->time_start; @@ -87,13 +83,317 @@ reset: return false; } +/* We put this struct at the head of each page with a context and frame + * initialised when the page is allocated, so we don't have to do this on each + * repetition of the test run. + */ +struct xdp_page_head { + struct xdp_buff orig_ctx; + struct xdp_buff ctx; + union { + /* ::data_hard_start starts here */ + DECLARE_FLEX_ARRAY(struct xdp_frame, frame); + DECLARE_FLEX_ARRAY(u8, data); + }; +}; + +struct xdp_test_data { + struct xdp_buff *orig_ctx; + struct xdp_rxq_info rxq; + struct net_device *dev; + struct page_pool *pp; + struct xdp_frame **frames; + struct sk_buff **skbs; + struct xdp_mem_info mem; + u32 batch_size; + u32 frame_cnt; +}; + +/* tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c:%MAX_PKT_SIZE + * must be updated accordingly this gets changed, otherwise BPF selftests + * will fail. + */ +#define TEST_XDP_FRAME_SIZE (PAGE_SIZE - sizeof(struct xdp_page_head)) +#define TEST_XDP_MAX_BATCH 256 + +static void xdp_test_run_init_page(netmem_ref netmem, void *arg) +{ + struct xdp_page_head *head = + phys_to_virt(page_to_phys(netmem_to_page(netmem))); + struct xdp_buff *new_ctx, *orig_ctx; + u32 headroom = XDP_PACKET_HEADROOM; + struct xdp_test_data *xdp = arg; + size_t frm_len, meta_len; + struct xdp_frame *frm; + void *data; + + orig_ctx = xdp->orig_ctx; + frm_len = orig_ctx->data_end - orig_ctx->data_meta; + meta_len = orig_ctx->data - orig_ctx->data_meta; + headroom -= meta_len; + + new_ctx = &head->ctx; + frm = head->frame; + data = head->data; + memcpy(data + headroom, orig_ctx->data_meta, frm_len); + + xdp_init_buff(new_ctx, TEST_XDP_FRAME_SIZE, &xdp->rxq); + xdp_prepare_buff(new_ctx, data, headroom, frm_len, true); + new_ctx->data = new_ctx->data_meta + meta_len; + + xdp_update_frame_from_buff(new_ctx, frm); + frm->mem_type = new_ctx->rxq->mem.type; + + memcpy(&head->orig_ctx, new_ctx, sizeof(head->orig_ctx)); +} + +static int xdp_test_run_setup(struct xdp_test_data *xdp, struct xdp_buff *orig_ctx) +{ + struct page_pool *pp; + int err = -ENOMEM; + struct page_pool_params pp_params = { + .order = 0, + .flags = 0, + .pool_size = xdp->batch_size, + .nid = NUMA_NO_NODE, + .init_callback = xdp_test_run_init_page, + .init_arg = xdp, + }; + + xdp->frames = kvmalloc_array(xdp->batch_size, sizeof(void *), GFP_KERNEL); + if (!xdp->frames) + return -ENOMEM; + + xdp->skbs = kvmalloc_array(xdp->batch_size, sizeof(void *), GFP_KERNEL); + if (!xdp->skbs) + goto err_skbs; + + pp = page_pool_create(&pp_params); + if (IS_ERR(pp)) { + err = PTR_ERR(pp); + goto err_pp; + } + + /* will copy 'mem.id' into pp->xdp_mem_id */ + err = xdp_reg_mem_model(&xdp->mem, MEM_TYPE_PAGE_POOL, pp); + if (err) + goto err_mmodel; + + xdp->pp = pp; + + /* We create a 'fake' RXQ referencing the original dev, but with an + * xdp_mem_info pointing to our page_pool + */ + xdp_rxq_info_reg(&xdp->rxq, orig_ctx->rxq->dev, 0, 0); + xdp->rxq.mem.type = MEM_TYPE_PAGE_POOL; + xdp->rxq.mem.id = pp->xdp_mem_id; + xdp->dev = orig_ctx->rxq->dev; + xdp->orig_ctx = orig_ctx; + + return 0; + +err_mmodel: + page_pool_destroy(pp); +err_pp: + kvfree(xdp->skbs); +err_skbs: + kvfree(xdp->frames); + return err; +} + +static void xdp_test_run_teardown(struct xdp_test_data *xdp) +{ + xdp_unreg_mem_model(&xdp->mem); + page_pool_destroy(xdp->pp); + kfree(xdp->frames); + kfree(xdp->skbs); +} + +static bool frame_was_changed(const struct xdp_page_head *head) +{ + /* xdp_scrub_frame() zeroes the data pointer, flags is the last field, + * i.e. has the highest chances to be overwritten. If those two are + * untouched, it's most likely safe to skip the context reset. + */ + return head->frame->data != head->orig_ctx.data || + head->frame->flags != head->orig_ctx.flags; +} + +static bool ctx_was_changed(struct xdp_page_head *head) +{ + return head->orig_ctx.data != head->ctx.data || + head->orig_ctx.data_meta != head->ctx.data_meta || + head->orig_ctx.data_end != head->ctx.data_end; +} + +static void reset_ctx(struct xdp_page_head *head) +{ + if (likely(!frame_was_changed(head) && !ctx_was_changed(head))) + return; + + head->ctx.data = head->orig_ctx.data; + head->ctx.data_meta = head->orig_ctx.data_meta; + head->ctx.data_end = head->orig_ctx.data_end; + xdp_update_frame_from_buff(&head->ctx, head->frame); + head->frame->mem_type = head->orig_ctx.rxq->mem.type; +} + +static int xdp_recv_frames(struct xdp_frame **frames, int nframes, + struct sk_buff **skbs, + struct net_device *dev) +{ + gfp_t gfp = __GFP_ZERO | GFP_ATOMIC; + int i, n; + LIST_HEAD(list); + + n = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, gfp, nframes, + (void **)skbs); + if (unlikely(n == 0)) { + for (i = 0; i < nframes; i++) + xdp_return_frame(frames[i]); + return -ENOMEM; + } + + for (i = 0; i < nframes; i++) { + struct xdp_frame *xdpf = frames[i]; + struct sk_buff *skb = skbs[i]; + + skb = __xdp_build_skb_from_frame(xdpf, skb, dev); + if (!skb) { + xdp_return_frame(xdpf); + continue; + } + + list_add_tail(&skb->list, &list); + } + netif_receive_skb_list(&list); + + return 0; +} + +static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog, + u32 repeat) +{ + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; + int err = 0, act, ret, i, nframes = 0, batch_sz; + struct xdp_frame **frames = xdp->frames; + struct bpf_redirect_info *ri; + struct xdp_page_head *head; + struct xdp_frame *frm; + bool redirect = false; + struct xdp_buff *ctx; + struct page *page; + + batch_sz = min_t(u32, repeat, xdp->batch_size); + + local_bh_disable(); + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); + ri = bpf_net_ctx_get_ri(); + xdp_set_return_frame_no_direct(); + + for (i = 0; i < batch_sz; i++) { + page = page_pool_dev_alloc_pages(xdp->pp); + if (!page) { + err = -ENOMEM; + goto out; + } + + head = phys_to_virt(page_to_phys(page)); + reset_ctx(head); + ctx = &head->ctx; + frm = head->frame; + xdp->frame_cnt++; + + act = bpf_prog_run_xdp(prog, ctx); + + /* if program changed pkt bounds we need to update the xdp_frame */ + if (unlikely(ctx_was_changed(head))) { + ret = xdp_update_frame_from_buff(ctx, frm); + if (ret) { + xdp_return_buff(ctx); + continue; + } + } + + switch (act) { + case XDP_TX: + /* we can't do a real XDP_TX since we're not in the + * driver, so turn it into a REDIRECT back to the same + * index + */ + ri->tgt_index = xdp->dev->ifindex; + ri->map_id = INT_MAX; + ri->map_type = BPF_MAP_TYPE_UNSPEC; + fallthrough; + case XDP_REDIRECT: + redirect = true; + ret = xdp_do_redirect_frame(xdp->dev, ctx, frm, prog); + if (ret) + xdp_return_buff(ctx); + break; + case XDP_PASS: + frames[nframes++] = frm; + break; + default: + bpf_warn_invalid_xdp_action(NULL, prog, act); + fallthrough; + case XDP_DROP: + xdp_return_buff(ctx); + break; + } + } + +out: + if (redirect) + xdp_do_flush(); + if (nframes) { + ret = xdp_recv_frames(frames, nframes, xdp->skbs, xdp->dev); + if (ret) + err = ret; + } + + xdp_clear_return_frame_no_direct(); + bpf_net_ctx_clear(bpf_net_ctx); + local_bh_enable(); + return err; +} + +static int bpf_test_run_xdp_live(struct bpf_prog *prog, struct xdp_buff *ctx, + u32 repeat, u32 batch_size, u32 *time) + +{ + struct xdp_test_data xdp = { .batch_size = batch_size }; + struct bpf_test_timer t = {}; + int ret; + + if (!repeat) + repeat = 1; + + ret = xdp_test_run_setup(&xdp, ctx); + if (ret) + return ret; + + bpf_test_timer_enter(&t); + do { + xdp.frame_cnt = 0; + ret = xdp_test_run_batch(&xdp, prog, repeat - t.i); + if (unlikely(ret < 0)) + break; + } while (bpf_test_timer_continue(&t, xdp.frame_cnt, repeat, &ret, time)); + bpf_test_timer_leave(&t); + + xdp_test_run_teardown(&xdp); + return ret; +} + static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *retval, u32 *time, bool xdp) { + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; struct bpf_prog_array_item item = {.prog = prog}; struct bpf_run_ctx *old_ctx; struct bpf_cg_run_ctx run_ctx; - struct bpf_test_timer t = { NO_MIGRATE }; + struct bpf_test_timer t = {}; enum bpf_cgroup_storage_type stype; int ret; @@ -114,11 +414,17 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, old_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); do { run_ctx.prog_item = &item; + local_bh_disable(); + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); + if (xdp) *retval = bpf_prog_run_xdp(prog, ctx); else *retval = bpf_prog_run(prog, ctx); - } while (bpf_test_timer_continue(&t, repeat, &ret, time)); + + bpf_net_ctx_clear(bpf_net_ctx); + local_bh_enable(); + } while (bpf_test_timer_continue(&t, 1, repeat, &ret, time)); bpf_reset_run_ctx(old_ctx); bpf_test_timer_leave(&t); @@ -130,7 +436,8 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, static int bpf_test_finish(const union bpf_attr *kattr, union bpf_attr __user *uattr, const void *data, - u32 size, u32 retval, u32 duration) + struct skb_shared_info *sinfo, u32 size, u32 frag_size, + u32 retval, u32 duration) { void __user *data_out = u64_to_user_ptr(kattr->test.data_out); int err = -EFAULT; @@ -145,8 +452,42 @@ static int bpf_test_finish(const union bpf_attr *kattr, err = -ENOSPC; } - if (data_out && copy_to_user(data_out, data, copy_size)) - goto out; + if (data_out) { + int len = sinfo ? copy_size - frag_size : copy_size; + + if (len < 0) { + err = -ENOSPC; + goto out; + } + + if (copy_to_user(data_out, data, len)) + goto out; + + if (sinfo) { + int i, offset = len; + u32 data_len; + + for (i = 0; i < sinfo->nr_frags; i++) { + skb_frag_t *frag = &sinfo->frags[i]; + + if (offset >= copy_size) { + err = -ENOSPC; + break; + } + + data_len = min_t(u32, copy_size - offset, + skb_frag_size(frag)); + + if (copy_to_user(data_out + offset, + skb_frag_address(frag), + data_len)) + goto out; + + offset += data_len; + } + } + } + if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size))) goto out; if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval))) @@ -164,35 +505,35 @@ out: * architecture dependent calling conventions. 7+ can be supported in the * future. */ -__diag_push(); -__diag_ignore(GCC, 8, "-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); -int noinline bpf_fentry_test1(int a) +__bpf_kfunc_start_defs(); + +__bpf_kfunc int bpf_fentry_test1(int a) { return a + 1; } +EXPORT_SYMBOL_GPL(bpf_fentry_test1); -int noinline bpf_fentry_test2(int a, u64 b) +noinline int bpf_fentry_test2(int a, u64 b) { return a + b; } -int noinline bpf_fentry_test3(char a, int b, u64 c) +noinline int bpf_fentry_test3(char a, int b, u64 c) { return a + b + c; } -int noinline bpf_fentry_test4(void *a, char b, int c, u64 d) +noinline int bpf_fentry_test4(void *a, char b, int c, u64 d) { return (long)a + b + c + d; } -int noinline bpf_fentry_test5(u64 a, void *b, short c, int d, u64 e) +noinline int bpf_fentry_test5(u64 a, void *b, short c, int d, u64 e) { return a + (long)b + c + d + e; } -int noinline bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f) +noinline int bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f) { return a + (long)b + c + d + (long)e + f; } @@ -201,67 +542,122 @@ struct bpf_fentry_test_t { struct bpf_fentry_test_t *a; }; -int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg) +noinline int bpf_fentry_test7(struct bpf_fentry_test_t *arg) { + asm volatile ("" : "+r"(arg)); return (long)arg; } -int noinline bpf_fentry_test8(struct bpf_fentry_test_t *arg) +noinline int bpf_fentry_test8(struct bpf_fentry_test_t *arg) { return (long)arg->a; } -int noinline bpf_modify_return_test(int a, int *b) +__bpf_kfunc u32 bpf_fentry_test9(u32 *a) +{ + return *a; +} + +noinline int bpf_fentry_test10(const void *a) +{ + return (long)a; +} + +noinline void bpf_fentry_test_sinfo(struct skb_shared_info *sinfo) +{ +} + +__bpf_kfunc int bpf_modify_return_test(int a, int *b) { *b += 1; return a + *b; } -u64 noinline bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d) +__bpf_kfunc int bpf_modify_return_test2(int a, int *b, short c, int d, + void *e, char f, int g) { - return a + b + c + d; + *b += 1; + return a + *b + c + d + (long)e + f + g; } -int noinline bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b) +__bpf_kfunc int bpf_modify_return_test_tp(int nonce) { - return a + b; + trace_bpf_trigger_tp(nonce); + + return nonce; } -struct sock * noinline bpf_kfunc_call_test3(struct sock *sk) +noinline int bpf_fentry_shadow_test(int a) { - return sk; + return a + 1; } -__diag_pop(); +struct prog_test_member1 { + int a; +}; + +struct prog_test_member { + struct prog_test_member1 m; + int c; +}; + +struct prog_test_ref_kfunc { + int a; + int b; + struct prog_test_member memb; + struct prog_test_ref_kfunc *next; + refcount_t cnt; +}; + +__bpf_kfunc void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) +{ + refcount_dec(&p->cnt); +} -ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO); +__bpf_kfunc void bpf_kfunc_call_test_release_dtor(void *p) +{ + bpf_kfunc_call_test_release(p); +} +CFI_NOSEAL(bpf_kfunc_call_test_release_dtor); -BTF_SET_START(test_sk_kfunc_ids) -BTF_ID(func, bpf_kfunc_call_test1) -BTF_ID(func, bpf_kfunc_call_test2) -BTF_ID(func, bpf_kfunc_call_test3) -BTF_SET_END(test_sk_kfunc_ids) +__bpf_kfunc void bpf_kfunc_call_memb_release(struct prog_test_member *p) +{ +} -bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner) +__bpf_kfunc void bpf_kfunc_call_memb_release_dtor(void *p) { - if (btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id)) - return true; - return bpf_check_mod_kfunc_call(&prog_test_kfunc_list, kfunc_id, owner); } +CFI_NOSEAL(bpf_kfunc_call_memb_release_dtor); + +__bpf_kfunc_end_defs(); -static void *bpf_test_init(const union bpf_attr *kattr, u32 size, - u32 headroom, u32 tailroom) +BTF_KFUNCS_START(bpf_test_modify_return_ids) +BTF_ID_FLAGS(func, bpf_modify_return_test) +BTF_ID_FLAGS(func, bpf_modify_return_test2) +BTF_ID_FLAGS(func, bpf_modify_return_test_tp) +BTF_ID_FLAGS(func, bpf_fentry_test1, KF_SLEEPABLE) +BTF_KFUNCS_END(bpf_test_modify_return_ids) + +static const struct btf_kfunc_id_set bpf_test_modify_return_set = { + .owner = THIS_MODULE, + .set = &bpf_test_modify_return_ids, +}; + +BTF_KFUNCS_START(test_sk_check_kfunc_ids) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_kfunc_call_memb_release, KF_RELEASE) +BTF_KFUNCS_END(test_sk_check_kfunc_ids) + +static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, + u32 size, u32 headroom, u32 tailroom) { void __user *data_in = u64_to_user_ptr(kattr->test.data_in); - u32 user_size = kattr->test.data_size_in; void *data; - if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom) + if (user_size > PAGE_SIZE - headroom - tailroom) return ERR_PTR(-EINVAL); - if (user_size > size) - return ERR_PTR(-EMSGSIZE); - + size = SKB_DATA_ALIGN(size); data = kzalloc(size + headroom + tailroom, GFP_USER); if (!data) return ERR_PTR(-ENOMEM); @@ -283,7 +679,7 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, int b = 2, err = -EFAULT; u32 retval = 0; - if (kattr->test.flags || kattr->test.cpu) + if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size) return -EINVAL; switch (prog->expected_attach_type) { @@ -296,13 +692,19 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 || bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111 || bpf_fentry_test7((struct bpf_fentry_test_t *)0) != 0 || - bpf_fentry_test8(&arg) != 0) + bpf_fentry_test8(&arg) != 0 || + bpf_fentry_test9(&retval) != 0 || + bpf_fentry_test10((void *)0) != 0) goto out; break; case BPF_MODIFY_RETURN: ret = bpf_modify_return_test(1, &b); if (b != 2) - side_effect = 1; + side_effect++; + b = 2; + ret += bpf_modify_return_test2(1, &b, 3, 4, (void *)5, 6, 7); + if (b != 2) + side_effect++; break; default: goto out; @@ -328,10 +730,16 @@ static void __bpf_prog_test_run_raw_tp(void *data) { struct bpf_raw_tp_test_run_info *info = data; + struct bpf_trace_run_ctx run_ctx = {}; + struct bpf_run_ctx *old_run_ctx; + + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); rcu_read_lock(); info->retval = bpf_prog_run(info->prog, info->ctx); rcu_read_unlock(); + + bpf_reset_run_ctx(old_run_ctx); } int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, @@ -347,7 +755,7 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, /* doesn't support data_in/out, ctx_out, duration, or repeat */ if (kattr->test.data_in || kattr->test.data_out || kattr->test.ctx_out || kattr->test.duration || - kattr->test.repeat) + kattr->test.repeat || kattr->test.batch_size) return -EINVAL; if (ctx_size_in < prog->aux->max_ctx_offset || @@ -491,6 +899,12 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) /* cb is allowed */ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, cb), + offsetof(struct __sk_buff, data_end))) + return -EINVAL; + + /* data_end is allowed, but not copied to skb */ + + if (!range_is_zero(__skb, offsetofend(struct __sk_buff, data_end), offsetof(struct __sk_buff, tstamp))) return -EINVAL; @@ -524,13 +938,18 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) cb->pkt_len = skb->len; } else { if (__skb->wire_len < skb->len || - __skb->wire_len > GSO_MAX_SIZE) + __skb->wire_len > GSO_LEGACY_MAX_SIZE) return -EINVAL; cb->pkt_len = __skb->wire_len; } if (__skb->gso_segs > GSO_MAX_SEGS) return -EINVAL; + + /* Currently GSO type is zero/unset. If this gets extended with + * a small list of accepted GSO types in future, the filter for + * an unset GSO type in bpf_clone_redirect() can be lifted. + */ skb_shinfo(skb)->gso_segs = __skb->gso_segs; skb_shinfo(skb)->gso_size = __skb->gso_size; skb_shinfo(skb)->hwtstamps.hwtstamp = __skb->hwtstamp; @@ -565,66 +984,129 @@ static struct proto bpf_dummy_proto = { int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { - bool is_l2 = false, is_direct_pkt_access = false; + bool is_l2 = false, is_direct_pkt_access = false, is_lwt = false; + u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); struct net *net = current->nsproxy->net_ns; struct net_device *dev = net->loopback_dev; - u32 size = kattr->test.data_size_in; + u32 headroom = NET_SKB_PAD + NET_IP_ALIGN; + u32 linear_sz = kattr->test.data_size_in; u32 repeat = kattr->test.repeat; struct __sk_buff *ctx = NULL; + struct sk_buff *skb = NULL; + struct sock *sk = NULL; u32 retval, duration; int hh_len = ETH_HLEN; - struct sk_buff *skb; - struct sock *sk; - void *data; + void *data = NULL; int ret; - if (kattr->test.flags || kattr->test.cpu) + if ((kattr->test.flags & ~BPF_F_TEST_SKB_CHECKSUM_COMPLETE) || + kattr->test.cpu || kattr->test.batch_size) return -EINVAL; - data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN, - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); - if (IS_ERR(data)) - return PTR_ERR(data); - - ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff)); - if (IS_ERR(ctx)) { - kfree(data); - return PTR_ERR(ctx); - } + if (kattr->test.data_size_in < ETH_HLEN) + return -EINVAL; switch (prog->type) { case BPF_PROG_TYPE_SCHED_CLS: case BPF_PROG_TYPE_SCHED_ACT: + is_direct_pkt_access = true; is_l2 = true; - fallthrough; + break; case BPF_PROG_TYPE_LWT_IN: case BPF_PROG_TYPE_LWT_OUT: case BPF_PROG_TYPE_LWT_XMIT: + is_lwt = true; + fallthrough; + case BPF_PROG_TYPE_CGROUP_SKB: is_direct_pkt_access = true; break; default: break; } + ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff)); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + if (ctx) { + if (ctx->data_end > kattr->test.data_size_in || ctx->data || ctx->data_meta) { + ret = -EINVAL; + goto out; + } + if (ctx->data_end) { + /* Non-linear LWT test_run is unsupported for now. */ + if (is_lwt) { + ret = -EINVAL; + goto out; + } + linear_sz = max(ETH_HLEN, ctx->data_end); + } + } + + linear_sz = min_t(u32, linear_sz, PAGE_SIZE - headroom - tailroom); + + data = bpf_test_init(kattr, linear_sz, linear_sz, headroom, tailroom); + if (IS_ERR(data)) { + ret = PTR_ERR(data); + data = NULL; + goto out; + } + sk = sk_alloc(net, AF_UNSPEC, GFP_USER, &bpf_dummy_proto, 1); if (!sk) { - kfree(data); - kfree(ctx); - return -ENOMEM; + ret = -ENOMEM; + goto out; } sock_init_data(NULL, sk); - skb = build_skb(data, 0); + skb = slab_build_skb(data); if (!skb) { - kfree(data); - kfree(ctx); - sk_free(sk); - return -ENOMEM; + ret = -ENOMEM; + goto out; } skb->sk = sk; + data = NULL; /* data released via kfree_skb */ + skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); - __skb_put(skb, size); + __skb_put(skb, linear_sz); + + if (unlikely(kattr->test.data_size_in > linear_sz)) { + void __user *data_in = u64_to_user_ptr(kattr->test.data_in); + struct skb_shared_info *sinfo = skb_shinfo(skb); + u32 copied = linear_sz; + + while (copied < kattr->test.data_size_in) { + struct page *page; + u32 data_len; + + if (sinfo->nr_frags == MAX_SKB_FRAGS) { + ret = -ENOMEM; + goto out; + } + + page = alloc_page(GFP_KERNEL); + if (!page) { + ret = -ENOMEM; + goto out; + } + + data_len = min_t(u32, kattr->test.data_size_in - copied, + PAGE_SIZE); + skb_fill_page_desc(skb, sinfo->nr_frags, page, 0, data_len); + + if (copy_from_user(page_address(page), data_in + copied, + data_len)) { + ret = -EFAULT; + goto out; + } + skb->data_len += data_len; + skb->truesize += PAGE_SIZE; + skb->len += data_len; + copied += data_len; + } + } + if (ctx && ctx->ifindex > 1) { dev = dev_get_by_index(net, ctx->ifindex); if (!dev) { @@ -660,9 +1142,19 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, __skb_push(skb, hh_len); if (is_direct_pkt_access) bpf_compute_data_pointers(skb); + ret = convert___skb_to_skb(skb, ctx); if (ret) goto out; + + if (kattr->test.flags & BPF_F_TEST_SKB_CHECKSUM_COMPLETE) { + const int off = skb_network_offset(skb); + int len = skb->len - off; + + skb->csum = skb_checksum(skb, off, len, 0); + skb->ip_summed = CHECKSUM_COMPLETE; + } + ret = bpf_test_run(prog, skb, repeat, &retval, &duration, false); if (ret) goto out; @@ -677,13 +1169,27 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, } memset(__skb_push(skb, hh_len), 0, hh_len); } + + if (kattr->test.flags & BPF_F_TEST_SKB_CHECKSUM_COMPLETE) { + const int off = skb_network_offset(skb); + int len = skb->len - off; + __wsum csum; + + csum = skb_checksum(skb, off, len, 0); + + if (csum_fold(skb->csum) != csum_fold(csum)) { + ret = -EBADMSG; + goto out; + } + } + convert_skb_to___skb(skb, ctx); - size = skb->len; - /* bpf program can never convert linear skb to non-linear */ - if (WARN_ON_ONCE(skb_is_nonlinear(skb))) - size = skb_headlen(skb); - ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration); + if (skb_is_nonlinear(skb)) + /* bpf program can never convert linear skb to non-linear */ + WARN_ON_ONCE(linear_sz == kattr->test.data_size_in); + ret = bpf_test_finish(kattr, uattr, skb->data, skb_shinfo(skb), skb->len, + skb->data_len, retval, duration); if (!ret) ret = bpf_ctx_finish(kattr, uattr, ctx, sizeof(struct __sk_buff)); @@ -691,7 +1197,9 @@ out: if (dev && dev != net->loopback_dev) dev_put(dev); kfree_skb(skb); - sk_free(sk); + kfree(data); + if (sk) + sk_free(sk); kfree(ctx); return ret; } @@ -757,57 +1265,129 @@ static void xdp_convert_buff_to_md(struct xdp_buff *xdp, struct xdp_md *xdp_md) int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { + bool do_live = (kattr->test.flags & BPF_F_TEST_XDP_LIVE_FRAMES); u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + u32 retval = 0, meta_sz = 0, duration, max_linear_sz, size; + u32 linear_sz = kattr->test.data_size_in; + u32 batch_size = kattr->test.batch_size; u32 headroom = XDP_PACKET_HEADROOM; - u32 size = kattr->test.data_size_in; u32 repeat = kattr->test.repeat; struct netdev_rx_queue *rxqueue; + struct skb_shared_info *sinfo; struct xdp_buff xdp = {}; - u32 retval, duration; + int i, ret = -EINVAL; struct xdp_md *ctx; - u32 max_data_sz; void *data; - int ret = -EINVAL; if (prog->expected_attach_type == BPF_XDP_DEVMAP || prog->expected_attach_type == BPF_XDP_CPUMAP) return -EINVAL; + if (kattr->test.flags & ~BPF_F_TEST_XDP_LIVE_FRAMES) + return -EINVAL; + + if (bpf_prog_is_dev_bound(prog->aux)) + return -EINVAL; + + if (do_live) { + if (!batch_size) + batch_size = NAPI_POLL_WEIGHT; + else if (batch_size > TEST_XDP_MAX_BATCH) + return -E2BIG; + + headroom += sizeof(struct xdp_page_head); + } else if (batch_size) { + return -EINVAL; + } + ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md)); if (IS_ERR(ctx)) return PTR_ERR(ctx); if (ctx) { /* There can't be user provided data before the meta data */ - if (ctx->data_meta || ctx->data_end != size || + if (ctx->data_meta || ctx->data_end > kattr->test.data_size_in || ctx->data > ctx->data_end || - unlikely(xdp_metalen_invalid(ctx->data))) + unlikely(xdp_metalen_invalid(ctx->data)) || + (do_live && (kattr->test.data_out || kattr->test.ctx_out))) goto free_ctx; /* Meta data is allocated from the headroom */ headroom -= ctx->data; + + meta_sz = ctx->data; + linear_sz = ctx->data_end; } - /* XDP have extra tailroom as (most) drivers use full page */ - max_data_sz = 4096 - headroom - tailroom; + max_linear_sz = PAGE_SIZE - headroom - tailroom; + linear_sz = min_t(u32, linear_sz, max_linear_sz); - data = bpf_test_init(kattr, max_data_sz, headroom, tailroom); + /* disallow live data mode for jumbo frames */ + if (do_live && kattr->test.data_size_in > linear_sz) + goto free_ctx; + + if (kattr->test.data_size_in - meta_sz < ETH_HLEN) + goto free_ctx; + + data = bpf_test_init(kattr, linear_sz, max_linear_sz, headroom, tailroom); if (IS_ERR(data)) { ret = PTR_ERR(data); goto free_ctx; } rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0); - xdp_init_buff(&xdp, headroom + max_data_sz + tailroom, - &rxqueue->xdp_rxq); - xdp_prepare_buff(&xdp, data, headroom, size, true); + rxqueue->xdp_rxq.frag_size = PAGE_SIZE; + xdp_init_buff(&xdp, rxqueue->xdp_rxq.frag_size, &rxqueue->xdp_rxq); + xdp_prepare_buff(&xdp, data, headroom, linear_sz, true); + sinfo = xdp_get_shared_info_from_buff(&xdp); ret = xdp_convert_md_to_buff(ctx, &xdp); if (ret) goto free_data; + size = linear_sz; + if (unlikely(kattr->test.data_size_in > size)) { + void __user *data_in = u64_to_user_ptr(kattr->test.data_in); + + while (size < kattr->test.data_size_in) { + struct page *page; + skb_frag_t *frag; + u32 data_len; + + if (sinfo->nr_frags == MAX_SKB_FRAGS) { + ret = -ENOMEM; + goto out; + } + + page = alloc_page(GFP_KERNEL); + if (!page) { + ret = -ENOMEM; + goto out; + } + + frag = &sinfo->frags[sinfo->nr_frags++]; + + data_len = min_t(u32, kattr->test.data_size_in - size, + PAGE_SIZE); + skb_frag_fill_page_desc(frag, page, 0, data_len); + + if (copy_from_user(page_address(page), data_in + size, + data_len)) { + ret = -EFAULT; + goto out; + } + sinfo->xdp_frags_size += data_len; + size += data_len; + } + xdp_buff_set_frags_flag(&xdp); + } + if (repeat > 1) bpf_prog_change_xdp(NULL, prog); - ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true); + + if (do_live) + ret = bpf_test_run_xdp_live(prog, &xdp, repeat, batch_size, &duration); + else + ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true); /* We convert the xdp_buff back to an xdp_md before checking the return * code so the reference count of any held netdevice will be decremented * even if the test run failed. @@ -816,12 +1396,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, if (ret) goto out; - if (xdp.data_meta != data + headroom || - xdp.data_end != xdp.data_meta + size) - size = xdp.data_end - xdp.data_meta; - - ret = bpf_test_finish(kattr, uattr, xdp.data_meta, size, retval, - duration); + size = xdp.data_end - xdp.data_meta + sinfo->xdp_frags_size; + ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size, sinfo->xdp_frags_size, + retval, duration); if (!ret) ret = bpf_ctx_finish(kattr, uattr, ctx, sizeof(struct xdp_md)); @@ -830,6 +1407,8 @@ out: if (repeat > 1) bpf_prog_change_xdp(prog, NULL); free_data: + for (i = 0; i < sinfo->nr_frags; i++) + __free_page(skb_frag_page(&sinfo->frags[i])); kfree(data); free_ctx: kfree(ctx); @@ -855,7 +1434,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { - struct bpf_test_timer t = { NO_PREEMPT }; + struct bpf_test_timer t = {}; u32 size = kattr->test.data_size_in; struct bpf_flow_dissector ctx = {}; u32 repeat = kattr->test.repeat; @@ -867,16 +1446,13 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, void *data; int ret; - if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR) - return -EINVAL; - - if (kattr->test.flags || kattr->test.cpu) + if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size) return -EINVAL; if (size < ETH_HLEN) return -EINVAL; - data = bpf_test_init(kattr, size, 0, 0); + data = bpf_test_init(kattr, kattr->test.data_size_in, size, 0, 0); if (IS_ERR(data)) return PTR_ERR(data); @@ -905,14 +1481,14 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, do { retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN, size, flags); - } while (bpf_test_timer_continue(&t, repeat, &ret, &duration)); + } while (bpf_test_timer_continue(&t, 1, repeat, &ret, &duration)); bpf_test_timer_leave(&t); if (ret < 0) goto out; - ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys), - retval, duration); + ret = bpf_test_finish(kattr, uattr, &flow_keys, NULL, + sizeof(flow_keys), 0, retval, duration); if (!ret) ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(struct bpf_flow_keys)); @@ -926,7 +1502,7 @@ out: int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { - struct bpf_test_timer t = { NO_PREEMPT }; + struct bpf_test_timer t = {}; struct bpf_prog_array *progs = NULL; struct bpf_sk_lookup_kern ctx = {}; u32 repeat = kattr->test.repeat; @@ -934,10 +1510,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat u32 retval, duration; int ret = -EINVAL; - if (prog->type != BPF_PROG_TYPE_SK_LOOKUP) - return -EINVAL; - - if (kattr->test.flags || kattr->test.cpu) + if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size) return -EINVAL; if (kattr->test.data_in || kattr->test.data_size_in || kattr->test.data_out || @@ -960,7 +1533,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx))) goto out; - if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) { + if (user_ctx->local_port > U16_MAX) { ret = -ERANGE; goto out; } @@ -968,7 +1541,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat ctx.family = (u16)user_ctx->family; ctx.protocol = (u16)user_ctx->protocol; ctx.dport = (u16)user_ctx->local_port; - ctx.sport = (__force __be16)user_ctx->remote_port; + ctx.sport = user_ctx->remote_port; switch (ctx.family) { case AF_INET: @@ -1000,7 +1573,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat do { ctx.selected_sk = NULL; retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, bpf_prog_run); - } while (bpf_test_timer_continue(&t, repeat, &ret, &duration)); + } while (bpf_test_timer_continue(&t, 1, repeat, &ret, &duration)); bpf_test_timer_leave(&t); if (ret < 0) @@ -1016,7 +1589,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat user_ctx->cookie = sock_gen_cookie(ctx.selected_sk); } - ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration); + ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, 0, retval, duration); if (!ret) ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx)); @@ -1039,7 +1612,8 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog, /* doesn't support data_in/out, ctx_out, duration, or repeat or flags */ if (kattr->test.data_in || kattr->test.data_out || kattr->test.ctx_out || kattr->test.duration || - kattr->test.repeat || kattr->test.flags) + kattr->test.repeat || kattr->test.flags || + kattr->test.batch_size) return -EINVAL; if (ctx_size_in < prog->aux->max_ctx_offset || @@ -1067,3 +1641,194 @@ out: kfree(ctx); return err; } + +static int verify_and_copy_hook_state(struct nf_hook_state *state, + const struct nf_hook_state *user, + struct net_device *dev) +{ + if (user->in || user->out) + return -EINVAL; + + if (user->net || user->sk || user->okfn) + return -EINVAL; + + switch (user->pf) { + case NFPROTO_IPV4: + case NFPROTO_IPV6: + switch (state->hook) { + case NF_INET_PRE_ROUTING: + state->in = dev; + break; + case NF_INET_LOCAL_IN: + state->in = dev; + break; + case NF_INET_FORWARD: + state->in = dev; + state->out = dev; + break; + case NF_INET_LOCAL_OUT: + state->out = dev; + break; + case NF_INET_POST_ROUTING: + state->out = dev; + break; + } + + break; + default: + return -EINVAL; + } + + state->pf = user->pf; + state->hook = user->hook; + + return 0; +} + +static __be16 nfproto_eth(int nfproto) +{ + switch (nfproto) { + case NFPROTO_IPV4: + return htons(ETH_P_IP); + case NFPROTO_IPV6: + break; + } + + return htons(ETH_P_IPV6); +} + +int bpf_prog_test_run_nf(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + struct net *net = current->nsproxy->net_ns; + struct net_device *dev = net->loopback_dev; + struct nf_hook_state *user_ctx, hook_state = { + .pf = NFPROTO_IPV4, + .hook = NF_INET_LOCAL_OUT, + }; + u32 size = kattr->test.data_size_in; + u32 repeat = kattr->test.repeat; + struct bpf_nf_ctx ctx = { + .state = &hook_state, + }; + struct sk_buff *skb = NULL; + u32 retval, duration; + void *data; + int ret; + + if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size) + return -EINVAL; + + if (size < sizeof(struct iphdr)) + return -EINVAL; + + data = bpf_test_init(kattr, kattr->test.data_size_in, size, + NET_SKB_PAD + NET_IP_ALIGN, + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); + if (IS_ERR(data)) + return PTR_ERR(data); + + if (!repeat) + repeat = 1; + + user_ctx = bpf_ctx_init(kattr, sizeof(struct nf_hook_state)); + if (IS_ERR(user_ctx)) { + kfree(data); + return PTR_ERR(user_ctx); + } + + if (user_ctx) { + ret = verify_and_copy_hook_state(&hook_state, user_ctx, dev); + if (ret) + goto out; + } + + skb = slab_build_skb(data); + if (!skb) { + ret = -ENOMEM; + goto out; + } + + data = NULL; /* data released via kfree_skb */ + + skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); + __skb_put(skb, size); + + ret = -EINVAL; + + if (hook_state.hook != NF_INET_LOCAL_OUT) { + if (size < ETH_HLEN + sizeof(struct iphdr)) + goto out; + + skb->protocol = eth_type_trans(skb, dev); + switch (skb->protocol) { + case htons(ETH_P_IP): + if (hook_state.pf == NFPROTO_IPV4) + break; + goto out; + case htons(ETH_P_IPV6): + if (size < ETH_HLEN + sizeof(struct ipv6hdr)) + goto out; + if (hook_state.pf == NFPROTO_IPV6) + break; + goto out; + default: + ret = -EPROTO; + goto out; + } + + skb_reset_network_header(skb); + } else { + skb->protocol = nfproto_eth(hook_state.pf); + } + + ctx.skb = skb; + + ret = bpf_test_run(prog, &ctx, repeat, &retval, &duration, false); + if (ret) + goto out; + + ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, 0, retval, duration); + +out: + kfree(user_ctx); + kfree_skb(skb); + kfree(data); + return ret; +} + +static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = { + .owner = THIS_MODULE, + .set = &test_sk_check_kfunc_ids, +}; + +BTF_ID_LIST(bpf_prog_test_dtor_kfunc_ids) +BTF_ID(struct, prog_test_ref_kfunc) +BTF_ID(func, bpf_kfunc_call_test_release_dtor) +BTF_ID(struct, prog_test_member) +BTF_ID(func, bpf_kfunc_call_memb_release_dtor) + +static int __init bpf_prog_test_run_init(void) +{ + const struct btf_id_dtor_kfunc bpf_prog_test_dtor_kfunc[] = { + { + .btf_id = bpf_prog_test_dtor_kfunc_ids[0], + .kfunc_btf_id = bpf_prog_test_dtor_kfunc_ids[1] + }, + { + .btf_id = bpf_prog_test_dtor_kfunc_ids[2], + .kfunc_btf_id = bpf_prog_test_dtor_kfunc_ids[3], + }, + }; + int ret; + + ret = register_btf_fmodret_id_set(&bpf_test_modify_return_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_prog_test_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_prog_test_kfunc_set); + return ret ?: register_btf_id_dtor_kfuncs(bpf_prog_test_dtor_kfunc, + ARRAY_SIZE(bpf_prog_test_dtor_kfunc), + THIS_MODULE); +} +late_initcall(bpf_prog_test_run_init); |
