diff options
-rw-r--r-- | arch/x86/net/bpf_jit_comp.c | 1 | ||||
-rw-r--r-- | include/linux/bpf.h | 73 | ||||
-rw-r--r-- | include/uapi/linux/bpf.h | 24 | ||||
-rw-r--r-- | kernel/bpf/Makefile | 2 | ||||
-rw-r--r-- | kernel/bpf/core.c | 110 | ||||
-rw-r--r-- | kernel/bpf/helpers.c | 38 | ||||
-rw-r--r-- | kernel/bpf/rqspinlock.c | 23 | ||||
-rw-r--r-- | kernel/bpf/stream.c | 526 | ||||
-rw-r--r-- | kernel/bpf/syscall.c | 25 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 1 | ||||
-rw-r--r-- | tools/bpf/bpftool/Documentation/bpftool-prog.rst | 7 | ||||
-rw-r--r-- | tools/bpf/bpftool/bash-completion/bpftool | 16 | ||||
-rw-r--r-- | tools/bpf/bpftool/prog.c | 49 | ||||
-rw-r--r-- | tools/include/uapi/linux/bpf.h | 24 | ||||
-rw-r--r-- | tools/lib/bpf/bpf.c | 20 | ||||
-rw-r--r-- | tools/lib/bpf/bpf.h | 21 | ||||
-rw-r--r-- | tools/lib/bpf/bpf_helpers.h | 16 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.map | 1 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/prog_tests/stream.c | 141 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/progs/stream.c | 79 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/progs/stream_fail.c | 33 |
21 files changed, 1206 insertions, 24 deletions
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 15672cb926fc..40e1b3b9634f 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -3845,7 +3845,6 @@ void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp } return; #endif - WARN(1, "verification of programs using bpf_throw should have failed\n"); } void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 287c956cdbd2..34dd90ec7fad 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1538,6 +1538,37 @@ struct btf_mod_pair { struct bpf_kfunc_desc_tab; +enum bpf_stream_id { + BPF_STDOUT = 1, + BPF_STDERR = 2, +}; + +struct bpf_stream_elem { + struct llist_node node; + int total_len; + int consumed_len; + char str[]; +}; + +enum { + /* 100k bytes */ + BPF_STREAM_MAX_CAPACITY = 100000ULL, +}; + +struct bpf_stream { + atomic_t capacity; + struct llist_head log; /* list of in-flight stream elements in LIFO order */ + + struct mutex lock; /* lock protecting backlog_{head,tail} */ + struct llist_node *backlog_head; /* list of in-flight stream elements in FIFO order */ + struct llist_node *backlog_tail; /* tail of the list above */ +}; + +struct bpf_stream_stage { + struct llist_head log; + int len; +}; + struct bpf_prog_aux { atomic64_t refcnt; u32 used_map_cnt; @@ -1646,6 +1677,7 @@ struct bpf_prog_aux { struct work_struct work; struct rcu_head rcu; }; + struct bpf_stream stream[2]; }; struct bpf_prog { @@ -2409,6 +2441,7 @@ int generic_map_delete_batch(struct bpf_map *map, struct bpf_map *bpf_map_get_curr_or_next(u32 *id); struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id); + int bpf_map_alloc_pages(const struct bpf_map *map, int nid, unsigned long nr_pages, struct page **page_array); #ifdef CONFIG_MEMCG @@ -3551,6 +3584,16 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id); #define MAX_BPRINTF_VARARGS 12 #define MAX_BPRINTF_BUF 1024 +/* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary + * arguments representation. + */ +#define MAX_BPRINTF_BIN_ARGS 512 + +struct bpf_bprintf_buffers { + char bin_args[MAX_BPRINTF_BIN_ARGS]; + char buf[MAX_BPRINTF_BUF]; +}; + struct bpf_bprintf_data { u32 *bin_args; char *buf; @@ -3558,9 +3601,33 @@ struct bpf_bprintf_data { bool get_buf; }; -int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, +int bpf_bprintf_prepare(const char *fmt, u32 fmt_size, const u64 *raw_args, u32 num_args, struct bpf_bprintf_data *data); void bpf_bprintf_cleanup(struct bpf_bprintf_data *data); +int bpf_try_get_buffers(struct bpf_bprintf_buffers **bufs); +void bpf_put_buffers(void); + +void bpf_prog_stream_init(struct bpf_prog *prog); +void bpf_prog_stream_free(struct bpf_prog *prog); +int bpf_prog_stream_read(struct bpf_prog *prog, enum bpf_stream_id stream_id, void __user *buf, int len); +void bpf_stream_stage_init(struct bpf_stream_stage *ss); +void bpf_stream_stage_free(struct bpf_stream_stage *ss); +__printf(2, 3) +int bpf_stream_stage_printk(struct bpf_stream_stage *ss, const char *fmt, ...); +int bpf_stream_stage_commit(struct bpf_stream_stage *ss, struct bpf_prog *prog, + enum bpf_stream_id stream_id); +int bpf_stream_stage_dump_stack(struct bpf_stream_stage *ss); + +#define bpf_stream_printk(ss, ...) bpf_stream_stage_printk(&ss, __VA_ARGS__) +#define bpf_stream_dump_stack(ss) bpf_stream_stage_dump_stack(&ss) + +#define bpf_stream_stage(ss, prog, stream_id, expr) \ + ({ \ + bpf_stream_stage_init(&ss); \ + (expr); \ + bpf_stream_stage_commit(&ss, prog, stream_id); \ + bpf_stream_stage_free(&ss); \ + }) #ifdef CONFIG_BPF_LSM void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype); @@ -3596,4 +3663,8 @@ static inline bool bpf_is_subprog(const struct bpf_prog *prog) return prog->aux->func_idx != 0; } +int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char **filep, + const char **linep, int *nump); +struct bpf_prog *bpf_prog_find_from_stack(void); + #endif /* _LINUX_BPF_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 719ba230032f..0670e15a6100 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -906,6 +906,17 @@ union bpf_iter_link_info { * A new file descriptor (a nonnegative integer), or -1 if an * error occurred (in which case, *errno* is set appropriately). * + * BPF_PROG_STREAM_READ_BY_FD + * Description + * Read data of a program's BPF stream. The program is identified + * by *prog_fd*, and the stream is identified by the *stream_id*. + * The data is copied to a buffer pointed to by *stream_buf*, and + * filled less than or equal to *stream_buf_len* bytes. + * + * Return + * Number of bytes read from the stream on success, or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -961,6 +972,7 @@ enum bpf_cmd { BPF_LINK_DETACH, BPF_PROG_BIND_MAP, BPF_TOKEN_CREATE, + BPF_PROG_STREAM_READ_BY_FD, __MAX_BPF_CMD, }; @@ -1463,6 +1475,11 @@ struct bpf_stack_build_id { #define BPF_OBJ_NAME_LEN 16U +enum { + BPF_STREAM_STDOUT = 1, + BPF_STREAM_STDERR = 2, +}; + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -1849,6 +1866,13 @@ union bpf_attr { __u32 bpffs_fd; } token_create; + struct { + __aligned_u64 stream_buf; + __u32 stream_buf_len; + __u32 stream_id; + __u32 prog_fd; + } prog_stream_read; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 3a335c50e6e3..269c04a24664 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -14,7 +14,7 @@ obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o obj-$(CONFIG_BPF_SYSCALL) += disasm.o mprog.o obj-$(CONFIG_BPF_JIT) += trampoline.o -obj-$(CONFIG_BPF_SYSCALL) += btf.o memalloc.o rqspinlock.o +obj-$(CONFIG_BPF_SYSCALL) += btf.o memalloc.o rqspinlock.o stream.o ifeq ($(CONFIG_MMU)$(CONFIG_64BIT),yy) obj-$(CONFIG_BPF_SYSCALL) += arena.o range_tree.o endif diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index e536a34a32c8..fe8a53f3c5bc 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -134,6 +134,10 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag mutex_init(&fp->aux->ext_mutex); mutex_init(&fp->aux->dst_mutex); +#ifdef CONFIG_BPF_SYSCALL + bpf_prog_stream_init(fp); +#endif + return fp; } @@ -778,7 +782,10 @@ bool is_bpf_text_address(unsigned long addr) struct bpf_prog *bpf_prog_ksym_find(unsigned long addr) { - struct bpf_ksym *ksym = bpf_ksym_find(addr); + struct bpf_ksym *ksym; + + WARN_ON_ONCE(!rcu_read_lock_held()); + ksym = bpf_ksym_find(addr); return ksym && ksym->prog ? container_of(ksym, struct bpf_prog_aux, ksym)->prog : @@ -2862,6 +2869,7 @@ static void bpf_prog_free_deferred(struct work_struct *work) aux = container_of(work, struct bpf_prog_aux, work); #ifdef CONFIG_BPF_SYSCALL bpf_free_kfunc_btf_tab(aux->kfunc_btf_tab); + bpf_prog_stream_free(aux->prog); #endif #ifdef CONFIG_CGROUP_BPF if (aux->cgroup_atype != CGROUP_BPF_ATTACH_TYPE_INVALID) @@ -3160,6 +3168,22 @@ u64 __weak arch_bpf_timed_may_goto(void) return 0; } +static noinline void bpf_prog_report_may_goto_violation(void) +{ +#ifdef CONFIG_BPF_SYSCALL + struct bpf_stream_stage ss; + struct bpf_prog *prog; + + prog = bpf_prog_find_from_stack(); + if (!prog) + return; + bpf_stream_stage(ss, prog, BPF_STDERR, ({ + bpf_stream_printk(ss, "ERROR: Timeout detected for may_goto instruction\n"); + bpf_stream_dump_stack(ss); + })); +#endif +} + u64 bpf_check_timed_may_goto(struct bpf_timed_may_goto *p) { u64 time = ktime_get_mono_fast_ns(); @@ -3170,8 +3194,10 @@ u64 bpf_check_timed_may_goto(struct bpf_timed_may_goto *p) return BPF_MAX_TIMED_LOOPS; } /* Check if we've exhausted our time slice, and zero count. */ - if (time - p->timestamp >= (NSEC_PER_SEC / 4)) + if (unlikely(time - p->timestamp >= (NSEC_PER_SEC / 4))) { + bpf_prog_report_may_goto_violation(); return 0; + } /* Refresh the count for the stack frame. */ return BPF_MAX_TIMED_LOOPS; } @@ -3208,3 +3234,83 @@ EXPORT_SYMBOL(bpf_stats_enabled_key); EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception); EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_bulk_tx); + +#ifdef CONFIG_BPF_SYSCALL + +int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char **filep, + const char **linep, int *nump) +{ + int idx = -1, insn_start, insn_end, len; + struct bpf_line_info *linfo; + void **jited_linfo; + struct btf *btf; + + btf = prog->aux->btf; + linfo = prog->aux->linfo; + jited_linfo = prog->aux->jited_linfo; + + if (!btf || !linfo || !jited_linfo) + return -EINVAL; + len = prog->aux->func ? prog->aux->func[prog->aux->func_idx]->len : prog->len; + + linfo = &prog->aux->linfo[prog->aux->linfo_idx]; + jited_linfo = &prog->aux->jited_linfo[prog->aux->linfo_idx]; + + insn_start = linfo[0].insn_off; + insn_end = insn_start + len; + + for (int i = 0; i < prog->aux->nr_linfo && + linfo[i].insn_off >= insn_start && linfo[i].insn_off < insn_end; i++) { + if (jited_linfo[i] >= (void *)ip) + break; + idx = i; + } + + if (idx == -1) + return -ENOENT; + + /* Get base component of the file path. */ + *filep = btf_name_by_offset(btf, linfo[idx].file_name_off); + *filep = kbasename(*filep); + /* Obtain the source line, and strip whitespace in prefix. */ + *linep = btf_name_by_offset(btf, linfo[idx].line_off); + while (isspace(**linep)) + *linep += 1; + *nump = BPF_LINE_INFO_LINE_NUM(linfo[idx].line_col); + return 0; +} + +struct walk_stack_ctx { + struct bpf_prog *prog; +}; + +static bool find_from_stack_cb(void *cookie, u64 ip, u64 sp, u64 bp) +{ + struct walk_stack_ctx *ctxp = cookie; + struct bpf_prog *prog; + + /* + * The RCU read lock is held to safely traverse the latch tree, but we + * don't need its protection when accessing the prog, since it has an + * active stack frame on the current stack trace, and won't disappear. + */ + rcu_read_lock(); + prog = bpf_prog_ksym_find(ip); + rcu_read_unlock(); + if (!prog) + return true; + if (bpf_is_subprog(prog)) + return true; + ctxp->prog = prog; + return false; +} + +struct bpf_prog *bpf_prog_find_from_stack(void) +{ + struct walk_stack_ctx ctx = {}; + + arch_bpf_stack_walk(find_from_stack_cb, &ctx); + return ctx.prog; +} + +#endif diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 5269381d6d3d..3d33181d5e67 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -764,22 +764,13 @@ static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, return -EINVAL; } -/* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary - * arguments representation. - */ -#define MAX_BPRINTF_BIN_ARGS 512 - /* Support executing three nested bprintf helper calls on a given CPU */ #define MAX_BPRINTF_NEST_LEVEL 3 -struct bpf_bprintf_buffers { - char bin_args[MAX_BPRINTF_BIN_ARGS]; - char buf[MAX_BPRINTF_BUF]; -}; static DEFINE_PER_CPU(struct bpf_bprintf_buffers[MAX_BPRINTF_NEST_LEVEL], bpf_bprintf_bufs); static DEFINE_PER_CPU(int, bpf_bprintf_nest_level); -static int try_get_buffers(struct bpf_bprintf_buffers **bufs) +int bpf_try_get_buffers(struct bpf_bprintf_buffers **bufs) { int nest_level; @@ -795,16 +786,21 @@ static int try_get_buffers(struct bpf_bprintf_buffers **bufs) return 0; } -void bpf_bprintf_cleanup(struct bpf_bprintf_data *data) +void bpf_put_buffers(void) { - if (!data->bin_args && !data->buf) - return; if (WARN_ON_ONCE(this_cpu_read(bpf_bprintf_nest_level) == 0)) return; this_cpu_dec(bpf_bprintf_nest_level); preempt_enable(); } +void bpf_bprintf_cleanup(struct bpf_bprintf_data *data) +{ + if (!data->bin_args && !data->buf) + return; + bpf_put_buffers(); +} + /* * bpf_bprintf_prepare - Generic pass on format strings for bprintf-like helpers * @@ -819,7 +815,7 @@ void bpf_bprintf_cleanup(struct bpf_bprintf_data *data) * In argument preparation mode, if 0 is returned, safe temporary buffers are * allocated and bpf_bprintf_cleanup should be called to free them after use. */ -int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, +int bpf_bprintf_prepare(const char *fmt, u32 fmt_size, const u64 *raw_args, u32 num_args, struct bpf_bprintf_data *data) { bool get_buffers = (data->get_bin_args && num_args) || data->get_buf; @@ -835,7 +831,7 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, return -EINVAL; fmt_size = fmt_end - fmt; - if (get_buffers && try_get_buffers(&buffers)) + if (get_buffers && bpf_try_get_buffers(&buffers)) return -EBUSY; if (data->get_bin_args) { @@ -2985,9 +2981,16 @@ static bool bpf_stack_walker(void *cookie, u64 ip, u64 sp, u64 bp) struct bpf_throw_ctx *ctx = cookie; struct bpf_prog *prog; - if (!is_bpf_text_address(ip)) - return !ctx->cnt; + /* + * The RCU read lock is held to safely traverse the latch tree, but we + * don't need its protection when accessing the prog, since it has an + * active stack frame on the current stack trace, and won't disappear. + */ + rcu_read_lock(); prog = bpf_prog_ksym_find(ip); + rcu_read_unlock(); + if (!prog) + return !ctx->cnt; ctx->cnt++; if (bpf_is_subprog(prog)) return true; @@ -3829,6 +3832,7 @@ BTF_ID_FLAGS(func, bpf_strnstr); #if defined(CONFIG_BPF_LSM) && defined(CONFIG_CGROUPS) BTF_ID_FLAGS(func, bpf_cgroup_read_xattr, KF_RCU) #endif +BTF_ID_FLAGS(func, bpf_stream_vprintk, KF_TRUSTED_ARGS) BTF_KFUNCS_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { diff --git a/kernel/bpf/rqspinlock.c b/kernel/bpf/rqspinlock.c index 338305c8852c..5ab354d55d82 100644 --- a/kernel/bpf/rqspinlock.c +++ b/kernel/bpf/rqspinlock.c @@ -666,6 +666,27 @@ EXPORT_SYMBOL_GPL(resilient_queued_spin_lock_slowpath); __bpf_kfunc_start_defs(); +static void bpf_prog_report_rqspinlock_violation(const char *str, void *lock, bool irqsave) +{ + struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks); + struct bpf_stream_stage ss; + struct bpf_prog *prog; + + prog = bpf_prog_find_from_stack(); + if (!prog) + return; + bpf_stream_stage(ss, prog, BPF_STDERR, ({ + bpf_stream_printk(ss, "ERROR: %s for bpf_res_spin_lock%s\n", str, irqsave ? "_irqsave" : ""); + bpf_stream_printk(ss, "Attempted lock = 0x%px\n", lock); + bpf_stream_printk(ss, "Total held locks = %d\n", rqh->cnt); + for (int i = 0; i < min(RES_NR_HELD, rqh->cnt); i++) + bpf_stream_printk(ss, "Held lock[%2d] = 0x%px\n", i, rqh->locks[i]); + bpf_stream_dump_stack(ss); + })); +} + +#define REPORT_STR(ret) ({ (ret) == -ETIMEDOUT ? "Timeout detected" : "AA or ABBA deadlock detected"; }) + __bpf_kfunc int bpf_res_spin_lock(struct bpf_res_spin_lock *lock) { int ret; @@ -676,6 +697,7 @@ __bpf_kfunc int bpf_res_spin_lock(struct bpf_res_spin_lock *lock) preempt_disable(); ret = res_spin_lock((rqspinlock_t *)lock); if (unlikely(ret)) { + bpf_prog_report_rqspinlock_violation(REPORT_STR(ret), lock, false); preempt_enable(); return ret; } @@ -698,6 +720,7 @@ __bpf_kfunc int bpf_res_spin_lock_irqsave(struct bpf_res_spin_lock *lock, unsign local_irq_save(flags); ret = res_spin_lock((rqspinlock_t *)lock); if (unlikely(ret)) { + bpf_prog_report_rqspinlock_violation(REPORT_STR(ret), lock, true); local_irq_restore(flags); preempt_enable(); return ret; diff --git a/kernel/bpf/stream.c b/kernel/bpf/stream.c new file mode 100644 index 000000000000..8c842f845245 --- /dev/null +++ b/kernel/bpf/stream.c @@ -0,0 +1,526 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <linux/bpf.h> +#include <linux/filter.h> +#include <linux/bpf_mem_alloc.h> +#include <linux/percpu.h> +#include <linux/refcount.h> +#include <linux/gfp.h> +#include <linux/memory.h> +#include <linux/local_lock.h> +#include <linux/mutex.h> + +/* + * Simple per-CPU NMI-safe bump allocation mechanism, backed by the NMI-safe + * try_alloc_pages()/free_pages_nolock() primitives. We allocate a page and + * stash it in a local per-CPU variable, and bump allocate from the page + * whenever items need to be printed to a stream. Each page holds a global + * atomic refcount in its first 4 bytes, and then records of variable length + * that describe the printed messages. Once the global refcount has dropped to + * zero, it is a signal to free the page back to the kernel's page allocator, + * given all the individual records in it have been consumed. + * + * It is possible the same page is used to serve allocations across different + * programs, which may be consumed at different times individually, hence + * maintaining a reference count per-page is critical for correct lifetime + * tracking. + * + * The bpf_stream_page code will be replaced to use kmalloc_nolock() once it + * lands. + */ +struct bpf_stream_page { + refcount_t ref; + u32 consumed; + char buf[]; +}; + +/* Available room to add data to a refcounted page. */ +#define BPF_STREAM_PAGE_SZ (PAGE_SIZE - offsetofend(struct bpf_stream_page, consumed)) + +static DEFINE_PER_CPU(local_trylock_t, stream_local_lock) = INIT_LOCAL_TRYLOCK(stream_local_lock); +static DEFINE_PER_CPU(struct bpf_stream_page *, stream_pcpu_page); + +static bool bpf_stream_page_local_lock(unsigned long *flags) +{ + return local_trylock_irqsave(&stream_local_lock, *flags); +} + +static void bpf_stream_page_local_unlock(unsigned long *flags) +{ + local_unlock_irqrestore(&stream_local_lock, *flags); +} + +static void bpf_stream_page_free(struct bpf_stream_page *stream_page) +{ + struct page *p; + + if (!stream_page) + return; + p = virt_to_page(stream_page); + free_pages_nolock(p, 0); +} + +static void bpf_stream_page_get(struct bpf_stream_page *stream_page) +{ + refcount_inc(&stream_page->ref); +} + +static void bpf_stream_page_put(struct bpf_stream_page *stream_page) +{ + if (refcount_dec_and_test(&stream_page->ref)) + bpf_stream_page_free(stream_page); +} + +static void bpf_stream_page_init(struct bpf_stream_page *stream_page) +{ + refcount_set(&stream_page->ref, 1); + stream_page->consumed = 0; +} + +static struct bpf_stream_page *bpf_stream_page_replace(void) +{ + struct bpf_stream_page *stream_page, *old_stream_page; + struct page *page; + + page = alloc_pages_nolock(NUMA_NO_NODE, 0); + if (!page) + return NULL; + stream_page = page_address(page); + bpf_stream_page_init(stream_page); + + old_stream_page = this_cpu_read(stream_pcpu_page); + if (old_stream_page) + bpf_stream_page_put(old_stream_page); + this_cpu_write(stream_pcpu_page, stream_page); + return stream_page; +} + +static int bpf_stream_page_check_room(struct bpf_stream_page *stream_page, int len) +{ + int min = offsetof(struct bpf_stream_elem, str[0]); + int consumed = stream_page->consumed; + int total = BPF_STREAM_PAGE_SZ; + int rem = max(0, total - consumed - min); + + /* Let's give room of at least 8 bytes. */ + WARN_ON_ONCE(rem % 8 != 0); + rem = rem < 8 ? 0 : rem; + return min(len, rem); +} + +static void bpf_stream_elem_init(struct bpf_stream_elem *elem, int len) +{ + init_llist_node(&elem->node); + elem->total_len = len; + elem->consumed_len = 0; +} + +static struct bpf_stream_page *bpf_stream_page_from_elem(struct bpf_stream_elem *elem) +{ + unsigned long addr = (unsigned long)elem; + + return (struct bpf_stream_page *)PAGE_ALIGN_DOWN(addr); +} + +static struct bpf_stream_elem *bpf_stream_page_push_elem(struct bpf_stream_page *stream_page, int len) +{ + u32 consumed = stream_page->consumed; + + stream_page->consumed += round_up(offsetof(struct bpf_stream_elem, str[len]), 8); + return (struct bpf_stream_elem *)&stream_page->buf[consumed]; +} + +static struct bpf_stream_elem *bpf_stream_page_reserve_elem(int len) +{ + struct bpf_stream_elem *elem = NULL; + struct bpf_stream_page *page; + int room = 0; + + page = this_cpu_read(stream_pcpu_page); + if (!page) + page = bpf_stream_page_replace(); + if (!page) + return NULL; + + room = bpf_stream_page_check_room(page, len); + if (room != len) + page = bpf_stream_page_replace(); + if (!page) + return NULL; + bpf_stream_page_get(page); + room = bpf_stream_page_check_room(page, len); + WARN_ON_ONCE(room != len); + + elem = bpf_stream_page_push_elem(page, room); + bpf_stream_elem_init(elem, room); + return elem; +} + +static struct bpf_stream_elem *bpf_stream_elem_alloc(int len) +{ + const int max_len = ARRAY_SIZE((struct bpf_bprintf_buffers){}.buf); + struct bpf_stream_elem *elem; + unsigned long flags; + + BUILD_BUG_ON(max_len > BPF_STREAM_PAGE_SZ); + /* + * Length denotes the amount of data to be written as part of stream element, + * thus includes '\0' byte. We're capped by how much bpf_bprintf_buffers can + * accomodate, therefore deny allocations that won't fit into them. + */ + if (len < 0 || len > max_len) + return NULL; + + if (!bpf_stream_page_local_lock(&flags)) + return NULL; + elem = bpf_stream_page_reserve_elem(len); + bpf_stream_page_local_unlock(&flags); + return elem; +} + +static int __bpf_stream_push_str(struct llist_head *log, const char *str, int len) +{ + struct bpf_stream_elem *elem = NULL; + + /* + * Allocate a bpf_prog_stream_elem and push it to the bpf_prog_stream + * log, elements will be popped at once and reversed to print the log. + */ + elem = bpf_stream_elem_alloc(len); + if (!elem) + return -ENOMEM; + + memcpy(elem->str, str, len); + llist_add(&elem->node, log); + + return 0; +} + +static int bpf_stream_consume_capacity(struct bpf_stream *stream, int len) +{ + if (atomic_read(&stream->capacity) >= BPF_STREAM_MAX_CAPACITY) + return -ENOSPC; + if (atomic_add_return(len, &stream->capacity) >= BPF_STREAM_MAX_CAPACITY) { + atomic_sub(len, &stream->capacity); + return -ENOSPC; + } + return 0; +} + +static void bpf_stream_release_capacity(struct bpf_stream *stream, struct bpf_stream_elem *elem) +{ + int len = elem->total_len; + + atomic_sub(len, &stream->capacity); +} + +static int bpf_stream_push_str(struct bpf_stream *stream, const char *str, int len) +{ + int ret = bpf_stream_consume_capacity(stream, len); + + return ret ?: __bpf_stream_push_str(&stream->log, str, len); +} + +static struct bpf_stream *bpf_stream_get(enum bpf_stream_id stream_id, struct bpf_prog_aux *aux) +{ + if (stream_id != BPF_STDOUT && stream_id != BPF_STDERR) + return NULL; + return &aux->stream[stream_id - 1]; +} + +static void bpf_stream_free_elem(struct bpf_stream_elem *elem) +{ + struct bpf_stream_page *p; + + p = bpf_stream_page_from_elem(elem); + bpf_stream_page_put(p); +} + +static void bpf_stream_free_list(struct llist_node *list) +{ + struct bpf_stream_elem *elem, *tmp; + + llist_for_each_entry_safe(elem, tmp, list, node) + bpf_stream_free_elem(elem); +} + +static struct llist_node *bpf_stream_backlog_peek(struct bpf_stream *stream) +{ + return stream->backlog_head; +} + +static struct llist_node *bpf_stream_backlog_pop(struct bpf_stream *stream) +{ + struct llist_node *node; + + node = stream->backlog_head; + if (stream->backlog_head == stream->backlog_tail) + stream->backlog_head = stream->backlog_tail = NULL; + else + stream->backlog_head = node->next; + return node; +} + +static void bpf_stream_backlog_fill(struct bpf_stream *stream) +{ + struct llist_node *head, *tail; + + if (llist_empty(&stream->log)) + return; + tail = llist_del_all(&stream->log); + if (!tail) + return; + head = llist_reverse_order(tail); + + if (!stream->backlog_head) { + stream->backlog_head = head; + stream->backlog_tail = tail; + } else { + stream->backlog_tail->next = head; + stream->backlog_tail = tail; + } + + return; +} + +static bool bpf_stream_consume_elem(struct bpf_stream_elem *elem, int *len) +{ + int rem = elem->total_len - elem->consumed_len; + int used = min(rem, *len); + + elem->consumed_len += used; + *len -= used; + + return elem->consumed_len == elem->total_len; +} + +static int bpf_stream_read(struct bpf_stream *stream, void __user *buf, int len) +{ + int rem_len = len, cons_len, ret = 0; + struct bpf_stream_elem *elem = NULL; + struct llist_node *node; + + mutex_lock(&stream->lock); + + while (rem_len) { + int pos = len - rem_len; + bool cont; + + node = bpf_stream_backlog_peek(stream); + if (!node) { + bpf_stream_backlog_fill(stream); + node = bpf_stream_backlog_peek(stream); + } + if (!node) + break; + elem = container_of(node, typeof(*elem), node); + + cons_len = elem->consumed_len; + cont = bpf_stream_consume_elem(elem, &rem_len) == false; + + ret = copy_to_user(buf + pos, elem->str + cons_len, + elem->consumed_len - cons_len); + /* Restore in case of error. */ + if (ret) { + ret = -EFAULT; + elem->consumed_len = cons_len; + break; + } + + if (cont) + continue; + bpf_stream_backlog_pop(stream); + bpf_stream_release_capacity(stream, elem); + bpf_stream_free_elem(elem); + } + + mutex_unlock(&stream->lock); + return ret ? ret : len - rem_len; +} + +int bpf_prog_stream_read(struct bpf_prog *prog, enum bpf_stream_id stream_id, void __user *buf, int len) +{ + struct bpf_stream *stream; + + stream = bpf_stream_get(stream_id, prog->aux); + if (!stream) + return -ENOENT; + return bpf_stream_read(stream, buf, len); +} + +__bpf_kfunc_start_defs(); + +/* + * Avoid using enum bpf_stream_id so that kfunc users don't have to pull in the + * enum in headers. + */ +__bpf_kfunc int bpf_stream_vprintk(int stream_id, const char *fmt__str, const void *args, u32 len__sz, void *aux__prog) +{ + struct bpf_bprintf_data data = { + .get_bin_args = true, + .get_buf = true, + }; + struct bpf_prog_aux *aux = aux__prog; + u32 fmt_size = strlen(fmt__str) + 1; + struct bpf_stream *stream; + u32 data_len = len__sz; + int ret, num_args; + + stream = bpf_stream_get(stream_id, aux); + if (!stream) + return -ENOENT; + + if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 || + (data_len && !args)) + return -EINVAL; + num_args = data_len / 8; + + ret = bpf_bprintf_prepare(fmt__str, fmt_size, args, num_args, &data); + if (ret < 0) + return ret; + + ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt__str, data.bin_args); + /* Exclude NULL byte during push. */ + ret = bpf_stream_push_str(stream, data.buf, ret); + bpf_bprintf_cleanup(&data); + + return ret; +} + +__bpf_kfunc_end_defs(); + +/* Added kfunc to common_btf_ids */ + +void bpf_prog_stream_init(struct bpf_prog *prog) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(prog->aux->stream); i++) { + atomic_set(&prog->aux->stream[i].capacity, 0); + init_llist_head(&prog->aux->stream[i].log); + mutex_init(&prog->aux->stream[i].lock); + prog->aux->stream[i].backlog_head = NULL; + prog->aux->stream[i].backlog_tail = NULL; + } +} + +void bpf_prog_stream_free(struct bpf_prog *prog) +{ + struct llist_node *list; + int i; + + for (i = 0; i < ARRAY_SIZE(prog->aux->stream); i++) { + list = llist_del_all(&prog->aux->stream[i].log); + bpf_stream_free_list(list); + bpf_stream_free_list(prog->aux->stream[i].backlog_head); + } +} + +void bpf_stream_stage_init(struct bpf_stream_stage *ss) +{ + init_llist_head(&ss->log); + ss->len = 0; +} + +void bpf_stream_stage_free(struct bpf_stream_stage *ss) +{ + struct llist_node *node; + + node = llist_del_all(&ss->log); + bpf_stream_free_list(node); +} + +int bpf_stream_stage_printk(struct bpf_stream_stage *ss, const char *fmt, ...) +{ + struct bpf_bprintf_buffers *buf; + va_list args; + int ret; + + if (bpf_try_get_buffers(&buf)) + return -EBUSY; + + va_start(args, fmt); + ret = vsnprintf(buf->buf, ARRAY_SIZE(buf->buf), fmt, args); + va_end(args); + ss->len += ret; + /* Exclude NULL byte during push. */ + ret = __bpf_stream_push_str(&ss->log, buf->buf, ret); + bpf_put_buffers(); + return ret; +} + +int bpf_stream_stage_commit(struct bpf_stream_stage *ss, struct bpf_prog *prog, + enum bpf_stream_id stream_id) +{ + struct llist_node *list, *head, *tail; + struct bpf_stream *stream; + int ret; + + stream = bpf_stream_get(stream_id, prog->aux); + if (!stream) + return -EINVAL; + + ret = bpf_stream_consume_capacity(stream, ss->len); + if (ret) + return ret; + + list = llist_del_all(&ss->log); + head = tail = list; + + if (!list) + return 0; + while (llist_next(list)) { + tail = llist_next(list); + list = tail; + } + llist_add_batch(head, tail, &stream->log); + return 0; +} + +struct dump_stack_ctx { + struct bpf_stream_stage *ss; + int err; +}; + +static bool dump_stack_cb(void *cookie, u64 ip, u64 sp, u64 bp) +{ + struct dump_stack_ctx *ctxp = cookie; + const char *file = "", *line = ""; + struct bpf_prog *prog; + int num, ret; + + rcu_read_lock(); + prog = bpf_prog_ksym_find(ip); + rcu_read_unlock(); + if (prog) { + ret = bpf_prog_get_file_line(prog, ip, &file, &line, &num); + if (ret < 0) + goto end; + ctxp->err = bpf_stream_stage_printk(ctxp->ss, "%pS\n %s @ %s:%d\n", + (void *)ip, line, file, num); + return !ctxp->err; + } +end: + ctxp->err = bpf_stream_stage_printk(ctxp->ss, "%pS\n", (void *)ip); + return !ctxp->err; +} + +int bpf_stream_stage_dump_stack(struct bpf_stream_stage *ss) +{ + struct dump_stack_ctx ctx = { .ss = ss }; + int ret; + + ret = bpf_stream_stage_printk(ss, "CPU: %d UID: %d PID: %d Comm: %s\n", + raw_smp_processor_id(), __kuid_val(current_real_cred()->euid), + current->pid, current->comm); + if (ret) + return ret; + ret = bpf_stream_stage_printk(ss, "Call trace:\n"); + if (ret) + return ret; + arch_bpf_stack_walk(dump_stack_cb, &ctx); + if (ctx.err) + return ctx.err; + return bpf_stream_stage_printk(ss, "\n"); +} diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index f1d9ee9717a1..7db7182a3057 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5943,6 +5943,28 @@ static int token_create(union bpf_attr *attr) return bpf_token_create(attr); } +#define BPF_PROG_STREAM_READ_BY_FD_LAST_FIELD prog_stream_read.prog_fd + +static int prog_stream_read(union bpf_attr *attr) +{ + char __user *buf = u64_to_user_ptr(attr->prog_stream_read.stream_buf); + u32 len = attr->prog_stream_read.stream_buf_len; + struct bpf_prog *prog; + int ret; + + if (CHECK_ATTR(BPF_PROG_STREAM_READ_BY_FD)) + return -EINVAL; + + prog = bpf_prog_get(attr->prog_stream_read.prog_fd); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + ret = bpf_prog_stream_read(prog, attr->prog_stream_read.stream_id, buf, len); + bpf_prog_put(prog); + + return ret; +} + static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) { union bpf_attr attr; @@ -6079,6 +6101,9 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) case BPF_TOKEN_CREATE: err = token_create(&attr); break; + case BPF_PROG_STREAM_READ_BY_FD: + err = prog_stream_read(&attr); + break; default: err = -EINVAL; break; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 52e36fd23f40..9f09dcd2eabb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -46,6 +46,7 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { enum bpf_features { BPF_FEAT_RDONLY_CAST_TO_VOID = 0, + BPF_FEAT_STREAMS = 1, __MAX_BPF_FEAT, }; diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index da3152c16228..f69fd92df8d8 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -35,6 +35,7 @@ PROG COMMANDS | **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*] | **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*] | **bpftool** **prog tracelog** +| **bpftool** **prog tracelog** [ { **stdout** | **stderr** } *PROG* ] | **bpftool** **prog run** *PROG* **data_in** *FILE* [**data_out** *FILE* [**data_size_out** *L*]] [**ctx_in** *FILE* [**ctx_out** *FILE* [**ctx_size_out** *M*]]] [**repeat** *N*] | **bpftool** **prog profile** *PROG* [**duration** *DURATION*] *METRICs* | **bpftool** **prog help** @@ -179,6 +180,12 @@ bpftool prog tracelog purposes. For streaming data from BPF programs to user space, one can use perf events (see also **bpftool-map**\ (8)). +bpftool prog tracelog { stdout | stderr } *PROG* + Dump the BPF stream of the program. BPF programs can write to these streams + at runtime with the **bpf_stream_vprintk**\ () kfunc. The kernel may write + error messages to the standard error stream. This facility should be used + only for debugging purposes. + bpftool prog run *PROG* data_in *FILE* [data_out *FILE* [data_size_out *L*]] [ctx_in *FILE* [ctx_out *FILE* [ctx_size_out *M*]]] [repeat *N*] Run BPF program *PROG* in the kernel testing infrastructure for BPF, meaning that the program works on the data and context provided by the diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 27512feb5c70..a759ba24471d 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -518,7 +518,21 @@ _bpftool() esac ;; tracelog) - return 0 + case $prev in + $command) + COMPREPLY+=( $( compgen -W "stdout stderr" -- \ + "$cur" ) ) + return 0 + ;; + stdout|stderr) + COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \ + "$cur" ) ) + return 0 + ;; + *) + return 0 + ;; + esac ;; profile) case $cword in diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index deeaa5c1ed7d..9722d841abc0 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1113,6 +1113,52 @@ static int do_detach(int argc, char **argv) return 0; } +enum prog_tracelog_mode { + TRACE_STDOUT, + TRACE_STDERR, +}; + +static int +prog_tracelog_stream(int prog_fd, enum prog_tracelog_mode mode) +{ + FILE *file = mode == TRACE_STDOUT ? stdout : stderr; + int stream_id = mode == TRACE_STDOUT ? 1 : 2; + char buf[512]; + int ret; + + ret = 0; + do { + ret = bpf_prog_stream_read(prog_fd, stream_id, buf, sizeof(buf), NULL); + if (ret > 0) + fwrite(buf, sizeof(buf[0]), ret, file); + } while (ret > 0); + + fflush(file); + return ret ? -1 : 0; +} + +static int do_tracelog_any(int argc, char **argv) +{ + enum prog_tracelog_mode mode; + int fd; + + if (argc == 0) + return do_tracelog(argc, argv); + if (!is_prefix(*argv, "stdout") && !is_prefix(*argv, "stderr")) + usage(); + mode = is_prefix(*argv, "stdout") ? TRACE_STDOUT : TRACE_STDERR; + NEXT_ARG(); + + if (!REQ_ARGS(2)) + return -1; + + fd = prog_parse_fd(&argc, &argv); + if (fd < 0) + return -1; + + return prog_tracelog_stream(fd, mode); +} + static int check_single_stdin(char *file_data_in, char *file_ctx_in) { if (file_data_in && file_ctx_in && @@ -2493,6 +2539,7 @@ static int do_help(int argc, char **argv) " [repeat N]\n" " %1$s %2$s profile PROG [duration DURATION] METRICs\n" " %1$s %2$s tracelog\n" + " %1$s %2$s tracelog { stdout | stderr } PROG\n" " %1$s %2$s help\n" "\n" " " HELP_SPEC_MAP "\n" @@ -2532,7 +2579,7 @@ static const struct cmd cmds[] = { { "loadall", do_loadall }, { "attach", do_attach }, { "detach", do_detach }, - { "tracelog", do_tracelog }, + { "tracelog", do_tracelog_any }, { "run", do_run }, { "profile", do_profile }, { 0 } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 719ba230032f..0670e15a6100 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -906,6 +906,17 @@ union bpf_iter_link_info { * A new file descriptor (a nonnegative integer), or -1 if an * error occurred (in which case, *errno* is set appropriately). * + * BPF_PROG_STREAM_READ_BY_FD + * Description + * Read data of a program's BPF stream. The program is identified + * by *prog_fd*, and the stream is identified by the *stream_id*. + * The data is copied to a buffer pointed to by *stream_buf*, and + * filled less than or equal to *stream_buf_len* bytes. + * + * Return + * Number of bytes read from the stream on success, or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -961,6 +972,7 @@ enum bpf_cmd { BPF_LINK_DETACH, BPF_PROG_BIND_MAP, BPF_TOKEN_CREATE, + BPF_PROG_STREAM_READ_BY_FD, __MAX_BPF_CMD, }; @@ -1463,6 +1475,11 @@ struct bpf_stack_build_id { #define BPF_OBJ_NAME_LEN 16U +enum { + BPF_STREAM_STDOUT = 1, + BPF_STREAM_STDERR = 2, +}; + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -1849,6 +1866,13 @@ union bpf_attr { __u32 bpffs_fd; } token_create; + struct { + __aligned_u64 stream_buf; + __u32 stream_buf_len; + __u32 stream_id; + __u32 prog_fd; + } prog_stream_read; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 6eb421ccf91b..ab40dbf9f020 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -1375,3 +1375,23 @@ int bpf_token_create(int bpffs_fd, struct bpf_token_create_opts *opts) fd = sys_bpf_fd(BPF_TOKEN_CREATE, &attr, attr_sz); return libbpf_err_errno(fd); } + +int bpf_prog_stream_read(int prog_fd, __u32 stream_id, void *buf, __u32 buf_len, + struct bpf_prog_stream_read_opts *opts) +{ + const size_t attr_sz = offsetofend(union bpf_attr, prog_stream_read); + union bpf_attr attr; + int err; + + if (!OPTS_VALID(opts, bpf_prog_stream_read_opts)) + return libbpf_err(-EINVAL); + + memset(&attr, 0, attr_sz); + attr.prog_stream_read.stream_buf = ptr_to_u64(buf); + attr.prog_stream_read.stream_buf_len = buf_len; + attr.prog_stream_read.stream_id = stream_id; + attr.prog_stream_read.prog_fd = prog_fd; + + err = sys_bpf(BPF_PROG_STREAM_READ_BY_FD, &attr, attr_sz); + return libbpf_err_errno(err); +} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 1342564214c8..7252150e7ad3 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -709,6 +709,27 @@ struct bpf_token_create_opts { LIBBPF_API int bpf_token_create(int bpffs_fd, struct bpf_token_create_opts *opts); +struct bpf_prog_stream_read_opts { + size_t sz; + size_t :0; +}; +#define bpf_prog_stream_read_opts__last_field sz +/** + * @brief **bpf_prog_stream_read** reads data from the BPF stream of a given BPF + * program. + * + * @param prog_fd FD for the BPF program whose BPF stream is to be read. + * @param stream_id ID of the BPF stream to be read. + * @param buf Buffer to read data into from the BPF stream. + * @param buf_len Maximum number of bytes to read from the BPF stream. + * @param opts optional options, can be NULL + * + * @return The number of bytes read, on success; negative error code, otherwise + * (errno is also set to the error code) + */ +LIBBPF_API int bpf_prog_stream_read(int prog_fd, __u32 stream_id, void *buf, __u32 buf_len, + struct bpf_prog_stream_read_opts *opts); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index a50773d4616e..76b127a9f24d 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -314,6 +314,22 @@ enum libbpf_tristate { ___param, sizeof(___param)); \ }) +extern int bpf_stream_vprintk(int stream_id, const char *fmt__str, const void *args, + __u32 len__sz, void *aux__prog) __weak __ksym; + +#define bpf_stream_printk(stream_id, fmt, args...) \ +({ \ + static const char ___fmt[] = fmt; \ + unsigned long long ___param[___bpf_narg(args)]; \ + \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + ___bpf_fill(___param, args); \ + _Pragma("GCC diagnostic pop") \ + \ + bpf_stream_vprintk(stream_id, ___fmt, ___param, sizeof(___param), NULL);\ +}) + /* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args * Otherwise use __bpf_vprintk */ diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index c7fc0bde5648..1bbf77326420 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -437,6 +437,7 @@ LIBBPF_1.6.0 { bpf_linker__add_fd; bpf_linker__new_fd; bpf_object__prepare; + bpf_prog_stream_read; bpf_program__attach_cgroup_opts; bpf_program__func_info; bpf_program__func_info_cnt; diff --git a/tools/testing/selftests/bpf/prog_tests/stream.c b/tools/testing/selftests/bpf/prog_tests/stream.c new file mode 100644 index 000000000000..d9f0185dca61 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/stream.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <sys/mman.h> +#include <regex.h> + +#include "stream.skel.h" +#include "stream_fail.skel.h" + +void test_stream_failure(void) +{ + RUN_TESTS(stream_fail); +} + +void test_stream_success(void) +{ + RUN_TESTS(stream); + return; +} + +struct { + int prog_off; + const char *errstr; +} stream_error_arr[] = { + { + offsetof(struct stream, progs.stream_cond_break), + "ERROR: Timeout detected for may_goto instruction\n" + "CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n" + "Call trace:\n" + "([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" + "|[ \t]+[^\n]+\n)*", + }, + { + offsetof(struct stream, progs.stream_deadlock), + "ERROR: AA or ABBA deadlock detected for bpf_res_spin_lock\n" + "Attempted lock = (0x[0-9a-fA-F]+)\n" + "Total held locks = 1\n" + "Held lock\\[ 0\\] = \\1\n" // Lock address must match + "CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n" + "Call trace:\n" + "([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" + "|[ \t]+[^\n]+\n)*", + }, +}; + +static int match_regex(const char *pattern, const char *string) +{ + int err, rc; + regex_t re; + + err = regcomp(&re, pattern, REG_EXTENDED | REG_NEWLINE); + if (err) + return -1; + rc = regexec(&re, string, 0, NULL, 0); + regfree(&re); + return rc == 0 ? 1 : 0; +} + +void test_stream_errors(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts); + struct stream *skel; + int ret, prog_fd; + char buf[1024]; + + skel = stream__open_and_load(); + if (!ASSERT_OK_PTR(skel, "stream__open_and_load")) + return; + + for (int i = 0; i < ARRAY_SIZE(stream_error_arr); i++) { + struct bpf_program **prog; + + prog = (struct bpf_program **)(((char *)skel) + stream_error_arr[i].prog_off); + prog_fd = bpf_program__fd(*prog); + ret = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_OK(ret, "ret"); + ASSERT_OK(opts.retval, "retval"); + +#if !defined(__x86_64__) + ASSERT_TRUE(1, "Timed may_goto unsupported, skip."); + if (i == 0) { + ret = bpf_prog_stream_read(prog_fd, 2, buf, sizeof(buf), &ropts); + ASSERT_EQ(ret, 0, "stream read"); + continue; + } +#endif + + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, sizeof(buf), &ropts); + ASSERT_GT(ret, 0, "stream read"); + ASSERT_LE(ret, 1023, "len for buf"); + buf[ret] = '\0'; + + ret = match_regex(stream_error_arr[i].errstr, buf); + if (!ASSERT_TRUE(ret == 1, "regex match")) + fprintf(stderr, "Output from stream:\n%s\n", buf); + } + + stream__destroy(skel); +} + +void test_stream_syscall(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts); + struct stream *skel; + int ret, prog_fd; + char buf[64]; + + skel = stream__open_and_load(); + if (!ASSERT_OK_PTR(skel, "stream__open_and_load")) + return; + + prog_fd = bpf_program__fd(skel->progs.stream_syscall); + ret = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_OK(ret, "ret"); + ASSERT_OK(opts.retval, "retval"); + + ASSERT_LT(bpf_prog_stream_read(0, BPF_STREAM_STDOUT, buf, sizeof(buf), &ropts), 0, "error"); + ret = -errno; + ASSERT_EQ(ret, -EINVAL, "bad prog_fd"); + + ASSERT_LT(bpf_prog_stream_read(prog_fd, 0, buf, sizeof(buf), &ropts), 0, "error"); + ret = -errno; + ASSERT_EQ(ret, -ENOENT, "bad stream id"); + + ASSERT_LT(bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, NULL, sizeof(buf), NULL), 0, "error"); + ret = -errno; + ASSERT_EQ(ret, -EFAULT, "bad stream buf"); + + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 2, NULL); + ASSERT_EQ(ret, 2, "bytes"); + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 2, NULL); + ASSERT_EQ(ret, 1, "bytes"); + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 1, &ropts); + ASSERT_EQ(ret, 0, "no bytes stdout"); + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, 1, &ropts); + ASSERT_EQ(ret, 0, "no bytes stderr"); + + stream__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/stream.c b/tools/testing/selftests/bpf/progs/stream.c new file mode 100644 index 000000000000..35790897dc87 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/stream.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "bpf_experimental.h" + +struct arr_elem { + struct bpf_res_spin_lock lock; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct arr_elem); +} arrmap SEC(".maps"); + +#define ENOSPC 28 +#define _STR "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + +int size; + +SEC("syscall") +__success __retval(0) +int stream_exhaust(void *ctx) +{ + /* Use global variable for loop convergence. */ + size = 0; + bpf_repeat(BPF_MAX_LOOPS) { + if (bpf_stream_printk(BPF_STDOUT, _STR) == -ENOSPC && size == 99954) + return 0; + size += sizeof(_STR) - 1; + } + return 1; +} + +SEC("syscall") +__success __retval(0) +int stream_cond_break(void *ctx) +{ + while (can_loop) + ; + return 0; +} + +SEC("syscall") +__success __retval(0) +int stream_deadlock(void *ctx) +{ + struct bpf_res_spin_lock *lock, *nlock; + + lock = bpf_map_lookup_elem(&arrmap, &(int){0}); + if (!lock) + return 1; + nlock = bpf_map_lookup_elem(&arrmap, &(int){0}); + if (!nlock) + return 1; + if (bpf_res_spin_lock(lock)) + return 1; + if (bpf_res_spin_lock(nlock)) { + bpf_res_spin_unlock(lock); + return 0; + } + bpf_res_spin_unlock(nlock); + bpf_res_spin_unlock(lock); + return 1; +} + +SEC("syscall") +__success __retval(0) +int stream_syscall(void *ctx) +{ + bpf_stream_printk(BPF_STDOUT, "foo"); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/stream_fail.c b/tools/testing/selftests/bpf/progs/stream_fail.c new file mode 100644 index 000000000000..b4a0d0cc8ec8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/stream_fail.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_misc.h" + +SEC("syscall") +__failure __msg("Possibly NULL pointer passed") +int stream_vprintk_null_arg(void *ctx) +{ + bpf_stream_vprintk(BPF_STDOUT, "", NULL, 0, NULL); + return 0; +} + +SEC("syscall") +__failure __msg("R3 type=scalar expected=") +int stream_vprintk_scalar_arg(void *ctx) +{ + bpf_stream_vprintk(BPF_STDOUT, "", (void *)46, 0, NULL); + return 0; +} + +SEC("syscall") +__failure __msg("arg#1 doesn't point to a const string") +int stream_vprintk_string_arg(void *ctx) +{ + bpf_stream_vprintk(BPF_STDOUT, ctx, NULL, 0, NULL); + return 0; +} + +char _license[] SEC("license") = "GPL"; |