Diffstat (limited to 'include/linux/bpf.h')
| -rw-r--r-- | include/linux/bpf.h | 1462 |
1 file changed, 1225 insertions(+), 237 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 634d37a599fa..6498be4c44f8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -7,6 +7,7 @@ #include <uapi/linux/bpf.h> #include <uapi/linux/filter.h> +#include <crypto/sha2.h> #include <linux/workqueue.h> #include <linux/file.h> #include <linux/percpu.h> @@ -28,6 +29,9 @@ #include <linux/btf.h> #include <linux/rcupdate_trace.h> #include <linux/static_call.h> +#include <linux/memcontrol.h> +#include <linux/cfi.h> +#include <asm/rqspinlock.h> struct bpf_verifier_env; struct bpf_verifier_log; @@ -35,6 +39,7 @@ struct perf_event; struct bpf_prog; struct bpf_prog_aux; struct bpf_map; +struct bpf_arena; struct sock; struct seq_file; struct btf; @@ -50,11 +55,15 @@ struct module; struct bpf_func_state; struct ftrace_ops; struct cgroup; +struct bpf_token; +struct user_namespace; +struct super_block; +struct inode; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; extern struct kobject *btf_kobj; -extern struct bpf_mem_alloc bpf_global_ma; +extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma; extern bool bpf_global_ma_set; typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64); @@ -95,17 +104,22 @@ struct bpf_map_ops { /* funcs callable from userspace and from eBPF programs */ void *(*map_lookup_elem)(struct bpf_map *map, void *key); - int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); - int (*map_delete_elem)(struct bpf_map *map, void *key); - int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags); - int (*map_pop_elem)(struct bpf_map *map, void *value); - int (*map_peek_elem)(struct bpf_map *map, void *value); + long (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); + long (*map_delete_elem)(struct bpf_map *map, void *key); + long (*map_push_elem)(struct bpf_map *map, void *value, u64 flags); + long (*map_pop_elem)(struct bpf_map *map, void *value); + long (*map_peek_elem)(struct bpf_map *map, void *value); void *(*map_lookup_percpu_elem)(struct bpf_map *map, void *key, u32 cpu); + int (*map_get_hash)(struct bpf_map *map, u32 hash_buf_size, void *hash_buf); /* funcs called by prog_array and perf_event_array map */ void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, int fd); - void (*map_fd_put_ptr)(void *ptr); + /* If need_defer is true, the implementation should guarantee that + * the to-be-put element is still alive before the bpf program, which + * may manipulate it, exits. + */ + void (*map_fd_put_ptr)(struct bpf_map *map, void *ptr, bool need_defer); int (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); u32 (*map_fd_sys_lookup_elem)(void *ptr); void (*map_seq_show_elem)(struct bpf_map *map, void *key, @@ -129,6 +143,9 @@ struct bpf_map_ops { int (*map_mmap)(struct bpf_map *map, struct vm_area_struct *vma); __poll_t (*map_poll)(struct bpf_map *map, struct file *filp, struct poll_table_struct *pts); + unsigned long (*map_get_unmapped_area)(struct file *filep, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags); /* Functions called by bpf_local_storage maps */ int (*map_local_storage_charge)(struct bpf_local_storage_map *smap, @@ -138,7 +155,7 @@ struct bpf_map_ops { struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner); /* Misc helpers.*/ - int (*map_redirect)(struct bpf_map *map, u64 key, u64 flags); + long (*map_redirect)(struct bpf_map *map, u64 key, u64 flags); /* map_meta_equal must be implemented for maps that can be * used as an inner map.
It is a runtime check to ensure @@ -156,10 +173,12 @@ struct bpf_map_ops { int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env, struct bpf_func_state *caller, struct bpf_func_state *callee); - int (*map_for_each_callback)(struct bpf_map *map, + long (*map_for_each_callback)(struct bpf_map *map, bpf_callback_t callback_fn, void *callback_ctx, u64 flags); + u64 (*map_mem_usage)(const struct bpf_map *map); + /* BTF id of struct allocated by map_alloc */ int *map_btf_id; @@ -168,8 +187,8 @@ struct bpf_map_ops { }; enum { - /* Support at most 10 fields in a BTF type */ - BTF_FIELDS_MAX = 10, + /* Support at most 11 fields in a BTF type */ + BTF_FIELDS_MAX = 11, }; enum btf_field_type { @@ -177,19 +196,48 @@ enum btf_field_type { BPF_TIMER = (1 << 1), BPF_KPTR_UNREF = (1 << 2), BPF_KPTR_REF = (1 << 3), - BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF, - BPF_LIST_HEAD = (1 << 4), - BPF_LIST_NODE = (1 << 5), + BPF_KPTR_PERCPU = (1 << 4), + BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF | BPF_KPTR_PERCPU, + BPF_LIST_HEAD = (1 << 5), + BPF_LIST_NODE = (1 << 6), + BPF_RB_ROOT = (1 << 7), + BPF_RB_NODE = (1 << 8), + BPF_GRAPH_NODE = BPF_RB_NODE | BPF_LIST_NODE, + BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD, + BPF_REFCOUNT = (1 << 9), + BPF_WORKQUEUE = (1 << 10), + BPF_UPTR = (1 << 11), + BPF_RES_SPIN_LOCK = (1 << 12), + BPF_TASK_WORK = (1 << 13), }; +enum bpf_cgroup_storage_type { + BPF_CGROUP_STORAGE_SHARED, + BPF_CGROUP_STORAGE_PERCPU, + __BPF_CGROUP_STORAGE_MAX +#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX +}; + +#ifdef CONFIG_CGROUP_BPF +# define for_each_cgroup_storage_type(stype) \ + for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) +#else +# define for_each_cgroup_storage_type(stype) for (; false; ) +#endif /* CONFIG_CGROUP_BPF */ + +typedef void (*btf_dtor_kfunc_t)(void *); + struct btf_field_kptr { struct btf *btf; struct module *module; + /* dtor used if btf_is_kernel(btf), otherwise the type is + * program-allocated, dtor is NULL, and __bpf_obj_drop_impl is used + */ btf_dtor_kfunc_t dtor; u32 btf_id; }; -struct btf_field_list_head { +struct btf_field_graph_root { struct btf *btf; u32 value_btf_id; u32 node_offset; @@ -198,10 +246,11 @@ struct btf_field_list_head { struct btf_field { u32 offset; + u32 size; enum btf_field_type type; union { struct btf_field_kptr kptr; - struct btf_field_list_head list_head; + struct btf_field_graph_root graph_root; }; }; @@ -209,21 +258,43 @@ struct btf_record { u32 cnt; u32 field_mask; int spin_lock_off; + int res_spin_lock_off; int timer_off; + int wq_off; + int refcount_off; + int task_work_off; struct btf_field fields[]; }; -struct btf_field_offs { - u32 cnt; - u32 field_off[BTF_FIELDS_MAX]; - u8 field_sz[BTF_FIELDS_MAX]; +/* Non-opaque version of bpf_rb_node in uapi/linux/bpf.h */ +struct bpf_rb_node_kern { + struct rb_node rb_node; + void *owner; +} __attribute__((aligned(8))); + +/* Non-opaque version of bpf_list_node in uapi/linux/bpf.h */ +struct bpf_list_node_kern { + struct list_head list_head; + void *owner; +} __attribute__((aligned(8))); + +/* 'Ownership' of program-containing map is claimed by the first program + * that is going to use this map or by the first program which FD is + * stored in the map to make sure that all callers and callees have the + * same prog type, JITed flag and xdp_has_frags flag. 
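
/* Example (not from this diff): the usual way to use the iterator macro
 * above; when CONFIG_CGROUP_BPF is off the loop body compiles away.
 * bpf_cgroup_storage_free() is only an illustrative callee here (it is
 * declared in linux/bpf-cgroup.h):
 */
static void example_free_storages(struct bpf_cgroup_storage *storages[])
{
	enum bpf_cgroup_storage_type stype;

	for_each_cgroup_storage_type(stype)
		bpf_cgroup_storage_free(storages[stype]);
}
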
+ */ +struct bpf_map_owner { + enum bpf_prog_type type; + bool jited; + bool xdp_has_frags; + u64 storage_cookie[MAX_BPF_CGROUP_STORAGE_TYPE]; + const struct btf_type *attach_func_proto; + enum bpf_attach_type expected_attach_type; }; struct bpf_map { - /* The first two cachelines with read-mostly members of which some - * are also accessed in fast-path (e.g. ops, max_entries). - */ - const struct bpf_map_ops *ops ____cacheline_aligned; + u8 sha[SHA256_DIGEST_SIZE]; + const struct bpf_map_ops *ops; struct bpf_map *inner_map_meta; #ifdef CONFIG_SECURITY void *security; @@ -241,32 +312,29 @@ struct bpf_map { u32 btf_value_type_id; u32 btf_vmlinux_value_type_id; struct btf *btf; -#ifdef CONFIG_MEMCG_KMEM +#ifdef CONFIG_MEMCG struct obj_cgroup *objcg; #endif char name[BPF_OBJ_NAME_LEN]; - struct btf_field_offs *field_offs; - /* The 3rd and 4th cacheline with misc members to avoid false sharing - * particularly with refcounting. - */ - atomic64_t refcnt ____cacheline_aligned; - atomic64_t usercnt; - struct work_struct work; struct mutex freeze_mutex; + atomic64_t refcnt; + atomic64_t usercnt; + /* rcu is used before freeing and work is only used during freeing */ + union { + struct work_struct work; + struct rcu_head rcu; + }; atomic64_t writecnt; - /* 'Ownership' of program-containing map is claimed by the first program - * that is going to use this map or by the first program which FD is - * stored in the map to make sure that all callers and callees have the - * same prog type, JITed flag and xdp_has_frags flag. - */ - struct { - spinlock_t lock; - enum bpf_prog_type type; - bool jited; - bool xdp_has_frags; - } owner; + spinlock_t owner_lock; + struct bpf_map_owner *owner; bool bypass_spec_v1; bool frozen; /* write-once; write-protected by freeze_mutex */ + bool free_after_mult_rcu_gp; + bool free_after_rcu_gp; + atomic64_t sleepable_refcnt; + s64 __percpu *elem_count; + u64 cookie; /* write-once */ + char *excl_prog_sha; }; static inline const char *btf_field_type_name(enum btf_field_type type) @@ -274,35 +342,71 @@ static inline const char *btf_field_type_name(enum btf_field_type type) switch (type) { case BPF_SPIN_LOCK: return "bpf_spin_lock"; + case BPF_RES_SPIN_LOCK: + return "bpf_res_spin_lock"; case BPF_TIMER: return "bpf_timer"; + case BPF_WORKQUEUE: + return "bpf_wq"; case BPF_KPTR_UNREF: case BPF_KPTR_REF: return "kptr"; + case BPF_KPTR_PERCPU: + return "percpu_kptr"; + case BPF_UPTR: + return "uptr"; case BPF_LIST_HEAD: return "bpf_list_head"; case BPF_LIST_NODE: return "bpf_list_node"; + case BPF_RB_ROOT: + return "bpf_rb_root"; + case BPF_RB_NODE: + return "bpf_rb_node"; + case BPF_REFCOUNT: + return "bpf_refcount"; + case BPF_TASK_WORK: + return "bpf_task_work"; default: WARN_ON_ONCE(1); return "unknown"; } } +#if IS_ENABLED(CONFIG_DEBUG_KERNEL) +#define BPF_WARN_ONCE(cond, format...) WARN_ONCE(cond, format) +#else +#define BPF_WARN_ONCE(cond, format...) 
BUILD_BUG_ON_INVALID(cond) +#endif + static inline u32 btf_field_type_size(enum btf_field_type type) { switch (type) { case BPF_SPIN_LOCK: return sizeof(struct bpf_spin_lock); + case BPF_RES_SPIN_LOCK: + return sizeof(struct bpf_res_spin_lock); case BPF_TIMER: return sizeof(struct bpf_timer); + case BPF_WORKQUEUE: + return sizeof(struct bpf_wq); case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: + case BPF_UPTR: return sizeof(u64); case BPF_LIST_HEAD: return sizeof(struct bpf_list_head); case BPF_LIST_NODE: return sizeof(struct bpf_list_node); + case BPF_RB_ROOT: + return sizeof(struct bpf_rb_root); + case BPF_RB_NODE: + return sizeof(struct bpf_rb_node); + case BPF_REFCOUNT: + return sizeof(struct bpf_refcount); + case BPF_TASK_WORK: + return sizeof(struct bpf_task_work); default: WARN_ON_ONCE(1); return 0; @@ -314,21 +418,68 @@ static inline u32 btf_field_type_align(enum btf_field_type type) switch (type) { case BPF_SPIN_LOCK: return __alignof__(struct bpf_spin_lock); + case BPF_RES_SPIN_LOCK: + return __alignof__(struct bpf_res_spin_lock); case BPF_TIMER: return __alignof__(struct bpf_timer); + case BPF_WORKQUEUE: + return __alignof__(struct bpf_wq); case BPF_KPTR_UNREF: case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: + case BPF_UPTR: return __alignof__(u64); case BPF_LIST_HEAD: return __alignof__(struct bpf_list_head); case BPF_LIST_NODE: return __alignof__(struct bpf_list_node); + case BPF_RB_ROOT: + return __alignof__(struct bpf_rb_root); + case BPF_RB_NODE: + return __alignof__(struct bpf_rb_node); + case BPF_REFCOUNT: + return __alignof__(struct bpf_refcount); + case BPF_TASK_WORK: + return __alignof__(struct bpf_task_work); default: WARN_ON_ONCE(1); return 0; } } +static inline void bpf_obj_init_field(const struct btf_field *field, void *addr) +{ + memset(addr, 0, field->size); + + switch (field->type) { + case BPF_REFCOUNT: + refcount_set((refcount_t *)addr, 1); + break; + case BPF_RB_NODE: + RB_CLEAR_NODE((struct rb_node *)addr); + break; + case BPF_LIST_HEAD: + case BPF_LIST_NODE: + INIT_LIST_HEAD((struct list_head *)addr); + break; + case BPF_RB_ROOT: + /* RB_ROOT_CACHED 0-inits, no need to do anything after memset */ + case BPF_SPIN_LOCK: + case BPF_RES_SPIN_LOCK: + case BPF_TIMER: + case BPF_WORKQUEUE: + case BPF_KPTR_UNREF: + case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: + case BPF_UPTR: + case BPF_TASK_WORK: + break; + default: + WARN_ON_ONCE(1); + return; + } +} + static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_field_type type) { if (IS_ERR_OR_NULL(rec)) @@ -336,19 +487,26 @@ static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_f return rec->field_mask & type; } -static inline void bpf_obj_init(const struct btf_field_offs *foffs, void *obj) +static inline void bpf_obj_init(const struct btf_record *rec, void *obj) { int i; - if (!foffs) + if (IS_ERR_OR_NULL(rec)) return; - for (i = 0; i < foffs->cnt; i++) - memset(obj + foffs->field_off[i], 0, foffs->field_sz[i]); + for (i = 0; i < rec->cnt; i++) + bpf_obj_init_field(&rec->fields[i], obj + rec->fields[i].offset); } +/* 'dst' must be a temporary buffer and should not point to memory that is being + * used in parallel by a bpf program or bpf syscall, otherwise the access from + * the bpf program or bpf syscall may be corrupted by the reinitialization, + * leading to weird problems. Even 'dst' is newly-allocated from bpf memory + * allocator, it is still possible for 'dst' to be used in parallel by a bpf + * program or bpf syscall. 
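
/* Example (not from this diff): what bpf_obj_init_field() above does for a
 * hypothetical map value. bpf_obj_init() walks the value's btf_record and
 * initializes only the special fields it lists; plain data is not touched:
 */
struct example_val {
	u64 counter;			/* not in the btf_record: untouched */
	struct bpf_spin_lock lock;	/* BPF_SPIN_LOCK: memset() to zero only */
	struct bpf_refcount ref;	/* BPF_REFCOUNT: refcount_set(addr, 1) */
	struct bpf_rb_node node;	/* BPF_RB_NODE: RB_CLEAR_NODE() after memset */
	struct bpf_list_node link;	/* BPF_LIST_NODE: INIT_LIST_HEAD() */
};
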
+ */ static inline void check_and_init_map_value(struct bpf_map *map, void *dst) { - bpf_obj_init(map->field_offs, dst); + bpf_obj_init(map->record, dst); } /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and @@ -364,18 +522,18 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) size /= sizeof(long); while (size--) - *ldst++ = *lsrc++; + data_race(*ldst++ = *lsrc++); } /* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each. */ -static inline void bpf_obj_memcpy(struct btf_field_offs *foffs, +static inline void bpf_obj_memcpy(struct btf_record *rec, void *dst, void *src, u32 size, bool long_memcpy) { u32 curr_off = 0; int i; - if (likely(!foffs)) { + if (IS_ERR_OR_NULL(rec)) { if (long_memcpy) bpf_long_memcpy(dst, src, round_up(size, 8)); else @@ -383,57 +541,81 @@ static inline void bpf_obj_memcpy(struct btf_field_offs *foffs, return; } - for (i = 0; i < foffs->cnt; i++) { - u32 next_off = foffs->field_off[i]; + for (i = 0; i < rec->cnt; i++) { + u32 next_off = rec->fields[i].offset; u32 sz = next_off - curr_off; memcpy(dst + curr_off, src + curr_off, sz); - curr_off += foffs->field_sz[i] + sz; + curr_off += rec->fields[i].size + sz; } memcpy(dst + curr_off, src + curr_off, size - curr_off); } static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) { - bpf_obj_memcpy(map->field_offs, dst, src, map->value_size, false); + bpf_obj_memcpy(map->record, dst, src, map->value_size, false); } static inline void copy_map_value_long(struct bpf_map *map, void *dst, void *src) { - bpf_obj_memcpy(map->field_offs, dst, src, map->value_size, true); + bpf_obj_memcpy(map->record, dst, src, map->value_size, true); +} + +static inline void bpf_obj_swap_uptrs(const struct btf_record *rec, void *dst, void *src) +{ + unsigned long *src_uptr, *dst_uptr; + const struct btf_field *field; + int i; + + if (!btf_record_has_field(rec, BPF_UPTR)) + return; + + for (i = 0, field = rec->fields; i < rec->cnt; i++, field++) { + if (field->type != BPF_UPTR) + continue; + + src_uptr = src + field->offset; + dst_uptr = dst + field->offset; + swap(*src_uptr, *dst_uptr); + } } -static inline void bpf_obj_memzero(struct btf_field_offs *foffs, void *dst, u32 size) +static inline void bpf_obj_memzero(struct btf_record *rec, void *dst, u32 size) { u32 curr_off = 0; int i; - if (likely(!foffs)) { + if (IS_ERR_OR_NULL(rec)) { memset(dst, 0, size); return; } - for (i = 0; i < foffs->cnt; i++) { - u32 next_off = foffs->field_off[i]; + for (i = 0; i < rec->cnt; i++) { + u32 next_off = rec->fields[i].offset; u32 sz = next_off - curr_off; memset(dst + curr_off, 0, sz); - curr_off += foffs->field_sz[i] + sz; + curr_off += rec->fields[i].size + sz; } memset(dst + curr_off, 0, size - curr_off); } static inline void zero_map_value(struct bpf_map *map, void *dst) { - bpf_obj_memzero(map->field_offs, dst, map->value_size); + bpf_obj_memzero(map->record, dst, map->value_size); } void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, bool lock_src); void bpf_timer_cancel_and_free(void *timer); +void bpf_wq_cancel_and_free(void *timer); +void bpf_task_work_cancel_and_free(void *timer); void bpf_list_head_free(const struct btf_field *field, void *list_head, struct bpf_spin_lock *spin_lock); - +void bpf_rb_root_free(const struct btf_field *field, void *rb_root, + struct bpf_spin_lock *spin_lock); +u64 bpf_arena_get_kern_vm_start(struct bpf_arena *arena); +u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena); int bpf_obj_name_cpy(char 
*dst, const char *src, unsigned int size); struct bpf_offload_dev; @@ -481,6 +663,16 @@ int map_check_no_btf(const struct bpf_map *map, bool bpf_map_meta_equal(const struct bpf_map *meta0, const struct bpf_map *meta1); +static inline bool bpf_map_has_internal_structs(struct bpf_map *map) +{ + return btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK); +} + +void bpf_map_free_internal_structs(struct bpf_map *map, void *obj); + +int bpf_dynptr_from_file_sleepable(struct file *file, u32 flags, + struct bpf_dynptr *ptr__uninit); + extern const struct bpf_map_ops bpf_map_offload_ops; /* bpf_type_flag contains a set of flags that are applicable to the values of @@ -527,6 +719,7 @@ enum bpf_type_flag { */ PTR_UNTRUSTED = BIT(6 + BPF_BASE_TYPE_BITS), + /* MEM can be uninitialized. */ MEM_UNINIT = BIT(7 + BPF_BASE_TYPE_BITS), /* DYNPTR points to memory local to the bpf program. */ @@ -575,11 +768,42 @@ enum bpf_type_flag { /* MEM is tagged with rcu and memory access needs rcu_read_lock protection. */ MEM_RCU = BIT(13 + BPF_BASE_TYPE_BITS), + /* Used to tag PTR_TO_BTF_ID | MEM_ALLOC references which are non-owning. + * Currently only valid for linked-list and rbtree nodes. If the nodes + * have a bpf_refcount_field, they must be tagged MEM_RCU as well. + */ + NON_OWN_REF = BIT(14 + BPF_BASE_TYPE_BITS), + + /* DYNPTR points to sk_buff */ + DYNPTR_TYPE_SKB = BIT(15 + BPF_BASE_TYPE_BITS), + + /* DYNPTR points to xdp_buff */ + DYNPTR_TYPE_XDP = BIT(16 + BPF_BASE_TYPE_BITS), + + /* Memory must be aligned on some architectures, used in combination with + * MEM_FIXED_SIZE. + */ + MEM_ALIGNED = BIT(17 + BPF_BASE_TYPE_BITS), + + /* MEM is being written to, often combined with MEM_UNINIT. Non-presence + * of MEM_WRITE means that MEM is only being read. MEM_WRITE without the + * MEM_UNINIT means that memory needs to be initialized since it is also + * read. + */ + MEM_WRITE = BIT(18 + BPF_BASE_TYPE_BITS), + + /* DYNPTR points to skb_metadata_end()-skb_metadata_len() */ + DYNPTR_TYPE_SKB_META = BIT(19 + BPF_BASE_TYPE_BITS), + + /* DYNPTR points to file */ + DYNPTR_TYPE_FILE = BIT(20 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; -#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF) +#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \ + | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META | DYNPTR_TYPE_FILE) /* Max number of base types. 
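
/* Example (not from this diff): the gap-copy performed by bpf_obj_memcpy()
 * above, for a hypothetical 40-byte value whose btf_record holds one
 * bpf_timer (16 bytes) at offset 8. The loop copies bytes [0, 8), advances
 * curr_off past the timer to 24, and the tail memcpy covers [24, 40), so a
 * live timer in 'dst' is never clobbered by an element update ('map', 'dst'
 * and 'src' assumed in scope):
 */
copy_map_value(map, dst, src);	/* timer bytes [8, 24) stay untouched */
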
*/ #define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) @@ -602,6 +826,7 @@ enum bpf_arg_type { * on eBPF program stack */ ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */ + ARG_PTR_TO_ARENA, ARG_CONST_SIZE, /* number of bytes accessed from memory */ ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */ @@ -610,8 +835,6 @@ enum bpf_arg_type { ARG_ANYTHING, /* any (initialized) argument is ok */ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ - ARG_PTR_TO_INT, /* pointer to int */ - ARG_PTR_TO_LONG, /* pointer to long */ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ ARG_PTR_TO_RINGBUF_MEM, /* pointer to dynamically reserved ringbuf memory */ @@ -622,7 +845,7 @@ enum bpf_arg_type { ARG_PTR_TO_STACK, /* pointer to stack */ ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ - ARG_PTR_TO_KPTR, /* pointer to referenced kptr */ + ARG_KPTR_XCHG_DEST, /* pointer to destination that kptrs are bpf_kptr_xchg'd into */ ARG_PTR_TO_DYNPTR, /* pointer to bpf_dynptr. See bpf_type_flag for dynptr type */ __BPF_ARG_TYPE_MAX, @@ -633,10 +856,10 @@ enum bpf_arg_type { ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET, ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK, ARG_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID, - /* pointer to memory does not need to be initialized, helper function must fill - * all bytes or clear them in error case. + /* Pointer to memory does not need to be initialized, since helper function + * fills all bytes or clears them in error case. */ - ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | ARG_PTR_TO_MEM, + ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | MEM_WRITE | ARG_PTR_TO_MEM, /* Pointer to valid memory of size known at compile time. */ ARG_PTR_TO_FIXED_SIZE_MEM = MEM_FIXED_SIZE | ARG_PTR_TO_MEM, @@ -686,6 +909,12 @@ struct bpf_func_proto { bool gpl_only; bool pkt_access; bool might_sleep; + /* set to true if helper follows contract for llvm + * attribute bpf_fastcall: + * - void functions do not scratch r0 + * - functions taking N arguments scratch only registers r1-rN + */ + bool allow_fastcall; enum bpf_return_type ret_type; union { struct { @@ -768,13 +997,11 @@ enum bpf_reg_type { * additional context, assume the value is non-null. */ PTR_TO_BTF_ID, - /* PTR_TO_BTF_ID_OR_NULL points to a kernel struct that has not - * been checked for null. Used primarily to inform the verifier - * an explicit null check is required for this struct. - */ PTR_TO_MEM, /* reg points to valid memory region */ + PTR_TO_ARENA, PTR_TO_BUF, /* reg points to a read/write buffer */ PTR_TO_FUNC, /* reg points to a bpf program function */ + PTR_TO_INSN, /* reg points to a bpf program instruction */ CONST_PTR_TO_DYNPTR, /* reg points to a const struct bpf_dynptr */ __BPF_REG_TYPE_MAX, @@ -783,6 +1010,10 @@ enum bpf_reg_type { PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCKET, PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCK_COMMON, PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | PTR_TO_TCP_SOCK, + /* PTR_TO_BTF_ID_OR_NULL points to a kernel struct that has not + * been checked for null. Used primarily to inform the verifier + * an explicit null check is required for this struct. + */ PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | PTR_TO_BTF_ID, /* This must be the last entry. 
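
/* Example (not from this diff): every reg/arg type above is one base type
 * ORed with modifier flags. The verifier splits them apart with helpers in
 * this spirit (bpf_verifier.h defines base_type()/type_flag() along these
 * lines; the EXAMPLE_* names are illustrative):
 */
#define EXAMPLE_BASE_TYPE_MASK	(BPF_BASE_TYPE_LIMIT - 1)

static inline u32 example_base_type(u32 type)
{
	return type & EXAMPLE_BASE_TYPE_MASK;
}

static inline u32 example_type_flag(u32 type)
{
	return type & ~EXAMPLE_BASE_TYPE_MASK;
}

/* e.g. example_base_type(ARG_PTR_TO_UNINIT_MEM) == ARG_PTR_TO_MEM, and its
 * flags are MEM_UNINIT | MEM_WRITE, per the definition above. */
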
Its purpose is to ensure the enum is @@ -797,14 +1028,17 @@ static_assert(__BPF_REG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT); */ struct bpf_insn_access_aux { enum bpf_reg_type reg_type; + bool is_ldsx; union { int ctx_field_size; struct { struct btf *btf; u32 btf_id; + u32 ref_obj_id; }; }; struct bpf_verifier_log *log; /* for verbose logs */ + bool is_retval; /* is accessing function return value ? */ }; static inline void @@ -813,10 +1047,29 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size) aux->ctx_field_size = size; } +static inline bool bpf_is_ldimm64(const struct bpf_insn *insn) +{ + return insn->code == (BPF_LD | BPF_IMM | BPF_DW); +} + static inline bool bpf_pseudo_func(const struct bpf_insn *insn) { - return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && - insn->src_reg == BPF_PSEUDO_FUNC; + return bpf_is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC; +} + +/* Given a BPF_ATOMIC instruction @atomic_insn, return true if it is an + * atomic load or store, and false if it is a read-modify-write instruction. + */ +static inline bool +bpf_atomic_is_load_store(const struct bpf_insn *atomic_insn) +{ + switch (atomic_insn->imm) { + case BPF_LOAD_ACQ: + case BPF_STORE_REL: + return true; + default: + return false; + } } struct bpf_prog_ops { @@ -839,6 +1092,8 @@ struct bpf_verifier_ops { struct bpf_insn_access_aux *info); int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, const struct bpf_prog *prog); + int (*gen_epilogue)(struct bpf_insn *insn, const struct bpf_prog *prog, + s16 ctx_stack_off); int (*gen_ld_abs)(const struct bpf_insn *orig, struct bpf_insn *insn_buf); u32 (*convert_ctx_access)(enum bpf_access_type type, @@ -847,8 +1102,7 @@ struct bpf_verifier_ops { struct bpf_prog *prog, u32 *target_size); int (*btf_struct_access)(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, - int off, int size, enum bpf_access_type atype, - u32 *next_btf_id, enum bpf_type_flag *flag); + int off, int size); }; struct bpf_prog_offload_ops { @@ -878,14 +1132,6 @@ struct bpf_prog_offload { u32 jited_len; }; -enum bpf_cgroup_storage_type { - BPF_CGROUP_STORAGE_SHARED, - BPF_CGROUP_STORAGE_PERCPU, - __BPF_CGROUP_STORAGE_MAX -}; - -#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX - /* The longest tracepoint has 12 args. * See include/trace/bpf_probe.h */ @@ -896,11 +1142,15 @@ enum bpf_cgroup_storage_type { */ #define MAX_BPF_FUNC_REG_ARGS 5 -/* The argument is a structure. */ +/* The argument is a structure or a union. */ #define BTF_FMODEL_STRUCT_ARG BIT(0) +/* The argument is signed. */ +#define BTF_FMODEL_SIGNED_ARG BIT(1) + struct btf_func_model { u8 ret_size; + u8 ret_flags; u8 nr_args; u8 arg_size[MAX_BPF_FUNC_ARGS]; u8 arg_flags[MAX_BPF_FUNC_ARGS]; @@ -936,10 +1186,32 @@ struct btf_func_model { */ #define BPF_TRAMP_F_SHARE_IPMODIFY BIT(6) +/* Indicate that current trampoline is in a tail call context. Then, it has to + * cache and restore tail_call_cnt to avoid infinite tail call loop. + */ +#define BPF_TRAMP_F_TAIL_CALL_CTX BIT(7) + +/* + * Indicate the trampoline should be suitable to receive indirect calls; + * without this indirectly calling the generated code can result in #UD/#CP, + * depending on the CFI options. + * + * Used by bpf_struct_ops. + * + * Incompatible with FENTRY usage, overloads @func_addr argument. + */ +#define BPF_TRAMP_F_INDIRECT BIT(8) + /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86.
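
/* Example (not from this diff): how bpf_atomic_is_load_store() above
 * separates the two BPF_ATOMIC flavors. An acquire load carries
 * BPF_LOAD_ACQ in insn->imm, while a fetching add is a read-modify-write:
 */
struct bpf_insn acq = { .code = BPF_STX | BPF_ATOMIC | BPF_DW, .imm = BPF_LOAD_ACQ };
struct bpf_insn rmw = { .code = BPF_STX | BPF_ATOMIC | BPF_DW, .imm = BPF_ADD | BPF_FETCH };

/* bpf_atomic_is_load_store(&acq) == true, bpf_atomic_is_load_store(&rmw) == false */
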
*/ -#define BPF_MAX_TRAMP_LINKS 38 +enum { +#if defined(__s390x__) + BPF_MAX_TRAMP_LINKS = 27, +#else + BPF_MAX_TRAMP_LINKS = 38, +#endif +}; struct bpf_tramp_links { struct bpf_tramp_link *links[BPF_MAX_TRAMP_LINKS]; @@ -969,10 +1241,16 @@ struct bpf_tramp_run_ctx; * fexit = a set of program to run after original function */ struct bpf_tramp_image; -int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end, +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, const struct btf_func_model *m, u32 flags, struct bpf_tramp_links *tlinks, - void *orig_call); + void *func_addr); +void *arch_alloc_bpf_trampoline(unsigned int size); +void arch_free_bpf_trampoline(void *image, unsigned int size); +int __must_check arch_protect_bpf_trampoline(void *image, unsigned int size); +int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, void *func_addr); + u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start, @@ -986,6 +1264,18 @@ typedef void (*bpf_trampoline_exit_t)(struct bpf_prog *prog, u64 start, bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog); bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog); +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP +static inline bool bpf_trampoline_use_jmp(u64 flags) +{ + return flags & BPF_TRAMP_F_CALL_ORIG && !(flags & BPF_TRAMP_F_SKIP_FRAME); +} +#else +static inline bool bpf_trampoline_use_jmp(u64 flags) +{ + return false; +} +#endif + struct bpf_ksym { unsigned long start; unsigned long end; @@ -1005,6 +1295,7 @@ enum bpf_tramp_prog_type { struct bpf_tramp_image { void *image; + int size; struct bpf_ksym ksym; struct percpu_ref pcref; void *ip_after_call; @@ -1040,13 +1331,12 @@ struct bpf_trampoline { int progs_cnt[BPF_TRAMP_MAX]; /* Executable image of trampoline */ struct bpf_tramp_image *cur_image; - u64 selector; - struct module *mod; }; struct bpf_attach_target_info { struct btf_func_model fmodel; long tgt_addr; + struct module *tgt_mod; const char *tgt_name; const struct btf_type *tgt_type; }; @@ -1074,7 +1364,11 @@ struct bpf_dispatcher { #endif }; -static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func( +#ifndef __bpfcall +#define __bpfcall __nocfi +#endif + +static __always_inline __bpfcall unsigned int bpf_dispatcher_nop_func( const void *ctx, const struct bpf_insn *insnsi, bpf_func_t bpf_func) @@ -1082,9 +1376,65 @@ static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func( return bpf_func(ctx, insnsi); } +/* the implementation of the opaque uapi struct bpf_dynptr */ +struct bpf_dynptr_kern { + void *data; + /* Size represents the number of usable bytes of dynptr data. + * If for example the offset is at 4 for a local dynptr whose data is + * of type u64, the number of usable bytes is 4. + * + * The upper 8 bits are reserved. 
It is as follows: + * Bits 0 - 23 = size + * Bits 24 - 30 = dynptr type + * Bit 31 = whether dynptr is read-only + */ + u32 size; + u32 offset; +} __aligned(8); + +enum bpf_dynptr_type { + BPF_DYNPTR_TYPE_INVALID, + /* Points to memory that is local to the bpf program */ + BPF_DYNPTR_TYPE_LOCAL, + /* Underlying data is a ringbuf record */ + BPF_DYNPTR_TYPE_RINGBUF, + /* Underlying data is a sk_buff */ + BPF_DYNPTR_TYPE_SKB, + /* Underlying data is a xdp_buff */ + BPF_DYNPTR_TYPE_XDP, + /* Points to skb_metadata_end()-skb_metadata_len() */ + BPF_DYNPTR_TYPE_SKB_META, + /* Underlying data is a file */ + BPF_DYNPTR_TYPE_FILE, +}; + +int bpf_dynptr_check_size(u64 size); +u64 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); +const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u64 len); +void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u64 len); +bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr); +int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u64 offset, + void *src, u64 len, u64 flags); +void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u64 offset, + void *buffer__opt, u64 buffer__szk); + +static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u64 offset, u64 len) +{ + u64 size = __bpf_dynptr_size(ptr); + + if (len > size || offset > size - len) + return -E2BIG; + + return 0; +} + #ifdef CONFIG_BPF_JIT -int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); -int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); +int bpf_trampoline_link_prog(struct bpf_tramp_link *link, + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog); +int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog); struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); @@ -1133,7 +1483,7 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func #define DEFINE_BPF_DISPATCHER(name) \ __BPF_DISPATCHER_SC(name); \ - noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \ + noinline __bpfcall unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ bpf_func_t bpf_func) \ @@ -1156,7 +1506,8 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to); /* Called only from JIT-enabled code, so there's no need for stubs. 
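
/* Example (not from this diff): accessors matching the bpf_dynptr_kern
 * 'size' layout documented above (bits 0-23 = size, bits 24-30 = type,
 * bit 31 = read-only). All EXAMPLE_* names are illustrative; the kernel's
 * real accessors live in kernel/bpf/helpers.c. Note also that
 * bpf_dynptr_check_off_len() above writes its bounds check as
 * 'offset > size - len' (after checking len > size) so the test cannot be
 * defeated by u64 overflow of 'offset + len':
 */
#define EXAMPLE_DYNPTR_SIZE_MASK	0x00FFFFFF
#define EXAMPLE_DYNPTR_TYPE_SHIFT	24
#define EXAMPLE_DYNPTR_TYPE_MASK	(0x7FU << EXAMPLE_DYNPTR_TYPE_SHIFT)
#define EXAMPLE_DYNPTR_RDONLY_BIT	BIT(31)

static inline u32 example_dynptr_size(const struct bpf_dynptr_kern *p)
{
	return p->size & EXAMPLE_DYNPTR_SIZE_MASK;
}

static inline enum bpf_dynptr_type example_dynptr_type(const struct bpf_dynptr_kern *p)
{
	return (p->size & EXAMPLE_DYNPTR_TYPE_MASK) >> EXAMPLE_DYNPTR_TYPE_SHIFT;
}

static inline bool example_dynptr_is_rdonly(const struct bpf_dynptr_kern *p)
{
	return p->size & EXAMPLE_DYNPTR_RDONLY_BIT;
}
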
*/ -void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym); +void bpf_image_ksym_init(void *data, unsigned int size, struct bpf_ksym *ksym); +void bpf_image_ksym_add(struct bpf_ksym *ksym); void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); @@ -1165,19 +1516,21 @@ void bpf_jit_uncharge_modmem(u32 size); bool bpf_prog_has_trampoline(const struct bpf_prog *prog); #else static inline int bpf_trampoline_link_prog(struct bpf_tramp_link *link, - struct bpf_trampoline *tr) + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog) { return -ENOTSUPP; } static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, - struct bpf_trampoline *tr) + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog) { return -ENOTSUPP; } static inline struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info) { - return ERR_PTR(-EOPNOTSUPP); + return NULL; } static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {} #define DEFINE_BPF_DISPATCHER(name) @@ -1200,6 +1553,8 @@ static inline bool bpf_prog_has_trampoline(const struct bpf_prog *prog) struct bpf_func_info_aux { u16 linkage; bool unreliable; + bool called : 1; + bool verified : 1; }; enum bpf_jit_poke_reason { @@ -1228,7 +1583,10 @@ struct bpf_jit_poke_descriptor { struct bpf_ctx_arg_aux { u32 offset; enum bpf_reg_type reg_type; + struct btf *btf; u32 btf_id; + u32 ref_obj_id; + bool refcounted; }; struct btf_mod_pair { @@ -1238,6 +1596,37 @@ struct btf_mod_pair { struct bpf_kfunc_desc_tab; +enum bpf_stream_id { + BPF_STDOUT = 1, + BPF_STDERR = 2, +}; + +struct bpf_stream_elem { + struct llist_node node; + int total_len; + int consumed_len; + char str[]; +}; + +enum { + /* 100k bytes */ + BPF_STREAM_MAX_CAPACITY = 100000ULL, +}; + +struct bpf_stream { + atomic_t capacity; + struct llist_head log; /* list of in-flight stream elements in LIFO order */ + + struct mutex lock; /* lock protecting backlog_{head,tail} */ + struct llist_node *backlog_head; /* list of in-flight stream elements in FIFO order */ + struct llist_node *backlog_tail; /* tail of the list above */ +}; + +struct bpf_stream_stage { + struct llist_head log; + int len; +}; + struct bpf_prog_aux { atomic64_t refcnt; u32 used_map_cnt; @@ -1248,37 +1637,59 @@ struct bpf_prog_aux { u32 stack_depth; u32 id; u32 func_cnt; /* used by non-func prog as the number of func progs */ + u32 real_func_cnt; /* includes hidden progs, only used for JIT and freeing progs */ u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ u32 attach_btf_id; /* in-kernel BTF type id to attach to */ + u32 attach_st_ops_member_off; u32 ctx_arg_info_size; u32 max_rdonly_access; u32 max_rdwr_access; + u32 subprog_start; struct btf *attach_btf; - const struct bpf_ctx_arg_aux *ctx_arg_info; + struct bpf_ctx_arg_aux *ctx_arg_info; + void __percpu *priv_stack_ptr; struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */ struct bpf_prog *dst_prog; struct bpf_trampoline *dst_trampoline; enum bpf_prog_type saved_dst_prog_type; enum bpf_attach_type saved_dst_attach_type; bool verifier_zext; /* Zero extensions has been inserted by verifier. */ - bool offload_requested; + bool dev_bound; /* Program is bound to the netdev. */ + bool offload_requested; /* Program is bound and offloaded to the netdev. 
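
/* Example (not from this diff): one plausible consumer for struct bpf_stream
 * above. Producers push elements with lock-free llist_add(), which yields
 * LIFO order on 'log'; a drain step moves the chain, reversed back into FIFO
 * submission order, onto backlog_head/backlog_tail under 'lock':
 */
static void example_stream_drain(struct bpf_stream *stream)
{
	struct llist_node *list;

	mutex_lock(&stream->lock);
	list = llist_del_all(&stream->log);	/* detach in-flight elems (LIFO) */
	list = llist_reverse_order(list);	/* restore submission order */
	if (!stream->backlog_head)
		stream->backlog_head = list;
	else if (list)
		stream->backlog_tail->next = list;
	while (list && list->next)		/* advance to the new tail */
		list = list->next;
	if (list)
		stream->backlog_tail = list;
	mutex_unlock(&stream->lock);
}
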
*/ bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ + bool attach_tracing_prog; /* true if tracing another tracing program */ bool func_proto_unreliable; - bool sleepable; bool tail_call_reachable; bool xdp_has_frags; + bool exception_cb; + bool exception_boundary; + bool is_extended; /* true if extended by freplace program */ + bool jits_use_priv_stack; + bool priv_stack_requested; + bool changes_pkt_data; + bool might_sleep; + bool kprobe_write_ctx; + u64 prog_array_member_cnt; /* counts how many times as member of prog_array */ + struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */ + struct bpf_arena *arena; + void (*recursion_detected)(struct bpf_prog *prog); /* callback if recursion is detected */ /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ const char *attach_func_name; struct bpf_prog **func; + struct bpf_prog_aux *main_prog_aux; void *jit_data; /* JIT specific data. arch dependent */ struct bpf_jit_poke_descriptor *poke_tab; struct bpf_kfunc_desc_tab *kfunc_tab; struct bpf_kfunc_btf_tab *kfunc_btf_tab; u32 size_poke_tab; +#ifdef CONFIG_FINEIBT + struct bpf_ksym ksym_prefix; +#endif struct bpf_ksym ksym; const struct bpf_prog_ops *ops; + const struct bpf_struct_ops *st_ops; struct bpf_map **used_maps; struct mutex used_maps_mutex; /* mutex for used_maps and used_map_cnt */ struct btf_mod_pair *used_btfs; @@ -1289,9 +1700,11 @@ struct bpf_prog_aux { int cgroup_atype; /* enum cgroup_bpf_attach_type */ struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; char name[BPF_OBJ_NAME_LEN]; + u64 (*bpf_exception_cb)(u64 cookie, u64 sp, u64 bp, u64, u64); #ifdef CONFIG_SECURITY void *security; #endif + struct bpf_token *token; struct bpf_prog_offload *offload; struct btf *btf; struct bpf_func_info *func_info; @@ -1318,12 +1731,14 @@ struct bpf_prog_aux { * main prog always has linfo_idx == 0 */ u32 linfo_idx; + struct module *mod; u32 num_exentries; struct exception_table_entry *extable; union { struct work_struct work; struct rcu_head rcu; }; + struct bpf_stream stream[2]; }; struct bpf_prog { @@ -1341,12 +1756,16 @@ struct bpf_prog { enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */ call_get_func_ip:1, /* Do we call get_func_ip() */ - tstamp_type_access:1; /* Accessed __sk_buff->tstamp_type */ + tstamp_type_access:1, /* Accessed __sk_buff->tstamp_type */ + sleepable:1; /* BPF program is sleepable */ enum bpf_prog_type type; /* Type of BPF program */ enum bpf_attach_type expected_attach_type; /* For some prog types */ u32 len; /* Number of filter blocks */ u32 jited_len; /* Size of jited insns in bytes */ - u8 tag[BPF_TAG_SIZE]; + union { + u8 digest[SHA256_DIGEST_SIZE]; + u8 tag[BPF_TAG_SIZE]; + }; struct bpf_prog_stats __percpu *stats; int __percpu *active; unsigned int (*bpf_func)(const void *ctx, @@ -1374,18 +1793,46 @@ struct bpf_link { enum bpf_link_type type; const struct bpf_link_ops *ops; struct bpf_prog *prog; - struct work_struct work; + + u32 flags; + enum bpf_attach_type attach_type; + + /* rcu is used before freeing, work can be used to schedule that + * RCU-based freeing before that, so they never overlap + */ + union { + struct rcu_head rcu; + struct work_struct work; + }; + /* whether BPF link itself has "sleepable" semantics, which can differ + * from underlying BPF program having a "sleepable" semantics, as BPF + * link's 
semantics is determined by target attach hook + */ + bool sleepable; }; struct bpf_link_ops { void (*release)(struct bpf_link *link); + /* deallocate link resources callback, called without RCU grace period + * waiting + */ void (*dealloc)(struct bpf_link *link); + /* deallocate link resources callback, called after RCU grace period; + * if either the underlying BPF program is sleepable or BPF link's + * target hook is sleepable, we'll go through tasks trace RCU GP and + * then "classic" RCU GP; this need for chaining tasks trace and + * classic RCU GPs is designated by setting bpf_link->sleepable flag + */ + void (*dealloc_deferred)(struct bpf_link *link); int (*detach)(struct bpf_link *link); int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog, struct bpf_prog *old_prog); void (*show_fdinfo)(const struct bpf_link *link, struct seq_file *seq); int (*fill_link_info)(const struct bpf_link *link, struct bpf_link_info *info); + int (*update_map)(struct bpf_link *link, struct bpf_map *new_map, + struct bpf_map *old_map); + __poll_t (*poll)(struct file *file, struct poll_table_struct *pts); }; struct bpf_tramp_link { @@ -1401,11 +1848,16 @@ struct bpf_shim_tramp_link { struct bpf_tracing_link { struct bpf_tramp_link link; - enum bpf_attach_type attach_type; struct bpf_trampoline *trampoline; struct bpf_prog *tgt_prog; }; +struct bpf_raw_tp_link { + struct bpf_link link; + struct bpf_raw_event_map *btp; + u64 cookie; +}; + struct bpf_link_primer { struct bpf_link *link; struct file *file; @@ -1413,40 +1865,167 @@ struct bpf_link_primer { u32 id; }; +struct bpf_mount_opts { + kuid_t uid; + kgid_t gid; + umode_t mode; + + /* BPF token-related delegation options */ + u64 delegate_cmds; + u64 delegate_maps; + u64 delegate_progs; + u64 delegate_attachs; +}; + +struct bpf_token { + struct work_struct work; + atomic64_t refcnt; + struct user_namespace *userns; + u64 allowed_cmds; + u64 allowed_maps; + u64 allowed_progs; + u64 allowed_attachs; +#ifdef CONFIG_SECURITY + void *security; +#endif +}; + struct bpf_struct_ops_value; struct btf_member; #define BPF_STRUCT_OPS_MAX_NR_MEMBERS 64 +/** + * struct bpf_struct_ops - A structure of callbacks allowing a subsystem to + * define a BPF_MAP_TYPE_STRUCT_OPS map type composed + * of BPF_PROG_TYPE_STRUCT_OPS progs. + * @verifier_ops: A structure of callbacks that are invoked by the verifier + * when determining whether the struct_ops progs in the + * struct_ops map are valid. + * @init: A callback that is invoked a single time, and before any other + * callback, to initialize the structure. A nonzero return value means + * the subsystem could not be initialized. + * @check_member: When defined, a callback invoked by the verifier to allow + * the subsystem to determine if an entry in the struct_ops map + * is valid. A nonzero return value means that the map is + * invalid and should be rejected by the verifier. + * @init_member: A callback that is invoked for each member of the struct_ops + * map to allow the subsystem to initialize the member. A nonzero + * value means the member could not be initialized. This callback + * is exclusive with the @type, @type_id, @value_type, and + * @value_id fields. + * @reg: A callback that is invoked when the struct_ops map has been + * initialized and is being attached to. Zero means the struct_ops map + * has been successfully registered and is live. A nonzero return value + * means the struct_ops map could not be registered. 
+ * @unreg: A callback that is invoked when the struct_ops map should be + * unregistered. + * @update: A callback that is invoked when the live struct_ops map is being + * updated to contain new values. This callback is only invoked when + * the struct_ops map is loaded with BPF_F_LINK. If not defined, then + * it is assumed that the struct_ops map cannot be updated. + * @validate: A callback that is invoked after all of the members have been + * initialized. This callback should perform static checks on the + * map, meaning that it should either fail or succeed + * deterministically. A struct_ops map that has been validated may + * not necessarily succeed in being registered if the call to @reg + * fails. For example, a valid struct_ops map may be loaded, but + * then fail to be registered due to there already being another + * active struct_ops map in the subsystem. For this + * reason, if this callback is not defined, the check is skipped as + * the struct_ops map will have final verification performed in + * @reg. + * @cfi_stubs: Pointer to a structure of stub functions for CFI. These stubs + * provide the correct Control Flow Integrity hashes for the + * trampolines generated by BPF struct_ops. + * @owner: The module that owns this struct_ops. Used for module reference + * counting to ensure the module providing the struct_ops cannot be + * unloaded while in use. + * @name: The name of the struct bpf_struct_ops object. + * @func_models: Func models + */ struct bpf_struct_ops { const struct bpf_verifier_ops *verifier_ops; int (*init)(struct btf *btf); int (*check_member)(const struct btf_type *t, - const struct btf_member *member); + const struct btf_member *member, + const struct bpf_prog *prog); int (*init_member)(const struct btf_type *t, const struct btf_member *member, void *kdata, const void *udata); - int (*reg)(void *kdata); - void (*unreg)(void *kdata); - const struct btf_type *type; - const struct btf_type *value_type; + int (*reg)(void *kdata, struct bpf_link *link); + void (*unreg)(void *kdata, struct bpf_link *link); + int (*update)(void *kdata, void *old_kdata, struct bpf_link *link); + int (*validate)(void *kdata); + void *cfi_stubs; + struct module *owner; const char *name; struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS]; +}; + +/* Every member of a struct_ops type has an instance even if a member is not + * an operator (function pointer). The "info" field will be assigned to + * prog->aux->ctx_arg_info of BPF struct_ops programs to provide the + * argument information required by the verifier to verify the program. + * + * btf_ctx_access() will lookup prog->aux->ctx_arg_info to find the + * corresponding entry for a given argument. + */ +struct bpf_struct_ops_arg_info { + struct bpf_ctx_arg_aux *info; + u32 cnt; +}; + +struct bpf_struct_ops_desc { + struct bpf_struct_ops *st_ops; + + const struct btf_type *type; + const struct btf_type *value_type; u32 type_id; u32 value_id; + + /* Collection of argument information for each member */ + struct bpf_struct_ops_arg_info *arg_info; +}; + +enum bpf_struct_ops_state { + BPF_STRUCT_OPS_STATE_INIT, + BPF_STRUCT_OPS_STATE_INUSE, + BPF_STRUCT_OPS_STATE_TOBEFREE, + BPF_STRUCT_OPS_STATE_READY, +}; + +struct bpf_struct_ops_common_value { + refcount_t refcnt; + enum bpf_struct_ops_state state; }; #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) +/* This macro helps developers register a struct_ops type and generate + * type information correctly.
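
/* Example (not from this diff): a minimal subsystem-side registration with
 * the register_bpf_struct_ops() macro defined below. 'my_ops' and every
 * my_ops_* symbol are hypothetical:
 */
struct my_ops {
	int (*handle)(int val);
};

static const struct bpf_verifier_ops my_ops_verifier_ops;	/* subsystem-defined */
static int my_ops_init(struct btf *btf) { return 0; }
static int my_ops_reg(void *kdata, struct bpf_link *link) { return 0; }
static void my_ops_unreg(void *kdata, struct bpf_link *link) { }
static struct my_ops my_ops_cfi_stubs;	/* CFI stub bodies elided */

static struct bpf_struct_ops bpf_my_ops = {
	.verifier_ops	= &my_ops_verifier_ops,
	.init		= my_ops_init,
	.reg		= my_ops_reg,
	.unreg		= my_ops_unreg,
	.cfi_stubs	= &my_ops_cfi_stubs,
	.name		= "my_ops",
	.owner		= THIS_MODULE,
};

static int __init my_ops_module_init(void)
{
	return register_bpf_struct_ops(&bpf_my_ops, my_ops);
}
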
Developers should use this macro to register + * a struct_ops type instead of calling __register_bpf_struct_ops() directly. + */ +#define register_bpf_struct_ops(st_ops, type) \ + ({ \ + struct bpf_struct_ops_##type { \ + struct bpf_struct_ops_common_value common; \ + struct type data ____cacheline_aligned_in_smp; \ + }; \ + BTF_TYPE_EMIT(struct bpf_struct_ops_##type); \ + __register_bpf_struct_ops(st_ops); \ + }) #define BPF_MODULE_OWNER ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA)) -const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id); -void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log); bool bpf_struct_ops_get(const void *kdata); void bpf_struct_ops_put(const void *kdata); +int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff); int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, void *value); int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, struct bpf_tramp_link *link, const struct btf_func_model *model, - void *image, void *image_end); + void *stub_func, + void **image, u32 *image_off, + bool allow_alloc); +void bpf_struct_ops_image_free(void *image); static inline bool bpf_try_module_get(const void *data, struct module *owner) { if (owner == BPF_MODULE_OWNER) @@ -1461,6 +2040,8 @@ static inline void bpf_module_put(const void *data, struct module *owner) else module_put(owner); } +int bpf_struct_ops_link_create(union bpf_attr *attr); +u32 bpf_struct_ops_id(const void *kdata); #ifdef CONFIG_NET /* Define it here to avoid the use of forward declaration */ @@ -1472,20 +2053,19 @@ struct bpf_dummy_ops { int (*test_1)(struct bpf_dummy_ops_state *cb); int (*test_2)(struct bpf_dummy_ops_state *cb, int a1, unsigned short a2, char a3, unsigned long a4); + int (*test_sleepable)(struct bpf_dummy_ops_state *cb); }; int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); #endif +int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc, + struct btf *btf, + struct bpf_verifier_log *log); +void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map); +void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc); #else -static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id) -{ - return NULL; -} -static inline void bpf_struct_ops_init(struct btf *btf, - struct bpf_verifier_log *log) -{ -} +#define register_bpf_struct_ops(st_ops, type) ({ (void *)(st_ops); 0; }) static inline bool bpf_try_module_get(const void *data, struct module *owner) { return try_module_get(owner); @@ -1494,21 +2074,42 @@ static inline void bpf_module_put(const void *data, struct module *owner) { module_put(owner); } +static inline int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff) +{ + return -ENOTSUPP; +} static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, void *value) { return -EINVAL; } +static inline int bpf_struct_ops_link_create(union bpf_attr *attr) +{ + return -EOPNOTSUPP; +} +static inline void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map) +{ +} + +static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc) +{ +} + #endif +int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog, + const struct bpf_ctx_arg_aux *info, u32 cnt); + #if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM) int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, - int cgroup_atype); + int cgroup_atype, + enum 
bpf_attach_type attach_type); void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog); #else static inline int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, - int cgroup_atype) + int cgroup_atype, + enum bpf_attach_type attach_type) { return -EOPNOTSUPP; } @@ -1523,17 +2124,28 @@ struct bpf_array { u32 index_mask; struct bpf_array_aux *aux; union { - char value[0] __aligned(8); - void *ptrs[0] __aligned(8); - void __percpu *pptrs[0] __aligned(8); + DECLARE_FLEX_ARRAY(char, value) __aligned(8); + DECLARE_FLEX_ARRAY(void *, ptrs) __aligned(8); + DECLARE_FLEX_ARRAY(void __percpu *, pptrs) __aligned(8); }; }; +/* + * The bpf_array_get_next_key() function may be used for all array-like + * maps, i.e., maps with u32 keys with range [0 ,..., max_entries) + */ +int bpf_array_get_next_key(struct bpf_map *map, void *key, void *next_key); + #define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */ #define MAX_TAIL_CALL_CNT 33 -/* Maximum number of loops for bpf_loop */ -#define BPF_MAX_LOOPS BIT(23) +/* Maximum number of loops for bpf_loop and bpf_iter_num. + * It's enum to expose it (and thus make it discoverable) through BTF. + */ +enum { + BPF_MAX_LOOPS = 8 * 1024 * 1024, + BPF_MAX_TIMED_LOOPS = 0xffff, +}; #define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \ BPF_F_RDONLY_PROG | \ @@ -1569,6 +2181,16 @@ static inline bool bpf_map_flags_access_ok(u32 access_flags) (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG); } +static inline struct bpf_map_owner *bpf_map_owner_alloc(struct bpf_map *map) +{ + return kzalloc(sizeof(*map->owner), GFP_ATOMIC); +} + +static inline void bpf_map_owner_free(struct bpf_map *map) +{ + kfree(map->owner); +} + struct bpf_event_entry { struct perf_event *event; struct file *perf_file; @@ -1589,6 +2211,8 @@ int bpf_prog_calc_tag(struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void); +const struct bpf_func_proto *bpf_get_perf_event_read_value_proto(void); + typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, unsigned long off, unsigned long len); typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type, @@ -1672,6 +2296,7 @@ struct bpf_cg_run_ctx { struct bpf_trace_run_ctx { struct bpf_run_ctx run_ctx; u64 bpf_cookie; + bool is_uprobe; }; struct bpf_tramp_run_ctx { @@ -1720,6 +2345,8 @@ bpf_prog_run_array(const struct bpf_prog_array *array, if (unlikely(!array)) return ret; + run_ctx.is_uprobe = false; + migrate_disable(); old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); item = &array->items[0]; @@ -1744,44 +2371,49 @@ bpf_prog_run_array(const struct bpf_prog_array *array, * rcu-protected dynamically sized maps. 
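
/* Example (not from this diff): a simplified sketch of how the owner field
 * introduced in struct bpf_map is claimed; the first program stores its
 * expectations and later programs must match them. bpf_map_owner_alloc()
 * uses GFP_ATOMIC, consistent with being called under owner_lock:
 */
static bool example_map_compatible(struct bpf_map *map, const struct bpf_prog *prog)
{
	bool ret;

	spin_lock(&map->owner_lock);
	if (!map->owner) {
		map->owner = bpf_map_owner_alloc(map);
		if (map->owner) {
			map->owner->type = prog->type;
			map->owner->jited = prog->jited;
			map->owner->xdp_has_frags = prog->aux->xdp_has_frags;
		}
		ret = map->owner != NULL;
	} else {
		ret = map->owner->type == prog->type &&
		      map->owner->jited == prog->jited &&
		      map->owner->xdp_has_frags == prog->aux->xdp_has_frags;
	}
	spin_unlock(&map->owner_lock);
	return ret;
}
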
*/ static __always_inline u32 -bpf_prog_run_array_sleepable(const struct bpf_prog_array __rcu *array_rcu, - const void *ctx, bpf_prog_run_fn run_prog) +bpf_prog_run_array_uprobe(const struct bpf_prog_array *array, + const void *ctx, bpf_prog_run_fn run_prog) { const struct bpf_prog_array_item *item; const struct bpf_prog *prog; - const struct bpf_prog_array *array; struct bpf_run_ctx *old_run_ctx; struct bpf_trace_run_ctx run_ctx; u32 ret = 1; might_fault(); + RCU_LOCKDEP_WARN(!rcu_read_lock_trace_held(), "no rcu lock held"); + + if (unlikely(!array)) + return ret; - rcu_read_lock_trace(); migrate_disable(); - array = rcu_dereference_check(array_rcu, rcu_read_lock_trace_held()); - if (unlikely(!array)) - goto out; + run_ctx.is_uprobe = true; + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); item = &array->items[0]; while ((prog = READ_ONCE(item->prog))) { - if (!prog->aux->sleepable) + if (!prog->sleepable) rcu_read_lock(); run_ctx.bpf_cookie = item->bpf_cookie; ret &= run_prog(prog, ctx); item++; - if (!prog->aux->sleepable) + if (!prog->sleepable) rcu_read_unlock(); } bpf_reset_run_ctx(old_run_ctx); -out: migrate_enable(); - rcu_read_unlock_trace(); return ret; } +bool bpf_jit_bypass_spec_v1(void); +bool bpf_jit_bypass_spec_v4(void); + +#define bpf_rcu_lock_held() \ + (rcu_read_lock_held() || rcu_read_lock_trace_held() || rcu_read_lock_bh_held()) + #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); extern struct mutex bpf_stats_enabled_mutex; @@ -1804,9 +2436,11 @@ static inline void bpf_enable_instrumentation(void) migrate_enable(); } +extern const struct super_operations bpf_super_ops; extern const struct file_operations bpf_map_fops; extern const struct file_operations bpf_prog_fops; extern const struct file_operations bpf_iter_fops; +extern const struct file_operations bpf_token_fops; #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ extern const struct bpf_prog_ops _name ## _prog_ops; \ @@ -1833,22 +2467,52 @@ struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); void bpf_prog_free_id(struct bpf_prog *prog); -void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock); +void bpf_map_free_id(struct bpf_map *map); struct btf_field *btf_record_find(const struct btf_record *rec, - u32 offset, enum btf_field_type type); + u32 offset, u32 field_mask); void btf_record_free(struct btf_record *rec); void bpf_map_free_record(struct bpf_map *map); struct btf_record *btf_record_dup(const struct btf_record *rec); bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b); void bpf_obj_free_timer(const struct btf_record *rec, void *obj); +void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj); +void bpf_obj_free_task_work(const struct btf_record *rec, void *obj); void bpf_obj_free_fields(const struct btf_record *rec, void *obj); +void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu); struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *bpf_map_get_with_uref(u32 ufd); -struct bpf_map *__bpf_map_get(struct fd f); + +/* + * The __bpf_map_get() and __btf_get_by_fd() functions parse a file + * descriptor and return a corresponding map or btf object. + * Their names are double underscored to emphasize the fact that they + * do not increase refcnt. To also increase refcnt use corresponding + * bpf_map_get() and btf_get_by_fd() functions. 
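
/* Example (not from this diff): typical use of the non-refcounting accessor
 * defined just below; the caller owns the struct fd and must take its own
 * map reference before dropping it (older-style fdget()/fdput() shown for
 * clarity):
 */
static struct bpf_map *example_get_map(int ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map = __bpf_map_get(f);

	if (!IS_ERR(map))
		bpf_map_inc(map);	/* keep the map alive past fdput() */
	fdput(f);
	return map;
}
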
+ */ + +static inline struct bpf_map *__bpf_map_get(struct fd f) +{ + if (fd_empty(f)) + return ERR_PTR(-EBADF); + if (unlikely(fd_file(f)->f_op != &bpf_map_fops)) + return ERR_PTR(-EINVAL); + return fd_file(f)->private_data; +} + +static inline struct btf *__btf_get_by_fd(struct fd f) +{ + if (fd_empty(f)) + return ERR_PTR(-EBADF); + if (unlikely(fd_file(f)->f_op != &btf_fops)) + return ERR_PTR(-EINVAL); + return fd_file(f)->private_data; +} + void bpf_map_inc(struct bpf_map *map); void bpf_map_inc_with_uref(struct bpf_map *map); +struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref); struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); @@ -1869,73 +2533,130 @@ int generic_map_delete_batch(struct bpf_map *map, struct bpf_map *bpf_map_get_curr_or_next(u32 *id); struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id); -#ifdef CONFIG_MEMCG_KMEM + +int bpf_map_alloc_pages(const struct bpf_map *map, int nid, + unsigned long nr_pages, struct page **page_array); +#ifdef CONFIG_MEMCG void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, int node); +void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags, + int node); void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags); +void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, + gfp_t flags); void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align, gfp_t flags); #else -static inline void * -bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, - int node) +/* + * These specialized allocators have to be macros for their allocations to be + * accounted separately (to have separate alloc_tag). 
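
/* Example (not from this diff): map implementations route element
 * allocations through the wrappers declared above so the memory is charged
 * to the map's memory cgroup under CONFIG_MEMCG, and falls back to the
 * plain allocators (the macros that follow) otherwise. All example_* names
 * are hypothetical:
 */
struct example_map {
	struct bpf_map map;	/* common map header embedded first */
};

struct example_elem {
	u64 payload;
};

static struct example_elem *example_alloc_elem(struct example_map *emap)
{
	/* GFP_ATOMIC | __GFP_NOWARN is the common choice in map code */
	return bpf_map_kzalloc(&emap->map, sizeof(struct example_elem),
			       GFP_ATOMIC | __GFP_NOWARN);
}
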
+static inline int
+bpf_map_init_elem_count(struct bpf_map *map)
 {
-	return kmalloc_node(size, flags, node);
+	size_t size = sizeof(*map->elem_count), align = size;
+	gfp_t flags = GFP_USER | __GFP_NOWARN;
+
+	map->elem_count = bpf_map_alloc_percpu(map, size, align, flags);
+	if (!map->elem_count)
+		return -ENOMEM;
+
+	return 0;
 }
 
-static inline void *
-bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
+static inline void
+bpf_map_free_elem_count(struct bpf_map *map)
 {
-	return kzalloc(size, flags);
+	free_percpu(map->elem_count);
 }
 
-static inline void __percpu *
-bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align,
-		     gfp_t flags)
+static inline void bpf_map_inc_elem_count(struct bpf_map *map)
 {
-	return __alloc_percpu_gfp(size, align, flags);
+	this_cpu_inc(*map->elem_count);
+}
+
+static inline void bpf_map_dec_elem_count(struct bpf_map *map)
+{
+	this_cpu_dec(*map->elem_count);
 }
-#endif
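The element-count helpers above are designed so the hot paths stay cheap: init/free bracket the map lifetime, while inc/dec are per-cpu and lock-free. A sketch of how a hypothetical map implementation would thread them through its ops (other arguments elided):

/* Hypothetical wiring of the per-cpu element counter: allocated once in
 * ->map_alloc, bumped on the update/delete fast paths, released in
 * ->map_free. this_cpu_inc()/this_cpu_dec() touch only local storage.
 */
static long example_insert_elem(struct bpf_map *map /* , ... */)
{
	/* ... link the new element ... */
	bpf_map_inc_elem_count(map);
	return 0;
}

static long example_delete_elem(struct bpf_map *map /* , ... */)
{
	/* ... unlink the element ... */
	bpf_map_dec_elem_count(map);
	return 0;
}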
 
 extern int sysctl_unprivileged_bpf_disabled;
 
-static inline bool bpf_allow_ptr_leaks(void)
+bool bpf_token_capable(const struct bpf_token *token, int cap);
+
+static inline bool bpf_allow_ptr_leaks(const struct bpf_token *token)
 {
-	return perfmon_capable();
+	return bpf_token_capable(token, CAP_PERFMON);
 }
 
-static inline bool bpf_allow_uninit_stack(void)
+static inline bool bpf_allow_uninit_stack(const struct bpf_token *token)
 {
-	return perfmon_capable();
+	return bpf_token_capable(token, CAP_PERFMON);
 }
 
-static inline bool bpf_bypass_spec_v1(void)
+static inline bool bpf_bypass_spec_v1(const struct bpf_token *token)
 {
-	return perfmon_capable();
+	return bpf_jit_bypass_spec_v1() ||
+	       cpu_mitigations_off() ||
+	       bpf_token_capable(token, CAP_PERFMON);
 }
 
-static inline bool bpf_bypass_spec_v4(void)
+static inline bool bpf_bypass_spec_v4(const struct bpf_token *token)
 {
-	return perfmon_capable();
+	return bpf_jit_bypass_spec_v4() ||
+	       cpu_mitigations_off() ||
+	       bpf_token_capable(token, CAP_PERFMON);
 }
 
 int bpf_map_new_fd(struct bpf_map *map, int flags);
 int bpf_prog_new_fd(struct bpf_prog *prog);
 
 void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
-		   const struct bpf_link_ops *ops, struct bpf_prog *prog);
+		   const struct bpf_link_ops *ops, struct bpf_prog *prog,
+		   enum bpf_attach_type attach_type);
+void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type,
+			     const struct bpf_link_ops *ops, struct bpf_prog *prog,
+			     enum bpf_attach_type attach_type, bool sleepable);
 int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer);
 int bpf_link_settle(struct bpf_link_primer *primer);
 void bpf_link_cleanup(struct bpf_link_primer *primer);
 void bpf_link_inc(struct bpf_link *link);
+struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link);
 void bpf_link_put(struct bpf_link *link);
 int bpf_link_new_fd(struct bpf_link *link);
-struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd);
 struct bpf_link *bpf_link_get_from_fd(u32 ufd);
 struct bpf_link *bpf_link_get_curr_or_next(u32 *id);
 
-int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
-int bpf_obj_get_user(const char __user *pathname, int flags);
+void bpf_token_inc(struct bpf_token *token);
+void bpf_token_put(struct bpf_token *token);
+int bpf_token_create(union bpf_attr *attr);
+struct bpf_token *bpf_token_get_from_fd(u32 ufd);
+int bpf_token_get_info_by_fd(struct bpf_token *token,
+			     const union bpf_attr *attr,
+			     union bpf_attr __user *uattr);
+
+bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd);
+bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type);
+bool bpf_token_allow_prog_type(const struct bpf_token *token,
+			       enum bpf_prog_type prog_type,
+			       enum bpf_attach_type attach_type);
+
+int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname);
+int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags);
+struct inode *bpf_get_inode(struct super_block *sb, const struct inode *dir,
+			    umode_t mode);
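The token API above underpins delegated BPF permissions. How a load path might combine the pieces is roughly as follows (an illustrative composition, not the kernel's actual check; the function is hypothetical):

/* Illustrative only: a BPF_PROG_LOAD-style gate built from the token
 * queries declared above. bpf_token_capable(NULL, cap) degrades to a
 * plain capability check, so the no-token case keeps prior behavior.
 */
static bool example_may_load_prog(const struct bpf_token *token,
				  enum bpf_prog_type ptype,
				  enum bpf_attach_type atype)
{
	if (token && !bpf_token_allow_prog_type(token, ptype, atype))
		return false;
	return bpf_token_capable(token, CAP_BPF);
}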
 
 #define BPF_ITER_FUNC_PREFIX "bpf_iter_"
 #define DEFINE_BPF_ITER_FUNC(target, args...)			\
@@ -2023,7 +2744,7 @@ struct bpf_iter__bpf_map_elem {
 
 int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info);
 void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info);
-bool bpf_iter_prog_supported(struct bpf_prog *prog);
+int bpf_iter_prog_supported(struct bpf_prog *prog);
 const struct bpf_func_proto *
 bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
 int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_prog *prog);
@@ -2047,7 +2768,7 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
 int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
 			    u64 flags);
 
-int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
+int bpf_stackmap_extract(struct bpf_map *map, void *key, void *value, bool delete);
 
 int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
 				 void *key, void *value, u64 map_flags);
@@ -2061,7 +2782,7 @@ int bpf_check_uarg_tail_zero(bpfptr_t uaddr, size_t expected_size,
 			     size_t actual_size);
 
 /* verify correctness of eBPF program */
-int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr);
+int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size);
 
 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
 void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
@@ -2075,7 +2796,7 @@ struct sk_buff;
 struct bpf_dtab_netdev;
 struct bpf_cpu_map_entry;
 
-void __dev_flush(void);
+void __dev_flush(struct list_head *flush_list);
 int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
 		    struct net_device *dev_rx);
 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf,
@@ -2083,12 +2804,12 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf,
 int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
 			  struct bpf_map *map, bool exclude_ingress);
 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
-			     struct bpf_prog *xdp_prog);
+			     const struct bpf_prog *xdp_prog);
 int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
-			   struct bpf_prog *xdp_prog, struct bpf_map *map,
-			   bool exclude_ingress);
+			   const struct bpf_prog *xdp_prog,
+			   struct bpf_map *map, bool exclude_ingress);
 
-void __cpu_map_flush(void);
+void __cpu_map_flush(struct list_head *flush_list);
 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf,
 		    struct net_device *dev_rx);
 int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
@@ -2120,6 +2841,9 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
 int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
 				const union bpf_attr *kattr,
 				union bpf_attr __user *uattr);
+int bpf_prog_test_run_nf(struct bpf_prog *prog,
+			 const union bpf_attr *kattr,
+			 union bpf_attr __user *uattr);
 bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 		    const struct bpf_prog *prog,
 		    struct bpf_insn_access_aux *info);
@@ -2149,7 +2873,7 @@ static inline bool bpf_tracing_btf_ctx_access(int off, int size,
 int btf_struct_access(struct bpf_verifier_log *log,
 		      const struct bpf_reg_state *reg,
 		      int off, int size, enum bpf_access_type atype,
-		      u32 *next_btf_id, enum bpf_type_flag *flag);
+		      u32 *next_btf_id, enum bpf_type_flag *flag, const char **field_name);
 bool btf_struct_ids_match(struct bpf_verifier_log *log,
 			  const struct btf *btf, u32 id, int off,
 			  const struct btf *need_btf, u32 need_type_id,
@@ -2162,30 +2886,41 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
 			   struct btf_func_model *m);
 
 struct bpf_reg_state;
-int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
-				struct bpf_reg_state *regs);
-int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
-			   struct bpf_reg_state *regs);
-int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
-			  struct bpf_reg_state *reg);
+int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog);
 int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog,
 			 struct btf *btf, const struct btf_type *t);
+const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt,
+				    int comp_idx, const char *tag_key);
+int btf_find_next_decl_tag(const struct btf *btf, const struct btf_type *pt,
+			   int comp_idx, const char *tag_key, int last_id);
 
 struct bpf_prog *bpf_prog_by_id(u32 id);
 struct bpf_link *bpf_link_by_id(u32 id);
 
-const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
+const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id,
						 const struct bpf_prog *prog);
 void bpf_task_storage_free(struct task_struct *task);
 void bpf_cgrp_storage_free(struct cgroup *cgroup);
 bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog);
 const struct btf_func_model *
 bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
 			 const struct bpf_insn *insn);
+int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
+		       u16 btf_fd_idx, u8 **func_addr);
+
 struct bpf_core_ctx {
 	struct bpf_verifier_log *log;
 	const struct btf *btf;
 };
 
+bool btf_nested_type_is_trusted(struct bpf_verifier_log *log,
+				const struct bpf_reg_state *reg,
+				const char *field_name, u32 btf_id, const char *suffix);
+
+bool btf_type_ids_nocast_alias(struct bpf_verifier_log *log,
+			       const struct btf *reg_btf, u32 reg_id,
+			       const struct btf *arg_btf, u32 arg_id);
+
 int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
 		   int relo_idx, void *insn);
 
@@ -2205,6 +2940,13 @@ static inline bool has_current_bpf_ctx(void)
 }
 
 void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog);
+
+void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
+		     enum bpf_dynptr_type type, u32 offset, u32 size);
+void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
+void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr);
+void bpf_prog_report_arena_violation(bool write, unsigned long addr, unsigned long fault_ip);
+
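The dynptr primitives declared just above construct and constrain the kernel-side dynptr representation. A hedged sketch of wrapping an existing buffer in a read-only local dynptr (BPF_DYNPTR_TYPE_LOCAL is assumed from the dynptr type enum, which this patch relocates out of the excerpt shown here; buffer and length names are illustrative):

/* Sketch: hand a kernel buffer to BPF-facing code as a read-only
 * local dynptr.
 */
static void example_make_ro_dynptr(struct bpf_dynptr_kern *ptr,
				   void *buf, u32 len)
{
	bpf_dynptr_init(ptr, buf, BPF_DYNPTR_TYPE_LOCAL, 0, len);
	bpf_dynptr_set_rdonly(ptr);	/* writes through this dynptr now fail */
}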
 #else /* !CONFIG_BPF_SYSCALL */
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 {
@@ -2242,7 +2984,13 @@ bpf_prog_inc_not_zero(struct bpf_prog *prog)
 
 static inline void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
 				 const struct bpf_link_ops *ops,
-				 struct bpf_prog *prog)
+				 struct bpf_prog *prog, enum bpf_attach_type attach_type)
+{
+}
+
+static inline void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type,
+					   const struct bpf_link_ops *ops, struct bpf_prog *prog,
+					   enum bpf_attach_type attach_type, bool sleepable)
 {
 }
@@ -2265,6 +3013,11 @@ static inline void bpf_link_inc(struct bpf_link *link)
 {
 }
 
+static inline struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link)
+{
+	return NULL;
+}
+
 static inline void bpf_link_put(struct bpf_link *link)
 {
 }
@@ -2274,7 +3027,32 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags)
 	return -EOPNOTSUPP;
 }
 
-static inline void __dev_flush(void)
+static inline bool bpf_token_capable(const struct bpf_token *token, int cap)
+{
+	return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN));
+}
+
+static inline void bpf_token_inc(struct bpf_token *token)
+{
+}
+
+static inline void bpf_token_put(struct bpf_token *token)
+{
+}
+
+static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline int bpf_token_get_info_by_fd(struct bpf_token *token,
+					   const union bpf_attr *attr,
+					   union bpf_attr __user *uattr)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void __dev_flush(struct list_head *flush_list)
 {
 }
@@ -2307,20 +3085,20 @@ struct sk_buff;
 
 static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
 					   struct sk_buff *skb,
-					   struct bpf_prog *xdp_prog)
+					   const struct bpf_prog *xdp_prog)
 {
 	return 0;
 }
 
 static inline int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
-					 struct bpf_prog *xdp_prog, struct bpf_map *map,
-					 bool exclude_ingress)
+					 const struct bpf_prog *xdp_prog,
+					 struct bpf_map *map, bool exclude_ingress)
 {
 	return 0;
 }
 
-static inline void __cpu_map_flush(void)
+static inline void __cpu_map_flush(struct list_head *flush_list)
 {
 }
@@ -2390,13 +3168,14 @@ static inline struct bpf_prog *bpf_prog_by_id(u32 id)
 static inline int btf_struct_access(struct bpf_verifier_log *log,
 				    const struct bpf_reg_state *reg,
 				    int off, int size, enum bpf_access_type atype,
-				    u32 *next_btf_id, enum bpf_type_flag *flag)
+				    u32 *next_btf_id, enum bpf_type_flag *flag,
+				    const char **field_name)
 {
 	return -EACCES;
 }
 
 static inline const struct bpf_func_proto *
-bpf_base_func_proto(enum bpf_func_id func_id)
+bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	return NULL;
 }
@@ -2417,6 +3196,13 @@ bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
 	return NULL;
 }
 
+static inline int
+bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
+		   u16 btf_fd_idx, u8 **func_addr)
+{
+	return -ENOTSUPP;
+}
+
 static inline bool unprivileged_ebpf_enabled(void)
 {
 	return false;
@@ -2434,10 +3220,39 @@ static inline void bpf_prog_inc_misses_counter(struct bpf_prog *prog)
 static inline void bpf_cgrp_storage_free(struct cgroup *cgroup)
 {
 }
+
+static inline void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
+				   enum bpf_dynptr_type type, u32 offset, u32 size)
+{
+}
+
+static inline void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
+{
+}
+
+static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
+{
+}
+
+static inline void bpf_prog_report_arena_violation(bool write, unsigned long addr,
+						   unsigned long fault_ip)
+{
+}
 #endif /* CONFIG_BPF_SYSCALL */
 
-void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
-			  struct btf_mod_pair *used_btfs, u32 len);
+static __always_inline int
+bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
+{
+	int ret = -EFAULT;
+
+	if (IS_ENABLED(CONFIG_BPF_EVENTS))
+		ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
+	if (unlikely(ret < 0))
+		memset(dst, 0, size);
+	return ret;
+}
+
+void __bpf_free_used_btfs(struct btf_mod_pair *used_btfs, u32 len);
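bpf_probe_read_kernel_common() above encodes a deliberate contract: on any fault the destination is fully zeroed, so callers never observe stale stack or heap contents. A small caller-side sketch (names are illustrative):

/* Sketch of the zero-fill contract: even on failure the output buffer
 * is defined, so it can be consumed unconditionally when 0 is an
 * acceptable fallback value.
 */
static u64 example_read_u64(const void *unsafe_ptr)
{
	u64 val;

	if (bpf_probe_read_kernel_common(&val, sizeof(val), unsafe_ptr) < 0)
		return 0;	/* faulted: val was memset to 0 anyway */
	return val;
}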
 
 static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
 						 enum bpf_prog_type type)
@@ -2451,7 +3266,7 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux,
 bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool);
 
 int bpf_prog_offload_compile(struct bpf_prog *prog);
-void bpf_prog_offload_destroy(struct bpf_prog *prog);
+void bpf_prog_dev_bound_destroy(struct bpf_prog *prog);
 int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
 			       struct bpf_prog *prog);
@@ -2479,20 +3294,33 @@ bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev);
 void unpriv_ebpf_notify(int new_state);
 
 #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
-int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
+int bpf_dev_bound_kfunc_check(struct bpf_verifier_log *log,
+			      struct bpf_prog_aux *prog_aux);
+void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id);
+int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr);
+int bpf_prog_dev_bound_inherit(struct bpf_prog *new_prog, struct bpf_prog *old_prog);
+void bpf_dev_bound_netdev_unregister(struct net_device *dev);
 
 static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux)
 {
+	return aux->dev_bound;
+}
+
+static inline bool bpf_prog_is_offloaded(const struct bpf_prog_aux *aux)
+{
 	return aux->offload_requested;
 }
 
-static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
+bool bpf_prog_dev_bound_match(const struct bpf_prog *lhs, const struct bpf_prog *rhs);
+
+static inline bool bpf_map_is_offloaded(struct bpf_map *map)
 {
 	return unlikely(map->ops == &bpf_map_offload_ops);
 }
 
 struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr);
 void bpf_map_offload_map_free(struct bpf_map *map);
+u64 bpf_map_offload_map_mem_usage(const struct bpf_map *map);
 int bpf_prog_test_run_syscall(struct bpf_prog *prog,
 			      const union bpf_attr *kattr,
 			      union bpf_attr __user *uattr);
@@ -2502,23 +3330,56 @@ int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
 int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
 int sock_map_bpf_prog_query(const union bpf_attr *attr,
 			    union bpf_attr __user *uattr);
+int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog);
 
 void sock_map_unhash(struct sock *sk);
 void sock_map_destroy(struct sock *sk);
 void sock_map_close(struct sock *sk, long timeout);
 #else
-static inline int bpf_prog_offload_init(struct bpf_prog *prog,
-					union bpf_attr *attr)
+static inline int bpf_dev_bound_kfunc_check(struct bpf_verifier_log *log,
+					    struct bpf_prog_aux *prog_aux)
 {
 	return -EOPNOTSUPP;
 }
 
-static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux)
+static inline void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog,
+						u32 func_id)
+{
+	return NULL;
+}
+
+static inline int bpf_prog_dev_bound_init(struct bpf_prog *prog,
+					  union bpf_attr *attr)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int bpf_prog_dev_bound_inherit(struct bpf_prog *new_prog,
+					     struct bpf_prog *old_prog)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void bpf_dev_bound_netdev_unregister(struct net_device *dev)
+{
+}
+
+static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux)
+{
+	return false;
+}
+
+static inline bool bpf_prog_is_offloaded(struct bpf_prog_aux *aux)
 {
 	return false;
 }
 
-static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
+static inline bool bpf_prog_dev_bound_match(const struct bpf_prog *lhs, const struct bpf_prog *rhs)
+{
+	return false;
+}
+
+static inline bool bpf_map_is_offloaded(struct bpf_map *map)
 {
 	return false;
 }
@@ -2532,6 +3393,11 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
 {
 }
 
+static inline u64 bpf_map_offload_map_mem_usage(const struct bpf_map *map)
+{
+	return 0;
+}
+
 static inline int bpf_prog_test_run_syscall(struct bpf_prog *prog,
 					    const union bpf_attr *kattr,
 					    union bpf_attr __user *uattr)
@@ -2563,9 +3429,30 @@ static inline int sock_map_bpf_prog_query(const union bpf_attr *attr,
 {
 	return -EINVAL;
 }
+
+static inline int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+	return -EOPNOTSUPP;
+}
 #endif /* CONFIG_BPF_SYSCALL */
 #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
 
+static __always_inline void
+bpf_prog_inc_misses_counters(const struct bpf_prog_array *array)
+{
+	const struct bpf_prog_array_item *item;
+	struct bpf_prog *prog;
+
+	if (unlikely(!array))
+		return;
+
+	item = &array->items[0];
+	while ((prog = READ_ONCE(item->prog))) {
+		bpf_prog_inc_misses_counter(prog);
+		item++;
+	}
+}
+
 #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
 void bpf_sk_reuseport_detach(struct sock *sk);
 int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
@@ -2593,6 +3480,38 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
 #endif /* CONFIG_BPF_SYSCALL */
 #endif /* defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) */
 
+#if defined(CONFIG_KEYS) && defined(CONFIG_BPF_SYSCALL)
+
+struct bpf_key *bpf_lookup_user_key(s32 serial, u64 flags);
+struct bpf_key *bpf_lookup_system_key(u64 id);
+void bpf_key_put(struct bpf_key *bkey);
+int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
+			       struct bpf_dynptr *sig_p,
+			       struct bpf_key *trusted_keyring);
+
+#else
+static inline struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags)
+{
+	return NULL;
+}
+
+static inline struct bpf_key *bpf_lookup_system_key(u64 id)
+{
+	return NULL;
+}
+
+static inline void bpf_key_put(struct bpf_key *bkey)
+{
+}
+
+static inline int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
+					     struct bpf_dynptr *sig_p,
+					     struct bpf_key *trusted_keyring)
+{
+	return -EOPNOTSUPP;
+}
+#endif /* defined(CONFIG_KEYS) && defined(CONFIG_BPF_SYSCALL) */
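The keyring helpers above follow the usual lookup/put discipline. A hedged sketch of the intended pairing (the dynptrs are assumed to be prepared by the caller; the flags value and function name are illustrative):

/* Illustrative pairing: look up a user keyring by serial, verify a
 * PKCS#7 signature against it, and drop the lookup reference on all
 * paths.
 */
static int example_verify_blob(struct bpf_dynptr *data, struct bpf_dynptr *sig,
			       s32 keyring_serial)
{
	struct bpf_key *bkey = bpf_lookup_user_key(keyring_serial, 0);
	int err;

	if (!bkey)
		return -ENOENT;
	err = bpf_verify_pkcs7_signature(data, sig, bkey);
	bpf_key_put(bkey);
	return err;
}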
 
 /* verifier prototypes for helper functions called from eBPF programs */
 extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
 extern const struct bpf_func_proto bpf_map_update_elem_proto;
@@ -2614,7 +3533,9 @@ extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
 extern const struct bpf_func_proto bpf_get_current_comm_proto;
 extern const struct bpf_func_proto bpf_get_stackid_proto;
 extern const struct bpf_func_proto bpf_get_stack_proto;
+extern const struct bpf_func_proto bpf_get_stack_sleepable_proto;
 extern const struct bpf_func_proto bpf_get_task_stack_proto;
+extern const struct bpf_func_proto bpf_get_task_stack_sleepable_proto;
 extern const struct bpf_func_proto bpf_get_stackid_proto_pe;
 extern const struct bpf_func_proto bpf_get_stack_proto_pe;
 extern const struct bpf_func_proto bpf_sock_map_update_proto;
@@ -2622,6 +3543,7 @@ extern const struct bpf_func_proto bpf_sock_hash_update_proto;
 extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
 extern const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto;
 extern const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto;
+extern const struct bpf_func_proto bpf_current_task_under_cgroup_proto;
 extern const struct bpf_func_proto bpf_msg_redirect_hash_proto;
 extern const struct bpf_func_proto bpf_msg_redirect_map_proto;
 extern const struct bpf_func_proto bpf_sk_redirect_hash_proto;
@@ -2696,6 +3618,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 				struct bpf_insn *insn_buf,
 				struct bpf_prog *prog,
 				u32 *target_size);
+int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
+			       struct bpf_dynptr *ptr);
 #else
 static inline bool bpf_sock_common_is_valid_access(int off, int size,
 						   enum bpf_access_type type,
@@ -2717,6 +3641,11 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 {
 	return 0;
 }
+static inline int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
+					     struct bpf_dynptr *ptr)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 #ifdef CONFIG_INET
@@ -2781,12 +3710,17 @@ static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
 #endif /* CONFIG_INET */
 
 enum bpf_text_poke_type {
+	BPF_MOD_NOP,
 	BPF_MOD_CALL,
 	BPF_MOD_JUMP,
 };
 
-int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
-		       void *addr1, void *addr2);
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
+		       enum bpf_text_poke_type new_t, void *old_addr,
+		       void *new_addr);
+
+void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
+			       struct bpf_prog *new, struct bpf_prog *old);
 void *bpf_arch_text_copy(void *dst, void *src, size_t len);
 int bpf_arch_text_invalidate(void *dst, size_t len);
 
@@ -2795,40 +3729,52 @@ struct btf_id_set;
 bool btf_id_set_contains(const struct btf_id_set *set, u32 id);
 
 #define MAX_BPRINTF_VARARGS		12
+#define MAX_BPRINTF_BUF			1024
 
-int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
-			u32 **bin_buf, u32 num_args);
-void bpf_bprintf_cleanup(void);
-
-/* the implementation of the opaque uapi struct bpf_dynptr */
-struct bpf_dynptr_kern {
-	void *data;
-	/* Size represents the number of usable bytes of dynptr data.
-	 * If for example the offset is at 4 for a local dynptr whose data is
-	 * of type u64, the number of usable bytes is 4.
-	 *
-	 * The upper 8 bits are reserved. It is as follows:
-	 * Bits 0 - 23 = size
-	 * Bits 24 - 30 = dynptr type
-	 * Bit 31 = whether dynptr is read-only
-	 */
-	u32 size;
-	u32 offset;
-} __aligned(8);
-
-enum bpf_dynptr_type {
-	BPF_DYNPTR_TYPE_INVALID,
-	/* Points to memory that is local to the bpf program */
-	BPF_DYNPTR_TYPE_LOCAL,
-	/* Underlying data is a kernel-produced ringbuf record */
-	BPF_DYNPTR_TYPE_RINGBUF,
-};
-
-void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
-		     enum bpf_dynptr_type type, u32 offset, u32 size);
-void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
-int bpf_dynptr_check_size(u32 size);
-u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr);
+/* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary
+ * arguments representation.
+ */
+#define MAX_BPRINTF_BIN_ARGS	512
+
+struct bpf_bprintf_buffers {
+	char bin_args[MAX_BPRINTF_BIN_ARGS];
+	char buf[MAX_BPRINTF_BUF];
+};
+
+struct bpf_bprintf_data {
+	u32 *bin_args;
+	char *buf;
+	bool get_bin_args;
+	bool get_buf;
+};
+
+int bpf_bprintf_prepare(const char *fmt, u32 fmt_size, const u64 *raw_args,
+			u32 num_args, struct bpf_bprintf_data *data);
+void bpf_bprintf_cleanup(struct bpf_bprintf_data *data);
+int bpf_try_get_buffers(struct bpf_bprintf_buffers **bufs);
+void bpf_put_buffers(void);
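The bprintf machinery above backs every printf-like BPF helper: prepare decodes the raw argument array into a per-cpu binary buffer, and cleanup must run on every exit path. A sketch of the bracket, assuming the in-kernel bstr_printf() consumer (the helper name is illustrative):

/* Sketch of the prepare/use/cleanup bracket for a printf-style helper.
 * Only the decoded argument buffer is requested here (get_bin_args);
 * helpers that format into a scratch buffer would also set get_buf.
 */
static int example_printf_helper(const char *fmt, u32 fmt_size,
				 const u64 *raw_args, u32 num_args,
				 char *out, size_t out_size)
{
	struct bpf_bprintf_data data = { .get_bin_args = true };
	int err;

	err = bpf_bprintf_prepare(fmt, fmt_size, raw_args, num_args, &data);
	if (err < 0)
		return err;

	err = bstr_printf(out, out_size, fmt, data.bin_args);
	bpf_bprintf_cleanup(&data);	/* release the per-cpu buffers */
	return err;
}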
+
+void bpf_prog_stream_init(struct bpf_prog *prog);
+void bpf_prog_stream_free(struct bpf_prog *prog);
+int bpf_prog_stream_read(struct bpf_prog *prog, enum bpf_stream_id stream_id, void __user *buf, int len);
+void bpf_stream_stage_init(struct bpf_stream_stage *ss);
+void bpf_stream_stage_free(struct bpf_stream_stage *ss);
+__printf(2, 3)
+int bpf_stream_stage_printk(struct bpf_stream_stage *ss, const char *fmt, ...);
+int bpf_stream_stage_commit(struct bpf_stream_stage *ss, struct bpf_prog *prog,
+			    enum bpf_stream_id stream_id);
+int bpf_stream_stage_dump_stack(struct bpf_stream_stage *ss);
+
+#define bpf_stream_printk(ss, ...) bpf_stream_stage_printk(&ss, __VA_ARGS__)
+#define bpf_stream_dump_stack(ss) bpf_stream_stage_dump_stack(&ss)
+
+#define bpf_stream_stage(ss, prog, stream_id, expr)		\
+	({							\
+		bpf_stream_stage_init(&ss);			\
+		(expr);						\
+		bpf_stream_stage_commit(&ss, prog, stream_id);	\
+		bpf_stream_stage_free(&ss);			\
+	})
 
 #ifdef CONFIG_BPF_LSM
 void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype);
@@ -2852,4 +3798,46 @@ static inline bool type_is_alloc(u32 type)
 	return type & MEM_ALLOC;
 }
 
+static inline gfp_t bpf_memcg_flags(gfp_t flags)
+{
+	if (memcg_bpf_enabled())
+		return flags | __GFP_ACCOUNT;
+	return flags;
+}
+
+static inline bool bpf_is_subprog(const struct bpf_prog *prog)
+{
+	return prog->aux->func_idx != 0;
+}
+
+int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char **filep,
+			   const char **linep, int *nump);
+struct bpf_prog *bpf_prog_find_from_stack(void);
+
+int bpf_insn_array_init(struct bpf_map *map, const struct bpf_prog *prog);
+int bpf_insn_array_ready(struct bpf_map *map);
+void bpf_insn_array_release(struct bpf_map *map);
+void bpf_insn_array_adjust(struct bpf_map *map, u32 off, u32 len);
+void bpf_insn_array_adjust_after_remove(struct bpf_map *map, u32 off, u32 len);
+
+#ifdef CONFIG_BPF_SYSCALL
+void bpf_prog_update_insn_ptrs(struct bpf_prog *prog, u32 *offsets, void *image);
+#else
+static inline void
+bpf_prog_update_insn_ptrs(struct bpf_prog *prog, u32 *offsets, void *image)
+{
+}
+#endif
+
+static inline int bpf_map_check_op_flags(struct bpf_map *map, u64 flags, u64 allowed_flags)
+{
+	if (flags & ~allowed_flags)
+		return -EINVAL;
+
+	if ((flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK))
+		return -EINVAL;
+
+	return 0;
+}
+
 #endif /* _LINUX_BPF_H */
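As a closing illustration, the stream staging macros added near the end of the header compose like this (a sketch only; BPF_STDERR is assumed from the BPF streams UAPI enum, which is outside this excerpt):

/* Sketch: stage a message plus a stack dump and commit both to the
 * program's stderr stream in one shot via bpf_stream_stage().
 */
static void example_report_fault(struct bpf_prog *prog, unsigned long addr)
{
	struct bpf_stream_stage ss;

	bpf_stream_stage(ss, prog, BPF_STDERR, ({
		bpf_stream_printk(ss, "fault at address %lx\n", addr);
		bpf_stream_dump_stack(ss);
	}));
}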
