diff options
Diffstat (limited to 'include/linux/bpf.h')
-rw-r--r-- | include/linux/bpf.h | 493 |
1 files changed, 376 insertions, 117 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e30100597d0a..f3f50e29d639 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -37,6 +37,7 @@ struct perf_event; struct bpf_prog; struct bpf_prog_aux; struct bpf_map; +struct bpf_arena; struct sock; struct seq_file; struct btf; @@ -52,6 +53,10 @@ struct module; struct bpf_func_state; struct ftrace_ops; struct cgroup; +struct bpf_token; +struct user_namespace; +struct super_block; +struct inode; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -135,6 +140,9 @@ struct bpf_map_ops { int (*map_mmap)(struct bpf_map *map, struct vm_area_struct *vma); __poll_t (*map_poll)(struct bpf_map *map, struct file *filp, struct poll_table_struct *pts); + unsigned long (*map_get_unmapped_area)(struct file *filep, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags); /* Functions called by bpf_local_storage maps */ int (*map_local_storage_charge)(struct bpf_local_storage_map *smap, @@ -176,8 +184,8 @@ struct bpf_map_ops { }; enum { - /* Support at most 10 fields in a BTF type */ - BTF_FIELDS_MAX = 10, + /* Support at most 11 fields in a BTF type */ + BTF_FIELDS_MAX = 11, }; enum btf_field_type { @@ -194,6 +202,8 @@ enum btf_field_type { BPF_GRAPH_NODE = BPF_RB_NODE | BPF_LIST_NODE, BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD, BPF_REFCOUNT = (1 << 9), + BPF_WORKQUEUE = (1 << 10), + BPF_UPTR = (1 << 11), }; typedef void (*btf_dtor_kfunc_t)(void *); @@ -230,6 +240,7 @@ struct btf_record { u32 field_mask; int spin_lock_off; int timer_off; + int wq_off; int refcount_off; struct btf_field fields[]; }; @@ -247,10 +258,7 @@ struct bpf_list_node_kern { } __attribute__((aligned(8))); struct bpf_map { - /* The first two cachelines with read-mostly members of which some - * are also accessed in fast-path (e.g. ops, max_entries). - */ - const struct bpf_map_ops *ops ____cacheline_aligned; + const struct bpf_map_ops *ops; struct bpf_map *inner_map_meta; #ifdef CONFIG_SECURITY void *security; @@ -268,21 +276,18 @@ struct bpf_map { u32 btf_value_type_id; u32 btf_vmlinux_value_type_id; struct btf *btf; -#ifdef CONFIG_MEMCG_KMEM +#ifdef CONFIG_MEMCG struct obj_cgroup *objcg; #endif char name[BPF_OBJ_NAME_LEN]; - /* The 3rd and 4th cacheline with misc members to avoid false sharing - * particularly with refcounting. - */ - atomic64_t refcnt ____cacheline_aligned; + struct mutex freeze_mutex; + atomic64_t refcnt; atomic64_t usercnt; /* rcu is used before freeing and work is only used during freeing */ union { struct work_struct work; struct rcu_head rcu; }; - struct mutex freeze_mutex; atomic64_t writecnt; /* 'Ownership' of program-containing map is claimed by the first program * that is going to use this map or by the first program which FD is @@ -290,6 +295,7 @@ struct bpf_map { * same prog type, JITed flag and xdp_has_frags flag. */ struct { + const struct btf_type *attach_func_proto; spinlock_t lock; enum bpf_prog_type type; bool jited; @@ -310,11 +316,15 @@ static inline const char *btf_field_type_name(enum btf_field_type type) return "bpf_spin_lock"; case BPF_TIMER: return "bpf_timer"; + case BPF_WORKQUEUE: + return "bpf_wq"; case BPF_KPTR_UNREF: case BPF_KPTR_REF: return "kptr"; case BPF_KPTR_PERCPU: return "percpu_kptr"; + case BPF_UPTR: + return "uptr"; case BPF_LIST_HEAD: return "bpf_list_head"; case BPF_LIST_NODE: @@ -338,9 +348,12 @@ static inline u32 btf_field_type_size(enum btf_field_type type) return sizeof(struct bpf_spin_lock); case BPF_TIMER: return sizeof(struct bpf_timer); + case BPF_WORKQUEUE: + return sizeof(struct bpf_wq); case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: + case BPF_UPTR: return sizeof(u64); case BPF_LIST_HEAD: return sizeof(struct bpf_list_head); @@ -365,9 +378,12 @@ static inline u32 btf_field_type_align(enum btf_field_type type) return __alignof__(struct bpf_spin_lock); case BPF_TIMER: return __alignof__(struct bpf_timer); + case BPF_WORKQUEUE: + return __alignof__(struct bpf_wq); case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: + case BPF_UPTR: return __alignof__(u64); case BPF_LIST_HEAD: return __alignof__(struct bpf_list_head); @@ -404,9 +420,11 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr) /* RB_ROOT_CACHED 0-inits, no need to do anything after memset */ case BPF_SPIN_LOCK: case BPF_TIMER: + case BPF_WORKQUEUE: case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: + case BPF_UPTR: break; default: WARN_ON_ONCE(1); @@ -495,6 +513,25 @@ static inline void copy_map_value_long(struct bpf_map *map, void *dst, void *src bpf_obj_memcpy(map->record, dst, src, map->value_size, true); } +static inline void bpf_obj_swap_uptrs(const struct btf_record *rec, void *dst, void *src) +{ + unsigned long *src_uptr, *dst_uptr; + const struct btf_field *field; + int i; + + if (!btf_record_has_field(rec, BPF_UPTR)) + return; + + for (i = 0, field = rec->fields; i < rec->cnt; i++, field++) { + if (field->type != BPF_UPTR) + continue; + + src_uptr = src + field->offset; + dst_uptr = dst + field->offset; + swap(*src_uptr, *dst_uptr); + } +} + static inline void bpf_obj_memzero(struct btf_record *rec, void *dst, u32 size) { u32 curr_off = 0; @@ -523,12 +560,13 @@ static inline void zero_map_value(struct bpf_map *map, void *dst) void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, bool lock_src); void bpf_timer_cancel_and_free(void *timer); +void bpf_wq_cancel_and_free(void *timer); void bpf_list_head_free(const struct btf_field *field, void *list_head, struct bpf_spin_lock *spin_lock); void bpf_rb_root_free(const struct btf_field *field, void *rb_root, struct bpf_spin_lock *spin_lock); - - +u64 bpf_arena_get_kern_vm_start(struct bpf_arena *arena); +u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena); int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size); struct bpf_offload_dev; @@ -622,6 +660,7 @@ enum bpf_type_flag { */ PTR_UNTRUSTED = BIT(6 + BPF_BASE_TYPE_BITS), + /* MEM can be uninitialized. */ MEM_UNINIT = BIT(7 + BPF_BASE_TYPE_BITS), /* DYNPTR points to memory local to the bpf program. */ @@ -682,6 +721,18 @@ enum bpf_type_flag { /* DYNPTR points to xdp_buff */ DYNPTR_TYPE_XDP = BIT(16 + BPF_BASE_TYPE_BITS), + /* Memory must be aligned on some architectures, used in combination with + * MEM_FIXED_SIZE. + */ + MEM_ALIGNED = BIT(17 + BPF_BASE_TYPE_BITS), + + /* MEM is being written to, often combined with MEM_UNINIT. Non-presence + * of MEM_WRITE means that MEM is only being read. MEM_WRITE without the + * MEM_UNINIT means that memory needs to be initialized since it is also + * read. + */ + MEM_WRITE = BIT(18 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; @@ -710,6 +761,7 @@ enum bpf_arg_type { * on eBPF program stack */ ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */ + ARG_PTR_TO_ARENA, ARG_CONST_SIZE, /* number of bytes accessed from memory */ ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */ @@ -718,8 +770,6 @@ enum bpf_arg_type { ARG_ANYTHING, /* any (initialized) argument is ok */ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ - ARG_PTR_TO_INT, /* pointer to int */ - ARG_PTR_TO_LONG, /* pointer to long */ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ ARG_PTR_TO_RINGBUF_MEM, /* pointer to dynamically reserved ringbuf memory */ @@ -730,7 +780,7 @@ enum bpf_arg_type { ARG_PTR_TO_STACK, /* pointer to stack */ ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ - ARG_PTR_TO_KPTR, /* pointer to referenced kptr */ + ARG_KPTR_XCHG_DEST, /* pointer to destination that kptrs are bpf_kptr_xchg'd into */ ARG_PTR_TO_DYNPTR, /* pointer to bpf_dynptr. See bpf_type_flag for dynptr type */ __BPF_ARG_TYPE_MAX, @@ -741,10 +791,10 @@ enum bpf_arg_type { ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET, ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK, ARG_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID, - /* pointer to memory does not need to be initialized, helper function must fill - * all bytes or clear them in error case. + /* Pointer to memory does not need to be initialized, since helper function + * fills all bytes or clears them in error case. */ - ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | ARG_PTR_TO_MEM, + ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | MEM_WRITE | ARG_PTR_TO_MEM, /* Pointer to valid memory of size known at compile time. */ ARG_PTR_TO_FIXED_SIZE_MEM = MEM_FIXED_SIZE | ARG_PTR_TO_MEM, @@ -794,6 +844,12 @@ struct bpf_func_proto { bool gpl_only; bool pkt_access; bool might_sleep; + /* set to true if helper follows contract for llvm + * attribute bpf_fastcall: + * - void functions do not scratch r0 + * - functions taking N arguments scratch only registers r1-rN + */ + bool allow_fastcall; enum bpf_return_type ret_type; union { struct { @@ -876,11 +932,8 @@ enum bpf_reg_type { * additional context, assume the value is non-null. */ PTR_TO_BTF_ID, - /* PTR_TO_BTF_ID_OR_NULL points to a kernel struct that has not - * been checked for null. Used primarily to inform the verifier - * an explicit null check is required for this struct. - */ PTR_TO_MEM, /* reg points to valid memory region */ + PTR_TO_ARENA, PTR_TO_BUF, /* reg points to a read/write buffer */ PTR_TO_FUNC, /* reg points to a bpf program function */ CONST_PTR_TO_DYNPTR, /* reg points to a const struct bpf_dynptr */ @@ -891,6 +944,10 @@ enum bpf_reg_type { PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCKET, PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCK_COMMON, PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | PTR_TO_TCP_SOCK, + /* PTR_TO_BTF_ID_OR_NULL points to a kernel struct that has not + * been checked for null. Used primarily to inform the verifier + * an explicit null check is required for this struct. + */ PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | PTR_TO_BTF_ID, /* This must be the last entry. Its purpose is to ensure the enum is @@ -905,6 +962,7 @@ static_assert(__BPF_REG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT); */ struct bpf_insn_access_aux { enum bpf_reg_type reg_type; + bool is_ldsx; union { int ctx_field_size; struct { @@ -913,6 +971,7 @@ struct bpf_insn_access_aux { }; }; struct bpf_verifier_log *log; /* for verbose logs */ + bool is_retval; /* is accessing function return value ? */ }; static inline void @@ -951,6 +1010,8 @@ struct bpf_verifier_ops { struct bpf_insn_access_aux *info); int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, const struct bpf_prog *prog); + int (*gen_epilogue)(struct bpf_insn *insn, const struct bpf_prog *prog, + s16 ctx_stack_off); int (*gen_ld_abs)(const struct bpf_insn *orig, struct bpf_insn *insn_buf); u32 (*convert_ctx_access)(enum bpf_access_type type, @@ -1112,8 +1173,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i void *func_addr); void *arch_alloc_bpf_trampoline(unsigned int size); void arch_free_bpf_trampoline(void *image, unsigned int size); -void arch_protect_bpf_trampoline(void *image, unsigned int size); -void arch_unprotect_bpf_trampoline(void *image, unsigned int size); +int __must_check arch_protect_bpf_trampoline(void *image, unsigned int size); int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, struct bpf_tramp_links *tlinks, void *func_addr); @@ -1185,7 +1245,6 @@ struct bpf_trampoline { int progs_cnt[BPF_TRAMP_MAX]; /* Executable image of trampoline */ struct bpf_tramp_image *cur_image; - struct module *mod; }; struct bpf_attach_target_info { @@ -1263,10 +1322,15 @@ int bpf_dynptr_check_size(u32 size); u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len); void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len); +bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr); #ifdef CONFIG_BPF_JIT -int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); -int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); +int bpf_trampoline_link_prog(struct bpf_tramp_link *link, + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog); +int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog); struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); @@ -1338,7 +1402,8 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to); /* Called only from JIT-enabled code, so there's no need for stubs. */ -void bpf_image_ksym_add(void *data, unsigned int size, struct bpf_ksym *ksym); +void bpf_image_ksym_init(void *data, unsigned int size, struct bpf_ksym *ksym); +void bpf_image_ksym_add(struct bpf_ksym *ksym); void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); @@ -1347,12 +1412,14 @@ void bpf_jit_uncharge_modmem(u32 size); bool bpf_prog_has_trampoline(const struct bpf_prog *prog); #else static inline int bpf_trampoline_link_prog(struct bpf_tramp_link *link, - struct bpf_trampoline *tr) + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog) { return -ENOTSUPP; } static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, - struct bpf_trampoline *tr) + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog) { return -ENOTSUPP; } @@ -1412,6 +1479,7 @@ struct bpf_jit_poke_descriptor { struct bpf_ctx_arg_aux { u32 offset; enum bpf_reg_type reg_type; + struct btf *btf; u32 btf_id; }; @@ -1440,6 +1508,7 @@ struct bpf_prog_aux { u32 max_rdwr_access; struct btf *attach_btf; const struct bpf_ctx_arg_aux *ctx_arg_info; + void __percpu *priv_stack_ptr; struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */ struct bpf_prog *dst_prog; struct bpf_trampoline *dst_trampoline; @@ -1451,11 +1520,18 @@ struct bpf_prog_aux { bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ bool attach_tracing_prog; /* true if tracing another tracing program */ bool func_proto_unreliable; - bool sleepable; bool tail_call_reachable; bool xdp_has_frags; bool exception_cb; bool exception_boundary; + bool is_extended; /* true if extended by freplace program */ + bool jits_use_priv_stack; + bool priv_stack_requested; + bool changes_pkt_data; + u64 prog_array_member_cnt; /* counts how many times as member of prog_array */ + struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */ + struct bpf_arena *arena; + void (*recursion_detected)(struct bpf_prog *prog); /* callback if recursion is detected */ /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ @@ -1485,6 +1561,7 @@ struct bpf_prog_aux { #ifdef CONFIG_SECURITY void *security; #endif + struct bpf_token *token; struct bpf_prog_offload *offload; struct btf *btf; struct bpf_func_info *func_info; @@ -1535,7 +1612,8 @@ struct bpf_prog { enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */ call_get_func_ip:1, /* Do we call get_func_ip() */ - tstamp_type_access:1; /* Accessed __sk_buff->tstamp_type */ + tstamp_type_access:1, /* Accessed __sk_buff->tstamp_type */ + sleepable:1; /* BPF program is sleepable */ enum bpf_prog_type type; /* Type of BPF program */ enum bpf_attach_type expected_attach_type; /* For some prog types */ u32 len; /* Number of filter blocks */ @@ -1568,12 +1646,33 @@ struct bpf_link { enum bpf_link_type type; const struct bpf_link_ops *ops; struct bpf_prog *prog; - struct work_struct work; + /* whether BPF link itself has "sleepable" semantics, which can differ + * from underlying BPF program having a "sleepable" semantics, as BPF + * link's semantics is determined by target attach hook + */ + bool sleepable; + /* rcu is used before freeing, work can be used to schedule that + * RCU-based freeing before that, so they never overlap + */ + union { + struct rcu_head rcu; + struct work_struct work; + }; }; struct bpf_link_ops { void (*release)(struct bpf_link *link); + /* deallocate link resources callback, called without RCU grace period + * waiting + */ void (*dealloc)(struct bpf_link *link); + /* deallocate link resources callback, called after RCU grace period; + * if either the underlying BPF program is sleepable or BPF link's + * target hook is sleepable, we'll go through tasks trace RCU GP and + * then "classic" RCU GP; this need for chaining tasks trace and + * classic RCU GPs is designated by setting bpf_link->sleepable flag + */ + void (*dealloc_deferred)(struct bpf_link *link); int (*detach)(struct bpf_link *link); int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog, struct bpf_prog *old_prog); @@ -1582,6 +1681,7 @@ struct bpf_link_ops { struct bpf_link_info *info); int (*update_map)(struct bpf_link *link, struct bpf_map *new_map, struct bpf_map *old_map); + __poll_t (*poll)(struct file *file, struct poll_table_struct *pts); }; struct bpf_tramp_link { @@ -1602,6 +1702,12 @@ struct bpf_tracing_link { struct bpf_prog *tgt_prog; }; +struct bpf_raw_tp_link { + struct bpf_link link; + struct bpf_raw_event_map *btp; + u64 cookie; +}; + struct bpf_link_primer { struct bpf_link *link; struct file *file; @@ -1609,6 +1715,31 @@ struct bpf_link_primer { u32 id; }; +struct bpf_mount_opts { + kuid_t uid; + kgid_t gid; + umode_t mode; + + /* BPF token-related delegation options */ + u64 delegate_cmds; + u64 delegate_maps; + u64 delegate_progs; + u64 delegate_attachs; +}; + +struct bpf_token { + struct work_struct work; + atomic64_t refcnt; + struct user_namespace *userns; + u64 allowed_cmds; + u64 allowed_maps; + u64 allowed_progs; + u64 allowed_attachs; +#ifdef CONFIG_SECURITY + void *security; +#endif +}; + struct bpf_struct_ops_value; struct btf_member; @@ -1669,32 +1800,80 @@ struct bpf_struct_ops { int (*init_member)(const struct btf_type *t, const struct btf_member *member, void *kdata, const void *udata); - int (*reg)(void *kdata); - void (*unreg)(void *kdata); - int (*update)(void *kdata, void *old_kdata); + int (*reg)(void *kdata, struct bpf_link *link); + void (*unreg)(void *kdata, struct bpf_link *link); + int (*update)(void *kdata, void *old_kdata, struct bpf_link *link); int (*validate)(void *kdata); - const struct btf_type *type; - const struct btf_type *value_type; + void *cfi_stubs; + struct module *owner; const char *name; struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS]; +}; + +/* Every member of a struct_ops type has an instance even a member is not + * an operator (function pointer). The "info" field will be assigned to + * prog->aux->ctx_arg_info of BPF struct_ops programs to provide the + * argument information required by the verifier to verify the program. + * + * btf_ctx_access() will lookup prog->aux->ctx_arg_info to find the + * corresponding entry for an given argument. + */ +struct bpf_struct_ops_arg_info { + struct bpf_ctx_arg_aux *info; + u32 cnt; +}; + +struct bpf_struct_ops_desc { + struct bpf_struct_ops *st_ops; + + const struct btf_type *type; + const struct btf_type *value_type; u32 type_id; u32 value_id; - void *cfi_stubs; + + /* Collection of argument information for each member */ + struct bpf_struct_ops_arg_info *arg_info; +}; + +enum bpf_struct_ops_state { + BPF_STRUCT_OPS_STATE_INIT, + BPF_STRUCT_OPS_STATE_INUSE, + BPF_STRUCT_OPS_STATE_TOBEFREE, + BPF_STRUCT_OPS_STATE_READY, +}; + +struct bpf_struct_ops_common_value { + refcount_t refcnt; + enum bpf_struct_ops_state state; }; #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) +/* This macro helps developer to register a struct_ops type and generate + * type information correctly. Developers should use this macro to register + * a struct_ops type instead of calling __register_bpf_struct_ops() directly. + */ +#define register_bpf_struct_ops(st_ops, type) \ + ({ \ + struct bpf_struct_ops_##type { \ + struct bpf_struct_ops_common_value common; \ + struct type data ____cacheline_aligned_in_smp; \ + }; \ + BTF_TYPE_EMIT(struct bpf_struct_ops_##type); \ + __register_bpf_struct_ops(st_ops); \ + }) #define BPF_MODULE_OWNER ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA)) -const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id); -void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log); bool bpf_struct_ops_get(const void *kdata); void bpf_struct_ops_put(const void *kdata); +int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff); int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, void *value); int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, struct bpf_tramp_link *link, const struct btf_func_model *model, void *stub_func, - void *image, void *image_end); + void **image, u32 *image_off, + bool allow_alloc); +void bpf_struct_ops_image_free(void *image); static inline bool bpf_try_module_get(const void *data, struct module *owner) { if (owner == BPF_MODULE_OWNER) @@ -1727,15 +1906,13 @@ struct bpf_dummy_ops { int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); #endif +int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc, + struct btf *btf, + struct bpf_verifier_log *log); +void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map); +void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc); #else -static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id) -{ - return NULL; -} -static inline void bpf_struct_ops_init(struct btf *btf, - struct bpf_verifier_log *log) -{ -} +#define register_bpf_struct_ops(st_ops, type) ({ (void *)(st_ops); 0; }) static inline bool bpf_try_module_get(const void *data, struct module *owner) { return try_module_get(owner); @@ -1744,6 +1921,10 @@ static inline void bpf_module_put(const void *data, struct module *owner) { module_put(owner); } +static inline int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff) +{ + return -ENOTSUPP; +} static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, void *value) @@ -1754,6 +1935,13 @@ static inline int bpf_struct_ops_link_create(union bpf_attr *attr) { return -EOPNOTSUPP; } +static inline void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map) +{ +} + +static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc) +{ +} #endif @@ -2006,43 +2194,40 @@ bpf_prog_run_array(const struct bpf_prog_array *array, * rcu-protected dynamically sized maps. */ static __always_inline u32 -bpf_prog_run_array_uprobe(const struct bpf_prog_array __rcu *array_rcu, +bpf_prog_run_array_uprobe(const struct bpf_prog_array *array, const void *ctx, bpf_prog_run_fn run_prog) { const struct bpf_prog_array_item *item; const struct bpf_prog *prog; - const struct bpf_prog_array *array; struct bpf_run_ctx *old_run_ctx; struct bpf_trace_run_ctx run_ctx; u32 ret = 1; might_fault(); + RCU_LOCKDEP_WARN(!rcu_read_lock_trace_held(), "no rcu lock held"); + + if (unlikely(!array)) + return ret; - rcu_read_lock_trace(); migrate_disable(); run_ctx.is_uprobe = true; - array = rcu_dereference_check(array_rcu, rcu_read_lock_trace_held()); - if (unlikely(!array)) - goto out; old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); item = &array->items[0]; while ((prog = READ_ONCE(item->prog))) { - if (!prog->aux->sleepable) + if (!prog->sleepable) rcu_read_lock(); run_ctx.bpf_cookie = item->bpf_cookie; ret &= run_prog(prog, ctx); item++; - if (!prog->aux->sleepable) + if (!prog->sleepable) rcu_read_unlock(); } bpf_reset_run_ctx(old_run_ctx); -out: migrate_enable(); - rcu_read_unlock_trace(); return ret; } @@ -2068,6 +2253,7 @@ static inline void bpf_enable_instrumentation(void) migrate_enable(); } +extern const struct super_operations bpf_super_ops; extern const struct file_operations bpf_map_fops; extern const struct file_operations bpf_prog_fops; extern const struct file_operations bpf_iter_fops; @@ -2106,12 +2292,39 @@ void bpf_map_free_record(struct bpf_map *map); struct btf_record *btf_record_dup(const struct btf_record *rec); bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b); void bpf_obj_free_timer(const struct btf_record *rec, void *obj); +void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj); void bpf_obj_free_fields(const struct btf_record *rec, void *obj); void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu); struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *bpf_map_get_with_uref(u32 ufd); -struct bpf_map *__bpf_map_get(struct fd f); + +/* + * The __bpf_map_get() and __btf_get_by_fd() functions parse a file + * descriptor and return a corresponding map or btf object. + * Their names are double underscored to emphasize the fact that they + * do not increase refcnt. To also increase refcnt use corresponding + * bpf_map_get() and btf_get_by_fd() functions. + */ + +static inline struct bpf_map *__bpf_map_get(struct fd f) +{ + if (fd_empty(f)) + return ERR_PTR(-EBADF); + if (unlikely(fd_file(f)->f_op != &bpf_map_fops)) + return ERR_PTR(-EINVAL); + return fd_file(f)->private_data; +} + +static inline struct btf *__btf_get_by_fd(struct fd f) +{ + if (fd_empty(f)) + return ERR_PTR(-EBADF); + if (unlikely(fd_file(f)->f_op != &btf_fops)) + return ERR_PTR(-EINVAL); + return fd_file(f)->private_data; +} + void bpf_map_inc(struct bpf_map *map); void bpf_map_inc_with_uref(struct bpf_map *map); struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref); @@ -2135,7 +2348,9 @@ int generic_map_delete_batch(struct bpf_map *map, struct bpf_map *bpf_map_get_curr_or_next(u32 *id); struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id); -#ifdef CONFIG_MEMCG_KMEM +int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid, + unsigned long nr_pages, struct page **page_array); +#ifdef CONFIG_MEMCG void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, int node); void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags); @@ -2144,31 +2359,18 @@ void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align, gfp_t flags); #else -static inline void * -bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, - int node) -{ - return kmalloc_node(size, flags, node); -} - -static inline void * -bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags) -{ - return kzalloc(size, flags); -} - -static inline void * -bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, gfp_t flags) -{ - return kvcalloc(n, size, flags); -} - -static inline void __percpu * -bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align, - gfp_t flags) -{ - return __alloc_percpu_gfp(size, align, flags); -} +/* + * These specialized allocators have to be macros for their allocations to be + * accounted separately (to have separate alloc_tag). + */ +#define bpf_map_kmalloc_node(_map, _size, _flags, _node) \ + kmalloc_node(_size, _flags, _node) +#define bpf_map_kzalloc(_map, _size, _flags) \ + kzalloc(_size, _flags) +#define bpf_map_kvcalloc(_map, _n, _size, _flags) \ + kvcalloc(_n, _size, _flags) +#define bpf_map_alloc_percpu(_map, _size, _align, _flags) \ + __alloc_percpu_gfp(_size, _align, _flags) #endif static inline int @@ -2202,24 +2404,26 @@ static inline void bpf_map_dec_elem_count(struct bpf_map *map) extern int sysctl_unprivileged_bpf_disabled; -static inline bool bpf_allow_ptr_leaks(void) +bool bpf_token_capable(const struct bpf_token *token, int cap); + +static inline bool bpf_allow_ptr_leaks(const struct bpf_token *token) { - return perfmon_capable(); + return bpf_token_capable(token, CAP_PERFMON); } -static inline bool bpf_allow_uninit_stack(void) +static inline bool bpf_allow_uninit_stack(const struct bpf_token *token) { - return perfmon_capable(); + return bpf_token_capable(token, CAP_PERFMON); } -static inline bool bpf_bypass_spec_v1(void) +static inline bool bpf_bypass_spec_v1(const struct bpf_token *token) { - return cpu_mitigations_off() || perfmon_capable(); + return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); } -static inline bool bpf_bypass_spec_v4(void) +static inline bool bpf_bypass_spec_v4(const struct bpf_token *token) { - return cpu_mitigations_off() || perfmon_capable(); + return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); } int bpf_map_new_fd(struct bpf_map *map, int flags); @@ -2227,17 +2431,34 @@ int bpf_prog_new_fd(struct bpf_prog *prog); void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, const struct bpf_link_ops *ops, struct bpf_prog *prog); +void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type, + const struct bpf_link_ops *ops, struct bpf_prog *prog, + bool sleepable); int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer); int bpf_link_settle(struct bpf_link_primer *primer); void bpf_link_cleanup(struct bpf_link_primer *primer); void bpf_link_inc(struct bpf_link *link); +struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link); void bpf_link_put(struct bpf_link *link); int bpf_link_new_fd(struct bpf_link *link); struct bpf_link *bpf_link_get_from_fd(u32 ufd); struct bpf_link *bpf_link_get_curr_or_next(u32 *id); +void bpf_token_inc(struct bpf_token *token); +void bpf_token_put(struct bpf_token *token); +int bpf_token_create(union bpf_attr *attr); +struct bpf_token *bpf_token_get_from_fd(u32 ufd); + +bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd); +bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type); +bool bpf_token_allow_prog_type(const struct bpf_token *token, + enum bpf_prog_type prog_type, + enum bpf_attach_type attach_type); + int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname); int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags); +struct inode *bpf_get_inode(struct super_block *sb, const struct inode *dir, + umode_t mode); #define BPF_ITER_FUNC_PREFIX "bpf_iter_" #define DEFINE_BPF_ITER_FUNC(target, args...) \ @@ -2377,7 +2598,7 @@ struct sk_buff; struct bpf_dtab_netdev; struct bpf_cpu_map_entry; -void __dev_flush(void); +void __dev_flush(struct list_head *flush_list); int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, struct net_device *dev_rx); int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf, @@ -2385,12 +2606,12 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf, int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, struct bpf_map *map, bool exclude_ingress); int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, - struct bpf_prog *xdp_prog); + const struct bpf_prog *xdp_prog); int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, - struct bpf_prog *xdp_prog, struct bpf_map *map, - bool exclude_ingress); + const struct bpf_prog *xdp_prog, + struct bpf_map *map, bool exclude_ingress); -void __cpu_map_flush(void); +void __cpu_map_flush(struct list_head *flush_list); int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf, struct net_device *dev_rx); int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu, @@ -2472,11 +2693,14 @@ int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *pr struct btf *btf, const struct btf_type *t); const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt, int comp_idx, const char *tag_key); +int btf_find_next_decl_tag(const struct btf *btf, const struct btf_type *pt, + int comp_idx, const char *tag_key, int last_id); struct bpf_prog *bpf_prog_by_id(u32 id); struct bpf_link *bpf_link_by_id(u32 id); -const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); +const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id, + const struct bpf_prog *prog); void bpf_task_storage_free(struct task_struct *task); void bpf_cgrp_storage_free(struct cgroup *cgroup); bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); @@ -2524,8 +2748,6 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr); -bool dev_check_flush(void); -bool cpu_map_check_flush(void); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -2567,6 +2789,12 @@ static inline void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, { } +static inline void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type, + const struct bpf_link_ops *ops, struct bpf_prog *prog, + bool sleepable) +{ +} + static inline int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer) { @@ -2586,6 +2814,11 @@ static inline void bpf_link_inc(struct bpf_link *link) { } +static inline struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link) +{ + return NULL; +} + static inline void bpf_link_put(struct bpf_link *link) { } @@ -2595,7 +2828,25 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags) return -EOPNOTSUPP; } -static inline void __dev_flush(void) +static inline bool bpf_token_capable(const struct bpf_token *token, int cap) +{ + return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN)); +} + +static inline void bpf_token_inc(struct bpf_token *token) +{ +} + +static inline void bpf_token_put(struct bpf_token *token) +{ +} + +static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void __dev_flush(struct list_head *flush_list) { } @@ -2628,20 +2879,20 @@ struct sk_buff; static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, - struct bpf_prog *xdp_prog) + const struct bpf_prog *xdp_prog) { return 0; } static inline int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, - struct bpf_prog *xdp_prog, struct bpf_map *map, - bool exclude_ingress) + const struct bpf_prog *xdp_prog, + struct bpf_map *map, bool exclude_ingress) { return 0; } -static inline void __cpu_map_flush(void) +static inline void __cpu_map_flush(struct list_head *flush_list) { } @@ -2718,7 +2969,7 @@ static inline int btf_struct_access(struct bpf_verifier_log *log, } static inline const struct bpf_func_proto * -bpf_base_func_proto(enum bpf_func_id func_id) +bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { return NULL; } @@ -2790,8 +3041,7 @@ bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr) return ret; } -void __bpf_free_used_btfs(struct bpf_prog_aux *aux, - struct btf_mod_pair *used_btfs, u32 len); +void __bpf_free_used_btfs(struct btf_mod_pair *used_btfs, u32 len); static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type) @@ -2869,6 +3119,7 @@ int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); int sock_map_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); +int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog); void sock_map_unhash(struct sock *sk); void sock_map_destroy(struct sock *sk); @@ -2967,6 +3218,11 @@ static inline int sock_map_bpf_prog_query(const union bpf_attr *attr, { return -EINVAL; } + +static inline int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_BPF_SYSCALL */ #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ @@ -3034,7 +3290,9 @@ extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; extern const struct bpf_func_proto bpf_get_current_comm_proto; extern const struct bpf_func_proto bpf_get_stackid_proto; extern const struct bpf_func_proto bpf_get_stack_proto; +extern const struct bpf_func_proto bpf_get_stack_sleepable_proto; extern const struct bpf_func_proto bpf_get_task_stack_proto; +extern const struct bpf_func_proto bpf_get_task_stack_sleepable_proto; extern const struct bpf_func_proto bpf_get_stackid_proto_pe; extern const struct bpf_func_proto bpf_get_stack_proto_pe; extern const struct bpf_func_proto bpf_sock_map_update_proto; @@ -3042,6 +3300,7 @@ extern const struct bpf_func_proto bpf_sock_hash_update_proto; extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto; extern const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto; extern const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto; +extern const struct bpf_func_proto bpf_current_task_under_cgroup_proto; extern const struct bpf_func_proto bpf_msg_redirect_hash_proto; extern const struct bpf_func_proto bpf_msg_redirect_map_proto; extern const struct bpf_func_proto bpf_sk_redirect_hash_proto; @@ -3116,8 +3375,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size); -int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, - struct bpf_dynptr_kern *ptr); +int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags, + struct bpf_dynptr *ptr); #else static inline bool bpf_sock_common_is_valid_access(int off, int size, enum bpf_access_type type, @@ -3139,8 +3398,8 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, { return 0; } -static inline int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, - struct bpf_dynptr_kern *ptr) +static inline int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags, + struct bpf_dynptr *ptr) { return -EOPNOTSUPP; } |