summaryrefslogtreecommitdiff
path: root/include/linux/bpf.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/bpf.h')
-rw-r--r--include/linux/bpf.h1462
1 files changed, 1225 insertions, 237 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 634d37a599fa..6498be4c44f8 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -7,6 +7,7 @@
#include <uapi/linux/bpf.h>
#include <uapi/linux/filter.h>
+#include <crypto/sha2.h>
#include <linux/workqueue.h>
#include <linux/file.h>
#include <linux/percpu.h>
@@ -28,6 +29,9 @@
#include <linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/static_call.h>
+#include <linux/memcontrol.h>
+#include <linux/cfi.h>
+#include <asm/rqspinlock.h>
struct bpf_verifier_env;
struct bpf_verifier_log;
@@ -35,6 +39,7 @@ struct perf_event;
struct bpf_prog;
struct bpf_prog_aux;
struct bpf_map;
+struct bpf_arena;
struct sock;
struct seq_file;
struct btf;
@@ -50,11 +55,15 @@ struct module;
struct bpf_func_state;
struct ftrace_ops;
struct cgroup;
+struct bpf_token;
+struct user_namespace;
+struct super_block;
+struct inode;
extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
extern struct kobject *btf_kobj;
-extern struct bpf_mem_alloc bpf_global_ma;
+extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
extern bool bpf_global_ma_set;
typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64);
@@ -95,17 +104,22 @@ struct bpf_map_ops {
/* funcs callable from userspace and from eBPF programs */
void *(*map_lookup_elem)(struct bpf_map *map, void *key);
- int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
- int (*map_delete_elem)(struct bpf_map *map, void *key);
- int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
- int (*map_pop_elem)(struct bpf_map *map, void *value);
- int (*map_peek_elem)(struct bpf_map *map, void *value);
+ long (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
+ long (*map_delete_elem)(struct bpf_map *map, void *key);
+ long (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
+ long (*map_pop_elem)(struct bpf_map *map, void *value);
+ long (*map_peek_elem)(struct bpf_map *map, void *value);
void *(*map_lookup_percpu_elem)(struct bpf_map *map, void *key, u32 cpu);
+ int (*map_get_hash)(struct bpf_map *map, u32 hash_buf_size, void *hash_buf);
/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
int fd);
- void (*map_fd_put_ptr)(void *ptr);
+ /* If need_defer is true, the implementation should guarantee that
+ * the to-be-put element is still alive before the bpf program, which
+ * may manipulate it, exists.
+ */
+ void (*map_fd_put_ptr)(struct bpf_map *map, void *ptr, bool need_defer);
int (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
u32 (*map_fd_sys_lookup_elem)(void *ptr);
void (*map_seq_show_elem)(struct bpf_map *map, void *key,
@@ -129,6 +143,9 @@ struct bpf_map_ops {
int (*map_mmap)(struct bpf_map *map, struct vm_area_struct *vma);
__poll_t (*map_poll)(struct bpf_map *map, struct file *filp,
struct poll_table_struct *pts);
+ unsigned long (*map_get_unmapped_area)(struct file *filep, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags);
/* Functions called by bpf_local_storage maps */
int (*map_local_storage_charge)(struct bpf_local_storage_map *smap,
@@ -138,7 +155,7 @@ struct bpf_map_ops {
struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner);
/* Misc helpers.*/
- int (*map_redirect)(struct bpf_map *map, u64 key, u64 flags);
+ long (*map_redirect)(struct bpf_map *map, u64 key, u64 flags);
/* map_meta_equal must be implemented for maps that can be
* used as an inner map. It is a runtime check to ensure
@@ -156,10 +173,12 @@ struct bpf_map_ops {
int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee);
- int (*map_for_each_callback)(struct bpf_map *map,
+ long (*map_for_each_callback)(struct bpf_map *map,
bpf_callback_t callback_fn,
void *callback_ctx, u64 flags);
+ u64 (*map_mem_usage)(const struct bpf_map *map);
+
/* BTF id of struct allocated by map_alloc */
int *map_btf_id;
@@ -168,8 +187,8 @@ struct bpf_map_ops {
};
enum {
- /* Support at most 10 fields in a BTF type */
- BTF_FIELDS_MAX = 10,
+ /* Support at most 11 fields in a BTF type */
+ BTF_FIELDS_MAX = 11,
};
enum btf_field_type {
@@ -177,19 +196,48 @@ enum btf_field_type {
BPF_TIMER = (1 << 1),
BPF_KPTR_UNREF = (1 << 2),
BPF_KPTR_REF = (1 << 3),
- BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF,
- BPF_LIST_HEAD = (1 << 4),
- BPF_LIST_NODE = (1 << 5),
+ BPF_KPTR_PERCPU = (1 << 4),
+ BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF | BPF_KPTR_PERCPU,
+ BPF_LIST_HEAD = (1 << 5),
+ BPF_LIST_NODE = (1 << 6),
+ BPF_RB_ROOT = (1 << 7),
+ BPF_RB_NODE = (1 << 8),
+ BPF_GRAPH_NODE = BPF_RB_NODE | BPF_LIST_NODE,
+ BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD,
+ BPF_REFCOUNT = (1 << 9),
+ BPF_WORKQUEUE = (1 << 10),
+ BPF_UPTR = (1 << 11),
+ BPF_RES_SPIN_LOCK = (1 << 12),
+ BPF_TASK_WORK = (1 << 13),
};
+enum bpf_cgroup_storage_type {
+ BPF_CGROUP_STORAGE_SHARED,
+ BPF_CGROUP_STORAGE_PERCPU,
+ __BPF_CGROUP_STORAGE_MAX
+#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX
+};
+
+#ifdef CONFIG_CGROUP_BPF
+# define for_each_cgroup_storage_type(stype) \
+ for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
+#else
+# define for_each_cgroup_storage_type(stype) for (; false; )
+#endif /* CONFIG_CGROUP_BPF */
+
+typedef void (*btf_dtor_kfunc_t)(void *);
+
struct btf_field_kptr {
struct btf *btf;
struct module *module;
+ /* dtor used if btf_is_kernel(btf), otherwise the type is
+ * program-allocated, dtor is NULL, and __bpf_obj_drop_impl is used
+ */
btf_dtor_kfunc_t dtor;
u32 btf_id;
};
-struct btf_field_list_head {
+struct btf_field_graph_root {
struct btf *btf;
u32 value_btf_id;
u32 node_offset;
@@ -198,10 +246,11 @@ struct btf_field_list_head {
struct btf_field {
u32 offset;
+ u32 size;
enum btf_field_type type;
union {
struct btf_field_kptr kptr;
- struct btf_field_list_head list_head;
+ struct btf_field_graph_root graph_root;
};
};
@@ -209,21 +258,43 @@ struct btf_record {
u32 cnt;
u32 field_mask;
int spin_lock_off;
+ int res_spin_lock_off;
int timer_off;
+ int wq_off;
+ int refcount_off;
+ int task_work_off;
struct btf_field fields[];
};
-struct btf_field_offs {
- u32 cnt;
- u32 field_off[BTF_FIELDS_MAX];
- u8 field_sz[BTF_FIELDS_MAX];
+/* Non-opaque version of bpf_rb_node in uapi/linux/bpf.h */
+struct bpf_rb_node_kern {
+ struct rb_node rb_node;
+ void *owner;
+} __attribute__((aligned(8)));
+
+/* Non-opaque version of bpf_list_node in uapi/linux/bpf.h */
+struct bpf_list_node_kern {
+ struct list_head list_head;
+ void *owner;
+} __attribute__((aligned(8)));
+
+/* 'Ownership' of program-containing map is claimed by the first program
+ * that is going to use this map or by the first program which FD is
+ * stored in the map to make sure that all callers and callees have the
+ * same prog type, JITed flag and xdp_has_frags flag.
+ */
+struct bpf_map_owner {
+ enum bpf_prog_type type;
+ bool jited;
+ bool xdp_has_frags;
+ u64 storage_cookie[MAX_BPF_CGROUP_STORAGE_TYPE];
+ const struct btf_type *attach_func_proto;
+ enum bpf_attach_type expected_attach_type;
};
struct bpf_map {
- /* The first two cachelines with read-mostly members of which some
- * are also accessed in fast-path (e.g. ops, max_entries).
- */
- const struct bpf_map_ops *ops ____cacheline_aligned;
+ u8 sha[SHA256_DIGEST_SIZE];
+ const struct bpf_map_ops *ops;
struct bpf_map *inner_map_meta;
#ifdef CONFIG_SECURITY
void *security;
@@ -241,32 +312,29 @@ struct bpf_map {
u32 btf_value_type_id;
u32 btf_vmlinux_value_type_id;
struct btf *btf;
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
struct obj_cgroup *objcg;
#endif
char name[BPF_OBJ_NAME_LEN];
- struct btf_field_offs *field_offs;
- /* The 3rd and 4th cacheline with misc members to avoid false sharing
- * particularly with refcounting.
- */
- atomic64_t refcnt ____cacheline_aligned;
- atomic64_t usercnt;
- struct work_struct work;
struct mutex freeze_mutex;
+ atomic64_t refcnt;
+ atomic64_t usercnt;
+ /* rcu is used before freeing and work is only used during freeing */
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
atomic64_t writecnt;
- /* 'Ownership' of program-containing map is claimed by the first program
- * that is going to use this map or by the first program which FD is
- * stored in the map to make sure that all callers and callees have the
- * same prog type, JITed flag and xdp_has_frags flag.
- */
- struct {
- spinlock_t lock;
- enum bpf_prog_type type;
- bool jited;
- bool xdp_has_frags;
- } owner;
+ spinlock_t owner_lock;
+ struct bpf_map_owner *owner;
bool bypass_spec_v1;
bool frozen; /* write-once; write-protected by freeze_mutex */
+ bool free_after_mult_rcu_gp;
+ bool free_after_rcu_gp;
+ atomic64_t sleepable_refcnt;
+ s64 __percpu *elem_count;
+ u64 cookie; /* write-once */
+ char *excl_prog_sha;
};
static inline const char *btf_field_type_name(enum btf_field_type type)
@@ -274,35 +342,71 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
switch (type) {
case BPF_SPIN_LOCK:
return "bpf_spin_lock";
+ case BPF_RES_SPIN_LOCK:
+ return "bpf_res_spin_lock";
case BPF_TIMER:
return "bpf_timer";
+ case BPF_WORKQUEUE:
+ return "bpf_wq";
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
return "kptr";
+ case BPF_KPTR_PERCPU:
+ return "percpu_kptr";
+ case BPF_UPTR:
+ return "uptr";
case BPF_LIST_HEAD:
return "bpf_list_head";
case BPF_LIST_NODE:
return "bpf_list_node";
+ case BPF_RB_ROOT:
+ return "bpf_rb_root";
+ case BPF_RB_NODE:
+ return "bpf_rb_node";
+ case BPF_REFCOUNT:
+ return "bpf_refcount";
+ case BPF_TASK_WORK:
+ return "bpf_task_work";
default:
WARN_ON_ONCE(1);
return "unknown";
}
}
+#if IS_ENABLED(CONFIG_DEBUG_KERNEL)
+#define BPF_WARN_ONCE(cond, format...) WARN_ONCE(cond, format)
+#else
+#define BPF_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
+#endif
+
static inline u32 btf_field_type_size(enum btf_field_type type)
{
switch (type) {
case BPF_SPIN_LOCK:
return sizeof(struct bpf_spin_lock);
+ case BPF_RES_SPIN_LOCK:
+ return sizeof(struct bpf_res_spin_lock);
case BPF_TIMER:
return sizeof(struct bpf_timer);
+ case BPF_WORKQUEUE:
+ return sizeof(struct bpf_wq);
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
+ case BPF_UPTR:
return sizeof(u64);
case BPF_LIST_HEAD:
return sizeof(struct bpf_list_head);
case BPF_LIST_NODE:
return sizeof(struct bpf_list_node);
+ case BPF_RB_ROOT:
+ return sizeof(struct bpf_rb_root);
+ case BPF_RB_NODE:
+ return sizeof(struct bpf_rb_node);
+ case BPF_REFCOUNT:
+ return sizeof(struct bpf_refcount);
+ case BPF_TASK_WORK:
+ return sizeof(struct bpf_task_work);
default:
WARN_ON_ONCE(1);
return 0;
@@ -314,21 +418,68 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
switch (type) {
case BPF_SPIN_LOCK:
return __alignof__(struct bpf_spin_lock);
+ case BPF_RES_SPIN_LOCK:
+ return __alignof__(struct bpf_res_spin_lock);
case BPF_TIMER:
return __alignof__(struct bpf_timer);
+ case BPF_WORKQUEUE:
+ return __alignof__(struct bpf_wq);
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
+ case BPF_UPTR:
return __alignof__(u64);
case BPF_LIST_HEAD:
return __alignof__(struct bpf_list_head);
case BPF_LIST_NODE:
return __alignof__(struct bpf_list_node);
+ case BPF_RB_ROOT:
+ return __alignof__(struct bpf_rb_root);
+ case BPF_RB_NODE:
+ return __alignof__(struct bpf_rb_node);
+ case BPF_REFCOUNT:
+ return __alignof__(struct bpf_refcount);
+ case BPF_TASK_WORK:
+ return __alignof__(struct bpf_task_work);
default:
WARN_ON_ONCE(1);
return 0;
}
}
+static inline void bpf_obj_init_field(const struct btf_field *field, void *addr)
+{
+ memset(addr, 0, field->size);
+
+ switch (field->type) {
+ case BPF_REFCOUNT:
+ refcount_set((refcount_t *)addr, 1);
+ break;
+ case BPF_RB_NODE:
+ RB_CLEAR_NODE((struct rb_node *)addr);
+ break;
+ case BPF_LIST_HEAD:
+ case BPF_LIST_NODE:
+ INIT_LIST_HEAD((struct list_head *)addr);
+ break;
+ case BPF_RB_ROOT:
+ /* RB_ROOT_CACHED 0-inits, no need to do anything after memset */
+ case BPF_SPIN_LOCK:
+ case BPF_RES_SPIN_LOCK:
+ case BPF_TIMER:
+ case BPF_WORKQUEUE:
+ case BPF_KPTR_UNREF:
+ case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
+ case BPF_UPTR:
+ case BPF_TASK_WORK:
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return;
+ }
+}
+
static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_field_type type)
{
if (IS_ERR_OR_NULL(rec))
@@ -336,19 +487,26 @@ static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_f
return rec->field_mask & type;
}
-static inline void bpf_obj_init(const struct btf_field_offs *foffs, void *obj)
+static inline void bpf_obj_init(const struct btf_record *rec, void *obj)
{
int i;
- if (!foffs)
+ if (IS_ERR_OR_NULL(rec))
return;
- for (i = 0; i < foffs->cnt; i++)
- memset(obj + foffs->field_off[i], 0, foffs->field_sz[i]);
+ for (i = 0; i < rec->cnt; i++)
+ bpf_obj_init_field(&rec->fields[i], obj + rec->fields[i].offset);
}
+/* 'dst' must be a temporary buffer and should not point to memory that is being
+ * used in parallel by a bpf program or bpf syscall, otherwise the access from
+ * the bpf program or bpf syscall may be corrupted by the reinitialization,
+ * leading to weird problems. Even 'dst' is newly-allocated from bpf memory
+ * allocator, it is still possible for 'dst' to be used in parallel by a bpf
+ * program or bpf syscall.
+ */
static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
{
- bpf_obj_init(map->field_offs, dst);
+ bpf_obj_init(map->record, dst);
}
/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
@@ -364,18 +522,18 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
size /= sizeof(long);
while (size--)
- *ldst++ = *lsrc++;
+ data_race(*ldst++ = *lsrc++);
}
/* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each. */
-static inline void bpf_obj_memcpy(struct btf_field_offs *foffs,
+static inline void bpf_obj_memcpy(struct btf_record *rec,
void *dst, void *src, u32 size,
bool long_memcpy)
{
u32 curr_off = 0;
int i;
- if (likely(!foffs)) {
+ if (IS_ERR_OR_NULL(rec)) {
if (long_memcpy)
bpf_long_memcpy(dst, src, round_up(size, 8));
else
@@ -383,57 +541,81 @@ static inline void bpf_obj_memcpy(struct btf_field_offs *foffs,
return;
}
- for (i = 0; i < foffs->cnt; i++) {
- u32 next_off = foffs->field_off[i];
+ for (i = 0; i < rec->cnt; i++) {
+ u32 next_off = rec->fields[i].offset;
u32 sz = next_off - curr_off;
memcpy(dst + curr_off, src + curr_off, sz);
- curr_off += foffs->field_sz[i] + sz;
+ curr_off += rec->fields[i].size + sz;
}
memcpy(dst + curr_off, src + curr_off, size - curr_off);
}
static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
{
- bpf_obj_memcpy(map->field_offs, dst, src, map->value_size, false);
+ bpf_obj_memcpy(map->record, dst, src, map->value_size, false);
}
static inline void copy_map_value_long(struct bpf_map *map, void *dst, void *src)
{
- bpf_obj_memcpy(map->field_offs, dst, src, map->value_size, true);
+ bpf_obj_memcpy(map->record, dst, src, map->value_size, true);
+}
+
+static inline void bpf_obj_swap_uptrs(const struct btf_record *rec, void *dst, void *src)
+{
+ unsigned long *src_uptr, *dst_uptr;
+ const struct btf_field *field;
+ int i;
+
+ if (!btf_record_has_field(rec, BPF_UPTR))
+ return;
+
+ for (i = 0, field = rec->fields; i < rec->cnt; i++, field++) {
+ if (field->type != BPF_UPTR)
+ continue;
+
+ src_uptr = src + field->offset;
+ dst_uptr = dst + field->offset;
+ swap(*src_uptr, *dst_uptr);
+ }
}
-static inline void bpf_obj_memzero(struct btf_field_offs *foffs, void *dst, u32 size)
+static inline void bpf_obj_memzero(struct btf_record *rec, void *dst, u32 size)
{
u32 curr_off = 0;
int i;
- if (likely(!foffs)) {
+ if (IS_ERR_OR_NULL(rec)) {
memset(dst, 0, size);
return;
}
- for (i = 0; i < foffs->cnt; i++) {
- u32 next_off = foffs->field_off[i];
+ for (i = 0; i < rec->cnt; i++) {
+ u32 next_off = rec->fields[i].offset;
u32 sz = next_off - curr_off;
memset(dst + curr_off, 0, sz);
- curr_off += foffs->field_sz[i] + sz;
+ curr_off += rec->fields[i].size + sz;
}
memset(dst + curr_off, 0, size - curr_off);
}
static inline void zero_map_value(struct bpf_map *map, void *dst)
{
- bpf_obj_memzero(map->field_offs, dst, map->value_size);
+ bpf_obj_memzero(map->record, dst, map->value_size);
}
void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
bool lock_src);
void bpf_timer_cancel_and_free(void *timer);
+void bpf_wq_cancel_and_free(void *timer);
+void bpf_task_work_cancel_and_free(void *timer);
void bpf_list_head_free(const struct btf_field *field, void *list_head,
struct bpf_spin_lock *spin_lock);
-
+void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
+ struct bpf_spin_lock *spin_lock);
+u64 bpf_arena_get_kern_vm_start(struct bpf_arena *arena);
+u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena);
int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size);
struct bpf_offload_dev;
@@ -481,6 +663,16 @@ int map_check_no_btf(const struct bpf_map *map,
bool bpf_map_meta_equal(const struct bpf_map *meta0,
const struct bpf_map *meta1);
+static inline bool bpf_map_has_internal_structs(struct bpf_map *map)
+{
+ return btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK);
+}
+
+void bpf_map_free_internal_structs(struct bpf_map *map, void *obj);
+
+int bpf_dynptr_from_file_sleepable(struct file *file, u32 flags,
+ struct bpf_dynptr *ptr__uninit);
+
extern const struct bpf_map_ops bpf_map_offload_ops;
/* bpf_type_flag contains a set of flags that are applicable to the values of
@@ -527,6 +719,7 @@ enum bpf_type_flag {
*/
PTR_UNTRUSTED = BIT(6 + BPF_BASE_TYPE_BITS),
+ /* MEM can be uninitialized. */
MEM_UNINIT = BIT(7 + BPF_BASE_TYPE_BITS),
/* DYNPTR points to memory local to the bpf program. */
@@ -575,11 +768,42 @@ enum bpf_type_flag {
/* MEM is tagged with rcu and memory access needs rcu_read_lock protection. */
MEM_RCU = BIT(13 + BPF_BASE_TYPE_BITS),
+ /* Used to tag PTR_TO_BTF_ID | MEM_ALLOC references which are non-owning.
+ * Currently only valid for linked-list and rbtree nodes. If the nodes
+ * have a bpf_refcount_field, they must be tagged MEM_RCU as well.
+ */
+ NON_OWN_REF = BIT(14 + BPF_BASE_TYPE_BITS),
+
+ /* DYNPTR points to sk_buff */
+ DYNPTR_TYPE_SKB = BIT(15 + BPF_BASE_TYPE_BITS),
+
+ /* DYNPTR points to xdp_buff */
+ DYNPTR_TYPE_XDP = BIT(16 + BPF_BASE_TYPE_BITS),
+
+ /* Memory must be aligned on some architectures, used in combination with
+ * MEM_FIXED_SIZE.
+ */
+ MEM_ALIGNED = BIT(17 + BPF_BASE_TYPE_BITS),
+
+ /* MEM is being written to, often combined with MEM_UNINIT. Non-presence
+ * of MEM_WRITE means that MEM is only being read. MEM_WRITE without the
+ * MEM_UNINIT means that memory needs to be initialized since it is also
+ * read.
+ */
+ MEM_WRITE = BIT(18 + BPF_BASE_TYPE_BITS),
+
+ /* DYNPTR points to skb_metadata_end()-skb_metadata_len() */
+ DYNPTR_TYPE_SKB_META = BIT(19 + BPF_BASE_TYPE_BITS),
+
+ /* DYNPTR points to file */
+ DYNPTR_TYPE_FILE = BIT(20 + BPF_BASE_TYPE_BITS),
+
__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};
-#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF)
+#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \
+ | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META | DYNPTR_TYPE_FILE)
/* Max number of base types. */
#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS)
@@ -602,6 +826,7 @@ enum bpf_arg_type {
* on eBPF program stack
*/
ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */
+ ARG_PTR_TO_ARENA,
ARG_CONST_SIZE, /* number of bytes accessed from memory */
ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */
@@ -610,8 +835,6 @@ enum bpf_arg_type {
ARG_ANYTHING, /* any (initialized) argument is ok */
ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */
ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */
- ARG_PTR_TO_INT, /* pointer to int */
- ARG_PTR_TO_LONG, /* pointer to long */
ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */
ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */
ARG_PTR_TO_RINGBUF_MEM, /* pointer to dynamically reserved ringbuf memory */
@@ -622,7 +845,7 @@ enum bpf_arg_type {
ARG_PTR_TO_STACK, /* pointer to stack */
ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */
ARG_PTR_TO_TIMER, /* pointer to bpf_timer */
- ARG_PTR_TO_KPTR, /* pointer to referenced kptr */
+ ARG_KPTR_XCHG_DEST, /* pointer to destination that kptrs are bpf_kptr_xchg'd into */
ARG_PTR_TO_DYNPTR, /* pointer to bpf_dynptr. See bpf_type_flag for dynptr type */
__BPF_ARG_TYPE_MAX,
@@ -633,10 +856,10 @@ enum bpf_arg_type {
ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET,
ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK,
ARG_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID,
- /* pointer to memory does not need to be initialized, helper function must fill
- * all bytes or clear them in error case.
+ /* Pointer to memory does not need to be initialized, since helper function
+ * fills all bytes or clears them in error case.
*/
- ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | ARG_PTR_TO_MEM,
+ ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | MEM_WRITE | ARG_PTR_TO_MEM,
/* Pointer to valid memory of size known at compile time. */
ARG_PTR_TO_FIXED_SIZE_MEM = MEM_FIXED_SIZE | ARG_PTR_TO_MEM,
@@ -686,6 +909,12 @@ struct bpf_func_proto {
bool gpl_only;
bool pkt_access;
bool might_sleep;
+ /* set to true if helper follows contract for llvm
+ * attribute bpf_fastcall:
+ * - void functions do not scratch r0
+ * - functions taking N arguments scratch only registers r1-rN
+ */
+ bool allow_fastcall;
enum bpf_return_type ret_type;
union {
struct {
@@ -768,13 +997,11 @@ enum bpf_reg_type {
* additional context, assume the value is non-null.
*/
PTR_TO_BTF_ID,
- /* PTR_TO_BTF_ID_OR_NULL points to a kernel struct that has not
- * been checked for null. Used primarily to inform the verifier
- * an explicit null check is required for this struct.
- */
PTR_TO_MEM, /* reg points to valid memory region */
+ PTR_TO_ARENA,
PTR_TO_BUF, /* reg points to a read/write buffer */
PTR_TO_FUNC, /* reg points to a bpf program function */
+ PTR_TO_INSN, /* reg points to a bpf program instruction */
CONST_PTR_TO_DYNPTR, /* reg points to a const struct bpf_dynptr */
__BPF_REG_TYPE_MAX,
@@ -783,6 +1010,10 @@ enum bpf_reg_type {
PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCKET,
PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCK_COMMON,
PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | PTR_TO_TCP_SOCK,
+ /* PTR_TO_BTF_ID_OR_NULL points to a kernel struct that has not
+ * been checked for null. Used primarily to inform the verifier
+ * an explicit null check is required for this struct.
+ */
PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | PTR_TO_BTF_ID,
/* This must be the last entry. Its purpose is to ensure the enum is
@@ -797,14 +1028,17 @@ static_assert(__BPF_REG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT);
*/
struct bpf_insn_access_aux {
enum bpf_reg_type reg_type;
+ bool is_ldsx;
union {
int ctx_field_size;
struct {
struct btf *btf;
u32 btf_id;
+ u32 ref_obj_id;
};
};
struct bpf_verifier_log *log; /* for verbose logs */
+ bool is_retval; /* is accessing function return value ? */
};
static inline void
@@ -813,10 +1047,29 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size)
aux->ctx_field_size = size;
}
+static bool bpf_is_ldimm64(const struct bpf_insn *insn)
+{
+ return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+}
+
static inline bool bpf_pseudo_func(const struct bpf_insn *insn)
{
- return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
- insn->src_reg == BPF_PSEUDO_FUNC;
+ return bpf_is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
+}
+
+/* Given a BPF_ATOMIC instruction @atomic_insn, return true if it is an
+ * atomic load or store, and false if it is a read-modify-write instruction.
+ */
+static inline bool
+bpf_atomic_is_load_store(const struct bpf_insn *atomic_insn)
+{
+ switch (atomic_insn->imm) {
+ case BPF_LOAD_ACQ:
+ case BPF_STORE_REL:
+ return true;
+ default:
+ return false;
+ }
}
struct bpf_prog_ops {
@@ -839,6 +1092,8 @@ struct bpf_verifier_ops {
struct bpf_insn_access_aux *info);
int (*gen_prologue)(struct bpf_insn *insn, bool direct_write,
const struct bpf_prog *prog);
+ int (*gen_epilogue)(struct bpf_insn *insn, const struct bpf_prog *prog,
+ s16 ctx_stack_off);
int (*gen_ld_abs)(const struct bpf_insn *orig,
struct bpf_insn *insn_buf);
u32 (*convert_ctx_access)(enum bpf_access_type type,
@@ -847,8 +1102,7 @@ struct bpf_verifier_ops {
struct bpf_prog *prog, u32 *target_size);
int (*btf_struct_access)(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
- int off, int size, enum bpf_access_type atype,
- u32 *next_btf_id, enum bpf_type_flag *flag);
+ int off, int size);
};
struct bpf_prog_offload_ops {
@@ -878,14 +1132,6 @@ struct bpf_prog_offload {
u32 jited_len;
};
-enum bpf_cgroup_storage_type {
- BPF_CGROUP_STORAGE_SHARED,
- BPF_CGROUP_STORAGE_PERCPU,
- __BPF_CGROUP_STORAGE_MAX
-};
-
-#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX
-
/* The longest tracepoint has 12 args.
* See include/trace/bpf_probe.h
*/
@@ -896,11 +1142,15 @@ enum bpf_cgroup_storage_type {
*/
#define MAX_BPF_FUNC_REG_ARGS 5
-/* The argument is a structure. */
+/* The argument is a structure or a union. */
#define BTF_FMODEL_STRUCT_ARG BIT(0)
+/* The argument is signed. */
+#define BTF_FMODEL_SIGNED_ARG BIT(1)
+
struct btf_func_model {
u8 ret_size;
+ u8 ret_flags;
u8 nr_args;
u8 arg_size[MAX_BPF_FUNC_ARGS];
u8 arg_flags[MAX_BPF_FUNC_ARGS];
@@ -936,10 +1186,32 @@ struct btf_func_model {
*/
#define BPF_TRAMP_F_SHARE_IPMODIFY BIT(6)
+/* Indicate that current trampoline is in a tail call context. Then, it has to
+ * cache and restore tail_call_cnt to avoid infinite tail call loop.
+ */
+#define BPF_TRAMP_F_TAIL_CALL_CTX BIT(7)
+
+/*
+ * Indicate the trampoline should be suitable to receive indirect calls;
+ * without this indirectly calling the generated code can result in #UD/#CP,
+ * depending on the CFI options.
+ *
+ * Used by bpf_struct_ops.
+ *
+ * Incompatible with FENTRY usage, overloads @func_addr argument.
+ */
+#define BPF_TRAMP_F_INDIRECT BIT(8)
+
/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
* bytes on x86.
*/
-#define BPF_MAX_TRAMP_LINKS 38
+enum {
+#if defined(__s390x__)
+ BPF_MAX_TRAMP_LINKS = 27,
+#else
+ BPF_MAX_TRAMP_LINKS = 38,
+#endif
+};
struct bpf_tramp_links {
struct bpf_tramp_link *links[BPF_MAX_TRAMP_LINKS];
@@ -969,10 +1241,16 @@ struct bpf_tramp_run_ctx;
* fexit = a set of program to run after original function
*/
struct bpf_tramp_image;
-int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end,
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
struct bpf_tramp_links *tlinks,
- void *orig_call);
+ void *func_addr);
+void *arch_alloc_bpf_trampoline(unsigned int size);
+void arch_free_bpf_trampoline(void *image, unsigned int size);
+int __must_check arch_protect_bpf_trampoline(void *image, unsigned int size);
+int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
+ struct bpf_tramp_links *tlinks, void *func_addr);
+
u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
struct bpf_tramp_run_ctx *run_ctx);
void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start,
@@ -986,6 +1264,18 @@ typedef void (*bpf_trampoline_exit_t)(struct bpf_prog *prog, u64 start,
bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog);
bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog);
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP
+static inline bool bpf_trampoline_use_jmp(u64 flags)
+{
+ return flags & BPF_TRAMP_F_CALL_ORIG && !(flags & BPF_TRAMP_F_SKIP_FRAME);
+}
+#else
+static inline bool bpf_trampoline_use_jmp(u64 flags)
+{
+ return false;
+}
+#endif
+
struct bpf_ksym {
unsigned long start;
unsigned long end;
@@ -1005,6 +1295,7 @@ enum bpf_tramp_prog_type {
struct bpf_tramp_image {
void *image;
+ int size;
struct bpf_ksym ksym;
struct percpu_ref pcref;
void *ip_after_call;
@@ -1040,13 +1331,12 @@ struct bpf_trampoline {
int progs_cnt[BPF_TRAMP_MAX];
/* Executable image of trampoline */
struct bpf_tramp_image *cur_image;
- u64 selector;
- struct module *mod;
};
struct bpf_attach_target_info {
struct btf_func_model fmodel;
long tgt_addr;
+ struct module *tgt_mod;
const char *tgt_name;
const struct btf_type *tgt_type;
};
@@ -1074,7 +1364,11 @@ struct bpf_dispatcher {
#endif
};
-static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func(
+#ifndef __bpfcall
+#define __bpfcall __nocfi
+#endif
+
+static __always_inline __bpfcall unsigned int bpf_dispatcher_nop_func(
const void *ctx,
const struct bpf_insn *insnsi,
bpf_func_t bpf_func)
@@ -1082,9 +1376,65 @@ static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func(
return bpf_func(ctx, insnsi);
}
+/* the implementation of the opaque uapi struct bpf_dynptr */
+struct bpf_dynptr_kern {
+ void *data;
+ /* Size represents the number of usable bytes of dynptr data.
+ * If for example the offset is at 4 for a local dynptr whose data is
+ * of type u64, the number of usable bytes is 4.
+ *
+ * The upper 8 bits are reserved. It is as follows:
+ * Bits 0 - 23 = size
+ * Bits 24 - 30 = dynptr type
+ * Bit 31 = whether dynptr is read-only
+ */
+ u32 size;
+ u32 offset;
+} __aligned(8);
+
+enum bpf_dynptr_type {
+ BPF_DYNPTR_TYPE_INVALID,
+ /* Points to memory that is local to the bpf program */
+ BPF_DYNPTR_TYPE_LOCAL,
+ /* Underlying data is a ringbuf record */
+ BPF_DYNPTR_TYPE_RINGBUF,
+ /* Underlying data is a sk_buff */
+ BPF_DYNPTR_TYPE_SKB,
+ /* Underlying data is a xdp_buff */
+ BPF_DYNPTR_TYPE_XDP,
+ /* Points to skb_metadata_end()-skb_metadata_len() */
+ BPF_DYNPTR_TYPE_SKB_META,
+ /* Underlying data is a file */
+ BPF_DYNPTR_TYPE_FILE,
+};
+
+int bpf_dynptr_check_size(u64 size);
+u64 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr);
+const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u64 len);
+void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u64 len);
+bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr);
+int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u64 offset,
+ void *src, u64 len, u64 flags);
+void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u64 offset,
+ void *buffer__opt, u64 buffer__szk);
+
+static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u64 offset, u64 len)
+{
+ u64 size = __bpf_dynptr_size(ptr);
+
+ if (len > size || offset > size - len)
+ return -E2BIG;
+
+ return 0;
+}
+
#ifdef CONFIG_BPF_JIT
-int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
-int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
+int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
+ struct bpf_trampoline *tr,
+ struct bpf_prog *tgt_prog);
+int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
+ struct bpf_trampoline *tr,
+ struct bpf_prog *tgt_prog);
struct bpf_trampoline *bpf_trampoline_get(u64 key,
struct bpf_attach_target_info *tgt_info);
void bpf_trampoline_put(struct bpf_trampoline *tr);
@@ -1133,7 +1483,7 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
#define DEFINE_BPF_DISPATCHER(name) \
__BPF_DISPATCHER_SC(name); \
- noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \
+ noinline __bpfcall unsigned int bpf_dispatcher_##name##_func( \
const void *ctx, \
const struct bpf_insn *insnsi, \
bpf_func_t bpf_func) \
@@ -1156,7 +1506,8 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
struct bpf_prog *to);
/* Called only from JIT-enabled code, so there's no need for stubs. */
-void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym);
+void bpf_image_ksym_init(void *data, unsigned int size, struct bpf_ksym *ksym);
+void bpf_image_ksym_add(struct bpf_ksym *ksym);
void bpf_image_ksym_del(struct bpf_ksym *ksym);
void bpf_ksym_add(struct bpf_ksym *ksym);
void bpf_ksym_del(struct bpf_ksym *ksym);
@@ -1165,19 +1516,21 @@ void bpf_jit_uncharge_modmem(u32 size);
bool bpf_prog_has_trampoline(const struct bpf_prog *prog);
#else
static inline int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
- struct bpf_trampoline *tr)
+ struct bpf_trampoline *tr,
+ struct bpf_prog *tgt_prog)
{
return -ENOTSUPP;
}
static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
- struct bpf_trampoline *tr)
+ struct bpf_trampoline *tr,
+ struct bpf_prog *tgt_prog)
{
return -ENOTSUPP;
}
static inline struct bpf_trampoline *bpf_trampoline_get(u64 key,
struct bpf_attach_target_info *tgt_info)
{
- return ERR_PTR(-EOPNOTSUPP);
+ return NULL;
}
static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
#define DEFINE_BPF_DISPATCHER(name)
@@ -1200,6 +1553,8 @@ static inline bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
struct bpf_func_info_aux {
u16 linkage;
bool unreliable;
+ bool called : 1;
+ bool verified : 1;
};
enum bpf_jit_poke_reason {
@@ -1228,7 +1583,10 @@ struct bpf_jit_poke_descriptor {
struct bpf_ctx_arg_aux {
u32 offset;
enum bpf_reg_type reg_type;
+ struct btf *btf;
u32 btf_id;
+ u32 ref_obj_id;
+ bool refcounted;
};
struct btf_mod_pair {
@@ -1238,6 +1596,37 @@ struct btf_mod_pair {
struct bpf_kfunc_desc_tab;
+enum bpf_stream_id {
+ BPF_STDOUT = 1,
+ BPF_STDERR = 2,
+};
+
+struct bpf_stream_elem {
+ struct llist_node node;
+ int total_len;
+ int consumed_len;
+ char str[];
+};
+
+enum {
+ /* 100k bytes */
+ BPF_STREAM_MAX_CAPACITY = 100000ULL,
+};
+
+struct bpf_stream {
+ atomic_t capacity;
+ struct llist_head log; /* list of in-flight stream elements in LIFO order */
+
+ struct mutex lock; /* lock protecting backlog_{head,tail} */
+ struct llist_node *backlog_head; /* list of in-flight stream elements in FIFO order */
+ struct llist_node *backlog_tail; /* tail of the list above */
+};
+
+struct bpf_stream_stage {
+ struct llist_head log;
+ int len;
+};
+
struct bpf_prog_aux {
atomic64_t refcnt;
u32 used_map_cnt;
@@ -1248,37 +1637,59 @@ struct bpf_prog_aux {
u32 stack_depth;
u32 id;
u32 func_cnt; /* used by non-func prog as the number of func progs */
+ u32 real_func_cnt; /* includes hidden progs, only used for JIT and freeing progs */
u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
u32 attach_btf_id; /* in-kernel BTF type id to attach to */
+ u32 attach_st_ops_member_off;
u32 ctx_arg_info_size;
u32 max_rdonly_access;
u32 max_rdwr_access;
+ u32 subprog_start;
struct btf *attach_btf;
- const struct bpf_ctx_arg_aux *ctx_arg_info;
+ struct bpf_ctx_arg_aux *ctx_arg_info;
+ void __percpu *priv_stack_ptr;
struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */
struct bpf_prog *dst_prog;
struct bpf_trampoline *dst_trampoline;
enum bpf_prog_type saved_dst_prog_type;
enum bpf_attach_type saved_dst_attach_type;
bool verifier_zext; /* Zero extensions has been inserted by verifier. */
- bool offload_requested;
+ bool dev_bound; /* Program is bound to the netdev. */
+ bool offload_requested; /* Program is bound and offloaded to the netdev. */
bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
+ bool attach_tracing_prog; /* true if tracing another tracing program */
bool func_proto_unreliable;
- bool sleepable;
bool tail_call_reachable;
bool xdp_has_frags;
+ bool exception_cb;
+ bool exception_boundary;
+ bool is_extended; /* true if extended by freplace program */
+ bool jits_use_priv_stack;
+ bool priv_stack_requested;
+ bool changes_pkt_data;
+ bool might_sleep;
+ bool kprobe_write_ctx;
+ u64 prog_array_member_cnt; /* counts how many times as member of prog_array */
+ struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */
+ struct bpf_arena *arena;
+ void (*recursion_detected)(struct bpf_prog *prog); /* callback if recursion is detected */
/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
const struct btf_type *attach_func_proto;
/* function name for valid attach_btf_id */
const char *attach_func_name;
struct bpf_prog **func;
+ struct bpf_prog_aux *main_prog_aux;
void *jit_data; /* JIT specific data. arch dependent */
struct bpf_jit_poke_descriptor *poke_tab;
struct bpf_kfunc_desc_tab *kfunc_tab;
struct bpf_kfunc_btf_tab *kfunc_btf_tab;
u32 size_poke_tab;
+#ifdef CONFIG_FINEIBT
+ struct bpf_ksym ksym_prefix;
+#endif
struct bpf_ksym ksym;
const struct bpf_prog_ops *ops;
+ const struct bpf_struct_ops *st_ops;
struct bpf_map **used_maps;
struct mutex used_maps_mutex; /* mutex for used_maps and used_map_cnt */
struct btf_mod_pair *used_btfs;
@@ -1289,9 +1700,11 @@ struct bpf_prog_aux {
int cgroup_atype; /* enum cgroup_bpf_attach_type */
struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
char name[BPF_OBJ_NAME_LEN];
+ u64 (*bpf_exception_cb)(u64 cookie, u64 sp, u64 bp, u64, u64);
#ifdef CONFIG_SECURITY
void *security;
#endif
+ struct bpf_token *token;
struct bpf_prog_offload *offload;
struct btf *btf;
struct bpf_func_info *func_info;
@@ -1318,12 +1731,14 @@ struct bpf_prog_aux {
* main prog always has linfo_idx == 0
*/
u32 linfo_idx;
+ struct module *mod;
u32 num_exentries;
struct exception_table_entry *extable;
union {
struct work_struct work;
struct rcu_head rcu;
};
+ struct bpf_stream stream[2];
};
struct bpf_prog {
@@ -1341,12 +1756,16 @@ struct bpf_prog {
enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */
call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */
call_get_func_ip:1, /* Do we call get_func_ip() */
- tstamp_type_access:1; /* Accessed __sk_buff->tstamp_type */
+ tstamp_type_access:1, /* Accessed __sk_buff->tstamp_type */
+ sleepable:1; /* BPF program is sleepable */
enum bpf_prog_type type; /* Type of BPF program */
enum bpf_attach_type expected_attach_type; /* For some prog types */
u32 len; /* Number of filter blocks */
u32 jited_len; /* Size of jited insns in bytes */
- u8 tag[BPF_TAG_SIZE];
+ union {
+ u8 digest[SHA256_DIGEST_SIZE];
+ u8 tag[BPF_TAG_SIZE];
+ };
struct bpf_prog_stats __percpu *stats;
int __percpu *active;
unsigned int (*bpf_func)(const void *ctx,
@@ -1374,18 +1793,46 @@ struct bpf_link {
enum bpf_link_type type;
const struct bpf_link_ops *ops;
struct bpf_prog *prog;
- struct work_struct work;
+
+ u32 flags;
+ enum bpf_attach_type attach_type;
+
+ /* rcu is used before freeing, work can be used to schedule that
+ * RCU-based freeing before that, so they never overlap
+ */
+ union {
+ struct rcu_head rcu;
+ struct work_struct work;
+ };
+ /* whether BPF link itself has "sleepable" semantics, which can differ
+ * from underlying BPF program having a "sleepable" semantics, as BPF
+ * link's semantics is determined by target attach hook
+ */
+ bool sleepable;
};
struct bpf_link_ops {
void (*release)(struct bpf_link *link);
+ /* deallocate link resources callback, called without RCU grace period
+ * waiting
+ */
void (*dealloc)(struct bpf_link *link);
+ /* deallocate link resources callback, called after RCU grace period;
+ * if either the underlying BPF program is sleepable or BPF link's
+ * target hook is sleepable, we'll go through tasks trace RCU GP and
+ * then "classic" RCU GP; this need for chaining tasks trace and
+ * classic RCU GPs is designated by setting bpf_link->sleepable flag
+ */
+ void (*dealloc_deferred)(struct bpf_link *link);
int (*detach)(struct bpf_link *link);
int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog,
struct bpf_prog *old_prog);
void (*show_fdinfo)(const struct bpf_link *link, struct seq_file *seq);
int (*fill_link_info)(const struct bpf_link *link,
struct bpf_link_info *info);
+ int (*update_map)(struct bpf_link *link, struct bpf_map *new_map,
+ struct bpf_map *old_map);
+ __poll_t (*poll)(struct file *file, struct poll_table_struct *pts);
};
struct bpf_tramp_link {
@@ -1401,11 +1848,16 @@ struct bpf_shim_tramp_link {
struct bpf_tracing_link {
struct bpf_tramp_link link;
- enum bpf_attach_type attach_type;
struct bpf_trampoline *trampoline;
struct bpf_prog *tgt_prog;
};
+struct bpf_raw_tp_link {
+ struct bpf_link link;
+ struct bpf_raw_event_map *btp;
+ u64 cookie;
+};
+
struct bpf_link_primer {
struct bpf_link *link;
struct file *file;
@@ -1413,40 +1865,167 @@ struct bpf_link_primer {
u32 id;
};
+struct bpf_mount_opts {
+ kuid_t uid;
+ kgid_t gid;
+ umode_t mode;
+
+ /* BPF token-related delegation options */
+ u64 delegate_cmds;
+ u64 delegate_maps;
+ u64 delegate_progs;
+ u64 delegate_attachs;
+};
+
+struct bpf_token {
+ struct work_struct work;
+ atomic64_t refcnt;
+ struct user_namespace *userns;
+ u64 allowed_cmds;
+ u64 allowed_maps;
+ u64 allowed_progs;
+ u64 allowed_attachs;
+#ifdef CONFIG_SECURITY
+ void *security;
+#endif
+};
+
struct bpf_struct_ops_value;
struct btf_member;
#define BPF_STRUCT_OPS_MAX_NR_MEMBERS 64
+/**
+ * struct bpf_struct_ops - A structure of callbacks allowing a subsystem to
+ * define a BPF_MAP_TYPE_STRUCT_OPS map type composed
+ * of BPF_PROG_TYPE_STRUCT_OPS progs.
+ * @verifier_ops: A structure of callbacks that are invoked by the verifier
+ * when determining whether the struct_ops progs in the
+ * struct_ops map are valid.
+ * @init: A callback that is invoked a single time, and before any other
+ * callback, to initialize the structure. A nonzero return value means
+ * the subsystem could not be initialized.
+ * @check_member: When defined, a callback invoked by the verifier to allow
+ * the subsystem to determine if an entry in the struct_ops map
+ * is valid. A nonzero return value means that the map is
+ * invalid and should be rejected by the verifier.
+ * @init_member: A callback that is invoked for each member of the struct_ops
+ * map to allow the subsystem to initialize the member. A nonzero
+ * value means the member could not be initialized. This callback
+ * is exclusive with the @type, @type_id, @value_type, and
+ * @value_id fields.
+ * @reg: A callback that is invoked when the struct_ops map has been
+ * initialized and is being attached to. Zero means the struct_ops map
+ * has been successfully registered and is live. A nonzero return value
+ * means the struct_ops map could not be registered.
+ * @unreg: A callback that is invoked when the struct_ops map should be
+ * unregistered.
+ * @update: A callback that is invoked when the live struct_ops map is being
+ * updated to contain new values. This callback is only invoked when
+ * the struct_ops map is loaded with BPF_F_LINK. If not defined, the
+ * it is assumed that the struct_ops map cannot be updated.
+ * @validate: A callback that is invoked after all of the members have been
+ * initialized. This callback should perform static checks on the
+ * map, meaning that it should either fail or succeed
+ * deterministically. A struct_ops map that has been validated may
+ * not necessarily succeed in being registered if the call to @reg
+ * fails. For example, a valid struct_ops map may be loaded, but
+ * then fail to be registered due to there being another active
+ * struct_ops map on the system in the subsystem already. For this
+ * reason, if this callback is not defined, the check is skipped as
+ * the struct_ops map will have final verification performed in
+ * @reg.
+ * @cfi_stubs: Pointer to a structure of stub functions for CFI. These stubs
+ * provide the correct Control Flow Integrity hashes for the
+ * trampolines generated by BPF struct_ops.
+ * @owner: The module that owns this struct_ops. Used for module reference
+ * counting to ensure the module providing the struct_ops cannot be
+ * unloaded while in use.
+ * @name: The name of the struct bpf_struct_ops object.
+ * @func_models: Func models
+ */
struct bpf_struct_ops {
const struct bpf_verifier_ops *verifier_ops;
int (*init)(struct btf *btf);
int (*check_member)(const struct btf_type *t,
- const struct btf_member *member);
+ const struct btf_member *member,
+ const struct bpf_prog *prog);
int (*init_member)(const struct btf_type *t,
const struct btf_member *member,
void *kdata, const void *udata);
- int (*reg)(void *kdata);
- void (*unreg)(void *kdata);
- const struct btf_type *type;
- const struct btf_type *value_type;
+ int (*reg)(void *kdata, struct bpf_link *link);
+ void (*unreg)(void *kdata, struct bpf_link *link);
+ int (*update)(void *kdata, void *old_kdata, struct bpf_link *link);
+ int (*validate)(void *kdata);
+ void *cfi_stubs;
+ struct module *owner;
const char *name;
struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS];
+};
+
+/* Every member of a struct_ops type has an instance even a member is not
+ * an operator (function pointer). The "info" field will be assigned to
+ * prog->aux->ctx_arg_info of BPF struct_ops programs to provide the
+ * argument information required by the verifier to verify the program.
+ *
+ * btf_ctx_access() will lookup prog->aux->ctx_arg_info to find the
+ * corresponding entry for an given argument.
+ */
+struct bpf_struct_ops_arg_info {
+ struct bpf_ctx_arg_aux *info;
+ u32 cnt;
+};
+
+struct bpf_struct_ops_desc {
+ struct bpf_struct_ops *st_ops;
+
+ const struct btf_type *type;
+ const struct btf_type *value_type;
u32 type_id;
u32 value_id;
+
+ /* Collection of argument information for each member */
+ struct bpf_struct_ops_arg_info *arg_info;
+};
+
+enum bpf_struct_ops_state {
+ BPF_STRUCT_OPS_STATE_INIT,
+ BPF_STRUCT_OPS_STATE_INUSE,
+ BPF_STRUCT_OPS_STATE_TOBEFREE,
+ BPF_STRUCT_OPS_STATE_READY,
+};
+
+struct bpf_struct_ops_common_value {
+ refcount_t refcnt;
+ enum bpf_struct_ops_state state;
};
#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
+/* This macro helps developer to register a struct_ops type and generate
+ * type information correctly. Developers should use this macro to register
+ * a struct_ops type instead of calling __register_bpf_struct_ops() directly.
+ */
+#define register_bpf_struct_ops(st_ops, type) \
+ ({ \
+ struct bpf_struct_ops_##type { \
+ struct bpf_struct_ops_common_value common; \
+ struct type data ____cacheline_aligned_in_smp; \
+ }; \
+ BTF_TYPE_EMIT(struct bpf_struct_ops_##type); \
+ __register_bpf_struct_ops(st_ops); \
+ })
#define BPF_MODULE_OWNER ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA))
-const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id);
-void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log);
bool bpf_struct_ops_get(const void *kdata);
void bpf_struct_ops_put(const void *kdata);
+int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff);
int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
void *value);
int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
struct bpf_tramp_link *link,
const struct btf_func_model *model,
- void *image, void *image_end);
+ void *stub_func,
+ void **image, u32 *image_off,
+ bool allow_alloc);
+void bpf_struct_ops_image_free(void *image);
static inline bool bpf_try_module_get(const void *data, struct module *owner)
{
if (owner == BPF_MODULE_OWNER)
@@ -1461,6 +2040,8 @@ static inline void bpf_module_put(const void *data, struct module *owner)
else
module_put(owner);
}
+int bpf_struct_ops_link_create(union bpf_attr *attr);
+u32 bpf_struct_ops_id(const void *kdata);
#ifdef CONFIG_NET
/* Define it here to avoid the use of forward declaration */
@@ -1472,20 +2053,19 @@ struct bpf_dummy_ops {
int (*test_1)(struct bpf_dummy_ops_state *cb);
int (*test_2)(struct bpf_dummy_ops_state *cb, int a1, unsigned short a2,
char a3, unsigned long a4);
+ int (*test_sleepable)(struct bpf_dummy_ops_state *cb);
};
int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr);
#endif
+int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc,
+ struct btf *btf,
+ struct bpf_verifier_log *log);
+void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map);
+void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc);
#else
-static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id)
-{
- return NULL;
-}
-static inline void bpf_struct_ops_init(struct btf *btf,
- struct bpf_verifier_log *log)
-{
-}
+#define register_bpf_struct_ops(st_ops, type) ({ (void *)(st_ops); 0; })
static inline bool bpf_try_module_get(const void *data, struct module *owner)
{
return try_module_get(owner);
@@ -1494,21 +2074,42 @@ static inline void bpf_module_put(const void *data, struct module *owner)
{
module_put(owner);
}
+static inline int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff)
+{
+ return -ENOTSUPP;
+}
static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map,
void *key,
void *value)
{
return -EINVAL;
}
+static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
+{
+ return -EOPNOTSUPP;
+}
+static inline void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map)
+{
+}
+
+static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc)
+{
+}
+
#endif
+int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog,
+ const struct bpf_ctx_arg_aux *info, u32 cnt);
+
#if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM)
int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
- int cgroup_atype);
+ int cgroup_atype,
+ enum bpf_attach_type attach_type);
void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog);
#else
static inline int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
- int cgroup_atype)
+ int cgroup_atype,
+ enum bpf_attach_type attach_type)
{
return -EOPNOTSUPP;
}
@@ -1523,17 +2124,28 @@ struct bpf_array {
u32 index_mask;
struct bpf_array_aux *aux;
union {
- char value[0] __aligned(8);
- void *ptrs[0] __aligned(8);
- void __percpu *pptrs[0] __aligned(8);
+ DECLARE_FLEX_ARRAY(char, value) __aligned(8);
+ DECLARE_FLEX_ARRAY(void *, ptrs) __aligned(8);
+ DECLARE_FLEX_ARRAY(void __percpu *, pptrs) __aligned(8);
};
};
+/*
+ * The bpf_array_get_next_key() function may be used for all array-like
+ * maps, i.e., maps with u32 keys with range [0 ,..., max_entries)
+ */
+int bpf_array_get_next_key(struct bpf_map *map, void *key, void *next_key);
+
#define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */
#define MAX_TAIL_CALL_CNT 33
-/* Maximum number of loops for bpf_loop */
-#define BPF_MAX_LOOPS BIT(23)
+/* Maximum number of loops for bpf_loop and bpf_iter_num.
+ * It's enum to expose it (and thus make it discoverable) through BTF.
+ */
+enum {
+ BPF_MAX_LOOPS = 8 * 1024 * 1024,
+ BPF_MAX_TIMED_LOOPS = 0xffff,
+};
#define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \
BPF_F_RDONLY_PROG | \
@@ -1569,6 +2181,16 @@ static inline bool bpf_map_flags_access_ok(u32 access_flags)
(BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG);
}
+static inline struct bpf_map_owner *bpf_map_owner_alloc(struct bpf_map *map)
+{
+ return kzalloc(sizeof(*map->owner), GFP_ATOMIC);
+}
+
+static inline void bpf_map_owner_free(struct bpf_map *map)
+{
+ kfree(map->owner);
+}
+
struct bpf_event_entry {
struct perf_event *event;
struct file *perf_file;
@@ -1589,6 +2211,8 @@ int bpf_prog_calc_tag(struct bpf_prog *fp);
const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void);
+const struct bpf_func_proto *bpf_get_perf_event_read_value_proto(void);
+
typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
unsigned long off, unsigned long len);
typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type,
@@ -1672,6 +2296,7 @@ struct bpf_cg_run_ctx {
struct bpf_trace_run_ctx {
struct bpf_run_ctx run_ctx;
u64 bpf_cookie;
+ bool is_uprobe;
};
struct bpf_tramp_run_ctx {
@@ -1720,6 +2345,8 @@ bpf_prog_run_array(const struct bpf_prog_array *array,
if (unlikely(!array))
return ret;
+ run_ctx.is_uprobe = false;
+
migrate_disable();
old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
item = &array->items[0];
@@ -1744,44 +2371,49 @@ bpf_prog_run_array(const struct bpf_prog_array *array,
* rcu-protected dynamically sized maps.
*/
static __always_inline u32
-bpf_prog_run_array_sleepable(const struct bpf_prog_array __rcu *array_rcu,
- const void *ctx, bpf_prog_run_fn run_prog)
+bpf_prog_run_array_uprobe(const struct bpf_prog_array *array,
+ const void *ctx, bpf_prog_run_fn run_prog)
{
const struct bpf_prog_array_item *item;
const struct bpf_prog *prog;
- const struct bpf_prog_array *array;
struct bpf_run_ctx *old_run_ctx;
struct bpf_trace_run_ctx run_ctx;
u32 ret = 1;
might_fault();
+ RCU_LOCKDEP_WARN(!rcu_read_lock_trace_held(), "no rcu lock held");
+
+ if (unlikely(!array))
+ return ret;
- rcu_read_lock_trace();
migrate_disable();
- array = rcu_dereference_check(array_rcu, rcu_read_lock_trace_held());
- if (unlikely(!array))
- goto out;
+ run_ctx.is_uprobe = true;
+
old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
item = &array->items[0];
while ((prog = READ_ONCE(item->prog))) {
- if (!prog->aux->sleepable)
+ if (!prog->sleepable)
rcu_read_lock();
run_ctx.bpf_cookie = item->bpf_cookie;
ret &= run_prog(prog, ctx);
item++;
- if (!prog->aux->sleepable)
+ if (!prog->sleepable)
rcu_read_unlock();
}
bpf_reset_run_ctx(old_run_ctx);
-out:
migrate_enable();
- rcu_read_unlock_trace();
return ret;
}
+bool bpf_jit_bypass_spec_v1(void);
+bool bpf_jit_bypass_spec_v4(void);
+
+#define bpf_rcu_lock_held() \
+ (rcu_read_lock_held() || rcu_read_lock_trace_held() || rcu_read_lock_bh_held())
+
#ifdef CONFIG_BPF_SYSCALL
DECLARE_PER_CPU(int, bpf_prog_active);
extern struct mutex bpf_stats_enabled_mutex;
@@ -1804,9 +2436,11 @@ static inline void bpf_enable_instrumentation(void)
migrate_enable();
}
+extern const struct super_operations bpf_super_ops;
extern const struct file_operations bpf_map_fops;
extern const struct file_operations bpf_prog_fops;
extern const struct file_operations bpf_iter_fops;
+extern const struct file_operations bpf_token_fops;
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
extern const struct bpf_prog_ops _name ## _prog_ops; \
@@ -1833,22 +2467,52 @@ struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog);
void bpf_prog_put(struct bpf_prog *prog);
void bpf_prog_free_id(struct bpf_prog *prog);
-void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock);
+void bpf_map_free_id(struct bpf_map *map);
struct btf_field *btf_record_find(const struct btf_record *rec,
- u32 offset, enum btf_field_type type);
+ u32 offset, u32 field_mask);
void btf_record_free(struct btf_record *rec);
void bpf_map_free_record(struct bpf_map *map);
struct btf_record *btf_record_dup(const struct btf_record *rec);
bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b);
void bpf_obj_free_timer(const struct btf_record *rec, void *obj);
+void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj);
+void bpf_obj_free_task_work(const struct btf_record *rec, void *obj);
void bpf_obj_free_fields(const struct btf_record *rec, void *obj);
+void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu);
struct bpf_map *bpf_map_get(u32 ufd);
struct bpf_map *bpf_map_get_with_uref(u32 ufd);
-struct bpf_map *__bpf_map_get(struct fd f);
+
+/*
+ * The __bpf_map_get() and __btf_get_by_fd() functions parse a file
+ * descriptor and return a corresponding map or btf object.
+ * Their names are double underscored to emphasize the fact that they
+ * do not increase refcnt. To also increase refcnt use corresponding
+ * bpf_map_get() and btf_get_by_fd() functions.
+ */
+
+static inline struct bpf_map *__bpf_map_get(struct fd f)
+{
+ if (fd_empty(f))
+ return ERR_PTR(-EBADF);
+ if (unlikely(fd_file(f)->f_op != &bpf_map_fops))
+ return ERR_PTR(-EINVAL);
+ return fd_file(f)->private_data;
+}
+
+static inline struct btf *__btf_get_by_fd(struct fd f)
+{
+ if (fd_empty(f))
+ return ERR_PTR(-EBADF);
+ if (unlikely(fd_file(f)->f_op != &btf_fops))
+ return ERR_PTR(-EINVAL);
+ return fd_file(f)->private_data;
+}
+
void bpf_map_inc(struct bpf_map *map);
void bpf_map_inc_with_uref(struct bpf_map *map);
+struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref);
struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map);
void bpf_map_put_with_uref(struct bpf_map *map);
void bpf_map_put(struct bpf_map *map);
@@ -1869,73 +2533,130 @@ int generic_map_delete_batch(struct bpf_map *map,
struct bpf_map *bpf_map_get_curr_or_next(u32 *id);
struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id);
-#ifdef CONFIG_MEMCG_KMEM
+
+int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
+ unsigned long nr_pages, struct page **page_array);
+#ifdef CONFIG_MEMCG
void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
int node);
+void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags,
+ int node);
void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags);
+void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size,
+ gfp_t flags);
void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
size_t align, gfp_t flags);
#else
-static inline void *
-bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
- int node)
+/*
+ * These specialized allocators have to be macros for their allocations to be
+ * accounted separately (to have separate alloc_tag).
+ */
+#define bpf_map_kmalloc_node(_map, _size, _flags, _node) \
+ kmalloc_node(_size, _flags, _node)
+#define bpf_map_kmalloc_nolock(_map, _size, _flags, _node) \
+ kmalloc_nolock(_size, _flags, _node)
+#define bpf_map_kzalloc(_map, _size, _flags) \
+ kzalloc(_size, _flags)
+#define bpf_map_kvcalloc(_map, _n, _size, _flags) \
+ kvcalloc(_n, _size, _flags)
+#define bpf_map_alloc_percpu(_map, _size, _align, _flags) \
+ __alloc_percpu_gfp(_size, _align, _flags)
+#endif
+
+static inline int
+bpf_map_init_elem_count(struct bpf_map *map)
{
- return kmalloc_node(size, flags, node);
+ size_t size = sizeof(*map->elem_count), align = size;
+ gfp_t flags = GFP_USER | __GFP_NOWARN;
+
+ map->elem_count = bpf_map_alloc_percpu(map, size, align, flags);
+ if (!map->elem_count)
+ return -ENOMEM;
+
+ return 0;
}
-static inline void *
-bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
+static inline void
+bpf_map_free_elem_count(struct bpf_map *map)
{
- return kzalloc(size, flags);
+ free_percpu(map->elem_count);
}
-static inline void __percpu *
-bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align,
- gfp_t flags)
+static inline void bpf_map_inc_elem_count(struct bpf_map *map)
{
- return __alloc_percpu_gfp(size, align, flags);
+ this_cpu_inc(*map->elem_count);
+}
+
+static inline void bpf_map_dec_elem_count(struct bpf_map *map)
+{
+ this_cpu_dec(*map->elem_count);
}
-#endif
extern int sysctl_unprivileged_bpf_disabled;
-static inline bool bpf_allow_ptr_leaks(void)
+bool bpf_token_capable(const struct bpf_token *token, int cap);
+
+static inline bool bpf_allow_ptr_leaks(const struct bpf_token *token)
{
- return perfmon_capable();
+ return bpf_token_capable(token, CAP_PERFMON);
}
-static inline bool bpf_allow_uninit_stack(void)
+static inline bool bpf_allow_uninit_stack(const struct bpf_token *token)
{
- return perfmon_capable();
+ return bpf_token_capable(token, CAP_PERFMON);
}
-static inline bool bpf_bypass_spec_v1(void)
+static inline bool bpf_bypass_spec_v1(const struct bpf_token *token)
{
- return perfmon_capable();
+ return bpf_jit_bypass_spec_v1() ||
+ cpu_mitigations_off() ||
+ bpf_token_capable(token, CAP_PERFMON);
}
-static inline bool bpf_bypass_spec_v4(void)
+static inline bool bpf_bypass_spec_v4(const struct bpf_token *token)
{
- return perfmon_capable();
+ return bpf_jit_bypass_spec_v4() ||
+ cpu_mitigations_off() ||
+ bpf_token_capable(token, CAP_PERFMON);
}
int bpf_map_new_fd(struct bpf_map *map, int flags);
int bpf_prog_new_fd(struct bpf_prog *prog);
void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
- const struct bpf_link_ops *ops, struct bpf_prog *prog);
+ const struct bpf_link_ops *ops, struct bpf_prog *prog,
+ enum bpf_attach_type attach_type);
+void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type,
+ const struct bpf_link_ops *ops, struct bpf_prog *prog,
+ enum bpf_attach_type attach_type, bool sleepable);
int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer);
int bpf_link_settle(struct bpf_link_primer *primer);
void bpf_link_cleanup(struct bpf_link_primer *primer);
void bpf_link_inc(struct bpf_link *link);
+struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link);
void bpf_link_put(struct bpf_link *link);
int bpf_link_new_fd(struct bpf_link *link);
-struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd);
struct bpf_link *bpf_link_get_from_fd(u32 ufd);
struct bpf_link *bpf_link_get_curr_or_next(u32 *id);
-int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
-int bpf_obj_get_user(const char __user *pathname, int flags);
+void bpf_token_inc(struct bpf_token *token);
+void bpf_token_put(struct bpf_token *token);
+int bpf_token_create(union bpf_attr *attr);
+struct bpf_token *bpf_token_get_from_fd(u32 ufd);
+int bpf_token_get_info_by_fd(struct bpf_token *token,
+ const union bpf_attr *attr,
+ union bpf_attr __user *uattr);
+
+bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd);
+bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type);
+bool bpf_token_allow_prog_type(const struct bpf_token *token,
+ enum bpf_prog_type prog_type,
+ enum bpf_attach_type attach_type);
+
+int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname);
+int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags);
+struct inode *bpf_get_inode(struct super_block *sb, const struct inode *dir,
+ umode_t mode);
#define BPF_ITER_FUNC_PREFIX "bpf_iter_"
#define DEFINE_BPF_ITER_FUNC(target, args...) \
@@ -2023,7 +2744,7 @@ struct bpf_iter__bpf_map_elem {
int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info);
void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info);
-bool bpf_iter_prog_supported(struct bpf_prog *prog);
+int bpf_iter_prog_supported(struct bpf_prog *prog);
const struct bpf_func_proto *
bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_prog *prog);
@@ -2047,7 +2768,7 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
u64 flags);
-int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
+int bpf_stackmap_extract(struct bpf_map *map, void *key, void *value, bool delete);
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
void *key, void *value, u64 map_flags);
@@ -2061,7 +2782,7 @@ int bpf_check_uarg_tail_zero(bpfptr_t uaddr, size_t expected_size,
size_t actual_size);
/* verify correctness of eBPF program */
-int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr);
+int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size);
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
@@ -2075,7 +2796,7 @@ struct sk_buff;
struct bpf_dtab_netdev;
struct bpf_cpu_map_entry;
-void __dev_flush(void);
+void __dev_flush(struct list_head *flush_list);
int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
struct net_device *dev_rx);
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf,
@@ -2083,12 +2804,12 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf,
int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
struct bpf_map *map, bool exclude_ingress);
int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
- struct bpf_prog *xdp_prog);
+ const struct bpf_prog *xdp_prog);
int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
- struct bpf_prog *xdp_prog, struct bpf_map *map,
- bool exclude_ingress);
+ const struct bpf_prog *xdp_prog,
+ struct bpf_map *map, bool exclude_ingress);
-void __cpu_map_flush(void);
+void __cpu_map_flush(struct list_head *flush_list);
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf,
struct net_device *dev_rx);
int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
@@ -2120,6 +2841,9 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr);
+int bpf_prog_test_run_nf(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr);
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info);
@@ -2149,7 +2873,7 @@ static inline bool bpf_tracing_btf_ctx_access(int off, int size,
int btf_struct_access(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
int off, int size, enum bpf_access_type atype,
- u32 *next_btf_id, enum bpf_type_flag *flag);
+ u32 *next_btf_id, enum bpf_type_flag *flag, const char **field_name);
bool btf_struct_ids_match(struct bpf_verifier_log *log,
const struct btf *btf, u32 id, int off,
const struct btf *need_btf, u32 need_type_id,
@@ -2162,30 +2886,41 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
struct btf_func_model *m);
struct bpf_reg_state;
-int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
- struct bpf_reg_state *regs);
-int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
- struct bpf_reg_state *regs);
-int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
- struct bpf_reg_state *reg);
+int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog);
int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog,
struct btf *btf, const struct btf_type *t);
+const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt,
+ int comp_idx, const char *tag_key);
+int btf_find_next_decl_tag(const struct btf *btf, const struct btf_type *pt,
+ int comp_idx, const char *tag_key, int last_id);
struct bpf_prog *bpf_prog_by_id(u32 id);
struct bpf_link *bpf_link_by_id(u32 id);
-const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
+const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id,
+ const struct bpf_prog *prog);
void bpf_task_storage_free(struct task_struct *task);
void bpf_cgrp_storage_free(struct cgroup *cgroup);
bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog);
const struct btf_func_model *
bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
const struct bpf_insn *insn);
+int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
+ u16 btf_fd_idx, u8 **func_addr);
+
struct bpf_core_ctx {
struct bpf_verifier_log *log;
const struct btf *btf;
};
+bool btf_nested_type_is_trusted(struct bpf_verifier_log *log,
+ const struct bpf_reg_state *reg,
+ const char *field_name, u32 btf_id, const char *suffix);
+
+bool btf_type_ids_nocast_alias(struct bpf_verifier_log *log,
+ const struct btf *reg_btf, u32 reg_id,
+ const struct btf *arg_btf, u32 arg_id);
+
int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
int relo_idx, void *insn);
@@ -2205,6 +2940,13 @@ static inline bool has_current_bpf_ctx(void)
}
void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog);
+
+void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
+ enum bpf_dynptr_type type, u32 offset, u32 size);
+void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
+void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr);
+void bpf_prog_report_arena_violation(bool write, unsigned long addr, unsigned long fault_ip);
+
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
@@ -2242,7 +2984,13 @@ bpf_prog_inc_not_zero(struct bpf_prog *prog)
static inline void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
const struct bpf_link_ops *ops,
- struct bpf_prog *prog)
+ struct bpf_prog *prog, enum bpf_attach_type attach_type)
+{
+}
+
+static inline void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type,
+ const struct bpf_link_ops *ops, struct bpf_prog *prog,
+ enum bpf_attach_type attach_type, bool sleepable)
{
}
@@ -2265,6 +3013,11 @@ static inline void bpf_link_inc(struct bpf_link *link)
{
}
+static inline struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link)
+{
+ return NULL;
+}
+
static inline void bpf_link_put(struct bpf_link *link)
{
}
@@ -2274,7 +3027,32 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags)
return -EOPNOTSUPP;
}
-static inline void __dev_flush(void)
+static inline bool bpf_token_capable(const struct bpf_token *token, int cap)
+{
+ return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN));
+}
+
+static inline void bpf_token_inc(struct bpf_token *token)
+{
+}
+
+static inline void bpf_token_put(struct bpf_token *token)
+{
+}
+
+static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline int bpf_token_get_info_by_fd(struct bpf_token *token,
+ const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void __dev_flush(struct list_head *flush_list)
{
}
@@ -2307,20 +3085,20 @@ struct sk_buff;
static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
struct sk_buff *skb,
- struct bpf_prog *xdp_prog)
+ const struct bpf_prog *xdp_prog)
{
return 0;
}
static inline
int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
- struct bpf_prog *xdp_prog, struct bpf_map *map,
- bool exclude_ingress)
+ const struct bpf_prog *xdp_prog,
+ struct bpf_map *map, bool exclude_ingress)
{
return 0;
}
-static inline void __cpu_map_flush(void)
+static inline void __cpu_map_flush(struct list_head *flush_list)
{
}
@@ -2390,13 +3168,14 @@ static inline struct bpf_prog *bpf_prog_by_id(u32 id)
static inline int btf_struct_access(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
int off, int size, enum bpf_access_type atype,
- u32 *next_btf_id, enum bpf_type_flag *flag)
+ u32 *next_btf_id, enum bpf_type_flag *flag,
+ const char **field_name)
{
return -EACCES;
}
static inline const struct bpf_func_proto *
-bpf_base_func_proto(enum bpf_func_id func_id)
+bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
return NULL;
}
@@ -2417,6 +3196,13 @@ bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
return NULL;
}
+static inline int
+bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
+ u16 btf_fd_idx, u8 **func_addr)
+{
+ return -ENOTSUPP;
+}
+
static inline bool unprivileged_ebpf_enabled(void)
{
return false;
@@ -2434,10 +3220,39 @@ static inline void bpf_prog_inc_misses_counter(struct bpf_prog *prog)
static inline void bpf_cgrp_storage_free(struct cgroup *cgroup)
{
}
+
+static inline void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
+ enum bpf_dynptr_type type, u32 offset, u32 size)
+{
+}
+
+static inline void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
+{
+}
+
+static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
+{
+}
+
+static inline void bpf_prog_report_arena_violation(bool write, unsigned long addr,
+ unsigned long fault_ip)
+{
+}
#endif /* CONFIG_BPF_SYSCALL */
-void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
- struct btf_mod_pair *used_btfs, u32 len);
+static __always_inline int
+bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
+{
+ int ret = -EFAULT;
+
+ if (IS_ENABLED(CONFIG_BPF_EVENTS))
+ ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
+ if (unlikely(ret < 0))
+ memset(dst, 0, size);
+ return ret;
+}
+
+void __bpf_free_used_btfs(struct btf_mod_pair *used_btfs, u32 len);
static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
enum bpf_prog_type type)
@@ -2451,7 +3266,7 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux,
bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool);
int bpf_prog_offload_compile(struct bpf_prog *prog);
-void bpf_prog_offload_destroy(struct bpf_prog *prog);
+void bpf_prog_dev_bound_destroy(struct bpf_prog *prog);
int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
struct bpf_prog *prog);
@@ -2479,20 +3294,33 @@ bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev);
void unpriv_ebpf_notify(int new_state);
#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
-int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
+int bpf_dev_bound_kfunc_check(struct bpf_verifier_log *log,
+ struct bpf_prog_aux *prog_aux);
+void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id);
+int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr);
+int bpf_prog_dev_bound_inherit(struct bpf_prog *new_prog, struct bpf_prog *old_prog);
+void bpf_dev_bound_netdev_unregister(struct net_device *dev);
static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux)
{
+ return aux->dev_bound;
+}
+
+static inline bool bpf_prog_is_offloaded(const struct bpf_prog_aux *aux)
+{
return aux->offload_requested;
}
-static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
+bool bpf_prog_dev_bound_match(const struct bpf_prog *lhs, const struct bpf_prog *rhs);
+
+static inline bool bpf_map_is_offloaded(struct bpf_map *map)
{
return unlikely(map->ops == &bpf_map_offload_ops);
}
struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr);
void bpf_map_offload_map_free(struct bpf_map *map);
+u64 bpf_map_offload_map_mem_usage(const struct bpf_map *map);
int bpf_prog_test_run_syscall(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr);
@@ -2502,23 +3330,56 @@ int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
int sock_map_bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr);
+int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog);
void sock_map_unhash(struct sock *sk);
void sock_map_destroy(struct sock *sk);
void sock_map_close(struct sock *sk, long timeout);
#else
-static inline int bpf_prog_offload_init(struct bpf_prog *prog,
- union bpf_attr *attr)
+static inline int bpf_dev_bound_kfunc_check(struct bpf_verifier_log *log,
+ struct bpf_prog_aux *prog_aux)
{
return -EOPNOTSUPP;
}
-static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux)
+static inline void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog,
+ u32 func_id)
+{
+ return NULL;
+}
+
+static inline int bpf_prog_dev_bound_init(struct bpf_prog *prog,
+ union bpf_attr *attr)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int bpf_prog_dev_bound_inherit(struct bpf_prog *new_prog,
+ struct bpf_prog *old_prog)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void bpf_dev_bound_netdev_unregister(struct net_device *dev)
+{
+}
+
+static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux)
+{
+ return false;
+}
+
+static inline bool bpf_prog_is_offloaded(struct bpf_prog_aux *aux)
{
return false;
}
-static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
+static inline bool bpf_prog_dev_bound_match(const struct bpf_prog *lhs, const struct bpf_prog *rhs)
+{
+ return false;
+}
+
+static inline bool bpf_map_is_offloaded(struct bpf_map *map)
{
return false;
}
@@ -2532,6 +3393,11 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
{
}
+static inline u64 bpf_map_offload_map_mem_usage(const struct bpf_map *map)
+{
+ return 0;
+}
+
static inline int bpf_prog_test_run_syscall(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr)
@@ -2563,9 +3429,30 @@ static inline int sock_map_bpf_prog_query(const union bpf_attr *attr,
{
return -EINVAL;
}
+
+static inline int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ return -EOPNOTSUPP;
+}
#endif /* CONFIG_BPF_SYSCALL */
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
+static __always_inline void
+bpf_prog_inc_misses_counters(const struct bpf_prog_array *array)
+{
+ const struct bpf_prog_array_item *item;
+ struct bpf_prog *prog;
+
+ if (unlikely(!array))
+ return;
+
+ item = &array->items[0];
+ while ((prog = READ_ONCE(item->prog))) {
+ bpf_prog_inc_misses_counter(prog);
+ item++;
+ }
+}
+
#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
void bpf_sk_reuseport_detach(struct sock *sk);
int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
@@ -2593,6 +3480,38 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
#endif /* CONFIG_BPF_SYSCALL */
#endif /* defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) */
+#if defined(CONFIG_KEYS) && defined(CONFIG_BPF_SYSCALL)
+
+struct bpf_key *bpf_lookup_user_key(s32 serial, u64 flags);
+struct bpf_key *bpf_lookup_system_key(u64 id);
+void bpf_key_put(struct bpf_key *bkey);
+int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
+ struct bpf_dynptr *sig_p,
+ struct bpf_key *trusted_keyring);
+
+#else
+static inline struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags)
+{
+ return NULL;
+}
+
+static inline struct bpf_key *bpf_lookup_system_key(u64 id)
+{
+ return NULL;
+}
+
+static inline void bpf_key_put(struct bpf_key *bkey)
+{
+}
+
+static inline int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
+ struct bpf_dynptr *sig_p,
+ struct bpf_key *trusted_keyring)
+{
+ return -EOPNOTSUPP;
+}
+#endif /* defined(CONFIG_KEYS) && defined(CONFIG_BPF_SYSCALL) */
+
/* verifier prototypes for helper functions called from eBPF programs */
extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
extern const struct bpf_func_proto bpf_map_update_elem_proto;
@@ -2614,7 +3533,9 @@ extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
extern const struct bpf_func_proto bpf_get_current_comm_proto;
extern const struct bpf_func_proto bpf_get_stackid_proto;
extern const struct bpf_func_proto bpf_get_stack_proto;
+extern const struct bpf_func_proto bpf_get_stack_sleepable_proto;
extern const struct bpf_func_proto bpf_get_task_stack_proto;
+extern const struct bpf_func_proto bpf_get_task_stack_sleepable_proto;
extern const struct bpf_func_proto bpf_get_stackid_proto_pe;
extern const struct bpf_func_proto bpf_get_stack_proto_pe;
extern const struct bpf_func_proto bpf_sock_map_update_proto;
@@ -2622,6 +3543,7 @@ extern const struct bpf_func_proto bpf_sock_hash_update_proto;
extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
extern const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto;
extern const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto;
+extern const struct bpf_func_proto bpf_current_task_under_cgroup_proto;
extern const struct bpf_func_proto bpf_msg_redirect_hash_proto;
extern const struct bpf_func_proto bpf_msg_redirect_map_proto;
extern const struct bpf_func_proto bpf_sk_redirect_hash_proto;
@@ -2696,6 +3618,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
struct bpf_insn *insn_buf,
struct bpf_prog *prog,
u32 *target_size);
+int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
+ struct bpf_dynptr *ptr);
#else
static inline bool bpf_sock_common_is_valid_access(int off, int size,
enum bpf_access_type type,
@@ -2717,6 +3641,11 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
{
return 0;
}
+static inline int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
+ struct bpf_dynptr *ptr)
+{
+ return -EOPNOTSUPP;
+}
#endif
#ifdef CONFIG_INET
@@ -2781,12 +3710,17 @@ static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
#endif /* CONFIG_INET */
enum bpf_text_poke_type {
+ BPF_MOD_NOP,
BPF_MOD_CALL,
BPF_MOD_JUMP,
};
-int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
- void *addr1, void *addr2);
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
+ enum bpf_text_poke_type new_t, void *old_addr,
+ void *new_addr);
+
+void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
+ struct bpf_prog *new, struct bpf_prog *old);
void *bpf_arch_text_copy(void *dst, void *src, size_t len);
int bpf_arch_text_invalidate(void *dst, size_t len);
@@ -2795,40 +3729,52 @@ struct btf_id_set;
bool btf_id_set_contains(const struct btf_id_set *set, u32 id);
#define MAX_BPRINTF_VARARGS 12
+#define MAX_BPRINTF_BUF 1024
-int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
- u32 **bin_buf, u32 num_args);
-void bpf_bprintf_cleanup(void);
-
-/* the implementation of the opaque uapi struct bpf_dynptr */
-struct bpf_dynptr_kern {
- void *data;
- /* Size represents the number of usable bytes of dynptr data.
- * If for example the offset is at 4 for a local dynptr whose data is
- * of type u64, the number of usable bytes is 4.
- *
- * The upper 8 bits are reserved. It is as follows:
- * Bits 0 - 23 = size
- * Bits 24 - 30 = dynptr type
- * Bit 31 = whether dynptr is read-only
- */
- u32 size;
- u32 offset;
-} __aligned(8);
-
-enum bpf_dynptr_type {
- BPF_DYNPTR_TYPE_INVALID,
- /* Points to memory that is local to the bpf program */
- BPF_DYNPTR_TYPE_LOCAL,
- /* Underlying data is a kernel-produced ringbuf record */
- BPF_DYNPTR_TYPE_RINGBUF,
-};
-
-void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
- enum bpf_dynptr_type type, u32 offset, u32 size);
-void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
-int bpf_dynptr_check_size(u32 size);
-u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr);
+/* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary
+ * arguments representation.
+ */
+#define MAX_BPRINTF_BIN_ARGS 512
+
+struct bpf_bprintf_buffers {
+ char bin_args[MAX_BPRINTF_BIN_ARGS];
+ char buf[MAX_BPRINTF_BUF];
+};
+
+struct bpf_bprintf_data {
+ u32 *bin_args;
+ char *buf;
+ bool get_bin_args;
+ bool get_buf;
+};
+
+int bpf_bprintf_prepare(const char *fmt, u32 fmt_size, const u64 *raw_args,
+ u32 num_args, struct bpf_bprintf_data *data);
+void bpf_bprintf_cleanup(struct bpf_bprintf_data *data);
+int bpf_try_get_buffers(struct bpf_bprintf_buffers **bufs);
+void bpf_put_buffers(void);
+
+void bpf_prog_stream_init(struct bpf_prog *prog);
+void bpf_prog_stream_free(struct bpf_prog *prog);
+int bpf_prog_stream_read(struct bpf_prog *prog, enum bpf_stream_id stream_id, void __user *buf, int len);
+void bpf_stream_stage_init(struct bpf_stream_stage *ss);
+void bpf_stream_stage_free(struct bpf_stream_stage *ss);
+__printf(2, 3)
+int bpf_stream_stage_printk(struct bpf_stream_stage *ss, const char *fmt, ...);
+int bpf_stream_stage_commit(struct bpf_stream_stage *ss, struct bpf_prog *prog,
+ enum bpf_stream_id stream_id);
+int bpf_stream_stage_dump_stack(struct bpf_stream_stage *ss);
+
+#define bpf_stream_printk(ss, ...) bpf_stream_stage_printk(&ss, __VA_ARGS__)
+#define bpf_stream_dump_stack(ss) bpf_stream_stage_dump_stack(&ss)
+
+#define bpf_stream_stage(ss, prog, stream_id, expr) \
+ ({ \
+ bpf_stream_stage_init(&ss); \
+ (expr); \
+ bpf_stream_stage_commit(&ss, prog, stream_id); \
+ bpf_stream_stage_free(&ss); \
+ })
#ifdef CONFIG_BPF_LSM
void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype);
@@ -2852,4 +3798,46 @@ static inline bool type_is_alloc(u32 type)
return type & MEM_ALLOC;
}
+static inline gfp_t bpf_memcg_flags(gfp_t flags)
+{
+ if (memcg_bpf_enabled())
+ return flags | __GFP_ACCOUNT;
+ return flags;
+}
+
+static inline bool bpf_is_subprog(const struct bpf_prog *prog)
+{
+ return prog->aux->func_idx != 0;
+}
+
+int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char **filep,
+ const char **linep, int *nump);
+struct bpf_prog *bpf_prog_find_from_stack(void);
+
+int bpf_insn_array_init(struct bpf_map *map, const struct bpf_prog *prog);
+int bpf_insn_array_ready(struct bpf_map *map);
+void bpf_insn_array_release(struct bpf_map *map);
+void bpf_insn_array_adjust(struct bpf_map *map, u32 off, u32 len);
+void bpf_insn_array_adjust_after_remove(struct bpf_map *map, u32 off, u32 len);
+
+#ifdef CONFIG_BPF_SYSCALL
+void bpf_prog_update_insn_ptrs(struct bpf_prog *prog, u32 *offsets, void *image);
+#else
+static inline void
+bpf_prog_update_insn_ptrs(struct bpf_prog *prog, u32 *offsets, void *image)
+{
+}
+#endif
+
+static inline int bpf_map_check_op_flags(struct bpf_map *map, u64 flags, u64 allowed_flags)
+{
+ if (flags & ~allowed_flags)
+ return -EINVAL;
+
+ if ((flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK))
+ return -EINVAL;
+
+ return 0;
+}
+
#endif /* _LINUX_BPF_H */