summaryrefslogtreecommitdiff
path: root/include/linux/bpf.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-11-25 20:02:57 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2019-11-25 20:02:57 -0800
commit386403a115f95997c2715691226e11a7b5cffcfd (patch)
treea685df70bd3d5b295683713818ddf0752c3d75b6 /include/linux/bpf.h
parent642356cb5f4a8c82b5ca5ebac288c327d10df236 (diff)
parent622dc5ad8052f4f0c6b7a12787696a5caa3c6a58 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from David Miller: "Another merge window, another pull full of stuff: 1) Support alternative names for network devices, from Jiri Pirko. 2) Introduce per-netns netdev notifiers, also from Jiri Pirko. 3) Support MSG_PEEK in vsock/virtio, from Matias Ezequiel Vara Larsen. 4) Allow compiling out the TLS TOE code, from Jakub Kicinski. 5) Add several new tracepoints to the kTLS code, also from Jakub. 6) Support set channels ethtool callback in ena driver, from Sameeh Jubran. 7) New SCTP events SCTP_ADDR_ADDED, SCTP_ADDR_REMOVED, SCTP_ADDR_MADE_PRIM, and SCTP_SEND_FAILED_EVENT. From Xin Long. 8) Add XDP support to mvneta driver, from Lorenzo Bianconi. 9) Lots of netfilter hw offload fixes, cleanups and enhancements, from Pablo Neira Ayuso. 10) PTP support for aquantia chips, from Egor Pomozov. 11) Add UDP segmentation offload support to igb, ixgbe, and i40e. From Josh Hunt. 12) Add smart nagle to tipc, from Jon Maloy. 13) Support L2 field rewrite by TC offloads in bnxt_en, from Venkat Duvvuru. 14) Add a flow mask cache to OVS, from Tonghao Zhang. 15) Add XDP support to ice driver, from Maciej Fijalkowski. 16) Add AF_XDP support to ice driver, from Krzysztof Kazimierczak. 17) Support UDP GSO offload in atlantic driver, from Igor Russkikh. 18) Support it in stmmac driver too, from Jose Abreu. 19) Support TIPC encryption and auth, from Tuong Lien. 20) Introduce BPF trampolines, from Alexei Starovoitov. 21) Make page_pool API more numa friendly, from Saeed Mahameed. 22) Introduce route hints to ipv4 and ipv6, from Paolo Abeni. 23) Add UDP segmentation offload to cxgb4, Rahul Lakkireddy" * git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1857 commits) libbpf: Fix usage of u32 in userspace code mm: Implement no-MMU variant of vmalloc_user_node_flags slip: Fix use-after-free Read in slip_open net: dsa: sja1105: fix sja1105_parse_rgmii_delays() macvlan: schedule bc_work even if error enetc: add support Credit Based Shaper(CBS) for hardware offload net: phy: add helpers phy_(un)lock_mdio_bus mdio_bus: don't use managed reset-controller ax88179_178a: add ethtool_op_get_ts_info() mlxsw: spectrum_router: Fix use of uninitialized adjacency index mlxsw: spectrum_router: After underlay moves, demote conflicting tunnels bpf: Simplify __bpf_arch_text_poke poke type handling bpf: Introduce BPF_TRACE_x helper for the tracing tests bpf: Add bpf_jit_blinding_enabled for !CONFIG_BPF_JIT bpf, testing: Add various tail call test cases bpf, x86: Emit patchable direct jump as tail call bpf: Constant map key tracking for prog array pokes bpf: Add poke dependency tracking for prog array maps bpf: Add initial poke descriptor table for jit images bpf: Move owner type, jited info into array auxiliary data ...
Diffstat (limited to 'include/linux/bpf.h')
-rw-r--r--include/linux/bpf.h296
1 files changed, 241 insertions, 55 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 3bf3835d0e86..35903f148be5 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -12,17 +12,23 @@
#include <linux/err.h>
#include <linux/rbtree_latch.h>
#include <linux/numa.h>
+#include <linux/mm_types.h>
#include <linux/wait.h>
#include <linux/u64_stats_sync.h>
+#include <linux/refcount.h>
+#include <linux/mutex.h>
struct bpf_verifier_env;
+struct bpf_verifier_log;
struct perf_event;
struct bpf_prog;
+struct bpf_prog_aux;
struct bpf_map;
struct sock;
struct seq_file;
struct btf;
struct btf_type;
+struct exception_table_entry;
extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
@@ -59,11 +65,18 @@ struct bpf_map_ops {
const struct btf_type *key_type,
const struct btf_type *value_type);
+ /* Prog poke tracking helpers. */
+ int (*map_poke_track)(struct bpf_map *map, struct bpf_prog_aux *aux);
+ void (*map_poke_untrack)(struct bpf_map *map, struct bpf_prog_aux *aux);
+ void (*map_poke_run)(struct bpf_map *map, u32 key, struct bpf_prog *old,
+ struct bpf_prog *new);
+
/* Direct value access helpers. */
int (*map_direct_value_addr)(const struct bpf_map *map,
u64 *imm, u32 off);
int (*map_direct_value_meta)(const struct bpf_map *map,
u64 imm, u32 *off);
+ int (*map_mmap)(struct bpf_map *map, struct vm_area_struct *vma);
};
struct bpf_map_memory {
@@ -92,17 +105,19 @@ struct bpf_map {
u32 btf_value_type_id;
struct btf *btf;
struct bpf_map_memory memory;
+ char name[BPF_OBJ_NAME_LEN];
bool unpriv_array;
- bool frozen; /* write-once */
- /* 48 bytes hole */
+ bool frozen; /* write-once; write-protected by freeze_mutex */
+ /* 22 bytes hole */
/* The 3rd and 4th cacheline with misc members to avoid false sharing
* particularly with refcounting.
*/
- atomic_t refcnt ____cacheline_aligned;
- atomic_t usercnt;
+ atomic64_t refcnt ____cacheline_aligned;
+ atomic64_t usercnt;
struct work_struct work;
- char name[BPF_OBJ_NAME_LEN];
+ struct mutex freeze_mutex;
+ u64 writecnt; /* writable mmap cnt; protected by freeze_mutex */
};
static inline bool map_value_has_spin_lock(const struct bpf_map *map)
@@ -211,6 +226,7 @@ enum bpf_arg_type {
ARG_PTR_TO_INT, /* pointer to int */
ARG_PTR_TO_LONG, /* pointer to long */
ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */
+ ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */
};
/* type of values returned from helper functions */
@@ -233,11 +249,17 @@ struct bpf_func_proto {
bool gpl_only;
bool pkt_access;
enum bpf_return_type ret_type;
- enum bpf_arg_type arg1_type;
- enum bpf_arg_type arg2_type;
- enum bpf_arg_type arg3_type;
- enum bpf_arg_type arg4_type;
- enum bpf_arg_type arg5_type;
+ union {
+ struct {
+ enum bpf_arg_type arg1_type;
+ enum bpf_arg_type arg2_type;
+ enum bpf_arg_type arg3_type;
+ enum bpf_arg_type arg4_type;
+ enum bpf_arg_type arg5_type;
+ };
+ enum bpf_arg_type arg_type[5];
+ };
+ int *btf_id; /* BTF ids of arguments */
};
/* bpf_context is intentionally undefined structure. Pointer to bpf_context is
@@ -281,6 +303,7 @@ enum bpf_reg_type {
PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */
PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */
+ PTR_TO_BTF_ID, /* reg points to kernel struct */
};
/* The information passed from prog-specific *_is_valid_access
@@ -288,7 +311,11 @@ enum bpf_reg_type {
*/
struct bpf_insn_access_aux {
enum bpf_reg_type reg_type;
- int ctx_field_size;
+ union {
+ int ctx_field_size;
+ u32 btf_id;
+ };
+ struct bpf_verifier_log *log; /* for verbose logs */
};
static inline void
@@ -359,14 +386,135 @@ enum bpf_cgroup_storage_type {
#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX
+/* The longest tracepoint has 12 args.
+ * See include/trace/bpf_probe.h
+ */
+#define MAX_BPF_FUNC_ARGS 12
+
struct bpf_prog_stats {
u64 cnt;
u64 nsecs;
struct u64_stats_sync syncp;
+} __aligned(2 * sizeof(u64));
+
+struct btf_func_model {
+ u8 ret_size;
+ u8 nr_args;
+ u8 arg_size[MAX_BPF_FUNC_ARGS];
+};
+
+/* Restore arguments before returning from trampoline to let original function
+ * continue executing. This flag is used for fentry progs when there are no
+ * fexit progs.
+ */
+#define BPF_TRAMP_F_RESTORE_REGS BIT(0)
+/* Call original function after fentry progs, but before fexit progs.
+ * Makes sense for fentry/fexit, normal calls and indirect calls.
+ */
+#define BPF_TRAMP_F_CALL_ORIG BIT(1)
+/* Skip current frame and return to parent. Makes sense for fentry/fexit
+ * programs only. Should not be used with normal calls and indirect calls.
+ */
+#define BPF_TRAMP_F_SKIP_FRAME BIT(2)
+
+/* Different use cases for BPF trampoline:
+ * 1. replace nop at the function entry (kprobe equivalent)
+ * flags = BPF_TRAMP_F_RESTORE_REGS
+ * fentry = a set of programs to run before returning from trampoline
+ *
+ * 2. replace nop at the function entry (kprobe + kretprobe equivalent)
+ * flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME
+ * orig_call = fentry_ip + MCOUNT_INSN_SIZE
+ * fentry = a set of program to run before calling original function
+ * fexit = a set of program to run after original function
+ *
+ * 3. replace direct call instruction anywhere in the function body
+ * or assign a function pointer for indirect call (like tcp_congestion_ops->cong_avoid)
+ * With flags = 0
+ * fentry = a set of programs to run before returning from trampoline
+ * With flags = BPF_TRAMP_F_CALL_ORIG
+ * orig_call = original callback addr or direct function addr
+ * fentry = a set of program to run before calling original function
+ * fexit = a set of program to run after original function
+ */
+int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags,
+ struct bpf_prog **fentry_progs, int fentry_cnt,
+ struct bpf_prog **fexit_progs, int fexit_cnt,
+ void *orig_call);
+/* these two functions are called from generated trampoline */
+u64 notrace __bpf_prog_enter(void);
+void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
+
+enum bpf_tramp_prog_type {
+ BPF_TRAMP_FENTRY,
+ BPF_TRAMP_FEXIT,
+ BPF_TRAMP_MAX
+};
+
+struct bpf_trampoline {
+ /* hlist for trampoline_table */
+ struct hlist_node hlist;
+ /* serializes access to fields of this trampoline */
+ struct mutex mutex;
+ refcount_t refcnt;
+ u64 key;
+ struct {
+ struct btf_func_model model;
+ void *addr;
+ } func;
+ /* list of BPF programs using this trampoline */
+ struct hlist_head progs_hlist[BPF_TRAMP_MAX];
+ /* Number of attached programs. A counter per kind. */
+ int progs_cnt[BPF_TRAMP_MAX];
+ /* Executable image of trampoline */
+ void *image;
+ u64 selector;
+};
+#ifdef CONFIG_BPF_JIT
+struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
+int bpf_trampoline_link_prog(struct bpf_prog *prog);
+int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
+void bpf_trampoline_put(struct bpf_trampoline *tr);
+#else
+static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
+{
+ return NULL;
+}
+static inline int bpf_trampoline_link_prog(struct bpf_prog *prog)
+{
+ return -ENOTSUPP;
+}
+static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
+{
+ return -ENOTSUPP;
+}
+static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
+#endif
+
+struct bpf_func_info_aux {
+ bool unreliable;
+};
+
+enum bpf_jit_poke_reason {
+ BPF_POKE_REASON_TAIL_CALL,
+};
+
+/* Descriptor of pokes pointing /into/ the JITed image. */
+struct bpf_jit_poke_descriptor {
+ void *ip;
+ union {
+ struct {
+ struct bpf_map *map;
+ u32 key;
+ } tail_call;
+ };
+ bool ip_stable;
+ u8 adj_off;
+ u16 reason;
};
struct bpf_prog_aux {
- atomic_t refcnt;
+ atomic64_t refcnt;
u32 used_map_cnt;
u32 max_ctx_offset;
u32 max_pkt_offset;
@@ -375,10 +523,23 @@ struct bpf_prog_aux {
u32 id;
u32 func_cnt; /* used by non-func prog as the number of func progs */
u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
+ u32 attach_btf_id; /* in-kernel BTF type id to attach to */
+ struct bpf_prog *linked_prog;
bool verifier_zext; /* Zero extensions has been inserted by verifier. */
bool offload_requested;
+ bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
+ bool func_proto_unreliable;
+ enum bpf_tramp_prog_type trampoline_prog_type;
+ struct bpf_trampoline *trampoline;
+ struct hlist_node tramp_hlist;
+ /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
+ const struct btf_type *attach_func_proto;
+ /* function name for valid attach_btf_id */
+ const char *attach_func_name;
struct bpf_prog **func;
void *jit_data; /* JIT specific data. arch dependent */
+ struct bpf_jit_poke_descriptor *poke_tab;
+ u32 size_poke_tab;
struct latch_tree_node ksym_tnode;
struct list_head ksym_lnode;
const struct bpf_prog_ops *ops;
@@ -394,6 +555,7 @@ struct bpf_prog_aux {
struct bpf_prog_offload *offload;
struct btf *btf;
struct bpf_func_info *func_info;
+ struct bpf_func_info_aux *func_info_aux;
/* bpf_line_info loaded from userspace. linfo->insn_off
* has the xlated insn offset.
* Both the main and sub prog share the same linfo.
@@ -416,6 +578,8 @@ struct bpf_prog_aux {
* main prog always has linfo_idx == 0
*/
u32 linfo_idx;
+ u32 num_exentries;
+ struct exception_table_entry *extable;
struct bpf_prog_stats __percpu *stats;
union {
struct work_struct work;
@@ -423,17 +587,26 @@ struct bpf_prog_aux {
};
};
+struct bpf_array_aux {
+ /* 'Ownership' of prog array is claimed by the first program that
+ * is going to use this map or by the first program which FD is
+ * stored in the map to make sure that all callers and callees have
+ * the same prog type and JITed flag.
+ */
+ enum bpf_prog_type type;
+ bool jited;
+ /* Programs with direct jumps into programs part of this array. */
+ struct list_head poke_progs;
+ struct bpf_map *map;
+ struct mutex poke_mutex;
+ struct work_struct work;
+};
+
struct bpf_array {
struct bpf_map map;
u32 elem_size;
u32 index_mask;
- /* 'ownership' of prog_array is claimed by the first program that
- * is going to use this map or by the first program which FD is stored
- * in the map to make sure that all callers and callees have the same
- * prog_type and JITed flag
- */
- enum bpf_prog_type owner_prog_type;
- bool owner_jited;
+ struct bpf_array_aux *aux;
union {
char value[0] __aligned(8);
void *ptrs[0] __aligned(8);
@@ -482,6 +655,7 @@ struct bpf_event_entry {
bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
int bpf_prog_calc_tag(struct bpf_prog *fp);
+const char *kernel_type_name(u32 btf_type_id);
const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
@@ -620,7 +794,7 @@ DECLARE_PER_CPU(int, bpf_prog_active);
extern const struct file_operations bpf_map_fops;
extern const struct file_operations bpf_prog_fops;
-#define BPF_PROG_TYPE(_id, _name) \
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
extern const struct bpf_prog_ops _name ## _prog_ops; \
extern const struct bpf_verifier_ops _name ## _verifier_ops;
#define BPF_MAP_TYPE(_id, _ops) \
@@ -636,9 +810,9 @@ extern const struct bpf_verifier_ops xdp_analyzer_ops;
struct bpf_prog *bpf_prog_get(u32 ufd);
struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
bool attach_drv);
-struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i);
+void bpf_prog_add(struct bpf_prog *prog, int i);
void bpf_prog_sub(struct bpf_prog *prog, int i);
-struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog);
+void bpf_prog_inc(struct bpf_prog *prog);
struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog);
void bpf_prog_put(struct bpf_prog *prog);
int __bpf_prog_charge(struct user_struct *user, u32 pages);
@@ -649,9 +823,9 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock);
struct bpf_map *bpf_map_get_with_uref(u32 ufd);
struct bpf_map *__bpf_map_get(struct fd f);
-struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
-struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map,
- bool uref);
+void bpf_map_inc(struct bpf_map *map);
+void bpf_map_inc_with_uref(struct bpf_map *map);
+struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map);
void bpf_map_put_with_uref(struct bpf_map *map);
void bpf_map_put(struct bpf_map *map);
int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
@@ -661,6 +835,7 @@ void bpf_map_charge_finish(struct bpf_map_memory *mem);
void bpf_map_charge_move(struct bpf_map_memory *dst,
struct bpf_map_memory *src);
void *bpf_map_area_alloc(u64 size, int numa_node);
+void *bpf_map_area_mmapable_alloc(u64 size, int numa_node);
void bpf_map_area_free(void *base);
void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
@@ -747,6 +922,24 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr);
+bool btf_ctx_access(int off, int size, enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info);
+int btf_struct_access(struct bpf_verifier_log *log,
+ const struct btf_type *t, int off, int size,
+ enum bpf_access_type atype,
+ u32 *next_btf_id);
+int btf_resolve_helper_id(struct bpf_verifier_log *log,
+ const struct bpf_func_proto *fn, int);
+
+int btf_distill_func_proto(struct bpf_verifier_log *log,
+ struct btf *btf,
+ const struct btf_type *func_proto,
+ const char *func_name,
+ struct btf_func_model *m);
+
+int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog);
+
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
@@ -760,10 +953,8 @@ static inline struct bpf_prog *bpf_prog_get_type_dev(u32 ufd,
return ERR_PTR(-EOPNOTSUPP);
}
-static inline struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog,
- int i)
+static inline void bpf_prog_add(struct bpf_prog *prog, int i)
{
- return ERR_PTR(-EOPNOTSUPP);
}
static inline void bpf_prog_sub(struct bpf_prog *prog, int i)
@@ -774,9 +965,8 @@ static inline void bpf_prog_put(struct bpf_prog *prog)
{
}
-static inline struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog)
+static inline void bpf_prog_inc(struct bpf_prog *prog)
{
- return ERR_PTR(-EOPNOTSUPP);
}
static inline struct bpf_prog *__must_check
@@ -877,6 +1067,10 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
{
return -ENOTSUPP;
}
+
+static inline void bpf_map_put(struct bpf_map *map)
+{
+}
#endif /* CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
@@ -972,31 +1166,6 @@ static inline int sock_map_get_from_fd(const union bpf_attr *attr,
}
#endif
-#if defined(CONFIG_XDP_SOCKETS)
-struct xdp_sock;
-struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key);
-int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
- struct xdp_sock *xs);
-void __xsk_map_flush(struct bpf_map *map);
-#else
-struct xdp_sock;
-static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
- u32 key)
-{
- return NULL;
-}
-
-static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
- struct xdp_sock *xs)
-{
- return -EOPNOTSUPP;
-}
-
-static inline void __xsk_map_flush(struct bpf_map *map)
-{
-}
-#endif
-
#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
void bpf_sk_reuseport_detach(struct sock *sk);
int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
@@ -1095,6 +1264,15 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
#endif
#ifdef CONFIG_INET
+struct sk_reuseport_kern {
+ struct sk_buff *skb;
+ struct sock *sk;
+ struct sock *selected_sk;
+ void *data_end;
+ u32 hash;
+ u32 reuseport_id;
+ bool bind_inany;
+};
bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info);
@@ -1145,4 +1323,12 @@ static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
}
#endif /* CONFIG_INET */
+enum bpf_text_poke_type {
+ BPF_MOD_CALL,
+ BPF_MOD_JUMP,
+};
+
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+ void *addr1, void *addr2);
+
#endif /* _LINUX_BPF_H */