From d279f1f8c64711ca986c3121c8ec811b892932f0 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 11 Dec 2017 11:39:03 -0800 Subject: bpf/tracing: add a bpf test for new ioctl query interface Added a subtest in test_progs. The tracepoint is sched/sched_switch. Multiple bpf programs are attached to this tracepoint and the query interface is exercised. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Peter Zijlstra (Intel) Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/perf_event.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 362493a2f950..f2c354d5f519 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -418,6 +418,27 @@ struct perf_event_attr { __u16 __reserved_2; /* align to __u64 */ }; +/* + * Structure used by below PERF_EVENT_IOC_QUERY_BPF command + * to query bpf programs attached to the same perf tracepoint + * as the given perf event. + */ +struct perf_event_query_bpf { + /* + * The below ids array length + */ + __u32 ids_len; + /* + * Set by the kernel to indicate the number of + * available programs + */ + __u32 prog_cnt; + /* + * User provided buffer to store program ids + */ + __u32 ids[0]; +}; + #define perf_flags(attr) (*(&(attr)->read_format + 1)) /* @@ -433,6 +454,7 @@ struct perf_event_attr { #define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) #define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) #define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32) +#define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *) enum perf_event_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, -- cgit From 965de87e54b803223bff703ea6b2a76c056695ae Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 11 Dec 2017 11:36:49 -0500 Subject: samples/bpf: add a test for bpf_override_return This adds a basic test for bpf_override_return to verify it works. We override the main function for mounting a btrfs fs so it'll return -ENOMEM and then make sure that trying to mount a btrfs fs will fail. Acked-by: Alexei Starovoitov Acked-by: Ingo Molnar Signed-off-by: Josef Bacik Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/bpf.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4c223ab30293..cf446c25c0ec 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -677,6 +677,10 @@ union bpf_attr { * @buf: buf to fill * @buf_size: size of the buf * Return : 0 on success or negative error code + * + * int bpf_override_return(pt_regs, rc) + * @pt_regs: pointer to struct pt_regs + * @rc: the return value to set */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -736,7 +740,8 @@ union bpf_attr { FN(xdp_adjust_meta), \ FN(perf_event_read_value), \ FN(perf_prog_read_value), \ - FN(getsockopt), + FN(getsockopt), \ + FN(override_return), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit From 48cca7e44f9f8268fdcd4351e2f19ff2275119d1 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 14 Dec 2017 17:55:10 -0800 Subject: libbpf: add support for bpf_call - recognize relocation emitted by llvm - since all regular function will be kept in .text section and llvm takes care of pc-relative offsets in bpf_call instruction simply copy all of .text to relevant program section while adjusting bpf_call instructions in program section to point to newly copied body of instructions from .text - do so for all programs in the elf file - set all programs types to the one passed to bpf_prog_load() Note for elf files with multiple programs that use different functions in .text section we need to do 'linker' style logic. This work is still TBD Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index cf446c25c0ec..db1b0923a308 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -197,8 +197,14 @@ enum bpf_attach_type { */ #define BPF_F_STRICT_ALIGNMENT (1U << 0) +/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */ #define BPF_PSEUDO_MAP_FD 1 +/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative + * offset to another bpf function + */ +#define BPF_PSEUDO_CALL 1 + /* flags for BPF_MAP_UPDATE_ELEM command */ #define BPF_ANY 0 /* create new element or update existing */ #define BPF_NOEXIST 1 /* create new element if it didn't exist */ -- cgit From 675fc275a3a2d905535207237402c6d8dcb5fa4b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Dec 2017 18:39:09 -0800 Subject: bpf: offload: report device information for offloaded programs Report to the user ifindex and namespace information of offloaded programs. If device has disappeared return -ENODEV. Specify the namespace using dev/inode combination. CC: Eric W. Biederman Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index db1b0923a308..4e8c60acfa32 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -921,6 +921,9 @@ struct bpf_prog_info { __u32 nr_map_ids; __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u64 netns_dev; + __u64 netns_ino; } __attribute__((aligned(8))); struct bpf_map_info { -- cgit From a38845729ea3985db5d2544ec3ef3dc8f6313a27 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 11 Jan 2018 20:29:09 -0800 Subject: bpf: offload: add map offload infrastructure BPF map offload follow similar path to program offload. At creation time users may specify ifindex of the device on which they want to create the map. Map will be validated by the kernel's .map_alloc_check callback and device driver will be called for the actual allocation. Map will have an empty set of operations associated with it (save for alloc and free callbacks). The real device callbacks are kept in map->offload->dev_ops because they have slightly different signatures. Map operations are called in process context so the driver may communicate with HW freely, msleep(), wait() etc. Map alloc and free callbacks are muxed via existing .ndo_bpf, and are always called with rtnl lock held. Maps and programs are guaranteed to be destroyed before .ndo_uninit (i.e. before unregister_netdev() returns). Map callbacks are invoked with bpf_devs_lock *read* locked, drivers must take care of exclusive locking if necessary. All offload-specific branches are marked with unlikely() (through bpf_map_is_dev_bound()), given that branch penalty will be negligible compared to IO anyway, and we don't want to penalize SW path unnecessarily. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4e8c60acfa32..69f96af4a569 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -245,6 +245,7 @@ union bpf_attr { * BPF_F_NUMA_NODE is set). */ char map_name[BPF_OBJ_NAME_LEN]; + __u32 map_ifindex; /* ifindex of netdev to create on */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ -- cgit From e7b2823a582a5bca5ee47644f448e317178e8824 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 18 Jan 2018 17:49:08 +0100 Subject: bpf: Sync kernel ABI header with tooling header Update tools/include/uapi/linux/bpf.h to bring it in sync with include/uapi/linux/bpf.h. The listed commits forgot to update it. Fixes: 02dd3291b2f0 ("bpf: finally expose xdp_rxq_info to XDP bpf-programs") Fixes: f19397a5c656 ("bpf: Add access to snd_cwnd and others in sock_ops") Fixes: 06ef0ccb5a36 ("bpf/cgroup: fix a verification error for a CGROUP_DEVICE type prog") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 69f96af4a569..7c2259e8bc54 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -900,6 +900,9 @@ struct xdp_md { __u32 data; __u32 data_end; __u32 data_meta; + /* Below access go through struct xdp_rxq_info */ + __u32 ingress_ifindex; /* rxq->dev->ifindex */ + __u32 rx_queue_index; /* rxq->queue_index */ }; enum sk_action { @@ -956,6 +959,12 @@ struct bpf_sock_ops { __u32 local_ip6[4]; /* Stored in network byte order */ __u32 remote_port; /* Stored in network byte order */ __u32 local_port; /* stored in host byte order */ + __u32 is_fullsock; /* Some TCP fields are only valid if + * there is a full socket. If not, the + * fields read as zero. + */ + __u32 snd_cwnd; + __u32 srtt_us; /* Averaged RTT << 3 in usecs */ }; /* List of known BPF sock_ops operators. @@ -1010,7 +1019,8 @@ struct bpf_perf_event_value { #define BPF_DEVCG_DEV_CHAR (1ULL << 1) struct bpf_cgroup_dev_ctx { - __u32 access_type; /* (access << 16) | type */ + /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ + __u32 access_type; __u32 major; __u32 minor; }; -- cgit From 52775b33bb5072fbc07b02c0cf4fe8da1f7ee7cd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 17 Jan 2018 19:13:28 -0800 Subject: bpf: offload: report device information about offloaded maps Tell user space about device on which the map was created. Unfortunate reality of user ABI makes sharing this code with program offload difficult but the information is the same. Signed-off-by: Jakub Kicinski Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7c2259e8bc54..af1f49ad8b88 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -938,6 +938,9 @@ struct bpf_map_info { __u32 max_entries; __u32 map_flags; char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u64 netns_dev; + __u64 netns_ino; } __attribute__((aligned(8))); /* User bpf_sock_ops struct to access socket values and specify request ops -- cgit From d6d4f60c3a0933852dcc40a2142d93027ea1da76 Mon Sep 17 00:00:00 2001 From: Lawrence Brakmo Date: Thu, 25 Jan 2018 16:14:16 -0800 Subject: bpf: add selftest for tcpbpf Added a selftest for tcpbpf (sock_ops) that checks that the appropriate callbacks occured and that it can access tcp_sock fields and that their values are correct. Run with command: ./test_tcpbpf_user Adding the flag "-d" will show why it did not pass. Signed-off-by: Lawrence Brakmo Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/bpf.h | 86 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 82 insertions(+), 4 deletions(-) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index af1f49ad8b88..db6bdc375126 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -17,7 +17,7 @@ #define BPF_ALU64 0x07 /* alu mode in double word width */ /* ld/ldx fields */ -#define BPF_DW 0x18 /* double word */ +#define BPF_DW 0x18 /* double word (64-bit) */ #define BPF_XADD 0xc0 /* exclusive add */ /* alu/jmp fields */ @@ -642,6 +642,14 @@ union bpf_attr { * @optlen: length of optval in bytes * Return: 0 or negative error * + * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags) + * Set callback flags for sock_ops + * @bpf_sock_ops: pointer to bpf_sock_ops_kern struct + * @flags: flags value + * Return: 0 for no error + * -EINVAL if there is no full tcp socket + * bits in flags that are not supported by current kernel + * * int bpf_skb_adjust_room(skb, len_diff, mode, flags) * Grow or shrink room in sk_buff. * @skb: pointer to skb @@ -748,7 +756,8 @@ union bpf_attr { FN(perf_event_read_value), \ FN(perf_prog_read_value), \ FN(getsockopt), \ - FN(override_return), + FN(override_return), \ + FN(sock_ops_cb_flags_set), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -952,8 +961,9 @@ struct bpf_map_info { struct bpf_sock_ops { __u32 op; union { - __u32 reply; - __u32 replylong[4]; + __u32 args[4]; /* Optionally passed to bpf program */ + __u32 reply; /* Returned by bpf program */ + __u32 replylong[4]; /* Optionally returned by bpf prog */ }; __u32 family; __u32 remote_ip4; /* Stored in network byte order */ @@ -968,8 +978,39 @@ struct bpf_sock_ops { */ __u32 snd_cwnd; __u32 srtt_us; /* Averaged RTT << 3 in usecs */ + __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */ + __u32 state; + __u32 rtt_min; + __u32 snd_ssthresh; + __u32 rcv_nxt; + __u32 snd_nxt; + __u32 snd_una; + __u32 mss_cache; + __u32 ecn_flags; + __u32 rate_delivered; + __u32 rate_interval_us; + __u32 packets_out; + __u32 retrans_out; + __u32 total_retrans; + __u32 segs_in; + __u32 data_segs_in; + __u32 segs_out; + __u32 data_segs_out; + __u32 lost_out; + __u32 sacked_out; + __u32 sk_txhash; + __u64 bytes_received; + __u64 bytes_acked; }; +/* Definitions for bpf_sock_ops_cb_flags */ +#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) +#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) +#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) +#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently + * supported cb flags + */ + /* List of known BPF sock_ops operators. * New entries can only be added at the end */ @@ -1003,6 +1044,43 @@ enum { * a congestion threshold. RTTs above * this indicate congestion */ + BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered. + * Arg1: value of icsk_retransmits + * Arg2: value of icsk_rto + * Arg3: whether RTO has expired + */ + BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted. + * Arg1: sequence number of 1st byte + * Arg2: # segments + * Arg3: return value of + * tcp_transmit_skb (0 => success) + */ + BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state. + * Arg1: old_state + * Arg2: new_state + */ +}; + +/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect + * changes between the TCP and BPF versions. Ideally this should never happen. + * If it does, we need to add code to convert them before calling + * the BPF sock_ops function. + */ +enum { + BPF_TCP_ESTABLISHED = 1, + BPF_TCP_SYN_SENT, + BPF_TCP_SYN_RECV, + BPF_TCP_FIN_WAIT1, + BPF_TCP_FIN_WAIT2, + BPF_TCP_TIME_WAIT, + BPF_TCP_CLOSE, + BPF_TCP_CLOSE_WAIT, + BPF_TCP_LAST_ACK, + BPF_TCP_LISTEN, + BPF_TCP_CLOSING, /* Now a valid state */ + BPF_TCP_NEW_SYN_RECV, + + BPF_TCP_MAX_STATES /* Leave at the end! */ }; #define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ -- cgit