summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Borkmann <daniel@iogearbox.net>2018-05-04 23:41:05 +0200
committerDaniel Borkmann <daniel@iogearbox.net>2018-05-04 23:41:06 +0200
commita5458aa923be8960a78d6fdfa1c6ff769b34deb2 (patch)
tree1e14a76c6a39662d602bf9589a31245ca9d74cdd
parentc27638c0628a5507e421f325dae3d3c9a45f227e (diff)
parentab7f5bf0928be2f148d000a6eaa6c0a36e74750e (diff)
Merge branch 'bpf-event-output-offload'
Jakub Kicinski says: ==================== This series centres on NFP offload of bpf_event_output(). The first patch allows perf event arrays to be used by offloaded programs. Next patch makes the nfp driver keep track of such arrays to be able to filter FW events referring to maps. Perf event arrays are not device bound. Having driver reimplement and manage the perf array seems brittle and unnecessary. Patch 4 moves slightly the verifier step which replaces map fds with map pointers. This is useful for nfp JIT since we can then easily replace host pointers with NFP table ids (patch 6). This allows us to lift the limitation on map helpers having to be used with the same map pointer on all paths. Second use of replacing fds with real host map pointers is that we can use the host map pointer as a key for FW events in perf event array offload. Patch 5 adds perf event output offload support for the NFP. There are some differences between bpf_event_output() offloaded and non-offloaded version. The FW messages which carry events may get dropped and reordered relatively easily. The return codes from the helper are also not guaranteed to match the host. Users are warned about some of those discrepancies with a one time warning message to kernel logs. bpftool gains an ability to dump perf ring events in a very simple format. This was very useful for testing and simple debug, maybe it will be useful to others? Last patch is a trivial comment fix. ==================== Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/cmsg.c16
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/fw.h20
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/jit.c76
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.c28
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.h24
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/offload.c172
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/verifier.c78
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_app.c2
-rw-r--r--include/linux/bpf.h5
-rw-r--r--kernel/bpf/core.c1
-rw-r--r--kernel/bpf/offload.c6
-rw-r--r--kernel/bpf/syscall.c2
-rw-r--r--kernel/bpf/verifier.c14
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-map.rst40
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool.rst2
-rw-r--r--tools/bpf/bpftool/Makefile7
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool36
-rw-r--r--tools/bpf/bpftool/common.c77
-rw-r--r--tools/bpf/bpftool/main.h7
-rw-r--r--tools/bpf/bpftool/map.c80
-rw-r--r--tools/bpf/bpftool/map_perf_ring.c347
21 files changed, 916 insertions, 124 deletions
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
index 7e298148ca26..cb87fccb9f6a 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
@@ -102,6 +102,15 @@ nfp_bpf_cmsg_map_req_alloc(struct nfp_app_bpf *bpf, unsigned int n)
return nfp_bpf_cmsg_alloc(bpf, size);
}
+static u8 nfp_bpf_cmsg_get_type(struct sk_buff *skb)
+{
+ struct cmsg_hdr *hdr;
+
+ hdr = (struct cmsg_hdr *)skb->data;
+
+ return hdr->type;
+}
+
static unsigned int nfp_bpf_cmsg_get_tag(struct sk_buff *skb)
{
struct cmsg_hdr *hdr;
@@ -431,6 +440,11 @@ void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb)
goto err_free;
}
+ if (nfp_bpf_cmsg_get_type(skb) == CMSG_TYPE_BPF_EVENT) {
+ nfp_bpf_event_output(bpf, skb);
+ return;
+ }
+
nfp_ctrl_lock(bpf->app->ctrl);
tag = nfp_bpf_cmsg_get_tag(skb);
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/fw.h b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
index 39639ac28b01..3dbc21653ce5 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/fw.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
@@ -37,6 +37,14 @@
#include <linux/bitops.h>
#include <linux/types.h>
+/* Kernel's enum bpf_reg_type is not uABI so people may change it breaking
+ * our FW ABI. In that case we will do translation in the driver.
+ */
+#define NFP_BPF_SCALAR_VALUE 1
+#define NFP_BPF_MAP_VALUE 4
+#define NFP_BPF_STACK 6
+#define NFP_BPF_PACKET_DATA 8
+
enum bpf_cap_tlv_type {
NFP_BPF_CAP_TYPE_FUNC = 1,
NFP_BPF_CAP_TYPE_ADJUST_HEAD = 2,
@@ -81,6 +89,7 @@ enum nfp_bpf_cmsg_type {
CMSG_TYPE_MAP_DELETE = 5,
CMSG_TYPE_MAP_GETNEXT = 6,
CMSG_TYPE_MAP_GETFIRST = 7,
+ CMSG_TYPE_BPF_EVENT = 8,
__CMSG_TYPE_MAP_MAX,
};
@@ -155,4 +164,13 @@ struct cmsg_reply_map_op {
__be32 resv;
struct cmsg_key_value_pair elem[0];
};
+
+struct cmsg_bpf_event {
+ struct cmsg_hdr hdr;
+ __be32 cpu_id;
+ __be64 map_ptr;
+ __be32 data_size;
+ __be32 pkt_size;
+ u8 data[0];
+};
#endif
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 65f0791cae0c..326a2085d650 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2016-2017 Netronome Systems, Inc.
+ * Copyright (C) 2016-2018 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
@@ -1395,15 +1395,9 @@ static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
static int
map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
- struct bpf_offloaded_map *offmap;
- struct nfp_bpf_map *nfp_map;
bool load_lm_ptr;
u32 ret_tgt;
s64 lm_off;
- swreg tid;
-
- offmap = (struct bpf_offloaded_map *)meta->arg1.map_ptr;
- nfp_map = offmap->dev_priv;
/* We only have to reload LM0 if the key is not at start of stack */
lm_off = nfp_prog->stack_depth;
@@ -1416,17 +1410,12 @@ map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
if (meta->func_id == BPF_FUNC_map_update_elem)
emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2);
- /* Load map ID into a register, it should actually fit as an immediate
- * but in case it doesn't deal with it here, not in the delay slots.
- */
- tid = ur_load_imm_any(nfp_prog, nfp_map->tid, imm_a(nfp_prog));
-
emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
2, RELO_BR_HELPER);
ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
/* Load map ID into A0 */
- wrp_mov(nfp_prog, reg_a(0), tid);
+ wrp_mov(nfp_prog, reg_a(0), reg_a(2));
/* Load the return address into B0 */
wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);
@@ -1456,6 +1445,31 @@ nfp_get_prandom_u32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
return 0;
}
+static int
+nfp_perf_event_output(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ swreg ptr_type;
+ u32 ret_tgt;
+
+ ptr_type = ur_load_imm_any(nfp_prog, meta->arg1.type, imm_a(nfp_prog));
+
+ ret_tgt = nfp_prog_current_offset(nfp_prog) + 3;
+
+ emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
+ 2, RELO_BR_HELPER);
+
+ /* Load ptr type into A1 */
+ wrp_mov(nfp_prog, reg_a(1), ptr_type);
+
+ /* Load the return address into B0 */
+ wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);
+
+ if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
+ return -EINVAL;
+
+ return 0;
+}
+
/* --- Callbacks --- */
static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
@@ -2411,6 +2425,8 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
return map_call_stack_common(nfp_prog, meta);
case BPF_FUNC_get_prandom_u32:
return nfp_get_prandom_u32(nfp_prog, meta);
+ case BPF_FUNC_perf_event_output:
+ return nfp_perf_event_output(nfp_prog, meta);
default:
WARN_ONCE(1, "verifier allowed unsupported function\n");
return -EOPNOTSUPP;
@@ -3227,6 +3243,33 @@ static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
return 0;
}
+static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog)
+{
+ struct nfp_insn_meta *meta1, *meta2;
+ struct nfp_bpf_map *nfp_map;
+ struct bpf_map *map;
+
+ nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
+ if (meta1->skip || meta2->skip)
+ continue;
+
+ if (meta1->insn.code != (BPF_LD | BPF_IMM | BPF_DW) ||
+ meta1->insn.src_reg != BPF_PSEUDO_MAP_FD)
+ continue;
+
+ map = (void *)(unsigned long)((u32)meta1->insn.imm |
+ (u64)meta2->insn.imm << 32);
+ if (bpf_map_offload_neutral(map))
+ continue;
+ nfp_map = map_to_offmap(map)->dev_priv;
+
+ meta1->insn.imm = nfp_map->tid;
+ meta2->insn.imm = 0;
+ }
+
+ return 0;
+}
+
static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len)
{
__le64 *ustore = (__force __le64 *)prog;
@@ -3263,6 +3306,10 @@ int nfp_bpf_jit(struct nfp_prog *nfp_prog)
{
int ret;
+ ret = nfp_bpf_replace_map_ptrs(nfp_prog);
+ if (ret)
+ return ret;
+
ret = nfp_bpf_optimize(nfp_prog);
if (ret)
return ret;
@@ -3353,6 +3400,9 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
case BPF_FUNC_map_delete_elem:
val = nfp_prog->bpf->helpers.map_delete;
break;
+ case BPF_FUNC_perf_event_output:
+ val = nfp_prog->bpf->helpers.perf_event_output;
+ break;
default:
pr_err("relocation of unknown helper %d\n",
val);
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index 1dc424685f4e..d72f9e7f42da 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
@@ -43,6 +43,14 @@
#include "fw.h"
#include "main.h"
+const struct rhashtable_params nfp_bpf_maps_neutral_params = {
+ .nelem_hint = 4,
+ .key_len = FIELD_SIZEOF(struct nfp_bpf_neutral_map, ptr),
+ .key_offset = offsetof(struct nfp_bpf_neutral_map, ptr),
+ .head_offset = offsetof(struct nfp_bpf_neutral_map, l),
+ .automatic_shrinking = true,
+};
+
static bool nfp_net_ebpf_capable(struct nfp_net *nn)
{
#ifdef __LITTLE_ENDIAN
@@ -290,6 +298,9 @@ nfp_bpf_parse_cap_func(struct nfp_app_bpf *bpf, void __iomem *value, u32 length)
case BPF_FUNC_map_delete_elem:
bpf->helpers.map_delete = readl(&cap->func_addr);
break;
+ case BPF_FUNC_perf_event_output:
+ bpf->helpers.perf_event_output = readl(&cap->func_addr);
+ break;
}
return 0;
@@ -401,17 +412,28 @@ static int nfp_bpf_init(struct nfp_app *app)
init_waitqueue_head(&bpf->cmsg_wq);
INIT_LIST_HEAD(&bpf->map_list);
- err = nfp_bpf_parse_capabilities(app);
+ err = rhashtable_init(&bpf->maps_neutral, &nfp_bpf_maps_neutral_params);
if (err)
goto err_free_bpf;
+ err = nfp_bpf_parse_capabilities(app);
+ if (err)
+ goto err_free_neutral_maps;
+
return 0;
+err_free_neutral_maps:
+ rhashtable_destroy(&bpf->maps_neutral);
err_free_bpf:
kfree(bpf);
return err;
}
+static void nfp_check_rhashtable_empty(void *ptr, void *arg)
+{
+ WARN_ON_ONCE(1);
+}
+
static void nfp_bpf_clean(struct nfp_app *app)
{
struct nfp_app_bpf *bpf = app->priv;
@@ -419,6 +441,8 @@ static void nfp_bpf_clean(struct nfp_app *app)
WARN_ON(!skb_queue_empty(&bpf->cmsg_replies));
WARN_ON(!list_empty(&bpf->map_list));
WARN_ON(bpf->maps_in_use || bpf->map_elems_in_use);
+ rhashtable_free_and_destroy(&bpf->maps_neutral,
+ nfp_check_rhashtable_empty, NULL);
kfree(bpf);
}
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 68b5d326483d..82682378d57f 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2016-2017 Netronome Systems, Inc.
+ * Copyright (C) 2016-2018 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
@@ -39,6 +39,7 @@
#include <linux/bpf_verifier.h>
#include <linux/kernel.h>
#include <linux/list.h>
+#include <linux/rhashtable.h>
#include <linux/skbuff.h>
#include <linux/types.h>
#include <linux/wait.h>
@@ -114,6 +115,8 @@ enum pkt_vec {
* @maps_in_use: number of currently offloaded maps
* @map_elems_in_use: number of elements allocated to offloaded maps
*
+ * @maps_neutral: hash table of offload-neutral maps (on pointer)
+ *
* @adjust_head: adjust head capability
* @adjust_head.flags: extra flags for adjust head
* @adjust_head.off_min: minimal packet offset within buffer required
@@ -133,6 +136,7 @@ enum pkt_vec {
* @helpers.map_lookup: map lookup helper address
* @helpers.map_update: map update helper address
* @helpers.map_delete: map delete helper address
+ * @helpers.perf_event_output: output perf event to a ring buffer
*
* @pseudo_random: FW initialized the pseudo-random machinery (CSRs)
*/
@@ -150,6 +154,8 @@ struct nfp_app_bpf {
unsigned int maps_in_use;
unsigned int map_elems_in_use;
+ struct rhashtable maps_neutral;
+
struct nfp_bpf_cap_adjust_head {
u32 flags;
int off_min;
@@ -171,6 +177,7 @@ struct nfp_app_bpf {
u32 map_lookup;
u32 map_update;
u32 map_delete;
+ u32 perf_event_output;
} helpers;
bool pseudo_random;
@@ -199,6 +206,14 @@ struct nfp_bpf_map {
enum nfp_bpf_map_use use_map[];
};
+struct nfp_bpf_neutral_map {
+ struct rhash_head l;
+ struct bpf_map *ptr;
+ u32 count;
+};
+
+extern const struct rhashtable_params nfp_bpf_maps_neutral_params;
+
struct nfp_prog;
struct nfp_insn_meta;
typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
@@ -367,6 +382,8 @@ static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta)
* @error: error code if something went wrong
* @stack_depth: max stack depth from the verifier
* @adjust_head_location: if program has single adjust head call - the insn no.
+ * @map_records_cnt: the number of map pointers recorded for this prog
+ * @map_records: the map record pointers from bpf->maps_neutral
* @insns: list of BPF instruction wrappers (struct nfp_insn_meta)
*/
struct nfp_prog {
@@ -390,6 +407,9 @@ struct nfp_prog {
unsigned int stack_depth;
unsigned int adjust_head_location;
+ unsigned int map_records_cnt;
+ struct nfp_bpf_neutral_map **map_records;
+
struct list_head insns;
};
@@ -440,5 +460,7 @@ int nfp_bpf_ctrl_lookup_entry(struct bpf_offloaded_map *offmap,
int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap,
void *key, void *next_key);
+int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb);
+
void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb);
#endif
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 42d98792bd25..4db0ac1e42a8 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2016-2017 Netronome Systems, Inc.
+ * Copyright (C) 2016-2018 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
@@ -57,6 +57,126 @@
#include "../nfp_net.h"
static int
+nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
+ struct bpf_map *map)
+{
+ struct nfp_bpf_neutral_map *record;
+ int err;
+
+ /* Map record paths are entered via ndo, update side is protected. */
+ ASSERT_RTNL();
+
+ /* Reuse path - other offloaded program is already tracking this map. */
+ record = rhashtable_lookup_fast(&bpf->maps_neutral, &map,
+ nfp_bpf_maps_neutral_params);
+ if (record) {
+ nfp_prog->map_records[nfp_prog->map_records_cnt++] = record;
+ record->count++;
+ return 0;
+ }
+
+ /* Grab a single ref to the map for our record. The prog destroy ndo
+ * happens after free_used_maps().
+ */
+ map = bpf_map_inc(map, false);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ record = kmalloc(sizeof(*record), GFP_KERNEL);
+ if (!record) {
+ err = -ENOMEM;
+ goto err_map_put;
+ }
+
+ record->ptr = map;
+ record->count = 1;
+
+ err = rhashtable_insert_fast(&bpf->maps_neutral, &record->l,
+ nfp_bpf_maps_neutral_params);
+ if (err)
+ goto err_free_rec;
+
+ nfp_prog->map_records[nfp_prog->map_records_cnt++] = record;
+
+ return 0;
+
+err_free_rec:
+ kfree(record);
+err_map_put:
+ bpf_map_put(map);
+ return err;
+}
+
+static void
+nfp_map_ptrs_forget(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog)
+{
+ bool freed = false;
+ int i;
+
+ ASSERT_RTNL();
+
+ for (i = 0; i < nfp_prog->map_records_cnt; i++) {
+ if (--nfp_prog->map_records[i]->count) {
+ nfp_prog->map_records[i] = NULL;
+ continue;
+ }
+
+ WARN_ON(rhashtable_remove_fast(&bpf->maps_neutral,
+ &nfp_prog->map_records[i]->l,
+ nfp_bpf_maps_neutral_params));
+ freed = true;
+ }
+
+ if (freed) {
+ synchronize_rcu();
+
+ for (i = 0; i < nfp_prog->map_records_cnt; i++)
+ if (nfp_prog->map_records[i]) {
+ bpf_map_put(nfp_prog->map_records[i]->ptr);
+ kfree(nfp_prog->map_records[i]);
+ }
+ }
+
+ kfree(nfp_prog->map_records);
+ nfp_prog->map_records = NULL;
+ nfp_prog->map_records_cnt = 0;
+}
+
+static int
+nfp_map_ptrs_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
+ struct bpf_prog *prog)
+{
+ int i, cnt, err;
+
+ /* Quickly count the maps we will have to remember */
+ cnt = 0;
+ for (i = 0; i < prog->aux->used_map_cnt; i++)
+ if (bpf_map_offload_neutral(prog->aux->used_maps[i]))
+ cnt++;
+ if (!cnt)
+ return 0;
+
+ nfp_prog->map_records = kmalloc_array(cnt,
+ sizeof(nfp_prog->map_records[0]),
+ GFP_KERNEL);
+ if (!nfp_prog->map_records)
+ return -ENOMEM;
+
+ for (i = 0; i < prog->aux->used_map_cnt; i++)
+ if (bpf_map_offload_neutral(prog->aux->used_maps[i])) {
+ err = nfp_map_ptr_record(bpf, nfp_prog,
+ prog->aux->used_maps[i]);
+ if (err) {
+ nfp_map_ptrs_forget(bpf, nfp_prog);
+ return err;
+ }
+ }
+ WARN_ON(cnt != nfp_prog->map_records_cnt);
+
+ return 0;
+}
+
+static int
nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
unsigned int cnt)
{
@@ -151,7 +271,7 @@ static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog)
prog->aux->offload->jited_len = nfp_prog->prog_len * sizeof(u64);
prog->aux->offload->jited_image = nfp_prog->prog;
- return 0;
+ return nfp_map_ptrs_record(nfp_prog->bpf, nfp_prog, prog);
}
static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog)
@@ -159,6 +279,7 @@ static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog)
struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;
kvfree(nfp_prog->prog);
+ nfp_map_ptrs_forget(nfp_prog->bpf, nfp_prog);
nfp_prog_free(nfp_prog);
return 0;
@@ -320,6 +441,53 @@ int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf)
}
}
+static unsigned long
+nfp_bpf_perf_event_copy(void *dst, const void *src,
+ unsigned long off, unsigned long len)
+{
+ memcpy(dst, src + off, len);
+ return 0;
+}
+
+int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb)
+{
+ struct cmsg_bpf_event *cbe = (void *)skb->data;
+ u32 pkt_size, data_size;
+ struct bpf_map *map;
+
+ if (skb->len < sizeof(struct cmsg_bpf_event))
+ goto err_drop;
+
+ pkt_size = be32_to_cpu(cbe->pkt_size);
+ data_size = be32_to_cpu(cbe->data_size);
+ map = (void *)(unsigned long)be64_to_cpu(cbe->map_ptr);
+
+ if (skb->len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size)
+ goto err_drop;
+ if (cbe->hdr.ver != CMSG_MAP_ABI_VERSION)
+ goto err_drop;
+
+ rcu_read_lock();
+ if (!rhashtable_lookup_fast(&bpf->maps_neutral, &map,
+ nfp_bpf_maps_neutral_params)) {
+ rcu_read_unlock();
+ pr_warn("perf event: dest map pointer %px not recognized, dropping event\n",
+ map);
+ goto err_drop;
+ }
+
+ bpf_event_output(map, be32_to_cpu(cbe->cpu_id),
+ &cbe->data[round_up(pkt_size, 4)], data_size,
+ cbe->data, pkt_size, nfp_bpf_perf_event_copy);
+ rcu_read_unlock();
+
+ dev_consume_skb_any(skb);
+ return 0;
+err_drop:
+ dev_kfree_skb_any(skb);
+ return -EINVAL;
+}
+
static int
nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 06ad53ce4ad9..e163f3cfa47d 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2016-2017 Netronome Systems, Inc.
+ * Copyright (C) 2016-2018 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
@@ -36,6 +36,8 @@
#include <linux/kernel.h>
#include <linux/pkt_cls.h>
+#include "../nfp_app.h"
+#include "../nfp_main.h"
#include "fw.h"
#include "main.h"
@@ -149,15 +151,6 @@ nfp_bpf_map_call_ok(const char *fname, struct bpf_verifier_env *env,
return false;
}
- /* Rest of the checks is only if we re-parse the same insn */
- if (!meta->func_id)
- return true;
-
- if (meta->arg1.map_ptr != reg1->map_ptr) {
- pr_vlog(env, "%s: called for different map\n", fname);
- return false;
- }
-
return true;
}
@@ -216,6 +209,71 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
pr_vlog(env, "bpf_get_prandom_u32(): FW doesn't support random number generation\n");
return -EOPNOTSUPP;
+ case BPF_FUNC_perf_event_output:
+ BUILD_BUG_ON(NFP_BPF_SCALAR_VALUE != SCALAR_VALUE ||
+ NFP_BPF_MAP_VALUE != PTR_TO_MAP_VALUE ||
+ NFP_BPF_STACK != PTR_TO_STACK ||
+ NFP_BPF_PACKET_DATA != PTR_TO_PACKET);
+
+ if (!bpf->helpers.perf_event_output) {
+ pr_vlog(env, "event_output: not supported by FW\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* Force current CPU to make sure we can report the event
+ * wherever we get the control message from FW.
+ */
+ if (reg3->var_off.mask & BPF_F_INDEX_MASK ||
+ (reg3->var_off.value & BPF_F_INDEX_MASK) !=
+ BPF_F_CURRENT_CPU) {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg3->var_off);
+ pr_vlog(env, "event_output: must use BPF_F_CURRENT_CPU, var_off: %s\n",
+ tn_buf);
+ return -EOPNOTSUPP;
+ }
+
+ /* Save space in meta, we don't care about arguments other
+ * than 4th meta, shove it into arg1.
+ */
+ reg1 = cur_regs(env) + BPF_REG_4;
+
+ if (reg1->type != SCALAR_VALUE /* NULL ptr */ &&
+ reg1->type != PTR_TO_STACK &&
+ reg1->type != PTR_TO_MAP_VALUE &&
+ reg1->type != PTR_TO_PACKET) {
+ pr_vlog(env, "event_output: unsupported ptr type: %d\n",
+ reg1->type);
+ return -EOPNOTSUPP;
+ }
+
+ if (reg1->type == PTR_TO_STACK &&
+ !nfp_bpf_stack_arg_ok("event_output", env, reg1, NULL))
+ return -EOPNOTSUPP;
+
+ /* Warn user that on offload NFP may return success even if map
+ * is not going to accept the event, since the event output is
+ * fully async and device won't know the state of the map.
+ * There is also FW limitation on the event length.
+ *
+ * Lost events will not show up on the perf ring, driver
+ * won't see them at all. Events may also get reordered.
+ */
+ dev_warn_once(&nfp_prog->bpf->app->pf->pdev->dev,
+ "bpf: note: return codes and behavior of bpf_event_output() helper differs for offloaded programs!\n");
+ pr_vlog(env, "warning: return codes and behavior of event_output helper differ for offload!\n");
+
+ if (!meta->func_id)
+ break;
+
+ if (reg1->type != meta->arg1.type) {
+ pr_vlog(env, "event_output: ptr type changed: %d %d\n",
+ meta->arg1.type, reg1->type);
+ return -EINVAL;
+ }
+ break;
+
default:
pr_vlog(env, "unsupported function id: %d\n", func_id);
return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c
index 6aedef0ad433..0e0253c7e17b 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 0e00a13ff01b..321969da67b7 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -110,6 +110,11 @@ static inline struct bpf_offloaded_map *map_to_offmap(struct bpf_map *map)
return container_of(map, struct bpf_offloaded_map, map);
}
+static inline bool bpf_map_offload_neutral(const struct bpf_map *map)
+{
+ return map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
+}
+
static inline bool bpf_map_support_seq_show(const struct bpf_map *map)
{
return map->ops->map_seq_show_elem && map->ops->map_check_btf;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 1127552c8033..d0d7d9462368 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1719,6 +1719,7 @@ bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
{
return -ENOTSUPP;
}
+EXPORT_SYMBOL_GPL(bpf_event_output);
/* Always built-in helper functions. */
const struct bpf_func_proto bpf_tail_call_proto = {
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index c9401075b58c..ac747d5cf7c6 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
*
* This software is licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
@@ -474,8 +474,10 @@ bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map)
struct bpf_prog_offload *offload;
bool ret;
- if (!bpf_prog_is_dev_bound(prog->aux) || !bpf_map_is_dev_bound(map))
+ if (!bpf_prog_is_dev_bound(prog->aux))
return false;
+ if (!bpf_map_is_dev_bound(map))
+ return bpf_map_offload_neutral(map);
down_read(&bpf_devs_lock);
offload = prog->aux->offload;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 263e13ede029..9b87198deea2 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -282,6 +282,7 @@ void bpf_map_put(struct bpf_map *map)
{
__bpf_map_put(map, true);
}
+EXPORT_SYMBOL_GPL(bpf_map_put);
void bpf_map_put_with_uref(struct bpf_map *map)
{
@@ -543,6 +544,7 @@ struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
atomic_inc(&map->usercnt);
return map;
}
+EXPORT_SYMBOL_GPL(bpf_map_inc);
struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 37e0affa515e..d5e1a6c4165d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5055,7 +5055,7 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
/* hold the map. If the program is rejected by verifier,
* the map will be released by release_maps() or it
* will be used by the valid program until it's unloaded
- * and all maps are released in free_bpf_prog_info()
+ * and all maps are released in free_used_maps()
*/
map = bpf_map_inc(map, false);
if (IS_ERR(map)) {
@@ -5741,16 +5741,16 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
env->strict_alignment = true;
+ ret = replace_map_fd_with_map_ptr(env);
+ if (ret < 0)
+ goto skip_full_check;
+
if (bpf_prog_is_dev_bound(env->prog->aux)) {
ret = bpf_prog_offload_verifier_prep(env);
if (ret)
- goto err_unlock;
+ goto skip_full_check;
}
- ret = replace_map_fd_with_map_ptr(env);
- if (ret < 0)
- goto skip_full_check;
-
env->explored_states = kcalloc(env->prog->len,
sizeof(struct bpf_verifier_state_list *),
GFP_USER);
@@ -5821,7 +5821,7 @@ skip_full_check:
err_release_maps:
if (!env->prog->aux->used_maps)
/* if we didn't copy map pointers into bpf_prog_info, release
- * them now. Otherwise free_bpf_prog_info() will release them.
+ * them now. Otherwise free_used_maps() will release them.
*/
release_maps(env);
*prog = env->prog;
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 5f512b14bff9..a6258bc8ec4f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -22,17 +22,19 @@ MAP COMMANDS
=============
| **bpftool** **map { show | list }** [*MAP*]
-| **bpftool** **map dump** *MAP*
-| **bpftool** **map update** *MAP* **key** [**hex**] *BYTES* **value** [**hex**] *VALUE* [*UPDATE_FLAGS*]
-| **bpftool** **map lookup** *MAP* **key** [**hex**] *BYTES*
-| **bpftool** **map getnext** *MAP* [**key** [**hex**] *BYTES*]
-| **bpftool** **map delete** *MAP* **key** [**hex**] *BYTES*
-| **bpftool** **map pin** *MAP* *FILE*
+| **bpftool** **map dump** *MAP*
+| **bpftool** **map update** *MAP* **key** *DATA* **value** *VALUE* [*UPDATE_FLAGS*]
+| **bpftool** **map lookup** *MAP* **key** *DATA*
+| **bpftool** **map getnext** *MAP* [**key** *DATA*]
+| **bpftool** **map delete** *MAP* **key** *DATA*
+| **bpftool** **map pin** *MAP* *FILE*
+| **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*]
| **bpftool** **map help**
|
| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
+| *DATA* := { [**hex**] *BYTES* }
| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
-| *VALUE* := { *BYTES* | *MAP* | *PROG* }
+| *VALUE* := { *DATA* | *MAP* | *PROG* }
| *UPDATE_FLAGS* := { **any** | **exist** | **noexist** }
DESCRIPTION
@@ -48,7 +50,7 @@ DESCRIPTION
**bpftool map dump** *MAP*
Dump all entries in a given *MAP*.
- **bpftool map update** *MAP* **key** [**hex**] *BYTES* **value** [**hex**] *VALUE* [*UPDATE_FLAGS*]
+ **bpftool map update** *MAP* **key** *DATA* **value** *VALUE* [*UPDATE_FLAGS*]
Update map entry for a given *KEY*.
*UPDATE_FLAGS* can be one of: **any** update existing entry
@@ -61,13 +63,13 @@ DESCRIPTION
the bytes are parsed as decimal values, unless a "0x" prefix
(for hexadecimal) or a "0" prefix (for octal) is provided.
- **bpftool map lookup** *MAP* **key** [**hex**] *BYTES*
+ **bpftool map lookup** *MAP* **key** *DATA*
Lookup **key** in the map.
- **bpftool map getnext** *MAP* [**key** [**hex**] *BYTES*]
+ **bpftool map getnext** *MAP* [**key** *DATA*]
Get next key. If *key* is not specified, get first key.
- **bpftool map delete** *MAP* **key** [**hex**] *BYTES*
+ **bpftool map delete** *MAP* **key** *DATA*
Remove entry from the map.
**bpftool map pin** *MAP* *FILE*
@@ -75,6 +77,22 @@ DESCRIPTION
Note: *FILE* must be located in *bpffs* mount.
+ **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*]
+ Read events from a BPF_MAP_TYPE_PERF_EVENT_ARRAY map.
+
+ Install perf rings into a perf event array map and dump
+ output of any bpf_perf_event_output() call in the kernel.
+ By default read the number of CPUs on the system and
+ install perf ring for each CPU in the corresponding index
+ in the array.
+
+ If **cpu** and **index** are specified, install perf ring
+ for given **cpu** at **index** in the array (single ring).
+
+ Note that installing a perf ring into an array will silently
+ replace any existing ring. Any other application will stop
+ receiving events if it installed its rings earlier.
+
**bpftool map help**
Print short help message.
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 20689a321ffe..564cb0d9692b 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -23,7 +23,7 @@ SYNOPSIS
*MAP-COMMANDS* :=
{ **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete**
- | **pin** | **help** }
+ | **pin** | **event_pipe** | **help** }
*PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin**
| **load** | **help** }
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 4e69782c4a79..892dbf095bff 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -39,7 +39,12 @@ CC = gcc
CFLAGS += -O2
CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -Wno-missing-field-initializers
-CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/
+CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \
+ -I$(srctree)/kernel/bpf/ \
+ -I$(srctree)/tools/include \
+ -I$(srctree)/tools/include/uapi \
+ -I$(srctree)/tools/lib/bpf \
+ -I$(srctree)/tools/perf
CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"'
LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 852d84a98acd..b301c9b315f1 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -1,6 +1,6 @@
# bpftool(8) bash completion -*- shell-script -*-
#
-# Copyright (C) 2017 Netronome Systems, Inc.
+# Copyright (C) 2017-2018 Netronome Systems, Inc.
#
# This software is dual licensed under the GNU General License
# Version 2, June 1991 as shown in the file COPYING in the top-level
@@ -79,6 +79,14 @@ _bpftool_get_map_ids()
command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) )
}
+_bpftool_get_perf_map_ids()
+{
+ COMPREPLY+=( $( compgen -W "$( bpftool -jp map 2>&1 | \
+ command grep -C2 perf_event_array | \
+ command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) )
+}
+
+
_bpftool_get_prog_ids()
{
COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \
@@ -359,10 +367,34 @@ _bpftool()
fi
return 0
;;
+ event_pipe)
+ case $prev in
+ $command)
+ COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
+ return 0
+ ;;
+ id)
+ _bpftool_get_perf_map_ids
+ return 0
+ ;;
+ cpu)
+ return 0
+ ;;
+ index)
+ return 0
+ ;;
+ *)
+ _bpftool_once_attr 'cpu'
+ _bpftool_once_attr 'index'
+ return 0
+ ;;
+ esac
+ ;;
*)
[[ $prev == $object ]] && \
COMPREPLY=( $( compgen -W 'delete dump getnext help \
- lookup pin show list update' -- "$cur" ) )
+ lookup pin event_pipe show list update' -- \
+ "$cur" ) )
;;
esac
;;
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 465995281dcd..32f9e397a6c0 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
@@ -33,6 +33,7 @@
/* Author: Jakub Kicinski <kubakici@wp.pl> */
+#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <fts.h>
@@ -330,6 +331,16 @@ char *get_fdinfo(int fd, const char *key)
return NULL;
}
+void print_data_json(uint8_t *data, size_t len)
+{
+ unsigned int i;
+
+ jsonw_start_array(json_wtr);
+ for (i = 0; i < len; i++)
+ jsonw_printf(json_wtr, "%d", data[i]);
+ jsonw_end_array(json_wtr);
+}
+
void print_hex_data_json(uint8_t *data, size_t len)
{
unsigned int i;
@@ -420,6 +431,70 @@ void delete_pinned_obj_table(struct pinned_obj_table *tab)
}
}
+unsigned int get_page_size(void)
+{
+ static int result;
+
+ if (!result)
+ result = getpagesize();
+ return result;
+}
+
+unsigned int get_possible_cpus(void)
+{
+ static unsigned int result;
+ char buf[128];
+ long int n;
+ char *ptr;
+ int fd;
+
+ if (result)
+ return result;
+
+ fd = open("/sys/devices/system/cpu/possible", O_RDONLY);
+ if (fd < 0) {
+ p_err("can't open sysfs possible cpus");
+ exit(-1);
+ }
+
+ n = read(fd, buf, sizeof(buf));
+ if (n < 2) {
+ p_err("can't read sysfs possible cpus");
+ exit(-1);
+ }
+ close(fd);
+
+ if (n == sizeof(buf)) {
+ p_err("read sysfs possible cpus overflow");
+ exit(-1);
+ }
+
+ ptr = buf;
+ n = 0;
+ while (*ptr && *ptr != '\n') {
+ unsigned int a, b;
+
+ if (sscanf(ptr, "%u-%u", &a, &b) == 2) {
+ n += b - a + 1;
+
+ ptr = strchr(ptr, '-') + 1;
+ } else if (sscanf(ptr, "%u", &a) == 1) {
+ n++;
+ } else {
+ assert(0);
+ }
+
+ while (isdigit(*ptr))
+ ptr++;
+ if (*ptr == ',')
+ ptr++;
+ }
+
+ result = n;
+
+ return result;
+}
+
static char *
ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf)
{
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index b8e9584d6246..6173cd997e7a 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
@@ -117,14 +117,19 @@ int do_pin_fd(int fd, const char *name);
int do_prog(int argc, char **arg);
int do_map(int argc, char **arg);
+int do_event_pipe(int argc, char **argv);
int do_cgroup(int argc, char **arg);
int prog_parse_fd(int *argc, char ***argv);
+int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
const char *arch);
+void print_data_json(uint8_t *data, size_t len);
void print_hex_data_json(uint8_t *data, size_t len);
+unsigned int get_page_size(void);
+unsigned int get_possible_cpus(void);
const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino);
#endif
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index a6cdb640a0d7..af6766e956ba 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2017 Netronome Systems, Inc.
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
@@ -34,7 +34,6 @@
/* Author: Jakub Kicinski <kubakici@wp.pl> */
#include <assert.h>
-#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
@@ -69,61 +68,6 @@ static const char * const map_type_name[] = {
[BPF_MAP_TYPE_CPUMAP] = "cpumap",
};
-static unsigned int get_possible_cpus(void)
-{
- static unsigned int result;
- char buf[128];
- long int n;
- char *ptr;
- int fd;
-
- if (result)
- return result;
-
- fd = open("/sys/devices/system/cpu/possible", O_RDONLY);
- if (fd < 0) {
- p_err("can't open sysfs possible cpus");
- exit(-1);
- }
-
- n = read(fd, buf, sizeof(buf));
- if (n < 2) {
- p_err("can't read sysfs possible cpus");
- exit(-1);
- }
- close(fd);
-
- if (n == sizeof(buf)) {
- p_err("read sysfs possible cpus overflow");
- exit(-1);
- }
-
- ptr = buf;
- n = 0;
- while (*ptr && *ptr != '\n') {
- unsigned int a, b;
-
- if (sscanf(ptr, "%u-%u", &a, &b) == 2) {
- n += b - a + 1;
-
- ptr = strchr(ptr, '-') + 1;
- } else if (sscanf(ptr, "%u", &a) == 1) {
- n++;
- } else {
- assert(0);
- }
-
- while (isdigit(*ptr))
- ptr++;
- if (*ptr == ',')
- ptr++;
- }
-
- result = n;
-
- return result;
-}
-
static bool map_is_per_cpu(__u32 type)
{
return type == BPF_MAP_TYPE_PERCPU_HASH ||
@@ -186,8 +130,7 @@ static int map_parse_fd(int *argc, char ***argv)
return -1;
}
-static int
-map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
+int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
{
int err;
int fd;
@@ -873,23 +816,25 @@ static int do_help(int argc, char **argv)
fprintf(stderr,
"Usage: %s %s { show | list } [MAP]\n"
- " %s %s dump MAP\n"
- " %s %s update MAP key [hex] BYTES value [hex] VALUE [UPDATE_FLAGS]\n"
- " %s %s lookup MAP key [hex] BYTES\n"
- " %s %s getnext MAP [key [hex] BYTES]\n"
- " %s %s delete MAP key [hex] BYTES\n"
- " %s %s pin MAP FILE\n"
+ " %s %s dump MAP\n"
+ " %s %s update MAP key DATA value VALUE [UPDATE_FLAGS]\n"
+ " %s %s lookup MAP key DATA\n"
+ " %s %s getnext MAP [key DATA]\n"
+ " %s %s delete MAP key DATA\n"
+ " %s %s pin MAP FILE\n"
+ " %s %s event_pipe MAP [cpu N index M]\n"
" %s %s help\n"
"\n"
" MAP := { id MAP_ID | pinned FILE }\n"
+ " DATA := { [hex] BYTES }\n"
" " HELP_SPEC_PROGRAM "\n"
- " VALUE := { BYTES | MAP | PROG }\n"
+ " VALUE := { DATA | MAP | PROG }\n"
" UPDATE_FLAGS := { any | exist | noexist }\n"
" " HELP_SPEC_OPTIONS "\n"
"",
bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2]);
+ bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]);
return 0;
}
@@ -904,6 +849,7 @@ static const struct cmd cmds[] = {
{ "getnext", do_getnext },
{ "delete", do_delete },
{ "pin", do_pin },
+ { "event_pipe", do_event_pipe },
{ 0 }
};
diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c
new file mode 100644
index 000000000000..c5a2ced8552d
--- /dev/null
+++ b/tools/bpf/bpftool/map_perf_ring.c
@@ -0,0 +1,347 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2018 Netronome Systems, Inc. */
+/* This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <errno.h>
+#include <fcntl.h>
+#include <libbpf.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <linux/bpf.h>
+#include <linux/perf_event.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+
+#include <bpf.h>
+#include <perf-sys.h>
+
+#include "main.h"
+
+#define MMAP_PAGE_CNT 16
+
+static bool stop;
+
+struct event_ring_info {
+ int fd;
+ int key;
+ unsigned int cpu;
+ void *mem;
+};
+
+struct perf_event_sample {
+ struct perf_event_header header;
+ __u32 size;
+ unsigned char data[];
+};
+
+static void int_exit(int signo)
+{
+ fprintf(stderr, "Stopping...\n");
+ stop = true;
+}
+
+static void
+print_bpf_output(struct event_ring_info *ring, struct perf_event_sample *e)
+{
+ struct {
+ struct perf_event_header header;
+ __u64 id;
+ __u64 lost;
+ } *lost = (void *)e;
+ struct timespec ts;
+
+ if (clock_gettime(CLOCK_MONOTONIC, &ts)) {
+ perror("Can't read clock for timestamp");
+ return;
+ }
+
+ if (json_output) {
+ jsonw_start_object(json_wtr);
+ jsonw_name(json_wtr, "timestamp");
+ jsonw_uint(json_wtr, ts.tv_sec * 1000000000ull + ts.tv_nsec);
+ jsonw_name(json_wtr, "type");
+ jsonw_uint(json_wtr, e->header.type);
+ jsonw_name(json_wtr, "cpu");
+ jsonw_uint(json_wtr, ring->cpu);
+ jsonw_name(json_wtr, "index");
+ jsonw_uint(json_wtr, ring->key);
+ if (e->header.type == PERF_RECORD_SAMPLE) {
+ jsonw_name(json_wtr, "data");
+ print_data_json(e->data, e->size);
+ } else if (e->header.type == PERF_RECORD_LOST) {
+ jsonw_name(json_wtr, "lost");
+ jsonw_start_object(json_wtr);
+ jsonw_name(json_wtr, "id");
+ jsonw_uint(json_wtr, lost->id);
+ jsonw_name(json_wtr, "count");
+ jsonw_uint(json_wtr, lost->lost);
+ jsonw_end_object(json_wtr);
+ }
+ jsonw_end_object(json_wtr);
+ } else {
+ if (e->header.type == PERF_RECORD_SAMPLE) {
+ printf("== @%ld.%ld CPU: %d index: %d =====\n",
+ (long)ts.tv_sec, ts.tv_nsec,
+ ring->cpu, ring->key);
+ fprint_hex(stdout, e->data, e->size, " ");
+ printf("\n");
+ } else if (e->header.type == PERF_RECORD_LOST) {
+ printf("lost %lld events\n", lost->lost);
+ } else {
+ printf("unknown event type=%d size=%d\n",
+ e->header.type, e->header.size);
+ }
+ }
+}
+
+static void
+perf_event_read(struct event_ring_info *ring, void **buf, size_t *buf_len)
+{
+ volatile struct perf_event_mmap_page *header = ring->mem;
+ __u64 buffer_size = MMAP_PAGE_CNT * get_page_size();
+ __u64 data_tail = header->data_tail;
+ __u64 data_head = header->data_head;
+ void *base, *begin, *end;
+
+ asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
+ if (data_head == data_tail)
+ return;
+
+ base = ((char *)header) + get_page_size();
+
+ begin = base + data_tail % buffer_size;
+ end = base + data_head % buffer_size;
+
+ while (begin != end) {
+ struct perf_event_sample *e;
+
+ e = begin;
+ if (begin + e->header.size > base + buffer_size) {
+ long len = base + buffer_size - begin;
+
+ if (*buf_len < e->header.size) {
+ free(*buf);
+ *buf = malloc(e->header.size);
+ if (!*buf) {
+ fprintf(stderr,
+ "can't allocate memory");
+ stop = true;
+ return;
+ }
+ *buf_len = e->header.size;
+ }
+
+ memcpy(*buf, begin, len);
+ memcpy(*buf + len, base, e->header.size - len);
+ e = (void *)*buf;
+ begin = base + e->header.size - len;
+ } else if (begin + e->header.size == base + buffer_size) {
+ begin = base;
+ } else {
+ begin += e->header.size;
+ }
+
+ print_bpf_output(ring, e);
+ }
+
+ __sync_synchronize(); /* smp_mb() */
+ header->data_tail = data_head;
+}
+
+static int perf_mmap_size(void)
+{
+ return get_page_size() * (MMAP_PAGE_CNT + 1);
+}
+
+static void *perf_event_mmap(int fd)
+{
+ int mmap_size = perf_mmap_size();
+ void *base;
+
+ base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (base == MAP_FAILED) {
+ p_err("event mmap failed: %s\n", strerror(errno));
+ return NULL;
+ }
+
+ return base;
+}
+
+static void perf_event_unmap(void *mem)
+{
+ if (munmap(mem, perf_mmap_size()))
+ fprintf(stderr, "Can't unmap ring memory!\n");
+}
+
+static int bpf_perf_event_open(int map_fd, int key, int cpu)
+{
+ struct perf_event_attr attr = {
+ .sample_type = PERF_SAMPLE_RAW,
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_BPF_OUTPUT,
+ };
+ int pmu_fd;
+
+ pmu_fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
+ if (pmu_fd < 0) {
+ p_err("failed to open perf event %d for CPU %d", key, cpu);
+ return -1;
+ }
+
+ if (bpf_map_update_elem(map_fd, &key, &pmu_fd, BPF_ANY)) {
+ p_err("failed to update map for event %d for CPU %d", key, cpu);
+ goto err_close;
+ }
+ if (ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) {
+ p_err("failed to enable event %d for CPU %d", key, cpu);
+ goto err_close;
+ }
+
+ return pmu_fd;
+
+err_close:
+ close(pmu_fd);
+ return -1;
+}
+
+int do_event_pipe(int argc, char **argv)
+{
+ int i, nfds, map_fd, index = -1, cpu = -1;
+ struct bpf_map_info map_info = {};
+ struct event_ring_info *rings;
+ size_t tmp_buf_sz = 0;
+ void *tmp_buf = NULL;
+ struct pollfd *pfds;
+ __u32 map_info_len;
+ bool do_all = true;
+
+ map_info_len = sizeof(map_info);
+ map_fd = map_parse_fd_and_info(&argc, &argv, &map_info, &map_info_len);
+ if (map_fd < 0)
+ return -1;
+
+ if (map_info.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+ p_err("map is not a perf event array");
+ goto err_close_map;
+ }
+
+ while (argc) {
+ if (argc < 2)
+ BAD_ARG();
+
+ if (is_prefix(*argv, "cpu")) {
+ char *endptr;
+
+ NEXT_ARG();
+ cpu = strtoul(*argv, &endptr, 0);
+ if (*endptr) {
+ p_err("can't parse %s as CPU ID", **argv);
+ goto err_close_map;
+ }
+
+ NEXT_ARG();
+ } else if (is_prefix(*argv, "index")) {
+ char *endptr;
+
+ NEXT_ARG();
+ index = strtoul(*argv, &endptr, 0);
+ if (*endptr) {
+ p_err("can't parse %s as index", **argv);
+ goto err_close_map;
+ }
+
+ NEXT_ARG();
+ } else {
+ BAD_ARG();
+ }
+
+ do_all = false;
+ }
+
+ if (!do_all) {
+ if (index == -1 || cpu == -1) {
+ p_err("cpu and index must be specified together");
+ goto err_close_map;
+ }
+
+ nfds = 1;
+ } else {
+ nfds = min(get_possible_cpus(), map_info.max_entries);
+ cpu = 0;
+ index = 0;
+ }
+
+ rings = calloc(nfds, sizeof(rings[0]));
+ if (!rings)
+ goto err_close_map;
+
+ pfds = calloc(nfds, sizeof(pfds[0]));
+ if (!pfds)
+ goto err_free_rings;
+
+ for (i = 0; i < nfds; i++) {
+ rings[i].cpu = cpu + i;
+ rings[i].key = index + i;
+
+ rings[i].fd = bpf_perf_event_open(map_fd, rings[i].key,
+ rings[i].cpu);
+ if (rings[i].fd < 0)
+ goto err_close_fds_prev;
+
+ rings[i].mem = perf_event_mmap(rings[i].fd);
+ if (!rings[i].mem)
+ goto err_close_fds_current;
+
+ pfds[i].fd = rings[i].fd;
+ pfds[i].events = POLLIN;
+ }
+
+ signal(SIGINT, int_exit);
+ signal(SIGHUP, int_exit);
+ signal(SIGTERM, int_exit);
+
+ if (json_output)
+ jsonw_start_array(json_wtr);
+
+ while (!stop) {
+ poll(pfds, nfds, 200);
+ for (i = 0; i < nfds; i++)
+ perf_event_read(&rings[i], &tmp_buf, &tmp_buf_sz);
+ }
+ free(tmp_buf);
+
+ if (json_output)
+ jsonw_end_array(json_wtr);
+
+ for (i = 0; i < nfds; i++) {
+ perf_event_unmap(rings[i].mem);
+ close(rings[i].fd);
+ }
+ free(pfds);
+ free(rings);
+ close(map_fd);
+
+ return 0;
+
+err_close_fds_prev:
+ while (i--) {
+ perf_event_unmap(rings[i].mem);
+err_close_fds_current:
+ close(rings[i].fd);
+ }
+ free(pfds);
+err_free_rings:
+ free(rings);
+err_close_map:
+ close(map_fd);
+ return -1;
+}