summaryrefslogtreecommitdiff
path: root/kernel/bpf/cpumap.c
diff options
context:
space:
mode:
authorJesper Dangaard Brouer <brouer@redhat.com>2018-04-17 16:46:32 +0200
committerDavid S. Miller <davem@davemloft.net>2018-04-17 10:50:29 -0400
commit039930945a72d9af5ff04ae9b9e60658a52e0770 (patch)
treee92821099697576bcdcd41e9d61f561be0e6923c /kernel/bpf/cpumap.c
parent60bbf7eeef10dc647430646d7fe5e3d8d132dbec (diff)
xdp: transition into using xdp_frame for return API
Changing API xdp_return_frame() to take struct xdp_frame as argument, seems like a natural choice. But there are some subtle performance details here that needs extra care, which is a deliberate choice. When de-referencing xdp_frame on a remote CPU during DMA-TX completion, result in the cache-line is change to "Shared" state. Later when the page is reused for RX, then this xdp_frame cache-line is written, which change the state to "Modified". This situation already happens (naturally) for, virtio_net, tun and cpumap as the xdp_frame pointer is the queued object. In tun and cpumap, the ptr_ring is used for efficiently transferring cache-lines (with pointers) between CPUs. Thus, the only option is to de-referencing xdp_frame. It is only the ixgbe driver that had an optimization, in which it can avoid doing the de-reference of xdp_frame. The driver already have TX-ring queue, which (in case of remote DMA-TX completion) have to be transferred between CPUs anyhow. In this data area, we stored a struct xdp_mem_info and a data pointer, which allowed us to avoid de-referencing xdp_frame. To compensate for this, a prefetchw is used for telling the cache coherency protocol about our access pattern. My benchmarks show that this prefetchw is enough to compensate the ixgbe driver. V7: Adjust for commit d9314c474d4f ("i40e: add support for XDP_REDIRECT") V8: Adjust for commit bd658dda4237 ("net/mlx5e: Separate dma base address and offset in dma_sync call") Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel/bpf/cpumap.c')
-rw-r--r--kernel/bpf/cpumap.c6
1 files changed, 3 insertions, 3 deletions
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index bcdc4dea5ce7..c95b04ec103e 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -219,7 +219,7 @@ static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
while ((xdpf = ptr_ring_consume(ring)))
if (WARN_ON_ONCE(xdpf))
- xdp_return_frame(xdpf->data, &xdpf->mem);
+ xdp_return_frame(xdpf);
}
static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
@@ -275,7 +275,7 @@ static int cpu_map_kthread_run(void *data)
skb = cpu_map_build_skb(rcpu, xdpf);
if (!skb) {
- xdp_return_frame(xdpf->data, &xdpf->mem);
+ xdp_return_frame(xdpf);
continue;
}
@@ -578,7 +578,7 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
err = __ptr_ring_produce(q, xdpf);
if (err) {
drops++;
- xdp_return_frame(xdpf->data, &xdpf->mem);
+ xdp_return_frame(xdpf);
}
processed++;
}