summaryrefslogtreecommitdiff
path: root/net/xdp/xsk.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/xdp/xsk.c')
-rw-r--r--net/xdp/xsk.c134
1 files changed, 67 insertions, 67 deletions
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 727aa20be4bd..72c000c0ae5f 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -25,6 +25,7 @@
#include <linux/vmalloc.h>
#include <net/xdp_sock_drv.h>
#include <net/busy_poll.h>
+#include <net/netdev_lock.h>
#include <net/netdev_rx_queue.h>
#include <net/xdp.h>
@@ -35,8 +36,6 @@
#define TX_BATCH_SIZE 32
#define MAX_PER_SOCKET_BUDGET (TX_BATCH_SIZE)
-static DEFINE_PER_CPU(struct list_head, xskmap_flush_list);
-
void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
{
if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
@@ -143,7 +142,7 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff_xsk *xskb, u32 len,
u64 addr;
int err;
- addr = xp_get_handle(xskb);
+ addr = xp_get_handle(xskb, xskb->pool);
err = xskq_prod_reserve_desc(xs->rx, addr, len, flags);
if (err) {
xs->rx_queue_full++;
@@ -173,14 +172,14 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
return 0;
xskb_list = &xskb->pool->xskb_list;
- list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) {
+ list_for_each_entry_safe(pos, tmp, xskb_list, list_node) {
if (list_is_singular(xskb_list))
contd = 0;
len = pos->xdp.data_end - pos->xdp.data;
err = __xsk_rcv_zc(xs, pos, len, contd);
if (err)
goto err;
- list_del(&pos->xskb_list_node);
+ list_del(&pos->list_node);
}
return 0;
@@ -313,13 +312,10 @@ static bool xsk_is_bound(struct xdp_sock *xs)
static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
- struct net_device *dev = xdp->rxq->dev;
- u32 qid = xdp->rxq->queue_index;
-
if (!xsk_is_bound(xs))
return -ENXIO;
- if (!dev->_rx[qid].pool || xs->umem != dev->_rx[qid].pool->umem)
+ if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
return -EINVAL;
if (len > xsk_pool_get_rx_frame_size(xs->pool) && !xs->sg) {
@@ -327,7 +323,6 @@ static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
return -ENOSPC;
}
- sk_mark_napi_id_once_xdp(&xs->sk, xdp);
return 0;
}
@@ -343,13 +338,14 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
u32 len = xdp_get_buff_len(xdp);
int err;
- spin_lock_bh(&xs->rx_lock);
err = xsk_rcv_check(xs, xdp, len);
if (!err) {
+ spin_lock_bh(&xs->pool->rx_lock);
err = __xsk_rcv(xs, xdp, len);
xsk_flush(xs);
+ spin_unlock_bh(&xs->pool->rx_lock);
}
- spin_unlock_bh(&xs->rx_lock);
+
return err;
}
@@ -375,22 +371,23 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
{
- struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
int err;
err = xsk_rcv(xs, xdp);
if (err)
return err;
- if (!xs->flush_node.prev)
+ if (!xs->flush_node.prev) {
+ struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list();
+
list_add(&xs->flush_node, flush_list);
+ }
return 0;
}
-void __xsk_map_flush(void)
+void __xsk_map_flush(struct list_head *flush_list)
{
- struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
struct xdp_sock *xs, *tmp;
list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
@@ -399,16 +396,6 @@ void __xsk_map_flush(void)
}
}
-#ifdef CONFIG_DEBUG_NET
-bool xsk_map_check_flush(void)
-{
- if (list_empty(this_cpu_ptr(&xskmap_flush_list)))
- return false;
- __xsk_map_flush();
- return true;
-}
-#endif
-
void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries)
{
xskq_prod_submit_n(pool->cq, nb_entries);
@@ -541,34 +528,34 @@ static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
}
-static int xsk_cq_reserve_addr_locked(struct xdp_sock *xs, u64 addr)
+static int xsk_cq_reserve_addr_locked(struct xsk_buff_pool *pool, u64 addr)
{
unsigned long flags;
int ret;
- spin_lock_irqsave(&xs->pool->cq_lock, flags);
- ret = xskq_prod_reserve_addr(xs->pool->cq, addr);
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+ spin_lock_irqsave(&pool->cq_lock, flags);
+ ret = xskq_prod_reserve_addr(pool->cq, addr);
+ spin_unlock_irqrestore(&pool->cq_lock, flags);
return ret;
}
-static void xsk_cq_submit_locked(struct xdp_sock *xs, u32 n)
+static void xsk_cq_submit_locked(struct xsk_buff_pool *pool, u32 n)
{
unsigned long flags;
- spin_lock_irqsave(&xs->pool->cq_lock, flags);
- xskq_prod_submit_n(xs->pool->cq, n);
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+ spin_lock_irqsave(&pool->cq_lock, flags);
+ xskq_prod_submit_n(pool->cq, n);
+ spin_unlock_irqrestore(&pool->cq_lock, flags);
}
-static void xsk_cq_cancel_locked(struct xdp_sock *xs, u32 n)
+static void xsk_cq_cancel_locked(struct xsk_buff_pool *pool, u32 n)
{
unsigned long flags;
- spin_lock_irqsave(&xs->pool->cq_lock, flags);
- xskq_prod_cancel_n(xs->pool->cq, n);
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+ spin_lock_irqsave(&pool->cq_lock, flags);
+ xskq_prod_cancel_n(pool->cq, n);
+ spin_unlock_irqrestore(&pool->cq_lock, flags);
}
static u32 xsk_get_num_desc(struct sk_buff *skb)
@@ -585,7 +572,7 @@ static void xsk_destruct_skb(struct sk_buff *skb)
*compl->tx_timestamp = ktime_get_tai_fast_ns();
}
- xsk_cq_submit_locked(xdp_sk(skb->sk), xsk_get_num_desc(skb));
+ xsk_cq_submit_locked(xdp_sk(skb->sk)->pool, xsk_get_num_desc(skb));
sock_wfree(skb);
}
@@ -601,7 +588,7 @@ static void xsk_consume_skb(struct sk_buff *skb)
struct xdp_sock *xs = xdp_sk(skb->sk);
skb->destructor = sock_wfree;
- xsk_cq_cancel_locked(xs, xsk_get_num_desc(skb));
+ xsk_cq_cancel_locked(xs->pool, xsk_get_num_desc(skb));
/* Free skb without triggering the perf drop trace */
consume_skb(skb);
xs->skb = NULL;
@@ -689,6 +676,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
len = desc->len;
if (!skb) {
+ first_frag = true;
+
hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
tr = dev->needed_tailroom;
skb = sock_alloc_send_skb(&xs->sk, hr + len + tr, 1, &err);
@@ -699,12 +688,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
skb_put(skb, len);
err = skb_store_bits(skb, 0, buffer, len);
- if (unlikely(err)) {
- kfree_skb(skb);
+ if (unlikely(err))
goto free_err;
- }
-
- first_frag = true;
} else {
int nr_frags = skb_shinfo(skb)->nr_frags;
struct page *page;
@@ -759,6 +744,9 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
goto free_err;
}
}
+
+ if (meta->flags & XDP_TXMD_FLAGS_LAUNCH_TIME)
+ skb->skb_mstamp_ns = meta->request.launch_time;
}
}
@@ -772,6 +760,9 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
return skb;
free_err:
+ if (first_frag && skb)
+ kfree_skb(skb);
+
if (err == -EOVERFLOW) {
/* Drop the packet */
xsk_set_destructor_arg(xs->skb);
@@ -779,7 +770,7 @@ free_err:
xskq_cons_release(xs->tx);
} else {
/* Let application retry */
- xsk_cq_cancel_locked(xs, 1);
+ xsk_cq_cancel_locked(xs->pool, 1);
}
return ERR_PTR(err);
@@ -816,8 +807,11 @@ static int __xsk_generic_xmit(struct sock *sk)
* if there is space in it. This avoids having to implement
* any buffering in the Tx path.
*/
- if (xsk_cq_reserve_addr_locked(xs, desc.addr))
+ err = xsk_cq_reserve_addr_locked(xs->pool, desc.addr);
+ if (err) {
+ err = -EAGAIN;
goto out;
+ }
skb = xsk_build_skb(xs, &desc);
if (IS_ERR(skb)) {
@@ -889,7 +883,7 @@ static bool xsk_no_wakeup(struct sock *sk)
#ifdef CONFIG_NET_RX_BUSY_POLL
/* Prefer busy-polling, skip the wakeup. */
return READ_ONCE(sk->sk_prefer_busy_poll) && READ_ONCE(sk->sk_ll_usec) &&
- READ_ONCE(sk->sk_napi_id) >= MIN_NAPI_ID;
+ napi_id_valid(READ_ONCE(sk->sk_napi_id));
#else
return false;
#endif
@@ -921,11 +915,8 @@ static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len
if (unlikely(!xs->tx))
return -ENOBUFS;
- if (sk_can_busy_loop(sk)) {
- if (xs->zc)
- __sk_mark_napi_id_once(sk, xsk_pool_get_napi_id(xs->pool));
+ if (sk_can_busy_loop(sk))
sk_busy_loop(sk, 1); /* only support non-blocking sockets */
- }
if (xs->zc && xsk_no_wakeup(sk))
return 0;
@@ -1195,6 +1186,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
goto out_release;
}
+ netdev_lock_ops(dev);
+
if (!xs->rx && !xs->tx) {
err = -EINVAL;
goto out_unlock;
@@ -1311,6 +1304,14 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xs->queue_id = qid;
xp_add_xsk(xs->pool, xs);
+ if (qid < dev->real_num_rx_queues) {
+ struct netdev_rx_queue *rxq;
+
+ rxq = __netif_get_rx_queue(dev, qid);
+ if (rxq->napi)
+ __sk_mark_napi_id_once(sk, rxq->napi->napi_id);
+ }
+
out_unlock:
if (err) {
dev_put(dev);
@@ -1321,6 +1322,7 @@ out_unlock:
smp_wmb();
WRITE_ONCE(xs->state, XSK_BOUND);
}
+ netdev_unlock_ops(dev);
out_release:
mutex_unlock(&xs->mutex);
rtnl_unlock();
@@ -1334,14 +1336,6 @@ struct xdp_umem_reg_v1 {
__u32 headroom;
};
-struct xdp_umem_reg_v2 {
- __u64 addr; /* Start of packet data area */
- __u64 len; /* Length of packet data area */
- __u32 chunk_size;
- __u32 headroom;
- __u32 flags;
-};
-
static int xsk_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -1385,10 +1379,19 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(struct xdp_umem_reg_v1))
return -EINVAL;
- else if (optlen < sizeof(struct xdp_umem_reg_v2))
- mr_size = sizeof(struct xdp_umem_reg_v1);
else if (optlen < sizeof(mr))
- mr_size = sizeof(struct xdp_umem_reg_v2);
+ mr_size = sizeof(struct xdp_umem_reg_v1);
+
+ BUILD_BUG_ON(sizeof(struct xdp_umem_reg_v1) >= sizeof(struct xdp_umem_reg));
+
+ /* Make sure the last field of the struct doesn't have
+ * uninitialized padding. All padding has to be explicit
+ * and has to be set to zero by the userspace to make
+ * struct xdp_umem_reg extensible in the future.
+ */
+ BUILD_BUG_ON(offsetof(struct xdp_umem_reg, tx_metadata_len) +
+ sizeof_field(struct xdp_umem_reg, tx_metadata_len) !=
+ sizeof(struct xdp_umem_reg));
if (copy_from_sockptr(&mr, optval, mr_size))
return -EFAULT;
@@ -1732,7 +1735,6 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
xs = xdp_sk(sk);
xs->state = XSK_READY;
mutex_init(&xs->mutex);
- spin_lock_init(&xs->rx_lock);
INIT_LIST_HEAD(&xs->map_list);
spin_lock_init(&xs->map_list_lock);
@@ -1775,7 +1777,7 @@ static struct pernet_operations xsk_net_ops = {
static int __init xsk_init(void)
{
- int err, cpu;
+ int err;
err = proto_register(&xsk_proto, 0 /* no slab */);
if (err)
@@ -1793,8 +1795,6 @@ static int __init xsk_init(void)
if (err)
goto out_pernet;
- for_each_possible_cpu(cpu)
- INIT_LIST_HEAD(&per_cpu(xskmap_flush_list, cpu));
return 0;
out_pernet: