summaryrefslogtreecommitdiff
path: root/net/xdp/xsk.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/xdp/xsk.c')
-rw-r--r--net/xdp/xsk.c113
1 files changed, 54 insertions, 59 deletions
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index b78c0e095e22..89d2bef96469 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -35,8 +35,6 @@
#define TX_BATCH_SIZE 32
#define MAX_PER_SOCKET_BUDGET (TX_BATCH_SIZE)
-static DEFINE_PER_CPU(struct list_head, xskmap_flush_list);
-
void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
{
if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
@@ -143,7 +141,7 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff_xsk *xskb, u32 len,
u64 addr;
int err;
- addr = xp_get_handle(xskb);
+ addr = xp_get_handle(xskb, xskb->pool);
err = xskq_prod_reserve_desc(xs->rx, addr, len, flags);
if (err) {
xs->rx_queue_full++;
@@ -173,14 +171,14 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
return 0;
xskb_list = &xskb->pool->xskb_list;
- list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) {
+ list_for_each_entry_safe(pos, tmp, xskb_list, list_node) {
if (list_is_singular(xskb_list))
contd = 0;
len = pos->xdp.data_end - pos->xdp.data;
err = __xsk_rcv_zc(xs, pos, len, contd);
if (err)
goto err;
- list_del(&pos->xskb_list_node);
+ list_del(&pos->list_node);
}
return 0;
@@ -324,7 +322,6 @@ static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
return -ENOSPC;
}
- sk_mark_napi_id_once_xdp(&xs->sk, xdp);
return 0;
}
@@ -372,22 +369,23 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
{
- struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
int err;
err = xsk_rcv(xs, xdp);
if (err)
return err;
- if (!xs->flush_node.prev)
+ if (!xs->flush_node.prev) {
+ struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list();
+
list_add(&xs->flush_node, flush_list);
+ }
return 0;
}
-void __xsk_map_flush(void)
+void __xsk_map_flush(struct list_head *flush_list)
{
- struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
struct xdp_sock *xs, *tmp;
list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
@@ -396,16 +394,6 @@ void __xsk_map_flush(void)
}
}
-#ifdef CONFIG_DEBUG_NET
-bool xsk_map_check_flush(void)
-{
- if (list_empty(this_cpu_ptr(&xskmap_flush_list)))
- return false;
- __xsk_map_flush();
- return true;
-}
-#endif
-
void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries)
{
xskq_prod_submit_n(pool->cq, nb_entries);
@@ -538,34 +526,34 @@ static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
}
-static int xsk_cq_reserve_addr_locked(struct xdp_sock *xs, u64 addr)
+static int xsk_cq_reserve_addr_locked(struct xsk_buff_pool *pool, u64 addr)
{
unsigned long flags;
int ret;
- spin_lock_irqsave(&xs->pool->cq_lock, flags);
- ret = xskq_prod_reserve_addr(xs->pool->cq, addr);
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+ spin_lock_irqsave(&pool->cq_lock, flags);
+ ret = xskq_prod_reserve_addr(pool->cq, addr);
+ spin_unlock_irqrestore(&pool->cq_lock, flags);
return ret;
}
-static void xsk_cq_submit_locked(struct xdp_sock *xs, u32 n)
+static void xsk_cq_submit_locked(struct xsk_buff_pool *pool, u32 n)
{
unsigned long flags;
- spin_lock_irqsave(&xs->pool->cq_lock, flags);
- xskq_prod_submit_n(xs->pool->cq, n);
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+ spin_lock_irqsave(&pool->cq_lock, flags);
+ xskq_prod_submit_n(pool->cq, n);
+ spin_unlock_irqrestore(&pool->cq_lock, flags);
}
-static void xsk_cq_cancel_locked(struct xdp_sock *xs, u32 n)
+static void xsk_cq_cancel_locked(struct xsk_buff_pool *pool, u32 n)
{
unsigned long flags;
- spin_lock_irqsave(&xs->pool->cq_lock, flags);
- xskq_prod_cancel_n(xs->pool->cq, n);
- spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+ spin_lock_irqsave(&pool->cq_lock, flags);
+ xskq_prod_cancel_n(pool->cq, n);
+ spin_unlock_irqrestore(&pool->cq_lock, flags);
}
static u32 xsk_get_num_desc(struct sk_buff *skb)
@@ -582,7 +570,7 @@ static void xsk_destruct_skb(struct sk_buff *skb)
*compl->tx_timestamp = ktime_get_tai_fast_ns();
}
- xsk_cq_submit_locked(xdp_sk(skb->sk), xsk_get_num_desc(skb));
+ xsk_cq_submit_locked(xdp_sk(skb->sk)->pool, xsk_get_num_desc(skb));
sock_wfree(skb);
}
@@ -598,7 +586,7 @@ static void xsk_consume_skb(struct sk_buff *skb)
struct xdp_sock *xs = xdp_sk(skb->sk);
skb->destructor = sock_wfree;
- xsk_cq_cancel_locked(xs, xsk_get_num_desc(skb));
+ xsk_cq_cancel_locked(xs->pool, xsk_get_num_desc(skb));
/* Free skb without triggering the perf drop trace */
consume_skb(skb);
xs->skb = NULL;
@@ -686,6 +674,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
len = desc->len;
if (!skb) {
+ first_frag = true;
+
hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
tr = dev->needed_tailroom;
skb = sock_alloc_send_skb(&xs->sk, hr + len + tr, 1, &err);
@@ -696,12 +686,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
skb_put(skb, len);
err = skb_store_bits(skb, 0, buffer, len);
- if (unlikely(err)) {
- kfree_skb(skb);
+ if (unlikely(err))
goto free_err;
- }
-
- first_frag = true;
} else {
int nr_frags = skb_shinfo(skb)->nr_frags;
struct page *page;
@@ -769,6 +755,9 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
return skb;
free_err:
+ if (first_frag && skb)
+ kfree_skb(skb);
+
if (err == -EOVERFLOW) {
/* Drop the packet */
xsk_set_destructor_arg(xs->skb);
@@ -776,7 +765,7 @@ free_err:
xskq_cons_release(xs->tx);
} else {
/* Let application retry */
- xsk_cq_cancel_locked(xs, 1);
+ xsk_cq_cancel_locked(xs->pool, 1);
}
return ERR_PTR(err);
@@ -813,7 +802,7 @@ static int __xsk_generic_xmit(struct sock *sk)
* if there is space in it. This avoids having to implement
* any buffering in the Tx path.
*/
- if (xsk_cq_reserve_addr_locked(xs, desc.addr))
+ if (xsk_cq_reserve_addr_locked(xs->pool, desc.addr))
goto out;
skb = xsk_build_skb(xs, &desc);
@@ -918,11 +907,8 @@ static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len
if (unlikely(!xs->tx))
return -ENOBUFS;
- if (sk_can_busy_loop(sk)) {
- if (xs->zc)
- __sk_mark_napi_id_once(sk, xsk_pool_get_napi_id(xs->pool));
+ if (sk_can_busy_loop(sk))
sk_busy_loop(sk, 1); /* only support non-blocking sockets */
- }
if (xs->zc && xsk_no_wakeup(sk))
return 0;
@@ -1308,6 +1294,14 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xs->queue_id = qid;
xp_add_xsk(xs->pool, xs);
+ if (xs->zc && qid < dev->real_num_rx_queues) {
+ struct netdev_rx_queue *rxq;
+
+ rxq = __netif_get_rx_queue(dev, qid);
+ if (rxq->napi)
+ __sk_mark_napi_id_once(sk, rxq->napi->napi_id);
+ }
+
out_unlock:
if (err) {
dev_put(dev);
@@ -1331,14 +1325,6 @@ struct xdp_umem_reg_v1 {
__u32 headroom;
};
-struct xdp_umem_reg_v2 {
- __u64 addr; /* Start of packet data area */
- __u64 len; /* Length of packet data area */
- __u32 chunk_size;
- __u32 headroom;
- __u32 flags;
-};
-
static int xsk_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -1382,10 +1368,19 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(struct xdp_umem_reg_v1))
return -EINVAL;
- else if (optlen < sizeof(struct xdp_umem_reg_v2))
- mr_size = sizeof(struct xdp_umem_reg_v1);
else if (optlen < sizeof(mr))
- mr_size = sizeof(struct xdp_umem_reg_v2);
+ mr_size = sizeof(struct xdp_umem_reg_v1);
+
+ BUILD_BUG_ON(sizeof(struct xdp_umem_reg_v1) >= sizeof(struct xdp_umem_reg));
+
+ /* Make sure the last field of the struct doesn't have
+ * uninitialized padding. All padding has to be explicit
+ * and has to be set to zero by the userspace to make
+ * struct xdp_umem_reg extensible in the future.
+ */
+ BUILD_BUG_ON(offsetof(struct xdp_umem_reg, tx_metadata_len) +
+ sizeof_field(struct xdp_umem_reg, tx_metadata_len) !=
+ sizeof(struct xdp_umem_reg));
if (copy_from_sockptr(&mr, optval, mr_size))
return -EFAULT;
@@ -1414,6 +1409,8 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
struct xsk_queue **q;
int entries;
+ if (optlen < sizeof(entries))
+ return -EINVAL;
if (copy_from_sockptr(&entries, optval, sizeof(entries)))
return -EFAULT;
@@ -1770,7 +1767,7 @@ static struct pernet_operations xsk_net_ops = {
static int __init xsk_init(void)
{
- int err, cpu;
+ int err;
err = proto_register(&xsk_proto, 0 /* no slab */);
if (err)
@@ -1788,8 +1785,6 @@ static int __init xsk_init(void)
if (err)
goto out_pernet;
- for_each_possible_cpu(cpu)
- INIT_LIST_HEAD(&per_cpu(xskmap_flush_list, cpu));
return 0;
out_pernet: