summaryrefslogtreecommitdiff
path: root/drivers/net/veth.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/veth.c')
-rw-r--r--drivers/net/veth.c91
1 files changed, 63 insertions, 28 deletions
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 0d6d0d749d44..14e6f2a2fb77 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -17,6 +17,7 @@
#include <net/rtnetlink.h>
#include <net/dst.h>
+#include <net/netdev_lock.h>
#include <net/xfrm.h>
#include <net/xdp.h>
#include <linux/veth.h>
@@ -306,12 +307,10 @@ static void __veth_xdp_flush(struct veth_rq *rq)
static int veth_xdp_rx(struct veth_rq *rq, struct sk_buff *skb)
{
- if (unlikely(ptr_ring_produce(&rq->xdp_ring, skb))) {
- dev_kfree_skb_any(skb);
- return NET_RX_DROP;
- }
+ if (unlikely(ptr_ring_produce(&rq->xdp_ring, skb)))
+ return NETDEV_TX_BUSY; /* signal qdisc layer */
- return NET_RX_SUCCESS;
+ return NET_RX_SUCCESS; /* same as NETDEV_TX_OK */
}
static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb,
@@ -345,11 +344,11 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
struct veth_rq *rq = NULL;
- int ret = NETDEV_TX_OK;
+ struct netdev_queue *txq;
struct net_device *rcv;
int length = skb->len;
bool use_napi = false;
- int rxq;
+ int ret, rxq;
rcu_read_lock();
rcv = rcu_dereference(priv->peer);
@@ -372,17 +371,43 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
}
skb_tx_timestamp(skb);
- if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) {
+
+ ret = veth_forward_skb(rcv, skb, rq, use_napi);
+ switch (ret) {
+ case NET_RX_SUCCESS: /* same as NETDEV_TX_OK */
if (!use_napi)
dev_sw_netstats_tx_add(dev, 1, length);
else
__veth_xdp_flush(rq);
- } else {
+ break;
+ case NETDEV_TX_BUSY:
+ /* If a qdisc is attached to our virtual device, returning
+ * NETDEV_TX_BUSY is allowed.
+ */
+ txq = netdev_get_tx_queue(dev, rxq);
+
+ if (qdisc_txq_has_no_queue(txq)) {
+ dev_kfree_skb_any(skb);
+ goto drop;
+ }
+ /* Restore Eth hdr pulled by dev_forward_skb/eth_type_trans */
+ __skb_push(skb, ETH_HLEN);
+ netif_tx_stop_queue(txq);
+ /* Makes sure NAPI peer consumer runs. Consumer is responsible
+ * for starting txq again, until then ndo_start_xmit (this
+ * function) will not be invoked by the netstack again.
+ */
+ __veth_xdp_flush(rq);
+ break;
+ case NET_RX_DROP: /* same as NET_XMIT_DROP */
drop:
atomic64_inc(&priv->dropped);
ret = NET_XMIT_DROP;
+ break;
+ default:
+ net_crit_ratelimited("%s(%s): Invalid return code(%d)",
+ __func__, dev->name, ret);
}
-
rcu_read_unlock();
return ret;
@@ -634,7 +659,7 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
break;
case XDP_TX:
orig_frame = *frame;
- xdp->rxq->mem = frame->mem;
+ xdp->rxq->mem.type = frame->mem_type;
if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) {
trace_xdp_exception(rq->dev, xdp_prog, act);
frame = &orig_frame;
@@ -646,7 +671,7 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
goto xdp_xmit;
case XDP_REDIRECT:
orig_frame = *frame;
- xdp->rxq->mem = frame->mem;
+ xdp->rxq->mem.type = frame->mem_type;
if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
frame = &orig_frame;
stats->rx_drops++;
@@ -684,8 +709,7 @@ static void veth_xdp_rcv_bulk_skb(struct veth_rq *rq, void **frames,
void *skbs[VETH_XDP_BATCH];
int i;
- if (xdp_alloc_skb_bulk(skbs, n_xdpf,
- GFP_ATOMIC | __GFP_ZERO) < 0) {
+ if (unlikely(!napi_skb_cache_get_bulk(skbs, n_xdpf))) {
for (i = 0; i < n_xdpf; i++)
xdp_return_frame(frames[i]);
stats->rx_drops += n_xdpf;
@@ -932,17 +956,28 @@ static int veth_poll(struct napi_struct *napi, int budget)
{
struct veth_rq *rq =
container_of(napi, struct veth_rq, xdp_napi);
+ struct veth_priv *priv = netdev_priv(rq->dev);
+ int queue_idx = rq->xdp_rxq.queue_index;
+ struct netdev_queue *peer_txq;
struct veth_stats stats = {};
+ struct net_device *peer_dev;
struct veth_xdp_tx_bq bq;
int done;
bq.count = 0;
+ /* NAPI functions as RCU section */
+ peer_dev = rcu_dereference_check(priv->peer, rcu_read_lock_bh_held());
+ peer_txq = peer_dev ? netdev_get_tx_queue(peer_dev, queue_idx) : NULL;
+
xdp_set_return_frame_no_direct();
done = veth_xdp_rcv(rq, budget, &bq, &stats);
if (stats.xdp_redirect > 0)
xdp_do_flush();
+ if (stats.xdp_tx > 0)
+ veth_xdp_flush(rq, &bq);
+ xdp_clear_return_frame_no_direct();
if (done < budget && napi_complete_done(napi, done)) {
/* Write rx_notify_masked before reading ptr_ring */
@@ -955,9 +990,12 @@ static int veth_poll(struct napi_struct *napi, int budget)
}
}
- if (stats.xdp_tx > 0)
- veth_xdp_flush(rq, &bq);
- xdp_clear_return_frame_no_direct();
+ /* Release backpressure per NAPI poll */
+ smp_rmb(); /* Paired with netif_tx_stop_queue set_bit */
+ if (peer_txq && netif_tx_queue_stopped(peer_txq)) {
+ txq_trans_cond_update(peer_txq);
+ netif_tx_wake_queue(peer_txq);
+ }
return done;
}
@@ -1286,7 +1324,7 @@ static int veth_set_channels(struct net_device *dev,
if (peer)
netif_carrier_off(peer);
- /* try to allocate new resurces, as needed*/
+ /* try to allocate new resources, as needed*/
err = veth_enable_range_safe(dev, old_rx_count, new_rx_count);
if (err)
goto out;
@@ -1765,10 +1803,13 @@ static int veth_init_queues(struct net_device *dev, struct nlattr *tb[])
return 0;
}
-static int veth_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[],
+static int veth_newlink(struct net_device *dev,
+ struct rtnl_newlink_params *params,
struct netlink_ext_ack *extack)
{
+ struct net *peer_net = rtnl_newlink_peer_net(params);
+ struct nlattr **data = params->data;
+ struct nlattr **tb = params->tb;
int err;
struct net_device *peer;
struct veth_priv *priv;
@@ -1776,7 +1817,6 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *peer_tb[IFLA_MAX + 1], **tbp;
unsigned char name_assign_type;
struct ifinfomsg *ifmp;
- struct net *net;
/*
* create and register peer first
@@ -1800,13 +1840,10 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
name_assign_type = NET_NAME_ENUM;
}
- net = rtnl_link_get_net(src_net, tbp);
- peer = rtnl_create_link(net, ifname, name_assign_type,
+ peer = rtnl_create_link(peer_net, ifname, name_assign_type,
&veth_link_ops, tbp, extack);
- if (IS_ERR(peer)) {
- put_net(net);
+ if (IS_ERR(peer))
return PTR_ERR(peer);
- }
if (!ifmp || !tbp[IFLA_ADDRESS])
eth_hw_addr_random(peer);
@@ -1817,8 +1854,6 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
netif_inherit_tso_max(peer, dev);
err = register_netdevice(peer);
- put_net(net);
- net = NULL;
if (err < 0)
goto err_register_peer;