Diffstat (limited to 'drivers/net/veth.c')
-rw-r--r--	drivers/net/veth.c	207
1 file changed, 73 insertions(+), 134 deletions(-)
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 578e36ea1589..e58a0f1b5c5b 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -17,6 +17,7 @@
#include <net/rtnetlink.h>
#include <net/dst.h>
+#include <net/netdev_lock.h>
#include <net/xfrm.h>
#include <net/xdp.h>
#include <linux/veth.h>
@@ -26,6 +27,7 @@
#include <linux/ptr_ring.h>
#include <linux/bpf_trace.h>
#include <linux/net_tstamp.h>
+#include <linux/skbuff_ref.h>
#include <net/page_pool/helpers.h>
#define DRV_NAME "veth"
@@ -305,12 +307,10 @@ static void __veth_xdp_flush(struct veth_rq *rq)
static int veth_xdp_rx(struct veth_rq *rq, struct sk_buff *skb)
{
- if (unlikely(ptr_ring_produce(&rq->xdp_ring, skb))) {
- dev_kfree_skb_any(skb);
- return NET_RX_DROP;
- }
+ if (unlikely(ptr_ring_produce(&rq->xdp_ring, skb)))
+ return NETDEV_TX_BUSY; /* signal qdisc layer */
- return NET_RX_SUCCESS;
+ return NET_RX_SUCCESS; /* same as NETDEV_TX_OK */
}
static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb,
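Note: veth_xdp_rx() now reports a full ptr_ring as NETDEV_TX_BUSY instead of
freeing the skb, so the caller can apply qdisc backpressure rather than drop.
The "same as" comments work because the two return-code namespaces happen to
line up by value; from include/linux/netdevice.h:

    NETDEV_TX_OK   = 0x00    /* same value as NET_RX_SUCCESS (0) */
    NETDEV_TX_BUSY = 0x10    /* only legal when a queue can hold the skb */
    /* and NET_RX_DROP == NET_XMIT_DROP == 1 */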
@@ -344,11 +344,11 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
struct veth_rq *rq = NULL;
- int ret = NETDEV_TX_OK;
+ struct netdev_queue *txq;
struct net_device *rcv;
int length = skb->len;
bool use_napi = false;
- int rxq;
+ int ret, rxq;
rcu_read_lock();
rcv = rcu_dereference(priv->peer);
@@ -371,17 +371,45 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
}
skb_tx_timestamp(skb);
- if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) {
+
+ ret = veth_forward_skb(rcv, skb, rq, use_napi);
+ switch (ret) {
+ case NET_RX_SUCCESS: /* same as NETDEV_TX_OK */
if (!use_napi)
dev_sw_netstats_tx_add(dev, 1, length);
else
__veth_xdp_flush(rq);
- } else {
+ break;
+ case NETDEV_TX_BUSY:
+ /* If a qdisc is attached to our virtual device, returning
+ * NETDEV_TX_BUSY is allowed.
+ */
+ txq = netdev_get_tx_queue(dev, rxq);
+
+ if (qdisc_txq_has_no_queue(txq)) {
+ dev_kfree_skb_any(skb);
+ goto drop;
+ }
+ /* Restore Eth hdr pulled by dev_forward_skb/eth_type_trans */
+ __skb_push(skb, ETH_HLEN);
+ /* Rely on prior successful packets having started the NAPI consumer
+ * via __veth_xdp_flush(). Cancel the TXQ stop if the consumer has
+ * already stopped; this pairs with the empty check in veth_poll().
+ */
+ netif_tx_stop_queue(txq);
+ smp_mb__after_atomic();
+ if (unlikely(__ptr_ring_empty(&rq->xdp_ring)))
+ netif_tx_wake_queue(txq);
+ break;
+ case NET_RX_DROP: /* same as NET_XMIT_DROP */
drop:
atomic64_inc(&priv->dropped);
ret = NET_XMIT_DROP;
+ break;
+ default:
+ net_crit_ratelimited("%s(%s): Invalid return code(%d)\n",
+ __func__, dev->name, ret);
}
-
rcu_read_unlock();
return ret;
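Note: the NETDEV_TX_BUSY arm is the producer half of a classic lockless
stop/wake handshake with the peer's NAPI consumer. Condensed from the two
hunks in this patch:

    /* producer (veth_xmit), peer ring full: */
    netif_tx_stop_queue(txq);
    smp_mb__after_atomic();               /* order stop bit vs. ring re-read */
    if (__ptr_ring_empty(&rq->xdp_ring))  /* consumer drained meanwhile? */
            netif_tx_wake_queue(txq);

    /* consumer (veth_xdp_rcv), after draining the ring: */
    if (netif_tx_queue_stopped(peer_txq))
            netif_tx_wake_queue(peer_txq);

Without the barrier plus re-check, the consumer could empty the ring and then
test a not-yet-visible stopped bit, leaving the queue stopped with nobody
left to wake it.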
@@ -633,7 +661,7 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
break;
case XDP_TX:
orig_frame = *frame;
- xdp->rxq->mem = frame->mem;
+ xdp->rxq->mem.type = frame->mem_type;
if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) {
trace_xdp_exception(rq->dev, xdp_prog, act);
frame = &orig_frame;
@@ -645,7 +673,7 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
goto xdp_xmit;
case XDP_REDIRECT:
orig_frame = *frame;
- xdp->rxq->mem = frame->mem;
+ xdp->rxq->mem.type = frame->mem_type;
if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
frame = &orig_frame;
stats->rx_drops++;
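Note: xdp_frame no longer embeds a full struct xdp_mem_info here; following
the generic XDP rework this diff builds on, the frame carries only its memory
type, so just rxq->mem.type is restored before veth_xdp_tx() or
xdp_do_redirect() hands the buffer back.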
@@ -683,8 +711,7 @@ static void veth_xdp_rcv_bulk_skb(struct veth_rq *rq, void **frames,
void *skbs[VETH_XDP_BATCH];
int i;
- if (xdp_alloc_skb_bulk(skbs, n_xdpf,
- GFP_ATOMIC | __GFP_ZERO) < 0) {
+ if (unlikely(!napi_skb_cache_get_bulk(skbs, n_xdpf))) {
for (i = 0; i < n_xdpf; i++)
xdp_return_frame(frames[i]);
stats->rx_drops += n_xdpf;
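Note: napi_skb_cache_get_bulk() hands out zeroed skb shells from the per-CPU
NAPI cache (kept warm by skbs freed on the same core) rather than hitting the
slab bulk allocator on every batch as xdp_alloc_skb_bulk() did. A zero return
means no skbs were obtained, and the frames remain the caller's to release,
hence the xdp_return_frame() loop.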
@@ -729,80 +756,10 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
if (skb_shared(skb) || skb_head_is_locked(skb) ||
skb_shinfo(skb)->nr_frags ||
skb_headroom(skb) < XDP_PACKET_HEADROOM) {
- u32 size, len, max_head_size, off, truesize, page_offset;
- struct sk_buff *nskb;
- struct page *page;
- int i, head_off;
- void *va;
-
- /* We need a private copy of the skb and data buffers since
- * the ebpf program can modify it. We segment the original skb
- * into order-0 pages without linearize it.
- *
- * Make sure we have enough space for linear and paged area
- */
- max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE -
- VETH_XDP_HEADROOM);
- if (skb->len > PAGE_SIZE * MAX_SKB_FRAGS + max_head_size)
- goto drop;
-
- size = min_t(u32, skb->len, max_head_size);
- truesize = SKB_HEAD_ALIGN(size) + VETH_XDP_HEADROOM;
-
- /* Allocate skb head */
- va = page_pool_dev_alloc_va(rq->page_pool, &truesize);
- if (!va)
+ if (skb_pp_cow_data(rq->page_pool, pskb, XDP_PACKET_HEADROOM))
goto drop;
- nskb = napi_build_skb(va, truesize);
- if (!nskb) {
- page_pool_free_va(rq->page_pool, va, true);
- goto drop;
- }
-
- skb_reserve(nskb, VETH_XDP_HEADROOM);
- skb_copy_header(nskb, skb);
- skb_mark_for_recycle(nskb);
-
- if (skb_copy_bits(skb, 0, nskb->data, size)) {
- consume_skb(nskb);
- goto drop;
- }
- skb_put(nskb, size);
-
- head_off = skb_headroom(nskb) - skb_headroom(skb);
- skb_headers_offset_update(nskb, head_off);
-
- /* Allocate paged area of new skb */
- off = size;
- len = skb->len - off;
-
- for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
- size = min_t(u32, len, PAGE_SIZE);
- truesize = size;
-
- page = page_pool_dev_alloc(rq->page_pool, &page_offset,
- &truesize);
- if (!page) {
- consume_skb(nskb);
- goto drop;
- }
-
- skb_add_rx_frag(nskb, i, page, page_offset, size,
- truesize);
- if (skb_copy_bits(skb, off,
- page_address(page) + page_offset,
- size)) {
- consume_skb(nskb);
- goto drop;
- }
-
- len -= size;
- off += size;
- }
-
- consume_skb(skb);
- skb = nskb;
+ skb = *pskb;
}
/* SKB "head" area always have tailroom for skb_shared_info */
@@ -943,9 +900,17 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget,
struct veth_xdp_tx_bq *bq,
struct veth_stats *stats)
{
+ struct veth_priv *priv = netdev_priv(rq->dev);
+ int queue_idx = rq->xdp_rxq.queue_index;
+ struct netdev_queue *peer_txq;
+ struct net_device *peer_dev;
int i, done = 0, n_xdpf = 0;
void *xdpf[VETH_XDP_BATCH];
+ /* NAPI functions as RCU section */
+ peer_dev = rcu_dereference_check(priv->peer, rcu_read_lock_bh_held());
+ peer_txq = netdev_get_tx_queue(peer_dev, queue_idx);
+
for (i = 0; i < budget; i++) {
void *ptr = __ptr_ring_consume(&rq->xdp_ring);
@@ -994,6 +959,9 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget,
rq->stats.vs.xdp_packets += done;
u64_stats_update_end(&rq->stats.syncp);
+ if (unlikely(netif_tx_queue_stopped(peer_txq)))
+ netif_tx_wake_queue(peer_txq);
+
return done;
}
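Note: NAPI poll runs with bottom halves disabled, which is a valid RCU
read-side section, so the peer lookup uses rcu_dereference_check() with
rcu_read_lock_bh_held() instead of taking rcu_read_lock() again. The
stopped-check-then-wake after draining is the consumer half of the handshake
sketched at the veth_xmit hunk above.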
@@ -1208,14 +1176,6 @@ static int veth_enable_xdp(struct net_device *dev)
veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true);
return err;
}
-
- if (!veth_gro_requested(dev)) {
- /* user-space did not require GRO, but adding XDP
- * is supposed to get GRO working
- */
- dev->features |= NETIF_F_GRO;
- netdev_features_change(dev);
- }
}
}
@@ -1235,18 +1195,9 @@ static void veth_disable_xdp(struct net_device *dev)
for (i = 0; i < dev->real_num_rx_queues; i++)
rcu_assign_pointer(priv->rq[i].xdp_prog, NULL);
- if (!netif_running(dev) || !veth_gro_requested(dev)) {
+ if (!netif_running(dev) || !veth_gro_requested(dev))
veth_napi_del(dev);
- /* if user-space did not require GRO, since adding XDP
- * enabled it, clear it now
- */
- if (!veth_gro_requested(dev) && netif_running(dev)) {
- dev->features &= ~NETIF_F_GRO;
- netdev_features_change(dev);
- }
- }
-
veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false);
}
@@ -1478,7 +1429,8 @@ static int veth_alloc_queues(struct net_device *dev)
struct veth_priv *priv = netdev_priv(dev);
int i;
- priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL_ACCOUNT);
+ priv->rq = kvcalloc(dev->num_rx_queues, sizeof(*priv->rq),
+ GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
if (!priv->rq)
return -ENOMEM;
@@ -1494,11 +1446,12 @@ static void veth_free_queues(struct net_device *dev)
{
struct veth_priv *priv = netdev_priv(dev);
- kfree(priv->rq);
+ kvfree(priv->rq);
}
static int veth_dev_init(struct net_device *dev)
{
+ netdev_lockdep_set_classes(dev);
return veth_alloc_queues(dev);
}
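Note: two independent changes land in this region. The per-queue array scales
with num_rx_queues and can exceed what a contiguous kmalloc reliably
provides, so kvcalloc() may fall back to vmalloc, with __GFP_RETRY_MAYFAIL
preferring allocation failure over the OOM killer; the matching release must
then be kvfree(), as veth_free_queues() now does. netdev_lockdep_set_classes()
gives veth's xmit locks their own lockdep keys so stacked virtual devices do
not trigger false lockdep splats.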
@@ -1530,7 +1483,7 @@ static int veth_get_iflink(const struct net_device *dev)
rcu_read_lock();
peer = rcu_dereference(priv->peer);
- iflink = peer ? peer->ifindex : 0;
+ iflink = peer ? READ_ONCE(peer->ifindex) : 0;
rcu_read_unlock();
return iflink;
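Note: a veth peer's ifindex can change while the reader holds only RCU (for
instance when the peer moves to another network namespace and is assigned a
new index), so the lockless read is annotated with READ_ONCE() to avoid load
tearing, presumably paired with a WRITE_ONCE() on the update side.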
@@ -1549,8 +1502,6 @@ static netdev_features_t veth_fix_features(struct net_device *dev,
if (peer_priv->_xdp_prog)
features &= ~NETIF_F_GSO_SOFTWARE;
}
- if (priv->_xdp_prog)
- features |= NETIF_F_GRO;
return features;
}
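Note: together with the veth_enable_xdp()/veth_disable_xdp() hunks above,
this stops XDP attach/detach from toggling NETIF_F_GRO behind user-space's
back: NAPI is still brought up for XDP, but the GRO feature bit now stays
exactly as configured, and dropping the _xdp_prog clause from
veth_fix_features() keeps later feature recomputations from re-adding it.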
@@ -1782,11 +1733,12 @@ static void veth_setup(struct net_device *dev)
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
dev->priv_flags |= IFF_NO_QUEUE;
dev->priv_flags |= IFF_PHONY_HEADROOM;
+ dev->priv_flags |= IFF_DISABLE_NETPOLL;
+ dev->lltx = true;
dev->netdev_ops = &veth_netdev_ops;
dev->xdp_metadata_ops = &veth_xdp_metadata_ops;
dev->ethtool_ops = &veth_ethtool_ops;
- dev->features |= NETIF_F_LLTX;
dev->features |= VETH_FEATURES;
dev->vlan_features = dev->features &
~(NETIF_F_HW_VLAN_CTAG_TX |
@@ -1850,10 +1802,13 @@ static int veth_init_queues(struct net_device *dev, struct nlattr *tb[])
return 0;
}
-static int veth_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[],
+static int veth_newlink(struct net_device *dev,
+ struct rtnl_newlink_params *params,
struct netlink_ext_ack *extack)
{
+ struct net *peer_net = rtnl_newlink_peer_net(params);
+ struct nlattr **data = params->data;
+ struct nlattr **tb = params->tb;
int err;
struct net_device *peer;
struct veth_priv *priv;
@@ -1861,24 +1816,15 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *peer_tb[IFLA_MAX + 1], **tbp;
unsigned char name_assign_type;
struct ifinfomsg *ifmp;
- struct net *net;
/*
* create and register peer first
*/
- if (data != NULL && data[VETH_INFO_PEER] != NULL) {
- struct nlattr *nla_peer;
+ if (data && data[VETH_INFO_PEER]) {
+ struct nlattr *nla_peer = data[VETH_INFO_PEER];
- nla_peer = data[VETH_INFO_PEER];
ifmp = nla_data(nla_peer);
- err = rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack);
- if (err < 0)
- return err;
-
- err = veth_validate(peer_tb, NULL, extack);
- if (err < 0)
- return err;
-
+ rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack);
tbp = peer_tb;
} else {
ifmp = NULL;
@@ -1893,16 +1839,10 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
name_assign_type = NET_NAME_ENUM;
}
- net = rtnl_link_get_net(src_net, tbp);
- if (IS_ERR(net))
- return PTR_ERR(net);
-
- peer = rtnl_create_link(net, ifname, name_assign_type,
+ peer = rtnl_create_link(peer_net, ifname, name_assign_type,
&veth_link_ops, tbp, extack);
- if (IS_ERR(peer)) {
- put_net(net);
+ if (IS_ERR(peer))
return PTR_ERR(peer);
- }
if (!ifmp || !tbp[IFLA_ADDRESS])
eth_hw_addr_random(peer);
@@ -1913,8 +1853,6 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
netif_inherit_tso_max(peer, dev);
err = register_netdevice(peer);
- put_net(net);
- net = NULL;
if (err < 0)
goto err_register_peer;
@@ -2037,6 +1975,7 @@ static struct rtnl_link_ops veth_link_ops = {
.newlink = veth_newlink,
.dellink = veth_dellink,
.policy = veth_policy,
+ .peer_type = VETH_INFO_PEER,
.maxtype = VETH_INFO_MAX,
.get_link_net = veth_get_link_net,
.get_num_tx_queues = veth_get_num_queues,
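Note: setting .peer_type tells the rtnetlink core which attribute carries the
peer's ifinfomsg, so the core can parse and validate the peer and resolve its
target namespace before ->newlink() runs; rtnl_newlink_peer_net() simply
returns that result. This is why veth_newlink() sheds the veth_validate()
call, the error check on rtnl_nla_parse_ifinfomsg(), and the
rtnl_link_get_net()/put_net() bookkeeping.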