Diffstat (limited to 'drivers/net/veth.c')
-rw-r--r--  drivers/net/veth.c | 486
1 file changed, 290 insertions(+), 196 deletions(-)
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index dfc7d87fad59..14e6f2a2fb77 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -17,6 +17,7 @@
#include <net/rtnetlink.h>
#include <net/dst.h>
+#include <net/netdev_lock.h>
#include <net/xfrm.h>
#include <net/xdp.h>
#include <linux/veth.h>
@@ -26,6 +27,8 @@
#include <linux/ptr_ring.h>
#include <linux/bpf_trace.h>
#include <linux/net_tstamp.h>
+#include <linux/skbuff_ref.h>
+#include <net/page_pool/helpers.h>
#define DRV_NAME "veth"
#define DRV_VERSION "1.0"
@@ -65,6 +68,7 @@ struct veth_rq {
bool rx_notify_masked;
struct ptr_ring xdp_ring;
struct xdp_rxq_info xdp_rxq;
+ struct page_pool *page_pool;
};
struct veth_priv {
@@ -116,6 +120,11 @@ static struct {
{ "peer_ifindex" },
};
+struct veth_xdp_buff {
+ struct xdp_buff xdp;
+ struct sk_buff *skb;
+};
+
static int veth_get_link_ksettings(struct net_device *dev,
struct ethtool_link_ksettings *cmd)
{
@@ -150,6 +159,8 @@ static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
for (j = 0; j < VETH_TQ_STATS_LEN; j++)
ethtool_sprintf(&p, "tx_queue_%u_%.18s",
i, veth_tq_stats_desc[j].desc);
+
+ page_pool_ethtool_stats_get_strings(p);
break;
}
}
@@ -160,18 +171,35 @@ static int veth_get_sset_count(struct net_device *dev, int sset)
case ETH_SS_STATS:
return ARRAY_SIZE(ethtool_stats_keys) +
VETH_RQ_STATS_LEN * dev->real_num_rx_queues +
- VETH_TQ_STATS_LEN * dev->real_num_tx_queues;
+ VETH_TQ_STATS_LEN * dev->real_num_tx_queues +
+ page_pool_ethtool_stats_get_count();
default:
return -EOPNOTSUPP;
}
}
+static void veth_get_page_pool_stats(struct net_device *dev, u64 *data)
+{
+#ifdef CONFIG_PAGE_POOL_STATS
+ struct veth_priv *priv = netdev_priv(dev);
+ struct page_pool_stats pp_stats = {};
+ int i;
+
+ for (i = 0; i < dev->real_num_rx_queues; i++) {
+ if (!priv->rq[i].page_pool)
+ continue;
+ page_pool_get_stats(priv->rq[i].page_pool, &pp_stats);
+ }
+ page_pool_ethtool_stats_get(data, &pp_stats);
+#endif /* CONFIG_PAGE_POOL_STATS */
+}
+
static void veth_get_ethtool_stats(struct net_device *dev,
struct ethtool_stats *stats, u64 *data)
{
struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
struct net_device *peer = rtnl_dereference(priv->peer);
- int i, j, idx;
+ int i, j, idx, pp_idx;
data[0] = peer ? peer->ifindex : 0;
idx = 1;
@@ -190,9 +218,10 @@ static void veth_get_ethtool_stats(struct net_device *dev,
} while (u64_stats_fetch_retry(&rq_stats->syncp, start));
idx += VETH_RQ_STATS_LEN;
}
+ pp_idx = idx;
if (!peer)
- return;
+ goto page_pool_stats;
rcv_priv = netdev_priv(peer);
for (i = 0; i < peer->real_num_rx_queues; i++) {
@@ -210,6 +239,10 @@ static void veth_get_ethtool_stats(struct net_device *dev,
}
} while (u64_stats_fetch_retry(&rq_stats->syncp, start));
}
+ pp_idx = idx + dev->real_num_tx_queues * VETH_TQ_STATS_LEN;
+
+page_pool_stats:
+ veth_get_page_pool_stats(dev, &data[pp_idx]);
}
static void veth_get_channels(struct net_device *dev,
@@ -274,12 +307,10 @@ static void __veth_xdp_flush(struct veth_rq *rq)
static int veth_xdp_rx(struct veth_rq *rq, struct sk_buff *skb)
{
- if (unlikely(ptr_ring_produce(&rq->xdp_ring, skb))) {
- dev_kfree_skb_any(skb);
- return NET_RX_DROP;
- }
+ if (unlikely(ptr_ring_produce(&rq->xdp_ring, skb)))
+ return NETDEV_TX_BUSY; /* signal qdisc layer */
- return NET_RX_SUCCESS;
+ return NET_RX_SUCCESS; /* same as NETDEV_TX_OK */
}
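For context, the "same as" remarks in this hunk rely on the core return codes being numerically identical; a small compile-time illustration of that assumption (not part of the patch):

	/* Illustrative only: constants from include/linux/netdevice.h */
	#include <linux/build_bug.h>
	#include <linux/netdevice.h>

	static_assert(NET_RX_SUCCESS == NETDEV_TX_OK);	/* both 0 */
	static_assert(NET_RX_DROP == NET_XMIT_DROP);	/* both 1 */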
static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb,
@@ -313,10 +344,11 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
struct veth_rq *rq = NULL;
+ struct netdev_queue *txq;
struct net_device *rcv;
int length = skb->len;
bool use_napi = false;
- int rxq;
+ int ret, rxq;
rcu_read_lock();
rcv = rcu_dereference(priv->peer);
@@ -339,28 +371,46 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
}
skb_tx_timestamp(skb);
- if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) {
+
+ ret = veth_forward_skb(rcv, skb, rq, use_napi);
+ switch (ret) {
+ case NET_RX_SUCCESS: /* same as NETDEV_TX_OK */
if (!use_napi)
- dev_lstats_add(dev, length);
- } else {
+ dev_sw_netstats_tx_add(dev, 1, length);
+ else
+ __veth_xdp_flush(rq);
+ break;
+ case NETDEV_TX_BUSY:
+ /* If a qdisc is attached to our virtual device, returning
+ * NETDEV_TX_BUSY is allowed.
+ */
+ txq = netdev_get_tx_queue(dev, rxq);
+
+ if (qdisc_txq_has_no_queue(txq)) {
+ dev_kfree_skb_any(skb);
+ goto drop;
+ }
+ /* Restore Eth hdr pulled by dev_forward_skb/eth_type_trans */
+ __skb_push(skb, ETH_HLEN);
+ netif_tx_stop_queue(txq);
+ /* Makes sure NAPI peer consumer runs. Consumer is responsible
+ * for starting txq again, until then ndo_start_xmit (this
+ * function) will not be invoked by the netstack again.
+ */
+ __veth_xdp_flush(rq);
+ break;
+ case NET_RX_DROP: /* same as NET_XMIT_DROP */
drop:
atomic64_inc(&priv->dropped);
+ ret = NET_XMIT_DROP;
+ break;
+ default:
+ net_crit_ratelimited("%s(%s): Invalid return code(%d)",
+ __func__, dev->name, ret);
}
-
- if (use_napi)
- __veth_xdp_flush(rq);
-
rcu_read_unlock();
- return NETDEV_TX_OK;
-}
-
-static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes)
-{
- struct veth_priv *priv = netdev_priv(dev);
-
- dev_lstats_read(dev, packets, bytes);
- return atomic64_read(&priv->dropped);
+ return ret;
}
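Whether NETDEV_TX_BUSY may be returned at all hinges on the qdisc_txq_has_no_queue() test above: with the default noqueue setup nothing would requeue the skb, so it is freed and counted as dropped instead. A hedged sketch of what such a check amounts to (the real helper lives in include/net/sch_generic.h and may differ in detail):

	/* Sketch only: "no queue" means the attached qdisc has no enqueue hook,
	 * which is how noqueue devices are recognised elsewhere in the stack. */
	static inline bool txq_has_no_queue_sketch(const struct netdev_queue *txq)
	{
		struct Qdisc *qdisc = rcu_access_pointer(txq->qdisc);

		return qdisc->enqueue == NULL;
	}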
static void veth_stats_rx(struct veth_stats *result, struct net_device *dev)
@@ -400,24 +450,24 @@ static void veth_get_stats64(struct net_device *dev,
struct veth_priv *priv = netdev_priv(dev);
struct net_device *peer;
struct veth_stats rx;
- u64 packets, bytes;
- tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes);
- tot->tx_bytes = bytes;
- tot->tx_packets = packets;
+ tot->tx_dropped = atomic64_read(&priv->dropped);
+ dev_fetch_sw_netstats(tot, dev->tstats);
veth_stats_rx(&rx, dev);
tot->tx_dropped += rx.xdp_tx_err;
tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err;
- tot->rx_bytes = rx.xdp_bytes;
- tot->rx_packets = rx.xdp_packets;
+ tot->rx_bytes += rx.xdp_bytes;
+ tot->rx_packets += rx.xdp_packets;
rcu_read_lock();
peer = rcu_dereference(priv->peer);
if (peer) {
- veth_stats_tx(peer, &packets, &bytes);
- tot->rx_bytes += bytes;
- tot->rx_packets += packets;
+ struct rtnl_link_stats64 tot_peer = {};
+
+ dev_fetch_sw_netstats(&tot_peer, peer->tstats);
+ tot->rx_bytes += tot_peer.tx_bytes;
+ tot->rx_packets += tot_peer.tx_packets;
veth_stats_rx(&rx, peer);
tot->tx_dropped += rx.peer_tq_xdp_xmit_err;
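The lstats helpers are gone in favour of the generic tstats ones; the per-CPU counters are now allocated by the core because veth_setup() further down sets dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS. A rough sketch of the update pattern dev_sw_netstats_tx_add() applies to that storage (assuming the usual struct pcpu_sw_netstats layout):

	/* Sketch of the per-CPU tx accounting behind dev_sw_netstats_tx_add();
	 * dev->tstats exists because the core allocates it for TSTATS devices. */
	static void veth_tstats_tx_add_sketch(struct net_device *dev,
					      unsigned int packets, unsigned int len)
	{
		struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);

		u64_stats_update_begin(&tstats->syncp);
		u64_stats_add(&tstats->tx_bytes, len);
		u64_stats_add(&tstats->tx_packets, packets);
		u64_stats_update_end(&tstats->syncp);
	}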
@@ -592,23 +642,25 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
rcu_read_lock();
xdp_prog = rcu_dereference(rq->xdp_prog);
if (likely(xdp_prog)) {
- struct xdp_buff xdp;
+ struct veth_xdp_buff vxbuf;
+ struct xdp_buff *xdp = &vxbuf.xdp;
u32 act;
- xdp_convert_frame_to_buff(frame, &xdp);
- xdp.rxq = &rq->xdp_rxq;
+ xdp_convert_frame_to_buff(frame, xdp);
+ xdp->rxq = &rq->xdp_rxq;
+ vxbuf.skb = NULL;
- act = bpf_prog_run_xdp(xdp_prog, &xdp);
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
switch (act) {
case XDP_PASS:
- if (xdp_update_frame_from_buff(&xdp, frame))
+ if (xdp_update_frame_from_buff(xdp, frame))
goto err_xdp;
break;
case XDP_TX:
orig_frame = *frame;
- xdp.rxq->mem = frame->mem;
- if (unlikely(veth_xdp_tx(rq, &xdp, bq) < 0)) {
+ xdp->rxq->mem.type = frame->mem_type;
+ if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) {
trace_xdp_exception(rq->dev, xdp_prog, act);
frame = &orig_frame;
stats->rx_drops++;
@@ -619,8 +671,8 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
goto xdp_xmit;
case XDP_REDIRECT:
orig_frame = *frame;
- xdp.rxq->mem = frame->mem;
- if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) {
+ xdp->rxq->mem.type = frame->mem_type;
+ if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
frame = &orig_frame;
stats->rx_drops++;
goto err_xdp;
@@ -657,8 +709,7 @@ static void veth_xdp_rcv_bulk_skb(struct veth_rq *rq, void **frames,
void *skbs[VETH_XDP_BATCH];
int i;
- if (xdp_alloc_skb_bulk(skbs, n_xdpf,
- GFP_ATOMIC | __GFP_ZERO) < 0) {
+ if (unlikely(!napi_skb_cache_get_bulk(skbs, n_xdpf))) {
for (i = 0; i < n_xdpf; i++)
xdp_return_frame(frames[i]);
stats->rx_drops += n_xdpf;
@@ -701,74 +752,12 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
u32 frame_sz;
if (skb_shared(skb) || skb_head_is_locked(skb) ||
- skb_shinfo(skb)->nr_frags) {
- u32 size, len, max_head_size, off;
- struct sk_buff *nskb;
- struct page *page;
- int i, head_off;
-
- /* We need a private copy of the skb and data buffers since
- * the ebpf program can modify it. We segment the original skb
- * into order-0 pages without linearize it.
- *
- * Make sure we have enough space for linear and paged area
- */
- max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE -
- VETH_XDP_HEADROOM);
- if (skb->len > PAGE_SIZE * MAX_SKB_FRAGS + max_head_size)
- goto drop;
-
- /* Allocate skb head */
- page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
- if (!page)
- goto drop;
-
- nskb = build_skb(page_address(page), PAGE_SIZE);
- if (!nskb) {
- put_page(page);
+ skb_shinfo(skb)->nr_frags ||
+ skb_headroom(skb) < XDP_PACKET_HEADROOM) {
+ if (skb_pp_cow_data(rq->page_pool, pskb, XDP_PACKET_HEADROOM))
goto drop;
- }
-
- skb_reserve(nskb, VETH_XDP_HEADROOM);
- size = min_t(u32, skb->len, max_head_size);
- if (skb_copy_bits(skb, 0, nskb->data, size)) {
- consume_skb(nskb);
- goto drop;
- }
- skb_put(nskb, size);
-
- skb_copy_header(nskb, skb);
- head_off = skb_headroom(nskb) - skb_headroom(skb);
- skb_headers_offset_update(nskb, head_off);
- /* Allocate paged area of new skb */
- off = size;
- len = skb->len - off;
-
- for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
- page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
- if (!page) {
- consume_skb(nskb);
- goto drop;
- }
-
- size = min_t(u32, len, PAGE_SIZE);
- skb_add_rx_frag(nskb, i, page, 0, size, PAGE_SIZE);
- if (skb_copy_bits(skb, off, page_address(page),
- size)) {
- consume_skb(nskb);
- goto drop;
- }
-
- len -= size;
- off += size;
- }
-
- consume_skb(skb);
- skb = nskb;
- } else if (skb_headroom(skb) < XDP_PACKET_HEADROOM &&
- pskb_expand_head(skb, VETH_XDP_HEADROOM, 0, GFP_ATOMIC)) {
- goto drop;
+ skb = *pskb;
}
/* SKB "head" area always have tailroom for skb_shared_info */
@@ -801,7 +790,8 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
{
void *orig_data, *orig_data_end;
struct bpf_prog *xdp_prog;
- struct xdp_buff xdp;
+ struct veth_xdp_buff vxbuf;
+ struct xdp_buff *xdp = &vxbuf.xdp;
u32 act, metalen;
int off;
@@ -815,22 +805,23 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
}
__skb_push(skb, skb->data - skb_mac_header(skb));
- if (veth_convert_skb_to_xdp_buff(rq, &xdp, &skb))
+ if (veth_convert_skb_to_xdp_buff(rq, xdp, &skb))
goto drop;
+ vxbuf.skb = skb;
- orig_data = xdp.data;
- orig_data_end = xdp.data_end;
+ orig_data = xdp->data;
+ orig_data_end = xdp->data_end;
- act = bpf_prog_run_xdp(xdp_prog, &xdp);
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
switch (act) {
case XDP_PASS:
break;
case XDP_TX:
- veth_xdp_get(&xdp);
+ veth_xdp_get(xdp);
consume_skb(skb);
- xdp.rxq->mem = rq->xdp_mem;
- if (unlikely(veth_xdp_tx(rq, &xdp, bq) < 0)) {
+ xdp->rxq->mem = rq->xdp_mem;
+ if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) {
trace_xdp_exception(rq->dev, xdp_prog, act);
stats->rx_drops++;
goto err_xdp;
@@ -839,10 +830,10 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
rcu_read_unlock();
goto xdp_xmit;
case XDP_REDIRECT:
- veth_xdp_get(&xdp);
+ veth_xdp_get(xdp);
consume_skb(skb);
- xdp.rxq->mem = rq->xdp_mem;
- if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) {
+ xdp->rxq->mem = rq->xdp_mem;
+ if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
stats->rx_drops++;
goto err_xdp;
}
@@ -862,7 +853,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
rcu_read_unlock();
/* check if bpf_xdp_adjust_head was used */
- off = orig_data - xdp.data;
+ off = orig_data - xdp->data;
if (off > 0)
__skb_push(skb, off);
else if (off < 0)
@@ -871,21 +862,21 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
skb_reset_mac_header(skb);
/* check if bpf_xdp_adjust_tail was used */
- off = xdp.data_end - orig_data_end;
+ off = xdp->data_end - orig_data_end;
if (off != 0)
__skb_put(skb, off); /* positive on grow, negative on shrink */
/* XDP frag metadata (e.g. nr_frags) are updated in eBPF helpers
* (e.g. bpf_xdp_adjust_tail), we need to update data_len here.
*/
- if (xdp_buff_has_frags(&xdp))
+ if (xdp_buff_has_frags(xdp))
skb->data_len = skb_shinfo(skb)->xdp_frags_size;
else
skb->data_len = 0;
skb->protocol = eth_type_trans(skb, rq->dev);
- metalen = xdp.data - xdp.data_meta;
+ metalen = xdp->data - xdp->data_meta;
if (metalen)
skb_metadata_set(skb, metalen);
out:
@@ -898,7 +889,7 @@ xdp_drop:
return NULL;
err_xdp:
rcu_read_unlock();
- xdp_return_buff(&xdp);
+ xdp_return_buff(xdp);
xdp_xmit:
return NULL;
}
@@ -965,17 +956,28 @@ static int veth_poll(struct napi_struct *napi, int budget)
{
struct veth_rq *rq =
container_of(napi, struct veth_rq, xdp_napi);
+ struct veth_priv *priv = netdev_priv(rq->dev);
+ int queue_idx = rq->xdp_rxq.queue_index;
+ struct netdev_queue *peer_txq;
struct veth_stats stats = {};
+ struct net_device *peer_dev;
struct veth_xdp_tx_bq bq;
int done;
bq.count = 0;
+ /* NAPI functions as RCU section */
+ peer_dev = rcu_dereference_check(priv->peer, rcu_read_lock_bh_held());
+ peer_txq = peer_dev ? netdev_get_tx_queue(peer_dev, queue_idx) : NULL;
+
xdp_set_return_frame_no_direct();
done = veth_xdp_rcv(rq, budget, &bq, &stats);
if (stats.xdp_redirect > 0)
xdp_do_flush();
+ if (stats.xdp_tx > 0)
+ veth_xdp_flush(rq, &bq);
+ xdp_clear_return_frame_no_direct();
if (done < budget && napi_complete_done(napi, done)) {
/* Write rx_notify_masked before reading ptr_ring */
@@ -988,19 +990,48 @@ static int veth_poll(struct napi_struct *napi, int budget)
}
}
- if (stats.xdp_tx > 0)
- veth_xdp_flush(rq, &bq);
- xdp_clear_return_frame_no_direct();
+ /* Release backpressure per NAPI poll */
+ smp_rmb(); /* Paired with netif_tx_stop_queue set_bit */
+ if (peer_txq && netif_tx_queue_stopped(peer_txq)) {
+ txq_trans_cond_update(peer_txq);
+ netif_tx_wake_queue(peer_txq);
+ }
return done;
}
+static int veth_create_page_pool(struct veth_rq *rq)
+{
+ struct page_pool_params pp_params = {
+ .order = 0,
+ .pool_size = VETH_RING_SIZE,
+ .nid = NUMA_NO_NODE,
+ .dev = &rq->dev->dev,
+ };
+
+ rq->page_pool = page_pool_create(&pp_params);
+ if (IS_ERR(rq->page_pool)) {
+ int err = PTR_ERR(rq->page_pool);
+
+ rq->page_pool = NULL;
+ return err;
+ }
+
+ return 0;
+}
+
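The pool created above hands out plain order-0 pages sized to the ring and requests no DMA mapping; veth only consumes it indirectly, via skb_pp_cow_data() in veth_convert_skb_to_xdp_buff(). A generic illustration of the alloc/recycle cycle such a pool offers (hypothetical helper, not driver code):

	/* Example only: allocate a page from the queue's pool and hand it back
	 * so the pool can recycle it rather than free it. */
	static void veth_pp_recycle_example(struct veth_rq *rq)
	{
		struct page *page = page_pool_dev_alloc_pages(rq->page_pool);

		if (!page)
			return;

		/* ... fill the page, e.g. with copied skb data ... */

		page_pool_put_full_page(rq->page_pool, page, false);
	}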
static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
{
struct veth_priv *priv = netdev_priv(dev);
int err, i;
for (i = start; i < end; i++) {
+ err = veth_create_page_pool(&priv->rq[i]);
+ if (err)
+ goto err_page_pool;
+ }
+
+ for (i = start; i < end; i++) {
struct veth_rq *rq = &priv->rq[i];
err = ptr_ring_init(&rq->xdp_ring, VETH_RING_SIZE, GFP_KERNEL);
@@ -1020,6 +1051,12 @@ static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
err_xdp_ring:
for (i--; i >= start; i--)
ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free);
+ i = end;
+err_page_pool:
+ for (i--; i >= start; i--) {
+ page_pool_destroy(priv->rq[i].page_pool);
+ priv->rq[i].page_pool = NULL;
+ }
return err;
}
@@ -1049,6 +1086,11 @@ static void veth_napi_del_range(struct net_device *dev, int start, int end)
rq->rx_notify_masked = false;
ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free);
}
+
+ for (i = start; i < end; i++) {
+ page_pool_destroy(priv->rq[i].page_pool);
+ priv->rq[i].page_pool = NULL;
+ }
}
static void veth_napi_del(struct net_device *dev)
@@ -1135,14 +1177,6 @@ static int veth_enable_xdp(struct net_device *dev)
veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true);
return err;
}
-
- if (!veth_gro_requested(dev)) {
- /* user-space did not require GRO, but adding XDP
- * is supposed to get GRO working
- */
- dev->features |= NETIF_F_GRO;
- netdev_features_change(dev);
- }
}
}
@@ -1162,18 +1196,9 @@ static void veth_disable_xdp(struct net_device *dev)
for (i = 0; i < dev->real_num_rx_queues; i++)
rcu_assign_pointer(priv->rq[i].xdp_prog, NULL);
- if (!netif_running(dev) || !veth_gro_requested(dev)) {
+ if (!netif_running(dev) || !veth_gro_requested(dev))
veth_napi_del(dev);
- /* if user-space did not require GRO, since adding XDP
- * enabled it, clear it now
- */
- if (!veth_gro_requested(dev) && netif_running(dev)) {
- dev->features &= ~NETIF_F_GRO;
- netdev_features_change(dev);
- }
- }
-
veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false);
}
@@ -1248,6 +1273,27 @@ static int veth_enable_range_safe(struct net_device *dev, int start, int end)
return 0;
}
+static void veth_set_xdp_features(struct net_device *dev)
+{
+ struct veth_priv *priv = netdev_priv(dev);
+ struct net_device *peer;
+
+ peer = rtnl_dereference(priv->peer);
+ if (peer && peer->real_num_tx_queues <= dev->real_num_rx_queues) {
+ struct veth_priv *priv_peer = netdev_priv(peer);
+ xdp_features_t val = NETDEV_XDP_ACT_BASIC |
+ NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_RX_SG;
+
+ if (priv_peer->_xdp_prog || veth_gro_requested(peer))
+ val |= NETDEV_XDP_ACT_NDO_XMIT |
+ NETDEV_XDP_ACT_NDO_XMIT_SG;
+ xdp_set_features_flag(dev, val);
+ } else {
+ xdp_clear_features_flag(dev);
+ }
+}
+
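veth_set_xdp_features() advertises the device's XDP capabilities through the generic netlink xdp_features attribute. User space can read those flags back, for example with libbpf's bpf_xdp_query(); a rough example, assuming a libbpf version that exposes feature_flags and an interface named "veth0" (hypothetical):

	/* Userspace sketch: print the XDP feature flags a veth advertises. */
	#include <bpf/libbpf.h>
	#include <net/if.h>
	#include <stdio.h>

	int main(void)
	{
		LIBBPF_OPTS(bpf_xdp_query_opts, opts);
		unsigned int ifindex = if_nametoindex("veth0");

		if (!ifindex || bpf_xdp_query(ifindex, 0, &opts))
			return 1;

		printf("xdp feature flags: 0x%llx\n",
		       (unsigned long long)opts.feature_flags);
		return 0;
	}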
static int veth_set_channels(struct net_device *dev,
struct ethtool_channels *ch)
{
@@ -1278,7 +1324,7 @@ static int veth_set_channels(struct net_device *dev,
if (peer)
netif_carrier_off(peer);
- /* try to allocate new resurces, as needed*/
+ /* try to allocate new resources, as needed*/
err = veth_enable_range_safe(dev, old_rx_count, new_rx_count);
if (err)
goto out;
@@ -1314,6 +1360,12 @@ out:
if (peer)
netif_carrier_on(peer);
}
+
+ /* update XDP supported features */
+ veth_set_xdp_features(dev);
+ if (peer)
+ veth_set_xdp_features(peer);
+
return err;
revert:
@@ -1346,6 +1398,8 @@ static int veth_open(struct net_device *dev)
netif_carrier_on(peer);
}
+ veth_set_xdp_features(dev);
+
return 0;
}
@@ -1376,7 +1430,8 @@ static int veth_alloc_queues(struct net_device *dev)
struct veth_priv *priv = netdev_priv(dev);
int i;
- priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL_ACCOUNT);
+ priv->rq = kvcalloc(dev->num_rx_queues, sizeof(*priv->rq),
+ GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
if (!priv->rq)
return -ENOMEM;
@@ -1392,30 +1447,18 @@ static void veth_free_queues(struct net_device *dev)
{
struct veth_priv *priv = netdev_priv(dev);
- kfree(priv->rq);
+ kvfree(priv->rq);
}
static int veth_dev_init(struct net_device *dev)
{
- int err;
-
- dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
- if (!dev->lstats)
- return -ENOMEM;
-
- err = veth_alloc_queues(dev);
- if (err) {
- free_percpu(dev->lstats);
- return err;
- }
-
- return 0;
+ netdev_lockdep_set_classes(dev);
+ return veth_alloc_queues(dev);
}
static void veth_dev_free(struct net_device *dev)
{
veth_free_queues(dev);
- free_percpu(dev->lstats);
}
#ifdef CONFIG_NET_POLL_CONTROLLER
@@ -1441,7 +1484,7 @@ static int veth_get_iflink(const struct net_device *dev)
rcu_read_lock();
peer = rcu_dereference(priv->peer);
- iflink = peer ? peer->ifindex : 0;
+ iflink = peer ? READ_ONCE(peer->ifindex) : 0;
rcu_read_unlock();
return iflink;
@@ -1460,8 +1503,6 @@ static netdev_features_t veth_fix_features(struct net_device *dev,
if (peer_priv->_xdp_prog)
features &= ~NETIF_F_GSO_SOFTWARE;
}
- if (priv->_xdp_prog)
- features |= NETIF_F_GRO;
return features;
}
@@ -1471,16 +1512,23 @@ static int veth_set_features(struct net_device *dev,
{
netdev_features_t changed = features ^ dev->features;
struct veth_priv *priv = netdev_priv(dev);
+ struct net_device *peer;
int err;
if (!(changed & NETIF_F_GRO) || !(dev->flags & IFF_UP) || priv->_xdp_prog)
return 0;
+ peer = rtnl_dereference(priv->peer);
if (features & NETIF_F_GRO) {
err = veth_napi_enable(dev);
if (err)
return err;
+
+ if (peer)
+ xdp_features_set_redirect_target(peer, true);
} else {
+ if (peer)
+ xdp_features_clear_redirect_target(peer);
veth_napi_del(dev);
}
return 0;
@@ -1561,10 +1609,15 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
peer->hw_features &= ~NETIF_F_GSO_SOFTWARE;
peer->max_mtu = max_mtu;
}
+
+ xdp_features_set_redirect_target(peer, true);
}
if (old_prog) {
if (!prog) {
+ if (peer && !veth_gro_requested(dev))
+ xdp_features_clear_redirect_target(peer);
+
if (dev->flags & IFF_UP)
veth_disable_xdp(dev);
@@ -1596,6 +1649,50 @@ static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
}
}
+static int veth_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
+{
+ struct veth_xdp_buff *_ctx = (void *)ctx;
+
+ if (!_ctx->skb)
+ return -ENODATA;
+
+ *timestamp = skb_hwtstamps(_ctx->skb)->hwtstamp;
+ return 0;
+}
+
+static int veth_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash,
+ enum xdp_rss_hash_type *rss_type)
+{
+ struct veth_xdp_buff *_ctx = (void *)ctx;
+ struct sk_buff *skb = _ctx->skb;
+
+ if (!skb)
+ return -ENODATA;
+
+ *hash = skb_get_hash(skb);
+ *rss_type = skb->l4_hash ? XDP_RSS_TYPE_L4_ANY : XDP_RSS_TYPE_NONE;
+
+ return 0;
+}
+
+static int veth_xdp_rx_vlan_tag(const struct xdp_md *ctx, __be16 *vlan_proto,
+ u16 *vlan_tci)
+{
+ const struct veth_xdp_buff *_ctx = (void *)ctx;
+ const struct sk_buff *skb = _ctx->skb;
+ int err;
+
+ if (!skb)
+ return -ENODATA;
+
+ err = __vlan_hwaccel_get_tag(skb, vlan_tci);
+ if (err)
+ return err;
+
+ *vlan_proto = skb->vlan_proto;
+ return err;
+}
+
static const struct net_device_ops veth_netdev_ops = {
.ndo_init = veth_dev_init,
.ndo_open = veth_open,
@@ -1617,6 +1714,12 @@ static const struct net_device_ops veth_netdev_ops = {
.ndo_get_peer_dev = veth_peer_dev,
};
+static const struct xdp_metadata_ops veth_xdp_metadata_ops = {
+ .xmo_rx_timestamp = veth_xdp_rx_timestamp,
+ .xmo_rx_hash = veth_xdp_rx_hash,
+ .xmo_rx_vlan_tag = veth_xdp_rx_vlan_tag,
+};
+
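Together with the veth_xdp_buff wrapper introduced earlier, these hooks let an XDP program on a veth read skb-derived hints via the metadata kfuncs; they return -ENODATA on the xdp_frame (redirect) path, where no skb backs the buffer. A rough consumer-side sketch, assuming a vmlinux.h/libbpf build environment:

	/* Example XDP program reading the RX metadata veth exposes above. */
	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>

	extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
					    enum xdp_rss_hash_type *rss_type) __ksym;
	extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
						 __u64 *timestamp) __ksym;

	SEC("xdp")
	int veth_md_probe(struct xdp_md *ctx)
	{
		enum xdp_rss_hash_type rss_type;
		__u64 ts;
		__u32 hash;

		if (!bpf_xdp_metadata_rx_hash(ctx, &hash, &rss_type))
			bpf_printk("rx hash 0x%x type %u", hash, rss_type);
		if (!bpf_xdp_metadata_rx_timestamp(ctx, &ts))
			bpf_printk("hw rx timestamp %llu", ts);

		return XDP_PASS;
	}

	char LICENSE[] SEC("license") = "GPL";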
#define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \
NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \
@@ -1631,10 +1734,12 @@ static void veth_setup(struct net_device *dev)
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
dev->priv_flags |= IFF_NO_QUEUE;
dev->priv_flags |= IFF_PHONY_HEADROOM;
+ dev->priv_flags |= IFF_DISABLE_NETPOLL;
+ dev->lltx = true;
dev->netdev_ops = &veth_netdev_ops;
+ dev->xdp_metadata_ops = &veth_xdp_metadata_ops;
dev->ethtool_ops = &veth_ethtool_ops;
- dev->features |= NETIF_F_LLTX;
dev->features |= VETH_FEATURES;
dev->vlan_features = dev->features &
~(NETIF_F_HW_VLAN_CTAG_TX |
@@ -1643,6 +1748,7 @@ static void veth_setup(struct net_device *dev)
NETIF_F_HW_VLAN_STAG_RX);
dev->needs_free_netdev = true;
dev->priv_destructor = veth_dev_free;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
dev->max_mtu = ETH_MAX_MTU;
dev->hw_features = VETH_FEATURES;
@@ -1697,10 +1803,13 @@ static int veth_init_queues(struct net_device *dev, struct nlattr *tb[])
return 0;
}
-static int veth_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[],
+static int veth_newlink(struct net_device *dev,
+ struct rtnl_newlink_params *params,
struct netlink_ext_ack *extack)
{
+ struct net *peer_net = rtnl_newlink_peer_net(params);
+ struct nlattr **data = params->data;
+ struct nlattr **tb = params->tb;
int err;
struct net_device *peer;
struct veth_priv *priv;
@@ -1708,27 +1817,15 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *peer_tb[IFLA_MAX + 1], **tbp;
unsigned char name_assign_type;
struct ifinfomsg *ifmp;
- struct net *net;
/*
* create and register peer first
*/
- if (data != NULL && data[VETH_INFO_PEER] != NULL) {
- struct nlattr *nla_peer;
+ if (data && data[VETH_INFO_PEER]) {
+ struct nlattr *nla_peer = data[VETH_INFO_PEER];
- nla_peer = data[VETH_INFO_PEER];
ifmp = nla_data(nla_peer);
- err = rtnl_nla_parse_ifla(peer_tb,
- nla_data(nla_peer) + sizeof(struct ifinfomsg),
- nla_len(nla_peer) - sizeof(struct ifinfomsg),
- NULL);
- if (err < 0)
- return err;
-
- err = veth_validate(peer_tb, NULL, extack);
- if (err < 0)
- return err;
-
+ rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack);
tbp = peer_tb;
} else {
ifmp = NULL;
@@ -1743,16 +1840,10 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
name_assign_type = NET_NAME_ENUM;
}
- net = rtnl_link_get_net(src_net, tbp);
- if (IS_ERR(net))
- return PTR_ERR(net);
-
- peer = rtnl_create_link(net, ifname, name_assign_type,
+ peer = rtnl_create_link(peer_net, ifname, name_assign_type,
&veth_link_ops, tbp, extack);
- if (IS_ERR(peer)) {
- put_net(net);
+ if (IS_ERR(peer))
return PTR_ERR(peer);
- }
if (!ifmp || !tbp[IFLA_ADDRESS])
eth_hw_addr_random(peer);
@@ -1763,8 +1854,6 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
netif_inherit_tso_max(peer, dev);
err = register_netdevice(peer);
- put_net(net);
- net = NULL;
if (err < 0)
goto err_register_peer;
@@ -1816,6 +1905,10 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
goto err_queues;
veth_disable_gro(dev);
+ /* update XDP supported features */
+ veth_set_xdp_features(dev);
+ veth_set_xdp_features(peer);
+
return 0;
err_queues:
@@ -1883,6 +1976,7 @@ static struct rtnl_link_ops veth_link_ops = {
.newlink = veth_newlink,
.dellink = veth_dellink,
.policy = veth_policy,
+ .peer_type = VETH_INFO_PEER,
.maxtype = VETH_INFO_MAX,
.get_link_net = veth_get_link_net,
.get_num_tx_queues = veth_get_num_queues,