summaryrefslogtreecommitdiff
path: root/drivers/net/xen-netfront.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/xen-netfront.c')
-rw-r--r--drivers/net/xen-netfront.c1060
1 files changed, 779 insertions, 281 deletions
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index c914c24f880b..7c2220366623 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -44,6 +44,9 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <net/ip.h>
+#include <linux/bpf.h>
+#include <net/page_pool/types.h>
+#include <linux/bpf_trace.h>
#include <xen/xen.h>
#include <xen/xenbus.h>
@@ -63,6 +66,12 @@ module_param_named(max_queues, xennet_max_queues, uint, 0644);
MODULE_PARM_DESC(max_queues,
"Maximum number of queues per virtual interface");
+static bool __read_mostly xennet_trusted = true;
+module_param_named(trusted, xennet_trusted, bool, 0644);
+MODULE_PARM_DESC(trusted, "Is the backend trusted");
+
+#define XENNET_TIMEOUT (5 * HZ)
+
static const struct ethtool_ops xennet_ethtool_ops;
struct netfront_cb {
@@ -73,8 +82,6 @@ struct netfront_cb {
#define RX_COPY_THRESHOLD 256
-#define GRANT_INVALID_REF 0
-
#define NET_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, XEN_PAGE_SIZE)
#define NET_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, XEN_PAGE_SIZE)
@@ -102,6 +109,8 @@ struct netfront_queue {
char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */
struct netfront_info *info;
+ struct bpf_prog __rcu *xdp_prog;
+
struct napi_struct napi;
/* Split event channels support, tx_* == rx_* when using
@@ -119,21 +128,17 @@ struct netfront_queue {
/*
* {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
- * are linked from tx_skb_freelist through skb_entry.link.
- *
- * NB. Freelist index entries are always going to be less than
- * PAGE_OFFSET, whereas pointers to skbs will always be equal or
- * greater than PAGE_OFFSET: we use this property to distinguish
- * them.
+ * are linked from tx_skb_freelist through tx_link.
*/
- union skb_entry {
- struct sk_buff *skb;
- unsigned long link;
- } tx_skbs[NET_TX_RING_SIZE];
+ struct sk_buff *tx_skbs[NET_TX_RING_SIZE];
+ unsigned short tx_link[NET_TX_RING_SIZE];
+#define TX_LINK_NONE 0xffff
+#define TX_PENDING 0xfffe
grant_ref_t gref_tx_head;
grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
struct page *grant_tx_page[NET_TX_RING_SIZE];
unsigned tx_skb_freelist;
+ unsigned int tx_pend_queue;
spinlock_t rx_lock ____cacheline_aligned_in_smp;
struct xen_netif_rx_front_ring rx;
@@ -144,6 +149,12 @@ struct netfront_queue {
struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
grant_ref_t gref_rx_head;
grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
+
+ unsigned int rx_rsp_unconsumed;
+ spinlock_t rx_cons_lock;
+
+ struct page_pool *page_pool;
+ struct xdp_rxq_info xdp_rxq;
};
struct netfront_info {
@@ -159,6 +170,16 @@ struct netfront_info {
struct netfront_stats __percpu *rx_stats;
struct netfront_stats __percpu *tx_stats;
+ /* XDP state */
+ bool netback_has_xdp_headroom;
+ bool netfront_xdp_enabled;
+
+ /* Is device behaving sane? */
+ bool broken;
+
+ /* Should skbs be bounced into a zeroed buffer? */
+ bool bounce;
+
atomic_t rx_gso_checksum_fixup;
};
@@ -167,33 +188,25 @@ struct netfront_rx_info {
struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
};
-static void skb_entry_set_link(union skb_entry *list, unsigned short id)
-{
- list->link = id;
-}
-
-static int skb_entry_is_link(const union skb_entry *list)
-{
- BUILD_BUG_ON(sizeof(list->skb) != sizeof(list->link));
- return (unsigned long)list->skb < PAGE_OFFSET;
-}
-
/*
* Access macros for acquiring freeing slots in tx_skbs[].
*/
-static void add_id_to_freelist(unsigned *head, union skb_entry *list,
- unsigned short id)
+static void add_id_to_list(unsigned *head, unsigned short *list,
+ unsigned short id)
{
- skb_entry_set_link(&list[id], *head);
+ list[id] = *head;
*head = id;
}
-static unsigned short get_id_from_freelist(unsigned *head,
- union skb_entry *list)
+static unsigned short get_id_from_list(unsigned *head, unsigned short *list)
{
unsigned int id = *head;
- *head = list[id].link;
+
+ if (id != TX_LINK_NONE) {
+ *head = list[id];
+ list[id] = TX_LINK_NONE;
+ }
return id;
}
@@ -216,7 +229,7 @@ static grant_ref_t xennet_get_rx_ref(struct netfront_queue *queue,
{
int i = xennet_rxidx(ri);
grant_ref_t ref = queue->grant_rx_ref[i];
- queue->grant_rx_ref[i] = GRANT_INVALID_REF;
+ queue->grant_rx_ref[i] = INVALID_GRANT_REF;
return ref;
}
@@ -232,7 +245,8 @@ static bool xennet_can_sg(struct net_device *dev)
static void rx_refill_timeout(struct timer_list *t)
{
- struct netfront_queue *queue = from_timer(queue, t, rx_refill_timer);
+ struct netfront_queue *queue = timer_container_of(queue, t,
+ rx_refill_timer);
napi_schedule(&queue->napi);
}
@@ -265,12 +279,14 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue)
if (unlikely(!skb))
return NULL;
- page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
- if (!page) {
+ page = page_pool_alloc_pages(queue->page_pool,
+ GFP_ATOMIC | __GFP_NOWARN | __GFP_ZERO);
+ if (unlikely(!page)) {
kfree_skb(skb);
return NULL;
}
skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE);
+ skb_mark_for_recycle(skb);
/* Align ip header to a 16 bytes boundary */
skb_reserve(skb, NET_IP_ALIGN);
@@ -349,7 +365,7 @@ static int xennet_open(struct net_device *dev)
unsigned int i = 0;
struct netfront_queue *queue = NULL;
- if (!np->queues)
+ if (!np->queues || np->broken)
return -ENODEV;
for (i = 0; i < num_queues; ++i) {
@@ -371,41 +387,62 @@ static int xennet_open(struct net_device *dev)
return 0;
}
-static void xennet_tx_buf_gc(struct netfront_queue *queue)
+static bool xennet_tx_buf_gc(struct netfront_queue *queue)
{
RING_IDX cons, prod;
unsigned short id;
struct sk_buff *skb;
bool more_to_do;
+ bool work_done = false;
+ const struct device *dev = &queue->info->netdev->dev;
BUG_ON(!netif_carrier_ok(queue->info->netdev));
do {
prod = queue->tx.sring->rsp_prod;
+ if (RING_RESPONSE_PROD_OVERFLOW(&queue->tx, prod)) {
+ dev_alert(dev, "Illegal number of responses %u\n",
+ prod - queue->tx.rsp_cons);
+ goto err;
+ }
rmb(); /* Ensure we see responses up to 'rp'. */
for (cons = queue->tx.rsp_cons; cons != prod; cons++) {
- struct xen_netif_tx_response *txrsp;
+ struct xen_netif_tx_response txrsp;
- txrsp = RING_GET_RESPONSE(&queue->tx, cons);
- if (txrsp->status == XEN_NETIF_RSP_NULL)
+ work_done = true;
+
+ RING_COPY_RESPONSE(&queue->tx, cons, &txrsp);
+ if (txrsp.status == XEN_NETIF_RSP_NULL)
continue;
- id = txrsp->id;
- skb = queue->tx_skbs[id].skb;
- if (unlikely(gnttab_query_foreign_access(
- queue->grant_tx_ref[id]) != 0)) {
- pr_alert("%s: warning -- grant still in use by backend domain\n",
- __func__);
- BUG();
+ id = txrsp.id;
+ if (id >= RING_SIZE(&queue->tx)) {
+ dev_alert(dev,
+ "Response has incorrect id (%u)\n",
+ id);
+ goto err;
+ }
+ if (queue->tx_link[id] != TX_PENDING) {
+ dev_alert(dev,
+ "Response for inactive request\n");
+ goto err;
+ }
+
+ queue->tx_link[id] = TX_LINK_NONE;
+ skb = queue->tx_skbs[id];
+ queue->tx_skbs[id] = NULL;
+ if (unlikely(!gnttab_end_foreign_access_ref(
+ queue->grant_tx_ref[id]))) {
+ dev_alert(dev,
+ "Grant still in use by backend domain\n");
+ goto err;
}
- gnttab_end_foreign_access_ref(
- queue->grant_tx_ref[id], GNTMAP_readonly);
gnttab_release_grant_reference(
&queue->gref_tx_head, queue->grant_tx_ref[id]);
- queue->grant_tx_ref[id] = GRANT_INVALID_REF;
+ queue->grant_tx_ref[id] = INVALID_GRANT_REF;
queue->grant_tx_page[id] = NULL;
- add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, id);
+ add_id_to_list(&queue->tx_skb_freelist, queue->tx_link, id);
dev_kfree_skb_irq(skb);
}
@@ -415,13 +452,22 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue)
} while (more_to_do);
xennet_maybe_wake_tx(queue);
+
+ return work_done;
+
+ err:
+ queue->info->broken = true;
+ dev_alert(dev, "Disabled for further use\n");
+
+ return work_done;
}
struct xennet_gnttab_make_txreq {
struct netfront_queue *queue;
struct sk_buff *skb;
struct page *page;
- struct xen_netif_tx_request *tx; /* Last request */
+ struct xen_netif_tx_request *tx; /* Last request on ring page */
+ struct xen_netif_tx_request tx_local; /* Last request local copy*/
unsigned int size;
};
@@ -437,7 +483,7 @@ static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset,
struct netfront_queue *queue = info->queue;
struct sk_buff *skb = info->skb;
- id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs);
+ id = get_id_from_list(&queue->tx_skb_freelist, queue->tx_link);
tx = RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
ref = gnttab_claim_grant_reference(&queue->gref_tx_head);
WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref));
@@ -445,34 +491,37 @@ static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset,
gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id,
gfn, GNTMAP_readonly);
- queue->tx_skbs[id].skb = skb;
+ queue->tx_skbs[id] = skb;
queue->grant_tx_page[id] = page;
queue->grant_tx_ref[id] = ref;
- tx->id = id;
- tx->gref = ref;
- tx->offset = offset;
- tx->size = len;
- tx->flags = 0;
+ info->tx_local.id = id;
+ info->tx_local.gref = ref;
+ info->tx_local.offset = offset;
+ info->tx_local.size = len;
+ info->tx_local.flags = 0;
+
+ *tx = info->tx_local;
+
+ /*
+ * Put the request in the pending queue, it will be set to be pending
+ * when the producer index is about to be raised.
+ */
+ add_id_to_list(&queue->tx_pend_queue, queue->tx_link, id);
info->tx = tx;
- info->size += tx->size;
+ info->size += info->tx_local.size;
}
static struct xen_netif_tx_request *xennet_make_first_txreq(
- struct netfront_queue *queue, struct sk_buff *skb,
- struct page *page, unsigned int offset, unsigned int len)
+ struct xennet_gnttab_make_txreq *info,
+ unsigned int offset, unsigned int len)
{
- struct xennet_gnttab_make_txreq info = {
- .queue = queue,
- .skb = skb,
- .page = page,
- .size = 0,
- };
+ info->size = 0;
- gnttab_for_one_grant(page, offset, len, xennet_tx_setup_grant, &info);
+ gnttab_for_one_grant(info->page, offset, len, xennet_tx_setup_grant, info);
- return info.tx;
+ return info->tx;
}
static void xennet_make_one_txreq(unsigned long gfn, unsigned int offset,
@@ -485,35 +534,27 @@ static void xennet_make_one_txreq(unsigned long gfn, unsigned int offset,
xennet_tx_setup_grant(gfn, offset, len, data);
}
-static struct xen_netif_tx_request *xennet_make_txreqs(
- struct netfront_queue *queue, struct xen_netif_tx_request *tx,
- struct sk_buff *skb, struct page *page,
+static void xennet_make_txreqs(
+ struct xennet_gnttab_make_txreq *info,
+ struct page *page,
unsigned int offset, unsigned int len)
{
- struct xennet_gnttab_make_txreq info = {
- .queue = queue,
- .skb = skb,
- .tx = tx,
- };
-
/* Skip unused frames from start of page */
page += offset >> PAGE_SHIFT;
offset &= ~PAGE_MASK;
while (len) {
- info.page = page;
- info.size = 0;
+ info->page = page;
+ info->size = 0;
gnttab_foreach_grant_in_range(page, offset, len,
xennet_make_one_txreq,
- &info);
+ info);
page++;
offset = 0;
- len -= info.size;
+ len -= info->size;
}
-
- return info.tx;
}
/*
@@ -531,7 +572,7 @@ static int xennet_count_skb_slots(struct sk_buff *skb)
for (i = 0; i < frags; i++) {
skb_frag_t *frag = skb_shinfo(skb)->frags + i;
unsigned long size = skb_frag_size(frag);
- unsigned long offset = frag->page_offset;
+ unsigned long offset = skb_frag_off(frag);
/* Skip unused frames from start of page */
offset &= ~PAGE_MASK;
@@ -543,8 +584,7 @@ static int xennet_count_skb_slots(struct sk_buff *skb)
}
static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb,
- struct net_device *sb_dev,
- select_queue_fallback_t fallback)
+ struct net_device *sb_dev)
{
unsigned int num_queues = dev->real_num_tx_queues;
u32 hash;
@@ -561,13 +601,113 @@ static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb,
return queue_idx;
}
+static void xennet_mark_tx_pending(struct netfront_queue *queue)
+{
+ unsigned int i;
+
+ while ((i = get_id_from_list(&queue->tx_pend_queue, queue->tx_link)) !=
+ TX_LINK_NONE)
+ queue->tx_link[i] = TX_PENDING;
+}
+
+static int xennet_xdp_xmit_one(struct net_device *dev,
+ struct netfront_queue *queue,
+ struct xdp_frame *xdpf)
+{
+ struct netfront_info *np = netdev_priv(dev);
+ struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats);
+ struct xennet_gnttab_make_txreq info = {
+ .queue = queue,
+ .skb = NULL,
+ .page = virt_to_page(xdpf->data),
+ };
+ int notify;
+
+ xennet_make_first_txreq(&info,
+ offset_in_page(xdpf->data),
+ xdpf->len);
+
+ xennet_mark_tx_pending(queue);
+
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify);
+ if (notify)
+ notify_remote_via_irq(queue->tx_irq);
+
+ u64_stats_update_begin(&tx_stats->syncp);
+ tx_stats->bytes += xdpf->len;
+ tx_stats->packets++;
+ u64_stats_update_end(&tx_stats->syncp);
+
+ return 0;
+}
+
+static int xennet_xdp_xmit(struct net_device *dev, int n,
+ struct xdp_frame **frames, u32 flags)
+{
+ unsigned int num_queues = dev->real_num_tx_queues;
+ struct netfront_info *np = netdev_priv(dev);
+ struct netfront_queue *queue = NULL;
+ unsigned long irq_flags;
+ int nxmit = 0;
+ int i;
+
+ if (unlikely(np->broken))
+ return -ENODEV;
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ queue = &np->queues[smp_processor_id() % num_queues];
+
+ spin_lock_irqsave(&queue->tx_lock, irq_flags);
+ for (i = 0; i < n; i++) {
+ struct xdp_frame *xdpf = frames[i];
+
+ if (!xdpf)
+ continue;
+ if (xennet_xdp_xmit_one(dev, queue, xdpf))
+ break;
+ nxmit++;
+ }
+ spin_unlock_irqrestore(&queue->tx_lock, irq_flags);
+
+ return nxmit;
+}
+
+static struct sk_buff *bounce_skb(const struct sk_buff *skb)
+{
+ unsigned int headerlen = skb_headroom(skb);
+ /* Align size to allocate full pages and avoid contiguous data leaks */
+ unsigned int size = ALIGN(skb_end_offset(skb) + skb->data_len,
+ XEN_PAGE_SIZE);
+ struct sk_buff *n = alloc_skb(size, GFP_ATOMIC | __GFP_ZERO);
+
+ if (!n)
+ return NULL;
+
+ if (!IS_ALIGNED((uintptr_t)n->head, XEN_PAGE_SIZE)) {
+ WARN_ONCE(1, "misaligned skb allocated\n");
+ kfree_skb(n);
+ return NULL;
+ }
+
+ /* Set the data pointer */
+ skb_reserve(n, headerlen);
+ /* Set the tail pointer and length */
+ skb_put(n, skb->len);
+
+ BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
+
+ skb_copy_header(n, skb);
+ return n;
+}
+
#define MAX_XEN_SKB_FRAGS (65536 / XEN_PAGE_SIZE + 1)
static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct netfront_info *np = netdev_priv(dev);
struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats);
- struct xen_netif_tx_request *tx, *first_tx;
+ struct xen_netif_tx_request *first_tx;
unsigned int i;
int notify;
int slots;
@@ -576,6 +716,7 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
unsigned int len;
unsigned long flags;
struct netfront_queue *queue = NULL;
+ struct xennet_gnttab_make_txreq info = { };
unsigned int num_queues = dev->real_num_tx_queues;
u16 queue_index;
struct sk_buff *nskb;
@@ -583,6 +724,8 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
/* Drop the packet if no queues are set up */
if (num_queues < 1)
goto drop;
+ if (unlikely(np->broken))
+ goto drop;
/* Determine which queue to transmit this SKB on */
queue_index = skb_get_queue_mapping(skb);
queue = &np->queues[queue_index];
@@ -610,9 +753,13 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
/* The first req should be at least ETH_HLEN size or the packet will be
* dropped by netback.
+ *
+ * If the backend is not trusted bounce all data to zeroed pages to
+ * avoid exposing contiguous data on the granted page not belonging to
+ * the skb.
*/
- if (unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
- nskb = skb_copy(skb, GFP_ATOMIC);
+ if (np->bounce || unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
+ nskb = bounce_skb(skb);
if (!nskb)
goto drop;
dev_consume_skb_any(skb);
@@ -633,21 +780,24 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
}
/* First request for the linear area. */
- first_tx = tx = xennet_make_first_txreq(queue, skb,
- page, offset, len);
- offset += tx->size;
+ info.queue = queue;
+ info.skb = skb;
+ info.page = page;
+ first_tx = xennet_make_first_txreq(&info, offset, len);
+ offset += info.tx_local.size;
if (offset == PAGE_SIZE) {
page++;
offset = 0;
}
- len -= tx->size;
+ len -= info.tx_local.size;
if (skb->ip_summed == CHECKSUM_PARTIAL)
/* local packet? */
- tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated;
+ first_tx->flags |= XEN_NETTXF_csum_blank |
+ XEN_NETTXF_data_validated;
else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
/* remote but checksummed. */
- tx->flags |= XEN_NETTXF_data_validated;
+ first_tx->flags |= XEN_NETTXF_data_validated;
/* Optional extra info after the first request. */
if (skb_shinfo(skb)->gso_size) {
@@ -656,7 +806,7 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
gso = (struct xen_netif_extra_info *)
RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
- tx->flags |= XEN_NETTXF_extra_info;
+ first_tx->flags |= XEN_NETTXF_extra_info;
gso->u.gso.size = skb_shinfo(skb)->gso_size;
gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ?
@@ -670,19 +820,24 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
}
/* Requests for the rest of the linear area. */
- tx = xennet_make_txreqs(queue, tx, skb, page, offset, len);
+ xennet_make_txreqs(&info, page, offset, len);
/* Requests for all the frags. */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- tx = xennet_make_txreqs(queue, tx, skb,
- skb_frag_page(frag), frag->page_offset,
+ xennet_make_txreqs(&info, skb_frag_page(frag),
+ skb_frag_off(frag),
skb_frag_size(frag));
}
/* First request has the packet length. */
first_tx->size = skb->len;
+ /* timestamp packet in software */
+ skb_tx_timestamp(skb);
+
+ xennet_mark_tx_pending(queue);
+
RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify);
if (notify)
notify_remote_via_irq(queue->tx_irq);
@@ -692,9 +847,6 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
tx_stats->packets++;
u64_stats_update_end(&tx_stats->syncp);
- /* Note: It is not safe to access skb after xennet_tx_buf_gc()! */
- xennet_tx_buf_gc(queue);
-
if (!netfront_tx_slot_available(queue))
netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
@@ -711,7 +863,7 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
static int xennet_close(struct net_device *dev)
{
struct netfront_info *np = netdev_priv(dev);
- unsigned int num_queues = dev->real_num_tx_queues;
+ unsigned int num_queues = np->queues ? dev->real_num_tx_queues : 0;
unsigned int i;
struct netfront_queue *queue;
netif_tx_stop_all_queues(np->netdev);
@@ -722,6 +874,41 @@ static int xennet_close(struct net_device *dev)
return 0;
}
+static void xennet_destroy_queues(struct netfront_info *info)
+{
+ unsigned int i;
+
+ if (!info->queues)
+ return;
+
+ for (i = 0; i < info->netdev->real_num_tx_queues; i++) {
+ struct netfront_queue *queue = &info->queues[i];
+
+ if (netif_running(info->netdev))
+ napi_disable(&queue->napi);
+ netif_napi_del(&queue->napi);
+ }
+
+ kfree(info->queues);
+ info->queues = NULL;
+}
+
+static void xennet_uninit(struct net_device *dev)
+{
+ struct netfront_info *np = netdev_priv(dev);
+ xennet_destroy_queues(np);
+}
+
+static void xennet_set_rx_rsp_cons(struct netfront_queue *queue, RING_IDX val)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&queue->rx_cons_lock, flags);
+ queue->rx.rsp_cons = val;
+ queue->rx_rsp_unconsumed = XEN_RING_NR_UNCONSUMED_RESPONSES(&queue->rx);
+ spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
+}
+
static void xennet_move_rx_slot(struct netfront_queue *queue, struct sk_buff *skb,
grant_ref_t ref)
{
@@ -740,7 +927,7 @@ static int xennet_get_extras(struct netfront_queue *queue,
RING_IDX rp)
{
- struct xen_netif_extra_info *extra;
+ struct xen_netif_extra_info extra;
struct device *dev = &queue->info->netdev->dev;
RING_IDX cons = queue->rx.rsp_cons;
int err = 0;
@@ -756,66 +943,119 @@ static int xennet_get_extras(struct netfront_queue *queue,
break;
}
- extra = (struct xen_netif_extra_info *)
- RING_GET_RESPONSE(&queue->rx, ++cons);
+ RING_COPY_RESPONSE(&queue->rx, ++cons, &extra);
- if (unlikely(!extra->type ||
- extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+ if (unlikely(!extra.type ||
+ extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
if (net_ratelimit())
dev_warn(dev, "Invalid extra type: %d\n",
- extra->type);
+ extra.type);
err = -EINVAL;
} else {
- memcpy(&extras[extra->type - 1], extra,
- sizeof(*extra));
+ extras[extra.type - 1] = extra;
}
skb = xennet_get_rx_skb(queue, cons);
ref = xennet_get_rx_ref(queue, cons);
xennet_move_rx_slot(queue, skb, ref);
- } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
+ } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
- queue->rx.rsp_cons = cons;
+ xennet_set_rx_rsp_cons(queue, cons);
return err;
}
+static u32 xennet_run_xdp(struct netfront_queue *queue, struct page *pdata,
+ struct xen_netif_rx_response *rx, struct bpf_prog *prog,
+ struct xdp_buff *xdp, bool *need_xdp_flush)
+{
+ struct xdp_frame *xdpf;
+ u32 len = rx->status;
+ u32 act;
+ int err;
+
+ xdp_init_buff(xdp, XEN_PAGE_SIZE - XDP_PACKET_HEADROOM,
+ &queue->xdp_rxq);
+ xdp_prepare_buff(xdp, page_address(pdata), XDP_PACKET_HEADROOM,
+ len, false);
+
+ act = bpf_prog_run_xdp(prog, xdp);
+ switch (act) {
+ case XDP_TX:
+ xdpf = xdp_convert_buff_to_frame(xdp);
+ if (unlikely(!xdpf)) {
+ trace_xdp_exception(queue->info->netdev, prog, act);
+ break;
+ }
+ get_page(pdata);
+ err = xennet_xdp_xmit(queue->info->netdev, 1, &xdpf, 0);
+ if (unlikely(err <= 0)) {
+ if (err < 0)
+ trace_xdp_exception(queue->info->netdev, prog, act);
+ xdp_return_frame_rx_napi(xdpf);
+ }
+ break;
+ case XDP_REDIRECT:
+ get_page(pdata);
+ err = xdp_do_redirect(queue->info->netdev, xdp, prog);
+ *need_xdp_flush = true;
+ if (unlikely(err)) {
+ trace_xdp_exception(queue->info->netdev, prog, act);
+ xdp_return_buff(xdp);
+ }
+ break;
+ case XDP_PASS:
+ case XDP_DROP:
+ break;
+
+ case XDP_ABORTED:
+ trace_xdp_exception(queue->info->netdev, prog, act);
+ break;
+
+ default:
+ bpf_warn_invalid_xdp_action(queue->info->netdev, prog, act);
+ }
+
+ return act;
+}
+
static int xennet_get_responses(struct netfront_queue *queue,
struct netfront_rx_info *rinfo, RING_IDX rp,
- struct sk_buff_head *list)
+ struct sk_buff_head *list,
+ bool *need_xdp_flush)
{
- struct xen_netif_rx_response *rx = &rinfo->rx;
- struct xen_netif_extra_info *extras = rinfo->extras;
- struct device *dev = &queue->info->netdev->dev;
+ struct xen_netif_rx_response *rx = &rinfo->rx, rx_local;
+ int max = XEN_NETIF_NR_SLOTS_MIN + (rx->status <= RX_COPY_THRESHOLD);
RING_IDX cons = queue->rx.rsp_cons;
struct sk_buff *skb = xennet_get_rx_skb(queue, cons);
+ struct xen_netif_extra_info *extras = rinfo->extras;
grant_ref_t ref = xennet_get_rx_ref(queue, cons);
- int max = XEN_NETIF_NR_SLOTS_MIN + (rx->status <= RX_COPY_THRESHOLD);
+ struct device *dev = &queue->info->netdev->dev;
+ struct bpf_prog *xdp_prog;
+ struct xdp_buff xdp;
int slots = 1;
int err = 0;
- unsigned long ret;
+ u32 verdict;
if (rx->flags & XEN_NETRXF_extra_info) {
err = xennet_get_extras(queue, extras, rp);
+ if (!err) {
+ if (extras[XEN_NETIF_EXTRA_TYPE_XDP - 1].type) {
+ struct xen_netif_extra_info *xdp;
+
+ xdp = &extras[XEN_NETIF_EXTRA_TYPE_XDP - 1];
+ rx->offset = xdp->u.xdp.headroom;
+ }
+ }
cons = queue->rx.rsp_cons;
}
for (;;) {
- if (unlikely(rx->status < 0 ||
- rx->offset + rx->status > XEN_PAGE_SIZE)) {
- if (net_ratelimit())
- dev_warn(dev, "rx->offset: %u, size: %d\n",
- rx->offset, rx->status);
- xennet_move_rx_slot(queue, skb, ref);
- err = -EINVAL;
- goto next;
- }
-
/*
* This definitely indicates a bug, either in this driver or in
* the backend driver. In future this should flag the bad
* situation to the system controller to reboot the backend.
*/
- if (ref == GRANT_INVALID_REF) {
+ if (ref == INVALID_GRANT_REF) {
if (net_ratelimit())
dev_warn(dev, "Bad rx response id %d.\n",
rx->id);
@@ -823,11 +1063,43 @@ static int xennet_get_responses(struct netfront_queue *queue,
goto next;
}
- ret = gnttab_end_foreign_access_ref(ref, 0);
- BUG_ON(!ret);
+ if (unlikely(rx->status < 0 ||
+ rx->offset + rx->status > XEN_PAGE_SIZE)) {
+ if (net_ratelimit())
+ dev_warn(dev, "rx->offset: %u, size: %d\n",
+ rx->offset, rx->status);
+ xennet_move_rx_slot(queue, skb, ref);
+ err = -EINVAL;
+ goto next;
+ }
+
+ if (!gnttab_end_foreign_access_ref(ref)) {
+ dev_alert(dev,
+ "Grant still in use by backend domain\n");
+ queue->info->broken = true;
+ dev_alert(dev, "Disabled for further use\n");
+ return -EINVAL;
+ }
gnttab_release_grant_reference(&queue->gref_rx_head, ref);
+ rcu_read_lock();
+ xdp_prog = rcu_dereference(queue->xdp_prog);
+ if (xdp_prog) {
+ if (!(rx->flags & XEN_NETRXF_more_data)) {
+ /* currently only a single page contains data */
+ verdict = xennet_run_xdp(queue,
+ skb_frag_page(&skb_shinfo(skb)->frags[0]),
+ rx, xdp_prog, &xdp, need_xdp_flush);
+ if (verdict != XDP_PASS)
+ err = -EINVAL;
+ } else {
+ /* drop the frame */
+ err = -EINVAL;
+ }
+ }
+ rcu_read_unlock();
+
__skb_queue_tail(list, skb);
next:
@@ -841,7 +1113,8 @@ next:
break;
}
- rx = RING_GET_RESPONSE(&queue->rx, cons + slots);
+ RING_COPY_RESPONSE(&queue->rx, cons + slots, &rx_local);
+ rx = &rx_local;
skb = xennet_get_rx_skb(queue, cons + slots);
ref = xennet_get_rx_ref(queue, cons + slots);
slots++;
@@ -854,7 +1127,7 @@ next:
}
if (unlikely(err))
- queue->rx.rsp_cons = cons + slots;
+ xennet_set_rx_rsp_cons(queue, cons + slots);
return err;
}
@@ -888,18 +1161,19 @@ static int xennet_set_skb_gso(struct sk_buff *skb,
return 0;
}
-static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
- struct sk_buff *skb,
- struct sk_buff_head *list)
+static int xennet_fill_frags(struct netfront_queue *queue,
+ struct sk_buff *skb,
+ struct sk_buff_head *list)
{
RING_IDX cons = queue->rx.rsp_cons;
struct sk_buff *nskb;
while ((nskb = __skb_dequeue(list))) {
- struct xen_netif_rx_response *rx =
- RING_GET_RESPONSE(&queue->rx, ++cons);
+ struct xen_netif_rx_response rx;
skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
+ RING_COPY_RESPONSE(&queue->rx, ++cons, &rx);
+
if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) {
unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
@@ -907,20 +1181,23 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
__pskb_pull_tail(skb, pull_to - skb_headlen(skb));
}
if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) {
- queue->rx.rsp_cons = ++cons;
+ xennet_set_rx_rsp_cons(queue,
+ ++cons + skb_queue_len(list));
kfree_skb(nskb);
- return ~0U;
+ return -ENOENT;
}
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
skb_frag_page(nfrag),
- rx->offset, rx->status, PAGE_SIZE);
+ rx.offset, rx.status, PAGE_SIZE);
skb_shinfo(nskb)->nr_frags = 0;
kfree_skb(nskb);
}
- return cons;
+ xennet_set_rx_rsp_cons(queue, cons);
+
+ return 0;
}
static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
@@ -997,6 +1274,7 @@ static int xennet_poll(struct napi_struct *napi, int budget)
struct sk_buff_head errq;
struct sk_buff_head tmpq;
int err;
+ bool need_xdp_flush = false;
spin_lock(&queue->rx_lock);
@@ -1005,17 +1283,29 @@ static int xennet_poll(struct napi_struct *napi, int budget)
skb_queue_head_init(&tmpq);
rp = queue->rx.sring->rsp_prod;
+ if (RING_RESPONSE_PROD_OVERFLOW(&queue->rx, rp)) {
+ dev_alert(&dev->dev, "Illegal number of responses %u\n",
+ rp - queue->rx.rsp_cons);
+ queue->info->broken = true;
+ spin_unlock(&queue->rx_lock);
+ return 0;
+ }
rmb(); /* Ensure we see queued responses up to 'rp'. */
i = queue->rx.rsp_cons;
work_done = 0;
while ((i != rp) && (work_done < budget)) {
- memcpy(rx, RING_GET_RESPONSE(&queue->rx, i), sizeof(*rx));
+ RING_COPY_RESPONSE(&queue->rx, i, rx);
memset(extras, 0, sizeof(rinfo.extras));
- err = xennet_get_responses(queue, &rinfo, rp, &tmpq);
+ err = xennet_get_responses(queue, &rinfo, rp, &tmpq,
+ &need_xdp_flush);
if (unlikely(err)) {
+ if (queue->info->broken) {
+ spin_unlock(&queue->rx_lock);
+ return 0;
+ }
err:
while ((skb = __skb_dequeue(&tmpq)))
__skb_queue_tail(&errq, skb);
@@ -1032,7 +1322,9 @@ err:
if (unlikely(xennet_set_skb_gso(skb, gso))) {
__skb_queue_head(&tmpq, skb);
- queue->rx.rsp_cons += skb_queue_len(&tmpq);
+ xennet_set_rx_rsp_cons(queue,
+ queue->rx.rsp_cons +
+ skb_queue_len(&tmpq));
goto err;
}
}
@@ -1041,13 +1333,12 @@ err:
if (NETFRONT_SKB_CB(skb)->pull_to > RX_COPY_THRESHOLD)
NETFRONT_SKB_CB(skb)->pull_to = RX_COPY_THRESHOLD;
- skb_shinfo(skb)->frags[0].page_offset = rx->offset;
+ skb_frag_off_set(&skb_shinfo(skb)->frags[0], rx->offset);
skb_frag_size_set(&skb_shinfo(skb)->frags[0], rx->status);
skb->data_len = rx->status;
skb->len += rx->status;
- i = xennet_fill_frags(queue, skb, &tmpq);
- if (unlikely(i == ~0U))
+ if (unlikely(xennet_fill_frags(queue, skb, &tmpq)))
goto err;
if (rx->flags & XEN_NETRXF_csum_blank)
@@ -1057,9 +1348,12 @@ err:
__skb_queue_tail(&rxq, skb);
- queue->rx.rsp_cons = ++i;
+ i = queue->rx.rsp_cons + 1;
+ xennet_set_rx_rsp_cons(queue, i);
work_done++;
}
+ if (need_xdp_flush)
+ xdp_do_flush();
__skb_queue_purge(&errq);
@@ -1088,7 +1382,7 @@ static int xennet_change_mtu(struct net_device *dev, int mtu)
if (mtu > max)
return -EINVAL;
- dev->mtu = mtu;
+ WRITE_ONCE(dev->mtu, mtu);
return 0;
}
@@ -1105,16 +1399,16 @@ static void xennet_get_stats64(struct net_device *dev,
unsigned int start;
do {
- start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
+ start = u64_stats_fetch_begin(&tx_stats->syncp);
tx_packets = tx_stats->packets;
tx_bytes = tx_stats->bytes;
- } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
+ } while (u64_stats_fetch_retry(&tx_stats->syncp, start));
do {
- start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
+ start = u64_stats_fetch_begin(&rx_stats->syncp);
rx_packets = rx_stats->packets;
rx_bytes = rx_stats->bytes;
- } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
+ } while (u64_stats_fetch_retry(&rx_stats->syncp, start));
tot->rx_packets += rx_packets;
tot->tx_packets += tx_packets;
@@ -1133,17 +1427,17 @@ static void xennet_release_tx_bufs(struct netfront_queue *queue)
for (i = 0; i < NET_TX_RING_SIZE; i++) {
/* Skip over entries which are actually freelist references */
- if (skb_entry_is_link(&queue->tx_skbs[i]))
+ if (!queue->tx_skbs[i])
continue;
- skb = queue->tx_skbs[i].skb;
+ skb = queue->tx_skbs[i];
+ queue->tx_skbs[i] = NULL;
get_page(queue->grant_tx_page[i]);
gnttab_end_foreign_access(queue->grant_tx_ref[i],
- GNTMAP_readonly,
- (unsigned long)page_address(queue->grant_tx_page[i]));
+ queue->grant_tx_page[i]);
queue->grant_tx_page[i] = NULL;
- queue->grant_tx_ref[i] = GRANT_INVALID_REF;
- add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, i);
+ queue->grant_tx_ref[i] = INVALID_GRANT_REF;
+ add_id_to_list(&queue->tx_skb_freelist, queue->tx_link, i);
dev_kfree_skb_irq(skb);
}
}
@@ -1163,7 +1457,7 @@ static void xennet_release_rx_bufs(struct netfront_queue *queue)
continue;
ref = queue->grant_rx_ref[id];
- if (ref == GRANT_INVALID_REF)
+ if (ref == INVALID_GRANT_REF)
continue;
page = skb_frag_page(&skb_shinfo(skb)->frags[0]);
@@ -1172,9 +1466,8 @@ static void xennet_release_rx_bufs(struct netfront_queue *queue)
* foreign access is ended (which may be deferred).
*/
get_page(page);
- gnttab_end_foreign_access(ref, 0,
- (unsigned long)page_address(page));
- queue->grant_rx_ref[id] = GRANT_INVALID_REF;
+ gnttab_end_foreign_access(ref, page);
+ queue->grant_rx_ref[id] = INVALID_GRANT_REF;
kfree_skb(skb);
}
@@ -1218,34 +1511,79 @@ static int xennet_set_features(struct net_device *dev,
return 0;
}
-static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
+static bool xennet_handle_tx(struct netfront_queue *queue, unsigned int *eoi)
{
- struct netfront_queue *queue = dev_id;
unsigned long flags;
+ if (unlikely(queue->info->broken))
+ return false;
+
spin_lock_irqsave(&queue->tx_lock, flags);
- xennet_tx_buf_gc(queue);
+ if (xennet_tx_buf_gc(queue))
+ *eoi = 0;
spin_unlock_irqrestore(&queue->tx_lock, flags);
+ return true;
+}
+
+static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
+{
+ unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
+
+ if (likely(xennet_handle_tx(dev_id, &eoiflag)))
+ xen_irq_lateeoi(irq, eoiflag);
+
return IRQ_HANDLED;
}
-static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id)
+static bool xennet_handle_rx(struct netfront_queue *queue, unsigned int *eoi)
{
- struct netfront_queue *queue = dev_id;
- struct net_device *dev = queue->info->netdev;
+ unsigned int work_queued;
+ unsigned long flags;
- if (likely(netif_carrier_ok(dev) &&
- RING_HAS_UNCONSUMED_RESPONSES(&queue->rx)))
+ if (unlikely(queue->info->broken))
+ return false;
+
+ spin_lock_irqsave(&queue->rx_cons_lock, flags);
+ work_queued = XEN_RING_NR_UNCONSUMED_RESPONSES(&queue->rx);
+ if (work_queued > queue->rx_rsp_unconsumed) {
+ queue->rx_rsp_unconsumed = work_queued;
+ *eoi = 0;
+ } else if (unlikely(work_queued < queue->rx_rsp_unconsumed)) {
+ const struct device *dev = &queue->info->netdev->dev;
+
+ spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
+ dev_alert(dev, "RX producer index going backwards\n");
+ dev_alert(dev, "Disabled for further use\n");
+ queue->info->broken = true;
+ return false;
+ }
+ spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
+
+ if (likely(netif_carrier_ok(queue->info->netdev) && work_queued))
napi_schedule(&queue->napi);
+ return true;
+}
+
+static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id)
+{
+ unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
+
+ if (likely(xennet_handle_rx(dev_id, &eoiflag)))
+ xen_irq_lateeoi(irq, eoiflag);
+
return IRQ_HANDLED;
}
static irqreturn_t xennet_interrupt(int irq, void *dev_id)
{
- xennet_tx_interrupt(irq, dev_id);
- xennet_rx_interrupt(irq, dev_id);
+ unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
+
+ if (xennet_handle_tx(dev_id, &eoiflag) &&
+ xennet_handle_rx(dev_id, &eoiflag))
+ xen_irq_lateeoi(irq, eoiflag);
+
return IRQ_HANDLED;
}
@@ -1256,12 +1594,96 @@ static void xennet_poll_controller(struct net_device *dev)
struct netfront_info *info = netdev_priv(dev);
unsigned int num_queues = dev->real_num_tx_queues;
unsigned int i;
+
+ if (info->broken)
+ return;
+
for (i = 0; i < num_queues; ++i)
xennet_interrupt(0, &info->queues[i]);
}
#endif
+#define NETBACK_XDP_HEADROOM_DISABLE 0
+#define NETBACK_XDP_HEADROOM_ENABLE 1
+
+static int talk_to_netback_xdp(struct netfront_info *np, int xdp)
+{
+ int err;
+ unsigned short headroom;
+
+ headroom = xdp ? XDP_PACKET_HEADROOM : 0;
+ err = xenbus_printf(XBT_NIL, np->xbdev->nodename,
+ "xdp-headroom", "%hu",
+ headroom);
+ if (err)
+ pr_warn("Error writing xdp-headroom\n");
+
+ return err;
+}
+
+static int xennet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+ struct netlink_ext_ack *extack)
+{
+ unsigned long max_mtu = XEN_PAGE_SIZE - XDP_PACKET_HEADROOM;
+ struct netfront_info *np = netdev_priv(dev);
+ struct bpf_prog *old_prog;
+ unsigned int i, err;
+
+ if (dev->mtu > max_mtu) {
+ netdev_warn(dev, "XDP requires MTU less than %lu\n", max_mtu);
+ return -EINVAL;
+ }
+
+ if (!np->netback_has_xdp_headroom)
+ return 0;
+
+ xenbus_switch_state(np->xbdev, XenbusStateReconfiguring);
+
+ err = talk_to_netback_xdp(np, prog ? NETBACK_XDP_HEADROOM_ENABLE :
+ NETBACK_XDP_HEADROOM_DISABLE);
+ if (err)
+ return err;
+
+ /* avoid the race with XDP headroom adjustment */
+ wait_event(module_wq,
+ xenbus_read_driver_state(np->xbdev->otherend) ==
+ XenbusStateReconfigured);
+ np->netfront_xdp_enabled = true;
+
+ old_prog = rtnl_dereference(np->queues[0].xdp_prog);
+
+ if (prog)
+ bpf_prog_add(prog, dev->real_num_tx_queues);
+
+ for (i = 0; i < dev->real_num_tx_queues; ++i)
+ rcu_assign_pointer(np->queues[i].xdp_prog, prog);
+
+ if (old_prog)
+ for (i = 0; i < dev->real_num_tx_queues; ++i)
+ bpf_prog_put(old_prog);
+
+ xenbus_switch_state(np->xbdev, XenbusStateConnected);
+
+ return 0;
+}
+
+static int xennet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+ struct netfront_info *np = netdev_priv(dev);
+
+ if (np->broken)
+ return -ENODEV;
+
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return xennet_xdp_set(dev, xdp->prog, xdp->extack);
+ default:
+ return -EINVAL;
+ }
+}
+
static const struct net_device_ops xennet_netdev_ops = {
+ .ndo_uninit = xennet_uninit,
.ndo_open = xennet_open,
.ndo_stop = xennet_close,
.ndo_start_xmit = xennet_start_xmit,
@@ -1272,6 +1694,8 @@ static const struct net_device_ops xennet_netdev_ops = {
.ndo_fix_features = xennet_fix_features,
.ndo_set_features = xennet_set_features,
.ndo_select_queue = xennet_select_queue,
+ .ndo_bpf = xennet_xdp,
+ .ndo_xdp_xmit = xennet_xdp_xmit,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = xennet_poll_controller,
#endif
@@ -1324,6 +1748,8 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
* negotiate with the backend regarding supported features.
*/
netdev->features |= netdev->hw_features;
+ netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_NDO_XMIT;
netdev->ethtool_ops = &xennet_ethtool_ops;
netdev->min_mtu = ETH_MIN_MTU;
@@ -1331,15 +1757,19 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
SET_NETDEV_DEV(netdev, &dev->dev);
np->netdev = netdev;
+ np->netfront_xdp_enabled = false;
netif_carrier_off(netdev);
- xenbus_switch_state(dev, XenbusStateInitialising);
- wait_event(module_wq,
- xenbus_read_driver_state(dev->otherend) !=
- XenbusStateClosed &&
- xenbus_read_driver_state(dev->otherend) !=
- XenbusStateUnknown);
+ do {
+ xenbus_switch_state(dev, XenbusStateInitialising);
+ err = wait_event_timeout(module_wq,
+ xenbus_read_driver_state(dev->otherend) !=
+ XenbusStateClosed &&
+ xenbus_read_driver_state(dev->otherend) !=
+ XenbusStateUnknown, XENNET_TIMEOUT);
+ } while (!err);
+
return netdev;
exit:
@@ -1347,7 +1777,7 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
return ERR_PTR(err);
}
-/**
+/*
* Entry point to this code when a new device is created. Allocate the basic
* structures and the ring buffers for communication with the backend, and
* inform the backend of the appropriate details for those.
@@ -1378,8 +1808,8 @@ static int netfront_probe(struct xenbus_device *dev,
static void xennet_end_access(int ref, void *page)
{
/* This frees the page as a side-effect */
- if (ref != GRANT_INVALID_REF)
- gnttab_end_foreign_access(ref, 0, (unsigned long)page);
+ if (ref != INVALID_GRANT_REF)
+ gnttab_end_foreign_access(ref, virt_to_page(page));
}
static void xennet_disconnect_backend(struct netfront_info *info)
@@ -1392,7 +1822,7 @@ static void xennet_disconnect_backend(struct netfront_info *info)
for (i = 0; i < num_queues && info->queues; ++i) {
struct netfront_queue *queue = &info->queues[i];
- del_timer_sync(&queue->rx_refill_timer);
+ timer_delete_sync(&queue->rx_refill_timer);
if (queue->tx_irq && (queue->tx_irq == queue->rx_irq))
unbind_from_irqhandler(queue->tx_irq, queue);
@@ -1415,14 +1845,16 @@ static void xennet_disconnect_backend(struct netfront_info *info)
xennet_end_access(queue->tx_ring_ref, queue->tx.sring);
xennet_end_access(queue->rx_ring_ref, queue->rx.sring);
- queue->tx_ring_ref = GRANT_INVALID_REF;
- queue->rx_ring_ref = GRANT_INVALID_REF;
+ queue->tx_ring_ref = INVALID_GRANT_REF;
+ queue->rx_ring_ref = INVALID_GRANT_REF;
queue->tx.sring = NULL;
queue->rx.sring = NULL;
+
+ page_pool_destroy(queue->page_pool);
}
}
-/**
+/*
* We are reconnecting to the backend, due to a suspend/resume, or a backend
* driver restart. We tear down our netif structure and recreate it, but
* leave the device-layer structures intact so that this is transparent to the
@@ -1434,7 +1866,17 @@ static int netfront_resume(struct xenbus_device *dev)
dev_dbg(&dev->dev, "%s\n", dev->nodename);
+ netif_tx_lock_bh(info->netdev);
+ netif_device_detach(info->netdev);
+ netif_tx_unlock_bh(info->netdev);
+
xennet_disconnect_backend(info);
+
+ rtnl_lock();
+ if (info->queues)
+ xennet_destroy_queues(info);
+ rtnl_unlock();
+
return 0;
}
@@ -1468,9 +1910,10 @@ static int setup_netfront_single(struct netfront_queue *queue)
if (err < 0)
goto fail;
- err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
- xennet_interrupt,
- 0, queue->info->netdev->name, queue);
+ err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn,
+ xennet_interrupt, 0,
+ queue->info->netdev->name,
+ queue);
if (err < 0)
goto bind_fail;
queue->rx_evtchn = queue->tx_evtchn;
@@ -1498,18 +1941,18 @@ static int setup_netfront_split(struct netfront_queue *queue)
snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
"%s-tx", queue->name);
- err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
- xennet_tx_interrupt,
- 0, queue->tx_irq_name, queue);
+ err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn,
+ xennet_tx_interrupt, 0,
+ queue->tx_irq_name, queue);
if (err < 0)
goto bind_tx_fail;
queue->tx_irq = err;
snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
"%s-rx", queue->name);
- err = bind_evtchn_to_irqhandler(queue->rx_evtchn,
- xennet_rx_interrupt,
- 0, queue->rx_irq_name, queue);
+ err = bind_evtchn_to_irqhandler_lateeoi(queue->rx_evtchn,
+ xennet_rx_interrupt, 0,
+ queue->rx_irq_name, queue);
if (err < 0)
goto bind_rx_fail;
queue->rx_irq = err;
@@ -1534,41 +1977,26 @@ static int setup_netfront(struct xenbus_device *dev,
{
struct xen_netif_tx_sring *txs;
struct xen_netif_rx_sring *rxs;
- grant_ref_t gref;
int err;
- queue->tx_ring_ref = GRANT_INVALID_REF;
- queue->rx_ring_ref = GRANT_INVALID_REF;
+ queue->tx_ring_ref = INVALID_GRANT_REF;
+ queue->rx_ring_ref = INVALID_GRANT_REF;
queue->rx.sring = NULL;
queue->tx.sring = NULL;
- txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
- if (!txs) {
- err = -ENOMEM;
- xenbus_dev_fatal(dev, err, "allocating tx ring page");
+ err = xenbus_setup_ring(dev, GFP_NOIO | __GFP_HIGH, (void **)&txs,
+ 1, &queue->tx_ring_ref);
+ if (err)
goto fail;
- }
- SHARED_RING_INIT(txs);
- FRONT_RING_INIT(&queue->tx, txs, XEN_PAGE_SIZE);
- err = xenbus_grant_ring(dev, txs, 1, &gref);
- if (err < 0)
- goto grant_tx_ring_fail;
- queue->tx_ring_ref = gref;
+ XEN_FRONT_RING_INIT(&queue->tx, txs, XEN_PAGE_SIZE);
- rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
- if (!rxs) {
- err = -ENOMEM;
- xenbus_dev_fatal(dev, err, "allocating rx ring page");
- goto alloc_rx_ring_fail;
- }
- SHARED_RING_INIT(rxs);
- FRONT_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE);
+ err = xenbus_setup_ring(dev, GFP_NOIO | __GFP_HIGH, (void **)&rxs,
+ 1, &queue->rx_ring_ref);
+ if (err)
+ goto fail;
- err = xenbus_grant_ring(dev, rxs, 1, &gref);
- if (err < 0)
- goto grant_rx_ring_fail;
- queue->rx_ring_ref = gref;
+ XEN_FRONT_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE);
if (feature_split_evtchn)
err = setup_netfront_split(queue);
@@ -1576,26 +2004,18 @@ static int setup_netfront(struct xenbus_device *dev,
* a) feature-split-event-channels == 0
* b) feature-split-event-channels == 1 but failed to setup
*/
- if (!feature_split_evtchn || (feature_split_evtchn && err))
+ if (!feature_split_evtchn || err)
err = setup_netfront_single(queue);
if (err)
- goto alloc_evtchn_fail;
+ goto fail;
return 0;
- /* If we fail to setup netfront, it is safe to just revoke access to
- * granted pages because backend is not accessing it at this point.
- */
-alloc_evtchn_fail:
- gnttab_end_foreign_access_ref(queue->rx_ring_ref, 0);
-grant_rx_ring_fail:
- free_page((unsigned long)rxs);
-alloc_rx_ring_fail:
- gnttab_end_foreign_access_ref(queue->tx_ring_ref, 0);
-grant_tx_ring_fail:
- free_page((unsigned long)txs);
-fail:
+ fail:
+ xenbus_teardown_ring((void **)&queue->rx.sring, 1, &queue->rx_ring_ref);
+ xenbus_teardown_ring((void **)&queue->tx.sring, 1, &queue->tx_ring_ref);
+
return err;
}
@@ -1611,6 +2031,7 @@ static int xennet_init_queue(struct netfront_queue *queue)
spin_lock_init(&queue->tx_lock);
spin_lock_init(&queue->rx_lock);
+ spin_lock_init(&queue->rx_cons_lock);
timer_setup(&queue->rx_refill_timer, rx_refill_timeout, 0);
@@ -1618,18 +2039,20 @@ static int xennet_init_queue(struct netfront_queue *queue)
snprintf(queue->name, sizeof(queue->name), "vif%s-q%u",
devid, queue->id);
- /* Initialise tx_skbs as a free chain containing every entry. */
+ /* Initialise tx_skb_freelist as a free chain containing every entry. */
queue->tx_skb_freelist = 0;
+ queue->tx_pend_queue = TX_LINK_NONE;
for (i = 0; i < NET_TX_RING_SIZE; i++) {
- skb_entry_set_link(&queue->tx_skbs[i], i+1);
- queue->grant_tx_ref[i] = GRANT_INVALID_REF;
+ queue->tx_link[i] = i + 1;
+ queue->grant_tx_ref[i] = INVALID_GRANT_REF;
queue->grant_tx_page[i] = NULL;
}
+ queue->tx_link[NET_TX_RING_SIZE - 1] = TX_LINK_NONE;
/* Clear out rx_skbs */
for (i = 0; i < NET_RX_RING_SIZE; i++) {
queue->rx_skbs[i] = NULL;
- queue->grant_rx_ref[i] = GRANT_INVALID_REF;
+ queue->grant_rx_ref[i] = INVALID_GRANT_REF;
}
/* A grant for every tx ring slot */
@@ -1738,20 +2161,49 @@ error:
return err;
}
-static void xennet_destroy_queues(struct netfront_info *info)
+
+
+static int xennet_create_page_pool(struct netfront_queue *queue)
{
- unsigned int i;
+ int err;
+ struct page_pool_params pp_params = {
+ .order = 0,
+ .flags = 0,
+ .pool_size = NET_RX_RING_SIZE,
+ .nid = NUMA_NO_NODE,
+ .dev = &queue->info->netdev->dev,
+ .offset = XDP_PACKET_HEADROOM,
+ .max_len = XEN_PAGE_SIZE - XDP_PACKET_HEADROOM,
+ };
- for (i = 0; i < info->netdev->real_num_tx_queues; i++) {
- struct netfront_queue *queue = &info->queues[i];
+ queue->page_pool = page_pool_create(&pp_params);
+ if (IS_ERR(queue->page_pool)) {
+ err = PTR_ERR(queue->page_pool);
+ queue->page_pool = NULL;
+ return err;
+ }
- if (netif_running(info->netdev))
- napi_disable(&queue->napi);
- netif_napi_del(&queue->napi);
+ err = xdp_rxq_info_reg(&queue->xdp_rxq, queue->info->netdev,
+ queue->id, 0);
+ if (err) {
+ netdev_err(queue->info->netdev, "xdp_rxq_info_reg failed\n");
+ goto err_free_pp;
}
- kfree(info->queues);
- info->queues = NULL;
+ err = xdp_rxq_info_reg_mem_model(&queue->xdp_rxq,
+ MEM_TYPE_PAGE_POOL, queue->page_pool);
+ if (err) {
+ netdev_err(queue->info->netdev, "xdp_rxq_info_reg_mem_model failed\n");
+ goto err_unregister_rxq;
+ }
+ return 0;
+
+err_unregister_rxq:
+ xdp_rxq_info_unreg(&queue->xdp_rxq);
+err_free_pp:
+ page_pool_destroy(queue->page_pool);
+ queue->page_pool = NULL;
+ return err;
}
static int xennet_create_queues(struct netfront_info *info,
@@ -1779,8 +2231,15 @@ static int xennet_create_queues(struct netfront_info *info,
break;
}
- netif_napi_add(queue->info->netdev, &queue->napi,
- xennet_poll, 64);
+ /* use page pool recycling instead of buddy allocator */
+ ret = xennet_create_page_pool(queue);
+ if (ret < 0) {
+ dev_err(&info->xbdev->dev, "can't allocate page pool\n");
+ *num_queues = i;
+ return ret;
+ }
+
+ netif_napi_add(queue->info->netdev, &queue->napi, xennet_poll);
if (netif_running(info->netdev))
napi_enable(&queue->napi);
}
@@ -1806,9 +2265,14 @@ static int talk_to_netback(struct xenbus_device *dev,
unsigned int max_queues = 0;
struct netfront_queue *queue = NULL;
unsigned int num_queues = 1;
+ u8 addr[ETH_ALEN];
info->netdev->irq = 0;
+ /* Check if backend is trusted. */
+ info->bounce = !xennet_trusted ||
+ !xenbus_read_unsigned(dev->nodename, "trusted", 1);
+
/* Check if backend supports multiple queues */
max_queues = xenbus_read_unsigned(info->xbdev->otherend,
"multi-queue-max-queues", 1);
@@ -1819,16 +2283,31 @@ static int talk_to_netback(struct xenbus_device *dev,
"feature-split-event-channels", 0);
/* Read mac addr. */
- err = xen_net_read_mac(dev, info->netdev->dev_addr);
+ err = xen_net_read_mac(dev, addr);
if (err) {
xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
goto out_unlocked;
}
+ eth_hw_addr_set(info->netdev, addr);
+
+ info->netback_has_xdp_headroom = xenbus_read_unsigned(info->xbdev->otherend,
+ "feature-xdp-headroom", 0);
+ if (info->netback_has_xdp_headroom) {
+ /* set the current xen-netfront xdp state */
+ err = talk_to_netback_xdp(info, info->netfront_xdp_enabled ?
+ NETBACK_XDP_HEADROOM_ENABLE :
+ NETBACK_XDP_HEADROOM_DISABLE);
+ if (err)
+ goto out_unlocked;
+ }
rtnl_lock();
if (info->queues)
xennet_destroy_queues(info);
+ /* For the case of a reconnect reset the "broken" indicator. */
+ info->broken = false;
+
err = xennet_create_queues(info, &num_queues);
if (err < 0) {
xenbus_dev_fatal(dev, err, "creating queues");
@@ -1959,6 +2438,11 @@ static int xennet_connect(struct net_device *dev)
err = talk_to_netback(np->xbdev, np);
if (err)
return err;
+ if (np->netback_has_xdp_headroom)
+ pr_info("backend supports XDP headroom\n");
+ if (np->bounce)
+ dev_info(&np->xbdev->dev,
+ "bouncing transmitted data to zeroed pages\n");
/* talk_to_netback() sets the correct number of queues */
num_queues = dev->real_num_tx_queues;
@@ -1982,6 +2466,10 @@ static int xennet_connect(struct net_device *dev)
* domain a kick because we've probably just requeued some
* packets.
*/
+ netif_tx_lock_bh(np->netdev);
+ netif_device_attach(np->netdev);
+ netif_tx_unlock_bh(np->netdev);
+
netif_carrier_on(np->netdev);
for (j = 0; j < num_queues; ++j) {
queue = &np->queues[j];
@@ -1990,10 +2478,6 @@ static int xennet_connect(struct net_device *dev)
if (queue->tx_irq != queue->rx_irq)
notify_remote_via_irq(queue->rx_irq);
- spin_lock_irq(&queue->tx_lock);
- xennet_tx_buf_gc(queue);
- spin_unlock_irq(&queue->tx_lock);
-
spin_lock_bh(&queue->rx_lock);
xennet_alloc_rx_buffers(queue);
spin_unlock_bh(&queue->rx_lock);
@@ -2002,7 +2486,7 @@ static int xennet_connect(struct net_device *dev)
return 0;
}
-/**
+/*
* Callback received when the backend's state changes.
*/
static void netback_changed(struct xenbus_device *dev,
@@ -2038,7 +2522,7 @@ static void netback_changed(struct xenbus_device *dev,
case XenbusStateClosed:
if (dev->state == XenbusStateClosed)
break;
- /* Missed the backend's CLOSING state -- fallthrough */
+ fallthrough; /* Missed the backend's CLOSING state */
case XenbusStateClosing:
xenbus_frontend_closed(dev);
break;
@@ -2095,6 +2579,7 @@ static const struct ethtool_ops xennet_ethtool_ops =
.get_sset_count = xennet_get_sset_count,
.get_ethtool_stats = xennet_get_ethtool_stats,
.get_strings = xennet_get_strings,
+ .get_ts_info = ethtool_op_get_ts_info,
};
#ifdef CONFIG_SYSFS
@@ -2109,12 +2594,11 @@ static ssize_t store_rxbuf(struct device *dev,
const char *buf, size_t len)
{
char *endp;
- unsigned long target;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- target = simple_strtoul(buf, &endp, 0);
+ simple_strtoul(buf, &endp, 0);
if (endp == buf)
return -EBADMSG;
@@ -2139,28 +2623,43 @@ static const struct attribute_group xennet_dev_group = {
};
#endif /* CONFIG_SYSFS */
-static int xennet_remove(struct xenbus_device *dev)
+static void xennet_bus_close(struct xenbus_device *dev)
{
- struct netfront_info *info = dev_get_drvdata(&dev->dev);
-
- dev_dbg(&dev->dev, "%s\n", dev->nodename);
+ int ret;
- if (xenbus_read_driver_state(dev->otherend) != XenbusStateClosed) {
+ if (xenbus_read_driver_state(dev->otherend) == XenbusStateClosed)
+ return;
+ do {
xenbus_switch_state(dev, XenbusStateClosing);
- wait_event(module_wq,
- xenbus_read_driver_state(dev->otherend) ==
- XenbusStateClosing ||
- xenbus_read_driver_state(dev->otherend) ==
- XenbusStateUnknown);
+ ret = wait_event_timeout(module_wq,
+ xenbus_read_driver_state(dev->otherend) ==
+ XenbusStateClosing ||
+ xenbus_read_driver_state(dev->otherend) ==
+ XenbusStateClosed ||
+ xenbus_read_driver_state(dev->otherend) ==
+ XenbusStateUnknown,
+ XENNET_TIMEOUT);
+ } while (!ret);
+
+ if (xenbus_read_driver_state(dev->otherend) == XenbusStateClosed)
+ return;
+ do {
xenbus_switch_state(dev, XenbusStateClosed);
- wait_event(module_wq,
- xenbus_read_driver_state(dev->otherend) ==
- XenbusStateClosed ||
- xenbus_read_driver_state(dev->otherend) ==
- XenbusStateUnknown);
- }
+ ret = wait_event_timeout(module_wq,
+ xenbus_read_driver_state(dev->otherend) ==
+ XenbusStateClosed ||
+ xenbus_read_driver_state(dev->otherend) ==
+ XenbusStateUnknown,
+ XENNET_TIMEOUT);
+ } while (!ret);
+}
+static void xennet_remove(struct xenbus_device *dev)
+{
+ struct netfront_info *info = dev_get_drvdata(&dev->dev);
+
+ xennet_bus_close(dev);
xennet_disconnect_backend(info);
if (info->netdev->reg_state == NETREG_REGISTERED)
@@ -2172,8 +2671,6 @@ static int xennet_remove(struct xenbus_device *dev)
rtnl_unlock();
}
xennet_free_netdev(info->netdev);
-
- return 0;
}
static const struct xenbus_device_id netfront_ids[] = {
@@ -2199,8 +2696,9 @@ static int __init netif_init(void)
pr_info("Initialising Xen virtual ethernet driver\n");
- /* Allow as many queues as there are CPUs inut max. 8 if user has not
- * specified a value.
+ /* Allow the number of queues to match the number of CPUs, but not exceed
+ * the maximum limit. If the user has not specified a value, the default
+ * maximum limit is 8.
*/
if (xennet_max_queues == 0)
xennet_max_queues = min_t(unsigned int, MAX_QUEUES_DEFAULT,